/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

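/**
 * amdgpu_cs_get_ring - map an IP type and ring index to a hardware ring
 * @adev: amdgpu device
 * @ip_type: IP block type (GFX, COMPUTE, DMA, UVD or VCE)
 * @ip_instance: IP instance, currently must be 0
 * @ring: ring index within the IP block
 * @out_ring: resulting hardware ring
 *
 * Resolves the ring a chunk refers to and validates the index against the
 * number of rings the device actually provides. Returns 0 on success or
 * -EINVAL for an unknown IP type, instance or out of range ring index.
 */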
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
                       u32 ip_instance, u32 ring,
                       struct amdgpu_ring **out_ring)
{
        /* Right now all IPs have only one instance - multiple rings. */
        if (ip_instance != 0) {
                DRM_ERROR("invalid ip instance: %d\n", ip_instance);
                return -EINVAL;
        }

        switch (ip_type) {
        default:
                DRM_ERROR("unknown ip type: %d\n", ip_type);
                return -EINVAL;
        case AMDGPU_HW_IP_GFX:
                if (ring < adev->gfx.num_gfx_rings) {
                        *out_ring = &adev->gfx.gfx_ring[ring];
                } else {
                        DRM_ERROR("only %d gfx rings are supported now\n",
                                  adev->gfx.num_gfx_rings);
                        return -EINVAL;
                }
                break;
        case AMDGPU_HW_IP_COMPUTE:
                if (ring < adev->gfx.num_compute_rings) {
                        *out_ring = &adev->gfx.compute_ring[ring];
                } else {
                        DRM_ERROR("only %d compute rings are supported now\n",
                                  adev->gfx.num_compute_rings);
                        return -EINVAL;
                }
                break;
        case AMDGPU_HW_IP_DMA:
                if (ring < adev->sdma.num_instances) {
                        *out_ring = &adev->sdma.instance[ring].ring;
                } else {
                        DRM_ERROR("only %d SDMA rings are supported\n",
                                  adev->sdma.num_instances);
                        return -EINVAL;
                }
                break;
        case AMDGPU_HW_IP_UVD:
                *out_ring = &adev->uvd.ring;
                break;
        case AMDGPU_HW_IP_VCE:
                if (ring < 2) {
                        *out_ring = &adev->vce.ring[ring];
                } else {
                        DRM_ERROR("only two VCE rings are supported\n");
                        return -EINVAL;
                }
                break;
        }
        return 0;
}

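/**
 * amdgpu_cs_user_fence_chunk - grab the buffer for a user fence
 * @p: parser context
 * @data: fence chunk copied from userspace
 * @offset: returned byte offset of the fence inside the buffer
 *
 * Looks up the GEM object named in the fence chunk, takes a reference on it
 * in the parser's uf_entry and records where the user fence value should be
 * written. Userptr objects are rejected with -EINVAL.
 */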
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
                                      struct drm_amdgpu_cs_chunk_fence *data,
                                      uint32_t *offset)
{
        struct drm_gem_object *gobj;

        gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
                                     data->handle);
        if (gobj == NULL)
                return -EINVAL;

        p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
        p->uf_entry.priority = 0;
        p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
        p->uf_entry.tv.shared = true;
        p->uf_entry.user_pages = NULL;
        *offset = data->offset;

        drm_gem_object_unreference_unlocked(gobj);

        if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
                amdgpu_bo_unref(&p->uf_entry.robj);
                return -EINVAL;
        }

        return 0;
}

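/**
 * amdgpu_cs_parser_init - set up a parser from the CS ioctl arguments
 * @p: parser to initialize
 * @data: the union drm_amdgpu_cs passed in by userspace
 *
 * Copies the chunk array from userspace, counts the IB chunks, handles the
 * optional user fence chunk and allocates the job for this submission.
 * All copied chunk data is freed again on failure.
 */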
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_vm *vm = &fpriv->vm;
        union drm_amdgpu_cs *cs = data;
        uint64_t *chunk_array_user;
        uint64_t *chunk_array;
        unsigned size, num_ibs = 0;
        uint32_t uf_offset = 0;
        int i;
        int ret;

        if (cs->in.num_chunks == 0)
                return 0;

        chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
        if (!chunk_array)
                return -ENOMEM;

        p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
        if (!p->ctx) {
                ret = -EINVAL;
                goto free_chunk;
        }

        /* get chunks */
        chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
        if (copy_from_user(chunk_array, chunk_array_user,
                           sizeof(uint64_t)*cs->in.num_chunks)) {
                ret = -EFAULT;
                goto put_ctx;
        }

        p->nchunks = cs->in.num_chunks;
        p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
                            GFP_KERNEL);
        if (!p->chunks) {
                ret = -ENOMEM;
                goto put_ctx;
        }

        for (i = 0; i < p->nchunks; i++) {
                struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
                struct drm_amdgpu_cs_chunk user_chunk;
                uint32_t __user *cdata;

                chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
                if (copy_from_user(&user_chunk, chunk_ptr,
                                       sizeof(struct drm_amdgpu_cs_chunk))) {
                        ret = -EFAULT;
                        i--;
                        goto free_partial_kdata;
                }
                p->chunks[i].chunk_id = user_chunk.chunk_id;
                p->chunks[i].length_dw = user_chunk.length_dw;

                size = p->chunks[i].length_dw;
                cdata = (void __user *)(unsigned long)user_chunk.chunk_data;

                p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
                if (p->chunks[i].kdata == NULL) {
                        ret = -ENOMEM;
                        i--;
                        goto free_partial_kdata;
                }
                size *= sizeof(uint32_t);
                if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
                        ret = -EFAULT;
                        goto free_partial_kdata;
                }

                switch (p->chunks[i].chunk_id) {
                case AMDGPU_CHUNK_ID_IB:
                        ++num_ibs;
                        break;

                case AMDGPU_CHUNK_ID_FENCE:
                        size = sizeof(struct drm_amdgpu_cs_chunk_fence);
                        if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
                                ret = -EINVAL;
                                goto free_partial_kdata;
                        }

                        ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
                                                         &uf_offset);
                        if (ret)
                                goto free_partial_kdata;

                        break;

                case AMDGPU_CHUNK_ID_DEPENDENCIES:
                        break;

                default:
                        ret = -EINVAL;
                        goto free_partial_kdata;
                }
        }

        ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
        if (ret)
                goto free_all_kdata;

        if (p->uf_entry.robj) {
                p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
                p->job->uf_offset = uf_offset;
        }

        kfree(chunk_array);
        return 0;

free_all_kdata:
        i = p->nchunks - 1;
free_partial_kdata:
        for (; i >= 0; i--)
                drm_free_large(p->chunks[i].kdata);
        kfree(p->chunks);
put_ctx:
        amdgpu_ctx_put(p->ctx);
free_chunk:
        kfree(chunk_array);

        return ret;
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
{
        u64 real_vram_size = adev->mc.real_vram_size;
        u64 vram_usage = atomic64_read(&adev->vram_usage);

        /* This function is based on the current VRAM usage.
         *
         * - If all of VRAM is free, allow relocating the number of bytes that
         *   is equal to 1/4 of the size of VRAM for this IB.
         *
         * - If more than one half of VRAM is occupied, only allow relocating
         *   1 MB of data for this IB.
         *
         * - From 0 to one half of used VRAM, the threshold decreases
         *   linearly.
         *         __________________
         * 1/4 of -|\               |
         * VRAM    | \              |
         *         |  \             |
         *         |   \            |
         *         |    \           |
         *         |     \          |
         *         |      \         |
         *         |       \________|1 MB
         *         |----------------|
         *    VRAM 0 %             100 %
         *         used            used
         *
         * Note: It's a threshold, not a limit. The threshold must be crossed
         * for buffer relocations to stop, so any buffer of an arbitrary size
         * can be moved as long as the threshold isn't crossed before
         * the relocation takes place. We don't want to disable buffer
         * relocations completely.
         *
         * The idea is that buffers should be placed in VRAM at creation time
         * and TTM should only do a minimum number of relocations during
         * command submission. In practice, you need to submit at least
         * a dozen IBs to move all buffers to VRAM if they are in GTT.
         *
         * Also, things can get pretty crazy under memory pressure and actual
         * VRAM usage can change a lot, so playing safe even at 50% does
         * consistently increase performance.
         */

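        /* Worked example with illustrative numbers: on a board with 8 GiB of
         * VRAM and 2 GiB currently in use, half_vram is 4 GiB, half_free_vram
         * is 4 GiB - 2 GiB = 2 GiB and the threshold becomes 1 GiB per IB.
         * With 4 GiB or more in use, half_free_vram is 0 and the threshold
         * clamps to the 1 MiB minimum.
         */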
        u64 half_vram = real_vram_size >> 1;
        u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
        u64 bytes_moved_threshold = half_free_vram >> 1;
        return max(bytes_moved_threshold, 1024*1024ull);
}

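/**
 * amdgpu_cs_list_validate - validate all BOs on a list for this submission
 * @p: parser context
 * @validated: list of amdgpu_bo_list_entry to validate
 *
 * Binds freshly acquired user pages to userptr BOs and validates every buffer
 * in its preferred domain as long as the per-IB move threshold has not been
 * crossed, falling back to the allowed domains afterwards. Returns -EPERM if
 * a userptr BO belongs to a different process.
 */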
int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
                            struct list_head *validated)
{
        struct amdgpu_bo_list_entry *lobj;
        u64 initial_bytes_moved;
        int r;

        list_for_each_entry(lobj, validated, tv.head) {
                struct amdgpu_bo *bo = lobj->robj;
                bool binding_userptr = false;
                struct mm_struct *usermm;
                uint32_t domain;

                usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
                if (usermm && usermm != current->mm)
                        return -EPERM;

                /* Check if we have user pages and nobody bound the BO already */
                if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
                        size_t size = sizeof(struct page *);

                        size *= bo->tbo.ttm->num_pages;
                        memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
                        binding_userptr = true;
                }

                if (bo->pin_count)
                        continue;

                /* Avoid moving this one if we have moved too many buffers
                 * for this IB already.
                 *
                 * Note that this allows moving at least one buffer of
                 * any size, because it doesn't take the current "bo"
                 * into account. We don't want to disallow buffer moves
                 * completely.
                 */
                if (p->bytes_moved <= p->bytes_moved_threshold)
                        domain = bo->prefered_domains;
                else
                        domain = bo->allowed_domains;

        retry:
                amdgpu_ttm_placement_from_domain(bo, domain);
                initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
                r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
                p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
                               initial_bytes_moved;

                if (unlikely(r)) {
                        if (r != -ERESTARTSYS && domain != bo->allowed_domains) {
                                domain = bo->allowed_domains;
                                goto retry;
                        }
                        return r;
                }

                if (binding_userptr) {
                        drm_free_large(lobj->user_pages);
                        lobj->user_pages = NULL;
                }
        }
        return 0;
}

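/**
 * amdgpu_cs_parser_bos - gather, reserve and validate all buffers for a CS
 * @p: parser context
 * @cs: CS ioctl arguments, used for the BO list handle
 *
 * Builds the list of buffers touched by this submission, reserves them,
 * fetches user pages for userptr BOs (retrying a limited number of times if
 * they are invalidated underneath us), validates everything and fills in the
 * GDS/GWS/OA job information from the BO list.
 */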
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
                                union drm_amdgpu_cs *cs)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_bo_list_entry *e;
        struct list_head duplicates;
        bool need_mmap_lock = false;
        unsigned i, tries = 10;
        int r;

        INIT_LIST_HEAD(&p->validated);

        p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
        if (p->bo_list) {
                need_mmap_lock = p->bo_list->first_userptr !=
                        p->bo_list->num_entries;
                amdgpu_bo_list_get_list(p->bo_list, &p->validated);
        }

        INIT_LIST_HEAD(&duplicates);
        amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

        if (p->uf_entry.robj)
                list_add(&p->uf_entry.tv.head, &p->validated);

        if (need_mmap_lock)
                down_read(&current->mm->mmap_sem);

        while (1) {
                struct list_head need_pages;
                unsigned i;

                r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
                                           &duplicates);
                if (unlikely(r != 0))
                        goto error_free_pages;

                /* Without a BO list we don't have userptr BOs */
                if (!p->bo_list)
                        break;

                INIT_LIST_HEAD(&need_pages);
                for (i = p->bo_list->first_userptr;
                     i < p->bo_list->num_entries; ++i) {

                        e = &p->bo_list->array[i];

                        if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
                                 &e->user_invalidated) && e->user_pages) {

                                /* We acquired a page array, but somebody
                                 * invalidated it. Free it and try again.
                                 */
                                release_pages(e->user_pages,
                                              e->robj->tbo.ttm->num_pages,
                                              false);
                                drm_free_large(e->user_pages);
                                e->user_pages = NULL;
                        }

                        if (e->robj->tbo.ttm->state != tt_bound &&
                            !e->user_pages) {
                                list_del(&e->tv.head);
                                list_add(&e->tv.head, &need_pages);

                                amdgpu_bo_unreserve(e->robj);
                        }
                }

                if (list_empty(&need_pages))
                        break;

                /* Unreserve everything again. */
                ttm_eu_backoff_reservation(&p->ticket, &p->validated);

                /* We tried too often, just abort */
                if (!--tries) {
                        r = -EDEADLK;
                        goto error_free_pages;
                }

                /* Fill the page arrays for all userptrs. */
                list_for_each_entry(e, &need_pages, tv.head) {
                        struct ttm_tt *ttm = e->robj->tbo.ttm;

                        e->user_pages = drm_calloc_large(ttm->num_pages,
                                                         sizeof(struct page *));
                        if (!e->user_pages) {
                                r = -ENOMEM;
                                goto error_free_pages;
                        }

                        r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
                        if (r) {
                                drm_free_large(e->user_pages);
                                e->user_pages = NULL;
                                goto error_free_pages;
                        }
                }

                /* And try again. */
                list_splice(&need_pages, &p->validated);
        }

        amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);

        p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
        p->bytes_moved = 0;

        r = amdgpu_cs_list_validate(p, &duplicates);
        if (r)
                goto error_validate;

        r = amdgpu_cs_list_validate(p, &p->validated);
        if (r)
                goto error_validate;

        if (p->bo_list) {
                struct amdgpu_bo *gds = p->bo_list->gds_obj;
                struct amdgpu_bo *gws = p->bo_list->gws_obj;
                struct amdgpu_bo *oa = p->bo_list->oa_obj;
                struct amdgpu_vm *vm = &fpriv->vm;
                unsigned i;

                for (i = 0; i < p->bo_list->num_entries; i++) {
                        struct amdgpu_bo *bo = p->bo_list->array[i].robj;

                        p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
                }

                if (gds) {
                        p->job->gds_base = amdgpu_bo_gpu_offset(gds);
                        p->job->gds_size = amdgpu_bo_size(gds);
                }
                if (gws) {
                        p->job->gws_base = amdgpu_bo_gpu_offset(gws);
                        p->job->gws_size = amdgpu_bo_size(gws);
                }
                if (oa) {
                        p->job->oa_base = amdgpu_bo_gpu_offset(oa);
                        p->job->oa_size = amdgpu_bo_size(oa);
                }
        }

error_validate:
        if (r) {
                amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
                ttm_eu_backoff_reservation(&p->ticket, &p->validated);
        }

error_free_pages:

        if (need_mmap_lock)
                up_read(&current->mm->mmap_sem);

        if (p->bo_list) {
                for (i = p->bo_list->first_userptr;
                     i < p->bo_list->num_entries; ++i) {
                        e = &p->bo_list->array[i];

                        if (!e->user_pages)
                                continue;

                        release_pages(e->user_pages,
                                      e->robj->tbo.ttm->num_pages,
                                      false);
                        drm_free_large(e->user_pages);
                }
        }

        return r;
}

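/**
 * amdgpu_cs_sync_rings - wait for the fences of all validated buffers
 * @p: parser context
 *
 * Adds the fences attached to the reservation objects of all validated BOs
 * to the job's sync object so the submission waits for them before running.
 */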
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
        struct amdgpu_bo_list_entry *e;
        int r;

        list_for_each_entry(e, &p->validated, tv.head) {
                struct reservation_object *resv = e->robj->tbo.resv;
                r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);

                if (r)
                        return r;
        }
        return 0;
}

/**
 * amdgpu_cs_parser_fini() - clean parser states
 * @parser:     parser structure holding parsing context.
 * @error:      error number
 * @backoff:    indicator to backoff the reservation
 *
 * If error is set, then unvalidate the buffers; otherwise just free the
 * memory used by the parsing context.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
        unsigned i;

        if (!error) {
                amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);

                ttm_eu_fence_buffer_objects(&parser->ticket,
                                            &parser->validated,
                                            parser->fence);
        } else if (backoff) {
                ttm_eu_backoff_reservation(&parser->ticket,
                                           &parser->validated);
        }
        fence_put(parser->fence);

        if (parser->ctx)
                amdgpu_ctx_put(parser->ctx);
        if (parser->bo_list)
                amdgpu_bo_list_put(parser->bo_list);

        for (i = 0; i < parser->nchunks; i++)
                drm_free_large(parser->chunks[i].kdata);
        kfree(parser->chunks);
        if (parser->job)
                amdgpu_job_free(parser->job);
        amdgpu_bo_unref(&parser->uf_entry.robj);
}

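/**
 * amdgpu_bo_vm_update_pte - update the page tables for all mapped buffers
 * @p: parser context
 * @vm: VM of the submitting process
 *
 * Updates the page directory, clears freed mappings, updates the page table
 * entries of every BO in the list and makes the job wait for the resulting
 * fences.
 */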
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
                                   struct amdgpu_vm *vm)
{
        struct amdgpu_device *adev = p->adev;
        struct amdgpu_bo_va *bo_va;
        struct amdgpu_bo *bo;
        int i, r;

        r = amdgpu_vm_update_page_directory(adev, vm);
        if (r)
                return r;

        r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
        if (r)
                return r;

        r = amdgpu_vm_clear_freed(adev, vm);
        if (r)
                return r;

        if (p->bo_list) {
                for (i = 0; i < p->bo_list->num_entries; i++) {
                        struct fence *f;

                        /* ignore duplicates */
                        bo = p->bo_list->array[i].robj;
                        if (!bo)
                                continue;

                        bo_va = p->bo_list->array[i].bo_va;
                        if (bo_va == NULL)
                                continue;

                        r = amdgpu_vm_bo_update(adev, bo_va, &bo->tbo.mem);
                        if (r)
                                return r;

                        f = bo_va->last_pt_update;
                        r = amdgpu_sync_fence(adev, &p->job->sync, f);
                        if (r)
                                return r;
                }

        }

        r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);

        if (amdgpu_vm_debug && p->bo_list) {
                /* Invalidate all BOs to test for userspace bugs */
                for (i = 0; i < p->bo_list->num_entries; i++) {
                        /* ignore duplicates */
                        bo = p->bo_list->array[i].robj;
                        if (!bo)
                                continue;

                        amdgpu_vm_bo_invalidate(adev, bo);
                }
        }

        return r;
}

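/**
 * amdgpu_cs_ib_vm_chunk - run the IB parser and update the VM
 * @adev: amdgpu device
 * @p: parser context
 *
 * Parses the IBs for rings that require CS parsing (UVD/VCE VM emulation),
 * then updates the page tables and syncs the job against all validated
 * buffers.
 */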
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
                                 struct amdgpu_cs_parser *p)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        struct amdgpu_vm *vm = &fpriv->vm;
        struct amdgpu_ring *ring = p->job->ring;
        int i, r;

        /* Only for UVD/VCE VM emulation */
        if (ring->funcs->parse_cs) {
                for (i = 0; i < p->job->num_ibs; i++) {
                        r = amdgpu_ring_parse_cs(ring, p, i);
                        if (r)
                                return r;
                }
        }

        r = amdgpu_bo_vm_update_pte(p, vm);
        if (!r)
                amdgpu_cs_sync_rings(p);

        return r;
}

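/**
 * amdgpu_cs_handle_lockup - react to a GPU lockup detected during CS
 * @adev: amdgpu device
 * @r: error code from the submission path
 *
 * If the submission failed with -EDEADLK the GPU is reset and -EAGAIN is
 * returned so that userspace retries the submission.
 */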
static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
{
        if (r == -EDEADLK) {
                r = amdgpu_gpu_reset(adev);
                if (!r)
                        r = -EAGAIN;
        }
        return r;
}

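/**
 * amdgpu_cs_ib_fill - set up the IBs of the job from the IB chunks
 * @adev: amdgpu device
 * @parser: parser context
 *
 * Resolves the target ring for every IB chunk. Rings with a parse_cs callback
 * get the IB contents copied from the mapped BO into a kernel IB; all other
 * rings execute the IB directly from its VM address. User fences are rejected
 * for UVD and VCE since the firmware does not support them.
 */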
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
                             struct amdgpu_cs_parser *parser)
{
        struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
        struct amdgpu_vm *vm = &fpriv->vm;
        int i, j;
        int r;

        for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
                struct amdgpu_cs_chunk *chunk;
                struct amdgpu_ib *ib;
                struct drm_amdgpu_cs_chunk_ib *chunk_ib;
                struct amdgpu_ring *ring;

                chunk = &parser->chunks[i];
                ib = &parser->job->ibs[j];
                chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

                if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
                        continue;

                r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
                                       chunk_ib->ip_instance, chunk_ib->ring,
                                       &ring);
                if (r)
                        return r;

                if (parser->job->ring && parser->job->ring != ring)
                        return -EINVAL;

                parser->job->ring = ring;

                if (ring->funcs->parse_cs) {
                        struct amdgpu_bo_va_mapping *m;
                        struct amdgpu_bo *aobj = NULL;
                        uint64_t offset;
                        uint8_t *kptr;

                        m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
                                                   &aobj);
                        if (!aobj) {
                                DRM_ERROR("IB va_start is invalid\n");
                                return -EINVAL;
                        }

                        if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
                            (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
                                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
                                return -EINVAL;
                        }

                        /* the IB should be reserved at this point */
                        r = amdgpu_bo_kmap(aobj, (void **)&kptr);
                        if (r)
                                return r;

                        offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
                        kptr += chunk_ib->va_start - offset;

                        r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
                        if (r) {
                                DRM_ERROR("Failed to get ib!\n");
                                return r;
                        }

                        memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
                        amdgpu_bo_kunmap(aobj);
                } else {
                        r = amdgpu_ib_get(adev, vm, 0, ib);
                        if (r) {
                                DRM_ERROR("Failed to get ib!\n");
                                return r;
                        }

                        ib->gpu_addr = chunk_ib->va_start;
                }

                ib->length_dw = chunk_ib->ib_bytes / 4;
                ib->flags = chunk_ib->flags;
                j++;
        }

        /* UVD & VCE fw doesn't support user fences */
        if (parser->job->uf_bo && (
            parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
            parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
                return -EINVAL;

        return 0;
}

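/**
 * amdgpu_cs_dependencies - wait for the fences of all dependency chunks
 * @adev: amdgpu device
 * @p: parser context
 *
 * Looks up the fence described by each dependency entry (context, ring and
 * sequence number) and adds it to the job's sync object so the submission
 * only runs after those fences have signaled.
 */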
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
                                  struct amdgpu_cs_parser *p)
{
        struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
        int i, j, r;

        for (i = 0; i < p->nchunks; ++i) {
                struct drm_amdgpu_cs_chunk_dep *deps;
                struct amdgpu_cs_chunk *chunk;
                unsigned num_deps;

                chunk = &p->chunks[i];

                if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
                        continue;

                deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
                num_deps = chunk->length_dw * 4 /
                        sizeof(struct drm_amdgpu_cs_chunk_dep);

                for (j = 0; j < num_deps; ++j) {
                        struct amdgpu_ring *ring;
                        struct amdgpu_ctx *ctx;
                        struct fence *fence;

                        r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
                                               deps[j].ip_instance,
                                               deps[j].ring, &ring);
                        if (r)
                                return r;

                        ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
                        if (ctx == NULL)
                                return -EINVAL;

                        fence = amdgpu_ctx_get_fence(ctx, ring,
                                                     deps[j].handle);
                        if (IS_ERR(fence)) {
                                r = PTR_ERR(fence);
                                amdgpu_ctx_put(ctx);
                                return r;

                        } else if (fence) {
                                r = amdgpu_sync_fence(adev, &p->job->sync,
                                                      fence);
                                fence_put(fence);
                                amdgpu_ctx_put(ctx);
                                if (r)
                                        return r;
                        }
                }
        }

        return 0;
}

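/**
 * amdgpu_cs_submit - push the job to the GPU scheduler
 * @p: parser context
 * @cs: CS ioctl arguments, used to return the fence handle
 *
 * Hands the prepared job over to the scheduler entity of the context's ring,
 * stores the scheduler fence as the CS fence and returns its sequence number
 * to userspace in cs->out.handle.
 */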
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
                            union drm_amdgpu_cs *cs)
{
        struct amdgpu_ring *ring = p->job->ring;
        struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
        struct fence *fence;
        struct amdgpu_job *job;
        int r;

        job = p->job;
        p->job = NULL;

        r = amd_sched_job_init(&job->base, &ring->sched,
                               entity, amdgpu_job_timeout_func,
                               amdgpu_job_free_func,
                               p->filp, &fence);
        if (r) {
                amdgpu_job_free(job);
                return r;
        }

        job->owner = p->filp;
        job->ctx = entity->fence_context;
        p->fence = fence_get(fence);
        cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
        job->uf_sequence = cs->out.handle;

        trace_amdgpu_cs_ioctl(job);
        amd_sched_entity_push_job(&job->base);

        return 0;
}

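/**
 * amdgpu_cs_ioctl - command submission ioctl entry point
 * @dev: drm device
 * @data: the union drm_amdgpu_cs from userspace
 * @filp: file private
 *
 * Initializes the parser, reserves and validates all buffers, fills in the
 * IBs, resolves dependencies and finally submits the job to the scheduler,
 * cleaning up and handling GPU lockups on the way out.
 */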
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
        struct amdgpu_device *adev = dev->dev_private;
        union drm_amdgpu_cs *cs = data;
        struct amdgpu_cs_parser parser = {};
        bool reserved_buffers = false;
        int i, r;

        if (!adev->accel_working)
                return -EBUSY;

        parser.adev = adev;
        parser.filp = filp;

        r = amdgpu_cs_parser_init(&parser, data);
        if (r) {
                DRM_ERROR("Failed to initialize parser!\n");
                amdgpu_cs_parser_fini(&parser, r, false);
                r = amdgpu_cs_handle_lockup(adev, r);
                return r;
        }
        r = amdgpu_cs_parser_bos(&parser, data);
        if (r == -ENOMEM)
                DRM_ERROR("Not enough memory for command submission!\n");
        else if (r && r != -ERESTARTSYS)
                DRM_ERROR("Failed to process the buffer list %d!\n", r);
        else if (!r) {
                reserved_buffers = true;
                r = amdgpu_cs_ib_fill(adev, &parser);
        }

        if (!r) {
                r = amdgpu_cs_dependencies(adev, &parser);
                if (r)
                        DRM_ERROR("Failed in the dependencies handling %d!\n", r);
        }

        if (r)
                goto out;

        for (i = 0; i < parser.job->num_ibs; i++)
                trace_amdgpu_cs(&parser, i);

        r = amdgpu_cs_ib_vm_chunk(adev, &parser);
        if (r)
                goto out;

        r = amdgpu_cs_submit(&parser, cs);

out:
        amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
        r = amdgpu_cs_handle_lockup(adev, r);
        return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
                         struct drm_file *filp)
{
        union drm_amdgpu_wait_cs *wait = data;
        struct amdgpu_device *adev = dev->dev_private;
        unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
        struct amdgpu_ring *ring = NULL;
        struct amdgpu_ctx *ctx;
        struct fence *fence;
        long r;

        r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
                               wait->in.ring, &ring);
        if (r)
                return r;

        ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
        if (ctx == NULL)
                return -EINVAL;

        fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
        if (IS_ERR(fence))
                r = PTR_ERR(fence);
        else if (fence) {
                r = fence_wait_timeout(fence, true, timeout);
                fence_put(fence);
        } else
                r = 1;

        amdgpu_ctx_put(ctx);
        if (r < 0)
                return r;

        memset(wait, 0, sizeof(*wait));
        wait->out.status = (r == 0);

        return 0;
}

/**
 * amdgpu_cs_find_mapping - find the bo_va mapping for a VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns the mapping structure when found, NULL
 * otherwise.
 */
struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                       uint64_t addr, struct amdgpu_bo **bo)
{
        struct amdgpu_bo_va_mapping *mapping;
        unsigned i;

        if (!parser->bo_list)
                return NULL;

        addr /= AMDGPU_GPU_PAGE_SIZE;

        for (i = 0; i < parser->bo_list->num_entries; i++) {
                struct amdgpu_bo_list_entry *lobj;

                lobj = &parser->bo_list->array[i];
                if (!lobj->bo_va)
                        continue;

                list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
                        if (mapping->it.start > addr ||
                            addr > mapping->it.last)
                                continue;

                        *bo = lobj->bo_va->bo;
                        return mapping;
                }

                list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
                        if (mapping->it.start > addr ||
                            addr > mapping->it.last)
                                continue;

                        *bo = lobj->bo_va->bo;
                        return mapping;
                }
        }

        return NULL;
}