[cascardo/linux.git] drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
#include <linux/list_sort.h>
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

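/**
 * amdgpu_cs_get_ring - resolve an IP type/instance/ring index to a ring
 * @adev: amdgpu device pointer
 * @ip_type: AMDGPU_HW_IP_* block the submission targets
 * @ip_instance: instance of the IP block (only instance 0 exists today)
 * @ring: ring index within that IP block
 * @out_ring: on success, points to the selected amdgpu_ring
 *
 * Validates the user-supplied ring selection against the rings the hardware
 * block actually exposes. Returns 0 on success, or -EINVAL for an unknown
 * type, a non-zero instance or an out-of-range ring index.
 */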
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
		       u32 ip_instance, u32 ring,
		       struct amdgpu_ring **out_ring)
{
	/* Right now all IPs have only one instance - multiple rings. */
	if (ip_instance != 0) {
		DRM_ERROR("invalid ip instance: %d\n", ip_instance);
		return -EINVAL;
	}

	switch (ip_type) {
	default:
		DRM_ERROR("unknown ip type: %d\n", ip_type);
		return -EINVAL;
	case AMDGPU_HW_IP_GFX:
		if (ring < adev->gfx.num_gfx_rings) {
			*out_ring = &adev->gfx.gfx_ring[ring];
		} else {
			DRM_ERROR("only %d gfx rings are supported now\n",
				  adev->gfx.num_gfx_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_COMPUTE:
		if (ring < adev->gfx.num_compute_rings) {
			*out_ring = &adev->gfx.compute_ring[ring];
		} else {
			DRM_ERROR("only %d compute rings are supported now\n",
				  adev->gfx.num_compute_rings);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_DMA:
		if (ring < adev->sdma.num_instances) {
			*out_ring = &adev->sdma.instance[ring].ring;
		} else {
			DRM_ERROR("only %d SDMA rings are supported\n",
				  adev->sdma.num_instances);
			return -EINVAL;
		}
		break;
	case AMDGPU_HW_IP_UVD:
		*out_ring = &adev->uvd.ring;
		break;
	case AMDGPU_HW_IP_VCE:
		if (ring < 2) {
			*out_ring = &adev->vce.ring[ring];
		} else {
			DRM_ERROR("only two VCE rings are supported\n");
			return -EINVAL;
		}
		break;
	}
	return 0;
}

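/**
 * amdgpu_cs_user_fence_chunk - set up the user fence BO for a submission
 * @p: parser context
 * @uf: user fence structure to fill in
 * @fence_data: AMDGPU_CHUNK_ID_FENCE data copied in from userspace
 *
 * Looks up the GEM object named in the fence chunk, takes a reference on the
 * backing BO and records the fence offset. Userptr BOs are rejected. The BO
 * is also recorded in @p->uf_entry so that it is reserved and validated
 * together with the rest of the buffer list. Returns 0 or -EINVAL.
 */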
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
				      struct amdgpu_user_fence *uf,
				      struct drm_amdgpu_cs_chunk_fence *fence_data)
{
	struct drm_gem_object *gobj;
	uint32_t handle;

	handle = fence_data->handle;
	gobj = drm_gem_object_lookup(p->adev->ddev, p->filp,
				     fence_data->handle);
	if (gobj == NULL)
		return -EINVAL;

	uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
	uf->offset = fence_data->offset;

	if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
		drm_gem_object_unreference_unlocked(gobj);
		return -EINVAL;
	}

	p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
	p->uf_entry.priority = 0;
	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
	p->uf_entry.tv.shared = true;
	p->uf_entry.user_pages = NULL;

	drm_gem_object_unreference_unlocked(gobj);
	return 0;
}

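/**
 * amdgpu_cs_parser_init - copy in and validate the CS chunks
 * @p: parser context to initialize
 * @data: ioctl payload (union drm_amdgpu_cs)
 *
 * Takes a reference on the submission context, copies the chunk array from
 * userspace and allocates kernel copies of every chunk. IB chunks are counted
 * so that amdgpu_job_alloc() can size the job, the fence chunk is handed to
 * amdgpu_cs_user_fence_chunk() and dependency chunks are kept for later
 * processing. On error everything allocated so far is freed again.
 */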
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	union drm_amdgpu_cs *cs = data;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	struct amdgpu_user_fence uf = {};
	unsigned size, num_ibs = 0;
	int i;
	int ret;

	if (cs->in.num_chunks == 0)
		return 0;

	chunk_array = kmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (!chunk_array)
		return -ENOMEM;

	p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
	if (!p->ctx) {
		ret = -EINVAL;
		goto free_chunk;
	}

	/* get chunks */
	chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
	if (copy_from_user(chunk_array, chunk_array_user,
			   sizeof(uint64_t)*cs->in.num_chunks)) {
		ret = -EFAULT;
		goto put_ctx;
	}

	p->nchunks = cs->in.num_chunks;
	p->chunks = kmalloc_array(p->nchunks, sizeof(struct amdgpu_cs_chunk),
			    GFP_KERNEL);
	if (!p->chunks) {
		ret = -ENOMEM;
		goto put_ctx;
	}

	for (i = 0; i < p->nchunks; i++) {
		struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL;
		struct drm_amdgpu_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
		if (copy_from_user(&user_chunk, chunk_ptr,
				   sizeof(struct drm_amdgpu_cs_chunk))) {
			ret = -EFAULT;
			i--;
			goto free_partial_kdata;
		}
		p->chunks[i].chunk_id = user_chunk.chunk_id;
		p->chunks[i].length_dw = user_chunk.length_dw;

		size = p->chunks[i].length_dw;
		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;

		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
		if (p->chunks[i].kdata == NULL) {
			ret = -ENOMEM;
			i--;
			goto free_partial_kdata;
		}
		size *= sizeof(uint32_t);
		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
			ret = -EFAULT;
			goto free_partial_kdata;
		}

		switch (p->chunks[i].chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			++num_ibs;
			break;

		case AMDGPU_CHUNK_ID_FENCE:
			size = sizeof(struct drm_amdgpu_cs_chunk_fence);
			if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
				ret = -EINVAL;
				goto free_partial_kdata;
			}

			ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
			if (ret)
				goto free_partial_kdata;

			break;

		case AMDGPU_CHUNK_ID_DEPENDENCIES:
			break;

		default:
			ret = -EINVAL;
			goto free_partial_kdata;
		}
	}

	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
	if (ret)
		goto free_all_kdata;

	p->job->uf = uf;

	kfree(chunk_array);
	return 0;

free_all_kdata:
	i = p->nchunks - 1;
free_partial_kdata:
	for (; i >= 0; i--)
		drm_free_large(p->chunks[i].kdata);
	kfree(p->chunks);
put_ctx:
	amdgpu_ctx_put(p->ctx);
free_chunk:
	kfree(chunk_array);

	return ret;
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
{
	u64 real_vram_size = adev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&adev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.
	 *
	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *         __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */
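	/* A quick worked example of the heuristic above (the numbers are
	 * purely illustrative): with 4 GiB of VRAM and 1 GiB currently in
	 * use, half_vram is 2 GiB and half_free_vram is 1 GiB, so the
	 * threshold for this IB comes out at 512 MiB. Once usage reaches
	 * 2 GiB (half of VRAM), half_free_vram drops to 0 and the threshold
	 * clamps to the 1 MiB minimum enforced by the max() below.
	 */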

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

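/**
 * amdgpu_cs_list_validate - validate (and possibly move) a list of BOs
 * @p: parser context
 * @validated: list of amdgpu_bo_list_entry to validate
 *
 * Binds freshly acquired user pages where needed, then validates each BO
 * through TTM. BOs are placed in their preferred domains as long as the
 * per-IB bytes-moved threshold has not been crossed; after that only the
 * allowed domains are used, and validation is retried with the allowed
 * domains if the preferred placement fails. Pinned BOs are left alone.
 * Returns -EPERM if a userptr BO belongs to a different process.
 */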
int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
			    struct list_head *validated)
{
	struct amdgpu_bo_list_entry *lobj;
	u64 initial_bytes_moved;
	int r;

	list_for_each_entry(lobj, validated, tv.head) {
		struct amdgpu_bo *bo = lobj->robj;
		bool binding_userptr = false;
		struct mm_struct *usermm;
		uint32_t domain;

		usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
		if (usermm && usermm != current->mm)
			return -EPERM;

		/* Check if we have user pages and nobody bound the BO already */
		if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
			size_t size = sizeof(struct page *);

			size *= bo->tbo.ttm->num_pages;
			memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
			binding_userptr = true;
		}

		if (bo->pin_count)
			continue;

		/* Avoid moving this one if we have moved too many buffers
		 * for this IB already.
		 *
		 * Note that this allows moving at least one buffer of
		 * any size, because it doesn't take the current "bo"
		 * into account. We don't want to disallow buffer moves
		 * completely.
		 */
		if (p->bytes_moved <= p->bytes_moved_threshold)
			domain = bo->prefered_domains;
		else
			domain = bo->allowed_domains;

	retry:
		amdgpu_ttm_placement_from_domain(bo, domain);
		initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
		p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
			       initial_bytes_moved;

		if (unlikely(r)) {
			if (r != -ERESTARTSYS && domain != bo->allowed_domains) {
				domain = bo->allowed_domains;
				goto retry;
			}
			return r;
		}

		if (binding_userptr) {
			drm_free_large(lobj->user_pages);
			lobj->user_pages = NULL;
		}
	}
	return 0;
}

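/**
 * amdgpu_cs_parser_bos - reserve and validate all BOs of a submission
 * @p: parser context
 * @cs: ioctl payload, used for the BO list handle
 *
 * Builds the validation list from the BO list, the page directory and the
 * optional user fence BO, reserves it, (re)acquires user pages for userptr
 * BOs that were invalidated, and validates everything against the
 * bytes-moved threshold. On success the buffers stay reserved; on error the
 * reservation is backed off and any acquired page arrays are released.
 */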
static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
				union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	bool need_mmap_lock = false;
	unsigned i, tries = 10;
	int r;

	INIT_LIST_HEAD(&p->validated);

	p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
	if (p->bo_list) {
		need_mmap_lock = p->bo_list->first_userptr !=
			p->bo_list->num_entries;
		amdgpu_bo_list_get_list(p->bo_list, &p->validated);
	}

	INIT_LIST_HEAD(&duplicates);
	amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);

	if (p->job->uf.bo)
		list_add(&p->uf_entry.tv.head, &p->validated);

	if (need_mmap_lock)
		down_read(&current->mm->mmap_sem);

	while (1) {
		struct list_head need_pages;
		unsigned i;

		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
					   &duplicates);
		if (unlikely(r != 0))
			goto error_free_pages;

		/* Without a BO list we don't have userptr BOs */
		if (!p->bo_list)
			break;

		INIT_LIST_HEAD(&need_pages);
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {

			e = &p->bo_list->array[i];

			if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
				 &e->user_invalidated) && e->user_pages) {

				/* We acquired a page array, but somebody
				 * invalidated it. Free it and try again
				 */
				release_pages(e->user_pages,
					      e->robj->tbo.ttm->num_pages,
					      false);
				drm_free_large(e->user_pages);
				e->user_pages = NULL;
			}

			if (e->robj->tbo.ttm->state != tt_bound &&
			    !e->user_pages) {
				list_del(&e->tv.head);
				list_add(&e->tv.head, &need_pages);

				amdgpu_bo_unreserve(e->robj);
			}
		}

		if (list_empty(&need_pages))
			break;

		/* Unreserve everything again. */
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);

		/* We tried too often, just abort */
		if (!--tries) {
			r = -EDEADLK;
			goto error_free_pages;
		}

		/* Fill the page arrays for all userptrs. */
		list_for_each_entry(e, &need_pages, tv.head) {
			struct ttm_tt *ttm = e->robj->tbo.ttm;

			e->user_pages = drm_calloc_large(ttm->num_pages,
							 sizeof(struct page *));
			if (!e->user_pages) {
				r = -ENOMEM;
				goto error_free_pages;
			}

			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
			if (r) {
				drm_free_large(e->user_pages);
				e->user_pages = NULL;
				goto error_free_pages;
			}
		}

		/* And try again. */
		list_splice(&need_pages, &p->validated);
	}

	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);

	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
	p->bytes_moved = 0;

	r = amdgpu_cs_list_validate(p, &duplicates);
	if (r)
		goto error_validate;

	r = amdgpu_cs_list_validate(p, &p->validated);
	if (r)
		goto error_validate;

	if (p->bo_list) {
		struct amdgpu_vm *vm = &fpriv->vm;
		unsigned i;

		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct amdgpu_bo *bo = p->bo_list->array[i].robj;

			p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
		}
	}

error_validate:
	if (r) {
		amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
	}

error_free_pages:

	if (need_mmap_lock)
		up_read(&current->mm->mmap_sem);

	if (p->bo_list) {
		for (i = p->bo_list->first_userptr;
		     i < p->bo_list->num_entries; ++i) {
			e = &p->bo_list->array[i];

			if (!e->user_pages)
				continue;

			release_pages(e->user_pages,
				      e->robj->tbo.ttm->num_pages,
				      false);
			drm_free_large(e->user_pages);
		}
	}

	return r;
}

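/**
 * amdgpu_cs_sync_rings - sync the job to the fences of all validated BOs
 * @p: parser context
 *
 * Adds the reservation-object fences of every validated BO to the job's
 * sync object so the submission waits for prior users of those buffers.
 */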
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_bo_list_entry *e;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
		struct reservation_object *resv = e->robj->tbo.resv;
		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);

		if (r)
			return r;
	}
	return 0;
}

static int cmp_size_smaller_first(void *priv, struct list_head *a,
				  struct list_head *b)
{
	struct amdgpu_bo_list_entry *la = list_entry(a, struct amdgpu_bo_list_entry, tv.head);
	struct amdgpu_bo_list_entry *lb = list_entry(b, struct amdgpu_bo_list_entry, tv.head);

	/* Sort A before B if A is smaller. */
	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
}

/**
 * amdgpu_cs_parser_fini() - clean up parser state
 * @parser:     parser structure holding parsing context.
 * @error:      error number
 * @backoff:    whether the buffers are still reserved and need backing off
 *
 * If error is set, the reservation is backed off (when @backoff says the
 * buffers are still reserved); otherwise the validated buffers are fenced.
 * In both cases the memory used by the parsing context is freed.
 **/
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bool backoff)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	unsigned i;

	if (!error) {
		amdgpu_vm_move_pt_bos_in_lru(parser->adev, &fpriv->vm);

		/* Sort the buffer list from the smallest to largest buffer,
		 * which affects the order of buffers in the LRU list.
		 * This assures that the smallest buffers are added first
		 * to the LRU list, so they are likely to be later evicted
		 * first, instead of large buffers whose eviction is more
		 * expensive.
		 *
		 * This slightly lowers the number of bytes moved by TTM
		 * per frame under memory pressure.
		 */
		list_sort(NULL, &parser->validated, cmp_size_smaller_first);

		ttm_eu_fence_buffer_objects(&parser->ticket,
					    &parser->validated,
					    parser->fence);
	} else if (backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
	}
	fence_put(parser->fence);

	if (parser->ctx)
		amdgpu_ctx_put(parser->ctx);
	if (parser->bo_list)
		amdgpu_bo_list_put(parser->bo_list);

	for (i = 0; i < parser->nchunks; i++)
		drm_free_large(parser->chunks[i].kdata);
	kfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
	amdgpu_bo_unref(&parser->uf_entry.robj);
}

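/**
 * amdgpu_bo_vm_update_pte - update the VM page tables for a submission
 * @p: parser context
 * @vm: VM of the submitting process
 *
 * Updates the page directory, clears freed mappings, updates the bo_va
 * mappings of every BO in the list and makes the job wait for the resulting
 * page-table fences. With the amdgpu_vm_debug option set, all BOs are
 * invalidated afterwards to shake out userspace bugs.
 */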
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
				   struct amdgpu_vm *vm)
{
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	int i, r;

	r = amdgpu_vm_update_page_directory(adev, vm);
	if (r)
		return r;

	r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
	if (r)
		return r;

	r = amdgpu_vm_clear_freed(adev, vm);
	if (r)
		return r;

	if (p->bo_list) {
		for (i = 0; i < p->bo_list->num_entries; i++) {
			struct fence *f;

			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			bo_va = p->bo_list->array[i].bo_va;
			if (bo_va == NULL)
				continue;

			r = amdgpu_vm_bo_update(adev, bo_va, &bo->tbo.mem);
			if (r)
				return r;

			f = bo_va->last_pt_update;
			r = amdgpu_sync_fence(adev, &p->job->sync, f);
			if (r)
				return r;
		}

	}

	r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);

	if (amdgpu_vm_debug && p->bo_list) {
		/* Invalidate all BOs to test for userspace bugs */
		for (i = 0; i < p->bo_list->num_entries; i++) {
			/* ignore duplicates */
			bo = p->bo_list->array[i].robj;
			if (!bo)
				continue;

			amdgpu_vm_bo_invalidate(adev, bo);
		}
	}

	return r;
}

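/**
 * amdgpu_cs_ib_vm_chunk - patch IBs and prepare the VM for execution
 * @adev: amdgpu device pointer
 * @p: parser context
 *
 * Runs the ring-specific command-stream parser for rings that emulate VM
 * access (UVD/VCE), then updates the page tables and syncs the job to the
 * fences of all validated buffers.
 */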
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
				 struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_ring *ring = p->job->ring;
	int i, r;

	/* Only for UVD/VCE VM emulation */
	if (ring->funcs->parse_cs) {
		for (i = 0; i < p->job->num_ibs; i++) {
			r = amdgpu_ring_parse_cs(ring, p, i);
			if (r)
				return r;
		}
	}

	r = amdgpu_bo_vm_update_pte(p, vm);
	if (!r)
		amdgpu_cs_sync_rings(p);

	return r;
}

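/**
 * amdgpu_cs_handle_lockup - translate a deadlock into a GPU reset
 * @adev: amdgpu device pointer
 * @r: error code from command submission
 *
 * A -EDEADLK result means the GPU is wedged; trigger a reset and, if the
 * reset succeeds, return -EAGAIN so userspace resubmits.
 */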
static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
{
	if (r == -EDEADLK) {
		r = amdgpu_gpu_reset(adev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

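/**
 * amdgpu_cs_ib_fill - turn the IB chunks into amdgpu_ib structures
 * @adev: amdgpu device pointer
 * @parser: parser context
 *
 * Resolves the target ring (all IBs of one submission must use the same
 * ring), copies IBs into kernel memory for rings with a CS parser and
 * otherwise records their GPU addresses, attaches the optional GDS/GWS/OA
 * resources to the first IB and wires the user fence to the last one.
 * UVD and VCE firmware do not support user fences, so those are rejected.
 */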
static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int i, j;
	int r;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
		struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct amdgpu_ring *ring;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;

		r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type,
				       chunk_ib->ip_instance, chunk_ib->ring,
				       &ring);
		if (r)
			return r;

		if (parser->job->ring && parser->job->ring != ring)
			return -EINVAL;

		parser->job->ring = ring;

		if (ring->funcs->parse_cs) {
			struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			uint64_t offset;
			uint8_t *kptr;

			m = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
						   &aobj);
			if (!aobj) {
				DRM_ERROR("IB va_start is invalid\n");
				return -EINVAL;
			}

			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
			    (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
				return -EINVAL;
			}

			/* the IB should be reserved at this point */
			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
			if (r)
				return r;

			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
			kptr += chunk_ib->va_start - offset;

			r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
			if (r) {
				DRM_ERROR("Failed to get ib!\n");
				return r;
			}

			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
			amdgpu_bo_kunmap(aobj);
		} else {
			r = amdgpu_ib_get(adev, vm, 0, ib);
			if (r) {
				DRM_ERROR("Failed to get ib!\n");
				return r;
			}

			ib->gpu_addr = chunk_ib->va_start;
		}

		ib->length_dw = chunk_ib->ib_bytes / 4;
		ib->flags = chunk_ib->flags;
		ib->ctx = parser->ctx;
		j++;
	}

	/* add GDS resources to first IB */
	if (parser->bo_list) {
		struct amdgpu_bo *gds = parser->bo_list->gds_obj;
		struct amdgpu_bo *gws = parser->bo_list->gws_obj;
		struct amdgpu_bo *oa = parser->bo_list->oa_obj;
		struct amdgpu_ib *ib = &parser->job->ibs[0];

		if (gds) {
			ib->gds_base = amdgpu_bo_gpu_offset(gds);
			ib->gds_size = amdgpu_bo_size(gds);
		}
		if (gws) {
			ib->gws_base = amdgpu_bo_gpu_offset(gws);
			ib->gws_size = amdgpu_bo_size(gws);
		}
		if (oa) {
			ib->oa_base = amdgpu_bo_gpu_offset(oa);
			ib->oa_size = amdgpu_bo_size(oa);
		}
	}
	/* wrap the last IB with user fence */
	if (parser->job->uf.bo) {
		struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];

		/* UVD & VCE fw doesn't support user fences */
		if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
		    parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
			return -EINVAL;

		ib->user = &parser->job->uf;
	}

	return 0;
}

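/**
 * amdgpu_cs_dependencies - add fences from dependency chunks to the job
 * @adev: amdgpu device pointer
 * @p: parser context
 *
 * For every entry of every AMDGPU_CHUNK_ID_DEPENDENCIES chunk, looks up the
 * referenced context/ring fence and adds it to the job's sync object so the
 * submission is not scheduled before its dependencies have completed.
 */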
static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	int i, j, r;

	for (i = 0; i < p->nchunks; ++i) {
		struct drm_amdgpu_cs_chunk_dep *deps;
		struct amdgpu_cs_chunk *chunk;
		unsigned num_deps;

		chunk = &p->chunks[i];

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
			continue;

		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
		num_deps = chunk->length_dw * 4 /
			sizeof(struct drm_amdgpu_cs_chunk_dep);

		for (j = 0; j < num_deps; ++j) {
			struct amdgpu_ring *ring;
			struct amdgpu_ctx *ctx;
			struct fence *fence;

			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
					       deps[j].ip_instance,
					       deps[j].ring, &ring);
			if (r)
				return r;

			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
			if (ctx == NULL)
				return -EINVAL;

			fence = amdgpu_ctx_get_fence(ctx, ring,
						     deps[j].handle);
			if (IS_ERR(fence)) {
				r = PTR_ERR(fence);
				amdgpu_ctx_put(ctx);
				return r;

			} else if (fence) {
				r = amdgpu_sync_fence(adev, &p->job->sync,
						      fence);
				fence_put(fence);
				amdgpu_ctx_put(ctx);
				if (r)
					return r;
			}
		}
	}

	return 0;
}

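/**
 * amdgpu_cs_submit - hand the finished job over to the GPU scheduler
 * @p: parser context
 * @cs: ioctl payload, receives the fence handle in cs->out.handle
 *
 * Creates the scheduler fence, publishes it as the submission's sequence
 * number in the context's fence ring and pushes the job to the scheduler
 * entity of the selected ring. The job now belongs to the scheduler, so
 * p->job is cleared.
 */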
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_ring *ring = p->job->ring;
	struct amd_sched_fence *fence;
	struct amdgpu_job *job;

	job = p->job;
	p->job = NULL;

	job->base.sched = &ring->sched;
	job->base.s_entity = &p->ctx->rings[ring->idx].entity;
	job->owner = p->filp;

	fence = amd_sched_fence_create(job->base.s_entity, p->filp);
	if (!fence) {
		amdgpu_job_free(job);
		return -ENOMEM;
	}

	job->base.s_fence = fence;
	p->fence = fence_get(&fence->base);

	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring,
					      &fence->base);
	job->ibs[job->num_ibs - 1].sequence = cs->out.handle;

	trace_amdgpu_cs_ioctl(job);
	amd_sched_entity_push_job(&job->base);

	return 0;
}

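/**
 * amdgpu_cs_ioctl - DRM_AMDGPU_CS ioctl entry point
 * @dev: drm device
 * @data: ioctl payload (union drm_amdgpu_cs)
 * @filp: file private of the submitting process
 *
 * Glue for a full command submission: parse the chunks, reserve and validate
 * the buffers, fill and patch the IBs, resolve dependencies, update the VM
 * and finally push the job to the scheduler. Any -EDEADLK along the way is
 * turned into a GPU reset via amdgpu_cs_handle_lockup().
 */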
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = dev->dev_private;
	union drm_amdgpu_cs *cs = data;
	struct amdgpu_cs_parser parser = {};
	bool reserved_buffers = false;
	int i, r;

	if (!adev->accel_working)
		return -EBUSY;

	parser.adev = adev;
	parser.filp = filp;

	r = amdgpu_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser!\n");
		amdgpu_cs_parser_fini(&parser, r, false);
		r = amdgpu_cs_handle_lockup(adev, r);
		return r;
	}
	r = amdgpu_cs_parser_bos(&parser, data);
	if (r == -ENOMEM)
		DRM_ERROR("Not enough memory for command submission!\n");
	else if (r && r != -ERESTARTSYS)
		DRM_ERROR("Failed to process the buffer list %d!\n", r);
	else if (!r) {
		reserved_buffers = true;
		r = amdgpu_cs_ib_fill(adev, &parser);
	}

	if (!r) {
		r = amdgpu_cs_dependencies(adev, &parser);
		if (r)
			DRM_ERROR("Failed in the dependencies handling %d!\n", r);
	}

	if (r)
		goto out;

	for (i = 0; i < parser.job->num_ibs; i++)
		trace_amdgpu_cs(&parser, i);

	r = amdgpu_cs_ib_vm_chunk(adev, &parser);
	if (r)
		goto out;

	r = amdgpu_cs_submit(&parser, cs);

out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
	r = amdgpu_cs_handle_lockup(adev, r);
	return r;
}

/**
 * amdgpu_cs_wait_ioctl - wait for a command submission to finish
 *
 * @dev: drm device
 * @data: data from userspace
 * @filp: file private
 *
 * Wait for the command submission identified by handle to finish.
 */
int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *filp)
{
	union drm_amdgpu_wait_cs *wait = data;
	struct amdgpu_device *adev = dev->dev_private;
	unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
	struct amdgpu_ring *ring = NULL;
	struct amdgpu_ctx *ctx;
	struct fence *fence;
	long r;

	r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
			       wait->in.ring, &ring);
	if (r)
		return r;

	ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
	if (ctx == NULL)
		return -EINVAL;

	fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
	if (IS_ERR(fence))
		r = PTR_ERR(fence);
	else if (fence) {
		r = fence_wait_timeout(fence, true, timeout);
		fence_put(fence);
	} else
		r = 1;

	amdgpu_ctx_put(ctx);
	if (r < 0)
		return r;

	memset(wait, 0, sizeof(*wait));
	wait->out.status = (r == 0);

	return 0;
}

/**
 * amdgpu_cs_find_mapping - find the bo_va mapping for a VM address
 *
 * @parser: command submission parser context
 * @addr: VM address
 * @bo: resulting BO of the mapping found
 *
 * Search the buffer objects in the command submission context for a certain
 * virtual memory address. Returns the mapping structure when found, NULL
 * otherwise.
 */
struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
		       uint64_t addr, struct amdgpu_bo **bo)
{
	struct amdgpu_bo_va_mapping *mapping;
	unsigned i;

	if (!parser->bo_list)
		return NULL;

	addr /= AMDGPU_GPU_PAGE_SIZE;

	for (i = 0; i < parser->bo_list->num_entries; i++) {
		struct amdgpu_bo_list_entry *lobj;

		lobj = &parser->bo_list->array[i];
		if (!lobj->bo_va)
			continue;

		list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = lobj->bo_va->bo;
			return mapping;
		}

		list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
			if (mapping->it.start > addr ||
			    addr > mapping->it.last)
				continue;

			*bo = lobj->bo_va->bo;
			return mapping;
		}
	}

	return NULL;
}