1 /*
2  * Copyright © 2008,2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *    Chris Wilson <chris@chris-wilson.co.uk>
26  *
27  */
28
29 #include <drm/drmP.h>
30 #include <drm/i915_drm.h>
31 #include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/dma_remapping.h>
35
36 struct eb_objects {
37         int and;
38         struct hlist_head buckets[0];
39 };
40
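/*
 * Handle -> object lookup table for this execbuffer.  It is sized to
 * roughly one hash bucket per object, rounded down to a power of two
 * and capped at half a page worth of hlist_heads, so that
 * (handle & eb->and) can serve directly as the bucket index.  For
 * example, on a typical 64-bit build (4096-byte pages, 8-byte
 * hlist_head) count starts at 256 and is halved until it no longer
 * exceeds the number of objects in the execbuffer.
 */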
41 static struct eb_objects *
42 eb_create(int size)
43 {
44         struct eb_objects *eb;
45         int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
46         BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
47         while (count > size)
48                 count >>= 1;
49         eb = kzalloc(count*sizeof(struct hlist_head) +
50                      sizeof(struct eb_objects),
51                      GFP_KERNEL);
52         if (eb == NULL)
53                 return eb;
54
55         eb->and = count - 1;
56         return eb;
57 }
58
59 static void
60 eb_reset(struct eb_objects *eb)
61 {
62         memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
63 }
64
65 static void
66 eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
67 {
68         hlist_add_head(&obj->exec_node,
69                        &eb->buckets[obj->exec_handle & eb->and]);
70 }
71
72 static struct drm_i915_gem_object *
73 eb_get_object(struct eb_objects *eb, unsigned long handle)
74 {
75         struct hlist_head *head;
76         struct hlist_node *node;
77         struct drm_i915_gem_object *obj;
78
79         head = &eb->buckets[handle & eb->and];
80         hlist_for_each(node, head) {
81                 obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
82                 if (obj->exec_handle == handle)
83                         return obj;
84         }
85
86         return NULL;
87 }
88
89 static void
90 eb_destroy(struct eb_objects *eb)
91 {
92         kfree(eb);
93 }
94
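/*
 * Relocations are patched through a CPU kmap when the object is in
 * the CPU write domain, is not mappable and fenceable, or is
 * cacheable; otherwise they are written through an atomic GTT
 * io-mapping (see the GTT branch of relocate_entry below).
 */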
95 static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
96 {
97         return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
98                 !obj->map_and_fenceable ||
99                 obj->cache_level != I915_CACHE_NONE);
100 }
101
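/*
 * Apply a single relocation: look up the target object, sanity-check
 * the requested domains and the relocation offset, then patch the
 * 32-bit value in the object (via the CPU or the GTT, as decided by
 * use_cpu_reloc()) and record the new presumed offset.
 */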
102 static int
103 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
104                                    struct eb_objects *eb,
105                                    struct drm_i915_gem_relocation_entry *reloc)
106 {
107         struct drm_device *dev = obj->base.dev;
108         struct drm_gem_object *target_obj;
109         struct drm_i915_gem_object *target_i915_obj;
110         uint32_t target_offset;
111         int ret = -EINVAL;
112
113         /* we already hold a reference to all valid objects */
114         target_obj = &eb_get_object(eb, reloc->target_handle)->base;
115         if (unlikely(target_obj == NULL))
116                 return -ENOENT;
117
118         target_i915_obj = to_intel_bo(target_obj);
119         target_offset = target_i915_obj->gtt_offset;
120
121         /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
122          * pipe_control writes because the gpu doesn't properly redirect them
123          * through the ppgtt for non-secure batchbuffers. */
124         if (unlikely(IS_GEN6(dev) &&
125             reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
126             !target_i915_obj->has_global_gtt_mapping)) {
127                 i915_gem_gtt_bind_object(target_i915_obj,
128                                          target_i915_obj->cache_level);
129         }
130
131         /* The target buffer should have appeared before us in the
132          * exec_object list, so it should have a GTT space bound by now.
133          */
134         if (unlikely(target_offset == 0)) {
135                 DRM_DEBUG("No GTT space found for object %d\n",
136                           reloc->target_handle);
137                 return ret;
138         }
139
140         /* Validate that the target is in a valid r/w GPU domain */
141         if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
142                 DRM_DEBUG("reloc with multiple write domains: "
143                           "obj %p target %d offset %d "
144                           "read %08x write %08x",
145                           obj, reloc->target_handle,
146                           (int) reloc->offset,
147                           reloc->read_domains,
148                           reloc->write_domain);
149                 return ret;
150         }
151         if (unlikely((reloc->write_domain | reloc->read_domains)
152                      & ~I915_GEM_GPU_DOMAINS)) {
153                 DRM_DEBUG("reloc with read/write non-GPU domains: "
154                           "obj %p target %d offset %d "
155                           "read %08x write %08x",
156                           obj, reloc->target_handle,
157                           (int) reloc->offset,
158                           reloc->read_domains,
159                           reloc->write_domain);
160                 return ret;
161         }
162         if (unlikely(reloc->write_domain && target_obj->pending_write_domain &&
163                      reloc->write_domain != target_obj->pending_write_domain)) {
164                 DRM_DEBUG("Write domain conflict: "
165                           "obj %p target %d offset %d "
166                           "new %08x old %08x\n",
167                           obj, reloc->target_handle,
168                           (int) reloc->offset,
169                           reloc->write_domain,
170                           target_obj->pending_write_domain);
171                 return ret;
172         }
173
174         target_obj->pending_read_domains |= reloc->read_domains;
175         target_obj->pending_write_domain |= reloc->write_domain;
176
177         /* If the relocation already has the right value in it, no
178          * more work needs to be done.
179          */
180         if (target_offset == reloc->presumed_offset)
181                 return 0;
182
183         /* Check that the relocation address is valid... */
184         if (unlikely(reloc->offset > obj->base.size - 4)) {
185                 DRM_DEBUG("Relocation beyond object bounds: "
186                           "obj %p target %d offset %d size %d.\n",
187                           obj, reloc->target_handle,
188                           (int) reloc->offset,
189                           (int) obj->base.size);
190                 return ret;
191         }
192         if (unlikely(reloc->offset & 3)) {
193                 DRM_DEBUG("Relocation not 4-byte aligned: "
194                           "obj %p target %d offset %d.\n",
195                           obj, reloc->target_handle,
196                           (int) reloc->offset);
197                 return ret;
198         }
199
200         /* We can't wait for rendering with pagefaults disabled */
201         if (obj->active && in_atomic())
202                 return -EFAULT;
203
204         reloc->delta += target_offset;
205         if (use_cpu_reloc(obj)) {
206                 uint32_t page_offset = reloc->offset & ~PAGE_MASK;
207                 char *vaddr;
208
209                 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
210                 if (ret)
211                         return ret;
212
213                 vaddr = kmap_atomic(i915_gem_object_get_page(obj,
214                                                              reloc->offset >> PAGE_SHIFT));
215                 *(uint32_t *)(vaddr + page_offset) = reloc->delta;
216                 kunmap_atomic(vaddr);
217         } else {
218                 struct drm_i915_private *dev_priv = dev->dev_private;
219                 uint32_t __iomem *reloc_entry;
220                 void __iomem *reloc_page;
221
222                 ret = i915_gem_object_set_to_gtt_domain(obj, true);
223                 if (ret)
224                         return ret;
225
226                 ret = i915_gem_object_put_fence(obj);
227                 if (ret)
228                         return ret;
229
230                 /* Map the page containing the relocation we're going to perform.  */
231                 reloc->offset += obj->gtt_offset;
232                 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
233                                                       reloc->offset & PAGE_MASK);
234                 reloc_entry = (uint32_t __iomem *)
235                         (reloc_page + (reloc->offset & ~PAGE_MASK));
236                 iowrite32(reloc->delta, reloc_entry);
237                 io_mapping_unmap_atomic(reloc_page);
238         }
239
240         /* and update the user's relocation entry */
241         reloc->presumed_offset = target_offset;
242
243         return 0;
244 }
245
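/*
 * Fast relocation path: relocations are copied from userspace in
 * small batches onto the stack with the _inatomic helpers (the
 * caller has pagefaults disabled) and presumed_offset is written
 * back only when it actually changed.  Any fault drops us out with
 * -EFAULT so that the slow path can retry with the lock dropped.
 */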
246 static int
247 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
248                                     struct eb_objects *eb)
249 {
250 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
251         struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
252         struct drm_i915_gem_relocation_entry __user *user_relocs;
253         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
254         int remain, ret;
255
256         user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
257
258         remain = entry->relocation_count;
259         while (remain) {
260                 struct drm_i915_gem_relocation_entry *r = stack_reloc;
261                 int count = remain;
262                 if (count > ARRAY_SIZE(stack_reloc))
263                         count = ARRAY_SIZE(stack_reloc);
264                 remain -= count;
265
266                 if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
267                         return -EFAULT;
268
269                 do {
270                         u64 offset = r->presumed_offset;
271
272                         ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
273                         if (ret)
274                                 return ret;
275
276                         if (r->presumed_offset != offset &&
277                             __copy_to_user_inatomic(&user_relocs->presumed_offset,
278                                                     &r->presumed_offset,
279                                                     sizeof(r->presumed_offset))) {
280                                 return -EFAULT;
281                         }
282
283                         user_relocs++;
284                         r++;
285                 } while (--count);
286         }
287
288         return 0;
289 #undef N_RELOC
290 }
291
292 static int
293 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
294                                          struct eb_objects *eb,
295                                          struct drm_i915_gem_relocation_entry *relocs)
296 {
297         const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
298         int i, ret;
299
300         for (i = 0; i < entry->relocation_count; i++) {
301                 ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
302                 if (ret)
303                         return ret;
304         }
305
306         return 0;
307 }
308
309 static int
310 i915_gem_execbuffer_relocate(struct drm_device *dev,
311                              struct eb_objects *eb,
312                              struct list_head *objects)
313 {
314         struct drm_i915_gem_object *obj;
315         int ret = 0;
316
317         /* This is the fast path and we cannot handle a pagefault whilst
318          * holding the struct mutex lest the user pass in the relocations
319          * contained within a mmapped bo. In such a case, the page
320          * fault handler would call i915_gem_fault() and we would try to
321          * acquire the struct mutex again. Obviously this is bad and so
322          * lockdep complains vehemently.
323          */
324         pagefault_disable();
325         list_for_each_entry(obj, objects, exec_list) {
326                 ret = i915_gem_execbuffer_relocate_object(obj, eb);
327                 if (ret)
328                         break;
329         }
330         pagefault_enable();
331
332         return ret;
333 }
334
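/*
 * Internal bookkeeping bits stored in the upper part of
 * exec_entry->flags while the objects are reserved; they record what
 * has to be released again in unreserve_object().
 */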
335 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
336 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
337
338 static int
339 need_reloc_mappable(struct drm_i915_gem_object *obj)
340 {
341         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
342         return entry->relocation_count && !use_cpu_reloc(obj);
343 }
344
345 static int
346 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
347                                    struct intel_ring_buffer *ring)
348 {
349         struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
350         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
351         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
352         bool need_fence, need_mappable;
353         int ret;
354
355         need_fence =
356                 has_fenced_gpu_access &&
357                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
358                 obj->tiling_mode != I915_TILING_NONE;
359         need_mappable = need_fence || need_reloc_mappable(obj);
360
361         ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
362         if (ret)
363                 return ret;
364
365         entry->flags |= __EXEC_OBJECT_HAS_PIN;
366
367         if (has_fenced_gpu_access) {
368                 if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
369                         ret = i915_gem_object_get_fence(obj);
370                         if (ret)
371                                 return ret;
372
373                         if (i915_gem_object_pin_fence(obj))
374                                 entry->flags |= __EXEC_OBJECT_HAS_FENCE;
375
376                         obj->pending_fenced_gpu_access = true;
377                 }
378         }
379
380         /* Ensure ppgtt mapping exists if needed */
381         if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
382                 i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
383                                        obj, obj->cache_level);
384
385                 obj->has_aliasing_ppgtt_mapping = 1;
386         }
387
388         entry->offset = obj->gtt_offset;
389         return 0;
390 }
391
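/*
 * Undo the pin (and fence pin) taken by reserve_object() and clear
 * the bookkeeping flags.
 */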
392 static void
393 i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
394 {
395         struct drm_i915_gem_exec_object2 *entry;
396
397         if (!obj->gtt_space)
398                 return;
399
400         entry = obj->exec_entry;
401
402         if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
403                 i915_gem_object_unpin_fence(obj);
404
405         if (entry->flags & __EXEC_OBJECT_HAS_PIN)
406                 i915_gem_object_unpin(obj);
407
408         entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
409 }
410
411 static int
412 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
413                             struct drm_file *file,
414                             struct list_head *objects)
415 {
416         struct drm_i915_gem_object *obj;
417         struct list_head ordered_objects;
418         bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
419         int retry;
420
421         INIT_LIST_HEAD(&ordered_objects);
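        /*
         * Objects that need a mappable (and/or fenceable) GTT binding
         * are moved to the front of the list so that they get first
         * pick of the limited mappable aperture; everything else is
         * appended behind them in its original order.
         */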
422         while (!list_empty(objects)) {
423                 struct drm_i915_gem_exec_object2 *entry;
424                 bool need_fence, need_mappable;
425
426                 obj = list_first_entry(objects,
427                                        struct drm_i915_gem_object,
428                                        exec_list);
429                 entry = obj->exec_entry;
430
431                 need_fence =
432                         has_fenced_gpu_access &&
433                         entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
434                         obj->tiling_mode != I915_TILING_NONE;
435                 need_mappable = need_fence || need_reloc_mappable(obj);
436
437                 if (need_mappable)
438                         list_move(&obj->exec_list, &ordered_objects);
439                 else
440                         list_move_tail(&obj->exec_list, &ordered_objects);
441
442                 obj->base.pending_read_domains = 0;
443                 obj->base.pending_write_domain = 0;
444                 obj->pending_fenced_gpu_access = false;
445         }
446         list_splice(&ordered_objects, objects);
447
448         /* Attempt to pin all of the buffers into the GTT.
449          * This is done in 3 phases:
450          *
451          * 1a. Unbind all objects that do not match the GTT constraints for
452          *     the execbuffer (fenceable, mappable, alignment etc).
453          * 1b. Increment pin count for already bound objects.
454          * 2.  Bind new objects.
455          * 3.  Decrement pin count.
456          *
457          * This avoids unnecessary unbinding of later objects in order to make
458          * room for the earlier objects *unless* we need to defragment.
459          */
460         retry = 0;
461         do {
462                 int ret = 0;
463
464                 /* Unbind any ill-fitting objects, otherwise pin them. */
465                 list_for_each_entry(obj, objects, exec_list) {
466                         struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
467                         bool need_fence, need_mappable;
468
469                         if (!obj->gtt_space)
470                                 continue;
471
472                         need_fence =
473                                 has_fenced_gpu_access &&
474                                 entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
475                                 obj->tiling_mode != I915_TILING_NONE;
476                         need_mappable = need_fence || need_reloc_mappable(obj);
477
478                         if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
479                             (need_mappable && !obj->map_and_fenceable))
480                                 ret = i915_gem_object_unbind(obj);
481                         else
482                                 ret = i915_gem_execbuffer_reserve_object(obj, ring);
483                         if (ret)
484                                 goto err;
485                 }
486
487                 /* Bind fresh objects */
488                 list_for_each_entry(obj, objects, exec_list) {
489                         if (obj->gtt_space)
490                                 continue;
491
492                         ret = i915_gem_execbuffer_reserve_object(obj, ring);
493                         if (ret)
494                                 goto err;
495                 }
496
497 err:            /* Decrement pin count for bound objects */
498                 list_for_each_entry(obj, objects, exec_list)
499                         i915_gem_execbuffer_unreserve_object(obj);
500
501                 if (ret != -ENOSPC || retry++)
502                         return ret;
503
504                 ret = i915_gem_evict_everything(ring->dev);
505                 if (ret)
506                         return ret;
507         } while (1);
508 }
509
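/*
 * Slow relocation path: drop struct_mutex, copy every relocation
 * array from userspace with pagefaults enabled, then retake the
 * lock, re-lookup and re-reserve the objects and apply the
 * relocations from the kernel copy.
 */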
510 static int
511 i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
512                                   struct drm_file *file,
513                                   struct intel_ring_buffer *ring,
514                                   struct list_head *objects,
515                                   struct eb_objects *eb,
516                                   struct drm_i915_gem_exec_object2 *exec,
517                                   int count)
518 {
519         struct drm_i915_gem_relocation_entry *reloc;
520         struct drm_i915_gem_object *obj;
521         int *reloc_offset;
522         int i, total, ret;
523
524         /* We may process another execbuffer during the unlock... */
525         while (!list_empty(objects)) {
526                 obj = list_first_entry(objects,
527                                        struct drm_i915_gem_object,
528                                        exec_list);
529                 list_del_init(&obj->exec_list);
530                 drm_gem_object_unreference(&obj->base);
531         }
532
533         mutex_unlock(&dev->struct_mutex);
534
535         total = 0;
536         for (i = 0; i < count; i++)
537                 total += exec[i].relocation_count;
538
539         reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
540         reloc = drm_malloc_ab(total, sizeof(*reloc));
541         if (reloc == NULL || reloc_offset == NULL) {
542                 drm_free_large(reloc);
543                 drm_free_large(reloc_offset);
544                 mutex_lock(&dev->struct_mutex);
545                 return -ENOMEM;
546         }
547
548         total = 0;
549         for (i = 0; i < count; i++) {
550                 struct drm_i915_gem_relocation_entry __user *user_relocs;
551
552                 user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;
553
554                 if (copy_from_user(reloc+total, user_relocs,
555                                    exec[i].relocation_count * sizeof(*reloc))) {
556                         ret = -EFAULT;
557                         mutex_lock(&dev->struct_mutex);
558                         goto err;
559                 }
560
561                 reloc_offset[i] = total;
562                 total += exec[i].relocation_count;
563         }
564
565         ret = i915_mutex_lock_interruptible(dev);
566         if (ret) {
567                 mutex_lock(&dev->struct_mutex);
568                 goto err;
569         }
570
571         /* reacquire the objects */
572         eb_reset(eb);
573         for (i = 0; i < count; i++) {
574                 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
575                                                         exec[i].handle));
576                 if (&obj->base == NULL) {
577                         DRM_DEBUG("Invalid object handle %d at index %d\n",
578                                    exec[i].handle, i);
579                         ret = -ENOENT;
580                         goto err;
581                 }
582
583                 list_add_tail(&obj->exec_list, objects);
584                 obj->exec_handle = exec[i].handle;
585                 obj->exec_entry = &exec[i];
586                 eb_add_object(eb, obj);
587         }
588
589         ret = i915_gem_execbuffer_reserve(ring, file, objects);
590         if (ret)
591                 goto err;
592
593         list_for_each_entry(obj, objects, exec_list) {
594                 int offset = obj->exec_entry - exec;
595                 ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
596                                                                reloc + reloc_offset[offset]);
597                 if (ret)
598                         goto err;
599         }
600
601         /* Leave the user relocations as they are; this is the painfully slow path,
602          * and we want to avoid the complication of dropping the lock whilst
603          * having buffers reserved in the aperture and so causing spurious
604          * ENOSPC for random operations.
605          */
606
607 err:
608         drm_free_large(reloc);
609         drm_free_large(reloc_offset);
610         return ret;
611 }
612
613 static int
614 i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
615 {
616         u32 plane, flip_mask;
617         int ret;
618
619         /* Check for any pending flips. As we only maintain a flip queue depth
620          * of 1, we can simply insert a WAIT for the next display flip prior
621          * to executing the batch and avoid stalling the CPU.
622          */
623
624         for (plane = 0; flips >> plane; plane++) {
625                 if (((flips >> plane) & 1) == 0)
626                         continue;
627
628                 if (plane)
629                         flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
630                 else
631                         flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
632
633                 ret = intel_ring_begin(ring, 2);
634                 if (ret)
635                         return ret;
636
637                 intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
638                 intel_ring_emit(ring, MI_NOOP);
639                 intel_ring_advance(ring);
640         }
641
642         return 0;
643 }
644
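/*
 * Prepare all objects for execution on this ring: sync against
 * rendering on other rings, clflush any CPU-dirty objects, wait for
 * pending pageflips on buffers about to be written, flush the
 * chipset/GTT write buffers and finally invalidate the GPU caches.
 */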
645 static int
646 i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
647                                 struct list_head *objects)
648 {
649         struct drm_i915_gem_object *obj;
650         uint32_t flush_domains = 0;
651         uint32_t flips = 0;
652         int ret;
653
654         list_for_each_entry(obj, objects, exec_list) {
655                 ret = i915_gem_object_sync(obj, ring);
656                 if (ret)
657                         return ret;
658
659                 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
660                         i915_gem_clflush_object(obj);
661
662                 if (obj->base.pending_write_domain)
663                         flips |= atomic_read(&obj->pending_flip);
664
665                 flush_domains |= obj->base.write_domain;
666         }
667
668         if (flips) {
669                 ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
670                 if (ret)
671                         return ret;
672         }
673
674         if (flush_domains & I915_GEM_DOMAIN_CPU)
675                 intel_gtt_chipset_flush();
676
677         if (flush_domains & I915_GEM_DOMAIN_GTT)
678                 wmb();
679
680         /* Unconditionally invalidate gpu caches and ensure that we do flush
681          * any residual writes from the previous batch.
682          */
683         return intel_ring_invalidate_all_caches(ring);
684 }
685
686 static bool
687 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
688 {
689         return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
690 }
691
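/*
 * Pre-validate the user relocation arrays: reject counts that would
 * overflow the length calculation, check read/write access and
 * pre-fault the pages so that the atomic copies in the relocation
 * fast path are likely to succeed.
 */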
692 static int
693 validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
694                    int count)
695 {
696         int i;
697
698         for (i = 0; i < count; i++) {
699                 char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
700                 int length; /* limited by fault_in_pages_readable() */
701
702                 /* First check for malicious input causing overflow */
703                 if (exec[i].relocation_count >
704                     INT_MAX / sizeof(struct drm_i915_gem_relocation_entry))
705                         return -EINVAL;
706
707                 length = exec[i].relocation_count *
708                         sizeof(struct drm_i915_gem_relocation_entry);
709                 if (!access_ok(VERIFY_READ, ptr, length))
710                         return -EFAULT;
711
712                 /* we may also need to update the presumed offsets */
713                 if (!access_ok(VERIFY_WRITE, ptr, length))
714                         return -EFAULT;
715
716                 if (fault_in_multipages_readable(ptr, length))
717                         return -EFAULT;
718         }
719
720         return 0;
721 }
722
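/*
 * Commit the pending read/write domains gathered during relocation
 * and mark every object as active on the ring; written objects are
 * flagged dirty and, if still pinned (likely a scanout buffer), the
 * framebuffer is marked busy.
 */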
723 static void
724 i915_gem_execbuffer_move_to_active(struct list_head *objects,
725                                    struct intel_ring_buffer *ring,
726                                    u32 seqno)
727 {
728         struct drm_i915_gem_object *obj;
729
730         list_for_each_entry(obj, objects, exec_list) {
731                 u32 old_read = obj->base.read_domains;
732                 u32 old_write = obj->base.write_domain;
733
734                 obj->base.read_domains = obj->base.pending_read_domains;
735                 obj->base.write_domain = obj->base.pending_write_domain;
736                 obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
737
738                 i915_gem_object_move_to_active(obj, ring, seqno);
739                 if (obj->base.write_domain) {
740                         obj->dirty = 1;
741                         obj->last_write_seqno = seqno;
742                         if (obj->pin_count) /* check for potential scanout */
743                                 intel_mark_fb_busy(obj);
744                 }
745
746                 trace_i915_gem_object_change_domain(obj, old_read, old_write);
747         }
748 }
749
750 static void
751 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
752                                     struct drm_file *file,
753                                     struct intel_ring_buffer *ring)
754 {
755         /* Unconditionally force add_request to emit a full flush. */
756         ring->gpu_caches_dirty = true;
757
758         /* Add a breadcrumb for the completion of the batch buffer */
759         (void)i915_add_request(ring, file, NULL);
760 }
761
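/*
 * On the gen7 render ring, zero the four GEN7_SO_WRITE_OFFSET
 * registers via MI_LOAD_REGISTER_IMM so that streamed-output (SOL)
 * write offsets start again from zero, as requested with the
 * I915_EXEC_GEN7_SOL_RESET flag.
 */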
762 static int
763 i915_reset_gen7_sol_offsets(struct drm_device *dev,
764                             struct intel_ring_buffer *ring)
765 {
766         drm_i915_private_t *dev_priv = dev->dev_private;
767         int ret, i;
768
769         if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
770                 return 0;
771
772         ret = intel_ring_begin(ring, 4 * 3);
773         if (ret)
774                 return ret;
775
776         for (i = 0; i < 4; i++) {
777                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
778                 intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
779                 intel_ring_emit(ring, 0);
780         }
781
782         intel_ring_advance(ring);
783
784         return 0;
785 }
786
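/*
 * Common execbuffer path shared by the legacy and execbuffer2
 * ioctls: validate the arguments, select the target ring and
 * constants mode, copy any cliprects, look up all objects, reserve
 * them in the GTT, apply relocations (fast path first, slow path on
 * -EFAULT), flush everything to the GPU, switch hw context, dispatch
 * the batch and finally retire by moving the objects to the active
 * list and emitting a request.
 */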
787 static int
788 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
789                        struct drm_file *file,
790                        struct drm_i915_gem_execbuffer2 *args,
791                        struct drm_i915_gem_exec_object2 *exec)
792 {
793         drm_i915_private_t *dev_priv = dev->dev_private;
794         struct list_head objects;
795         struct eb_objects *eb;
796         struct drm_i915_gem_object *batch_obj;
797         struct drm_clip_rect *cliprects = NULL;
798         struct intel_ring_buffer *ring;
799         u32 ctx_id = i915_execbuffer2_get_context_id(*args);
800         u32 exec_start, exec_len;
801         u32 seqno;
802         u32 mask;
803         int ret, mode, i;
804
805         if (!i915_gem_check_execbuffer(args)) {
806                 DRM_DEBUG("execbuf with invalid offset/length\n");
807                 return -EINVAL;
808         }
809
810         ret = validate_exec_list(exec, args->buffer_count);
811         if (ret)
812                 return ret;
813
814         switch (args->flags & I915_EXEC_RING_MASK) {
815         case I915_EXEC_DEFAULT:
816         case I915_EXEC_RENDER:
817                 ring = &dev_priv->ring[RCS];
818                 break;
819         case I915_EXEC_BSD:
820                 ring = &dev_priv->ring[VCS];
821                 if (ctx_id != 0) {
822                         DRM_DEBUG("Ring %s doesn't support contexts\n",
823                                   ring->name);
824                         return -EPERM;
825                 }
826                 break;
827         case I915_EXEC_BLT:
828                 ring = &dev_priv->ring[BCS];
829                 if (ctx_id != 0) {
830                         DRM_DEBUG("Ring %s doesn't support contexts\n",
831                                   ring->name);
832                         return -EPERM;
833                 }
834                 break;
835         default:
836                 DRM_DEBUG("execbuf with unknown ring: %d\n",
837                           (int)(args->flags & I915_EXEC_RING_MASK));
838                 return -EINVAL;
839         }
840         if (!intel_ring_initialized(ring)) {
841                 DRM_DEBUG("execbuf with invalid ring: %d\n",
842                           (int)(args->flags & I915_EXEC_RING_MASK));
843                 return -EINVAL;
844         }
845
846         mode = args->flags & I915_EXEC_CONSTANTS_MASK;
847         mask = I915_EXEC_CONSTANTS_MASK;
848         switch (mode) {
849         case I915_EXEC_CONSTANTS_REL_GENERAL:
850         case I915_EXEC_CONSTANTS_ABSOLUTE:
851         case I915_EXEC_CONSTANTS_REL_SURFACE:
852                 if (ring == &dev_priv->ring[RCS] &&
853                     mode != dev_priv->relative_constants_mode) {
854                         if (INTEL_INFO(dev)->gen < 4)
855                                 return -EINVAL;
856
857                         if (INTEL_INFO(dev)->gen > 5 &&
858                             mode == I915_EXEC_CONSTANTS_REL_SURFACE)
859                                 return -EINVAL;
860
861                         /* The HW changed the meaning of this bit on gen6 */
862                         if (INTEL_INFO(dev)->gen >= 6)
863                                 mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
864                 }
865                 break;
866         default:
867                 DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
868                 return -EINVAL;
869         }
870
871         if (args->buffer_count < 1) {
872                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
873                 return -EINVAL;
874         }
875
876         if (args->num_cliprects != 0) {
877                 if (ring != &dev_priv->ring[RCS]) {
878                         DRM_DEBUG("clip rectangles are only valid with the render ring\n");
879                         return -EINVAL;
880                 }
881
882                 if (INTEL_INFO(dev)->gen >= 5) {
883                         DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
884                         return -EINVAL;
885                 }
886
887                 if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
888                         DRM_DEBUG("execbuf with %u cliprects\n",
889                                   args->num_cliprects);
890                         return -EINVAL;
891                 }
892
893                 cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
894                                     GFP_KERNEL);
895                 if (cliprects == NULL) {
896                         ret = -ENOMEM;
897                         goto pre_mutex_err;
898                 }
899
900                 if (copy_from_user(cliprects,
901                                      (struct drm_clip_rect __user *)(uintptr_t)
902                                      args->cliprects_ptr,
903                                      sizeof(*cliprects)*args->num_cliprects)) {
904                         ret = -EFAULT;
905                         goto pre_mutex_err;
906                 }
907         }
908
909         ret = i915_mutex_lock_interruptible(dev);
910         if (ret)
911                 goto pre_mutex_err;
912
913         if (dev_priv->mm.suspended) {
914                 mutex_unlock(&dev->struct_mutex);
915                 ret = -EBUSY;
916                 goto pre_mutex_err;
917         }
918
919         eb = eb_create(args->buffer_count);
920         if (eb == NULL) {
921                 mutex_unlock(&dev->struct_mutex);
922                 ret = -ENOMEM;
923                 goto pre_mutex_err;
924         }
925
926         /* Look up object handles */
927         INIT_LIST_HEAD(&objects);
928         for (i = 0; i < args->buffer_count; i++) {
929                 struct drm_i915_gem_object *obj;
930
931                 obj = to_intel_bo(drm_gem_object_lookup(dev, file,
932                                                         exec[i].handle));
933                 if (&obj->base == NULL) {
934                         DRM_DEBUG("Invalid object handle %d at index %d\n",
935                                    exec[i].handle, i);
936                         /* prevent error path from reading uninitialized data */
937                         ret = -ENOENT;
938                         goto err;
939                 }
940
941                 if (!list_empty(&obj->exec_list)) {
942                         DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
943                                    obj, exec[i].handle, i);
944                         ret = -EINVAL;
945                         goto err;
946                 }
947
948                 list_add_tail(&obj->exec_list, &objects);
949                 obj->exec_handle = exec[i].handle;
950                 obj->exec_entry = &exec[i];
951                 eb_add_object(eb, obj);
952         }
953
954         /* take note of the batch buffer before we might reorder the lists */
955         batch_obj = list_entry(objects.prev,
956                                struct drm_i915_gem_object,
957                                exec_list);
958
959         /* Move the objects en-masse into the GTT, evicting if necessary. */
960         ret = i915_gem_execbuffer_reserve(ring, file, &objects);
961         if (ret)
962                 goto err;
963
964         /* The objects are in their final locations; apply the relocations. */
965         ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
966         if (ret) {
967                 if (ret == -EFAULT) {
968                         ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
969                                                                 &objects, eb,
970                                                                 exec,
971                                                                 args->buffer_count);
972                         BUG_ON(!mutex_is_locked(&dev->struct_mutex));
973                 }
974                 if (ret)
975                         goto err;
976         }
977
978         /* Set the pending read domains for the batch buffer to COMMAND */
979         if (batch_obj->base.pending_write_domain) {
980                 DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
981                 ret = -EINVAL;
982                 goto err;
983         }
984         batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
985
986         ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
987         if (ret)
988                 goto err;
989
990         seqno = i915_gem_next_request_seqno(ring);
991         for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
992                 if (seqno < ring->sync_seqno[i]) {
993                         /* The GPU cannot handle its semaphore value wrapping,
994                          * so every billion or so execbuffers, we need to stall
995                          * the GPU in order to reset the counters.
996                          */
997                         ret = i915_gpu_idle(dev);
998                         if (ret)
999                                 goto err;
1000                         i915_gem_retire_requests(dev);
1001
1002                         BUG_ON(ring->sync_seqno[i]);
1003                 }
1004         }
1005
1006         ret = i915_switch_context(ring, file, ctx_id);
1007         if (ret)
1008                 goto err;
1009
1010         if (ring == &dev_priv->ring[RCS] &&
1011             mode != dev_priv->relative_constants_mode) {
1012                 ret = intel_ring_begin(ring, 4);
1013                 if (ret)
1014                                 goto err;
1015                         goto err;
1016                 intel_ring_emit(ring, MI_NOOP);
1017                 intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1018                 intel_ring_emit(ring, INSTPM);
1019                 intel_ring_emit(ring, mask << 16 | mode);
1020                 intel_ring_advance(ring);
1021
1022                 dev_priv->relative_constants_mode = mode;
1023         }
1024
1025         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1026                 ret = i915_reset_gen7_sol_offsets(dev, ring);
1027                 if (ret)
1028                         goto err;
1029         }
1030
1031         trace_i915_gem_ring_dispatch(ring, seqno);
1032
1033         exec_start = batch_obj->gtt_offset + args->batch_start_offset;
1034         exec_len = args->batch_len;
1035         if (cliprects) {
1036                 for (i = 0; i < args->num_cliprects; i++) {
1037                         ret = i915_emit_box(dev, &cliprects[i],
1038                                             args->DR1, args->DR4);
1039                         if (ret)
1040                                 goto err;
1041
1042                         ret = ring->dispatch_execbuffer(ring,
1043                                                         exec_start, exec_len);
1044                         if (ret)
1045                                 goto err;
1046                 }
1047         } else {
1048                 ret = ring->dispatch_execbuffer(ring, exec_start, exec_len);
1049                 if (ret)
1050                         goto err;
1051         }
1052
1053         i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
1054         i915_gem_execbuffer_retire_commands(dev, file, ring);
1055
1056 err:
1057         eb_destroy(eb);
1058         while (!list_empty(&objects)) {
1059                 struct drm_i915_gem_object *obj;
1060
1061                 obj = list_first_entry(&objects,
1062                                        struct drm_i915_gem_object,
1063                                        exec_list);
1064                 list_del_init(&obj->exec_list);
1065                 drm_gem_object_unreference(&obj->base);
1066         }
1067
1068         mutex_unlock(&dev->struct_mutex);
1069
1070 pre_mutex_err:
1071         kfree(cliprects);
1072         return ret;
1073 }
1074
1075 /*
1076  * Legacy execbuffer just creates an exec2 list from the original exec object
1077  * list array and passes it to the real function.
1078  */
1079 int
1080 i915_gem_execbuffer(struct drm_device *dev, void *data,
1081                     struct drm_file *file)
1082 {
1083         struct drm_i915_gem_execbuffer *args = data;
1084         struct drm_i915_gem_execbuffer2 exec2;
1085         struct drm_i915_gem_exec_object *exec_list = NULL;
1086         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1087         int ret, i;
1088
1089         if (args->buffer_count < 1) {
1090                 DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1091                 return -EINVAL;
1092         }
1093
1094         /* Copy in the exec list from userland */
1095         exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1096         exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1097         if (exec_list == NULL || exec2_list == NULL) {
1098                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1099                           args->buffer_count);
1100                 drm_free_large(exec_list);
1101                 drm_free_large(exec2_list);
1102                 return -ENOMEM;
1103         }
1104         ret = copy_from_user(exec_list,
1105                              (void __user *)(uintptr_t)args->buffers_ptr,
1106                              sizeof(*exec_list) * args->buffer_count);
1107         if (ret != 0) {
1108                 DRM_DEBUG("copy %d exec entries failed %d\n",
1109                           args->buffer_count, ret);
1110                 drm_free_large(exec_list);
1111                 drm_free_large(exec2_list);
1112                 return -EFAULT;
1113         }
1114
1115         for (i = 0; i < args->buffer_count; i++) {
1116                 exec2_list[i].handle = exec_list[i].handle;
1117                 exec2_list[i].relocation_count = exec_list[i].relocation_count;
1118                 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1119                 exec2_list[i].alignment = exec_list[i].alignment;
1120                 exec2_list[i].offset = exec_list[i].offset;
1121                 if (INTEL_INFO(dev)->gen < 4)
1122                         exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1123                 else
1124                         exec2_list[i].flags = 0;
1125         }
1126
1127         exec2.buffers_ptr = args->buffers_ptr;
1128         exec2.buffer_count = args->buffer_count;
1129         exec2.batch_start_offset = args->batch_start_offset;
1130         exec2.batch_len = args->batch_len;
1131         exec2.DR1 = args->DR1;
1132         exec2.DR4 = args->DR4;
1133         exec2.num_cliprects = args->num_cliprects;
1134         exec2.cliprects_ptr = args->cliprects_ptr;
1135         exec2.flags = I915_EXEC_RENDER;
1136         i915_execbuffer2_set_context_id(exec2, 0);
1137
1138         ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1139         if (!ret) {
1140                 /* Copy the new buffer offsets back to the user's exec list. */
1141                 for (i = 0; i < args->buffer_count; i++)
1142                         exec_list[i].offset = exec2_list[i].offset;
1143                 /* ... and back out to userspace */
1144                 ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
1145                                    exec_list,
1146                                    sizeof(*exec_list) * args->buffer_count);
1147                 if (ret) {
1148                         ret = -EFAULT;
1149                         DRM_DEBUG("failed to copy %d exec entries "
1150                                   "back to user (%d)\n",
1151                                   args->buffer_count, ret);
1152                 }
1153         }
1154
1155         drm_free_large(exec_list);
1156         drm_free_large(exec2_list);
1157         return ret;
1158 }
1159
1160 int
1161 i915_gem_execbuffer2(struct drm_device *dev, void *data,
1162                      struct drm_file *file)
1163 {
1164         struct drm_i915_gem_execbuffer2 *args = data;
1165         struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1166         int ret;
1167
1168         if (args->buffer_count < 1 ||
1169             args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1170                 DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1171                 return -EINVAL;
1172         }
1173
1174         exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1175                              GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
1176         if (exec2_list == NULL)
1177                 exec2_list = drm_malloc_ab(sizeof(*exec2_list),
1178                                            args->buffer_count);
1179         if (exec2_list == NULL) {
1180                 DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1181                           args->buffer_count);
1182                 return -ENOMEM;
1183         }
1184         ret = copy_from_user(exec2_list,
1185                              (struct drm_i915_relocation_entry __user *)
1186                              (uintptr_t) args->buffers_ptr,
1187                              sizeof(*exec2_list) * args->buffer_count);
1188         if (ret != 0) {
1189                 DRM_DEBUG("copy %d exec entries failed %d\n",
1190                           args->buffer_count, ret);
1191                 drm_free_large(exec2_list);
1192                 return -EFAULT;
1193         }
1194
1195         ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1196         if (!ret) {
1197                 /* Copy the new buffer offsets back to the user's exec list. */
1198                 ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
1199                                    exec2_list,
1200                                    sizeof(*exec2_list) * args->buffer_count);
1201                 if (ret) {
1202                         ret = -EFAULT;
1203                         DRM_DEBUG("failed to copy %d exec entries "
1204                                   "back to user (%d)\n",
1205                                   args->buffer_count, ret);
1206                 }
1207         }
1208
1209         drm_free_large(exec2_list);
1210         return ret;
1211 }