drivers/gpu/drm/i915/intel_breadcrumbs.c

   1 /*
   2  * Copyright © 2015 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #include "i915_drv.h"
  26
  27 static void intel_breadcrumbs_fake_irq(unsigned long data)
  28 {
  29         struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
  30
  31         /*
  32          * The timer persists in case we cannot enable interrupts,
  33          * or if we have previously seen seqno/interrupt incoherency
  34          * ("missed interrupt" syndrome). Here the worker will wake up
  35          * every jiffie in order to kick the oldest waiter to do the
  36          * coherent seqno check.
  37          */
  38         rcu_read_lock();
  39         if (intel_engine_wakeup(engine))
  40                 mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
  41         rcu_read_unlock();
  42 }
  43
  44 static void irq_enable(struct intel_engine_cs *engine)
  45 {
  46         WARN_ON(!engine->irq_get(engine));
  47 }
  48
  49 static void irq_disable(struct intel_engine_cs *engine)
  50 {
  51         engine->irq_put(engine);
  52 }
  53
  54 static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
  55 {
  56         struct intel_engine_cs *engine =
  57                 container_of(b, struct intel_engine_cs, breadcrumbs);
  58         struct drm_i915_private *i915 = engine->i915;
  59         bool irq_posted = false;
  60
  61         assert_spin_locked(&b->lock);
  62         if (b->rpm_wakelock)
  63                 return false;
  64
  65         /* Since we are waiting on a request, the GPU should be busy
  66          * and should have its own rpm reference. For completeness,
  67          * record an rpm reference for ourselves to cover the
  68          * interrupt we unmask.
  69          */
  70         intel_runtime_pm_get_noresume(i915);
  71         b->rpm_wakelock = true;
  72
  73         /* No interrupts? Kick the waiter every jiffie! */
  74         if (intel_irqs_enabled(i915)) {
  75                 if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) {
  76                         irq_enable(engine);
  77                         irq_posted = true;
  78                 }
  79                 b->irq_enabled = true;
  80         }
  81
  82         if (!b->irq_enabled ||
  83             test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
  84                 mod_timer(&b->fake_irq, jiffies + 1);
  85
  86         return irq_posted;
  87 }
  88
  89 static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
  90 {
  91         struct intel_engine_cs *engine =
  92                 container_of(b, struct intel_engine_cs, breadcrumbs);
  93
  94         assert_spin_locked(&b->lock);
  95         if (!b->rpm_wakelock)
  96                 return;
  97
  98         if (b->irq_enabled) {
  99                 irq_disable(engine);
 100                 b->irq_enabled = false;
 101         }
 102
 103         intel_runtime_pm_put(engine->i915);
 104         b->rpm_wakelock = false;
 105 }
 106
 107 static inline struct intel_wait *to_wait(struct rb_node *node)
 108 {
 109         return container_of(node, struct intel_wait, node);
 110 }
 111
 112 static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
 113                                               struct intel_wait *wait)
 114 {
 115         assert_spin_locked(&b->lock);
 116
 117         /* This request is completed, so remove it from the tree, mark it as
 118          * complete, and *then* wake up the associated task.
 119          */
 120         rb_erase(&wait->node, &b->waiters);
 121         RB_CLEAR_NODE(&wait->node);
 122
 123         wake_up_process(wait->tsk); /* implicit smp_wmb() */
 124 }
 125
 126 static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
 127                                     struct intel_wait *wait)
 128 {
 129         struct intel_breadcrumbs *b = &engine->breadcrumbs;
 130         struct rb_node **p, *parent, *completed;
 131         bool first;
 132         u32 seqno;
 133
 134         /* Insert the request into the retirement ordered list
 135          * of waiters by walking the rbtree. If we are the oldest
 136          * seqno in the tree (the first to be retired), then
 137          * set ourselves as the bottom-half.
 138          *
 139          * As we descend the tree, prune completed branches since we hold the
 140          * spinlock we know that the first_waiter must be delayed and can
 141          * reduce some of the sequential wake up latency if we take action
 142          * ourselves and wake up the completed tasks in parallel. Also, by
 143          * removing stale elements in the tree, we may be able to reduce the
 144          * ping-pong between the old bottom-half and ourselves as first-waiter.
 145          */
 146         first = true;
 147         parent = NULL;
 148         completed = NULL;
 149         seqno = engine->get_seqno(engine);
 150
 151          /* If the request completed before we managed to grab the spinlock,
 152           * return now before adding ourselves to the rbtree. We let the
 153           * current bottom-half handle any pending wakeups and instead
 154           * try and get out of the way quickly.
 155           */
 156         if (i915_seqno_passed(seqno, wait->seqno)) {
 157                 RB_CLEAR_NODE(&wait->node);
 158                 return first;
 159         }
 160
 161         p = &b->waiters.rb_node;
 162         while (*p) {
 163                 parent = *p;
 164                 if (wait->seqno == to_wait(parent)->seqno) {
 165                         /* We have multiple waiters on the same seqno, select
 166                          * the highest priority task (that with the smallest
 167                          * task->prio) to serve as the bottom-half for this
 168                          * group.
 169                          */
 170                         if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
 171                                 p = &parent->rb_right;
 172                                 first = false;
 173                         } else {
 174                                 p = &parent->rb_left;
 175                         }
 176                 } else if (i915_seqno_passed(wait->seqno,
 177                                              to_wait(parent)->seqno)) {
 178                         p = &parent->rb_right;
 179                         if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
 180                                 completed = parent;
 181                         else
 182                                 first = false;
 183                 } else {
 184                         p = &parent->rb_left;
 185                 }
 186         }
 187         rb_link_node(&wait->node, parent, p);
 188         rb_insert_color(&wait->node, &b->waiters);
 189         GEM_BUG_ON(!first && !b->tasklet);
 190
 191         if (completed) {
 192                 struct rb_node *next = rb_next(completed);
 193
 194                 GEM_BUG_ON(!next && !first);
 195                 if (next && next != &wait->node) {
 196                         GEM_BUG_ON(first);
 197                         b->first_wait = to_wait(next);
 198                         smp_store_mb(b->tasklet, b->first_wait->tsk);
 199                         /* As there is a delay between reading the current
 200                          * seqno, processing the completed tasks and selecting
 201                          * the next waiter, we may have missed the interrupt
 202                          * and so need for the next bottom-half to wakeup.
 203                          *
 204                          * Also as we enable the IRQ, we may miss the
 205                          * interrupt for that seqno, so we have to wake up
 206                          * the next bottom-half in order to do a coherent check
 207                          * in case the seqno passed.
 208                          */
 209                         __intel_breadcrumbs_enable_irq(b);
 210                         wake_up_process(to_wait(next)->tsk);
 211                 }
 212
 213                 do {
 214                         struct intel_wait *crumb = to_wait(completed);
 215                         completed = rb_prev(completed);
 216                         __intel_breadcrumbs_finish(b, crumb);
 217                 } while (completed);
 218         }
 219
 220         if (first) {
 221                 GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
 222                 b->first_wait = wait;
 223                 smp_store_mb(b->tasklet, wait->tsk);
 224                 first = __intel_breadcrumbs_enable_irq(b);
 225         }
 226         GEM_BUG_ON(!b->tasklet);
 227         GEM_BUG_ON(!b->first_wait);
 228         GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
 229
 230         return first;
 231 }
 232
 233 bool intel_engine_add_wait(struct intel_engine_cs *engine,
 234                            struct intel_wait *wait)
 235 {
 236         struct intel_breadcrumbs *b = &engine->breadcrumbs;
 237         bool first;
 238
 239         spin_lock(&b->lock);
 240         first = __intel_engine_add_wait(engine, wait);
 241         spin_unlock(&b->lock);
 242
 243         return first;
 244 }
 245
 246 void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
 247 {
 248         mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
 249 }
 250
 251 static inline bool chain_wakeup(struct rb_node *rb, int priority)
 252 {
 253         return rb && to_wait(rb)->tsk->prio <= priority;
 254 }
 255
 256 void intel_engine_remove_wait(struct intel_engine_cs *engine,
 257                               struct intel_wait *wait)
 258 {
 259         struct intel_breadcrumbs *b = &engine->breadcrumbs;
 260
 261         /* Quick check to see if this waiter was already decoupled from
 262          * the tree by the bottom-half to avoid contention on the spinlock
 263          * by the herd.
 264          */
 265         if (RB_EMPTY_NODE(&wait->node))
 266                 return;
 267
 268         spin_lock(&b->lock);
 269
 270         if (RB_EMPTY_NODE(&wait->node))
 271                 goto out_unlock;
 272
 273         if (b->first_wait == wait) {
 274                 struct rb_node *next;
 275                 const int priority = wait->tsk->prio;
 276
 277                 GEM_BUG_ON(b->tasklet != wait->tsk);
 278
 279                 /* We are the current bottom-half. Find the next candidate,
 280                  * the first waiter in the queue on the remaining oldest
 281                  * request. As multiple seqnos may complete in the time it
 282                  * takes us to wake up and find the next waiter, we have to
 283                  * wake up that waiter for it to perform its own coherent
 284                  * completion check.
 285                  */
 286                 next = rb_next(&wait->node);
 287                 if (chain_wakeup(next, priority)) {
 288                         /* If the next waiter is already complete,
 289                          * wake it up and continue onto the next waiter. So
 290                          * if have a small herd, they will wake up in parallel
 291                          * rather than sequentially, which should reduce
 292                          * the overall latency in waking all the completed
 293                          * clients.
 294                          *
 295                          * However, waking up a chain adds extra latency to
 296                          * the first_waiter. This is undesirable if that
 297                          * waiter is a high priority task.
 298                          */
 299                         u32 seqno = engine->get_seqno(engine);
 300
 301                         while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
 302                                 struct rb_node *n = rb_next(next);
 303
 304                                 __intel_breadcrumbs_finish(b, to_wait(next));
 305                                 next = n;
 306                                 if (!chain_wakeup(next, priority))
 307                                         break;
 308                         }
 309                 }
 310
 311                 if (next) {
 312                         /* In our haste, we may have completed the first waiter
 313                          * before we enabled the interrupt. Do so now as we
 314                          * have a second waiter for a future seqno. Afterwards,
 315                          * we have to wake up that waiter in case we missed
 316                          * the interrupt, or if we have to handle an
 317                          * exception rather than a seqno completion.
 318                          */
 319                         b->first_wait = to_wait(next);
 320                         smp_store_mb(b->tasklet, b->first_wait->tsk);
 321                         if (b->first_wait->seqno != wait->seqno)
 322                                 __intel_breadcrumbs_enable_irq(b);
 323                         wake_up_process(b->tasklet);
 324                 } else {
 325                         b->first_wait = NULL;
 326                         WRITE_ONCE(b->tasklet, NULL);
 327                         __intel_breadcrumbs_disable_irq(b);
 328                 }
 329         } else {
 330                 GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
 331         }
 332
 333         GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
 334         rb_erase(&wait->node, &b->waiters);
 335
 336 out_unlock:
 337         GEM_BUG_ON(b->first_wait == wait);
 338         GEM_BUG_ON(rb_first(&b->waiters) !=
 339                    (b->first_wait ? &b->first_wait->node : NULL));
 340         GEM_BUG_ON(!b->tasklet ^ RB_EMPTY_ROOT(&b->waiters));
 341         spin_unlock(&b->lock);
 342 }
 343
 344 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 345 {
 346         struct intel_breadcrumbs *b = &engine->breadcrumbs;
 347
 348         spin_lock_init(&b->lock);
 349         setup_timer(&b->fake_irq,
 350                     intel_breadcrumbs_fake_irq,
 351                     (unsigned long)engine);
 352
 353         return 0;
 354 }
 355
 356 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 357 {
 358         struct intel_breadcrumbs *b = &engine->breadcrumbs;
 359
 360         del_timer_sync(&b->fake_irq);
 361 }
 362
 363 unsigned int intel_kick_waiters(struct drm_i915_private *i915)
 364 {
 365         struct intel_engine_cs *engine;
 366         unsigned int mask = 0;
 367
 368         /* To avoid the task_struct disappearing beneath us as we wake up
 369          * the process, we must first inspect the task_struct->state under the
 370          * RCU lock, i.e. as we call wake_up_process() we must be holding the
 371          * rcu_read_lock().
 372          */
 373         rcu_read_lock();
 374         for_each_engine(engine, i915)
 375                 if (unlikely(intel_engine_wakeup(engine)))
 376                         mask |= intel_engine_flag(engine);
 377         rcu_read_unlock();
 378
 379         return mask;
 380 }