/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include <linux/kthread.h>

#include "i915_drv.h"
static void intel_breadcrumbs_fake_irq(unsigned long data)
{
	struct intel_engine_cs *engine = (struct intel_engine_cs *)data;

	/*
	 * The timer persists in case we cannot enable interrupts,
	 * or if we have previously seen seqno/interrupt incoherency
	 * ("missed interrupt" syndrome). Here the worker will wake up
	 * every jiffie in order to kick the oldest waiter to do the
	 * coherent seqno check.
	 */
	if (intel_engine_wakeup(engine))
		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
}
static void irq_enable(struct intel_engine_cs *engine)
{
	/* Enabling the IRQ may miss the generation of the interrupt, but
	 * we still need to force the barrier before reading the seqno,
	 * just in case.
	 */
	engine->irq_posted = true;

	spin_lock_irq(&engine->i915->irq_lock);
	engine->irq_enable(engine);
	spin_unlock_irq(&engine->i915->irq_lock);
}
static void irq_disable(struct intel_engine_cs *engine)
{
	spin_lock_irq(&engine->i915->irq_lock);
	engine->irq_disable(engine);
	spin_unlock_irq(&engine->i915->irq_lock);

	engine->irq_posted = false;
}
static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);
	struct drm_i915_private *i915 = engine->i915;

	assert_spin_locked(&b->lock);
	if (b->rpm_wakelock)
		return false;

	/* Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference. For completeness,
	 * record an rpm reference for ourselves to cover the
	 * interrupt we unmask.
	 */
	intel_runtime_pm_get_noresume(i915);
	b->rpm_wakelock = true;

	/* No interrupts? Kick the waiter every jiffie! */
	if (intel_irqs_enabled(i915)) {
		if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
			irq_enable(engine);
		b->irq_enabled = true;
	}

	if (!b->irq_enabled ||
	    test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
		mod_timer(&b->fake_irq, jiffies + 1);

	return engine->irq_posted;
}
static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);

	assert_spin_locked(&b->lock);
	if (!b->rpm_wakelock)
		return;

	if (b->irq_enabled) {
		irq_disable(engine);
		b->irq_enabled = false;
	}

	intel_runtime_pm_put(engine->i915);
	b->rpm_wakelock = false;
}
static inline struct intel_wait *to_wait(struct rb_node *node)
{
	return container_of(node, struct intel_wait, node);
}
static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
					      struct intel_wait *wait)
{
	assert_spin_locked(&b->lock);

	/* This request is completed, so remove it from the tree, mark it as
	 * complete, and *then* wake up the associated task.
	 */
	rb_erase(&wait->node, &b->waiters);
	RB_CLEAR_NODE(&wait->node);

	wake_up_process(wait->tsk); /* implicit smp_wmb() */
}
static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
				    struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node **p, *parent, *completed;
	bool first;
	u32 seqno;

	/* Insert the request into the retirement ordered list
	 * of waiters by walking the rbtree. If we are the oldest
	 * seqno in the tree (the first to be retired), then
	 * set ourselves as the bottom-half.
	 *
	 * As we descend the tree, prune completed branches: since we hold
	 * the spinlock, we know that the first_waiter must be delayed, and
	 * we can reduce some of the sequential wake-up latency if we take
	 * action ourselves and wake up the completed tasks in parallel.
	 * Also, by removing stale elements in the tree, we may be able to
	 * reduce the ping-pong between the old bottom-half and ourselves as
	 * first-waiter.
	 */
	first = true;
	parent = NULL;
	completed = NULL;
	seqno = intel_engine_get_seqno(engine);

	/* If the request completed before we managed to grab the spinlock,
	 * return now before adding ourselves to the rbtree. We let the
	 * current bottom-half handle any pending wakeups and instead
	 * try and get out of the way quickly.
	 */
	if (i915_seqno_passed(seqno, wait->seqno)) {
		RB_CLEAR_NODE(&wait->node);
		return first;
	}

	p = &b->waiters.rb_node;
	while (*p) {
		parent = *p;
		if (wait->seqno == to_wait(parent)->seqno) {
			/* We have multiple waiters on the same seqno, select
			 * the highest priority task (that with the smallest
			 * task->prio) to serve as the bottom-half for this
			 * group.
			 */
			if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
				p = &parent->rb_right;
				first = false;
			} else {
				p = &parent->rb_left;
			}
		} else if (i915_seqno_passed(wait->seqno,
					     to_wait(parent)->seqno)) {
			p = &parent->rb_right;
			if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
				completed = parent;
			else
				first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&wait->node, parent, p);
	rb_insert_color(&wait->node, &b->waiters);
	GEM_BUG_ON(!first && !b->tasklet);

	if (completed) {
		struct rb_node *next = rb_next(completed);

		GEM_BUG_ON(!next && !first);
		if (next && next != &wait->node) {
			GEM_BUG_ON(first);
			b->first_wait = to_wait(next);
			smp_store_mb(b->tasklet, b->first_wait->tsk);
			/* As there is a delay between reading the current
			 * seqno, processing the completed tasks and selecting
			 * the next waiter, we may have missed the interrupt
			 * and so need the next bottom-half to wake up.
			 *
			 * Also as we enable the IRQ, we may miss the
			 * interrupt for that seqno, so we have to wake up
			 * the next bottom-half in order to do a coherent check
			 * in case the seqno passed.
			 */
			__intel_breadcrumbs_enable_irq(b);
			if (READ_ONCE(engine->irq_posted))
				wake_up_process(to_wait(next)->tsk);
		}

		do {
			struct intel_wait *crumb = to_wait(completed);
			completed = rb_prev(completed);
			__intel_breadcrumbs_finish(b, crumb);
		} while (completed);
	}

	if (first) {
		GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
		b->first_wait = wait;
		smp_store_mb(b->tasklet, wait->tsk);
		first = __intel_breadcrumbs_enable_irq(b);
	}
	GEM_BUG_ON(!b->tasklet);
	GEM_BUG_ON(!b->first_wait);
	GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);

	return first;
}
bool intel_engine_add_wait(struct intel_engine_cs *engine,
			   struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	bool first;

	spin_lock(&b->lock);
	first = __intel_engine_add_wait(engine, wait);
	spin_unlock(&b->lock);

	return first;
}
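/*
 * Illustrative usage (a sketch, not part of this file): a waiter such as
 * i915_wait_request() is expected to bracket its sleep loop with
 * intel_engine_add_wait() and intel_engine_remove_wait(), roughly:
 *
 *	struct intel_wait wait = { .tsk = current, .seqno = seqno };
 *
 *	intel_engine_add_wait(engine, &wait);
 *	for (;;) {
 *		set_current_state(TASK_UNINTERRUPTIBLE);
 *		if (i915_seqno_passed(intel_engine_get_seqno(engine),
 *				      wait.seqno))
 *			break;
 *		schedule();
 *	}
 *	__set_current_state(TASK_RUNNING);
 *	intel_engine_remove_wait(engine, &wait);
 *
 * The open-coded initialisation above is only for illustration; the waiter
 * helpers in the engine header (e.g. intel_wait_init()) are the intended
 * entry points.
 */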
void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
{
	mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
}
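/* Note that a numerically smaller task->prio denotes a more important task,
 * so chain_wakeup() below only lets the departing waiter wake completed
 * waiters that are at least as important as itself.
 */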
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
	return rb && to_wait(rb)->tsk->prio <= priority;
}
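/* wakeup_priority() treats the signaler thread as the most important waker
 * (INT_MIN, given the smaller-is-more-important convention above), so that
 * when the signaler removes a wait it never spends time chain-waking other
 * waiters and fence signalling stays low-latency.
 */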
static inline int wakeup_priority(struct intel_breadcrumbs *b,
				  struct task_struct *tsk)
{
	if (tsk == b->signaler)
		return INT_MIN;
	else
		return tsk->prio;
}
void intel_engine_remove_wait(struct intel_engine_cs *engine,
			      struct intel_wait *wait)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	/* Quick check to see if this waiter was already decoupled from
	 * the tree by the bottom-half to avoid contention on the spinlock
	 * by the herd.
	 */
	if (RB_EMPTY_NODE(&wait->node))
		return;

	spin_lock(&b->lock);

	if (RB_EMPTY_NODE(&wait->node))
		goto out_unlock;

	if (b->first_wait == wait) {
		const int priority = wakeup_priority(b, wait->tsk);
		struct rb_node *next;

		GEM_BUG_ON(b->tasklet != wait->tsk);

		/* We are the current bottom-half. Find the next candidate,
		 * the first waiter in the queue on the remaining oldest
		 * request. As multiple seqnos may complete in the time it
		 * takes us to wake up and find the next waiter, we have to
		 * wake up that waiter for it to perform its own coherent
		 * completion check.
		 */
		next = rb_next(&wait->node);
		if (chain_wakeup(next, priority)) {
			/* If the next waiter is already complete,
			 * wake it up and continue onto the next waiter. So
			 * if we have a small herd, they will wake up in
			 * parallel rather than sequentially, which should
			 * reduce the overall latency in waking all the
			 * completed clients.
			 *
			 * However, waking up a chain adds extra latency to
			 * the first_waiter. This is undesirable if that
			 * waiter is a high priority task.
			 */
			u32 seqno = intel_engine_get_seqno(engine);

			while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
				struct rb_node *n = rb_next(next);

				__intel_breadcrumbs_finish(b, to_wait(next));
				next = n;
				if (!chain_wakeup(next, priority))
					break;
			}
		}

		if (next) {
			/* In our haste, we may have completed the first waiter
			 * before we enabled the interrupt. Do so now as we
			 * have a second waiter for a future seqno. Afterwards,
			 * we have to wake up that waiter in case we missed
			 * the interrupt, or if we have to handle an
			 * exception rather than a seqno completion.
			 */
			b->first_wait = to_wait(next);
			smp_store_mb(b->tasklet, b->first_wait->tsk);
			if (b->first_wait->seqno != wait->seqno)
				__intel_breadcrumbs_enable_irq(b);
			wake_up_process(b->tasklet);
		} else {
			b->first_wait = NULL;
			WRITE_ONCE(b->tasklet, NULL);
			__intel_breadcrumbs_disable_irq(b);
		}
	} else {
		GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
	}

	GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
	rb_erase(&wait->node, &b->waiters);

out_unlock:
	GEM_BUG_ON(b->first_wait == wait);
	GEM_BUG_ON(rb_first(&b->waiters) !=
		   (b->first_wait ? &b->first_wait->node : NULL));
	GEM_BUG_ON(!b->tasklet ^ RB_EMPTY_ROOT(&b->waiters));
	spin_unlock(&b->lock);
}
static bool signal_complete(struct drm_i915_gem_request *request)
{
	if (!request)
		return false;

	/* If another process served as the bottom-half it may have already
	 * signalled that this wait is complete.
	 */
	if (intel_wait_complete(&request->signaling.wait))
		return true;

	/* Carefully check if the request is complete, giving time for the
	 * seqno to be visible or if the GPU hung.
	 */
	if (__i915_request_irq_complete(request))
		return true;

	return false;
}
static struct drm_i915_gem_request *to_signaler(struct rb_node *rb)
{
	return container_of(rb, struct drm_i915_gem_request, signaling.node);
}
static void signaler_set_rtpriority(void)
{
	struct sched_param param = { .sched_priority = 1 };

	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
}
static int intel_breadcrumbs_signaler(void *arg)
{
	struct intel_engine_cs *engine = arg;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_gem_request *request;

	/* Install ourselves with high priority to reduce signalling latency */
	signaler_set_rtpriority();

	do {
		set_current_state(TASK_INTERRUPTIBLE);

		/* We are either woken up by the interrupt bottom-half,
		 * or by a client adding a new signaller. In both cases,
		 * the GPU seqno may have advanced beyond our oldest signal.
		 * If it has, propagate the signal, remove the waiter and
		 * check again with the next oldest signal. Otherwise we
		 * need to wait for a new interrupt from the GPU or for
		 * a new client.
		 */
		request = READ_ONCE(b->first_signal);
		if (signal_complete(request)) {
			/* Wake up all other completed waiters and select the
			 * next bottom-half for the next user interrupt.
			 */
			intel_engine_remove_wait(engine,
						 &request->signaling.wait);

			/* Find the next oldest signal. Note that as we have
			 * not been holding the lock, another client may
			 * have installed an even older signal than the one
			 * we just completed - so double check we are still
			 * the oldest before picking the next one.
			 */
			spin_lock(&b->lock);
			if (request == b->first_signal) {
				struct rb_node *rb =
					rb_next(&request->signaling.node);
				b->first_signal = rb ? to_signaler(rb) : NULL;
			}
			rb_erase(&request->signaling.node, &b->signals);
			spin_unlock(&b->lock);

			i915_gem_request_unreference(request);
		} else {
			if (kthread_should_stop())
				break;

			schedule();
		}
	} while (1);
	__set_current_state(TASK_RUNNING);

	return 0;
}
void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
{
	struct intel_engine_cs *engine = request->engine;
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct rb_node *parent, **p;
	bool first, wakeup;

	if (unlikely(READ_ONCE(request->signaling.wait.tsk)))
		return;

	spin_lock(&b->lock);
	if (unlikely(request->signaling.wait.tsk)) {
		wakeup = false;
		goto unlock;
	}

	request->signaling.wait.tsk = b->signaler;
	request->signaling.wait.seqno = request->seqno;
	i915_gem_request_reference(request);

	/* First add ourselves into the list of waiters, but register our
	 * bottom-half as the signaller thread. As per usual, only the oldest
	 * waiter (not just signaller) is tasked as the bottom-half waking
	 * up all completed waiters after the user interrupt.
	 *
	 * If we are the oldest waiter, enable the irq (after which we
	 * must double check that the seqno did not complete).
	 */
	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);

	/* Now insert ourselves into the retirement ordered list of signals
	 * on this engine. We track the oldest seqno as that will be the
	 * first signal to complete.
	 */
	first = true;
	parent = NULL;
	p = &b->signals.rb_node;
	while (*p) {
		parent = *p;
		if (i915_seqno_passed(request->seqno,
				      to_signaler(parent)->seqno)) {
			p = &parent->rb_right;
			first = false;
		} else {
			p = &parent->rb_left;
		}
	}
	rb_link_node(&request->signaling.node, parent, p);
	rb_insert_color(&request->signaling.node, &b->signals);
	if (first)
		smp_store_mb(b->first_signal, request);

unlock:
	spin_unlock(&b->lock);

	if (wakeup)
		wake_up_process(b->signaler);
}
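/* Note on lifetimes: the reference taken on the request in
 * intel_engine_enable_signaling() above is dropped by the signaler thread in
 * intel_breadcrumbs_signaler() once the signal has been propagated, so the
 * request cannot disappear while it is still tracked in the signal rbtree.
 */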
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct task_struct *tsk;

	spin_lock_init(&b->lock);
	setup_timer(&b->fake_irq,
		    intel_breadcrumbs_fake_irq,
		    (unsigned long)engine);

	/* Spawn a thread to provide a common bottom-half for all signals.
	 * As this is an asynchronous interface we cannot steal the current
	 * task for handling the bottom-half to the user interrupt, therefore
	 * we create a thread to do the coherent seqno dance after the
	 * interrupt and then signal the waitqueue (via the dma-buf/fence).
	 */
	tsk = kthread_run(intel_breadcrumbs_signaler, engine,
			  "i915/signal:%d", engine->id);
	if (IS_ERR(tsk))
		return PTR_ERR(tsk);

	b->signaler = tsk;

	return 0;
}
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;

	if (!IS_ERR_OR_NULL(b->signaler))
		kthread_stop(b->signaler);

	del_timer_sync(&b->fake_irq);
}
unsigned int intel_kick_waiters(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int mask = 0;

	/* To avoid the task_struct disappearing beneath us as we wake up
	 * the process, we must first inspect the task_struct->state under the
	 * RCU lock, i.e. as we call wake_up_process() we must be holding the
	 * rcu_read_lock().
	 */
	rcu_read_lock();
	for_each_engine(engine, i915)
		if (unlikely(intel_engine_wakeup(engine)))
			mask |= intel_engine_flag(engine);
	rcu_read_unlock();

	return mask;
}
unsigned int intel_kick_signalers(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int mask = 0;

	for_each_engine(engine, i915) {
		if (unlikely(READ_ONCE(engine->breadcrumbs.first_signal))) {
			wake_up_process(engine->breadcrumbs.signaler);
			mask |= intel_engine_flag(engine);
		}
	}

	return mask;
}