virt/kvm/arm/arch_timer.c

   1 /*
   2  * Copyright (C) 2012 ARM Ltd.
   3  * Author: Marc Zyngier <marc.zyngier@arm.com>
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License version 2 as
   7  * published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17  */
  18
  19 #include <linux/cpu.h>
  20 #include <linux/kvm.h>
  21 #include <linux/kvm_host.h>
  22 #include <linux/interrupt.h>
  23
  24 #include <clocksource/arm_arch_timer.h>
  25 #include <asm/arch_timer.h>
  26
  27 #include <kvm/arm_vgic.h>
  28 #include <kvm/arm_arch_timer.h>
  29
  30 #include "trace.h"
  31
   32 static struct timecounter *timecounter; /* host timecounter, set in kvm_timer_hyp_init() */
   33 static struct workqueue_struct *wqueue; /* runs the "timer expired" work items; created in kvm_timer_hyp_init() */
   34 static unsigned int host_vtimer_irq; /* host virtual timer IRQ, copied from info->virtual_irq in kvm_timer_hyp_init() */
  35
  36 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
  37 {
  38         vcpu->arch.timer_cpu.active_cleared_last = false;
  39 }
  40
  41 static cycle_t kvm_phys_timer_read(void)
  42 {
  43         return timecounter->cc->read(timecounter->cc);
  44 }
  45
  46 static bool timer_is_armed(struct arch_timer_cpu *timer)
  47 {
  48         return timer->armed;
  49 }
  50
  51 /* timer_arm: as in "arm the timer", not as in ARM the company */
  52 static void timer_arm(struct arch_timer_cpu *timer, u64 ns)
  53 {
  54         timer->armed = true;
  55         hrtimer_start(&timer->timer, ktime_add_ns(ktime_get(), ns),
  56                       HRTIMER_MODE_ABS);
  57 }
  58
  59 static void timer_disarm(struct arch_timer_cpu *timer)
  60 {
  61         if (timer_is_armed(timer)) {
  62                 hrtimer_cancel(&timer->timer);
  63                 cancel_work_sync(&timer->expired);
  64                 timer->armed = false;
  65         }
  66 }
  67
  68 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
  69 {
  70         struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
  71
  72         /*
  73          * We disable the timer in the world switch and let it be
  74          * handled by kvm_timer_sync_hwstate(). Getting a timer
  75          * interrupt at this point is a sure sign of some major
  76          * breakage.
  77          */
  78         pr_warn("Unexpected interrupt %d on vcpu %p\n", irq, vcpu);
  79         return IRQ_HANDLED;
  80 }
  81
  82 /*
  83  * Work function for handling the backup timer that we schedule when a vcpu is
  84  * no longer running, but had a timer programmed to fire in the future.
  85  */
  86 static void kvm_timer_inject_irq_work(struct work_struct *work)
  87 {
  88         struct kvm_vcpu *vcpu;
  89
  90         vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
  91         vcpu->arch.timer_cpu.armed = false;
  92
  93         WARN_ON(!kvm_timer_should_fire(vcpu));
  94
  95         /*
  96          * If the vcpu is blocked we want to wake it up so that it will see
  97          * the timer has expired when entering the guest.
  98          */
  99         kvm_vcpu_kick(vcpu);
 100 }
 101
 102 static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
 103 {
 104         cycle_t cval, now;
 105
 106         cval = vcpu->arch.timer_cpu.cntv_cval;
 107         now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
 108
 109         if (now < cval) {
 110                 u64 ns;
 111
 112                 ns = cyclecounter_cyc2ns(timecounter->cc,
 113                                          cval - now,
 114                                          timecounter->mask,
 115                                          &timecounter->frac);
 116                 return ns;
 117         }
 118
 119         return 0;
 120 }
 121
 122 static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 123 {
 124         struct arch_timer_cpu *timer;
 125         struct kvm_vcpu *vcpu;
 126         u64 ns;
 127
 128         timer = container_of(hrt, struct arch_timer_cpu, timer);
 129         vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
 130
 131         /*
 132          * Check that the timer has really expired from the guest's
 133          * PoV (NTP on the host may have forced it to expire
 134          * early). If we should have slept longer, restart it.
 135          */
 136         ns = kvm_timer_compute_delta(vcpu);
 137         if (unlikely(ns)) {
 138                 hrtimer_forward_now(hrt, ns_to_ktime(ns));
 139                 return HRTIMER_RESTART;
 140         }
 141
 142         queue_work(wqueue, &timer->expired);
 143         return HRTIMER_NORESTART;
 144 }
 145
 146 static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
 147 {
 148         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 149
 150         return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
 151                 (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
 152 }
 153
 154 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 155 {
 156         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 157         cycle_t cval, now;
 158
 159         if (!kvm_timer_irq_can_fire(vcpu))
 160                 return false;
 161
 162         cval = timer->cntv_cval;
 163         now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
 164
 165         return cval <= now;
 166 }
 167
 168 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
 169 {
 170         int ret;
 171         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 172
 173         BUG_ON(!vgic_initialized(vcpu->kvm));
 174
 175         timer->active_cleared_last = false;
 176         timer->irq.level = new_level;
 177         trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
 178                                    timer->irq.level);
 179         ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
 180                                          timer->map,
 181                                          timer->irq.level);
 182         WARN_ON(ret);
 183 }
 184
 185 /*
 186  * Check if there was a change in the timer state (should we raise or lower
 187  * the line level to the GIC).
 188  */
 189 static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 190 {
 191         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 192
 193         /*
 194          * If userspace modified the timer registers via SET_ONE_REG before
 195          * the vgic was initialized, we mustn't set the timer->irq.level value
 196          * because the guest would never see the interrupt.  Instead wait
 197          * until we call this function from kvm_timer_flush_hwstate.
 198          */
 199         if (!vgic_initialized(vcpu->kvm))
 200                 return -ENODEV;
 201
 202         if (kvm_timer_should_fire(vcpu) != timer->irq.level)
 203                 kvm_timer_update_irq(vcpu, !timer->irq.level);
 204
 205         return 0;
 206 }
 207
 208 /*
 209  * Schedule the background timer before calling kvm_vcpu_block, so that this
 210  * thread is removed from its waitqueue and made runnable when there's a timer
 211  * interrupt to handle.
 212  */
 213 void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 214 {
 215         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 216
 217         BUG_ON(timer_is_armed(timer));
 218
 219         /*
 220          * No need to schedule a background timer if the guest timer has
 221          * already expired, because kvm_vcpu_block will return before putting
 222          * the thread to sleep.
 223          */
 224         if (kvm_timer_should_fire(vcpu))
 225                 return;
 226
 227         /*
 228          * If the timer is not capable of raising interrupts (disabled or
 229          * masked), then there's no more work for us to do.
 230          */
 231         if (!kvm_timer_irq_can_fire(vcpu))
 232                 return;
 233
 234         /*  The timer has not yet expired, schedule a background timer */
 235         timer_arm(timer, kvm_timer_compute_delta(vcpu));
 236 }
 237
 238 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
 239 {
 240         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 241         timer_disarm(timer);
 242 }
 243
 244 /**
 245  * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
 246  * @vcpu: The vcpu pointer
 247  *
 248  * Check if the virtual timer has expired while we were running in the host,
 249  * and inject an interrupt if that was the case.
 250  */
 251 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 252 {
 253         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 254         bool phys_active;
 255         int ret;
 256
 257         if (kvm_timer_update_state(vcpu))
 258                 return;
 259
 260         /*
 261         * If we enter the guest with the virtual input level to the VGIC
 262         * asserted, then we have already told the VGIC what we need to, and
 263         * we don't need to exit from the guest until the guest deactivates
 264         * the already injected interrupt, so therefore we should set the
 265         * hardware active state to prevent unnecessary exits from the guest.
 266         *
 267         * Also, if we enter the guest with the virtual timer interrupt active,
 268         * then it must be active on the physical distributor, because we set
 269         * the HW bit and the guest must be able to deactivate the virtual and
 270         * physical interrupt at the same time.
 271         *
 272         * Conversely, if the virtual input level is deasserted and the virtual
 273         * interrupt is not active, then always clear the hardware active state
 274         * to ensure that hardware interrupts from the timer triggers a guest
 275         * exit.
 276         */
 277         if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map))
 278                 phys_active = true;
 279         else
 280                 phys_active = false;
 281
 282         /*
 283          * We want to avoid hitting the (re)distributor as much as
 284          * possible, as this is a potentially expensive MMIO access
 285          * (not to mention locks in the irq layer), and a solution for
 286          * this is to cache the "active" state in memory.
 287          *
 288          * Things to consider: we cannot cache an "active set" state,
 289          * because the HW can change this behind our back (it becomes
 290          * "clear" in the HW). We must then restrict the caching to
 291          * the "clear" state.
 292          *
 293          * The cache is invalidated on:
 294          * - vcpu put, indicating that the HW cannot be trusted to be
 295          *   in a sane state on the next vcpu load,
 296          * - any change in the interrupt state
 297          *
 298          * Usage conditions:
 299          * - cached value is "active clear"
 300          * - value to be programmed is "active clear"
 301          */
 302         if (timer->active_cleared_last && !phys_active)
 303                 return;
 304
 305         ret = irq_set_irqchip_state(timer->map->irq,
 306                                     IRQCHIP_STATE_ACTIVE,
 307                                     phys_active);
 308         WARN_ON(ret);
 309
 310         timer->active_cleared_last = !phys_active;
 311 }
 312
 313 /**
 314  * kvm_timer_sync_hwstate - sync timer state from cpu
 315  * @vcpu: The vcpu pointer
 316  *
 317  * Check if the virtual timer has expired while we were running in the guest,
 318  * and inject an interrupt if that was the case.
 319  */
 320 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 321 {
 322         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 323
 324         BUG_ON(timer_is_armed(timer));
 325
 326         /*
 327          * The guest could have modified the timer registers or the timer
 328          * could have expired, update the timer state.
 329          */
 330         kvm_timer_update_state(vcpu);
 331 }
 332
 333 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 334                          const struct kvm_irq_level *irq)
 335 {
 336         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 337         struct irq_phys_map *map;
 338
 339         /*
 340          * The vcpu timer irq number cannot be determined in
 341          * kvm_timer_vcpu_init() because it is called much before
 342          * kvm_vcpu_set_target(). To handle this, we determine
 343          * vcpu timer irq number when the vcpu is reset.
 344          */
 345         timer->irq.irq = irq->irq;
 346
 347         /*
 348          * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
 349          * and to 0 for ARMv7.  We provide an implementation that always
 350          * resets the timer to be disabled and unmasked and is compliant with
 351          * the ARMv7 architecture.
 352          */
 353         timer->cntv_ctl = 0;
 354         kvm_timer_update_state(vcpu);
 355
 356         /*
 357          * Tell the VGIC that the virtual interrupt is tied to a
 358          * physical interrupt. We do that once per VCPU.
 359          */
 360         map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
 361         if (WARN_ON(IS_ERR(map)))
 362                 return PTR_ERR(map);
 363
 364         timer->map = map;
 365         return 0;
 366 }
 367
 368 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 369 {
 370         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 371
 372         INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
 373         hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 374         timer->timer.function = kvm_timer_expire;
 375 }
 376
 377 static void kvm_timer_init_interrupt(void *info)
 378 {
 379         enable_percpu_irq(host_vtimer_irq, 0);
 380 }
 381
 382 int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 383 {
 384         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 385
 386         switch (regid) {
 387         case KVM_REG_ARM_TIMER_CTL:
 388                 timer->cntv_ctl = value;
 389                 break;
 390         case KVM_REG_ARM_TIMER_CNT:
 391                 vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
 392                 break;
 393         case KVM_REG_ARM_TIMER_CVAL:
 394                 timer->cntv_cval = value;
 395                 break;
 396         default:
 397                 return -1;
 398         }
 399
 400         kvm_timer_update_state(vcpu);
 401         return 0;
 402 }
 403
 404 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
 405 {
 406         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 407
 408         switch (regid) {
 409         case KVM_REG_ARM_TIMER_CTL:
 410                 return timer->cntv_ctl;
 411         case KVM_REG_ARM_TIMER_CNT:
 412                 return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
 413         case KVM_REG_ARM_TIMER_CVAL:
 414                 return timer->cntv_cval;
 415         }
 416         return (u64)-1;
 417 }
 418
 419 static int kvm_timer_cpu_notify(struct notifier_block *self,
 420                                 unsigned long action, void *cpu)
 421 {
 422         switch (action) {
 423         case CPU_STARTING:
 424         case CPU_STARTING_FROZEN:
 425                 kvm_timer_init_interrupt(NULL);
 426                 break;
 427         case CPU_DYING:
 428         case CPU_DYING_FROZEN:
 429                 disable_percpu_irq(host_vtimer_irq);
 430                 break;
 431         }
 432
 433         return NOTIFY_OK;
 434 }
 435
  436 static struct notifier_block kvm_timer_cpu_nb = { /* registered by kvm_timer_hyp_init() */
  437         .notifier_call = kvm_timer_cpu_notify, /* reacts to CPU_STARTING / CPU_DYING actions */
  438 };
 439
 440 int kvm_timer_hyp_init(void)
 441 {
 442         struct arch_timer_kvm_info *info;
 443         int err;
 444
 445         info = arch_timer_get_kvm_info();
 446         timecounter = &info->timecounter;
 447
 448         if (info->virtual_irq <= 0) {
 449                 kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
 450                         info->virtual_irq);
 451                 return -ENODEV;
 452         }
 453         host_vtimer_irq = info->virtual_irq;
 454
 455         err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
 456                                  "kvm guest timer", kvm_get_running_vcpus());
 457         if (err) {
 458                 kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
 459                         host_vtimer_irq, err);
 460                 goto out;
 461         }
 462
 463         err = __register_cpu_notifier(&kvm_timer_cpu_nb);
 464         if (err) {
 465                 kvm_err("Cannot register timer CPU notifier\n");
 466                 goto out_free;
 467         }
 468
 469         wqueue = create_singlethread_workqueue("kvm_arch_timer");
 470         if (!wqueue) {
 471                 err = -ENOMEM;
 472                 goto out_free;
 473         }
 474
 475         kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
 476         on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
 477
 478         goto out;
 479 out_free:
 480         free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
 481 out:
 482         return err;
 483 }
 484
 485 void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
 486 {
 487         struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 488
 489         timer_disarm(timer);
 490         if (timer->map)
 491                 kvm_vgic_unmap_phys_irq(vcpu, timer->map);
 492 }
 493
 494 void kvm_timer_enable(struct kvm *kvm)
 495 {
 496         if (kvm->arch.timer.enabled)
 497                 return;
 498
 499         /*
 500          * There is a potential race here between VCPUs starting for the first
 501          * time, which may be enabling the timer multiple times.  That doesn't
 502          * hurt though, because we're just setting a variable to the same
 503          * variable that it already was.  The important thing is that all
 504          * VCPUs have the enabled variable set, before entering the guest, if
 505          * the arch timers are enabled.
 506          */
 507         if (timecounter && wqueue)
 508                 kvm->arch.timer.enabled = 1;
 509 }
 510
 511 void kvm_timer_init(struct kvm *kvm)
 512 {
 513         kvm->arch.timer.cntvoff = kvm_phys_timer_read();
 514 }