virt/kvm/arm/vgic.c
1 /*
2  * Copyright (C) 2012 ARM Ltd.
3  * Author: Marc Zyngier <marc.zyngier@arm.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  */
18
19 #include <linux/cpu.h>
20 #include <linux/kvm.h>
21 #include <linux/kvm_host.h>
22 #include <linux/interrupt.h>
23 #include <linux/io.h>
24 #include <linux/irq.h>
25 #include <linux/rculist.h>
26 #include <linux/uaccess.h>
27
28 #include <asm/kvm_emulate.h>
29 #include <asm/kvm_arm.h>
30 #include <asm/kvm_mmu.h>
31 #include <trace/events/kvm.h>
32 #include <asm/kvm.h>
33 #include <kvm/iodev.h>
34 #include <linux/irqchip/arm-gic-common.h>
35
36 #define CREATE_TRACE_POINTS
37 #include "trace.h"
38
39 /*
40  * How the whole thing works (courtesy of Christoffer Dall):
41  *
42  * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if
43  *   something is pending on the CPU interface.
44  * - Interrupts that are pending on the distributor are stored in the
45  *   vgic.irq_pending bitmap (this bitmap is updated by both userland
46  *   ioctls and guest mmio ops, and other in-kernel peripherals such as the
47  *   arch. timers).
48  * - Every time the bitmap changes, the irq_pending_on_cpu oracle is
49  *   recalculated.
50  * - To calculate the oracle, we need info for each cpu from
51  *   compute_pending_for_cpu, which considers:
52  *   - PPI: dist->irq_pending & dist->irq_enable
53  *   - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target
54  *   - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn
55  *     registers, stored on each vcpu. We only keep one bit of
56  *     information per interrupt, making sure that only one vcpu can
57  *     accept the interrupt.
58  * - If any of the above state changes, we must recalculate the oracle.
59  * - The same is true when injecting an interrupt, except that we only
60  *   consider a single interrupt at a time. The irq_spi_cpu array
61  *   contains the target CPU for each SPI.
62  *
63  * The handling of level interrupts adds some extra complexity. We
64  * need to track when the interrupt has been EOIed, so we can sample
65  * the 'line' again. This is achieved as follows:
66  *
67  * - When a level interrupt is moved onto a vcpu, the corresponding
68  *   bit in irq_queued is set. As long as this bit is set, the line
69  *   will be ignored for further interrupts. The interrupt is injected
70  *   into the vcpu with the GICH_LR_EOI bit set (generate a
71  *   maintenance interrupt on EOI).
72  * - When the interrupt is EOIed, the maintenance interrupt fires,
73  *   and clears the corresponding bit in irq_queued. This allows the
74  *   interrupt line to be sampled again.
75  * - Note that level-triggered interrupts can also be set to pending from
76  *   writes to GICD_ISPENDRn, and lowering the external input line does not
77  *   cause the interrupt to become inactive in such a situation.
78  *   Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
79  *   inactive as long as the external input line is held high.
80  *
81  *
82  * Initialization rules: there are multiple stages to the vgic
83  * initialization, both for the distributor and the CPU interfaces.
84  *
85  * Distributor:
86  *
87  * - kvm_vgic_early_init(): initialization of static data that doesn't
88  *   depend on any sizing information or emulation type. No allocation
89  *   is allowed there.
90  *
91  * - vgic_init(): allocation and initialization of the generic data
92  *   structures that depend on sizing information (number of CPUs,
93  *   number of interrupts). Also initializes the vcpu specific data
94  *   structures. Can be executed lazily for GICv2.
95  *   [to be renamed to kvm_vgic_init??]
96  *
97  * CPU Interface:
98  *
99  * - kvm_vgic_cpu_early_init(): initialization of static data that
100  *   doesn't depend on any sizing information or emulation type. No
101  *   allocation is allowed there.
102  */
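
/*
 * Illustration of the oracle computation above: assuming the distributor is
 * enabled, if an SPI is set in dist->irq_pending and dist->irq_enabled, and
 * the corresponding bit of dist->irq_spi_target[1] is set, then
 * compute_pending_for_cpu() returns true for vcpu 1 and bit 1 of
 * dist->irq_pending_on_cpu gets set, telling the flush code that vcpu 1 has
 * something to pick up.
 */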
103
104 #include "vgic.h"
105
106 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
107 static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
108 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
109 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
110 static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
111 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
112                                                 int virt_irq);
113 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
114
115 static const struct vgic_ops *vgic_ops;
116 static const struct vgic_params *vgic;
117
118 static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source)
119 {
120         vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source);
121 }
122
123 static bool queue_sgi(struct kvm_vcpu *vcpu, int irq)
124 {
125         return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq);
126 }
127
128 int kvm_vgic_map_resources(struct kvm *kvm)
129 {
130         return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic);
131 }
132
133 /*
134  * struct vgic_bitmap contains a bitmap made of unsigned longs, but
135  * extracts u32s out of them.
136  *
137  * This does not work on 64-bit BE systems, because the bitmap access
138  * will store two consecutive 32-bit words with the higher-addressed
139  * register's bits at the lower index and the lower-addressed register's
140  * bits at the higher index.
141  *
142  * Therefore, swizzle the register index when accessing the 32-bit word
143  * registers to access the right register's value.
144  */
145 #if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64
146 #define REG_OFFSET_SWIZZLE      1
147 #else
148 #define REG_OFFSET_SWIZZLE      0
149 #endif
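
/*
 * Example of the swizzle: on a 64-bit BE host the two 32-bit halves of each
 * unsigned long are stored in the opposite order from LE, so shared register
 * 0 (bits 0-31 of the bitmap) lives at the higher-addressed u32 of the first
 * long. XOR-ing the u32 index with 1 in vgic_bitmap_get_reg() below selects
 * the half that actually holds bits [32n..32n+31] for register n.
 */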
150
151 static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs)
152 {
153         int nr_longs;
154
155         nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
156
157         b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL);
158         if (!b->private)
159                 return -ENOMEM;
160
161         b->shared = b->private + nr_cpus;
162
163         return 0;
164 }
165
166 static void vgic_free_bitmap(struct vgic_bitmap *b)
167 {
168         kfree(b->private);
169         b->private = NULL;
170         b->shared = NULL;
171 }
172
173 /*
174  * Call this function to convert a u64 value to an unsigned long * bitmask
175  * in a way that works on both 32-bit and 64-bit LE and BE platforms.
176  *
177  * Warning: Calling this function may modify *val.
178  */
179 static unsigned long *u64_to_bitmask(u64 *val)
180 {
181 #if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
182         *val = (*val >> 32) | (*val << 32);
183 #endif
184         return (unsigned long *)val;
185 }
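
/*
 * For example, on a 32-bit BE host the u64 is stored high word first, so
 * bits 0-31 would otherwise end up in the second unsigned long of the
 * bitmap; the swap above moves them back into the first word, where
 * find_first_bit() and friends expect them.
 */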
186
187 u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset)
188 {
189         offset >>= 2;
190         if (!offset)
191                 return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE;
192         else
193                 return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE);
194 }
195
196 static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x,
197                                    int cpuid, int irq)
198 {
199         if (irq < VGIC_NR_PRIVATE_IRQS)
200                 return test_bit(irq, x->private + cpuid);
201
202         return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared);
203 }
204
205 void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid,
206                              int irq, int val)
207 {
208         unsigned long *reg;
209
210         if (irq < VGIC_NR_PRIVATE_IRQS) {
211                 reg = x->private + cpuid;
212         } else {
213                 reg = x->shared;
214                 irq -= VGIC_NR_PRIVATE_IRQS;
215         }
216
217         if (val)
218                 set_bit(irq, reg);
219         else
220                 clear_bit(irq, reg);
221 }
222
223 static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid)
224 {
225         return x->private + cpuid;
226 }
227
228 unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x)
229 {
230         return x->shared;
231 }
232
233 static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs)
234 {
235         int size;
236
237         size  = nr_cpus * VGIC_NR_PRIVATE_IRQS;
238         size += nr_irqs - VGIC_NR_PRIVATE_IRQS;
239
240         x->private = kzalloc(size, GFP_KERNEL);
241         if (!x->private)
242                 return -ENOMEM;
243
244         x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32);
245         return 0;
246 }
247
248 static void vgic_free_bytemap(struct vgic_bytemap *b)
249 {
250         kfree(b->private);
251         b->private = NULL;
252         b->shared = NULL;
253 }
254
255 u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset)
256 {
257         u32 *reg;
258
259         if (offset < VGIC_NR_PRIVATE_IRQS) {
260                 reg = x->private;
261                 offset += cpuid * VGIC_NR_PRIVATE_IRQS;
262         } else {
263                 reg = x->shared;
264                 offset -= VGIC_NR_PRIVATE_IRQS;
265         }
266
267         return reg + (offset / sizeof(u32));
268 }
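
/*
 * Worked example (illustrative): for a byte-per-IRQ register frame such as
 * the priority registers, IRQ 36 has byte offset 36, which is beyond
 * VGIC_NR_PRIVATE_IRQS, so it maps to shared byte 4, i.e. the second u32
 * (index 1) of the shared array, which covers IRQs 36-39.
 */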
269
270 #define VGIC_CFG_LEVEL  0
271 #define VGIC_CFG_EDGE   1
272
273 static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq)
274 {
275         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
276         int irq_val;
277
278         irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq);
279         return irq_val == VGIC_CFG_EDGE;
280 }
281
282 static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq)
283 {
284         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
285
286         return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq);
287 }
288
289 static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
290 {
291         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
292
293         return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
294 }
295
296 static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
297 {
298         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
299
300         return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
301 }
302
303 static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
304 {
305         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
306
307         vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1);
308 }
309
310 static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
311 {
312         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
313
314         vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
315 }
316
317 static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
318 {
319         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
320
321         vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
322 }
323
324 static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
325 {
326         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
327
328         vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
329 }
330
331 static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
332 {
333         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
334
335         return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq);
336 }
337
338 static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq)
339 {
340         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
341
342         vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1);
343 }
344
345 static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq)
346 {
347         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
348
349         vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0);
350 }
351
352 static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq)
353 {
354         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
355
356         return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq);
357 }
358
359 static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
360 {
361         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
362
363         vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
364         if (!vgic_dist_irq_get_level(vcpu, irq)) {
365                 vgic_dist_irq_clear_pending(vcpu, irq);
366                 if (!compute_pending_for_cpu(vcpu))
367                         clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
368         }
369 }
370
371 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
372 {
373         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
374
375         return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq);
376 }
377
378 void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq)
379 {
380         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
381
382         vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1);
383 }
384
385 void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq)
386 {
387         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
388
389         vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0);
390 }
391
392 static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq)
393 {
394         if (irq < VGIC_NR_PRIVATE_IRQS)
395                 set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
396         else
397                 set_bit(irq - VGIC_NR_PRIVATE_IRQS,
398                         vcpu->arch.vgic_cpu.pending_shared);
399 }
400
401 void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
402 {
403         if (irq < VGIC_NR_PRIVATE_IRQS)
404                 clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu);
405         else
406                 clear_bit(irq - VGIC_NR_PRIVATE_IRQS,
407                           vcpu->arch.vgic_cpu.pending_shared);
408 }
409
410 static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
411 {
412         return !vgic_irq_is_queued(vcpu, irq);
413 }
414
415 /**
416  * vgic_reg_access - access vgic register
417  * @mmio:   pointer to the data describing the mmio access
418  * @reg:    pointer to the virtual backing of vgic distributor data
419  * @offset: least significant 2 bits used for word offset
420  * @mode:   ACCESS_ mode (see defines above)
421  *
422  * Helper to make vgic register access easier using one of the access
423  * modes defined for vgic register access
424  * (read,raz,write-ignored,setbit,clearbit,write)
425  */
426 void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
427                      phys_addr_t offset, int mode)
428 {
429         int word_offset = (offset & 3) * 8;
430         u32 mask = (1UL << (mmio->len * 8)) - 1;
431         u32 regval;
432
433         /*
434          * Any alignment fault should have been delivered to the guest
435          * directly (ARM ARM B3.12.7 "Prioritization of aborts").
436          */
437
438         if (reg) {
439                 regval = *reg;
440         } else {
441                 BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED));
442                 regval = 0;
443         }
444
445         if (mmio->is_write) {
446                 u32 data = mmio_data_read(mmio, mask) << word_offset;
447                 switch (ACCESS_WRITE_MASK(mode)) {
448                 case ACCESS_WRITE_IGNORED:
449                         return;
450
451                 case ACCESS_WRITE_SETBIT:
452                         regval |= data;
453                         break;
454
455                 case ACCESS_WRITE_CLEARBIT:
456                         regval &= ~data;
457                         break;
458
459                 case ACCESS_WRITE_VALUE:
460                         regval = (regval & ~(mask << word_offset)) | data;
461                         break;
462                 }
463                 *reg = regval;
464         } else {
465                 switch (ACCESS_READ_MASK(mode)) {
466                 case ACCESS_READ_RAZ:
467                         regval = 0;
468                         /* fall through */
469
470                 case ACCESS_READ_VALUE:
471                         mmio_data_write(mmio, mask, regval >> word_offset);
472                 }
473         }
474 }
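
/*
 * Example: a 2-byte guest write at byte offset 2 of a register gives
 * word_offset = 16 and mask = 0xffff, so with ACCESS_WRITE_VALUE only bits
 * [31:16] of *reg are replaced and bits [15:0] are preserved.
 */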
475
476 bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
477                         phys_addr_t offset)
478 {
479         vgic_reg_access(mmio, NULL, offset,
480                         ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
481         return false;
482 }
483
484 bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
485                             phys_addr_t offset, int vcpu_id, int access)
486 {
487         u32 *reg;
488         int mode = ACCESS_READ_VALUE | access;
489         struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id);
490
491         reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset);
492         vgic_reg_access(mmio, reg, offset, mode);
493         if (mmio->is_write) {
494                 if (access & ACCESS_WRITE_CLEARBIT) {
495                         if (offset < 4) /* Force SGI enabled */
496                                 *reg |= 0xffff;
497                         vgic_retire_disabled_irqs(target_vcpu);
498                 }
499                 vgic_update_state(kvm);
500                 return true;
501         }
502
503         return false;
504 }
505
506 bool vgic_handle_set_pending_reg(struct kvm *kvm,
507                                  struct kvm_exit_mmio *mmio,
508                                  phys_addr_t offset, int vcpu_id)
509 {
510         u32 *reg, orig;
511         u32 level_mask;
512         int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT;
513         struct vgic_dist *dist = &kvm->arch.vgic;
514
515         reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset);
516         level_mask = (~(*reg));
517
518         /* Mark both level and edge triggered irqs as pending */
519         reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
520         orig = *reg;
521         vgic_reg_access(mmio, reg, offset, mode);
522
523         if (mmio->is_write) {
524                 /* Set the soft-pending flag only for level-triggered irqs */
525                 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
526                                           vcpu_id, offset);
527                 vgic_reg_access(mmio, reg, offset, mode);
528                 *reg &= level_mask;
529
530                 /* Ignore writes to SGIs */
531                 if (offset < 2) {
532                         *reg &= ~0xffff;
533                         *reg |= orig & 0xffff;
534                 }
535
536                 vgic_update_state(kvm);
537                 return true;
538         }
539
540         return false;
541 }
542
543 bool vgic_handle_clear_pending_reg(struct kvm *kvm,
544                                    struct kvm_exit_mmio *mmio,
545                                    phys_addr_t offset, int vcpu_id)
546 {
547         u32 *level_active;
548         u32 *reg, orig;
549         int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT;
550         struct vgic_dist *dist = &kvm->arch.vgic;
551
552         reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
553         orig = *reg;
554         vgic_reg_access(mmio, reg, offset, mode);
555         if (mmio->is_write) {
556                 /* Re-set level triggered level-active interrupts */
557                 level_active = vgic_bitmap_get_reg(&dist->irq_level,
558                                           vcpu_id, offset);
559                 reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset);
560                 *reg |= *level_active;
561
562                 /* Ignore writes to SGIs */
563                 if (offset < 2) {
564                         *reg &= ~0xffff;
565                         *reg |= orig & 0xffff;
566                 }
567
568                 /* Clear soft-pending flags */
569                 reg = vgic_bitmap_get_reg(&dist->irq_soft_pend,
570                                           vcpu_id, offset);
571                 vgic_reg_access(mmio, reg, offset, mode);
572
573                 vgic_update_state(kvm);
574                 return true;
575         }
576         return false;
577 }
578
579 bool vgic_handle_set_active_reg(struct kvm *kvm,
580                                 struct kvm_exit_mmio *mmio,
581                                 phys_addr_t offset, int vcpu_id)
582 {
583         u32 *reg;
584         struct vgic_dist *dist = &kvm->arch.vgic;
585
586         reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
587         vgic_reg_access(mmio, reg, offset,
588                         ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
589
590         if (mmio->is_write) {
591                 vgic_update_state(kvm);
592                 return true;
593         }
594
595         return false;
596 }
597
598 bool vgic_handle_clear_active_reg(struct kvm *kvm,
599                                   struct kvm_exit_mmio *mmio,
600                                   phys_addr_t offset, int vcpu_id)
601 {
602         u32 *reg;
603         struct vgic_dist *dist = &kvm->arch.vgic;
604
605         reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
606         vgic_reg_access(mmio, reg, offset,
607                         ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
608
609         if (mmio->is_write) {
610                 vgic_update_state(kvm);
611                 return true;
612         }
613
614         return false;
615 }
616
617 static u32 vgic_cfg_expand(u16 val)
618 {
619         u32 res = 0;
620         int i;
621
622         /*
623          * Turn a 16bit value like abcd...mnop into a 32bit word
624          * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is.
625          */
626         for (i = 0; i < 16; i++)
627                 res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1);
628
629         return res;
630 }
631
632 static u16 vgic_cfg_compress(u32 val)
633 {
634         u16 res = 0;
635         int i;
636
637         /*
638          * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like
639          * abcd...mnop which is what we really care about.
640          */
641         for (i = 0; i < 16; i++)
642                 res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i;
643
644         return res;
645 }
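
/*
 * Worked example: vgic_cfg_expand(0x0003) == 0x0000000a (bits 0 and 1 become
 * bits 1 and 3, i.e. the "edge" bit of the first two 2-bit CFG fields), and
 * vgic_cfg_compress(0x0000000a) == 0x0003.
 */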
646
647 /*
648  * The distributor uses 2 bits per IRQ for the CFG register, but the
649  * LSB is always 0. As such, we only keep the upper bit, and use the
650  * two above functions to compress/expand the bits
651  */
652 bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
653                          phys_addr_t offset)
654 {
655         u32 val;
656
657         if (offset & 4)
658                 val = *reg >> 16;
659         else
660                 val = *reg & 0xffff;
661
662         val = vgic_cfg_expand(val);
663         vgic_reg_access(mmio, &val, offset,
664                         ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
665         if (mmio->is_write) {
666                 /* Ignore writes to read-only SGI and PPI bits */
667                 if (offset < 8)
668                         return false;
669
670                 val = vgic_cfg_compress(val);
671                 if (offset & 4) {
672                         *reg &= 0xffff;
673                         *reg |= val << 16;
674                 } else {
675                         *reg &= 0xffff << 16;
676                         *reg |= val;
677                 }
678         }
679
680         return false;
681 }
682
683 /**
684  * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
685  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
686  *
687  * Move any IRQs that have already been assigned to LRs back to the
688  * emulated distributor state so that the complete emulated state can be read
689  * from the main emulation structures without investigating the LRs.
690  */
691 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
692 {
693         u64 elrsr = vgic_get_elrsr(vcpu);
694         unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
695         int i;
696
697         for_each_clear_bit(i, elrsr_ptr, vgic->nr_lr) {
698                 struct vgic_lr lr = vgic_get_lr(vcpu, i);
699
700                 /*
701                  * There are three options for the state bits:
702                  *
703                  * 01: pending
704                  * 10: active
705                  * 11: pending and active
706                  */
707                 BUG_ON(!(lr.state & LR_STATE_MASK));
708
709                 /* Reestablish SGI source for pending and active IRQs */
710                 if (lr.irq < VGIC_NR_SGIS)
711                         add_sgi_source(vcpu, lr.irq, lr.source);
712
713                 /*
714                  * If the LR holds an active (10) or a pending and active (11)
715                  * interrupt then move the active state to the
716                  * distributor tracking bit.
717                  */
718                 if (lr.state & LR_STATE_ACTIVE)
719                         vgic_irq_set_active(vcpu, lr.irq);
720
721                 /*
722                  * Reestablish the pending state on the distributor and the
723                  * CPU interface and mark the LR as free for other use.
724                  */
725                 vgic_retire_lr(i, vcpu);
726
727                 /* Finally update the VGIC state. */
728                 vgic_update_state(vcpu->kvm);
729         }
730 }
731
732 const
733 struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
734                                       int len, gpa_t offset)
735 {
736         while (ranges->len) {
737                 if (offset >= ranges->base &&
738                     (offset + len) <= (ranges->base + ranges->len))
739                         return ranges;
740                 ranges++;
741         }
742
743         return NULL;
744 }
745
746 static bool vgic_validate_access(const struct vgic_dist *dist,
747                                  const struct vgic_io_range *range,
748                                  unsigned long offset)
749 {
750         int irq;
751
752         if (!range->bits_per_irq)
753                 return true;    /* Not an irq-based access */
754
755         irq = offset * 8 / range->bits_per_irq;
756         if (irq >= dist->nr_irqs)
757                 return false;
758
759         return true;
760 }
761
762 /*
763  * Call the respective handler function for the given range.
764  * We split up any 64 bit accesses into two consecutive 32 bit
765  * handler calls and merge the result afterwards.
766  * We do this in a little endian fashion regardless of the host's
767  * or guest's endianness, because the GIC is always LE and the rest of
768  * the code (vgic_reg_access) also puts it in a LE fashion already.
769  * At this point we have already identified the handle function, so
770  * range points to that one entry and offset is relative to this.
771  */
772 static bool call_range_handler(struct kvm_vcpu *vcpu,
773                                struct kvm_exit_mmio *mmio,
774                                unsigned long offset,
775                                const struct vgic_io_range *range)
776 {
777         struct kvm_exit_mmio mmio32;
778         bool ret;
779
780         if (likely(mmio->len <= 4))
781                 return range->handle_mmio(vcpu, mmio, offset);
782
783         /*
784          * Any access bigger than 4 bytes (that we currently handle in KVM)
785          * is actually 8 bytes long, caused by a 64-bit access
786          */
787
788         mmio32.len = 4;
789         mmio32.is_write = mmio->is_write;
790         mmio32.private = mmio->private;
791
792         mmio32.phys_addr = mmio->phys_addr + 4;
793         mmio32.data = &((u32 *)mmio->data)[1];
794         ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
795
796         mmio32.phys_addr = mmio->phys_addr;
797         mmio32.data = &((u32 *)mmio->data)[0];
798         ret |= range->handle_mmio(vcpu, &mmio32, offset);
799
800         return ret;
801 }
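
/*
 * For example, an 8-byte access at range-relative offset 0x0 is turned into
 * two 4-byte calls: first at offset 0x4 with the second data word
 * (&data[1]), then at offset 0x0 with the first data word, matching the
 * little-endian layout described above.
 */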
802
803 /**
804  * vgic_handle_mmio_access - handle an in-kernel MMIO access
805  * This is called by the read/write KVM IO device wrappers below.
806  * @vcpu:       pointer to the vcpu performing the access
807  * @this:       pointer to the KVM IO device in charge
808  * @addr:       guest physical address of the access
809  * @len:        size of the access
810  * @val:        pointer to the data region
811  * @is_write:   read or write access
812  *
813  * returns 0 if the MMIO access could be performed, a negative error code otherwise
814  */
815 static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
816                                    struct kvm_io_device *this, gpa_t addr,
817                                    int len, void *val, bool is_write)
818 {
819         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
820         struct vgic_io_device *iodev = container_of(this,
821                                                     struct vgic_io_device, dev);
822         const struct vgic_io_range *range;
823         struct kvm_exit_mmio mmio;
824         bool updated_state;
825         gpa_t offset;
826
827         offset = addr - iodev->addr;
828         range = vgic_find_range(iodev->reg_ranges, len, offset);
829         if (unlikely(!range || !range->handle_mmio)) {
830                 pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
831                 return -ENXIO;
832         }
833
834         mmio.phys_addr = addr;
835         mmio.len = len;
836         mmio.is_write = is_write;
837         mmio.data = val;
838         mmio.private = iodev->redist_vcpu;
839
840         spin_lock(&dist->lock);
841         offset -= range->base;
842         if (vgic_validate_access(dist, range, offset)) {
843                 updated_state = call_range_handler(vcpu, &mmio, offset, range);
844         } else {
845                 if (!is_write)
846                         memset(val, 0, len);
847                 updated_state = false;
848         }
849         spin_unlock(&dist->lock);
850
851         if (updated_state)
852                 vgic_kick_vcpus(vcpu->kvm);
853
854         return 0;
855 }
856
857 static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
858                                  struct kvm_io_device *this,
859                                  gpa_t addr, int len, void *val)
860 {
861         return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
862 }
863
864 static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
865                                   struct kvm_io_device *this,
866                                   gpa_t addr, int len, const void *val)
867 {
868         return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
869                                        true);
870 }
871
872 static struct kvm_io_device_ops vgic_io_ops = {
873         .read   = vgic_handle_mmio_read,
874         .write  = vgic_handle_mmio_write,
875 };
876
877 /**
878  * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus
879  * @kvm:            The VM structure pointer
880  * @base:           The (guest) base address for the register frame
881  * @len:            Length of the register frame window
882  * @ranges:         Describing the handler functions for each register
883  * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call
884  * @iodev:          Points to memory to be passed on to the handler
885  *
886  * @iodev stores the parameters of this function so that they are usable by
887  * the handler and the dispatcher function (since the KVM I/O bus framework lacks
888  * an opaque parameter). Initialization is done in this function, but the
889  * reference should be valid and unique for the whole VGIC lifetime.
890  * If the register frame is not mapped for a specific VCPU, pass -1 to
891  * @redist_vcpu_id.
892  */
893 int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
894                              const struct vgic_io_range *ranges,
895                              int redist_vcpu_id,
896                              struct vgic_io_device *iodev)
897 {
898         struct kvm_vcpu *vcpu = NULL;
899         int ret;
900
901         if (redist_vcpu_id >= 0)
902                 vcpu = kvm_get_vcpu(kvm, redist_vcpu_id);
903
904         iodev->addr             = base;
905         iodev->len              = len;
906         iodev->reg_ranges       = ranges;
907         iodev->redist_vcpu      = vcpu;
908
909         kvm_iodevice_init(&iodev->dev, &vgic_io_ops);
910
911         mutex_lock(&kvm->slots_lock);
912
913         ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
914                                       &iodev->dev);
915         mutex_unlock(&kvm->slots_lock);
916
917         /* Mark the iodev as invalid if registration fails. */
918         if (ret)
919                 iodev->dev.ops = NULL;
920
921         return ret;
922 }
923
924 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
925 {
926         return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
927 }
928
929 static int compute_active_for_cpu(struct kvm_vcpu *vcpu)
930 {
931         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
932         unsigned long *active, *enabled, *act_percpu, *act_shared;
933         unsigned long active_private, active_shared;
934         int nr_shared = vgic_nr_shared_irqs(dist);
935         int vcpu_id;
936
937         vcpu_id = vcpu->vcpu_id;
938         act_percpu = vcpu->arch.vgic_cpu.active_percpu;
939         act_shared = vcpu->arch.vgic_cpu.active_shared;
940
941         active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id);
942         enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
943         bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS);
944
945         active = vgic_bitmap_get_shared_map(&dist->irq_active);
946         enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
947         bitmap_and(act_shared, active, enabled, nr_shared);
948         bitmap_and(act_shared, act_shared,
949                    vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
950                    nr_shared);
951
952         active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS);
953         active_shared = find_first_bit(act_shared, nr_shared);
954
955         return (active_private < VGIC_NR_PRIVATE_IRQS ||
956                 active_shared < nr_shared);
957 }
958
959 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
960 {
961         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
962         unsigned long *pending, *enabled, *pend_percpu, *pend_shared;
963         unsigned long pending_private, pending_shared;
964         int nr_shared = vgic_nr_shared_irqs(dist);
965         int vcpu_id;
966
967         vcpu_id = vcpu->vcpu_id;
968         pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
969         pend_shared = vcpu->arch.vgic_cpu.pending_shared;
970
971         if (!dist->enabled) {
972                 bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS);
973                 bitmap_zero(pend_shared, nr_shared);
974                 return 0;
975         }
976
977         pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
978         enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
979         bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
980
981         pending = vgic_bitmap_get_shared_map(&dist->irq_pending);
982         enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
983         bitmap_and(pend_shared, pending, enabled, nr_shared);
984         bitmap_and(pend_shared, pend_shared,
985                    vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
986                    nr_shared);
987
988         pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS);
989         pending_shared = find_first_bit(pend_shared, nr_shared);
990         return (pending_private < VGIC_NR_PRIVATE_IRQS ||
991                 pending_shared < nr_shared);
992 }
993
994 /*
995  * Update the interrupt state and determine which CPUs have pending
996  * or active interrupts. Must be called with distributor lock held.
997  */
998 void vgic_update_state(struct kvm *kvm)
999 {
1000         struct vgic_dist *dist = &kvm->arch.vgic;
1001         struct kvm_vcpu *vcpu;
1002         int c;
1003
1004         kvm_for_each_vcpu(c, vcpu, kvm) {
1005                 if (compute_pending_for_cpu(vcpu))
1006                         set_bit(c, dist->irq_pending_on_cpu);
1007
1008                 if (compute_active_for_cpu(vcpu))
1009                         set_bit(c, dist->irq_active_on_cpu);
1010                 else
1011                         clear_bit(c, dist->irq_active_on_cpu);
1012         }
1013 }
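
/*
 * Note that this only ever sets bits in irq_pending_on_cpu; a vcpu's pending
 * bit is typically cleared again on the flush path
 * (__kvm_vgic_flush_hwstate()) once everything has been queued, or when the
 * last pending source for that vcpu is removed, whereas irq_active_on_cpu is
 * fully recomputed here.
 */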
1014
1015 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr)
1016 {
1017         return vgic_ops->get_lr(vcpu, lr);
1018 }
1019
1020 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
1021                                struct vgic_lr vlr)
1022 {
1023         vgic_ops->set_lr(vcpu, lr, vlr);
1024 }
1025
1026 static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
1027 {
1028         return vgic_ops->get_elrsr(vcpu);
1029 }
1030
1031 static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
1032 {
1033         return vgic_ops->get_eisr(vcpu);
1034 }
1035
1036 static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
1037 {
1038         vgic_ops->clear_eisr(vcpu);
1039 }
1040
1041 static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
1042 {
1043         return vgic_ops->get_interrupt_status(vcpu);
1044 }
1045
1046 static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu)
1047 {
1048         vgic_ops->enable_underflow(vcpu);
1049 }
1050
1051 static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu)
1052 {
1053         vgic_ops->disable_underflow(vcpu);
1054 }
1055
1056 void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1057 {
1058         vgic_ops->get_vmcr(vcpu, vmcr);
1059 }
1060
1061 void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
1062 {
1063         vgic_ops->set_vmcr(vcpu, vmcr);
1064 }
1065
1066 static inline void vgic_enable(struct kvm_vcpu *vcpu)
1067 {
1068         vgic_ops->enable(vcpu);
1069 }
1070
1071 static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
1072 {
1073         struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
1074
1075         vgic_irq_clear_queued(vcpu, vlr.irq);
1076
1077         /*
1078          * We must transfer the pending state back to the distributor before
1079          * retiring the LR, otherwise we may lose edge-triggered interrupts.
1080          */
1081         if (vlr.state & LR_STATE_PENDING) {
1082                 vgic_dist_irq_set_pending(vcpu, vlr.irq);
1083                 vlr.hwirq = 0;
1084         }
1085
1086         vlr.state = 0;
1087         vgic_set_lr(vcpu, lr_nr, vlr);
1088 }
1089
1090 static bool dist_active_irq(struct kvm_vcpu *vcpu)
1091 {
1092         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1093
1094         return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
1095 }
1096
1097 bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
1098 {
1099         int i;
1100
1101         for (i = 0; i < vgic->nr_lr; i++) {
1102                 struct vgic_lr vlr = vgic_get_lr(vcpu, i);
1103
1104                 if (vlr.irq == virt_irq && vlr.state & LR_STATE_ACTIVE)
1105                         return true;
1106         }
1107
1108         return vgic_irq_is_active(vcpu, virt_irq);
1109 }
1110
1111 /*
1112  * An interrupt may have been disabled after being made pending on the
1113  * CPU interface (the classic case is a timer running while we're
1114  * rebooting the guest - the interrupt would kick as soon as the CPU
1115  * interface gets enabled, with deadly consequences).
1116  *
1117  * The solution is to examine already active LRs, and check the
1118  * interrupt is still enabled. If not, just retire it.
1119  */
1120 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
1121 {
1122         u64 elrsr = vgic_get_elrsr(vcpu);
1123         unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
1124         int lr;
1125
1126         for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
1127                 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1128
1129                 if (!vgic_irq_is_enabled(vcpu, vlr.irq))
1130                         vgic_retire_lr(lr, vcpu);
1131         }
1132 }
1133
1134 static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
1135                                  int lr_nr, struct vgic_lr vlr)
1136 {
1137         if (vgic_irq_is_active(vcpu, irq)) {
1138                 vlr.state |= LR_STATE_ACTIVE;
1139                 kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
1140                 vgic_irq_clear_active(vcpu, irq);
1141                 vgic_update_state(vcpu->kvm);
1142         } else {
1143                 WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq));
1144                 vlr.state |= LR_STATE_PENDING;
1145                 kvm_debug("Set pending: 0x%x\n", vlr.state);
1146         }
1147
1148         if (!vgic_irq_is_edge(vcpu, irq))
1149                 vlr.state |= LR_EOI_INT;
1150
1151         if (vlr.irq >= VGIC_NR_SGIS) {
1152                 struct irq_phys_map *map;
1153                 map = vgic_irq_map_search(vcpu, irq);
1154
1155                 if (map) {
1156                         vlr.hwirq = map->phys_irq;
1157                         vlr.state |= LR_HW;
1158                         vlr.state &= ~LR_EOI_INT;
1159
1160                         /*
1161                          * Make sure we're not going to sample this
1162                          * again, as a HW-backed interrupt cannot be
1163                          * in the PENDING_ACTIVE stage.
1164                          */
1165                         vgic_irq_set_queued(vcpu, irq);
1166                 }
1167         }
1168
1169         vgic_set_lr(vcpu, lr_nr, vlr);
1170 }
1171
1172 /*
1173  * Queue an interrupt to a CPU virtual interface. Return true on success,
1174  * or false if it wasn't possible to queue it.
1175  * sgi_source must be zero for any non-SGI interrupts.
1176  */
1177 bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
1178 {
1179         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1180         u64 elrsr = vgic_get_elrsr(vcpu);
1181         unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
1182         struct vgic_lr vlr;
1183         int lr;
1184
1185         /* Sanitize the input... */
1186         BUG_ON(sgi_source_id & ~7);
1187         BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS);
1188         BUG_ON(irq >= dist->nr_irqs);
1189
1190         kvm_debug("Queue IRQ%d\n", irq);
1191
1192         /* Do we have an active interrupt for the same CPUID? */
1193         for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
1194                 vlr = vgic_get_lr(vcpu, lr);
1195                 if (vlr.irq == irq && vlr.source == sgi_source_id) {
1196                         kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
1197                         vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
1198                         return true;
1199                 }
1200         }
1201
1202         /* Try to use another LR for this interrupt */
1203         lr = find_first_bit(elrsr_ptr, vgic->nr_lr);
1204         if (lr >= vgic->nr_lr)
1205                 return false;
1206
1207         kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
1208
1209         vlr.irq = irq;
1210         vlr.source = sgi_source_id;
1211         vlr.state = 0;
1212         vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
1213
1214         return true;
1215 }
1216
1217 static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
1218 {
1219         if (!vgic_can_sample_irq(vcpu, irq))
1220                 return true; /* level interrupt, already queued */
1221
1222         if (vgic_queue_irq(vcpu, 0, irq)) {
1223                 if (vgic_irq_is_edge(vcpu, irq)) {
1224                         vgic_dist_irq_clear_pending(vcpu, irq);
1225                         vgic_cpu_irq_clear(vcpu, irq);
1226                 } else {
1227                         vgic_irq_set_queued(vcpu, irq);
1228                 }
1229
1230                 return true;
1231         }
1232
1233         return false;
1234 }
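
/*
 * The asymmetry above follows the model described at the top of the file: an
 * edge interrupt is consumed when it is queued (its pending state is cleared
 * immediately), while a level interrupt stays pending and is marked as
 * queued so the line is not sampled again until the EOI maintenance path
 * clears irq_queued.
 */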
1235
1236 /*
1237  * Fill the list registers with pending interrupts before running the
1238  * guest.
1239  */
1240 static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1241 {
1242         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1243         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1244         unsigned long *pa_percpu, *pa_shared;
1245         int i, vcpu_id;
1246         int overflow = 0;
1247         int nr_shared = vgic_nr_shared_irqs(dist);
1248
1249         vcpu_id = vcpu->vcpu_id;
1250
1251         pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu;
1252         pa_shared = vcpu->arch.vgic_cpu.pend_act_shared;
1253
1254         bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu,
1255                   VGIC_NR_PRIVATE_IRQS);
1256         bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared,
1257                   nr_shared);
1258         /*
1259          * We may not have any pending interrupt, or the interrupts
1260          * may have been serviced from another vcpu. In all cases,
1261          * move along.
1262          */
1263         if (!kvm_vgic_vcpu_pending_irq(vcpu) && !dist_active_irq(vcpu))
1264                 goto epilog;
1265
1266         /* SGIs */
1267         for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) {
1268                 if (!queue_sgi(vcpu, i))
1269                         overflow = 1;
1270         }
1271
1272         /* PPIs */
1273         for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) {
1274                 if (!vgic_queue_hwirq(vcpu, i))
1275                         overflow = 1;
1276         }
1277
1278         /* SPIs */
1279         for_each_set_bit(i, pa_shared, nr_shared) {
1280                 if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
1281                         overflow = 1;
1282         }
1283
1287 epilog:
1288         if (overflow) {
1289                 vgic_enable_underflow(vcpu);
1290         } else {
1291                 vgic_disable_underflow(vcpu);
1292                 /*
1293                  * We're about to run this VCPU, and we've consumed
1294                  * everything the distributor had in store for
1295                  * us. Claim we don't have anything pending. We'll
1296                  * adjust that if needed while exiting.
1297                  */
1298                 clear_bit(vcpu_id, dist->irq_pending_on_cpu);
1299         }
1300 }
1301
1302 static int process_queued_irq(struct kvm_vcpu *vcpu,
1303                                    int lr, struct vgic_lr vlr)
1304 {
1305         int pending = 0;
1306
1307         /*
1308          * If the IRQ was EOIed (called from vgic_process_maintenance) or it
1309          * went from active to non-active (called from vgic_sync_hwirq) it was
1310          * also ACKed and we therefore assume we can clear the soft pending
1311          * state (should it have been set) for this interrupt.
1312          *
1313          * Note: if the IRQ soft pending state was set after the IRQ was
1314          * acked, it actually shouldn't be cleared, but we have no way of
1315          * knowing that unless we start trapping ACKs when the soft-pending
1316          * state is set.
1317          */
1318         vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
1319
1320         /*
1321          * Tell the gic to start sampling this interrupt again.
1322          */
1323         vgic_irq_clear_queued(vcpu, vlr.irq);
1324
1325         /* Any additional pending interrupt? */
1326         if (vgic_irq_is_edge(vcpu, vlr.irq)) {
1327                 BUG_ON(!(vlr.state & LR_HW));
1328                 pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
1329         } else {
1330                 if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
1331                         vgic_cpu_irq_set(vcpu, vlr.irq);
1332                         pending = 1;
1333                 } else {
1334                         vgic_dist_irq_clear_pending(vcpu, vlr.irq);
1335                         vgic_cpu_irq_clear(vcpu, vlr.irq);
1336                 }
1337         }
1338
1339         /*
1340          * Despite being EOIed, the LR may not have
1341          * been marked as empty.
1342          */
1343         vlr.state = 0;
1344         vlr.hwirq = 0;
1345         vgic_set_lr(vcpu, lr, vlr);
1346
1347         return pending;
1348 }
1349
1350 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
1351 {
1352         u32 status = vgic_get_interrupt_status(vcpu);
1353         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1354         struct kvm *kvm = vcpu->kvm;
1355         int level_pending = 0;
1356
1357         kvm_debug("STATUS = %08x\n", status);
1358
1359         if (status & INT_STATUS_EOI) {
1360                 /*
1361                  * Some level interrupts have been EOIed. Clear their
1362                  * active bit.
1363                  */
1364                 u64 eisr = vgic_get_eisr(vcpu);
1365                 unsigned long *eisr_ptr = u64_to_bitmask(&eisr);
1366                 int lr;
1367
1368                 for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
1369                         struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1370
1371                         WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
1372                         WARN_ON(vlr.state & LR_STATE_MASK);
1373
1374
1375                         /*
1376                          * kvm_notify_acked_irq calls kvm_set_irq()
1377                          * to reset the IRQ level, which grabs the dist->lock
1378                          * so we call this before taking the dist->lock.
1379                          */
1380                         kvm_notify_acked_irq(kvm, 0,
1381                                              vlr.irq - VGIC_NR_PRIVATE_IRQS);
1382
1383                         spin_lock(&dist->lock);
1384                         level_pending |= process_queued_irq(vcpu, lr, vlr);
1385                         spin_unlock(&dist->lock);
1386                 }
1387         }
1388
1389         if (status & INT_STATUS_UNDERFLOW)
1390                 vgic_disable_underflow(vcpu);
1391
1392         /*
1393          * In the next iterations of the vcpu loop, if we sync the vgic state
1394          * after flushing it, but before entering the guest (this happens for
1395          * pending signals and vmid rollovers), then make sure we don't pick
1396          * up any old maintenance interrupts here.
1397          */
1398         vgic_clear_eisr(vcpu);
1399
1400         return level_pending;
1401 }
1402
1403 /*
1404  * Save the physical active state, and reset it to inactive.
1405  *
1406  * Return true if there's a pending forwarded interrupt to queue.
1407  */
1408 static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
1409 {
1410         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1411         bool level_pending;
1412
1413         if (!(vlr.state & LR_HW))
1414                 return false;
1415
1416         if (vlr.state & LR_STATE_ACTIVE)
1417                 return false;
1418
1419         spin_lock(&dist->lock);
1420         level_pending = process_queued_irq(vcpu, lr, vlr);
1421         spin_unlock(&dist->lock);
1422         return level_pending;
1423 }
1424
1425 /* Sync back the VGIC state after a guest run */
1426 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1427 {
1428         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1429         u64 elrsr;
1430         unsigned long *elrsr_ptr;
1431         int lr, pending;
1432         bool level_pending;
1433
1434         level_pending = vgic_process_maintenance(vcpu);
1435
1436         /* Deal with HW interrupts, and clear mappings for empty LRs */
1437         for (lr = 0; lr < vgic->nr_lr; lr++) {
1438                 struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
1439
1440                 level_pending |= vgic_sync_hwirq(vcpu, lr, vlr);
1441                 BUG_ON(vlr.irq >= dist->nr_irqs);
1442         }
1443
1444         /* Check if we still have something up our sleeve... */
1445         elrsr = vgic_get_elrsr(vcpu);
1446         elrsr_ptr = u64_to_bitmask(&elrsr);
1447         pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
1448         if (level_pending || pending < vgic->nr_lr)
1449                 set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1450 }
1451
1452 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
1453 {
1454         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1455
1456         if (!irqchip_in_kernel(vcpu->kvm))
1457                 return;
1458
1459         spin_lock(&dist->lock);
1460         __kvm_vgic_flush_hwstate(vcpu);
1461         spin_unlock(&dist->lock);
1462 }
1463
1464 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
1465 {
1466         if (!irqchip_in_kernel(vcpu->kvm))
1467                 return;
1468
1469         __kvm_vgic_sync_hwstate(vcpu);
1470 }
1471
1472 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
1473 {
1474         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1475
1476         if (!irqchip_in_kernel(vcpu->kvm))
1477                 return 0;
1478
1479         return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
1480 }
1481
1482 void vgic_kick_vcpus(struct kvm *kvm)
1483 {
1484         struct kvm_vcpu *vcpu;
1485         int c;
1486
1487         /*
1488          * We've injected an interrupt, time to find out who deserves
1489          * a good kick...
1490          */
1491         kvm_for_each_vcpu(c, vcpu, kvm) {
1492                 if (kvm_vgic_vcpu_pending_irq(vcpu))
1493                         kvm_vcpu_kick(vcpu);
1494         }
1495 }
1496
1497 static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
1498 {
1499         int edge_triggered = vgic_irq_is_edge(vcpu, irq);
1500
1501         /*
1502          * Only inject an interrupt if:
1503          * - edge triggered and we have a rising edge
1504          * - level triggered and we change level
1505          */
1506         if (edge_triggered) {
1507                 int state = vgic_dist_irq_is_pending(vcpu, irq);
1508                 return level > state;
1509         } else {
1510                 int state = vgic_dist_irq_get_level(vcpu, irq);
1511                 return level != state;
1512         }
1513 }
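
/*
 * For example, asserting level=1 on an edge-triggered IRQ that is already
 * pending (state == 1) is not a new rising edge and is rejected, and
 * re-asserting the same level on a level-triggered IRQ is ignored because
 * only level changes are propagated.
 */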
1514
1515 static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
1516                                    unsigned int irq_num, bool level)
1517 {
1518         struct vgic_dist *dist = &kvm->arch.vgic;
1519         struct kvm_vcpu *vcpu;
1520         int edge_triggered, level_triggered;
1521         int enabled;
1522         bool ret = true, can_inject = true;
1523
1524         trace_vgic_update_irq_pending(cpuid, irq_num, level);
1525
1526         if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
1527                 return -EINVAL;
1528
1529         spin_lock(&dist->lock);
1530
1531         vcpu = kvm_get_vcpu(kvm, cpuid);
1532         edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
1533         level_triggered = !edge_triggered;
1534
1535         if (!vgic_validate_injection(vcpu, irq_num, level)) {
1536                 ret = false;
1537                 goto out;
1538         }
1539
1540         if (irq_num >= VGIC_NR_PRIVATE_IRQS) {
1541                 cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS];
1542                 if (cpuid == VCPU_NOT_ALLOCATED) {
1543                         /* Pretend we use CPU0, and prevent injection */
1544                         cpuid = 0;
1545                         can_inject = false;
1546                 }
1547                 vcpu = kvm_get_vcpu(kvm, cpuid);
1548         }
1549
1550         kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid);
1551
1552         if (level) {
1553                 if (level_triggered)
1554                         vgic_dist_irq_set_level(vcpu, irq_num);
1555                 vgic_dist_irq_set_pending(vcpu, irq_num);
1556         } else {
1557                 if (level_triggered) {
1558                         vgic_dist_irq_clear_level(vcpu, irq_num);
1559                         if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) {
1560                                 vgic_dist_irq_clear_pending(vcpu, irq_num);
1561                                 vgic_cpu_irq_clear(vcpu, irq_num);
1562                                 if (!compute_pending_for_cpu(vcpu))
1563                                         clear_bit(cpuid, dist->irq_pending_on_cpu);
1564                         }
1565                 }
1566
1567                 ret = false;
1568                 goto out;
1569         }
1570
1571         enabled = vgic_irq_is_enabled(vcpu, irq_num);
1572
1573         if (!enabled || !can_inject) {
1574                 ret = false;
1575                 goto out;
1576         }
1577
1578         if (!vgic_can_sample_irq(vcpu, irq_num)) {
1579                 /*
1580                  * Level interrupt in progress, will be picked up
1581                  * when EOIed.
1582                  */
1583                 ret = false;
1584                 goto out;
1585         }
1586
1587         if (level) {
1588                 vgic_cpu_irq_set(vcpu, irq_num);
1589                 set_bit(cpuid, dist->irq_pending_on_cpu);
1590         }
1591
1592 out:
1593         spin_unlock(&dist->lock);
1594
1595         if (ret) {
1596                 /* kick the specified vcpu */
1597                 kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
1598         }
1599
1600         return 0;
1601 }
1602
1603 static int vgic_lazy_init(struct kvm *kvm)
1604 {
1605         int ret = 0;
1606
1607         if (unlikely(!vgic_initialized(kvm))) {
1608                 /*
1609                  * We only provide the automatic initialization of the VGIC
1610                  * for the legacy case of a GICv2. Any other type must
1611                  * be explicitly initialized once setup with the respective
1612                  * KVM device call.
1613                  */
1614                 if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
1615                         return -EBUSY;
1616
1617                 mutex_lock(&kvm->lock);
1618                 ret = vgic_init(kvm);
1619                 mutex_unlock(&kvm->lock);
1620         }
1621
1622         return ret;
1623 }
1624
1625 /**
1626  * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
1627  * @kvm:     The VM structure pointer
1628  * @cpuid:   The CPU for PPIs
1629  * @irq_num: The IRQ number that is assigned to the device. This IRQ
1630  *           must not be mapped to a HW interrupt.
1631  * @level:   Edge-triggered:  true:  to trigger the interrupt
1632  *                            false: to ignore the call
1633  *           Level-sensitive: true:  raise the input signal
1634  *                            false: lower the input signal
1635  *
1636  * The GIC is not concerned with devices being active-LOW or active-HIGH for
1637  * level-sensitive interrupts.  You can think of the level parameter as 1
1638  * being HIGH and 0 being LOW and all devices being active-HIGH.
1639  */
1640 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
1641                         bool level)
1642 {
1643         struct irq_phys_map *map;
1644         int ret;
1645
1646         ret = vgic_lazy_init(kvm);
1647         if (ret)
1648                 return ret;
1649
1650         map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
1651         if (map)
1652                 return -EINVAL;
1653
1654         return vgic_update_irq_pending(kvm, cpuid, irq_num, level);
1655 }
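/*
 * Illustrative use, roughly as an in-kernel device model might call it for a
 * level-sensitive line (the IRQ number 32 + 5 == SPI 37 and target CPU 0 are
 * placeholders, not taken from real code):
 *
 *	kvm_vgic_inject_irq(kvm, 0, 32 + 5, true);    (assert the line)
 *	kvm_vgic_inject_irq(kvm, 0, 32 + 5, false);   (deassert it again)
 */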
1656
1657 /**
1658  * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
1659  * @kvm:     The VM structure pointer
1660  * @cpuid:   The CPU for PPIs
1661  * @virt_irq: The virtual IRQ to be injected
1662  * @level:   Edge-triggered:  true:  to trigger the interrupt
1663  *                            false: to ignore the call
1664  *           Level-sensitive: true:  raise the input signal
1665  *                            false: lower the input signal
1666  *
1667  * The GIC is not concerned with devices being active-LOW or active-HIGH for
1668  * level-sensitive interrupts.  You can think of the level parameter as 1
1669  * being HIGH and 0 being LOW and all devices being active-HIGH.
1670  */
1671 int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
1672                                unsigned int virt_irq, bool level)
1673 {
1674         int ret;
1675
1676         ret = vgic_lazy_init(kvm);
1677         if (ret)
1678                 return ret;
1679
1680         return vgic_update_irq_pending(kvm, cpuid, virt_irq, level);
1681 }
1682
1683 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
1684 {
1685         /*
1686          * We cannot rely on the vgic maintenance interrupt to be
1687          * delivered synchronously. This means we can only use it to
1688          * exit the VM, and we perform the handling of EOIed
1689          * interrupts on the exit path (see vgic_process_maintenance).
1690          */
1691         return IRQ_HANDLED;
1692 }
1693
1694 static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
1695                                                     int virt_irq)
1696 {
1697         if (virt_irq < VGIC_NR_PRIVATE_IRQS)
1698                 return &vcpu->arch.vgic_cpu.irq_phys_map_list;
1699         else
1700                 return &vcpu->kvm->arch.vgic.irq_phys_map_list;
1701 }
1702
1703 /**
1704  * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
1705  * @vcpu: The VCPU pointer
1706  * @virt_irq: The virtual IRQ number for the guest
1707  * @phys_irq: The hardware IRQ number of the host
1708  *
1709  * Establish a mapping between a guest visible irq (@virt_irq) and a
1710  * hardware irq (@phys_irq). On injection, @virt_irq will be associated with
1711  * the physical interrupt represented by @phys_irq. This mapping can be
1712  * established multiple times as long as the parameters are the same.
1713  *
1714  * Returns 0 on success or an error value otherwise.
1715  */
1716 int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq)
1717 {
1718         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1719         struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
1720         struct irq_phys_map *map;
1721         struct irq_phys_map_entry *entry;
1722         int ret = 0;
1723
1724         /* Create a new mapping */
1725         entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1726         if (!entry)
1727                 return -ENOMEM;
1728
1729         spin_lock(&dist->irq_phys_map_lock);
1730
1731         /* Try to match an existing mapping */
1732         map = vgic_irq_map_search(vcpu, virt_irq);
1733         if (map) {
1734                 /* Make sure this mapping matches */
1735                 if (map->phys_irq != phys_irq)
1736                         ret = -EINVAL;
1737
1738                 /* Found an existing, valid mapping */
1739                 goto out;
1740         }
1741
1742         map           = &entry->map;
1743         map->virt_irq = virt_irq;
1744         map->phys_irq = phys_irq;
1745
1746         list_add_tail_rcu(&entry->entry, root);
1747
1748 out:
1749         spin_unlock(&dist->irq_phys_map_lock);
1750         /* If we've found a hit in the existing list, free the useless
1751          * entry */
1752         if (ret || map != &entry->map)
1753                 kfree(entry);
1754         return ret;
1755 }
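/*
 * Illustrative pairing (virt_irq 27 and host_irq are placeholders): code
 * that forwards a hardware interrupt would typically call
 *
 *	kvm_vgic_map_phys_irq(vcpu, 27, host_irq);
 *
 * at setup time and kvm_vgic_unmap_phys_irq(vcpu, 27) on teardown, so that
 * injections of virtual IRQ 27 stay tied to the physical line.
 */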
1756
1757 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
1758                                                 int virt_irq)
1759 {
1760         struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
1761         struct irq_phys_map_entry *entry;
1762         struct irq_phys_map *map;
1763
1764         rcu_read_lock();
1765
1766         list_for_each_entry_rcu(entry, root, entry) {
1767                 map = &entry->map;
1768                 if (map->virt_irq == virt_irq) {
1769                         rcu_read_unlock();
1770                         return map;
1771                 }
1772         }
1773
1774         rcu_read_unlock();
1775
1776         return NULL;
1777 }
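/*
 * Locking note (added summary): lookups only take rcu_read_lock(), which is
 * what makes the list_del_rcu()/call_rcu() based removal in
 * kvm_vgic_unmap_phys_irq() below safe against concurrent searches.
 */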
1778
1779 static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
1780 {
1781         struct irq_phys_map_entry *entry;
1782
1783         entry = container_of(rcu, struct irq_phys_map_entry, rcu);
1784         kfree(entry);
1785 }
1786
1787 /**
1788  * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
1789  * @vcpu: The VCPU pointer
1790  * @virt_irq: The virtual IRQ number to be unmapped
1791  *
1792  * Remove an existing mapping between virtual and physical interrupts.
1793  */
1794 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
1795 {
1796         struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
1797         struct irq_phys_map_entry *entry;
1798         struct list_head *root;
1799
1800         root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
1801
1802         spin_lock(&dist->irq_phys_map_lock);
1803
1804         list_for_each_entry(entry, root, entry) {
1805                 if (entry->map.virt_irq == virt_irq) {
1806                         list_del_rcu(&entry->entry);
1807                         call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
1808                         break;
1809                 }
1810         }
1811
1812         spin_unlock(&dist->irq_phys_map_lock);
1813
1814         return 0;
1815 }
1816
1817 static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root)
1818 {
1819         struct vgic_dist *dist = &kvm->arch.vgic;
1820         struct irq_phys_map_entry *entry;
1821
1822         spin_lock(&dist->irq_phys_map_lock);
1823
1824         list_for_each_entry(entry, root, entry) {
1825                 list_del_rcu(&entry->entry);
1826                 call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
1827         }
1828
1829         spin_unlock(&dist->irq_phys_map_lock);
1830 }
1831
1832 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
1833 {
1834         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1835
1836         kfree(vgic_cpu->pending_shared);
1837         kfree(vgic_cpu->active_shared);
1838         kfree(vgic_cpu->pend_act_shared);
1839         vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
1840         vgic_cpu->pending_shared = NULL;
1841         vgic_cpu->active_shared = NULL;
1842         vgic_cpu->pend_act_shared = NULL;
1843 }
1844
1845 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
1846 {
1847         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1848         int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS);
1849         int sz = nr_longs * sizeof(unsigned long);
1850         vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
1851         vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
1852         vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
1853
1854         if (!vgic_cpu->pending_shared ||
1855             !vgic_cpu->active_shared ||
1856             !vgic_cpu->pend_act_shared) {
1857                 kvm_vgic_vcpu_destroy(vcpu);
1858                 return -ENOMEM;
1859         }
1860
1861         return 0;
1862 }
1863
1864 /**
1865  * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage
1866  *
1867  * No memory allocation should be performed here, only static init.
1868  */
1869 void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
1870 {
1871         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
1872         INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list);
1873 }
1874
1875 /**
1876  * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
1877  *
1878  * The host's GIC naturally limits the maximum number of VCPUs a guest
1879  * can use.
1880  */
1881 int kvm_vgic_get_max_vcpus(void)
1882 {
1883         return vgic->max_gic_vcpus;
1884 }
1885
1886 void kvm_vgic_destroy(struct kvm *kvm)
1887 {
1888         struct vgic_dist *dist = &kvm->arch.vgic;
1889         struct kvm_vcpu *vcpu;
1890         int i;
1891
1892         kvm_for_each_vcpu(i, vcpu, kvm)
1893                 kvm_vgic_vcpu_destroy(vcpu);
1894
1895         vgic_free_bitmap(&dist->irq_enabled);
1896         vgic_free_bitmap(&dist->irq_level);
1897         vgic_free_bitmap(&dist->irq_pending);
1898         vgic_free_bitmap(&dist->irq_soft_pend);
1899         vgic_free_bitmap(&dist->irq_queued);
1900         vgic_free_bitmap(&dist->irq_cfg);
1901         vgic_free_bytemap(&dist->irq_priority);
1902         if (dist->irq_spi_target) {
1903                 for (i = 0; i < dist->nr_cpus; i++)
1904                         vgic_free_bitmap(&dist->irq_spi_target[i]);
1905         }
1906         kfree(dist->irq_sgi_sources);
1907         kfree(dist->irq_spi_cpu);
1908         kfree(dist->irq_spi_mpidr);
1909         kfree(dist->irq_spi_target);
1910         kfree(dist->irq_pending_on_cpu);
1911         kfree(dist->irq_active_on_cpu);
1912         vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list);
1913         dist->irq_sgi_sources = NULL;
1914         dist->irq_spi_cpu = NULL;
1915         dist->irq_spi_target = NULL;
1916         dist->irq_pending_on_cpu = NULL;
1917         dist->irq_active_on_cpu = NULL;
1918         dist->nr_cpus = 0;
1919 }
1920
1921 /*
1922  * Allocate and initialize the various data structures. Must be called
1923  * with kvm->lock held!
1924  */
1925 int vgic_init(struct kvm *kvm)
1926 {
1927         struct vgic_dist *dist = &kvm->arch.vgic;
1928         struct kvm_vcpu *vcpu;
1929         int nr_cpus, nr_irqs;
1930         int ret, i, vcpu_id;
1931
1932         if (vgic_initialized(kvm))
1933                 return 0;
1934
1935         nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
1936         if (!nr_cpus)           /* No vcpus? Can't be good... */
1937                 return -ENODEV;
1938
1939         /*
1940          * If nobody configured the number of interrupts, use the
1941          * legacy one.
1942          */
1943         if (!dist->nr_irqs)
1944                 dist->nr_irqs = VGIC_NR_IRQS_LEGACY;
1945
1946         nr_irqs = dist->nr_irqs;
1947
1948         ret  = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs);
1949         ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs);
1950         ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
1951         ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
1952         ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
1953         ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
1954         ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
1955         ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
1956
1957         if (ret)
1958                 goto out;
1959
1960         dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL);
1961         dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL);
1962         dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus,
1963                                        GFP_KERNEL);
1964         dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
1965                                            GFP_KERNEL);
1966         dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
1967                                            GFP_KERNEL);
1968         if (!dist->irq_sgi_sources ||
1969             !dist->irq_spi_cpu ||
1970             !dist->irq_spi_target ||
1971             !dist->irq_pending_on_cpu ||
1972             !dist->irq_active_on_cpu) {
1973                 ret = -ENOMEM;
1974                 goto out;
1975         }
1976
1977         for (i = 0; i < nr_cpus; i++)
1978                 ret |= vgic_init_bitmap(&dist->irq_spi_target[i],
1979                                         nr_cpus, nr_irqs);
1980
1981         if (ret)
1982                 goto out;
1983
1984         ret = kvm->arch.vgic.vm_ops.init_model(kvm);
1985         if (ret)
1986                 goto out;
1987
1988         kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
1989                 ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
1990                 if (ret) {
1991                         kvm_err("VGIC: Failed to allocate vcpu memory\n");
1992                         break;
1993                 }
1994
1995                 /*
1996                  * Enable and configure all SGIs to be edge-triggered and
1997                  * configure all PPIs as level-triggered.
1998                  */
1999                 for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
2000                         if (i < VGIC_NR_SGIS) {
2001                                 /* SGIs */
2002                                 vgic_bitmap_set_irq_val(&dist->irq_enabled,
2003                                                         vcpu->vcpu_id, i, 1);
2004                                 vgic_bitmap_set_irq_val(&dist->irq_cfg,
2005                                                         vcpu->vcpu_id, i,
2006                                                         VGIC_CFG_EDGE);
2007                         } else if (i < VGIC_NR_PRIVATE_IRQS) {
2008                                 /* PPIs */
2009                                 vgic_bitmap_set_irq_val(&dist->irq_cfg,
2010                                                         vcpu->vcpu_id, i,
2011                                                         VGIC_CFG_LEVEL);
2012                         }
2013                 }
2014
2015                 vgic_enable(vcpu);
2016         }
2017
2018 out:
2019         if (ret)
2020                 kvm_vgic_destroy(kvm);
2021
2022         return ret;
2023 }
2024
2025 static int init_vgic_model(struct kvm *kvm, int type)
2026 {
2027         switch (type) {
2028         case KVM_DEV_TYPE_ARM_VGIC_V2:
2029                 vgic_v2_init_emulation(kvm);
2030                 break;
2031 #ifdef CONFIG_KVM_ARM_VGIC_V3
2032         case KVM_DEV_TYPE_ARM_VGIC_V3:
2033                 vgic_v3_init_emulation(kvm);
2034                 break;
2035 #endif
2036         default:
2037                 return -ENODEV;
2038         }
2039
2040         if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus)
2041                 return -E2BIG;
2042
2043         return 0;
2044 }
2045
2046 /**
2047  * kvm_vgic_early_init - Earliest possible vgic initialization stage
2048  *
2049  * No memory allocation should be performed here, only static init.
2050  */
2051 void kvm_vgic_early_init(struct kvm *kvm)
2052 {
2053         spin_lock_init(&kvm->arch.vgic.lock);
2054         spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock);
2055         INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list);
2056 }
2057
2058 int kvm_vgic_create(struct kvm *kvm, u32 type)
2059 {
2060         int i, vcpu_lock_idx = -1, ret;
2061         struct kvm_vcpu *vcpu;
2062
2063         mutex_lock(&kvm->lock);
2064
2065         if (irqchip_in_kernel(kvm)) {
2066                 ret = -EEXIST;
2067                 goto out;
2068         }
2069
2070         /*
2071          * This function is also called by the KVM_CREATE_IRQCHIP handler,
2072          * which has not yet had a chance to check the availability of the
2073          * GICv2 emulation. So check this here again. KVM_CREATE_DEVICE does
2074          * the proper checks already.
2075          */
2076         if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
2077                 ret = -ENODEV;
2078                 goto out;
2079         }
2080
2081         /*
2082          * Any time a vcpu is run, vcpu_load is called which tries to grab the
2083          * vcpu->mutex.  By grabbing the vcpu->mutex of all VCPUs we ensure
2084          * that no other VCPUs are run while we create the vgic.
2085          */
2086         ret = -EBUSY;
2087         kvm_for_each_vcpu(i, vcpu, kvm) {
2088                 if (!mutex_trylock(&vcpu->mutex))
2089                         goto out_unlock;
2090                 vcpu_lock_idx = i;
2091         }
2092
2093         kvm_for_each_vcpu(i, vcpu, kvm) {
2094                 if (vcpu->arch.has_run_once)
2095                         goto out_unlock;
2096         }
2097         ret = 0;
2098
2099         ret = init_vgic_model(kvm, type);
2100         if (ret)
2101                 goto out_unlock;
2102
2103         kvm->arch.vgic.in_kernel = true;
2104         kvm->arch.vgic.vgic_model = type;
2105         kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
2106         kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
2107         kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
2108         kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
2109
2110 out_unlock:
2111         for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
2112                 vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
2113                 mutex_unlock(&vcpu->mutex);
2114         }
2115
2116 out:
2117         mutex_unlock(&kvm->lock);
2118         return ret;
2119 }
2120
2121 static int vgic_ioaddr_overlap(struct kvm *kvm)
2122 {
2123         phys_addr_t dist = kvm->arch.vgic.vgic_dist_base;
2124         phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base;
2125
2126         if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu))
2127                 return 0;
2128         if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) ||
2129             (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist))
2130                 return -EBUSY;
2131         return 0;
2132 }
2133
2134 static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
2135                               phys_addr_t addr, phys_addr_t size)
2136 {
2137         int ret;
2138
2139         if (addr & ~KVM_PHYS_MASK)
2140                 return -E2BIG;
2141
2142         if (addr & (SZ_4K - 1))
2143                 return -EINVAL;
2144
2145         if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
2146                 return -EEXIST;
2147         if (addr + size < addr)
2148                 return -EINVAL;
2149
2150         *ioaddr = addr;
2151         ret = vgic_ioaddr_overlap(kvm);
2152         if (ret)
2153                 *ioaddr = VGIC_ADDR_UNDEF;
2154
2155         return ret;
2156 }
2157
2158 /**
2159  * kvm_vgic_addr - set or get vgic VM base addresses
2160  * @kvm:   pointer to the vm struct
2161  * @type:  the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
2162  * @addr:  pointer to address value
2163  * @write: if true set the address in the VM address space, if false read the
2164  *          address
2165  *
2166  * Set or get the vgic base addresses for the distributor and the virtual CPU
2167  * interface in the VM physical address space.  These addresses are properties
2168  * of the emulated core/SoC and are therefore known to user space, which
2169  * provides them to the kernel.
2170  */
2171 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
2172 {
2173         int r = 0;
2174         struct vgic_dist *vgic = &kvm->arch.vgic;
2175         int type_needed;
2176         phys_addr_t *addr_ptr, block_size;
2177         phys_addr_t alignment;
2178
2179         mutex_lock(&kvm->lock);
2180         switch (type) {
2181         case KVM_VGIC_V2_ADDR_TYPE_DIST:
2182                 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
2183                 addr_ptr = &vgic->vgic_dist_base;
2184                 block_size = KVM_VGIC_V2_DIST_SIZE;
2185                 alignment = SZ_4K;
2186                 break;
2187         case KVM_VGIC_V2_ADDR_TYPE_CPU:
2188                 type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
2189                 addr_ptr = &vgic->vgic_cpu_base;
2190                 block_size = KVM_VGIC_V2_CPU_SIZE;
2191                 alignment = SZ_4K;
2192                 break;
2193 #ifdef CONFIG_KVM_ARM_VGIC_V3
2194         case KVM_VGIC_V3_ADDR_TYPE_DIST:
2195                 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
2196                 addr_ptr = &vgic->vgic_dist_base;
2197                 block_size = KVM_VGIC_V3_DIST_SIZE;
2198                 alignment = SZ_64K;
2199                 break;
2200         case KVM_VGIC_V3_ADDR_TYPE_REDIST:
2201                 type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
2202                 addr_ptr = &vgic->vgic_redist_base;
2203                 block_size = KVM_VGIC_V3_REDIST_SIZE;
2204                 alignment = SZ_64K;
2205                 break;
2206 #endif
2207         default:
2208                 r = -ENODEV;
2209                 goto out;
2210         }
2211
2212         if (vgic->vgic_model != type_needed) {
2213                 r = -ENODEV;
2214                 goto out;
2215         }
2216
2217         if (write) {
2218                 if (!IS_ALIGNED(*addr, alignment))
2219                         r = -EINVAL;
2220                 else
2221                         r = vgic_ioaddr_assign(kvm, addr_ptr, *addr,
2222                                                block_size);
2223         } else {
2224                 *addr = *addr_ptr;
2225         }
2226
2227 out:
2228         mutex_unlock(&kvm->lock);
2229         return r;
2230 }
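/*
 * Example only (addresses are illustrative, not mandated by this code): a
 * GICv2 guest is commonly given something like a distributor base at
 * 0x08000000 and a CPU interface base at 0x08010000 through
 * KVM_DEV_ARM_VGIC_GRP_ADDR before the first vcpu runs; reads simply return
 * whatever was programmed.
 */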
2231
2232 int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2233 {
2234         int r;
2235
2236         switch (attr->group) {
2237         case KVM_DEV_ARM_VGIC_GRP_ADDR: {
2238                 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2239                 u64 addr;
2240                 unsigned long type = (unsigned long)attr->attr;
2241
2242                 if (copy_from_user(&addr, uaddr, sizeof(addr)))
2243                         return -EFAULT;
2244
2245                 r = kvm_vgic_addr(dev->kvm, type, &addr, true);
2246                 return (r == -ENODEV) ? -ENXIO : r;
2247         }
2248         case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2249                 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2250                 u32 val;
2251                 int ret = 0;
2252
2253                 if (get_user(val, uaddr))
2254                         return -EFAULT;
2255
2256                 /*
2257                  * We require:
2258                  * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
2259                  * - at most 1024 interrupts
2260                  * - a multiple of 32 interrupts
2261                  */
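                /* e.g. 64, 96, ..., 1024 are acceptable; 48 or 1056 are not */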
2262                 if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
2263                     val > VGIC_MAX_IRQS ||
2264                     (val & 31))
2265                         return -EINVAL;
2266
2267                 mutex_lock(&dev->kvm->lock);
2268
2269                 if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
2270                         ret = -EBUSY;
2271                 else
2272                         dev->kvm->arch.vgic.nr_irqs = val;
2273
2274                 mutex_unlock(&dev->kvm->lock);
2275
2276                 return ret;
2277         }
2278         case KVM_DEV_ARM_VGIC_GRP_CTRL: {
2279                 switch (attr->attr) {
2280                 case KVM_DEV_ARM_VGIC_CTRL_INIT:
2281                         r = vgic_init(dev->kvm);
2282                         return r;
2283                 }
2284                 break;
2285         }
2286         }
2287
2288         return -ENXIO;
2289 }
2290
2291 int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
2292 {
2293         int r = -ENXIO;
2294
2295         switch (attr->group) {
2296         case KVM_DEV_ARM_VGIC_GRP_ADDR: {
2297                 u64 __user *uaddr = (u64 __user *)(long)attr->addr;
2298                 u64 addr;
2299                 unsigned long type = (unsigned long)attr->attr;
2300
2301                 r = kvm_vgic_addr(dev->kvm, type, &addr, false);
2302                 if (r)
2303                         return (r == -ENODEV) ? -ENXIO : r;
2304
2305                 if (copy_to_user(uaddr, &addr, sizeof(addr)))
2306                         return -EFAULT;
2307                 break;
2308         }
2309         case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
2310                 u32 __user *uaddr = (u32 __user *)(long)attr->addr;
2311
2312                 r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr);
2313                 break;
2314         }
2315
2316         }
2317
2318         return r;
2319 }
2320
2321 int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset)
2322 {
2323         if (vgic_find_range(ranges, 4, offset))
2324                 return 0;
2325         else
2326                 return -ENXIO;
2327 }
2328
2329 static void vgic_init_maintenance_interrupt(void *info)
2330 {
2331         enable_percpu_irq(vgic->maint_irq, 0);
2332 }
2333
2334 static int vgic_cpu_notify(struct notifier_block *self,
2335                            unsigned long action, void *cpu)
2336 {
2337         switch (action) {
2338         case CPU_STARTING:
2339         case CPU_STARTING_FROZEN:
2340                 vgic_init_maintenance_interrupt(NULL);
2341                 break;
2342         case CPU_DYING:
2343         case CPU_DYING_FROZEN:
2344                 disable_percpu_irq(vgic->maint_irq);
2345                 break;
2346         }
2347
2348         return NOTIFY_OK;
2349 }
2350
2351 static struct notifier_block vgic_cpu_nb = {
2352         .notifier_call = vgic_cpu_notify,
2353 };
2354
2355 static int kvm_vgic_probe(void)
2356 {
2357         const struct gic_kvm_info *gic_kvm_info;
2358         int ret;
2359
2360         gic_kvm_info = gic_get_kvm_info();
2361         if (!gic_kvm_info)
2362                 return -ENODEV;
2363
2364         switch (gic_kvm_info->type) {
2365         case GIC_V2:
2366                 ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic);
2367                 break;
2368         case GIC_V3:
2369                 ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic);
2370                 break;
2371         default:
2372                 ret = -ENODEV;
2373         }
2374
2375         return ret;
2376 }
2377
2378 int kvm_vgic_hyp_init(void)
2379 {
2380         int ret;
2381
2382         ret = kvm_vgic_probe();
2383         if (ret) {
2384                 kvm_err("KVM vGIC probing failed\n");
2385                 return ret;
2386         }
2387
2388         ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
2389                                  "vgic", kvm_get_running_vcpus());
2390         if (ret) {
2391                 kvm_err("Cannot register interrupt %d\n", vgic->maint_irq);
2392                 return ret;
2393         }
2394
2395         ret = __register_cpu_notifier(&vgic_cpu_nb);
2396         if (ret) {
2397                 kvm_err("Cannot register vgic CPU notifier\n");
2398                 goto out_free_irq;
2399         }
2400
2401         on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
2402
2403         return 0;
2404
2405 out_free_irq:
2406         free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
2407         return ret;
2408 }
2409
2410 int kvm_irq_map_gsi(struct kvm *kvm,
2411                     struct kvm_kernel_irq_routing_entry *entries,
2412                     int gsi)
2413 {
2414         return 0;
2415 }
2416
2417 int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
2418 {
2419         return pin;
2420 }
2421
2422 int kvm_set_irq(struct kvm *kvm, int irq_source_id,
2423                 u32 irq, int level, bool line_status)
2424 {
2425         unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
2426
2427         trace_kvm_set_irq(irq, level, irq_source_id);
2428
2429         BUG_ON(!vgic_initialized(kvm));
2430
2431         return kvm_vgic_inject_irq(kvm, 0, spi, level);
2432 }
2433
2434 /* MSI not implemented yet */
2435 int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
2436                 struct kvm *kvm, int irq_source_id,
2437                 int level, bool line_status)
2438 {
2439         return 0;
2440 }