kvm: x86: Add kvm_x86_ops hook that enables XSAVES for guest
arch/x86/kvm/cpuid.c
/*
 * Kernel-based Virtual Machine driver for Linux
 * cpuid support routines
 *
 * derived from arch/x86/kvm/x86.c
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates.
 * Copyright IBM Corporation, 2008
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <asm/user.h>
#include <asm/xsave.h>
#include "cpuid.h"
#include "lapic.h"
#include "mmu.h"
#include "trace.h"

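/*
 * Size of XSAVE area needed for the extended states in @xstate_bv.
 * For each enabled extended component i, CPUID.0xD[i] reports the
 * component's size in EAX and its offset in EBX, so the area must be
 * at least as large as the largest offset + size; start from the
 * legacy region plus the XSAVE header.
 */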
static u32 xstate_required_size(u64 xstate_bv)
{
        int feature_bit = 0;
        u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;

        xstate_bv &= XSTATE_EXTEND_MASK;
        while (xstate_bv) {
                if (xstate_bv & 0x1) {
                        u32 eax, ebx, ecx, edx;
                        cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
                        ret = max(ret, eax + ebx);
                }

                xstate_bv >>= 1;
                feature_bit++;
        }

        return ret;
}

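/*
 * XCR0 bits KVM can expose to guests: the intersection of what KVM
 * supports and what the host has enabled, minus the MPX states when
 * the backend (vmx/svm) does not support MPX.
 */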
u64 kvm_supported_xcr0(void)
{
        u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;

        if (!kvm_x86_ops->mpx_supported())
                xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);

        return xcr0;
}

#define F(x) bit(X86_FEATURE_##x)

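/*
 * Recompute state derived from the CPUID table after userspace has
 * updated it: the OSXSAVE bit mirroring CR4.OSXSAVE, the LAPIC timer
 * mode mask (TSC-deadline support), the guest's supported XCR0 and
 * XSAVE area size, and a sanity check that the reported virtual
 * address width is 48 bits (or unreported), which the canonical
 * address checks assume.
 */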
int kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
        struct kvm_cpuid_entry2 *best;
        struct kvm_lapic *apic = vcpu->arch.apic;

        best = kvm_find_cpuid_entry(vcpu, 1, 0);
        if (!best)
                return 0;

        /* Update OSXSAVE bit */
        if (cpu_has_xsave && best->function == 0x1) {
                best->ecx &= ~F(OSXSAVE);
                if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
                        best->ecx |= F(OSXSAVE);
        }

        if (apic) {
                if (best->ecx & F(TSC_DEADLINE_TIMER))
                        apic->lapic_timer.timer_mode_mask = 3 << 17;
                else
                        apic->lapic_timer.timer_mode_mask = 1 << 17;
        }

        best = kvm_find_cpuid_entry(vcpu, 0xD, 0);
        if (!best) {
                vcpu->arch.guest_supported_xcr0 = 0;
                vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
        } else {
                vcpu->arch.guest_supported_xcr0 =
                        (best->eax | ((u64)best->edx << 32)) &
                        kvm_supported_xcr0();
                vcpu->arch.guest_xstate_size = best->ebx =
                        xstate_required_size(vcpu->arch.xcr0);
        }

        /*
         * The existing code assumes virtual address is 48-bit in the canonical
         * address checks; exit if it is ever changed.
         */
        best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
        if (best && ((best->eax & 0xff00) >> 8) != 48 &&
                ((best->eax & 0xff00) >> 8) != 0)
                return -EINVAL;

        kvm_pmu_cpuid_update(vcpu);
        return 0;
}

static int is_efer_nx(void)
{
        unsigned long long efer = 0;

        rdmsrl_safe(MSR_EFER, &efer);
        return efer & EFER_NX;
}

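/*
 * If the host runs with EFER.NX clear (e.g. booted with noexec=off),
 * the guest cannot use NX either, so strip it from leaf 0x80000001.
 */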
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
        int i;
        struct kvm_cpuid_entry2 *e, *entry;

        entry = NULL;
        for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
                e = &vcpu->arch.cpuid_entries[i];
                if (e->function == 0x80000001) {
                        entry = e;
                        break;
                }
        }
        if (entry && (entry->edx & F(NX)) && !is_efer_nx()) {
                entry->edx &= ~F(NX);
                printk(KERN_INFO "kvm: guest NX capability removed\n");
        }
}

/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
                             struct kvm_cpuid *cpuid,
                             struct kvm_cpuid_entry __user *entries)
{
        int r, i;
        struct kvm_cpuid_entry *cpuid_entries;

        r = -E2BIG;
        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
                goto out;
        r = -ENOMEM;
        cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
        if (!cpuid_entries)
                goto out;
        r = -EFAULT;
        if (copy_from_user(cpuid_entries, entries,
                           cpuid->nent * sizeof(struct kvm_cpuid_entry)))
                goto out_free;
        for (i = 0; i < cpuid->nent; i++) {
                vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
                vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
                vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
                vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
                vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
                vcpu->arch.cpuid_entries[i].index = 0;
                vcpu->arch.cpuid_entries[i].flags = 0;
                vcpu->arch.cpuid_entries[i].padding[0] = 0;
                vcpu->arch.cpuid_entries[i].padding[1] = 0;
                vcpu->arch.cpuid_entries[i].padding[2] = 0;
        }
        vcpu->arch.cpuid_nent = cpuid->nent;
        cpuid_fix_nx_cap(vcpu);
        kvm_apic_set_version(vcpu);
        kvm_x86_ops->cpuid_update(vcpu);
        r = kvm_update_cpuid(vcpu);

out_free:
        vfree(cpuid_entries);
out:
        return r;
}

int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
                              struct kvm_cpuid2 *cpuid,
                              struct kvm_cpuid_entry2 __user *entries)
{
        int r;

        r = -E2BIG;
        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
                goto out;
        r = -EFAULT;
        if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
                           cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
                goto out;
        vcpu->arch.cpuid_nent = cpuid->nent;
        kvm_apic_set_version(vcpu);
        kvm_x86_ops->cpuid_update(vcpu);
        r = kvm_update_cpuid(vcpu);
out:
        return r;
}

int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
                              struct kvm_cpuid2 *cpuid,
                              struct kvm_cpuid_entry2 __user *entries)
{
        int r;

        r = -E2BIG;
        if (cpuid->nent < vcpu->arch.cpuid_nent)
                goto out;
        r = -EFAULT;
        if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
                         vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
                goto out;
        return 0;

out:
        cpuid->nent = vcpu->arch.cpuid_nent;
        return r;
}

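/* Clear feature bits that the host CPU itself does not support. */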
static void cpuid_mask(u32 *word, int wordnum)
{
        *word &= boot_cpu_data.x86_capability[wordnum];
}

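/* Fill @entry with the host's raw CPUID output for @function/@index. */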
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                           u32 index)
{
        entry->function = function;
        entry->index = index;
        cpuid_count(entry->function, entry->index,
                    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
        entry->flags = 0;
}

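/*
 * Leaves reported by KVM_GET_EMULATED_CPUID: features that KVM can
 * emulate for the guest even when the host CPU lacks them, currently
 * only MOVBE.
 */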
static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
                                   u32 func, u32 index, int *nent, int maxnent)
{
        switch (func) {
        case 0:
                entry->eax = 1;         /* only one leaf currently */
                ++*nent;
                break;
        case 1:
                entry->ecx = F(MOVBE);
                ++*nent;
                break;
        default:
                break;
        }

        entry->function = func;
        entry->index = index;

        return 0;
}

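/*
 * Fill one entry (plus any sub-leaves) of the KVM_GET_SUPPORTED_CPUID
 * table for @function: start from the host's raw CPUID and mask each
 * leaf down to what KVM and the host can actually virtualize.
 */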
static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                                 u32 index, int *nent, int maxnent)
{
        int r;
        unsigned f_nx = is_efer_nx() ? F(NX) : 0;
#ifdef CONFIG_X86_64
        unsigned f_gbpages = (kvm_x86_ops->get_lpage_level() == PT_PDPE_LEVEL)
                                ? F(GBPAGES) : 0;
        unsigned f_lm = F(LM);
#else
        unsigned f_gbpages = 0;
        unsigned f_lm = 0;
#endif
        unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
        unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
        unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
        unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;

        /* cpuid 1.edx */
        const u32 kvm_supported_word0_x86_features =
                F(FPU) | F(VME) | F(DE) | F(PSE) |
                F(TSC) | F(MSR) | F(PAE) | F(MCE) |
                F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
                0 /* Reserved, DS, ACPI */ | F(MMX) |
                F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
                0 /* HTT, TM, Reserved, PBE */;
        /* cpuid 0x80000001.edx */
        const u32 kvm_supported_word1_x86_features =
                F(FPU) | F(VME) | F(DE) | F(PSE) |
                F(TSC) | F(MSR) | F(PAE) | F(MCE) |
                F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) |
                F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
                F(PAT) | F(PSE36) | 0 /* Reserved */ |
                f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) |
                F(FXSR) | F(FXSR_OPT) | f_gbpages | f_rdtscp |
                0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW);
        /* cpuid 1.ecx */
        const u32 kvm_supported_word4_x86_features =
                /* NOTE: MONITOR (and MWAIT) are emulated as NOP,
                 * but *not* advertised to guests via CPUID ! */
                F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
                0 /* DS-CPL, VMX, SMX, EST */ |
                0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
                F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
                F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
                F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
                0 /* Reserved */ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
                F(F16C) | F(RDRAND);
        /* cpuid 0x80000001.ecx */
        const u32 kvm_supported_word6_x86_features =
                F(LAHF_LM) | F(CMP_LEGACY) | 0 /* SVM */ | 0 /* ExtApicSpace */ |
                F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) |
                F(3DNOWPREFETCH) | F(OSVW) | 0 /* IBS */ | F(XOP) |
                0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM);

        /* cpuid 0xC0000001.edx */
        const u32 kvm_supported_word5_x86_features =
                F(XSTORE) | F(XSTORE_EN) | F(XCRYPT) | F(XCRYPT_EN) |
                F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
                F(PMM) | F(PMM_EN);

        /* cpuid 7.0.ebx */
        const u32 kvm_supported_word9_x86_features =
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
                F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
                F(AVX512CD);

        /* cpuid 0xD.1.eax */
        const u32 kvm_supported_word10_x86_features =
                F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;

        /* all calls to cpuid_count() should be made on the same cpu */
        get_cpu();

        r = -E2BIG;

        if (*nent >= maxnent)
                goto out;

        do_cpuid_1_ent(entry, function, index);
        ++*nent;

        switch (function) {
        case 0:
                entry->eax = min(entry->eax, (u32)0xd);
                break;
        case 1:
                entry->edx &= kvm_supported_word0_x86_features;
                cpuid_mask(&entry->edx, 0);
                entry->ecx &= kvm_supported_word4_x86_features;
                cpuid_mask(&entry->ecx, 4);
                /* we support x2apic emulation even if host does not support
                 * it since we emulate x2apic in software */
                entry->ecx |= F(X2APIC);
                break;
        /* function 2 entries are STATEFUL. That is, repeated cpuid commands
         * may return different values. This forces us to get_cpu() before
         * issuing the first command, and also to emulate this annoying behavior
         * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
        case 2: {
                int t, times = entry->eax & 0xff;

                entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
                entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
                for (t = 1; t < times; ++t) {
                        if (*nent >= maxnent)
                                goto out;

                        do_cpuid_1_ent(&entry[t], function, 0);
                        entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
                        ++*nent;
                }
                break;
        }
        /* function 4 has additional index. */
        case 4: {
                int i, cache_type;

                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* read more entries until cache_type is zero */
                for (i = 1; ; ++i) {
                        if (*nent >= maxnent)
                                goto out;

                        cache_type = entry[i - 1].eax & 0x1f;
                        if (!cache_type)
                                break;
                        do_cpuid_1_ent(&entry[i], function, i);
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
                }
                break;
        }
        case 7: {
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* Mask ebx against host capability word 9 */
                if (index == 0) {
                        entry->ebx &= kvm_supported_word9_x86_features;
                        cpuid_mask(&entry->ebx, 9);
                        /* TSC_ADJUST is emulated */
                        entry->ebx |= F(TSC_ADJUST);
                } else
                        entry->ebx = 0;
                entry->eax = 0;
                entry->ecx = 0;
                entry->edx = 0;
                break;
        }
        case 9:
                break;
        case 0xa: { /* Architectural Performance Monitoring */
                struct x86_pmu_capability cap;
                union cpuid10_eax eax;
                union cpuid10_edx edx;

                perf_get_x86_pmu_capability(&cap);

                /*
                 * Only support guest architectural pmu on a host
                 * with architectural pmu.
                 */
                if (!cap.version)
                        memset(&cap, 0, sizeof(cap));

                eax.split.version_id = min(cap.version, 2);
                eax.split.num_counters = cap.num_counters_gp;
                eax.split.bit_width = cap.bit_width_gp;
                eax.split.mask_length = cap.events_mask_len;

                edx.split.num_counters_fixed = cap.num_counters_fixed;
                edx.split.bit_width_fixed = cap.bit_width_fixed;
                edx.split.reserved = 0;

                entry->eax = eax.full;
                entry->ebx = cap.events_mask;
                entry->ecx = 0;
                entry->edx = edx.full;
                break;
        }
        /* function 0xb has additional index. */
        case 0xb: {
                int i, level_type;

                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                /* read more entries until level_type is zero */
                for (i = 1; ; ++i) {
                        if (*nent >= maxnent)
                                goto out;

                        level_type = entry[i - 1].ecx & 0xff00;
                        if (!level_type)
                                break;
                        do_cpuid_1_ent(&entry[i], function, i);
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
                }
                break;
        }
        case 0xd: {
                int idx, i;
                u64 supported = kvm_supported_xcr0();

                entry->eax &= supported;
                entry->edx &= supported >> 32;
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                if (!supported)
                        break;

                for (idx = 1, i = 1; idx < 64; ++idx) {
                        u64 mask = ((u64)1 << idx);
                        if (*nent >= maxnent)
                                goto out;

                        do_cpuid_1_ent(&entry[i], function, idx);
                        if (idx == 1)
                                entry[i].eax &= kvm_supported_word10_x86_features;
                        else if (entry[i].eax == 0 || !(supported & mask))
                                continue;
                        entry[i].flags |=
                               KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                        ++*nent;
                        ++i;
                }
                break;
        }
        case KVM_CPUID_SIGNATURE: {
                static const char signature[12] = "KVMKVMKVM\0\0";
                const u32 *sigptr = (const u32 *)signature;
                entry->eax = KVM_CPUID_FEATURES;
                entry->ebx = sigptr[0];
                entry->ecx = sigptr[1];
                entry->edx = sigptr[2];
                break;
        }
        case KVM_CPUID_FEATURES:
                entry->eax = (1 << KVM_FEATURE_CLOCKSOURCE) |
                             (1 << KVM_FEATURE_NOP_IO_DELAY) |
                             (1 << KVM_FEATURE_CLOCKSOURCE2) |
                             (1 << KVM_FEATURE_ASYNC_PF) |
                             (1 << KVM_FEATURE_PV_EOI) |
                             (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
                             (1 << KVM_FEATURE_PV_UNHALT);

                if (sched_info_on())
                        entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);

                entry->ebx = 0;
                entry->ecx = 0;
                entry->edx = 0;
                break;
        case 0x80000000:
                entry->eax = min(entry->eax, 0x8000001a);
                break;
        case 0x80000001:
                entry->edx &= kvm_supported_word1_x86_features;
                cpuid_mask(&entry->edx, 1);
                entry->ecx &= kvm_supported_word6_x86_features;
                cpuid_mask(&entry->ecx, 6);
                break;
        case 0x80000007: /* Advanced power management */
                /* invariant TSC is CPUID.80000007H:EDX[8] */
                entry->edx &= (1 << 8);
                /* mask against host */
                entry->edx &= boot_cpu_data.x86_power;
                entry->eax = entry->ebx = entry->ecx = 0;
                break;
        case 0x80000008: {
                unsigned g_phys_as = (entry->eax >> 16) & 0xff;
                unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
                unsigned phys_as = entry->eax & 0xff;

                if (!g_phys_as)
                        g_phys_as = phys_as;
                entry->eax = g_phys_as | (virt_as << 8);
                entry->ebx = entry->edx = 0;
                break;
        }
        case 0x80000019:
                entry->ecx = entry->edx = 0;
                break;
        case 0x8000001a:
                break;
        case 0x8000001d:
                break;
        /* Add support for Centaur's CPUID instruction. */
        case 0xC0000000:
                /* Just support up to 0xC0000004 now. */
                entry->eax = min(entry->eax, 0xC0000004);
                break;
        case 0xC0000001:
                entry->edx &= kvm_supported_word5_x86_features;
                cpuid_mask(&entry->edx, 5);
                break;
        case 3: /* Processor serial number */
        case 5: /* MONITOR/MWAIT */
        case 6: /* Thermal management */
        case 0xC0000002:
        case 0xC0000003:
        case 0xC0000004:
        default:
                entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
                break;
        }

        kvm_x86_ops->set_supported_cpuid(function, entry);

        r = 0;

out:
        put_cpu();

        return r;
}

static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
                        u32 idx, int *nent, int maxnent, unsigned int type)
{
        if (type == KVM_GET_EMULATED_CPUID)
                return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);

        return __do_cpuid_ent(entry, func, idx, nent, maxnent);
}

#undef F

struct kvm_cpuid_param {
        u32 func;
        u32 idx;
        bool has_leaf_count;
        bool (*qualifier)(const struct kvm_cpuid_param *param);
};

static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
{
        return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
}

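/*
 * For KVM_GET_EMULATED_CPUID only: returns true if any entry's padding
 * is unreadable or not zeroed, so the ioctl can be rejected.
 */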
static bool sanity_check_entries(struct kvm_cpuid_entry2 __user *entries,
                                 __u32 num_entries, unsigned int ioctl_type)
{
        int i;
        __u32 pad[3];

        if (ioctl_type != KVM_GET_EMULATED_CPUID)
                return false;

        /*
         * We want to make sure that ->padding is being passed clean from
         * userspace in case we want to use it for something in the future.
         *
         * Sadly, this wasn't enforced for KVM_GET_SUPPORTED_CPUID and so we
         * have to give ourselves satisfied only with the emulated side. /me
         * sheds a tear.
         */
        for (i = 0; i < num_entries; i++) {
                if (copy_from_user(pad, entries[i].padding, sizeof(pad)))
                        return true;

                if (pad[0] || pad[1] || pad[2])
                        return true;
        }
        return false;
}

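/*
 * Build the KVM_GET_{SUPPORTED,EMULATED}_CPUID table: enumerate the
 * standard, extended and (on Centaur hosts) Centaur leaf ranges up to
 * each range's reported limit, plus the KVM paravirt leaves, and copy
 * the result back to userspace.
 */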
int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
                            struct kvm_cpuid_entry2 __user *entries,
                            unsigned int type)
{
        struct kvm_cpuid_entry2 *cpuid_entries;
        int limit, nent = 0, r = -E2BIG, i;
        u32 func;
        static const struct kvm_cpuid_param param[] = {
                { .func = 0, .has_leaf_count = true },
                { .func = 0x80000000, .has_leaf_count = true },
                { .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
                { .func = KVM_CPUID_SIGNATURE },
                { .func = KVM_CPUID_FEATURES },
        };

        if (cpuid->nent < 1)
                goto out;
        if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
                cpuid->nent = KVM_MAX_CPUID_ENTRIES;

        if (sanity_check_entries(entries, cpuid->nent, type))
                return -EINVAL;

        r = -ENOMEM;
        cpuid_entries = vzalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
        if (!cpuid_entries)
                goto out;

        r = 0;
        for (i = 0; i < ARRAY_SIZE(param); i++) {
                const struct kvm_cpuid_param *ent = &param[i];

                if (ent->qualifier && !ent->qualifier(ent))
                        continue;

                r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
                                &nent, cpuid->nent, type);

                if (r)
                        goto out_free;

                if (!ent->has_leaf_count)
                        continue;

                limit = cpuid_entries[nent - 1].eax;
                for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
                        r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
                                     &nent, cpuid->nent, type);

                if (r)
                        goto out_free;
        }

        r = -EFAULT;
        if (copy_to_user(entries, cpuid_entries,
                         nent * sizeof(struct kvm_cpuid_entry2)))
                goto out_free;
        cpuid->nent = nent;
        r = 0;

out_free:
        vfree(cpuid_entries);
out:
        return r;
}

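/*
 * Stateful CPUID functions (currently only leaf 2) can return a
 * different value on each invocation.  Advance the READ_NEXT marker
 * from entry[i] to the next entry with the same function number,
 * wrapping around the table.
 */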
static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
        struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
        int j, nent = vcpu->arch.cpuid_nent;

        e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
        /* when no next entry is found, the current entry[i] is reselected */
        for (j = i + 1; ; j = (j + 1) % nent) {
                struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
                if (ej->function == e->function) {
                        ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
                        return j;
                }
        }
        return 0; /* silence gcc, even though control never reaches here */
}

/* find an entry with matching function, matching index (if needed), and that
 * should be read next (if it's stateful) */
static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
        u32 function, u32 index)
{
        if (e->function != function)
                return 0;
        if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
                return 0;
        if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
            !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
                return 0;
        return 1;
}

struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
                                              u32 function, u32 index)
{
        int i;
        struct kvm_cpuid_entry2 *best = NULL;

        for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
                struct kvm_cpuid_entry2 *e;

                e = &vcpu->arch.cpuid_entries[i];
                if (is_matching_cpuid_entry(e, function, index)) {
                        if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
                                move_to_next_stateful_cpuid_entry(vcpu, i);
                        best = e;
                        break;
                }
        }
        return best;
}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);

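/*
 * Guest physical address width, from CPUID.0x80000008:EAX[7:0],
 * falling back to 36 bits when the leaf is not reported.
 */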
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
{
        struct kvm_cpuid_entry2 *best;

        best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
        if (!best || best->eax < 0x80000008)
                goto not_found;
        best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
        if (best)
                return best->eax & 0xff;
not_found:
        return 36;
}
EXPORT_SYMBOL_GPL(cpuid_maxphyaddr);

/*
 * If no match is found, check whether we exceed the vCPU's limit
 * and return the content of the highest valid _standard_ leaf instead.
 * This is to satisfy the CPUID specification.
 */
static struct kvm_cpuid_entry2 *check_cpuid_limit(struct kvm_vcpu *vcpu,
                                                  u32 function, u32 index)
{
        struct kvm_cpuid_entry2 *maxlevel;

        maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
        if (!maxlevel || maxlevel->eax >= function)
                return NULL;
        if (function & 0x80000000) {
                maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
                if (!maxlevel)
                        return NULL;
        }
        return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
}

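/*
 * Core of guest CPUID emulation: look up the cached entry, fall back
 * to the highest valid leaf when the requested one is out of range,
 * and hide the architectural PMU leaf (0xa) from nested (L2) guests.
 */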
void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
{
        u32 function = *eax, index = *ecx;
        struct kvm_cpuid_entry2 *best;

        best = kvm_find_cpuid_entry(vcpu, function, index);

        if (!best)
                best = check_cpuid_limit(vcpu, function, index);

        /*
         * Perfmon not yet supported for L2 guest.
         */
        if (is_guest_mode(vcpu) && function == 0xa)
                best = NULL;

        if (best) {
                *eax = best->eax;
                *ebx = best->ebx;
                *ecx = best->ecx;
                *edx = best->edx;
        } else
                *eax = *ebx = *ecx = *edx = 0;
        trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx);
}
EXPORT_SYMBOL_GPL(kvm_cpuid);

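/*
 * Handle a CPUID intercept: read the function and index from RAX/RCX,
 * fill RAX..RDX from the cached table and skip the instruction.
 */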
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
        u32 function, eax, ebx, ecx, edx;

        function = eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
        ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
        kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
        kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
        kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
        kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
        kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
        kvm_x86_ops->skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);