Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Nov 2015 00:26:26 +0000 (16:26 -0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 6 Nov 2015 00:26:26 +0000 (16:26 -0800)
Pull KVM updates from Paolo Bonzini:
 "First batch of KVM changes for 4.4.

  s390:
     A bunch of fixes and optimizations for interrupt and time handling.

  PPC:
     Mostly bug fixes.

  ARM:
     No big features, but many small fixes and prerequisites including:

      - a number of fixes for the arch-timer

      - introducing proper level-triggered semantics for the arch-timers

      - a series of patches to synchronously halt a guest (prerequisite
        for IRQ forwarding)

      - some tracepoint improvements

      - a tweak for the EL2 panic handlers

      - some more VGIC cleanups getting rid of redundant state

  x86:
     Quite a few changes:

      - support for VT-d posted interrupts (i.e. PCI devices can inject
        interrupts directly into vCPUs).  This introduces a new
        component (in virt/lib/) that connects VFIO and KVM together.
        The same infrastructure will be used for ARM interrupt
        forwarding as well.

      - more Hyper-V features, though the main one Hyper-V synthetic
        interrupt controller will have to wait for 4.5.  These will let
        KVM expose Hyper-V devices.

      - nested virtualization now supports VPID (same as PCID but for
        vCPUs) which makes it quite a bit faster

      - for future hardware that supports NVDIMM, there is support for
        clflushopt, clwb, pcommit

      - support for "split irqchip", i.e.  LAPIC in kernel +
        IOAPIC/PIC/PIT in userspace, which reduces the attack surface of
        the hypervisor

      - obligatory smattering of SMM fixes

      - on the guest side, stable scheduler clock support was rewritten
        to not require help from the hypervisor"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (123 commits)
  KVM: VMX: Fix commit which broke PML
  KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0()
  KVM: x86: allow RSM from 64-bit mode
  KVM: VMX: fix SMEP and SMAP without EPT
  KVM: x86: move kvm_set_irq_inatomic to legacy device assignment
  KVM: device assignment: remove pointless #ifdefs
  KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
  KVM: x86: zero apic_arb_prio on reset
  drivers/hv: share Hyper-V SynIC constants with userspace
  KVM: x86: handle SMBASE as physical address in RSM
  KVM: x86: add read_phys to x86_emulate_ops
  KVM: x86: removing unused variable
  KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs
  KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()
  KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings
  KVM: arm/arm64: Optimize away redundant LR tracking
  KVM: s390: use simple switch statement as multiplexer
  KVM: s390: drop useless newline in debugging data
  KVM: s390: SCA must not cross page boundaries
  KVM: arm: Do not indent the arguments of DECLARE_BITMAP
  ...

12 files changed:
1  2 
Documentation/kernel-parameters.txt
Documentation/virtual/kvm/api.txt
MAINTAINERS
Makefile
arch/arm64/kvm/Kconfig
arch/powerpc/include/asm/kvm_host.h
arch/s390/include/asm/kvm_host.h
arch/s390/kvm/kvm-s390.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/x86.c
include/kvm/arm_vgic.h
virt/kvm/arm/vgic.c

@@@ -167,8 -167,7 +167,8 @@@ bytes respectively. Such letter suffixe
  
        acpi=           [HW,ACPI,X86,ARM64]
                        Advanced Configuration and Power Interface
 -                      Format: { force | off | strict | noirq | rsdt }
 +                      Format: { force | off | strict | noirq | rsdt |
 +                                copy_dsdt }
                        force -- enable ACPI if default was off
                        off -- disable ACPI if default was on
                        noirq -- do not use ACPI for IRQ routing
                        is passed, kernel could allocate physical memory region
                        above 4G, that cause second kernel crash on system
                        that require some amount of low memory, e.g. swiotlb
 -                      requires at least 64M+32K low memory.  Kernel would
 -                      try to allocate 72M below 4G automatically.
 +                      requires at least 64M+32K low memory, also enough extra
 +                      low memory is needed to make sure DMA buffers for 32-bit
 +                      devices won't run out. Kernel would try to allocate
 +                      at least 256M below 4G automatically.
                        This one let user to specify own low range under 4G
                        for second kernel instead.
                        0: to disable low allocation.
  
        earlycon=       [KNL] Output early console device and options.
  
 +                      When used with no options, the early console is
 +                      determined by the stdout-path property in device
 +                      tree's chosen node.
 +
                cdns,<addr>
                        Start an early, polled-mode console on a cadence serial
                        port at the specified address. The cadence serial port
                        serial port must already be setup and configured.
                        Options are not yet supported.
  
 +              lpuart,<addr>
 +              lpuart32,<addr>
 +                      Use early console provided by Freescale LP UART driver
 +                      found on Freescale Vybrid and QorIQ LS1021A processors.
 +                      A valid base address must be provided, and the serial
 +                      port must already be setup and configured.
 +
        earlyprintk=    [X86,SH,BLACKFIN,ARM,M68k]
                        earlyprintk=vga
                        earlyprintk=efi
                        you are really sure that your UEFI does sane gc and
                        fulfills the spec otherwise your board may brick.
  
 +      efi_fake_mem=   nn[KMG]@ss[KMG]:aa[,nn[KMG]@ss[KMG]:aa,..] [EFI; X86]
 +                      Add arbitrary attribute to specific memory range by
 +                      updating original EFI memory map.
 +                      Region of memory which aa attribute is added to is
 +                      from ss to ss+nn.
 +                      If efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000
 +                      is specified, EFI_MEMORY_MORE_RELIABLE(0x10000)
 +                      attribute is added to range 0x100000000-0x180000000 and
 +                      0x10a0000000-0x1120000000.
 +
 +                      Using this parameter you can do debugging of EFI memmap
 +                      related feature. For example, you can do debugging of
 +                      Address Range Mirroring feature even if your box
 +                      doesn't support it.
 +
        eisa_irq_edge=  [PARISC,HW]
                        See header of drivers/parisc/eisa.c.
  
                hwp_only
                        Only load intel_pstate on systems which support
                        hardware P state control (HWP) if available.
 +              no_acpi
 +                      Don't use ACPI processor performance control objects
 +                      _PSS and _PPC specified limits.
  
        intremap=       [X86-64, Intel-IOMMU]
                        on      enable Interrupt Remapping (default)
                        nosid   disable Source ID checking
                        no_x2apic_optout
                                BIOS x2APIC opt-out request will be ignored
+                       nopost  disable Interrupt Posting
  
        iomem=          Disable strict checking of access to MMIO memory
                strict  regions from userspace.
        nmi_watchdog=   [KNL,BUGS=X86] Debugging features for SMP kernels
                        Format: [panic,][nopanic,][num]
                        Valid num: 0 or 1
 -                      0 - turn nmi_watchdog off
 -                      1 - turn nmi_watchdog on
 +                      0 - turn hardlockup detector in nmi_watchdog off
 +                      1 - turn hardlockup detector in nmi_watchdog on
                        When panic is specified, panic when an NMI watchdog
                        timeout occurs (or 'nopanic' to override the opposite
 -                      default).
 +                      default). To disable both hard and soft lockup detectors,
 +                      please see 'nowatchdog'.
                        This is useful when you use a panic=... timeout and
                        need the box quickly up again.
  
                        cache-to-cache transfer latencies.
  
        rcutree.rcu_fanout_leaf= [KNL]
 -                      Increase the number of CPUs assigned to each
 -                      leaf rcu_node structure.  Useful for very large
 -                      systems.
 +                      Change the number of CPUs assigned to each
 +                      leaf rcu_node structure.  Useful for very
 +                      large systems, which will choose the value 64,
 +                      and for NUMA systems with large remote-access
 +                      latencies, which will choose a value aligned
 +                      with the appropriate hardware boundaries.
  
        rcutree.jiffies_till_sched_qs= [KNL]
                        Set required age in jiffies for a
@@@ -401,10 -401,9 +401,9 @@@ Capability: basi
  Architectures: x86, ppc, mips
  Type: vcpu ioctl
  Parameters: struct kvm_interrupt (in)
- Returns: 0 on success, -1 on error
+ Returns: 0 on success, negative on failure.
  
- Queues a hardware interrupt vector to be injected.  This is only
- useful if in-kernel local APIC or equivalent is not used.
+ Queues a hardware interrupt vector to be injected.
  
  /* for KVM_INTERRUPT */
  struct kvm_interrupt {
  
  X86:
  
- Note 'irq' is an interrupt vector, not an interrupt pin or line.
+ Returns: 0 on success,
+        -EEXIST if an interrupt is already enqueued
+        -EINVAL if the irq number is invalid
+        -ENXIO if the PIC is in the kernel
+        -EFAULT if the pointer is invalid
+ Note 'irq' is an interrupt vector, not an interrupt pin or line. This
+ ioctl is useful if the in-kernel PIC is not used.
  
  PPC:
  
@@@ -1598,7 -1604,7 +1604,7 @@@ provided event instead of triggering a
  struct kvm_ioeventfd {
        __u64 datamatch;
        __u64 addr;        /* legal pio/mmio address */
-       __u32 len;         /* 1, 2, 4, or 8 bytes    */
+       __u32 len;         /* 0, 1, 2, 4, or 8 bytes    */
        __s32 fd;
        __u32 flags;
        __u8  pad[36];
@@@ -1621,6 -1627,10 +1627,10 @@@ to the registered address is equal to d
  For virtio-ccw devices, addr contains the subchannel id and datamatch the
  virtqueue index.
  
+ With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and
+ the kernel will ignore the length of the guest write and may get a faster vmexit.
+ The speedup may only apply to specific architectures, but the ioeventfd will
+ work anyway.
  
  4.60 KVM_DIRTY_TLB
  
@@@ -1774,7 -1784,7 +1784,7 @@@ has been called, this interface is comp
  To use this to emulate the LINT1 input with KVM_CREATE_IRQCHIP, use the
  following algorithm:
  
 -  - pause the vpcu
 +  - pause the vcpu
    - read the local APIC's state (KVM_GET_LAPIC)
    - check whether changing LINT1 will queue an NMI (see the LVT entry for LINT1)
    - if so, issue KVM_NMI
@@@ -2798,7 -2808,7 +2808,7 @@@ Returns: = 0 on success
           < 0 on generic error (e.g. -EFAULT or -ENOMEM),
           > 0 if an exception occurred while walking the page tables
  
 -Read or write data from/to the logical (virtual) memory of a VPCU.
 +Read or write data from/to the logical (virtual) memory of a VCPU.
  
  Parameters are specified via the following structure:
  
@@@ -3309,6 -3319,18 +3319,18 @@@ Valid values for 'type' are
     to ignore the request, or to gather VM memory core dump and/or
     reset/shutdown of the VM.
  
+               /* KVM_EXIT_IOAPIC_EOI */
+               struct {
+                       __u8 vector;
+               } eoi;
+ Indicates that the VCPU's in-kernel local APIC received an EOI for a
+ level-triggered IOAPIC interrupt.  This exit only triggers when the
+ IOAPIC is implemented in userspace (i.e. KVM_CAP_SPLIT_IRQCHIP is enabled);
+ the userspace IOAPIC should process the EOI and retrigger the interrupt if
+ it is still asserted.  Vector is the LAPIC interrupt vector for which the
+ EOI was received.
                /* Fix the size of the union. */
                char padding[256];
        };
@@@ -3627,6 -3649,26 +3649,26 @@@ struct 
  
  KVM handlers should exit to userspace with rc = -EREMOTE.
  
+ 7.5 KVM_CAP_SPLIT_IRQCHIP
+ Architectures: x86
+ Parameters: args[0] - number of routes reserved for userspace IOAPICs
+ Returns: 0 on success, -1 on error
+ Create a local apic for each processor in the kernel. This can be used
+ instead of KVM_CREATE_IRQCHIP if the userspace VMM wishes to emulate the
+ IOAPIC and PIC (and also the PIT, even though this has to be enabled
+ separately).
+ This capability also enables in kernel routing of interrupt requests;
+ when KVM_CAP_SPLIT_IRQCHIP is enabled, only routes of KVM_IRQ_ROUTING_MSI type are
+ used in the IRQ routing table.  The first args[0] MSI routes are reserved
+ for the IOAPIC pins.  Whenever the LAPIC receives an EOI for these routes,
+ a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace.
+ Fails if VCPU has already been created, or if the irqchip is already in the
+ kernel (i.e. KVM_CREATE_IRQCHIP has already been called).
  
  8. Other capabilities.
  ----------------------
diff --combined MAINTAINERS
@@@ -240,12 -240,6 +240,12 @@@ L:       lm-sensors@lm-sensors.or
  S:    Maintained
  F:    drivers/hwmon/abituguru3.c
  
 +ACCES 104-IDIO-16 GPIO DRIVER
 +M:    "William Breathitt Gray" <vilhelm.gray@gmail.com>
 +L:    linux-gpio@vger.kernel.org
 +S:    Maintained
 +F:    drivers/gpio/gpio-104-idio-16.c
 +
  ACENIC DRIVER
  M:    Jes Sorensen <jes@trained-monkey.org>
  L:    linux-acenic@sunsite.dk
@@@ -660,6 -654,11 +660,6 @@@ F:        drivers/gpu/drm/radeon/radeon_kfd.
  F:    drivers/gpu/drm/radeon/radeon_kfd.h
  F:    include/uapi/linux/kfd_ioctl.h
  
 -AMD MICROCODE UPDATE SUPPORT
 -M:    Borislav Petkov <bp@alien8.de>
 -S:    Maintained
 -F:    arch/x86/kernel/cpu/microcode/amd*
 -
  AMD XGBE DRIVER
  M:    Tom Lendacky <thomas.lendacky@amd.com>
  L:    netdev@vger.kernel.org
@@@ -823,13 -822,12 +823,13 @@@ F:      arch/arm/include/asm/floppy.
  
  ARM PMU PROFILING AND DEBUGGING
  M:    Will Deacon <will.deacon@arm.com>
 +R:    Mark Rutland <mark.rutland@arm.com>
  S:    Maintained
 -F:    arch/arm/kernel/perf_*
 +F:    arch/arm*/kernel/perf_*
  F:    arch/arm/oprofile/common.c
 -F:    arch/arm/kernel/hw_breakpoint.c
 -F:    arch/arm/include/asm/hw_breakpoint.h
 -F:    arch/arm/include/asm/perf_event.h
 +F:    arch/arm*/kernel/hw_breakpoint.c
 +F:    arch/arm*/include/asm/hw_breakpoint.h
 +F:    arch/arm*/include/asm/perf_event.h
  F:    drivers/perf/arm_pmu.c
  F:    include/linux/perf/arm_pmu.h
  
@@@ -896,12 -894,11 +896,12 @@@ M:      Lennert Buytenhek <kernel@wantstofly
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  
 -ARM/Allwinner A1X SoC support
 +ARM/Allwinner sunXi SoC support
  M:    Maxime Ripard <maxime.ripard@free-electrons.com>
 +M:    Chen-Yu Tsai <wens@csie.org>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 -N:    sun[x4567]i
 +N:    sun[x456789]i
  
  ARM/Allwinner SoC Clock Support
  M:    Emilio López <emilio@elopez.com.ar>
@@@ -1300,13 -1297,6 +1300,13 @@@ F:    arch/arm/mach-mediatek
  N:    mtk
  K:    mediatek
  
 +ARM/Mediatek USB3 PHY DRIVER
 +M:    Chunfeng Yun <chunfeng.yun@mediatek.com>
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +L:    linux-mediatek@lists.infradead.org (moderated for non-subscribers)
 +S:    Maintained
 +F:    drivers/phy/phy-mt65xx-usb3.c
 +
  ARM/MICREL KS8695 ARCHITECTURE
  M:    Greg Ungerer <gerg@uclinux.org>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -1489,14 -1479,6 +1489,14 @@@ L:    linux-media@vger.kernel.or
  S:    Maintained
  F:    drivers/media/platform/s5p-tv/
  
 +ARM/SAMSUNG S5P SERIES JPEG CODEC SUPPORT
 +M:    Andrzej Pietrasiewicz <andrzej.p@samsung.com>
 +M:    Jacek Anaszewski <j.anaszewski@samsung.com>
 +L:    linux-arm-kernel@lists.infradead.org
 +L:    linux-media@vger.kernel.org
 +S:    Maintained
 +F:    drivers/media/platform/s5p-jpeg/
 +
  ARM/SHMOBILE ARM ARCHITECTURE
  M:    Simon Horman <horms@verge.net.au>
  M:    Magnus Damm <magnus.damm@gmail.com>
@@@ -1545,7 -1527,6 +1545,7 @@@ W:      http://www.stlinux.co
  S:    Maintained
  F:    arch/arm/mach-sti/
  F:    arch/arm/boot/dts/sti*
 +F:    drivers/char/hw_random/st-rng.c
  F:    drivers/clocksource/arm_global_timer.c
  F:    drivers/clocksource/clksrc_st_lpc.c
  F:    drivers/i2c/busses/i2c-st.c
@@@ -1798,14 -1779,6 +1798,14 @@@ S:    Supporte
  F:    Documentation/aoe/
  F:    drivers/block/aoe/
  
 +ATHEROS 71XX/9XXX GPIO DRIVER
 +M:    Alban Bedel <albeu@free.fr>
 +W:    https://github.com/AlbanBedel/linux
 +T:    git git://github.com/AlbanBedel/linux
 +S:    Maintained
 +F:    drivers/gpio/gpio-ath79.c
 +F:    Documentation/devicetree/bindings/gpio/gpio-ath79.txt
 +
  ATHEROS ATH GENERIC UTILITIES
  M:    "Luis R. Rodriguez" <mcgrof@do-not-panic.com>
  L:    linux-wireless@vger.kernel.org
@@@ -3186,15 -3159,6 +3186,15 @@@ F:    Documentation/powerpc/cxl.tx
  F:    Documentation/powerpc/cxl.txt
  F:    Documentation/ABI/testing/sysfs-class-cxl
  
 +CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER
 +M:    Manoj N. Kumar <manoj@linux.vnet.ibm.com>
 +M:    Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
 +L:    linux-scsi@vger.kernel.org
 +S:    Supported
 +F:    drivers/scsi/cxlflash/
 +F:    include/uapi/scsi/cxlflash_ioctls.h
 +F:    Documentation/powerpc/cxlflash.txt
 +
  STMMAC ETHERNET DRIVER
  M:    Giuseppe Cavallaro <peppe.cavallaro@st.com>
  L:    netdev@vger.kernel.org
@@@ -3537,15 -3501,13 +3537,15 @@@ M:   Jonathan Corbet <corbet@lwn.net
  L:    linux-doc@vger.kernel.org
  S:    Maintained
  F:    Documentation/
 +F:    scripts/docproc.c
 +F:    scripts/kernel-doc*
  X:    Documentation/ABI/
  X:    Documentation/devicetree/
  X:    Documentation/acpi
  X:    Documentation/power
  X:    Documentation/spi
  X:    Documentation/DocBook/media
 -T:    git git://git.lwn.net/linux-2.6.git docs-next
 +T:    git git://git.lwn.net/linux.git docs-next
  
  DOUBLETALK DRIVER
  M:    "James R. Van Zandt" <jrv@vanzandt.mv.com>
@@@ -3629,13 -3591,6 +3629,13 @@@ F:    drivers/gpu/drm/i915
  F:    include/drm/i915*
  F:    include/uapi/drm/i915*
  
 +DRM DRIVERS FOR ATMEL HLCDC
 +M:    Boris Brezillon <boris.brezillon@free-electrons.com>
 +L:    dri-devel@lists.freedesktop.org
 +S:    Supported
 +F:    drivers/gpu/drm/atmel-hlcdc/
 +F:    Documentation/devicetree/bindings/drm/atmel/
 +
  DRM DRIVERS FOR EXYNOS
  M:    Inki Dae <inki.dae@samsung.com>
  M:    Joonyoung Shim <jy0922.shim@samsung.com>
@@@ -3664,14 -3619,6 +3664,14 @@@ S:    Maintaine
  F:    drivers/gpu/drm/imx/
  F:    Documentation/devicetree/bindings/drm/imx/
  
 +DRM DRIVERS FOR GMA500 (Poulsbo, Moorestown and derivative chipsets)
 +M:    Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
 +L:    dri-devel@lists.freedesktop.org
 +T:    git git://github.com/patjak/drm-gma500
 +S:    Maintained
 +F:    drivers/gpu/drm/gma500
 +F:    include/drm/gma500*
 +
  DRM DRIVERS FOR NVIDIA TEGRA
  M:    Thierry Reding <thierry.reding@gmail.com>
  M:    Terje Bergström <tbergstrom@nvidia.com>
@@@ -4056,7 -4003,7 +4056,7 @@@ S:      Maintaine
  F:    sound/usb/misc/ua101.c
  
  EXTENSIBLE FIRMWARE INTERFACE (EFI)
 -M:    Matt Fleming <matt.fleming@intel.com>
 +M:    Matt Fleming <matt@codeblueprint.co.uk>
  L:    linux-efi@vger.kernel.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
  S:    Maintained
@@@ -4071,7 -4018,7 +4071,7 @@@ F:      include/linux/efi*.
  EFI VARIABLE FILESYSTEM
  M:    Matthew Garrett <matthew.garrett@nebula.com>
  M:    Jeremy Kerr <jk@ozlabs.org>
 -M:    Matt Fleming <matt.fleming@intel.com>
 +M:    Matt Fleming <matt@codeblueprint.co.uk>
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
  L:    linux-efi@vger.kernel.org
  S:    Maintained
@@@ -4374,13 -4321,6 +4374,13 @@@ F:    include/linux/fmc*.
  F:    include/linux/ipmi-fru.h
  K:    fmc_d.*register
  
 +FPGA MANAGER FRAMEWORK
 +M:    Alan Tull <atull@opensource.altera.com>
 +S:    Maintained
 +F:    drivers/fpga/
 +F:    include/linux/fpga/fpga-mgr.h
 +W:    http://www.rocketboards.org
 +
  FPU EMULATOR
  M:    Bill Metzenthen <billm@melbpc.org.au>
  W:    http://floatingpoint.sourceforge.net/emulator/index.html
@@@ -4472,14 -4412,6 +4472,14 @@@ L:    linuxppc-dev@lists.ozlabs.or
  S:    Maintained
  F:    drivers/net/ethernet/freescale/ucc_geth*
  
 +FREESCALE eTSEC ETHERNET DRIVER (GIANFAR)
 +M:    Claudiu Manoil <claudiu.manoil@freescale.com>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    drivers/net/ethernet/freescale/gianfar*
 +X:    drivers/net/ethernet/freescale/gianfar_ptp.c
 +F:    Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
 +
  FREESCALE QUICC ENGINE UCC UART DRIVER
  M:    Timur Tabi <timur@tabi.org>
  L:    linuxppc-dev@lists.ozlabs.org
@@@ -5498,6 -5430,12 +5498,6 @@@ W:     https://01.org/linux-acp
  S:    Supported
  F:    drivers/platform/x86/intel_menlow.c
  
 -INTEL IA32 MICROCODE UPDATE SUPPORT
 -M:    Borislav Petkov <bp@alien8.de>
 -S:    Maintained
 -F:    arch/x86/kernel/cpu/microcode/core*
 -F:    arch/x86/kernel/cpu/microcode/intel*
 -
  INTEL I/OAT DMA DRIVER
  M:    Dave Jiang <dave.jiang@intel.com>
  R:    Dan Williams <dan.j.williams@intel.com>
@@@ -5577,12 -5515,6 +5577,12 @@@ F:    Documentation/networking/README.ipw2
  F:    Documentation/networking/README.ipw2200
  F:    drivers/net/wireless/ipw2x00/
  
 +INTEL(R) TRACE HUB
 +M:    Alexander Shishkin <alexander.shishkin@linux.intel.com>
 +S:    Supported
 +F:    Documentation/trace/intel_th.txt
 +F:    drivers/hwtracing/intel_th/
 +
  INTEL(R) TRUSTED EXECUTION TECHNOLOGY (TXT)
  M:    Richard L Maliszewski <richard.l.maliszewski@intel.com>
  M:    Gang Wei <gang.wei@intel.com>
@@@ -5614,7 -5546,7 +5614,7 @@@ F:      drivers/net/wireless/iwlegacy
  INTEL WIRELESS WIFI LINK (iwlwifi)
  M:    Johannes Berg <johannes.berg@intel.com>
  M:    Emmanuel Grumbach <emmanuel.grumbach@intel.com>
 -M:    Intel Linux Wireless <ilw@linux.intel.com>
 +M:    Intel Linux Wireless <linuxwifi@intel.com>
  L:    linux-wireless@vger.kernel.org
  W:    http://intellinuxwireless.org
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git
@@@ -5630,22 -5562,6 +5630,22 @@@ F:    include/linux/mei_cl_bus.
  F:    drivers/misc/mei/*
  F:    Documentation/misc-devices/mei/*
  
 +INTEL MIC DRIVERS (mic)
 +M:    Sudeep Dutt <sudeep.dutt@intel.com>
 +M:    Ashutosh Dixit <ashutosh.dixit@intel.com>
 +S:    Supported
 +W:    https://github.com/sudeepdutt/mic
 +W:    http://software.intel.com/en-us/mic-developer
 +F:    include/linux/mic_bus.h
 +F:    include/linux/scif.h
 +F:    include/uapi/linux/mic_common.h
 +F:    include/uapi/linux/mic_ioctl.h
 +F:    include/uapi/linux/scif_ioctl.h
 +F:    drivers/misc/mic/
 +F:    drivers/dma/mic_x100_dma.c
 +F:    drivers/dma/mic_x100_dma.h
 +F:    Documentation/mic/
 +
  INTEL PMC IPC DRIVER
  M:    Zha Qipeng <qipeng.zha@intel.com>
  L:    platform-driver-x86@vger.kernel.org
@@@ -6177,13 -6093,6 +6177,13 @@@ F:    Documentation/auxdisplay/ks010
  F:    drivers/auxdisplay/ks0108.c
  F:    include/linux/ks0108.h
  
 +L3MDEV
 +M:    David Ahern <dsa@cumulusnetworks.com>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    net/l3mdev
 +F:    include/net/l3mdev.h
 +
  LAPB module
  L:    linux-x25@vger.kernel.org
  S:    Orphan
@@@ -6334,14 -6243,6 +6334,14 @@@ F:    drivers/nvdimm/pmem.
  F:    include/linux/pmem.h
  F:    arch/*/include/asm/pmem.h
  
 +LIGHTNVM PLATFORM SUPPORT
 +M:    Matias Bjorling <mb@lightnvm.io>
 +W:    http://github.com/OpenChannelSSD
 +S:    Maintained
 +F:    drivers/lightnvm/
 +F:    include/linux/lightnvm.h
 +F:    include/uapi/linux/lightnvm.h
 +
  LINUX FOR IBM pSERIES (RS/6000)
  M:    Paul Mackerras <paulus@au.ibm.com>
  W:    http://www.ibm.com/linux/ltc/projects/ppc
@@@ -6659,13 -6560,6 +6659,13 @@@ M:    Guenter Roeck <linux@roeck-us.net
  S:    Maintained
  F:    drivers/net/dsa/mv88e6352.c
  
 +MARVELL CRYPTO DRIVER
 +M:    Boris Brezillon <boris.brezillon@free-electrons.com>
 +M:    Arnaud Ebalard <arno@natisbad.org>
 +F:    drivers/crypto/marvell/
 +S:    Maintained
 +L:    linux-crypto@vger.kernel.org
 +
  MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2)
  M:    Mirko Lindner <mlindner@marvell.com>
  M:    Stephen Hemminger <stephen@networkplumber.org>
@@@ -6788,12 -6682,6 +6788,12 @@@ W:    http://linuxtv.or
  S:    Maintained
  F:    drivers/media/radio/radio-maxiradio*
  
 +MCP4531 MICROCHIP DIGITAL POTENTIOMETER DRIVER
 +M:    Peter Rosin <peda@axentia.se>
 +L:    linux-iio@vger.kernel.org
 +S:    Maintained
 +F:    drivers/iio/potentiometer/mcp4531.c
 +
  MEDIA DRIVERS FOR RENESAS - VSP1
  M:    Laurent Pinchart <laurent.pinchart@ideasonboard.com>
  L:    linux-media@vger.kernel.org
@@@ -6890,6 -6778,7 +6890,6 @@@ F:      drivers/scsi/megaraid
  
  MELLANOX ETHERNET DRIVER (mlx4_en)
  M:    Amir Vadai <amirv@mellanox.com>
 -M:    Ido Shamay <idos@mellanox.com>
  L:    netdev@vger.kernel.org
  S:    Supported
  W:    http://www.mellanox.com
@@@ -7029,13 -6918,6 +7029,13 @@@ S:    Supporte
  F:    include/linux/mlx5/
  F:    drivers/infiniband/hw/mlx5/
  
 +MELEXIS MLX90614 DRIVER
 +M:    Crt Mori <cmo@melexis.com>
 +L:    linux-iio@vger.kernel.org
 +W:    http://www.melexis.com
 +S:    Supported
 +F:    drivers/iio/temperature/mlx90614.c
 +
  MN88472 MEDIA DRIVER
  M:    Antti Palosaari <crope@iki.fi>
  L:    linux-media@vger.kernel.org
@@@ -7089,7 -6971,6 +7089,7 @@@ M:      Alan Ott <alan@signal11.us
  L:    linux-wpan@vger.kernel.org
  S:    Maintained
  F:    drivers/net/ieee802154/mrf24j40.c
 +F:    Documentation/devicetree/bindings/net/ieee802154/mrf24j40.txt
  
  MSI LAPTOP SUPPORT
  M:    "Lee, Chun-Yi" <jlee@suse.com>
@@@ -7424,6 -7305,7 +7424,6 @@@ S:      Odd Fixe
  F:    drivers/net/
  F:    include/linux/if_*
  F:    include/linux/netdevice.h
 -F:    include/linux/arcdevice.h
  F:    include/linux/etherdevice.h
  F:    include/linux/fcdevice.h
  F:    include/linux/fddidevice.h
@@@ -7516,10 -7398,10 +7516,10 @@@ NOKIA N900 POWER SUPPLY DRIVER
  M:    Pali Rohár <pali.rohar@gmail.com>
  S:    Maintained
  F:    include/linux/power/bq2415x_charger.h
 -F:    include/linux/power/bq27x00_battery.h
 +F:    include/linux/power/bq27xxx_battery.h
  F:    include/linux/power/isp1704_charger.h
  F:    drivers/power/bq2415x_charger.c
 -F:    drivers/power/bq27x00_battery.c
 +F:    drivers/power/bq27xxx_battery.c
  F:    drivers/power/isp1704_charger.c
  F:    drivers/power/rx51_battery.c
  
@@@ -7562,13 -7444,11 +7562,13 @@@ F:   drivers/video/fbdev/riva
  F:    drivers/video/fbdev/nvidia/
  
  NVM EXPRESS DRIVER
 -M:    Matthew Wilcox <willy@linux.intel.com>
 +M:    Keith Busch <keith.busch@intel.com>
 +M:    Jens Axboe <axboe@fb.com>
  L:    linux-nvme@lists.infradead.org
 -T:    git git://git.infradead.org/users/willy/linux-nvme.git
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 +W:    https://kernel.googlesource.com/pub/scm/linux/kernel/git/axboe/linux-block/
  S:    Supported
 -F:    drivers/block/nvme*
 +F:    drivers/nvme/host/
  F:    include/linux/nvme.h
  
  NVMEM FRAMEWORK
@@@ -8278,13 -8158,6 +8278,13 @@@ L:    linux-arm-kernel@lists.infradead.or
  S:    Maintained
  F:    drivers/pinctrl/pinctrl-at91.*
  
 +PIN CONTROLLER - ATMEL AT91 PIO4
 +M:    Ludovic Desroches <ludovic.desroches@atmel.com>
 +L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 +L:    linux-gpio@vger.kernel.org
 +S:    Supported
 +F:    drivers/pinctrl/pinctrl-at91-pio4.*
 +
  PIN CONTROLLER - INTEL
  M:    Mika Westerberg <mika.westerberg@linux.intel.com>
  M:    Heikki Krogerus <heikki.krogerus@linux.intel.com>
@@@ -8646,16 -8519,6 +8646,16 @@@ L:    netdev@vger.kernel.or
  S:    Supported
  F:    drivers/net/ethernet/qlogic/qlge/
  
 +QLOGIC QL4xxx ETHERNET DRIVER
 +M:    Yuval Mintz <Yuval.Mintz@qlogic.com>
 +M:    Ariel Elior <Ariel.Elior@qlogic.com>
 +M:    everest-linux-l2@qlogic.com
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +F:    drivers/net/ethernet/qlogic/qed/
 +F:    include/linux/qed/
 +F:    drivers/net/ethernet/qlogic/qede/
 +
  QNX4 FILESYSTEM
  M:    Anders Larsen <al@alarsen.net>
  W:    http://www.alarsen.net/linux/qnx4fs/
@@@ -9007,13 -8870,6 +9007,13 @@@ S:    Maintaine
  F:    drivers/net/wireless/rtlwifi/
  F:    drivers/net/wireless/rtlwifi/rtl8192ce/
  
 +RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
 +M:    Jes Sorensen <Jes.Sorensen@redhat.com>
 +L:    linux-wireless@vger.kernel.org
 +T:    git git://git.kernel.org/pub/scm/linux/kernel/git/jes/linux.git rtl8723au-mac80211
 +S:    Maintained
 +F:    drivers/net/wireless/realtek/rtl8xxxu/
 +
  S3 SAVAGE FRAMEBUFFER DRIVER
  M:    Antonino Daplas <adaplas@gmail.com>
  L:    linux-fbdev@vger.kernel.org
@@@ -9087,13 -8943,6 +9087,13 @@@ F:    drivers/s390/net/*iucv
  F:    include/net/iucv/
  F:    net/iucv/
  
 +S390 IOMMU (PCI)
 +M:    Gerald Schaefer <gerald.schaefer@de.ibm.com>
 +L:    linux-s390@vger.kernel.org
 +W:    http://www.ibm.com/developerworks/linux/linux390/
 +S:    Supported
 +F:    drivers/iommu/s390-iommu.c
 +
  S3C24XX SD/MMC Driver
  M:    Ben Dooks <ben-linux@fluff.org>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@@ -9252,15 -9101,6 +9252,15 @@@ S: Supporte
  F: Documentation/devicetree/bindings/net/snps,dwc-qos-ethernet.txt
  F: drivers/net/ethernet/synopsys/dwc_eth_qos.c
  
 +SYNOPSYS DESIGNWARE I2C DRIVER
 +M:    Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 +M:    Jarkko Nikula <jarkko.nikula@linux.intel.com>
 +M:    Mika Westerberg <mika.westerberg@linux.intel.com>
 +L:    linux-i2c@vger.kernel.org
 +S:    Maintained
 +F:    drivers/i2c/busses/i2c-designware-*
 +F:    include/linux/platform_data/i2c-designware.h
 +
  SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
  M:    Seungwon Jeon <tgih.jun@samsung.com>
  M:    Jaehoon Chung <jh80.chung@samsung.com>
@@@ -9269,14 -9109,6 +9269,14 @@@ S:    Maintaine
  F:    include/linux/mmc/dw_mmc.h
  F:    drivers/mmc/host/dw_mmc*
  
 +SYSTEM TRACE MODULE CLASS
 +M:    Alexander Shishkin <alexander.shishkin@linux.intel.com>
 +S:    Maintained
 +F:    Documentation/trace/stm.txt
 +F:    drivers/hwtracing/stm/
 +F:    include/linux/stm.h
 +F:    include/uapi/linux/stm.h
 +
  THUNDERBOLT DRIVER
  M:    Andreas Noever <andreas.noever@gmail.com>
  S:    Maintained
@@@ -9482,8 -9314,8 +9482,8 @@@ F:      include/uapi/linux/phantom.
  
  SERVER ENGINES 10Gbps iSCSI - BladeEngine 2 DRIVER
  M:    Jayamohan Kallickal <jayamohan.kallickal@avagotech.com>
 -M:    Minh Tran <minh.tran@avagotech.com>
 -M:    John Soni Jose <sony.john-n@avagotech.com>
 +M:    Ketan Mukadam <ketan.mukadam@avagotech.com>
 +M:    John Soni Jose <sony.john@avagotech.com>
  L:    linux-scsi@vger.kernel.org
  W:    http://www.avagotech.com
  S:    Supported
@@@ -10082,6 -9914,7 +10082,6 @@@ S:    Maintaine
  F:    drivers/staging/lustre
  
  STAGING - NVIDIA COMPLIANT EMBEDDED CONTROLLER INTERFACE (nvec)
 -M:    Julian Andres Klode <jak@jak-linux.org>
  M:    Marc Dietrich <marvin24@gmx.de>
  L:    ac100@lists.launchpad.net (moderated for non-subscribers)
  L:    linux-tegra@vger.kernel.org
@@@ -10145,11 -9978,9 +10145,11 @@@ F:  drivers/staging/vt665?
  
  STAGING - WILC1000 WIFI DRIVER
  M:    Johnny Kim <johnny.kim@atmel.com>
 -M:    Rachel Kim <rachel.kim@atmel.com>
 -M:    Dean Lee <dean.lee@atmel.com>
 +M:    Austin Shin <austin.shin@atmel.com>
  M:    Chris Park <chris.park@atmel.com>
 +M:    Tony Cho <tony.cho@atmel.com>
 +M:    Glen Lee <glen.lee@atmel.com>
 +M:    Leo Kim <leo.kim@atmel.com>
  L:    linux-wireless@vger.kernel.org
  S:    Supported
  F:    drivers/staging/wilc1000/
@@@ -10238,7 -10069,6 +10238,7 @@@ F:   include/net/switchdev.
  
  SYNOPSYS ARC ARCHITECTURE
  M:    Vineet Gupta <vgupta@synopsys.com>
 +L:    linux-snps-arc@lists.infradead.org
  S:    Supported
  F:    arch/arc/
  F:    Documentation/devicetree/bindings/arc/*
@@@ -10756,7 -10586,6 +10756,7 @@@ F:   drivers/media/pci/tw68
  TPM DEVICE DRIVER
  M:    Peter Huewe <peterhuewe@gmx.de>
  M:    Marcel Selhorst <tpmdd@selhorst.net>
 +M:    Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
  R:    Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
  W:    http://tpmdd.sourceforge.net
  L:    tpmdd-devel@lists.sourceforge.net (moderated for non-subscribers)
@@@ -11348,6 -11177,13 +11348,13 @@@ L: netdev@vger.kernel.or
  S:    Maintained
  F:    drivers/net/ethernet/via/via-velocity.*
  
+ VIRT LIB
+ M:    Alex Williamson <alex.williamson@redhat.com>
+ M:    Paolo Bonzini <pbonzini@redhat.com>
+ L:    kvm@vger.kernel.org
+ S:    Supported
+ F:    virt/lib/
  VIVID VIRTUAL VIDEO DRIVER
  M:    Hans Verkuil <hverkuil@xs4all.nl>
  L:    linux-media@vger.kernel.org
@@@ -11436,6 -11272,7 +11443,6 @@@ M:   Shrijeet Mukherjee <shm@cumulusnetwo
  L:    netdev@vger.kernel.org
  S:    Maintained
  F:    drivers/net/vrf.c
 -F:    include/net/vrf.h
  F:    Documentation/networking/vrf.txt
  
  VT1211 HARDWARE MONITOR DRIVER
@@@ -11548,6 -11385,15 +11555,6 @@@ W:  http://oops.ghostprotocols.net:81/bl
  S:    Maintained
  F:    drivers/net/wireless/wl3501*
  
 -WM97XX TOUCHSCREEN DRIVERS
 -M:    Mark Brown <broonie@kernel.org>
 -M:    Liam Girdwood <lrg@slimlogic.co.uk>
 -L:    linux-input@vger.kernel.org
 -W:    https://github.com/CirrusLogic/linux-drivers/wiki
 -S:    Supported
 -F:    drivers/input/touchscreen/*wm97*
 -F:    include/linux/wm97xx.h
 -
  WOLFSON MICROELECTRONICS DRIVERS
  L:    patches@opensource.wolfsonmicro.com
  T:    git https://github.com/CirrusLogic/linux-drivers.git
@@@ -11622,11 -11468,6 +11629,11 @@@ L: linux-edac@vger.kernel.or
  S:    Maintained
  F:    arch/x86/kernel/cpu/mcheck/*
  
 +X86 MICROCODE UPDATE SUPPORT
 +M:    Borislav Petkov <bp@alien8.de>
 +S:    Maintained
 +F:    arch/x86/kernel/cpu/microcode/*
 +
  X86 VDSO
  M:    Andy Lutomirski <luto@amacapital.net>
  L:    linux-kernel@vger.kernel.org
@@@ -11827,7 -11668,6 +11834,7 @@@ F:   drivers/tty/serial/zs.
  ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR
  M:    Minchan Kim <minchan@kernel.org>
  M:    Nitin Gupta <ngupta@vflare.org>
 +R:    Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com>
  L:    linux-mm@kvack.org
  S:    Maintained
  F:    mm/zsmalloc.c
diff --combined Makefile
+++ b/Makefile
@@@ -1,8 -1,8 +1,8 @@@
  VERSION = 4
  PATCHLEVEL = 3
  SUBLEVEL = 0
 -EXTRAVERSION = -rc3
 -NAME = Hurr durr I'ma sheep
 +EXTRAVERSION =
 +NAME = Blurry Fish Butt
  
  # *DOCUMENTATION*
  # To see a list of typical targets execute "make help"
@@@ -550,6 -550,7 +550,7 @@@ drivers-y  := drivers/ sound/ firmware
  net-y         := net/
  libs-y                := lib/
  core-y                := usr/
+ virt-y                := virt/
  endif # KBUILD_EXTMOD
  
  ifeq ($(dot-config),1)
@@@ -882,10 -883,10 +883,10 @@@ core-y          += kernel/ certs/ mm/ fs/ ipc/ 
  
  vmlinux-dirs  := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
                     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
-                    $(net-y) $(net-m) $(libs-y) $(libs-m)))
+                    $(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y)))
  
  vmlinux-alldirs       := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \
-                    $(init-) $(core-) $(drivers-) $(net-) $(libs-))))
+                    $(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-))))
  
  init-y                := $(patsubst %/, %/built-in.o, $(init-y))
  core-y                := $(patsubst %/, %/built-in.o, $(core-y))
@@@ -894,14 -895,15 +895,15 @@@ net-y           := $(patsubst %/, %/built-in.o, 
  libs-y1               := $(patsubst %/, %/lib.a, $(libs-y))
  libs-y2               := $(patsubst %/, %/built-in.o, $(libs-y))
  libs-y                := $(libs-y1) $(libs-y2)
+ virt-y                := $(patsubst %/, %/built-in.o, $(virt-y))
  
  # Externally visible symbols (used by link-vmlinux.sh)
  export KBUILD_VMLINUX_INIT := $(head-y) $(init-y)
- export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y)
+ export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) $(virt-y)
  export KBUILD_LDS          := arch/$(SRCARCH)/kernel/vmlinux.lds
  export LDFLAGS_vmlinux
  # used by scripts/package/Makefile
- export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools virt)
+ export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools)
  
  vmlinux-deps := $(KBUILD_LDS) $(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN)
  
@@@ -1336,7 -1338,7 +1338,7 @@@ $(help-board-dirs): help-%
  # Documentation targets
  # ---------------------------------------------------------------------------
  %docs: scripts_basic FORCE
 -      $(Q)$(MAKE) $(build)=scripts build_docproc
 +      $(Q)$(MAKE) $(build)=scripts build_docproc build_check-lc_ctype
        $(Q)$(MAKE) $(build)=Documentation/DocBook $@
  
  else # KBUILD_EXTMOD
diff --combined arch/arm64/kvm/Kconfig
@@@ -16,13 -16,9 +16,13 @@@ menuconfig VIRTUALIZATIO
  
  if VIRTUALIZATION
  
 +config KVM_ARM_VGIC_V3
 +      bool
 +
  config KVM
        bool "Kernel-based Virtual Machine (KVM) support"
        depends on OF
 +      depends on !ARM64_16K_PAGES
        select MMU_NOTIFIER
        select PREEMPT_NOTIFIERS
        select ANON_INODES
        select KVM_VFIO
        select HAVE_KVM_EVENTFD
        select HAVE_KVM_IRQFD
 +      select KVM_ARM_VGIC_V3
        ---help---
          Support hosting virtualized guest machines.
 +        We don't support KVM with 16K page tables yet, due to the multiple
 +        levels of fake page tables.
  
          If unsure, say N.
  
@@@ -48,4 -41,6 +48,6 @@@ config KVM_ARM_HOS
        ---help---
          Provides host support for ARM processors.
  
+ source drivers/vhost/Kconfig
  endif # VIRTUALIZATION
@@@ -297,6 -297,8 +297,6 @@@ struct kvmppc_vcore 
        u32 arch_compat;
        ulong pcr;
        ulong dpdes;            /* doorbell state (POWER8) */
 -      void *mpp_buffer; /* Micro Partition Prefetch buffer */
 -      bool mpp_buffer_is_valid;
        ulong conferring_threads;
  };
  
@@@ -716,5 -718,7 +716,7 @@@ static inline void kvm_arch_memslots_up
  static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
  static inline void kvm_arch_exit(void) {}
+ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
  
  #endif /* __POWERPC_KVM_HOST_H__ */
@@@ -22,7 -22,7 +22,7 @@@
  #include <linux/kvm.h>
  #include <asm/debug.h>
  #include <asm/cpu.h>
 -#include <asm/fpu-internal.h>
 +#include <asm/fpu/api.h>
  #include <asm/isc.h>
  
  #define KVM_MAX_VCPUS 64
@@@ -644,5 -644,7 +644,7 @@@ static inline void kvm_arch_memslots_up
  static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
  static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
                struct kvm_memory_slot *slot) {}
+ static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+ static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
  
  #endif
diff --combined arch/s390/kvm/kvm-s390.c
@@@ -514,35 -514,20 +514,20 @@@ static int kvm_s390_set_tod_high(struc
  
        if (gtod_high != 0)
                return -EINVAL;
-       VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x\n", gtod_high);
+       VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
  
        return 0;
  }
  
  static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
  {
-       struct kvm_vcpu *cur_vcpu;
-       unsigned int vcpu_idx;
-       u64 host_tod, gtod;
-       int r;
+       u64 gtod;
  
        if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
                return -EFAULT;
  
-       r = store_tod_clock(&host_tod);
-       if (r)
-               return r;
-       mutex_lock(&kvm->lock);
-       preempt_disable();
-       kvm->arch.epoch = gtod - host_tod;
-       kvm_s390_vcpu_block_all(kvm);
-       kvm_for_each_vcpu(vcpu_idx, cur_vcpu, kvm)
-               cur_vcpu->arch.sie_block->epoch = kvm->arch.epoch;
-       kvm_s390_vcpu_unblock_all(kvm);
-       preempt_enable();
-       mutex_unlock(&kvm->lock);
-       VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx\n", gtod);
+       kvm_s390_set_tod_clock(kvm, gtod);
+       VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
        return 0;
  }
  
@@@ -574,26 -559,19 +559,19 @@@ static int kvm_s390_get_tod_high(struc
        if (copy_to_user((void __user *)attr->addr, &gtod_high,
                                         sizeof(gtod_high)))
                return -EFAULT;
-       VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x\n", gtod_high);
+       VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
  
        return 0;
  }
  
  static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
  {
-       u64 host_tod, gtod;
-       int r;
+       u64 gtod;
  
-       r = store_tod_clock(&host_tod);
-       if (r)
-               return r;
-       preempt_disable();
-       gtod = host_tod + kvm->arch.epoch;
-       preempt_enable();
+       gtod = kvm_s390_get_tod_clock_fast(kvm);
        if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
                return -EFAULT;
-       VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx\n", gtod);
+       VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
  
        return 0;
  }
@@@ -1120,7 -1098,9 +1098,9 @@@ int kvm_arch_init_vm(struct kvm *kvm, u
        if (!kvm->arch.sca)
                goto out_err;
        spin_lock(&kvm_lock);
-       sca_offset = (sca_offset + 16) & 0x7f0;
+       sca_offset += 16;
+       if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
+               sca_offset = 0;
        kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
        spin_unlock(&kvm_lock);
  
@@@ -1292,6 -1272,7 +1272,6 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
  static inline void save_fpu_to(struct fpu *dst)
  {
        dst->fpc = current->thread.fpu.fpc;
 -      dst->flags = current->thread.fpu.flags;
        dst->regs = current->thread.fpu.regs;
  }
  
  static inline void load_fpu_from(struct fpu *from)
  {
        current->thread.fpu.fpc = from->fpc;
 -      current->thread.fpu.flags = from->flags;
        current->thread.fpu.regs = from->regs;
  }
  
@@@ -1313,12 -1295,15 +1293,12 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
  
        if (test_kvm_facility(vcpu->kvm, 129)) {
                current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
 -              current->thread.fpu.flags = FPU_USE_VX;
                /*
                 * Use the register save area in the SIE-control block
                 * for register restore and save in kvm_arch_vcpu_put()
                 */
                current->thread.fpu.vxrs =
                        (__vector128 *)&vcpu->run->s.regs.vrs;
 -              /* Always enable the vector extension for KVM */
 -              __ctl_set_vx();
        } else
                load_fpu_from(&vcpu->arch.guest_fpregs);
  
@@@ -1911,6 -1896,22 +1891,22 @@@ retry
        return 0;
  }
  
+ void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
+ {
+       struct kvm_vcpu *vcpu;
+       int i;
+       mutex_lock(&kvm->lock);
+       preempt_disable();
+       kvm->arch.epoch = tod - get_tod_clock();
+       kvm_s390_vcpu_block_all(kvm);
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               vcpu->arch.sie_block->epoch = kvm->arch.epoch;
+       kvm_s390_vcpu_unblock_all(kvm);
+       preempt_enable();
+       mutex_unlock(&kvm->lock);
+ }
  /**
   * kvm_arch_fault_in_page - fault-in guest page if necessary
   * @vcpu: The corresponding virtual cpu
@@@ -2321,6 -2322,7 +2317,6 @@@ int kvm_s390_vcpu_store_status(struct k
                 * registers and the FPC value and store them in the
                 * guest_fpregs structure.
                 */
 -              WARN_ON(!is_vx_task(current));    /* XXX remove later */
                vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
                convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
                                 current->thread.fpu.vxrs);
diff --combined arch/x86/kvm/cpuid.c
@@@ -30,7 -30,7 +30,7 @@@ static u32 xstate_required_size(u64 xst
        int feature_bit = 0;
        u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
  
 -      xstate_bv &= XSTATE_EXTEND_MASK;
 +      xstate_bv &= XFEATURE_MASK_EXTEND;
        while (xstate_bv) {
                if (xstate_bv & 0x1) {
                        u32 eax, ebx, ecx, edx, offset;
@@@ -51,7 -51,7 +51,7 @@@ u64 kvm_supported_xcr0(void
        u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
  
        if (!kvm_x86_ops->mpx_supported())
 -              xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
 +              xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
  
        return xcr0;
  }
@@@ -348,7 -348,7 +348,7 @@@ static inline int __do_cpuid_ent(struc
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
                F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
                F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
-               F(AVX512CD);
+               F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(PCOMMIT);
  
        /* cpuid 0xD.1.eax */
        const u32 kvm_supported_word10_x86_features =
diff --combined arch/x86/kvm/x86.c
@@@ -51,6 -51,8 +51,8 @@@
  #include <linux/pci.h>
  #include <linux/timekeeper_internal.h>
  #include <linux/pvclock_gtod.h>
+ #include <linux/kvm_irqfd.h>
+ #include <linux/irqbypass.h>
  #include <trace/events/kvm.h>
  
  #define CREATE_TRACE_POINTS
@@@ -64,6 -66,7 +66,7 @@@
  #include <asm/fpu/internal.h> /* Ugh! */
  #include <asm/pvclock.h>
  #include <asm/div64.h>
+ #include <asm/irq_remapping.h>
  
  #define MAX_IO_MSRS 256
  #define KVM_MAX_MCE_BANKS 32
@@@ -622,7 -625,9 +625,9 @@@ int kvm_set_cr0(struct kvm_vcpu *vcpu, 
        if ((cr0 ^ old_cr0) & update_bits)
                kvm_mmu_reset_context(vcpu);
  
-       if ((cr0 ^ old_cr0) & X86_CR0_CD)
+       if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
+           kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+           !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
                kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
  
        return 0;
@@@ -663,9 -668,9 +668,9 @@@ static int __kvm_set_xcr(struct kvm_vcp
        /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now  */
        if (index != XCR_XFEATURE_ENABLED_MASK)
                return 1;
 -      if (!(xcr0 & XSTATE_FP))
 +      if (!(xcr0 & XFEATURE_MASK_FP))
                return 1;
 -      if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE))
 +      if ((xcr0 & XFEATURE_MASK_YMM) && !(xcr0 & XFEATURE_MASK_SSE))
                return 1;
  
        /*
         * saving.  However, xcr0 bit 0 is always set, even if the
         * emulated CPU does not support XSAVE (see fx_init).
         */
 -      valid_bits = vcpu->arch.guest_supported_xcr0 | XSTATE_FP;
 +      valid_bits = vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FP;
        if (xcr0 & ~valid_bits)
                return 1;
  
 -      if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
 +      if ((!(xcr0 & XFEATURE_MASK_BNDREGS)) !=
 +          (!(xcr0 & XFEATURE_MASK_BNDCSR)))
                return 1;
  
 -      if (xcr0 & XSTATE_AVX512) {
 -              if (!(xcr0 & XSTATE_YMM))
 +      if (xcr0 & XFEATURE_MASK_AVX512) {
 +              if (!(xcr0 & XFEATURE_MASK_YMM))
                        return 1;
 -              if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
 +              if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
                        return 1;
        }
        kvm_put_guest_xcr0(vcpu);
        vcpu->arch.xcr0 = xcr0;
  
 -      if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK)
 +      if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
                kvm_update_cpuid(vcpu);
        return 0;
  }
@@@ -789,7 -793,7 +794,7 @@@ int kvm_set_cr8(struct kvm_vcpu *vcpu, 
  {
        if (cr8 & CR8_RESERVED_BITS)
                return 1;
-       if (irqchip_in_kernel(vcpu->kvm))
+       if (lapic_in_kernel(vcpu))
                kvm_lapic_set_tpr(vcpu, cr8);
        else
                vcpu->arch.cr8 = cr8;
@@@ -799,7 -803,7 +804,7 @@@ EXPORT_SYMBOL_GPL(kvm_set_cr8)
  
  unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
  {
-       if (irqchip_in_kernel(vcpu->kvm))
+       if (lapic_in_kernel(vcpu))
                return kvm_lapic_get_cr8(vcpu);
        else
                return vcpu->arch.cr8;
@@@ -953,6 -957,9 +958,9 @@@ static u32 emulated_msrs[] = 
        HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
        HV_X64_MSR_CRASH_P0, HV_X64_MSR_CRASH_P1, HV_X64_MSR_CRASH_P2,
        HV_X64_MSR_CRASH_P3, HV_X64_MSR_CRASH_P4, HV_X64_MSR_CRASH_CTL,
+       HV_X64_MSR_RESET,
+       HV_X64_MSR_VP_INDEX,
+       HV_X64_MSR_VP_RUNTIME,
        HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
        MSR_KVM_PV_EOI_EN,
  
@@@ -1898,6 -1905,8 +1906,8 @@@ static void accumulate_steal_time(struc
  
  static void record_steal_time(struct kvm_vcpu *vcpu)
  {
+       accumulate_steal_time(vcpu);
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
  
@@@ -2048,12 -2057,6 +2058,6 @@@ int kvm_set_msr_common(struct kvm_vcpu 
                if (!(data & KVM_MSR_ENABLED))
                        break;
  
-               vcpu->arch.st.last_steal = current->sched_info.run_delay;
-               preempt_disable();
-               accumulate_steal_time(vcpu);
-               preempt_enable();
                kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
  
                break;
@@@ -2449,6 -2452,7 +2453,7 @@@ int kvm_vm_ioctl_check_extension(struc
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_DISABLE_QUIRKS:
        case KVM_CAP_SET_BOOT_CPU_ID:
+       case KVM_CAP_SPLIT_IRQCHIP:
  #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
        case KVM_CAP_ASSIGN_DEV_IRQ:
        case KVM_CAP_PCI_2_3:
@@@ -2628,7 -2632,6 +2633,6 @@@ void kvm_arch_vcpu_load(struct kvm_vcp
                vcpu->cpu = cpu;
        }
  
-       accumulate_steal_time(vcpu);
        kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
  }
  
@@@ -2662,12 -2665,24 +2666,24 @@@ static int kvm_vcpu_ioctl_interrupt(str
  {
        if (irq->irq >= KVM_NR_INTERRUPTS)
                return -EINVAL;
-       if (irqchip_in_kernel(vcpu->kvm))
+       if (!irqchip_in_kernel(vcpu->kvm)) {
+               kvm_queue_interrupt(vcpu, irq->irq, false);
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+               return 0;
+       }
+       /*
+        * With in-kernel LAPIC, we only use this to inject EXTINT, so
+        * fail for in-kernel 8259.
+        */
+       if (pic_in_kernel(vcpu->kvm))
                return -ENXIO;
  
-       kvm_queue_interrupt(vcpu, irq->irq, false);
-       kvm_make_request(KVM_REQ_EVENT, vcpu);
+       if (vcpu->arch.pending_external_vector != -1)
+               return -EEXIST;
  
+       vcpu->arch.pending_external_vector = irq->irq;
        return 0;
  }
  
@@@ -2906,7 -2921,7 +2922,7 @@@ static void fill_xsave(u8 *dest, struc
         * Copy each region from the possibly compacted offset to the
         * non-compacted offset.
         */
 -      valid = xstate_bv & ~XSTATE_FPSSE;
 +      valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
        while (valid) {
                u64 feature = valid & -valid;
                int index = fls64(feature) - 1;
@@@ -2944,7 -2959,7 +2960,7 @@@ static void load_xsave(struct kvm_vcpu 
         * Copy each region from the non-compacted offset to the
         * possibly compacted offset.
         */
 -      valid = xstate_bv & ~XSTATE_FPSSE;
 +      valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
        while (valid) {
                u64 feature = valid & -valid;
                int index = fls64(feature) - 1;
@@@ -2972,7 -2987,7 +2988,7 @@@ static void kvm_vcpu_ioctl_x86_get_xsav
                        &vcpu->arch.guest_fpu.state.fxsave,
                        sizeof(struct fxregs_state));
                *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] =
 -                      XSTATE_FPSSE;
 +                      XFEATURE_MASK_FPSSE;
        }
  }
  
@@@ -2992,7 -3007,7 +3008,7 @@@ static int kvm_vcpu_ioctl_x86_set_xsave
                        return -EINVAL;
                load_xsave(vcpu, (u8 *)guest_xsave->region);
        } else {
 -              if (xstate_bv & ~XSTATE_FPSSE)
 +              if (xstate_bv & ~XFEATURE_MASK_FPSSE)
                        return -EINVAL;
                memcpy(&vcpu->arch.guest_fpu.state.fxsave,
                        guest_xsave->region, sizeof(struct fxregs_state));
@@@ -3176,7 -3191,7 +3192,7 @@@ long kvm_arch_vcpu_ioctl(struct file *f
                struct kvm_vapic_addr va;
  
                r = -EINVAL;
-               if (!irqchip_in_kernel(vcpu->kvm))
+               if (!lapic_in_kernel(vcpu))
                        goto out;
                r = -EFAULT;
                if (copy_from_user(&va, argp, sizeof va))
@@@ -3425,41 -3440,35 +3441,35 @@@ static int kvm_vm_ioctl_set_irqchip(str
  
  static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
  {
-       int r = 0;
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-       return r;
+       return 0;
  }
  
  static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
  {
-       int r = 0;
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
        kvm_pit_load_count(kvm, 0, ps->channels[0].count, 0);
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-       return r;
+       return 0;
  }
  
  static int kvm_vm_ioctl_get_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
  {
-       int r = 0;
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        memcpy(ps->channels, &kvm->arch.vpit->pit_state.channels,
                sizeof(ps->channels));
        ps->flags = kvm->arch.vpit->pit_state.flags;
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
        memset(&ps->reserved, 0, sizeof(ps->reserved));
-       return r;
+       return 0;
  }
  
  static int kvm_vm_ioctl_set_pit2(struct kvm *kvm, struct kvm_pit_state2 *ps)
  {
-       int r = 0, start = 0;
+       int start = 0;
        u32 prev_legacy, cur_legacy;
        mutex_lock(&kvm->arch.vpit->pit_state.lock);
        prev_legacy = kvm->arch.vpit->pit_state.flags & KVM_PIT_FLAGS_HPET_LEGACY;
        kvm->arch.vpit->pit_state.flags = ps->flags;
        kvm_pit_load_count(kvm, 0, kvm->arch.vpit->pit_state.channels[0].count, start);
        mutex_unlock(&kvm->arch.vpit->pit_state.lock);
-       return r;
+       return 0;
  }
  
  static int kvm_vm_ioctl_reinject(struct kvm *kvm,
@@@ -3556,6 -3565,28 +3566,28 @@@ static int kvm_vm_ioctl_enable_cap(stru
                kvm->arch.disabled_quirks = cap->args[0];
                r = 0;
                break;
+       case KVM_CAP_SPLIT_IRQCHIP: {
+               mutex_lock(&kvm->lock);
+               r = -EINVAL;
+               if (cap->args[0] > MAX_NR_RESERVED_IOAPIC_PINS)
+                       goto split_irqchip_unlock;
+               r = -EEXIST;
+               if (irqchip_in_kernel(kvm))
+                       goto split_irqchip_unlock;
+               if (atomic_read(&kvm->online_vcpus))
+                       goto split_irqchip_unlock;
+               r = kvm_setup_empty_irq_routing(kvm);
+               if (r)
+                       goto split_irqchip_unlock;
+               /* Pairs with irqchip_in_kernel. */
+               smp_wmb();
+               kvm->arch.irqchip_split = true;
+               kvm->arch.nr_reserved_ioapic_pins = cap->args[0];
+               r = 0;
+ split_irqchip_unlock:
+               mutex_unlock(&kvm->lock);
+               break;
+       }
        default:
                r = -EINVAL;
                break;
@@@ -3669,7 -3700,7 +3701,7 @@@ long kvm_arch_vm_ioctl(struct file *fil
                }
  
                r = -ENXIO;
-               if (!irqchip_in_kernel(kvm))
+               if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
                        goto get_irqchip_out;
                r = kvm_vm_ioctl_get_irqchip(kvm, chip);
                if (r)
                }
  
                r = -ENXIO;
-               if (!irqchip_in_kernel(kvm))
+               if (!irqchip_in_kernel(kvm) || irqchip_split(kvm))
                        goto set_irqchip_out;
                r = kvm_vm_ioctl_set_irqchip(kvm, chip);
                if (r)
@@@ -4060,6 -4091,15 +4092,15 @@@ static int kvm_read_guest_virt_system(s
        return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
  }
  
+ static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
+               unsigned long addr, void *val, unsigned int bytes)
+ {
+       struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+       int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
+       return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
+ }
  int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
                                       gva_t addr, void *val,
                                       unsigned int bytes,
@@@ -4795,6 -4835,7 +4836,7 @@@ static const struct x86_emulate_ops emu
        .write_gpr           = emulator_write_gpr,
        .read_std            = kvm_read_guest_virt_system,
        .write_std           = kvm_write_guest_virt_system,
+       .read_phys           = kvm_read_guest_phys_system,
        .fetch               = kvm_fetch_guest_virt,
        .read_emulated       = emulator_read_emulated,
        .write_emulated      = emulator_write_emulated,
@@@ -5667,7 -5708,7 +5709,7 @@@ void kvm_arch_exit(void
  int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
  {
        ++vcpu->stat.halt_exits;
-       if (irqchip_in_kernel(vcpu->kvm)) {
+       if (lapic_in_kernel(vcpu)) {
                vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
                return 1;
        } else {
@@@ -5774,9 -5815,15 +5816,15 @@@ static int emulator_fix_hypercall(struc
   */
  static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
  {
-       return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) &&
-               vcpu->run->request_interrupt_window &&
-               kvm_arch_interrupt_allowed(vcpu));
+       if (!vcpu->run->request_interrupt_window || pic_in_kernel(vcpu->kvm))
+               return false;
+       if (kvm_cpu_has_interrupt(vcpu))
+               return false;
+       return (irqchip_split(vcpu->kvm)
+               ? kvm_apic_accept_pic_intr(vcpu)
+               : kvm_arch_interrupt_allowed(vcpu));
  }
  
  static void post_kvm_run_save(struct kvm_vcpu *vcpu)
        kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
        kvm_run->cr8 = kvm_get_cr8(vcpu);
        kvm_run->apic_base = kvm_get_apic_base(vcpu);
-       if (irqchip_in_kernel(vcpu->kvm))
-               kvm_run->ready_for_interrupt_injection = 1;
-       else
+       if (!irqchip_in_kernel(vcpu->kvm))
                kvm_run->ready_for_interrupt_injection =
                        kvm_arch_interrupt_allowed(vcpu) &&
                        !kvm_cpu_has_interrupt(vcpu) &&
                        !kvm_event_needs_reinjection(vcpu);
+       else if (!pic_in_kernel(vcpu->kvm))
+               kvm_run->ready_for_interrupt_injection =
+                       kvm_apic_accept_pic_intr(vcpu) &&
+                       !kvm_cpu_has_interrupt(vcpu);
+       else
+               kvm_run->ready_for_interrupt_injection = 1;
  }
  
  static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@@ -6144,18 -6195,18 +6196,18 @@@ static void process_smi(struct kvm_vcp
  
  static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
  {
-       u64 eoi_exit_bitmap[4];
-       u32 tmr[8];
        if (!kvm_apic_hw_enabled(vcpu->arch.apic))
                return;
  
-       memset(eoi_exit_bitmap, 0, 32);
-       memset(tmr, 0, 32);
+       memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8);
  
-       kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
-       kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
-       kvm_apic_update_tmr(vcpu, tmr);
+       if (irqchip_split(vcpu->kvm))
+               kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap);
+       else {
+               kvm_x86_ops->sync_pir_to_irr(vcpu);
+               kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap);
+       }
+       kvm_x86_ops->load_eoi_exitmap(vcpu);
  }
  
  static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
@@@ -6168,7 -6219,7 +6220,7 @@@ void kvm_vcpu_reload_apic_access_page(s
  {
        struct page *page = NULL;
  
-       if (!irqchip_in_kernel(vcpu->kvm))
+       if (!lapic_in_kernel(vcpu))
                return;
  
        if (!kvm_x86_ops->set_apic_access_page_addr)
@@@ -6206,7 -6257,7 +6258,7 @@@ void kvm_arch_mmu_notifier_invalidate_p
  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
  {
        int r;
-       bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
+       bool req_int_win = !lapic_in_kernel(vcpu) &&
                vcpu->run->request_interrupt_window;
        bool req_immediate_exit = false;
  
                        kvm_pmu_handle_event(vcpu);
                if (kvm_check_request(KVM_REQ_PMI, vcpu))
                        kvm_pmu_deliver_pmi(vcpu);
+               if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) {
+                       BUG_ON(vcpu->arch.pending_ioapic_eoi > 255);
+                       if (test_bit(vcpu->arch.pending_ioapic_eoi,
+                                    (void *) vcpu->arch.eoi_exit_bitmap)) {
+                               vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI;
+                               vcpu->run->eoi.vector =
+                                               vcpu->arch.pending_ioapic_eoi;
+                               r = 0;
+                               goto out;
+                       }
+               }
                if (kvm_check_request(KVM_REQ_SCAN_IOAPIC, vcpu))
                        vcpu_scan_ioapic(vcpu);
                if (kvm_check_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu))
                        r = 0;
                        goto out;
                }
+               if (kvm_check_request(KVM_REQ_HV_RESET, vcpu)) {
+                       vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
+                       vcpu->run->system_event.type = KVM_SYSTEM_EVENT_RESET;
+                       r = 0;
+                       goto out;
+               }
+       }
+       /*
+        * KVM_REQ_EVENT is not set when posted interrupts are set by
+        * VT-d hardware, so we have to update RVI unconditionally.
+        */
+       if (kvm_lapic_enabled(vcpu)) {
+               /*
+                * Update architecture specific hints for APIC
+                * virtual interrupt delivery.
+                */
+               if (kvm_x86_ops->hwapic_irr_update)
+                       kvm_x86_ops->hwapic_irr_update(vcpu,
+                               kvm_lapic_find_highest_irr(vcpu));
        }
  
        if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
                        kvm_x86_ops->enable_irq_window(vcpu);
  
                if (kvm_lapic_enabled(vcpu)) {
-                       /*
-                        * Update architecture specific hints for APIC
-                        * virtual interrupt delivery.
-                        */
-                       if (kvm_x86_ops->hwapic_irr_update)
-                               kvm_x86_ops->hwapic_irr_update(vcpu,
-                                       kvm_lapic_find_highest_irr(vcpu));
                        update_cr8_intercept(vcpu);
                        kvm_lapic_sync_to_vapic(vcpu);
                }
  
  static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
  {
-       if (!kvm_arch_vcpu_runnable(vcpu)) {
+       if (!kvm_arch_vcpu_runnable(vcpu) &&
+           (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) {
                srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
                kvm_vcpu_block(vcpu);
                vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+               if (kvm_x86_ops->post_block)
+                       kvm_x86_ops->post_block(vcpu);
                if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
                        return 1;
        }
@@@ -6468,10 -6548,12 +6549,12 @@@ static int vcpu_run(struct kvm_vcpu *vc
        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
  
        for (;;) {
-               if (kvm_vcpu_running(vcpu))
+               if (kvm_vcpu_running(vcpu)) {
                        r = vcpu_enter_guest(vcpu);
-               else
+               } else {
                        r = vcpu_block(kvm, vcpu);
+               }
                if (r <= 0)
                        break;
  
                        kvm_inject_pending_timer_irqs(vcpu);
  
                if (dm_request_for_irq_injection(vcpu)) {
-                       r = -EINTR;
-                       vcpu->run->exit_reason = KVM_EXIT_INTR;
+                       r = 0;
+                       vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
                        ++vcpu->stat.request_irq_exits;
                        break;
                }
@@@ -6608,7 -6690,7 +6691,7 @@@ int kvm_arch_vcpu_ioctl_run(struct kvm_
        }
  
        /* re-sync apic's tpr */
-       if (!irqchip_in_kernel(vcpu->kvm)) {
+       if (!lapic_in_kernel(vcpu)) {
                if (kvm_set_cr8(vcpu, kvm_run->cr8) != 0) {
                        r = -EINVAL;
                        goto out;
@@@ -7006,7 -7088,7 +7089,7 @@@ static void fx_init(struct kvm_vcpu *vc
        /*
         * Ensure guest xcr0 is valid for loading
         */
 -      vcpu->arch.xcr0 = XSTATE_FP;
 +      vcpu->arch.xcr0 = XFEATURE_MASK_FP;
  
        vcpu->arch.cr0 |= X86_CR0_ET;
  }
@@@ -7308,7 -7390,7 +7391,7 @@@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *v
  
  bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
  {
-       return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
+       return irqchip_in_kernel(vcpu->kvm) == lapic_in_kernel(vcpu);
  }
  
  struct static_key kvm_no_apic_vcpu __read_mostly;
@@@ -7377,6 -7459,8 +7460,8 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu 
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
  
+       vcpu->arch.pending_external_vector = -1;
        return 0;
  
  fail_free_mce_banks:
@@@ -7402,7 -7486,7 +7487,7 @@@ void kvm_arch_vcpu_uninit(struct kvm_vc
        kvm_mmu_destroy(vcpu);
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
        free_page((unsigned long)vcpu->arch.pio_data);
-       if (!irqchip_in_kernel(vcpu->kvm))
+       if (!lapic_in_kernel(vcpu))
                static_key_slow_dec(&kvm_no_apic_vcpu);
  }
  
@@@ -8029,7 -8113,59 +8114,59 @@@ bool kvm_arch_has_noncoherent_dma(struc
  }
  EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
  
+ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+                                     struct irq_bypass_producer *prod)
+ {
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+       if (kvm_x86_ops->update_pi_irte) {
+               irqfd->producer = prod;
+               return kvm_x86_ops->update_pi_irte(irqfd->kvm,
+                               prod->irq, irqfd->gsi, 1);
+       }
+       return -EINVAL;
+ }
+ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+                                     struct irq_bypass_producer *prod)
+ {
+       int ret;
+       struct kvm_kernel_irqfd *irqfd =
+               container_of(cons, struct kvm_kernel_irqfd, consumer);
+       if (!kvm_x86_ops->update_pi_irte) {
+               WARN_ON(irqfd->producer != NULL);
+               return;
+       }
+       WARN_ON(irqfd->producer != prod);
+       irqfd->producer = NULL;
+       /*
+        * When producer of consumer is unregistered, we change back to
+        * remapped mode, so we can re-use the current implementation
+        * when the irq is masked/disabled or the consumer side (KVM
+        * in this case) doesn't want to receive the interrupts.
+       */
+       ret = kvm_x86_ops->update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
+       if (ret)
+               printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
+                      " fails: %d\n", irqfd->consumer.token, ret);
+ }
+ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
+                                  uint32_t guest_irq, bool set)
+ {
+       if (!kvm_x86_ops->update_pi_irte)
+               return -EINVAL;
+       return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
+ }
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_msr);
@@@ -8044,3 -8180,4 +8181,4 @@@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_write_tsc_offset);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ple_window);
  EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pml_full);
+ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_pi_irte_update);
diff --combined include/kvm/arm_vgic.h
@@@ -112,7 -112,6 +112,6 @@@ struct vgic_vmcr 
  struct vgic_ops {
        struct vgic_lr  (*get_lr)(const struct kvm_vcpu *, int);
        void    (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr);
-       void    (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
        u64     (*get_elrsr)(const struct kvm_vcpu *vcpu);
        u64     (*get_eisr)(const struct kvm_vcpu *vcpu);
        void    (*clear_eisr)(struct kvm_vcpu *vcpu);
@@@ -159,7 -158,6 +158,6 @@@ struct irq_phys_map 
        u32                     virt_irq;
        u32                     phys_irq;
        u32                     irq;
-       bool                    active;
  };
  
  struct irq_phys_map_entry {
@@@ -282,7 -280,7 +280,7 @@@ struct vgic_v2_cpu_if 
  };
  
  struct vgic_v3_cpu_if {
 -#ifdef CONFIG_ARM_GIC_V3
 +#ifdef CONFIG_KVM_ARM_VGIC_V3
        u32             vgic_hcr;
        u32             vgic_vmcr;
        u32             vgic_sre;       /* Restored only, change ignored */
  };
  
  struct vgic_cpu {
-       /* per IRQ to LR mapping */
-       u8              *vgic_irq_lr_map;
        /* Pending/active/both interrupts on this VCPU */
-       DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
-       DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS);
-       DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
+       DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS);
+       DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS);
+       DECLARE_BITMAP(pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
  
        /* Pending/active/both shared interrupts, dynamically sized */
        unsigned long   *pending_shared;
        unsigned long   *active_shared;
        unsigned long   *pend_act_shared;
  
-       /* Bitmap of used/free list registers */
-       DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
        /* Number of list registers on this CPU */
        int             nr_lr;
  
@@@ -354,8 -346,6 +346,6 @@@ int kvm_vgic_vcpu_active_irq(struct kvm
  struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
                                           int virt_irq, int irq);
  int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
- bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
- void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
  
  #define irqchip_in_kernel(k)  (!!((k)->arch.vgic.in_kernel))
  #define vgic_initialized(k)   (!!((k)->arch.vgic.nr_cpus))
  int vgic_v2_probe(struct device_node *vgic_node,
                  const struct vgic_ops **ops,
                  const struct vgic_params **params);
 -#ifdef CONFIG_ARM_GIC_V3
 +#ifdef CONFIG_KVM_ARM_VGIC_V3
  int vgic_v3_probe(struct device_node *vgic_node,
                  const struct vgic_ops **ops,
                  const struct vgic_params **params);
diff --combined virt/kvm/arm/vgic.c
@@@ -34,6 -34,9 +34,9 @@@
  #include <asm/kvm.h>
  #include <kvm/iodev.h>
  
+ #define CREATE_TRACE_POINTS
+ #include "trace.h"
  /*
   * How the whole thing works (courtesy of Christoffer Dall):
   *
  #include "vgic.h"
  
  static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
- static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
+ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
  static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
  static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+ static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
  static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
                                                int virt_irq);
+ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
  
  static const struct vgic_ops *vgic_ops;
  static const struct vgic_params *vgic;
@@@ -357,6 -362,11 +362,11 @@@ static void vgic_dist_irq_clear_soft_pe
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
  
        vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+       if (!vgic_dist_irq_get_level(vcpu, irq)) {
+               vgic_dist_irq_clear_pending(vcpu, irq);
+               if (!compute_pending_for_cpu(vcpu))
+                       clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
+       }
  }
  
  static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@@ -531,34 -541,6 +541,6 @@@ bool vgic_handle_set_pending_reg(struc
        return false;
  }
  
- /*
-  * If a mapped interrupt's state has been modified by the guest such that it
-  * is no longer active or pending, without it have gone through the sync path,
-  * then the map->active field must be cleared so the interrupt can be taken
-  * again.
-  */
- static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu)
- {
-       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-       struct list_head *root;
-       struct irq_phys_map_entry *entry;
-       struct irq_phys_map *map;
-       rcu_read_lock();
-       /* Check for PPIs */
-       root = &vgic_cpu->irq_phys_map_list;
-       list_for_each_entry_rcu(entry, root, entry) {
-               map = &entry->map;
-               if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) &&
-                   !vgic_irq_is_active(vcpu, map->virt_irq))
-                       map->active = false;
-       }
-       rcu_read_unlock();
- }
  bool vgic_handle_clear_pending_reg(struct kvm *kvm,
                                   struct kvm_exit_mmio *mmio,
                                   phys_addr_t offset, int vcpu_id)
                                          vcpu_id, offset);
                vgic_reg_access(mmio, reg, offset, mode);
  
-               vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
                vgic_update_state(kvm);
                return true;
        }
@@@ -627,7 -608,6 +608,6 @@@ bool vgic_handle_clear_active_reg(struc
                        ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
  
        if (mmio->is_write) {
-               vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
                vgic_update_state(kvm);
                return true;
        }
@@@ -684,10 -664,9 +664,9 @@@ bool vgic_handle_cfg_reg(u32 *reg, stru
        vgic_reg_access(mmio, &val, offset,
                        ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
        if (mmio->is_write) {
-               if (offset < 8) {
-                       *reg = ~0U; /* Force PPIs/SGIs to 1 */
+               /* Ignore writes to read-only SGI and PPI bits */
+               if (offset < 8)
                        return false;
-               }
  
                val = vgic_cfg_compress(val);
                if (offset & 4) {
  void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
  {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       u64 elrsr = vgic_get_elrsr(vcpu);
+       unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
        int i;
  
-       for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+       for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
                struct vgic_lr lr = vgic_get_lr(vcpu, i);
  
                /*
                 * interrupt then move the active state to the
                 * distributor tracking bit.
                 */
-               if (lr.state & LR_STATE_ACTIVE) {
+               if (lr.state & LR_STATE_ACTIVE)
                        vgic_irq_set_active(vcpu, lr.irq);
-                       lr.state &= ~LR_STATE_ACTIVE;
-               }
  
                /*
                 * Reestablish the pending state on the distributor and the
-                * CPU interface.  It may have already been pending, but that
-                * is fine, then we are only setting a few bits that were
-                * already set.
+                * CPU interface and mark the LR as free for other use.
                 */
-               if (lr.state & LR_STATE_PENDING) {
-                       vgic_dist_irq_set_pending(vcpu, lr.irq);
-                       lr.state &= ~LR_STATE_PENDING;
-               }
-               vgic_set_lr(vcpu, i, lr);
-               /*
-                * Mark the LR as free for other use.
-                */
-               BUG_ON(lr.state & LR_STATE_MASK);
-               vgic_retire_lr(i, lr.irq, vcpu);
-               vgic_irq_clear_queued(vcpu, lr.irq);
+               vgic_retire_lr(i, vcpu);
  
                /* Finally update the VGIC state. */
                vgic_update_state(vcpu->kvm);
@@@ -1067,12 -1032,6 +1032,6 @@@ static void vgic_set_lr(struct kvm_vcp
        vgic_ops->set_lr(vcpu, lr, vlr);
  }
  
- static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-                              struct vgic_lr vlr)
- {
-       vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
- }
  static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
  {
        return vgic_ops->get_elrsr(vcpu);
@@@ -1118,25 -1077,23 +1077,23 @@@ static inline void vgic_enable(struct k
        vgic_ops->enable(vcpu);
  }
  
- static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
  {
-       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
  
+       vgic_irq_clear_queued(vcpu, vlr.irq);
        /*
         * We must transfer the pending state back to the distributor before
         * retiring the LR, otherwise we may loose edge-triggered interrupts.
         */
        if (vlr.state & LR_STATE_PENDING) {
-               vgic_dist_irq_set_pending(vcpu, irq);
+               vgic_dist_irq_set_pending(vcpu, vlr.irq);
                vlr.hwirq = 0;
        }
  
        vlr.state = 0;
        vgic_set_lr(vcpu, lr_nr, vlr);
-       clear_bit(lr_nr, vgic_cpu->lr_used);
-       vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
-       vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
  }
  
  /*
   */
  static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
  {
-       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+       u64 elrsr = vgic_get_elrsr(vcpu);
+       unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
        int lr;
  
-       for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
+       for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
                struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
  
-               if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
-                       vgic_retire_lr(lr, vlr.irq, vcpu);
-                       if (vgic_irq_is_queued(vcpu, vlr.irq))
-                               vgic_irq_clear_queued(vcpu, vlr.irq);
-               }
+               if (!vgic_irq_is_enabled(vcpu, vlr.irq))
+                       vgic_retire_lr(lr, vcpu);
        }
  }
  
@@@ -1200,7 -1155,6 +1155,6 @@@ static void vgic_queue_irq_to_lr(struc
        }
  
        vgic_set_lr(vcpu, lr_nr, vlr);
-       vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
  }
  
  /*
   */
  bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
  {
-       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       u64 elrsr = vgic_get_elrsr(vcpu);
+       unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
        struct vgic_lr vlr;
        int lr;
  
  
        kvm_debug("Queue IRQ%d\n", irq);
  
-       lr = vgic_cpu->vgic_irq_lr_map[irq];
        /* Do we have an active interrupt for the same CPUID? */
-       if (lr != LR_EMPTY) {
+       for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
                vlr = vgic_get_lr(vcpu, lr);
-               if (vlr.source == sgi_source_id) {
+               if (vlr.irq == irq && vlr.source == sgi_source_id) {
                        kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
-                       BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
                        vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
                        return true;
                }
        }
  
        /* Try to use another LR for this interrupt */
-       lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-                              vgic->nr_lr);
+       lr = find_first_bit(elrsr_ptr, vgic->nr_lr);
        if (lr >= vgic->nr_lr)
                return false;
  
        kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-       vgic_cpu->vgic_irq_lr_map[irq] = lr;
-       set_bit(lr, vgic_cpu->lr_used);
  
        vlr.irq = irq;
        vlr.source = sgi_source_id;
@@@ -1338,12 -1287,60 +1287,60 @@@ epilog
        }
  }
  
+ static int process_queued_irq(struct kvm_vcpu *vcpu,
+                                  int lr, struct vgic_lr vlr)
+ {
+       int pending = 0;
+       /*
+        * If the IRQ was EOIed (called from vgic_process_maintenance) or it
+        * went from active to non-active (called from vgic_sync_hwirq) it was
+        * also ACKed and we therefore assume we can clear the soft pending
+        * state (should it have been set) for this interrupt.
+        *
+        * Note: if the IRQ soft pending state was set after the IRQ was
+        * acked, it actually shouldn't be cleared, but we have no way of
+        * knowing that unless we start trapping ACKs when the soft-pending
+        * state is set.
+        */
+       vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+       /*
+        * Tell the gic to start sampling this interrupt again.
+        */
+       vgic_irq_clear_queued(vcpu, vlr.irq);
+       /* Any additional pending interrupt? */
+       if (vgic_irq_is_edge(vcpu, vlr.irq)) {
+               BUG_ON(!(vlr.state & LR_HW));
+               pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
+       } else {
+               if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+                       vgic_cpu_irq_set(vcpu, vlr.irq);
+                       pending = 1;
+               } else {
+                       vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+                       vgic_cpu_irq_clear(vcpu, vlr.irq);
+               }
+       }
+       /*
+        * Despite being EOIed, the LR may not have
+        * been marked as empty.
+        */
+       vlr.state = 0;
+       vlr.hwirq = 0;
+       vgic_set_lr(vcpu, lr, vlr);
+       return pending;
+ }
  static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
  {
        u32 status = vgic_get_interrupt_status(vcpu);
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-       bool level_pending = false;
        struct kvm *kvm = vcpu->kvm;
+       int level_pending = 0;
  
        kvm_debug("STATUS = %08x\n", status);
  
  
                for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
                        struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
-                       WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
  
-                       spin_lock(&dist->lock);
-                       vgic_irq_clear_queued(vcpu, vlr.irq);
+                       WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
                        WARN_ON(vlr.state & LR_STATE_MASK);
-                       vlr.state = 0;
-                       vgic_set_lr(vcpu, lr, vlr);
  
-                       /*
-                        * If the IRQ was EOIed it was also ACKed and we we
-                        * therefore assume we can clear the soft pending
-                        * state (should it had been set) for this interrupt.
-                        *
-                        * Note: if the IRQ soft pending state was set after
-                        * the IRQ was acked, it actually shouldn't be
-                        * cleared, but we have no way of knowing that unless
-                        * we start trapping ACKs when the soft-pending state
-                        * is set.
-                        */
-                       vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
  
                        /*
                         * kvm_notify_acked_irq calls kvm_set_irq()
-                        * to reset the IRQ level. Need to release the
-                        * lock for kvm_set_irq to grab it.
+                        * to reset the IRQ level, which grabs the dist->lock
+                        * so we call this before taking the dist->lock.
                         */
-                       spin_unlock(&dist->lock);
                        kvm_notify_acked_irq(kvm, 0,
                                             vlr.irq - VGIC_NR_PRIVATE_IRQS);
-                       spin_lock(&dist->lock);
-                       /* Any additional pending interrupt? */
-                       if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
-                               vgic_cpu_irq_set(vcpu, vlr.irq);
-                               level_pending = true;
-                       } else {
-                               vgic_dist_irq_clear_pending(vcpu, vlr.irq);
-                               vgic_cpu_irq_clear(vcpu, vlr.irq);
-                       }
  
+                       spin_lock(&dist->lock);
+                       level_pending |= process_queued_irq(vcpu, lr, vlr);
                        spin_unlock(&dist->lock);
-                       /*
-                        * Despite being EOIed, the LR may not have
-                        * been marked as empty.
-                        */
-                       vgic_sync_lr_elrsr(vcpu, lr, vlr);
                }
        }
  
  /*
   * Save the physical active state, and reset it to inactive.
   *
-  * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
+  * Return true if there's a pending forwarded interrupt to queue.
   */
- static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
+ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
  {
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        struct irq_phys_map *map;
+       bool phys_active;
+       bool level_pending;
        int ret;
  
        if (!(vlr.state & LR_HW))
-               return 0;
+               return false;
  
        map = vgic_irq_map_search(vcpu, vlr.irq);
        BUG_ON(!map);
  
        ret = irq_get_irqchip_state(map->irq,
                                    IRQCHIP_STATE_ACTIVE,
-                                   &map->active);
+                                   &phys_active);
  
        WARN_ON(ret);
  
-       if (map->active)
+       if (phys_active)
                return 0;
  
-       return 1;
+       spin_lock(&dist->lock);
+       level_pending = process_queued_irq(vcpu, lr, vlr);
+       spin_unlock(&dist->lock);
+       return level_pending;
  }
  
  /* Sync back the VGIC state after a guest run */
  static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
  {
-       struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        u64 elrsr;
        unsigned long *elrsr_ptr;
        bool level_pending;
  
        level_pending = vgic_process_maintenance(vcpu);
-       elrsr = vgic_get_elrsr(vcpu);
-       elrsr_ptr = u64_to_bitmask(&elrsr);
  
        /* Deal with HW interrupts, and clear mappings for empty LRs */
        for (lr = 0; lr < vgic->nr_lr; lr++) {
-               struct vgic_lr vlr;
-               if (!test_bit(lr, vgic_cpu->lr_used))
-                       continue;
-               vlr = vgic_get_lr(vcpu, lr);
-               if (vgic_sync_hwirq(vcpu, vlr)) {
-                       /*
-                        * So this is a HW interrupt that the guest
-                        * EOI-ed. Clean the LR state and allow the
-                        * interrupt to be sampled again.
-                        */
-                       vlr.state = 0;
-                       vlr.hwirq = 0;
-                       vgic_set_lr(vcpu, lr, vlr);
-                       vgic_irq_clear_queued(vcpu, vlr.irq);
-                       set_bit(lr, elrsr_ptr);
-               }
-               if (!test_bit(lr, elrsr_ptr))
-                       continue;
-               clear_bit(lr, vgic_cpu->lr_used);
+               struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
  
+               level_pending |= vgic_sync_hwirq(vcpu, lr, vlr);
                BUG_ON(vlr.irq >= dist->nr_irqs);
-               vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
        }
  
        /* Check if we still have something up our sleeve... */
+       elrsr = vgic_get_elrsr(vcpu);
+       elrsr_ptr = u64_to_bitmask(&elrsr);
        pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
        if (level_pending || pending < vgic->nr_lr)
                set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
@@@ -1585,6 -1533,8 +1533,8 @@@ static int vgic_update_irq_pending(stru
        int enabled;
        bool ret = true, can_inject = true;
  
+       trace_vgic_update_irq_pending(cpuid, irq_num, level);
        if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
                return -EINVAL;
  
@@@ -1863,30 -1813,6 +1813,6 @@@ static void vgic_free_phys_irq_map_rcu(
        kfree(entry);
  }
  
- /**
-  * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
-  *
-  * Return the logical active state of a mapped interrupt. This doesn't
-  * necessarily reflects the current HW state.
-  */
- bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
- {
-       BUG_ON(!map);
-       return map->active;
- }
- /**
-  * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
-  *
-  * Set the logical active state of a mapped interrupt. This doesn't
-  * immediately affects the HW state.
-  */
- void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
- {
-       BUG_ON(!map);
-       map->active = active;
- }
  /**
   * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
   * @vcpu: The VCPU pointer
@@@ -1942,12 -1868,10 +1868,10 @@@ void kvm_vgic_vcpu_destroy(struct kvm_v
        kfree(vgic_cpu->pending_shared);
        kfree(vgic_cpu->active_shared);
        kfree(vgic_cpu->pend_act_shared);
-       kfree(vgic_cpu->vgic_irq_lr_map);
        vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
        vgic_cpu->pending_shared = NULL;
        vgic_cpu->active_shared = NULL;
        vgic_cpu->pend_act_shared = NULL;
-       vgic_cpu->vgic_irq_lr_map = NULL;
  }
  
  static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
        vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
        vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
        vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
-       vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
  
        if (!vgic_cpu->pending_shared
                || !vgic_cpu->active_shared
-               || !vgic_cpu->pend_act_shared
-               || !vgic_cpu->vgic_irq_lr_map) {
+               || !vgic_cpu->pend_act_shared) {
                kvm_vgic_vcpu_destroy(vcpu);
                return -ENOMEM;
        }
  
-       memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
        /*
         * Store the number of LRs per vcpu, so we don't have to go
         * all the way to the distributor structure to find out. Only
@@@ -2111,14 -2031,24 +2031,24 @@@ int vgic_init(struct kvm *kvm
                        break;
                }
  
-               for (i = 0; i < dist->nr_irqs; i++) {
-                       if (i < VGIC_NR_PPIS)
+               /*
+                * Enable and configure all SGIs to be edge-triggered and
+                * configure all PPIs as level-triggered.
+                */
+               for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
+                       if (i < VGIC_NR_SGIS) {
+                               /* SGIs */
                                vgic_bitmap_set_irq_val(&dist->irq_enabled,
                                                        vcpu->vcpu_id, i, 1);
-                       if (i < VGIC_NR_PRIVATE_IRQS)
                                vgic_bitmap_set_irq_val(&dist->irq_cfg,
                                                        vcpu->vcpu_id, i,
                                                        VGIC_CFG_EDGE);
+                       } else if (i < VGIC_NR_PRIVATE_IRQS) {
+                               /* PPIs */
+                               vgic_bitmap_set_irq_val(&dist->irq_cfg,
+                                                       vcpu->vcpu_id, i,
+                                                       VGIC_CFG_LEVEL);
+                       }
                }
  
                vgic_enable(vcpu);
@@@ -2137,7 -2067,7 +2067,7 @@@ static int init_vgic_model(struct kvm *
        case KVM_DEV_TYPE_ARM_VGIC_V2:
                vgic_v2_init_emulation(kvm);
                break;
 -#ifdef CONFIG_ARM_GIC_V3
 +#ifdef CONFIG_KVM_ARM_VGIC_V3
        case KVM_DEV_TYPE_ARM_VGIC_V3:
                vgic_v3_init_emulation(kvm);
                break;
@@@ -2299,7 -2229,7 +2229,7 @@@ int kvm_vgic_addr(struct kvm *kvm, unsi
                block_size = KVM_VGIC_V2_CPU_SIZE;
                alignment = SZ_4K;
                break;
 -#ifdef CONFIG_ARM_GIC_V3
 +#ifdef CONFIG_KVM_ARM_VGIC_V3
        case KVM_VGIC_V3_ADDR_TYPE_DIST:
                type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
                addr_ptr = &vgic->vgic_dist_base;