Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 27 Apr 2015 21:05:19 +0000 (14:05 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 27 Apr 2015 21:05:19 +0000 (14:05 -0700)
Pull networking fixes from David Miller:

 1) mlx4 doesn't check fully for supported valid RSS hash function, fix
    from Amir Vadai

 2) Off by one in ibmveth_change_mtu(), from David Gibson

 3) Prevent altera chip from reporting false error interrupts in some
    circumstances, from Chee Nouk Phoon

 4) Get rid of that stupid endless loop trying to allocate a FIN packet
    in TCP, and in the process kill deadlocks.  From Eric Dumazet

 5) Fix get_rps_cpus() crash due to wrong invalid-cpu value, also from
    Eric Dumazet

 6) Fix two bugs in async rhashtable resizing, from Thomas Graf

 7) Fix topology server listener socket namespace bug in TIPC, from Ying
    Xue

 8) Add some missing HAS_DMA kconfig dependencies, from Geert
    Uytterhoeven

 9) bgmac driver intends to force re-polling but does so by returning
    the wrong value from it's ->poll() handler.  Fix from Rafał Miłecki

10) When the creater of an rhashtable configures a max size for it,
    don't bark in the logs and drop insertions when that is exceeded.
    Fix from Johannes Berg

11) Recover from out of order packets in ppp mppe properly, from Sylvain
    Rochet

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (41 commits)
  bnx2x: really disable TPA if 'disable_tpa' option is set
  net:treewide: Fix typo in drivers/net
  net/mlx4_en: Prevent setting invalid RSS hash function
  mdio-mux-gpio: use new gpiod_get_array and gpiod_put_array functions
  netfilter; Add some missing default cases to switch statements in nft_reject.
  ppp: mppe: discard late packet in stateless mode
  ppp: mppe: sanity error path rework
  net/bonding: Make DRV macros private
  net: rfs: fix crash in get_rps_cpus()
  altera tse: add support for fixed-links.
  pxa168: fix double deallocation of managed resources
  net: fix crash in build_skb()
  net: eth: altera: Resolve false errors from MSGDMA to TSE
  ehea: Fix memory hook reference counting crashes
  net/tg3: Release IRQs on permanent error
  net: mdio-gpio: support access that may sleep
  inet: fix possible panic in reqsk_queue_unlink()
  rhashtable: don't attempt to grow when at max_size
  bgmac: fix requests for extra polling calls from NAPI
  tcp: avoid looping in tcp_send_fin()
  ...

1057 files changed:
CREDITS
Documentation/ABI/testing/sysfs-class-mtd
Documentation/ABI/testing/sysfs-driver-toshiba_acpi
Documentation/ABI/testing/sysfs-platform-dell-laptop [new file with mode: 0644]
Documentation/arm64/acpi_object_usage.txt [new file with mode: 0644]
Documentation/arm64/arm-acpi.txt [new file with mode: 0644]
Documentation/devicetree/bindings/arc/pct.txt [new file with mode: 0644]
Documentation/devicetree/bindings/arc/pmu.txt [deleted file]
Documentation/devicetree/bindings/arm/altera.txt [new file with mode: 0644]
Documentation/devicetree/bindings/arm/arch_timer.txt
Documentation/devicetree/bindings/arm/msm/timer.txt
Documentation/devicetree/bindings/common-properties.txt [new file with mode: 0644]
Documentation/devicetree/bindings/cris/axis.txt [new file with mode: 0644]
Documentation/devicetree/bindings/cris/boards.txt [new file with mode: 0644]
Documentation/devicetree/bindings/cris/interrupts.txt [new file with mode: 0644]
Documentation/devicetree/bindings/dma/apm-xgene-dma.txt [new file with mode: 0644]
Documentation/devicetree/bindings/dma/jz4780-dma.txt [new file with mode: 0644]
Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
Documentation/devicetree/bindings/dma/rcar-audmapp.txt [deleted file]
Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt [new file with mode: 0644]
Documentation/devicetree/bindings/mtd/m25p80.txt
Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt
Documentation/devicetree/bindings/mtd/sunxi-nand.txt
Documentation/devicetree/bindings/pwm/imx-pwm.txt
Documentation/devicetree/bindings/vendor-prefixes.txt
Documentation/dma-buf-sharing.txt
Documentation/filesystems/nfs/nfs-rdma.txt
Documentation/filesystems/xfs.txt
Documentation/ioctl/ioctl-number.txt
Documentation/kernel-parameters.txt
Documentation/laptops/thinkpad-acpi.txt
Documentation/md-cluster.txt [new file with mode: 0644]
Documentation/target/tcm_mod_builder.py
Documentation/target/tcmu-design.txt
Documentation/virtual/kvm/api.txt
MAINTAINERS
Makefile
arch/arc/boot/dts/angel4.dts
arch/arc/configs/nsimosci_defconfig
arch/arc/include/asm/arcregs.h
arch/arc/include/asm/bitops.h
arch/arc/include/asm/perf_event.h
arch/arc/kernel/perf_event.c
arch/arc/kernel/process.c
arch/arc/kernel/setup.c
arch/arc/kernel/traps.c
arch/arc/mm/init.c
arch/arm/boot/dts/qcom-ipq8064.dtsi
arch/arm/include/uapi/asm/kvm.h
arch/arm/kernel/head-nommu.S
arch/arm/kvm/arm.c
arch/arm/mach-shmobile/board-armadillo800eva.c
arch/arm/mach-shmobile/board-bockw.c
arch/arm/mach-shmobile/board-kzm9g.c
arch/arm/mach-shmobile/board-marzen.c
arch/arm/mm/Kconfig
arch/arm/vdso/.gitignore
arch/arm/vdso/Makefile
arch/arm64/Kconfig
arch/arm64/boot/dts/apm/apm-storm.dtsi
arch/arm64/include/asm/acenv.h [new file with mode: 0644]
arch/arm64/include/asm/acpi.h [new file with mode: 0644]
arch/arm64/include/asm/cpu_ops.h
arch/arm64/include/asm/fixmap.h
arch/arm64/include/asm/irq.h
arch/arm64/include/asm/pci.h
arch/arm64/include/asm/psci.h
arch/arm64/include/asm/smp.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/kernel/Makefile
arch/arm64/kernel/acpi.c [new file with mode: 0644]
arch/arm64/kernel/cpu_ops.c
arch/arm64/kernel/pci.c
arch/arm64/kernel/psci.c
arch/arm64/kernel/setup.c
arch/arm64/kernel/smp.c
arch/arm64/kernel/time.c
arch/blackfin/configs/BF518F-EZBRD_defconfig
arch/blackfin/configs/BF527-TLL6527M_defconfig
arch/blackfin/configs/BF533-EZKIT_defconfig
arch/blackfin/configs/BF533-STAMP_defconfig
arch/blackfin/configs/BF537-STAMP_defconfig
arch/blackfin/configs/BF538-EZKIT_defconfig
arch/blackfin/configs/BF561-ACVILON_defconfig
arch/blackfin/configs/BF561-EZKIT-SMP_defconfig
arch/blackfin/configs/BF561-EZKIT_defconfig
arch/blackfin/configs/BF609-EZKIT_defconfig
arch/blackfin/configs/CM-BF527_defconfig
arch/blackfin/configs/CM-BF533_defconfig
arch/blackfin/configs/CM-BF537E_defconfig
arch/blackfin/configs/CM-BF537U_defconfig
arch/blackfin/configs/CM-BF548_defconfig
arch/blackfin/configs/CM-BF561_defconfig
arch/blackfin/configs/DNP5370_defconfig
arch/blackfin/configs/IP0X_defconfig
arch/blackfin/configs/PNAV-10_defconfig
arch/blackfin/configs/SRV1_defconfig
arch/blackfin/configs/TCM-BF518_defconfig
arch/blackfin/configs/TCM-BF537_defconfig
arch/blackfin/include/asm/io.h
arch/blackfin/include/uapi/asm/unistd.h
arch/blackfin/kernel/debug-mmrs.c
arch/blackfin/kernel/kgdb.c
arch/blackfin/kernel/setup.c
arch/blackfin/mach-bf527/include/mach/cdefBF525.h
arch/blackfin/mach-bf527/include/mach/defBF525.h
arch/blackfin/mach-bf548/include/mach/cdefBF542.h
arch/blackfin/mach-bf548/include/mach/cdefBF547.h
arch/blackfin/mach-bf548/include/mach/defBF542.h
arch/blackfin/mach-bf548/include/mach/defBF547.h
arch/blackfin/mach-bf609/boards/ezkit.c
arch/blackfin/mach-bf609/clock.c
arch/blackfin/mach-common/entry.S
arch/blackfin/mach-common/pm.c
arch/cris/Kconfig
arch/cris/Makefile
arch/cris/arch-v32/kernel/Makefile
arch/cris/arch-v32/kernel/entry.S
arch/cris/arch-v32/kernel/head.S
arch/cris/arch-v32/kernel/irq.c
arch/cris/arch-v32/kernel/setup.c
arch/cris/arch-v32/kernel/signal.c
arch/cris/arch-v32/kernel/smp.c [deleted file]
arch/cris/arch-v32/kernel/time.c
arch/cris/arch-v32/lib/Makefile
arch/cris/arch-v32/lib/spinlock.S [deleted file]
arch/cris/arch-v32/mm/init.c
arch/cris/arch-v32/mm/mmu.S
arch/cris/boot/dts/Makefile [new file with mode: 0644]
arch/cris/boot/dts/dev88.dts [new file with mode: 0644]
arch/cris/boot/dts/etraxfs.dtsi [new file with mode: 0644]
arch/cris/include/arch-v10/arch/atomic.h [deleted file]
arch/cris/include/arch-v10/arch/system.h
arch/cris/include/arch-v32/arch/atomic.h [deleted file]
arch/cris/include/arch-v32/arch/processor.h
arch/cris/include/arch-v32/arch/spinlock.h [deleted file]
arch/cris/include/asm/Kbuild
arch/cris/include/asm/atomic.h [deleted file]
arch/cris/include/asm/bitops.h
arch/cris/include/asm/cmpxchg.h [deleted file]
arch/cris/include/asm/device.h [deleted file]
arch/cris/include/asm/div64.h [deleted file]
arch/cris/include/asm/elf.h
arch/cris/include/asm/emergency-restart.h [deleted file]
arch/cris/include/asm/futex.h [deleted file]
arch/cris/include/asm/hardirq.h [deleted file]
arch/cris/include/asm/irq_regs.h [deleted file]
arch/cris/include/asm/kdebug.h [deleted file]
arch/cris/include/asm/kmap_types.h [deleted file]
arch/cris/include/asm/local.h [deleted file]
arch/cris/include/asm/local64.h [deleted file]
arch/cris/include/asm/percpu.h [deleted file]
arch/cris/include/asm/smp.h [deleted file]
arch/cris/include/asm/spinlock.h [deleted file]
arch/cris/include/asm/tlbflush.h
arch/cris/include/asm/topology.h [deleted file]
arch/cris/kernel/Makefile
arch/cris/kernel/devicetree.c [new file with mode: 0644]
arch/cris/kernel/ptrace.c
arch/cris/kernel/setup.c
arch/cris/kernel/time.c
arch/frv/include/asm/io.h
arch/ia64/Kconfig
arch/ia64/kernel/acpi.c
arch/ia64/kernel/perfmon.c
arch/ia64/pci/pci.c
arch/metag/kernel/process.c
arch/mn10300/include/asm/io.h
arch/nios2/include/asm/Kbuild
arch/nios2/include/asm/shmparam.h [new file with mode: 0644]
arch/nios2/include/uapi/asm/ptrace.h
arch/nios2/kernel/entry.S
arch/nios2/kernel/traps.c
arch/nios2/mm/cacheflush.c
arch/powerpc/include/asm/archrandom.h
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_book3s_64.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/include/asm/kvm_ppc.h
arch/powerpc/include/asm/pgtable.h
arch/powerpc/include/asm/time.h
arch/powerpc/kernel/asm-offsets.c
arch/powerpc/kernel/eeh.c
arch/powerpc/kernel/io-workarounds.c
arch/powerpc/kernel/time.c
arch/powerpc/kvm/Kconfig
arch/powerpc/kvm/book3s.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_builtin.c
arch/powerpc/kvm/book3s_hv_rm_mmu.c
arch/powerpc/kvm/book3s_hv_rm_xics.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/book3s_pr_papr.c
arch/powerpc/kvm/book3s_xics.c
arch/powerpc/kvm/book3s_xics.h
arch/powerpc/kvm/e500_mmu_host.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/perf/callchain.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/powernv/rng.c
arch/s390/hypfs/inode.c
arch/s390/kvm/kvm-s390.c
arch/sh/boards/board-sh7757lcr.c
arch/sh/boards/mach-ap325rxa/setup.c
arch/sh/boards/mach-ecovec24/setup.c
arch/sh/boards/mach-kfr2r09/setup.c
arch/sh/boards/mach-migor/setup.c
arch/sh/boards/mach-se/7724/setup.c
arch/x86/Kconfig
arch/x86/crypto/sha512-avx2-asm.S
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/lguest.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/entry_64.S
arch/x86/kernel/process_64.c
arch/x86/kvm/assigned-dev.c
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lguest/boot.c
arch/x86/lguest/head_32.S
arch/x86/lib/usercopy_64.c
crypto/async_tx/async_pq.c
drivers/acpi/Kconfig
drivers/acpi/Makefile
drivers/acpi/acpi_processor.c
drivers/acpi/bus.c
drivers/acpi/ec.c
drivers/acpi/gsi.c [new file with mode: 0644]
drivers/acpi/internal.h
drivers/acpi/osl.c
drivers/acpi/processor_core.c
drivers/acpi/scan.c
drivers/acpi/tables.c
drivers/base/devtmpfs.c
drivers/block/drbd/drbd_debugfs.c
drivers/block/rbd.c
drivers/clocksource/arm_arch_timer.c
drivers/cpufreq/intel_pstate.c
drivers/crypto/Kconfig
drivers/dma-buf/dma-buf.c
drivers/dma/Kconfig
drivers/dma/Makefile
drivers/dma/amba-pl08x.c
drivers/dma/at_hdmac.c
drivers/dma/at_xdmac.c
drivers/dma/bestcomm/bestcomm.c
drivers/dma/dma-jz4740.c
drivers/dma/dma-jz4780.c [new file with mode: 0644]
drivers/dma/dmaengine.c
drivers/dma/dw/Kconfig
drivers/dma/dw/core.c
drivers/dma/edma.c
drivers/dma/fsl_raid.c [new file with mode: 0644]
drivers/dma/fsl_raid.h [new file with mode: 0644]
drivers/dma/img-mdc-dma.c
drivers/dma/imx-sdma.c
drivers/dma/ioat/dca.c
drivers/dma/ioat/dma.c
drivers/dma/ioat/dma.h
drivers/dma/ioat/dma_v2.c
drivers/dma/ioat/dma_v2.h
drivers/dma/ioat/dma_v3.c
drivers/dma/ioat/hw.h
drivers/dma/ioat/pci.c
drivers/dma/ioat/registers.h
drivers/dma/iop-adma.c
drivers/dma/k3dma.c
drivers/dma/mmp_pdma.c
drivers/dma/mmp_tdma.c
drivers/dma/mpc512x_dma.c
drivers/dma/mv_xor.c
drivers/dma/mv_xor.h
drivers/dma/pch_dma.c
drivers/dma/pl330.c
drivers/dma/ppc4xx/adma.c
drivers/dma/qcom_bam_dma.c
drivers/dma/s3c24xx-dma.c
drivers/dma/sa11x0-dma.c
drivers/dma/sh/Kconfig
drivers/dma/sh/Makefile
drivers/dma/sh/rcar-audmapp.c [deleted file]
drivers/dma/sh/shdma-base.c
drivers/dma/sh/shdmac.c
drivers/dma/sh/usb-dmac.c [new file with mode: 0644]
drivers/dma/sirf-dma.c
drivers/dma/ste_dma40.c
drivers/dma/sun6i-dma.c
drivers/dma/xgene-dma.c [new file with mode: 0755]
drivers/dma/xilinx/xilinx_vdma.c
drivers/gpu/drm/armada/armada_gem.c
drivers/gpu/drm/drm_prime.c
drivers/gpu/drm/exynos/exynos_drm_dmabuf.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_dmabuf.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_i2c.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
drivers/gpu/drm/tegra/gem.c
drivers/gpu/drm/ttm/ttm_object.c
drivers/gpu/drm/udl/udl_dmabuf.c
drivers/i2c/busses/i2c-cros-ec-tunnel.c
drivers/i2c/busses/i2c-digicolor.c
drivers/i2c/busses/i2c-mxs.c
drivers/i2c/busses/i2c-pca-platform.c
drivers/i2c/busses/i2c-rk3x.c
drivers/i2c/busses/i2c-st.c
drivers/i2c/i2c-core.c
drivers/i2c/i2c-mux.c
drivers/infiniband/core/umem.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/hw/ipath/ipath_fs.c
drivers/infiniband/hw/mlx4/alias_GUID.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/sysfs.c
drivers/infiniband/hw/qib/qib_fs.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/isert/ib_isert.h
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/input/keyboard/cros_ec_keyb.c
drivers/iommu/intel-iommu.c
drivers/iommu/intel_irq_remapping.c
drivers/irqchip/irq-gic.c
drivers/irqchip/irqchip.c
drivers/lguest/hypercalls.c
drivers/lguest/interrupts_and_traps.c
drivers/lguest/lg.h
drivers/lguest/lguest_user.c
drivers/md/Kconfig
drivers/md/Makefile
drivers/md/bitmap.c
drivers/md/bitmap.h
drivers/md/md-cluster.c [new file with mode: 0644]
drivers/md/md-cluster.h [new file with mode: 0644]
drivers/md/md.c
drivers/md/md.h
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/media/platform/xilinx/xilinx-dma.c
drivers/media/v4l2-core/videobuf2-dma-contig.c
drivers/media/v4l2-core/videobuf2-dma-sg.c
drivers/media/v4l2-core/videobuf2-vmalloc.c
drivers/mfd/cros_ec.c
drivers/mmc/host/sh_mmcif.c
drivers/mmc/host/sh_mobile_sdhi.c
drivers/mmc/host/tmio_mmc.h
drivers/mmc/host/tmio_mmc_dma.c
drivers/mtd/Kconfig
drivers/mtd/chips/cfi_cmdset_0020.c
drivers/mtd/devices/block2mtd.c
drivers/mtd/devices/docg3.c
drivers/mtd/devices/m25p80.c
drivers/mtd/maps/Kconfig
drivers/mtd/maps/sa1100-flash.c
drivers/mtd/maps/ts5500_flash.c
drivers/mtd/mtd_blkdevs.c
drivers/mtd/mtdcore.c
drivers/mtd/mtdpart.c
drivers/mtd/nand/atmel_nand.c
drivers/mtd/nand/atmel_nand_ecc.h
drivers/mtd/nand/atmel_nand_nfc.h
drivers/mtd/nand/denali.c
drivers/mtd/nand/fsl_ifc_nand.c
drivers/mtd/nand/fsmc_nand.c
drivers/mtd/nand/gpmi-nand/gpmi-nand.c
drivers/mtd/nand/mxc_nand.c
drivers/mtd/nand/nand_base.c
drivers/mtd/nand/pxa3xx_nand.c
drivers/mtd/nand/s3c2410.c
drivers/mtd/nand/sh_flctl.c
drivers/mtd/onenand/onenand_base.c
drivers/mtd/spi-nor/fsl-quadspi.c
drivers/mtd/spi-nor/spi-nor.c
drivers/mtd/tests/mtd_nandecctest.c
drivers/mtd/tests/mtd_test.h
drivers/mtd/tests/nandbiterrs.c
drivers/mtd/tests/oobtest.c
drivers/mtd/tests/pagetest.c
drivers/mtd/tests/readtest.c
drivers/mtd/tests/speedtest.c
drivers/mtd/tests/stresstest.c
drivers/mtd/tests/subpagetest.c
drivers/mtd/tests/torturetest.c
drivers/mtd/ubi/build.c
drivers/mtd/ubi/kapi.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/eq.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
drivers/of/Kconfig
drivers/of/base.c
drivers/of/fdt.c
drivers/of/unittest.c
drivers/oprofile/oprofilefs.c
drivers/platform/chrome/Kconfig
drivers/platform/chrome/Makefile
drivers/platform/chrome/chromeos_laptop.c
drivers/platform/chrome/cros_ec_dev.c [new file with mode: 0644]
drivers/platform/chrome/cros_ec_dev.h [new file with mode: 0644]
drivers/platform/chrome/cros_ec_lightbar.c [new file with mode: 0644]
drivers/platform/chrome/cros_ec_lpc.c [new file with mode: 0644]
drivers/platform/chrome/cros_ec_sysfs.c [new file with mode: 0644]
drivers/platform/x86/Kconfig
drivers/platform/x86/apple-gmux.c
drivers/platform/x86/dell-laptop.c
drivers/platform/x86/intel_oaktrail.c
drivers/platform/x86/thinkpad_acpi.c
drivers/platform/x86/toshiba_acpi.c
drivers/platform/x86/toshiba_bluetooth.c
drivers/platform/x86/wmi.c
drivers/powercap/intel_rapl.c
drivers/pwm/core.c
drivers/pwm/pwm-atmel-hlcdc.c
drivers/pwm/pwm-mxs.c
drivers/pwm/pwm-pca9685.c
drivers/pwm/pwm-samsung.c
drivers/s390/kvm/virtio_ccw.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/qla2xxx/tcm_qla2xxx.c
drivers/scsi/qla2xxx/tcm_qla2xxx.h
drivers/spi/spi-rspi.c
drivers/spi/spi-sh-msiof.c
drivers/staging/android/ion/ion.c
drivers/staging/lustre/lustre/llite/dcache.c
drivers/staging/lustre/lustre/llite/file.c
drivers/staging/lustre/lustre/llite/llite_internal.h
drivers/staging/lustre/lustre/llite/llite_lib.c
drivers/staging/lustre/lustre/llite/llite_nfs.c
drivers/staging/lustre/lustre/llite/namei.c
drivers/staging/lustre/lustre/llite/statahead.c
drivers/staging/lustre/lustre/llite/symlink.c
drivers/staging/lustre/lustre/llite/xattr.c
drivers/target/Kconfig
drivers/target/Makefile
drivers/target/iscsi/Makefile
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target.h
drivers/target/iscsi/iscsi_target_configfs.c
drivers/target/iscsi/iscsi_target_configfs.h [deleted file]
drivers/target/iscsi/iscsi_target_erl0.c
drivers/target/iscsi/iscsi_target_login.c
drivers/target/iscsi/iscsi_target_tpg.c
drivers/target/iscsi/iscsi_target_tpg.h
drivers/target/iscsi/iscsi_target_tq.c [deleted file]
drivers/target/iscsi/iscsi_target_tq.h [deleted file]
drivers/target/iscsi/iscsi_target_util.c
drivers/target/loopback/tcm_loop.c
drivers/target/loopback/tcm_loop.h
drivers/target/sbp/sbp_target.c
drivers/target/target_core_configfs.c
drivers/target/target_core_fabric_configfs.c
drivers/target/target_core_file.c
drivers/target/target_core_iblock.c
drivers/target/target_core_internal.h
drivers/target/target_core_pr.c
drivers/target/target_core_rd.c
drivers/target/target_core_sbc.c
drivers/target/target_core_spc.c
drivers/target/target_core_tmr.c
drivers/target/target_core_tpg.c
drivers/target/target_core_transport.c
drivers/target/target_core_user.c
drivers/target/target_core_xcopy.c
drivers/target/tcm_fc/tcm_fc.h
drivers/target/tcm_fc/tfc_conf.c
drivers/tty/serial/8250/8250_core.c
drivers/tty/serial/8250/8250_early.c
drivers/tty/serial/of_serial.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/legacy/tcm_usb_gadget.c
drivers/vhost/scsi.c
drivers/virtio/Kconfig
drivers/virtio/Makefile
drivers/virtio/virtio.c
drivers/virtio/virtio_balloon.c
drivers/virtio/virtio_input.c [new file with mode: 0644]
drivers/virtio/virtio_mmio.c
drivers/virtio/virtio_pci_modern.c
drivers/watchdog/Kconfig
drivers/watchdog/bcm_kona_wdt.c
drivers/watchdog/octeon-wdt-main.c
drivers/watchdog/pnx4008_wdt.c
drivers/watchdog/qcom-wdt.c
drivers/watchdog/stmp3xxx_rtc_wdt.c
drivers/xen/Kconfig
drivers/xen/Makefile
drivers/xen/xen-scsiback.c
fs/9p/acl.c
fs/9p/vfs_dentry.c
fs/9p/vfs_dir.c
fs/9p/vfs_inode.c
fs/9p/vfs_inode_dotl.c
fs/9p/vfs_super.c
fs/adfs/inode.c
fs/affs/amigaffs.c
fs/affs/inode.c
fs/affs/namei.c
fs/afs/dir.c
fs/afs/inode.c
fs/afs/mntpt.c
fs/afs/super.c
fs/autofs4/autofs_i.h
fs/autofs4/expire.c
fs/autofs4/inode.c
fs/autofs4/root.c
fs/autofs4/symlink.c
fs/autofs4/waitq.c
fs/befs/linuxvfs.c
fs/bfs/dir.c
fs/binfmt_misc.c
fs/block_dev.c
fs/btrfs/async-thread.c
fs/btrfs/async-thread.h
fs/btrfs/backref.c
fs/btrfs/btrfs_inode.h
fs/btrfs/check-integrity.c
fs/btrfs/compression.c
fs/btrfs/compression.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/export.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/lzo.c
fs/btrfs/math.h
fs/btrfs/props.c
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/raid56.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/sysfs.h
fs/btrfs/tests/qgroup-tests.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-log.c
fs/btrfs/tree-log.h
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/btrfs/xattr.c
fs/btrfs/zlib.c
fs/cachefiles/bind.c
fs/cachefiles/interface.c
fs/cachefiles/namei.c
fs/cachefiles/rdwr.c
fs/cachefiles/security.c
fs/cachefiles/xattr.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/debugfs.c
fs/ceph/dir.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/strings.c
fs/ceph/super.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/cifs/cifs_dfs_ref.c
fs/cifs/cifsfs.c
fs/cifs/cifssmb.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/link.c
fs/cifs/misc.c
fs/cifs/readdir.c
fs/cifs/smb1ops.c
fs/cifs/smb2file.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/xattr.c
fs/coda/cache.c
fs/coda/dir.c
fs/coda/inode.c
fs/coda/pioctl.c
fs/coda/upcall.c
fs/configfs/dir.c
fs/configfs/file.c
fs/configfs/inode.c
fs/dax.c
fs/debugfs/file.c
fs/debugfs/inode.c
fs/devpts/inode.c
fs/direct-io.c
fs/ecryptfs/crypto.c
fs/ecryptfs/dentry.c
fs/ecryptfs/file.c
fs/ecryptfs/inode.c
fs/ecryptfs/kthread.c
fs/ecryptfs/main.c
fs/ecryptfs/mmap.c
fs/efivarfs/inode.c
fs/efivarfs/super.c
fs/efs/namei.c
fs/exofs/dir.c
fs/exofs/inode.c
fs/exofs/namei.c
fs/exofs/super.c
fs/exofs/symlink.c
fs/ext2/dir.c
fs/ext2/ialloc.c
fs/ext2/inode.c
fs/ext2/namei.c
fs/ext2/symlink.c
fs/ext2/xattr.c
fs/ext2/xattr_security.c
fs/ext2/xattr_trusted.c
fs/ext2/xattr_user.c
fs/ext3/ialloc.c
fs/ext3/inode.c
fs/ext3/namei.c
fs/ext3/super.c
fs/ext3/symlink.c
fs/ext3/xattr.c
fs/ext3/xattr_security.c
fs/ext3/xattr_trusted.c
fs/ext3/xattr_user.c
fs/ext4/fsync.c
fs/ext4/ialloc.c
fs/ext4/indirect.c
fs/ext4/inline.c
fs/ext4/inode.c
fs/ext4/migrate.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/symlink.c
fs/ext4/xattr.c
fs/ext4/xattr_security.c
fs/ext4/xattr_trusted.c
fs/ext4/xattr_user.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/namei.c
fs/f2fs/xattr.c
fs/fat/file.c
fs/fat/namei_msdos.c
fs/fat/namei_vfat.c
fs/fat/nfs.c
fs/freevxfs/vxfs_immed.c
fs/fuse/control.c
fs/fuse/dir.c
fs/fuse/inode.c
fs/gfs2/dentry.c
fs/gfs2/export.c
fs/gfs2/inode.c
fs/gfs2/ops_fstype.c
fs/gfs2/super.c
fs/gfs2/xattr.c
fs/hfs/attr.c
fs/hfs/dir.c
fs/hfs/inode.c
fs/hfs/sysdep.c
fs/hfsplus/dir.c
fs/hfsplus/inode.c
fs/hfsplus/ioctl.c
fs/hfsplus/xattr.c
fs/hostfs/hostfs_kern.c
fs/hpfs/inode.c
fs/hpfs/namei.c
fs/hppfs/hppfs.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/isofs/export.c
fs/jffs2/dir.c
fs/jffs2/fs.c
fs/jffs2/security.c
fs/jffs2/super.c
fs/jffs2/symlink.c
fs/jffs2/xattr.c
fs/jffs2/xattr_trusted.c
fs/jffs2/xattr_user.c
fs/jfs/file.c
fs/jfs/namei.c
fs/jfs/symlink.c
fs/jfs/xattr.c
fs/kernfs/dir.c
fs/kernfs/inode.c
fs/libfs.c
fs/lockd/svcsubs.c
fs/logfs/dir.c
fs/logfs/file.c
fs/minix/dir.c
fs/minix/file.c
fs/minix/inode.c
fs/minix/namei.c
fs/namei.c
fs/ncpfs/dir.c
fs/ncpfs/inode.c
fs/ncpfs/ioctl.c
fs/ncpfs/ncplib_kernel.c
fs/ncpfs/symlink.c
fs/nfs/Makefile
fs/nfs/blocklayout/blocklayout.c
fs/nfs/blocklayout/dev.c
fs/nfs/callback.c
fs/nfs/client.c
fs/nfs/delegation.c
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/filelayout/filelayout.c
fs/nfs/filelayout/filelayoutdev.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/flexfilelayout/flexfilelayoutdev.c
fs/nfs/getroot.c
fs/nfs/idmap.c [deleted file]
fs/nfs/inode.c
fs/nfs/namespace.c
fs/nfs/nfs3acl.c
fs/nfs/nfs3proc.c
fs/nfs/nfs42proc.c
fs/nfs/nfs42xdr.c
fs/nfs/nfs4client.c
fs/nfs/nfs4file.c
fs/nfs/nfs4idmap.c [new file with mode: 0644]
fs/nfs/nfs4idmap.h [new file with mode: 0644]
fs/nfs/nfs4namespace.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4super.c
fs/nfs/nfs4sysctl.c
fs/nfs/nfs4trace.h
fs/nfs/nfs4xdr.c
fs/nfs/nfstrace.c
fs/nfs/objlayout/objio_osd.c
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/pnfs_dev.c
fs/nfs/pnfs_nfs.c
fs/nfs/proc.c
fs/nfs/read.c
fs/nfs/super.c
fs/nfs/symlink.c
fs/nfs/unlink.c
fs/nfs/write.c
fs/nfsd/Kconfig
fs/nfsd/export.c
fs/nfsd/nfs2acl.c
fs/nfsd/nfs3acl.c
fs/nfsd/nfs3proc.c
fs/nfsd/nfs3xdr.c
fs/nfsd/nfs4acl.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfsctl.c
fs/nfsd/nfsd.h
fs/nfsd/nfsfh.c
fs/nfsd/nfsfh.h
fs/nfsd/nfsproc.c
fs/nfsd/nfsxdr.c
fs/nfsd/vfs.c
fs/nfsd/xdr4.h
fs/nilfs2/dir.c
fs/nilfs2/inode.c
fs/nilfs2/namei.c
fs/nilfs2/super.c
fs/nsfs.c
fs/ntfs/inode.c
fs/ntfs/namei.c
fs/ocfs2/dcache.c
fs/ocfs2/dir.h
fs/ocfs2/dlmfs/dlmfs.c
fs/ocfs2/export.c
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/namei.c
fs/ocfs2/refcounttree.c
fs/ocfs2/xattr.c
fs/omfs/dir.c
fs/omfs/file.c
fs/open.c
fs/pipe.c
fs/posix_acl.c
fs/proc/base.c
fs/proc/fd.c
fs/proc/generic.c
fs/proc/inode.c
fs/proc/namespaces.c
fs/proc/proc_net.c
fs/proc/proc_sysctl.c
fs/proc/root.c
fs/proc/self.c
fs/proc/thread_self.c
fs/pstore/inode.c
fs/qnx6/inode.c
fs/quota/dquot.c
fs/ramfs/file-nommu.c
fs/reiserfs/dir.c
fs/reiserfs/inode.c
fs/reiserfs/namei.c
fs/reiserfs/super.c
fs/reiserfs/xattr.c
fs/reiserfs/xattr.h
fs/reiserfs/xattr_security.c
fs/reiserfs/xattr_trusted.c
fs/reiserfs/xattr_user.c
fs/squashfs/export.c
fs/squashfs/xattr.c
fs/stat.c
fs/sysv/dir.c
fs/sysv/file.c
fs/sysv/itree.c
fs/sysv/namei.c
fs/sysv/symlink.c
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/journal.c
fs/ubifs/xattr.c
fs/udf/file.c
fs/udf/namei.c
fs/ufs/dir.c
fs/ufs/namei.c
fs/ufs/super.c
fs/ufs/symlink.c
fs/ufs/truncate.c
fs/xfs/libxfs/xfs_alloc.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_leaf.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_bmap.h
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_da_btree.c
fs/xfs/libxfs/xfs_da_format.h
fs/xfs/libxfs/xfs_dir2_data.c
fs/xfs/libxfs/xfs_format.h
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_sb.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_attr_inactive.c
fs/xfs/xfs_attr_list.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_bmap_util.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_discard.c
fs/xfs/xfs_error.c
fs/xfs/xfs_error.h
fs/xfs/xfs_export.c
fs/xfs/xfs_file.c
fs/xfs/xfs_filestream.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_ioctl32.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_iops.h
fs/xfs/xfs_itable.c
fs/xfs/xfs_linux.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_mru_cache.c
fs/xfs/xfs_pnfs.c
fs/xfs/xfs_pnfs.h
fs/xfs/xfs_qm.c
fs/xfs/xfs_super.c
fs/xfs/xfs_super.h
fs/xfs/xfs_symlink.c
fs/xfs/xfs_trace.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_xattr.c
include/acpi/acpi_io.h
include/acpi/processor.h
include/dt-bindings/dma/jz4780-dma.h [new file with mode: 0644]
include/linux/acpi.h
include/linux/acpi_irq.h [new file with mode: 0644]
include/linux/amba/xilinx_dma.h [deleted file]
include/linux/async_tx.h
include/linux/ceph/ceph_features.h
include/linux/ceph/ceph_fs.h
include/linux/ceph/debugfs.h
include/linux/ceph/libceph.h
include/linux/ceph/osdmap.h
include/linux/clocksource.h
include/linux/crush/crush.h
include/linux/dma-buf.h
include/linux/dma/xilinx_dma.h [new file with mode: 0644]
include/linux/dmaengine.h
include/linux/falloc.h
include/linux/fs.h
include/linux/intel-iommu.h
include/linux/irqchip/arm-gic-acpi.h [new file with mode: 0644]
include/linux/lguest.h
include/linux/mfd/cros_ec.h
include/linux/mfd/tmio.h
include/linux/mlx4/device.h
include/linux/mm.h
include/linux/mmc/sh_mobile_sdhi.h
include/linux/mtd/map.h
include/linux/mtd/spi-nor.h
include/linux/nfs4.h
include/linux/nfs_fs.h
include/linux/nfs_idmap.h [deleted file]
include/linux/nfs_xdr.h
include/linux/of.h
include/linux/of_fdt.h
include/linux/of_irq.h
include/linux/platform_data/dma-imx-sdma.h
include/linux/raid/pq.h
include/linux/shdma-base.h
include/linux/sunrpc/msg_prot.h
include/linux/sunrpc/xprtrdma.h
include/linux/virtio.h
include/linux/virtio_config.h
include/linux/virtio_ring.h
include/target/iscsi/iscsi_target_core.h
include/target/target_core_base.h
include/target/target_core_configfs.h
include/target/target_core_fabric.h
include/target/target_core_fabric_configfs.h
include/trace/events/btrfs.h
include/trace/events/ext3.h
include/trace/events/ext4.h
include/uapi/linux/Kbuild
include/uapi/linux/falloc.h
include/uapi/linux/kvm.h
include/uapi/linux/nfs4.h
include/uapi/linux/nfs_idmap.h
include/uapi/linux/nfsd/debug.h
include/uapi/linux/nfsd/export.h
include/uapi/linux/raid/md_p.h
include/uapi/linux/raid/md_u.h
include/uapi/linux/target_core_user.h
include/uapi/linux/virtio_balloon.h
include/uapi/linux/virtio_ids.h
include/uapi/linux/virtio_input.h [new file with mode: 0644]
include/uapi/sound/asound.h
ipc/mqueue.c
ipc/shm.c
kernel/audit.c
kernel/audit.h
kernel/audit_tree.c
kernel/audit_watch.c
kernel/auditsc.c
kernel/module.c
kernel/params.c
kernel/relay.c
kernel/trace/trace.c
kernel/trace/trace_events.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_uprobe.c
lib/raid6/algos.c
lib/raid6/altivec.uc
lib/raid6/avx2.c
lib/raid6/int.uc
lib/raid6/mmx.c
lib/raid6/neon.c
lib/raid6/sse1.c
lib/raid6/sse2.c
lib/raid6/test/test.c
lib/raid6/tilegx.uc
mm/shmem.c
net/ceph/ceph_common.c
net/ceph/crush/crush.c
net/ceph/crush/crush_ln_table.h [new file with mode: 0644]
net/ceph/crush/mapper.c
net/ceph/debugfs.c
net/ceph/messenger.c
net/ceph/osdmap.c
net/socket.c
net/sunrpc/rpc_pipe.c
net/sunrpc/sched.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/Makefile
net/sunrpc/xprtrdma/fmr_ops.c [new file with mode: 0644]
net/sunrpc/xprtrdma/frwr_ops.c [new file with mode: 0644]
net/sunrpc/xprtrdma/physical_ops.c [new file with mode: 0644]
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/unix/af_unix.c
net/unix/diag.c
scripts/check_extable.sh [new file with mode: 0755]
scripts/mod/modpost.c
security/apparmor/apparmorfs.c
security/apparmor/file.c
security/apparmor/lsm.c
security/commoncap.c
security/inode.c
security/integrity/evm/evm_crypto.c
security/integrity/evm/evm_main.c
security/integrity/ima/ima_appraise.c
security/lsm_audit.c
security/security.c
security/selinux/hooks.c
security/selinux/selinuxfs.c
security/smack/smack_lsm.c
security/smack/smackfs.c
security/tomoyo/condition.c
security/tomoyo/realpath.c
sound/oss/sequencer.c
sound/pci/hda/hda_codec.c
sound/pci/hda/hda_controller.h
sound/pci/hda/hda_i915.c
sound/pci/hda/hda_intel.c
sound/pci/hda/hda_proc.c
sound/pci/hda/patch_realtek.c
sound/pci/intel8x0.c
sound/soc/sh/fsi.c
sound/usb/format.c
sound/usb/quirks-table.h
tools/power/cpupower/utils/helpers/pci.c
virt/kvm/arm/vgic.c
virt/kvm/kvm_main.c

diff --git a/CREDITS b/CREDITS
index 2ef5dce..40cc4bf 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -2049,6 +2049,10 @@ D: pirq addr, CS5535 alsa audio driver
 S: Gurgaon, India
 S: Kuala Lumpur, Malaysia
 
+N: Mohit Kumar
+D: ST Microelectronics SPEAr13xx PCI host bridge driver
+D: Synopsys Designware PCI host bridge driver
+
 N: Gabor Kuti
 M: seasons@falcon.sch.bme.hu
 M: seasons@makosteszta.sote.hu
index 76ee192..3b5c3bc 100644 (file)
@@ -222,3 +222,13 @@ Description:
                The number of blocks that are marked as reserved, if any, in
                this partition. These are typically used to store the in-flash
                bad block table (BBT).
+
+What:          /sys/class/mtd/mtdX/offset
+Date:          March 2015
+KernelVersion: 4.1
+Contact:       linux-mtd@lists.infradead.org
+Description:
+               For a partition, the offset of that partition from the start
+               of the master device in bytes. This attribute is absent on
+               main devices, so it can be used to distinguish between
+               partitions and devices that aren't partitions.
index ca9c71a..eed922e 100644 (file)
@@ -8,9 +8,11 @@ Description:   This file controls the keyboard backlight operation mode, valid
                        * 0x2  -> AUTO (also called TIMER)
                        * 0x8  -> ON
                        * 0x10 -> OFF
-               Note that the kernel 3.16 onwards this file accepts all listed
+               Note that from kernel 3.16 onwards this file accepts all listed
                parameters, kernel 3.15 only accepts the first two (FN-Z and
                AUTO).
+               Also note that toggling this value on type 1 devices, requires
+               a reboot for changes to take effect.
 Users:         KToshiba
 
 What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/kbd_backlight_timeout
@@ -67,15 +69,72 @@ Description:        This file shows the current keyboard backlight type,
                        * 2 -> Type 2, supporting modes TIMER, ON and OFF
 Users:         KToshiba
 
+What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_sleep_charge
+Date:          January 23, 2015
+KernelVersion: 4.0
+Contact:       Azael Avalos <coproscefalo@gmail.com>
+Description:   This file controls the USB Sleep & Charge charging mode, which
+               can be:
+                       * 0 -> Disabled         (0x00)
+                       * 1 -> Alternate        (0x09)
+                       * 2 -> Auto             (0x21)
+                       * 3 -> Typical          (0x11)
+               Note that from kernel 4.1 onwards this file accepts all listed
+               values, kernel 4.0 only supports the first three.
+               Note that this feature only works when connected to power, if
+               you want to use it under battery, see the entry named
+               "sleep_functions_on_battery"
+Users:         KToshiba
+
+What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/sleep_functions_on_battery
+Date:          January 23, 2015
+KernelVersion: 4.0
+Contact:       Azael Avalos <coproscefalo@gmail.com>
+Description:   This file controls the USB Sleep Functions under battery, and
+               set the level at which point they will be disabled, accepted
+               values can be:
+                       * 0     -> Disabled
+                       * 1-100 -> Battery level to disable sleep functions
+               Currently it prints two values, the first one indicates if the
+               feature is enabled or disabled, while the second one shows the
+               current battery level set.
+               Note that when the value is set to disabled, the sleep function
+               will only work when connected to power.
+Users:         KToshiba
+
+What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_rapid_charge
+Date:          January 23, 2015
+KernelVersion: 4.0
+Contact:       Azael Avalos <coproscefalo@gmail.com>
+Description:   This file controls the USB Rapid Charge state, which can be:
+                       * 0 -> Disabled
+                       * 1 -> Enabled
+               Note that toggling this value requires a reboot for changes to
+               take effect.
+Users:         KToshiba
+
+What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_sleep_music
+Date:          January 23, 2015
+KernelVersion: 4.0
+Contact:       Azael Avalos <coproscefalo@gmail.com>
+Description:   This file controls the Sleep & Music state, which values can be:
+                       * 0 -> Disabled
+                       * 1 -> Enabled
+               Note that this feature only works when connected to power, if
+               you want to use it under battery, see the entry named
+               "sleep_functions_on_battery"
+Users:         KToshiba
+
 What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/version
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
 Contact:       Azael Avalos <coproscefalo@gmail.com>
 Description:   This file shows the current version of the driver
+Users:         KToshiba
 
 What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/fan
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
 Contact:       Azael Avalos <coproscefalo@gmail.com>
 Description:   This file controls the state of the internal fan, valid
                values are:
@@ -83,8 +142,8 @@ Description: This file controls the state of the internal fan, valid
                        * 1 -> ON
 
 What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/kbd_function_keys
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
 Contact:       Azael Avalos <coproscefalo@gmail.com>
 Description:   This file controls the Special Functions (hotkeys) operation
                mode, valid values are:
@@ -94,21 +153,29 @@ Description:       This file controls the Special Functions (hotkeys) operation
                and the hotkeys are accessed via FN-F{1-12}.
                In the "Special Functions" mode, the F{1-12} keys trigger the
                hotkey and the F{1-12} keys are accessed via FN-F{1-12}.
+               Note that toggling this value requires a reboot for changes to
+               take effect.
+Users:         KToshiba
 
 What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/panel_power_on
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
 Contact:       Azael Avalos <coproscefalo@gmail.com>
 Description:   This file controls whether the laptop should turn ON whenever
                the LID is opened, valid values are:
                        * 0 -> Disabled
                        * 1 -> Enabled
+               Note that toggling this value requires a reboot for changes to
+               take effect.
+Users:         KToshiba
 
 What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_three
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
 Contact:       Azael Avalos <coproscefalo@gmail.com>
-Description:   This file controls whether the USB 3 functionality, valid
-               values are:
+Description:   This file controls the USB 3 functionality, valid values are:
                        * 0 -> Disabled (Acts as a regular USB 2)
                        * 1 -> Enabled (Full USB 3 functionality)
+               Note that toggling this value requires a reboot for changes to
+               take effect.
+Users:         KToshiba
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop
new file mode 100644 (file)
index 0000000..8c6a0b8
--- /dev/null
@@ -0,0 +1,69 @@
+What:          /sys/class/leds/dell::kbd_backlight/als_enabled
+Date:          December 2014
+KernelVersion: 3.19
+Contact:       Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+               Pali Rohár <pali.rohar@gmail.com>
+Description:
+               This file allows to control the automatic keyboard
+               illumination mode on some systems that have an ambient
+               light sensor. Write 1 to this file to enable the auto
+               mode, 0 to disable it.
+
+What:          /sys/class/leds/dell::kbd_backlight/als_setting
+Date:          December 2014
+KernelVersion: 3.19
+Contact:       Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+               Pali Rohár <pali.rohar@gmail.com>
+Description:
+               This file allows to specifiy the on/off threshold value,
+               as reported by the ambient light sensor.
+
+What:          /sys/class/leds/dell::kbd_backlight/start_triggers
+Date:          December 2014
+KernelVersion: 3.19
+Contact:       Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+               Pali Rohár <pali.rohar@gmail.com>
+Description:
+               This file allows to control the input triggers that
+               turn on the keyboard backlight illumination that is
+               disabled because of inactivity.
+               Read the file to see the triggers available. The ones
+               enabled are preceded by '+', those disabled by '-'.
+
+               To enable a trigger, write its name preceded by '+' to
+               this file. To disable a trigger, write its name preceded
+               by '-' instead.
+
+               For example, to enable the keyboard as trigger run:
+                   echo +keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+               To disable it:
+                   echo -keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+
+               Note that not all the available triggers can be configured.
+
+What:          /sys/class/leds/dell::kbd_backlight/stop_timeout
+Date:          December 2014
+KernelVersion: 3.19
+Contact:       Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+               Pali Rohár <pali.rohar@gmail.com>
+Description:
+               This file allows to specify the interval after which the
+               keyboard illumination is disabled because of inactivity.
+               The timeouts are expressed in seconds, minutes, hours and
+               days, for which the symbols are 's', 'm', 'h' and 'd'
+               respectively.
+
+               To configure the timeout, write to this file a value along
+               with any the above units. If no unit is specified, the value
+               is assumed to be expressed in seconds.
+
+               For example, to set the timeout to 10 minutes run:
+                   echo 10m > /sys/class/leds/dell::kbd_backlight/stop_timeout
+
+               Note that when this file is read, the returned value might be
+               expressed in a different unit than the one used when the timeout
+               was set.
+
+               Also note that only some timeouts are supported and that
+               some systems might fall back to a specific timeout in case
+               an invalid timeout is written to this file.
diff --git a/Documentation/arm64/acpi_object_usage.txt b/Documentation/arm64/acpi_object_usage.txt
new file mode 100644 (file)
index 0000000..a6e1a18
--- /dev/null
@@ -0,0 +1,593 @@
+ACPI Tables
+-----------
+The expectations of individual ACPI tables are discussed in the list that
+follows.
+
+If a section number is used, it refers to a section number in the ACPI
+specification where the object is defined.  If "Signature Reserved" is used,
+the table signature (the first four bytes of the table) is the only portion
+of the table recognized by the specification, and the actual table is defined
+outside of the UEFI Forum (see Section 5.2.6 of the specification).
+
+For ACPI on arm64, tables also fall into the following categories:
+
+       -- Required: DSDT, FADT, GTDT, MADT, MCFG, RSDP, SPCR, XSDT
+
+       -- Recommended: BERT, EINJ, ERST, HEST, SSDT
+
+       -- Optional: BGRT, CPEP, CSRT, DRTM, ECDT, FACS, FPDT, MCHI, MPST,
+          MSCT, RASF, SBST, SLIT, SPMI, SRAT, TCPA, TPM2, UEFI
+
+       -- Not supported: BOOT, DBG2, DBGP, DMAR, ETDT, HPET, IBFT, IVRS,
+          LPIT, MSDM, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
+
+
+Table  Usage for ARMv8 Linux
+-----  ----------------------------------------------------------------
+BERT   Section 18.3 (signature == "BERT")
+       == Boot Error Record Table ==
+       Must be supplied if RAS support is provided by the platform.  It
+       is recommended this table be supplied.
+
+BOOT   Signature Reserved (signature == "BOOT")
+       == simple BOOT flag table ==
+       Microsoft only table, will not be supported.
+
+BGRT   Section 5.2.22 (signature == "BGRT")
+       == Boot Graphics Resource Table ==
+       Optional, not currently supported, with no real use-case for an
+       ARM server.
+
+CPEP   Section 5.2.18 (signature == "CPEP")
+       == Corrected Platform Error Polling table ==
+       Optional, not currently supported, and not recommended until such
+       time as ARM-compatible hardware is available, and the specification
+       suitably modified.
+
+CSRT   Signature Reserved (signature == "CSRT")
+       == Core System Resources Table ==
+       Optional, not currently supported.
+
+DBG2   Signature Reserved (signature == "DBG2")
+       == DeBuG port table 2 ==
+       Microsoft only table, will not be supported.
+
+DBGP   Signature Reserved (signature == "DBGP")
+       == DeBuG Port table ==
+       Microsoft only table, will not be supported.
+
+DSDT   Section 5.2.11.1 (signature == "DSDT")
+       == Differentiated System Description Table ==
+       A DSDT is required; see also SSDT.
+
+       ACPI tables contain only one DSDT but can contain one or more SSDTs,
+       which are optional.  Each SSDT can only add to the ACPI namespace,
+       but cannot modify or replace anything in the DSDT.
+
+DMAR   Signature Reserved (signature == "DMAR")
+       == DMA Remapping table ==
+       x86 only table, will not be supported.
+
+DRTM   Signature Reserved (signature == "DRTM")
+       == Dynamic Root of Trust for Measurement table ==
+       Optional, not currently supported.
+
+ECDT   Section 5.2.16 (signature == "ECDT")
+       == Embedded Controller Description Table ==
+       Optional, not currently supported, but could be used on ARM if and
+       only if one uses the GPE_BIT field to represent an IRQ number, since
+       there are no GPE blocks defined in hardware reduced mode.  This would
+       need to be modified in the ACPI specification.
+
+EINJ   Section 18.6 (signature == "EINJ")
+       == Error Injection table ==
+       This table is very useful for testing platform response to error
+       conditions; it allows one to inject an error into the system as
+       if it had actually occurred.  However, this table should not be
+       shipped with a production system; it should be dynamically loaded
+       and executed with the ACPICA tools only during testing.
+
+ERST   Section 18.5 (signature == "ERST")
+       == Error Record Serialization Table ==
+       On a platform supports RAS, this table must be supplied if it is not
+       UEFI-based; if it is UEFI-based, this table may be supplied. When this
+       table is not present, UEFI run time service will be utilized to save
+       and retrieve hardware error information to and from a persistent store.
+
+ETDT   Signature Reserved (signature == "ETDT")
+       == Event Timer Description Table ==
+       Obsolete table, will not be supported.
+
+FACS   Section 5.2.10 (signature == "FACS")
+       == Firmware ACPI Control Structure ==
+       It is unlikely that this table will be terribly useful.  If it is
+       provided, the Global Lock will NOT be used since it is not part of
+       the hardware reduced profile, and only 64-bit address fields will
+       be considered valid.
+
+FADT   Section 5.2.9 (signature == "FACP")
+       == Fixed ACPI Description Table ==
+       Required for arm64.
+
+       The HW_REDUCED_ACPI flag must be set.  All of the fields that are
+       to be ignored when HW_REDUCED_ACPI is set are expected to be set to
+       zero.
+
+       If an FACS table is provided, the X_FIRMWARE_CTRL field is to be
+       used, not FIRMWARE_CTRL.
+
+       If PSCI is used (as is recommended), make sure that ARM_BOOT_ARCH is
+       filled in properly -- that the PSCI_COMPLIANT flag is set and that
+       PSCI_USE_HVC is set or unset as needed (see table 5-37).
+
+       For the DSDT that is also required, the X_DSDT field is to be used,
+       not the DSDT field.
+
+FPDT   Section 5.2.23 (signature == "FPDT")
+       == Firmware Performance Data Table ==
+       Optional, not currently supported.
+
+GTDT   Section 5.2.24 (signature == "GTDT")
+       == Generic Timer Description Table ==
+       Required for arm64.
+
+HEST   Section 18.3.2 (signature == "HEST")
+       == Hardware Error Source Table ==
+       Until further error source types are defined, use only types 6 (AER
+       Root Port), 7 (AER Endpoint), 8 (AER Bridge), or 9 (Generic Hardware
+       Error Source).  Firmware first error handling is possible if and only
+       if Trusted Firmware is being used on arm64.
+
+       Must be supplied if RAS support is provided by the platform.  It
+       is recommended this table be supplied.
+
+HPET   Signature Reserved (signature == "HPET")
+       == High Precision Event timer Table ==
+       x86 only table, will not be supported.
+
+IBFT   Signature Reserved (signature == "IBFT")
+       == iSCSI Boot Firmware Table ==
+       Microsoft defined table, support TBD.
+
+IVRS   Signature Reserved (signature == "IVRS")
+       == I/O Virtualization Reporting Structure ==
+       x86_64 (AMD) only table, will not be supported.
+
+LPIT   Signature Reserved (signature == "LPIT")
+       == Low Power Idle Table ==
+       x86 only table as of ACPI 5.1; future versions have been adapted for
+       use with ARM and will be recommended in order to support ACPI power
+       management.
+
+MADT   Section 5.2.12 (signature == "APIC")
+       == Multiple APIC Description Table ==
+       Required for arm64.  Only the GIC interrupt controller structures
+       should be used (types 0xA - 0xE).
+
+MCFG   Signature Reserved (signature == "MCFG")
+       == Memory-mapped ConFiGuration space ==
+       If the platform supports PCI/PCIe, an MCFG table is required.
+
+MCHI   Signature Reserved (signature == "MCHI")
+       == Management Controller Host Interface table ==
+       Optional, not currently supported.
+
+MPST   Section 5.2.21 (signature == "MPST")
+       == Memory Power State Table ==
+       Optional, not currently supported.
+
+MSDM   Signature Reserved (signature == "MSDM")
+       == Microsoft Data Management table ==
+       Microsoft only table, will not be supported.
+
+MSCT   Section 5.2.19 (signature == "MSCT")
+       == Maximum System Characteristic Table ==
+       Optional, not currently supported.
+
+RASF   Section 5.2.20 (signature == "RASF")
+       == RAS Feature table ==
+       Optional, not currently supported.
+
+RSDP   Section 5.2.5 (signature == "RSD PTR")
+       == Root System Description PoinTeR ==
+       Required for arm64.
+
+RSDT   Section 5.2.7 (signature == "RSDT")
+       == Root System Description Table ==
+       Since this table can only provide 32-bit addresses, it is deprecated
+       on arm64, and will not be used.
+
+SBST   Section 5.2.14 (signature == "SBST")
+       == Smart Battery Subsystem Table ==
+       Optional, not currently supported.
+
+SLIC   Signature Reserved (signature == "SLIC")
+       == Software LIcensing table ==
+       Microsoft only table, will not be supported.
+
+SLIT   Section 5.2.17 (signature == "SLIT")
+       == System Locality distance Information Table ==
+       Optional in general, but required for NUMA systems.
+
+SPCR   Signature Reserved (signature == "SPCR")
+       == Serial Port Console Redirection table ==
+       Required for arm64.
+
+SPMI   Signature Reserved (signature == "SPMI")
+       == Server Platform Management Interface table ==
+       Optional, not currently supported.
+
+SRAT   Section 5.2.16 (signature == "SRAT")
+       == System Resource Affinity Table ==
+       Optional, but if used, only the GICC Affinity structures are read.
+       To support NUMA, this table is required.
+
+SSDT   Section 5.2.11.2 (signature == "SSDT")
+       == Secondary System Description Table ==
+       These tables are a continuation of the DSDT; these are recommended
+       for use with devices that can be added to a running system, but can
+       also serve the purpose of dividing up device descriptions into more
+       manageable pieces.
+
+       An SSDT can only ADD to the ACPI namespace.  It cannot modify or
+       replace existing device descriptions already in the namespace.
+
+       These tables are optional, however.  ACPI tables should contain only
+       one DSDT but can contain many SSDTs.
+
+TCPA   Signature Reserved (signature == "TCPA")
+       == Trusted Computing Platform Alliance table ==
+       Optional, not currently supported, and may need changes to fully
+       interoperate with arm64.
+
+TPM2   Signature Reserved (signature == "TPM2")
+       == Trusted Platform Module 2 table ==
+       Optional, not currently supported, and may need changes to fully
+       interoperate with arm64.
+
+UEFI   Signature Reserved (signature == "UEFI")
+       == UEFI ACPI data table ==
+       Optional, not currently supported.  No known use case for arm64,
+       at present.
+
+WAET   Signature Reserved (signature == "WAET")
+       == Windows ACPI Emulated devices Table ==
+       Microsoft only table, will not be supported.
+
+WDAT   Signature Reserved (signature == "WDAT")
+       == Watch Dog Action Table ==
+       Microsoft only table, will not be supported.
+
+WDRT   Signature Reserved (signature == "WDRT")
+       == Watch Dog Resource Table ==
+       Microsoft only table, will not be supported.
+
+WPBT   Signature Reserved (signature == "WPBT")
+       == Windows Platform Binary Table ==
+       Microsoft only table, will not be supported.
+
+XSDT   Section 5.2.8 (signature == "XSDT")
+       == eXtended System Description Table ==
+       Required for arm64.
+
+
+ACPI Objects
+------------
+The expectations on individual ACPI objects are discussed in the list that
+follows:
+
+Name   Section         Usage for ARMv8 Linux
+----   ------------    -------------------------------------------------
+_ADR   6.1.1           Use as needed.
+
+_BBN   6.5.5           Use as needed; PCI-specific.
+
+_BDN   6.5.3           Optional; not likely to be used on arm64.
+
+_CCA   6.2.17          This method should be defined for all bus masters
+                       on arm64.  While cache coherency is assumed, making
+                       it explicit ensures the kernel will set up DMA as
+                       it should.
+
+_CDM   6.2.1           Optional, to be used only for processor devices.
+
+_CID   6.1.2           Use as needed.
+
+_CLS   6.1.3           Use as needed.
+
+_CRS   6.2.2           Required on arm64.
+
+_DCK   6.5.2           Optional; not likely to be used on arm64.
+
+_DDN   6.1.4           This field can be used for a device name.  However,
+                       it is meant for DOS device names (e.g., COM1), so be
+                       careful of its use across OSes.
+
+_DEP   6.5.8           Use as needed.
+
+_DIS   6.2.3           Optional, for power management use.
+
+_DLM   5.7.5           Optional.
+
+_DMA   6.2.4           Optional.
+
+_DSD   6.2.5           To be used with caution.  If this object is used, try
+                       to use it within the constraints already defined by the
+                       Device Properties UUID.  Only in rare circumstances
+                       should it be necessary to create a new _DSD UUID.
+
+                       In either case, submit the _DSD definition along with
+                       any driver patches for discussion, especially when
+                       device properties are used.  A driver will not be
+                       considered complete without a corresponding _DSD
+                       description.  Once approved by kernel maintainers,
+                       the UUID or device properties must then be registered
+                       with the UEFI Forum; this may cause some iteration as
+                       more than one OS will be registering entries.
+
+_DSM                   Do not use this method.  It is not standardized, the
+                       return values are not well documented, and it is
+                       currently a frequent source of error.
+
+_DSW   7.2.1           Use as needed; power management specific.
+
+_EDL   6.3.1           Optional.
+
+_EJD   6.3.2           Optional.
+
+_EJx   6.3.3           Optional.
+
+_FIX   6.2.7           x86 specific, not used on arm64.
+
+\_GL   5.7.1           This object is not to be used in hardware reduced
+                       mode, and therefore should not be used on arm64.
+
+_GLK   6.5.7           This object requires a global lock be defined; there
+                       is no global lock on arm64 since it runs in hardware
+                       reduced mode.  Hence, do not use this object on arm64.
+
+\_GPE  5.3.1           This namespace is for x86 use only.  Do not use it
+                       on arm64.
+
+_GSB   6.2.7           Optional.
+
+_HID   6.1.5           Use as needed.  This is the primary object to use in
+                       device probing, though _CID and _CLS may also be used.
+
+_HPP   6.2.8           Optional, PCI specific.
+
+_HPX   6.2.9           Optional, PCI specific.
+
+_HRV   6.1.6           Optional, use as needed to clarify device behavior; in
+                       some cases, this may be easier to use than _DSD.
+
+_INI   6.5.1           Not required, but can be useful in setting up devices
+                       when UEFI leaves them in a state that may not be what
+                       the driver expects before it starts probing.
+
+_IRC   7.2.15          Use as needed; power management specific.
+
+_LCK   6.3.4           Optional.
+
+_MAT   6.2.10          Optional; see also the MADT.
+
+_MLS   6.1.7           Optional, but highly recommended for use in
+                       internationalization.
+
+_OFF   7.1.2           It is recommended to define this method for any device
+                       that can be turned on or off.
+
+_ON    7.1.3           It is recommended to define this method for any device
+                       that can be turned on or off.
+
+\_OS   5.7.3           This method will return "Linux" by default (this is
+                       the value of the macro ACPI_OS_NAME on Linux).  The
+                       command line parameter acpi_os=<string> can be used
+                       to set it to some other value.
+
+_OSC   6.2.11          This method can be a global method in ACPI (i.e.,
+                       \_SB._OSC), or it may be associated with a specific
+                       device (e.g., \_SB.DEV0._OSC), or both.  When used
+                       as a global method, only capabilities published in
+                       the ACPI specification are allowed.  When used as
+                       a device-specific method, the process described for
+                       using _DSD MUST be used to create an _OSC definition;
+                       out-of-process use of _OSC is not allowed.  That is,
+                       submit the device-specific _OSC usage description as
+                       part of the kernel driver submission, get it approved
+                       by the kernel community, then register it with the
+                       UEFI Forum.
+
+\_OSI  5.7.2           Deprecated on ARM64.  Any invocation of this method
+                       will print a warning on the console and return false.
+                       That is, as far as ACPI firmware is concerned, _OSI
+                       cannot be used to determine what sort of system is
+                       being used or what functionality is provided.  The
+                       _OSC method is to be used instead.
+
+_OST   6.3.5           Optional.
+
+_PDC   8.4.1           Deprecated, do not use on arm64.
+
+\_PIC  5.8.1           The method should not be used.  On arm64, the only
+                       interrupt model available is GIC.
+
+_PLD   6.1.8           Optional.
+
+\_PR   5.3.1           This namespace is for x86 use only on legacy systems.
+                       Do not use it on arm64.
+
+_PRS   6.2.12          Optional.
+
+_PRT   6.2.13          Required as part of the definition of all PCI root
+                       devices.
+
+_PRW   7.2.13          Use as needed; power management specific.
+
+_PRx   7.2.8-11        Use as needed; power management specific.  If _PR0 is
+                       defined, _PR3 must also be defined.
+
+_PSC   7.2.6           Use as needed; power management specific.
+
+_PSE   7.2.7           Use as needed; power management specific.
+
+_PSW   7.2.14          Use as needed; power management specific.
+
+_PSx   7.2.2-5         Use as needed; power management specific.  If _PS0 is
+                       defined, _PS3 must also be defined.  If clocks or
+                       regulators need adjusting to be consistent with power
+                       usage, change them in these methods.
+
+\_PTS  7.3.1           Use as needed; power management specific.
+
+_PXM   6.2.14          Optional.
+
+_REG   6.5.4           Use as needed.
+
+\_REV  5.7.4           Always returns the latest version of ACPI supported.
+
+_RMV   6.3.6           Optional.
+
+\_SB   5.3.1           Required on arm64; all devices must be defined in this
+                       namespace.
+
+_SEG   6.5.6           Use as needed; PCI-specific.
+
+\_SI   5.3.1,          Optional.
+       9.1
+
+_SLI   6.2.15          Optional; recommended when SLIT table is in use.
+
+_STA   6.3.7,          It is recommended to define this method for any device
+       7.1.4           that can be turned on or off.
+
+_SRS   6.2.16          Optional; see also _PRS.
+
+_STR   6.1.10          Recommended for conveying device names to end users;
+                       this is preferred over using _DDN.
+
+_SUB   6.1.9           Use as needed; _HID or _CID are preferred.
+
+_SUN   6.1.11          Optional.
+
+\_Sx   7.3.2           Use as needed; power management specific.
+
+_SxD   7.2.16-19       Use as needed; power management specific.
+
+_SxW   7.2.20-24       Use as needed; power management specific.
+
+_SWS   7.3.3           Use as needed; power management specific; this may
+                       require specification changes for use on arm64.
+
+\_TTS  7.3.4           Use as needed; power management specific.
+
+\_TZ   5.3.1           Optional.
+
+_UID   6.1.12          Recommended for distinguishing devices of the same
+                       class; define it if at all possible.
+
+\_WAK  7.3.5           Use as needed; power management specific.
+
+
+ACPI Event Model
+----------------
+Do not use GPE block devices; these are not supported in the hardware reduced
+profile used by arm64.  Since there are no GPE blocks defined for use on ARM
+platforms, GPIO-signaled interrupts should be used for creating system events.
+
+
+ACPI Processor Control
+----------------------
+Section 8 of the ACPI specification is currently undergoing change that
+should be completed in the 6.0 version of the specification.  Processor
+performance control will be handled differently for arm64 at that point
+in time.  Processor aggregator devices (section 8.5) will not be used,
+for example, but another similar mechanism instead.
+
+While UEFI constrains what we can say until the release of 6.0, it is
+recommended that CPPC (8.4.5) be used as the primary model.  This will
+still be useful into the future.  C-states and P-states will still be
+provided, but most of the current design work appears to favor CPPC.
+
+Further, it is essential that the ARMv8 SoC provide a fully functional
+implementation of PSCI; this will be the only mechanism supported by ACPI
+to control CPU power state (including secondary CPU booting).
+
+More details will be provided on the release of the ACPI 6.0 specification.
+
+
+ACPI System Address Map Interfaces
+----------------------------------
+In Section 15 of the ACPI specification, several methods are mentioned as
+possible mechanisms for conveying memory resource information to the kernel.
+For arm64, we will only support UEFI for booting with ACPI, hence the UEFI
+GetMemoryMap() boot service is the only mechanism that will be used.
+
+
+ACPI Platform Error Interfaces (APEI)
+-------------------------------------
+The APEI tables supported are described above.
+
+APEI requires the equivalent of an SCI and an NMI on ARMv8.  The SCI is used
+to notify the OSPM of errors that have occurred but can be corrected and the
+system can continue correct operation, even if possibly degraded.  The NMI is
+used to indicate fatal errors that cannot be corrected, and require immediate
+attention.
+
+Since there is no direct equivalent of the x86 SCI or NMI, arm64 handles
+these slightly differently.  The SCI is handled as a normal GPIO-signaled
+interrupt; given that these are corrected (or correctable) errors being
+reported, this is sufficient.  The NMI is emulated as the highest priority
+GPIO-signaled interrupt possible.  This implies some caution must be used
+since there could be interrupts at higher privilege levels or even interrupts
+at the same priority as the emulated NMI.  In Linux, this should not be the
+case but one should be aware it could happen.
+
+
+ACPI Objects Not Supported on ARM64
+-----------------------------------
+While this may change in the future, there are several classes of objects
+that can be defined, but are not currently of general interest to ARM servers.
+
+These are not supported:
+
+       -- Section 9.2: ambient light sensor devices
+
+       -- Section 9.3: battery devices
+
+       -- Section 9.4: lids (e.g., laptop lids)
+
+       -- Section 9.8.2: IDE controllers
+
+       -- Section 9.9: floppy controllers
+
+       -- Section 9.10: GPE block devices
+
+       -- Section 9.15: PC/AT RTC/CMOS devices
+
+       -- Section 9.16: user presence detection devices
+
+       -- Section 9.17: I/O APIC devices; all GICs must be enumerable via MADT
+
+       -- Section 9.18: time and alarm devices (see 9.15)
+
+
+ACPI Objects Not Yet Implemented
+--------------------------------
+While these objects have x86 equivalents, and they do make some sense in ARM
+servers, there is either no hardware available at present, or in some cases
+there may not yet be a non-ARM implementation.  Hence, they are currently not
+implemented though that may change in the future.
+
+Not yet implemented are:
+
+       -- Section 10: power source and power meter devices
+
+       -- Section 11: thermal management
+
+       -- Section 12: embedded controllers interface
+
+       -- Section 13: SMBus interfaces
+
+       -- Section 17: NUMA support (prototypes have been submitted for
+          review)
diff --git a/Documentation/arm64/arm-acpi.txt b/Documentation/arm64/arm-acpi.txt
new file mode 100644 (file)
index 0000000..570a4f8
--- /dev/null
@@ -0,0 +1,505 @@
+ACPI on ARMv8 Servers
+---------------------
+ACPI can be used for ARMv8 general purpose servers designed to follow
+the ARM SBSA (Server Base System Architecture) [0] and SBBR (Server
+Base Boot Requirements) [1] specifications.  Please note that the SBBR
+can be retrieved simply by visiting [1], but the SBSA is currently only
+available to those with an ARM login due to ARM IP licensing concerns.
+
+The ARMv8 kernel implements the reduced hardware model of ACPI version
+5.1 or later.  Links to the specification and all external documents
+it refers to are managed by the UEFI Forum.  The specification is
+available at http://www.uefi.org/specifications and documents referenced
+by the specification can be found via http://www.uefi.org/acpi.
+
+If an ARMv8 system does not meet the requirements of the SBSA and SBBR,
+or cannot be described using the mechanisms defined in the required ACPI
+specifications, then ACPI may not be a good fit for the hardware.
+
+While the documents mentioned above set out the requirements for building
+industry-standard ARMv8 servers, they also apply to more than one operating
+system.  The purpose of this document is to describe the interaction between
+ACPI and Linux only, on an ARMv8 system -- that is, what Linux expects of
+ACPI and what ACPI can expect of Linux.
+
+
+Why ACPI on ARM?
+----------------
+Before examining the details of the interface between ACPI and Linux, it is
+useful to understand why ACPI is being used.  Several technologies already
+exist in Linux for describing non-enumerable hardware, after all.  In this
+section we summarize a blog post [2] from Grant Likely that outlines the
+reasoning behind ACPI on ARMv8 servers.  Actually, we snitch a good portion
+of the summary text almost directly, to be honest.
+
+The short form of the rationale for ACPI on ARM is:
+
+-- ACPI’s bytecode (AML) allows the platform to encode hardware behavior,
+   while DT explicitly does not support this.  For hardware vendors, being
+   able to encode behavior is a key tool used in supporting operating
+   system releases on new hardware.
+
+-- ACPI’s OSPM defines a power management model that constrains what the
+   platform is allowed to do into a specific model, while still providing
+   flexibility in hardware design.
+
+-- In the enterprise server environment, ACPI has established bindings (such
+   as for RAS) which are currently used in production systems.  DT does not.
+   Such bindings could be defined in DT at some point, but doing so means ARM
+   and x86 would end up using completely different code paths in both firmware
+   and the kernel.
+
+-- Choosing a single interface to describe the abstraction between a platform
+   and an OS is important.  Hardware vendors would not be required to implement
+   both DT and ACPI if they want to support multiple operating systems.  And,
+   agreeing on a single interface instead of being fragmented into per OS
+   interfaces makes for better interoperability overall.
+
+-- The new ACPI governance process works well and Linux is now at the same
+   table as hardware vendors and other OS vendors.  In fact, there is no
+   longer any reason to feel that ACPI is only belongs to Windows or that
+   Linux is in any way secondary to Microsoft in this arena.  The move of
+   ACPI governance into the UEFI forum has significantly opened up the
+   specification development process, and currently, a large portion of the
+   changes being made to ACPI is being driven by Linux.
+
+Key to the use of ACPI is the support model.  For servers in general, the
+responsibility for hardware behaviour cannot solely be the domain of the
+kernel, but rather must be split between the platform and the kernel, in
+order to allow for orderly change over time.  ACPI frees the OS from needing
+to understand all the minute details of the hardware so that the OS doesn’t
+need to be ported to each and every device individually.  It allows the
+hardware vendors to take responsibility for power management behaviour without
+depending on an OS release cycle which is not under their control.
+
+ACPI is also important because hardware and OS vendors have already worked
+out the mechanisms for supporting a general purpose computing ecosystem.  The
+infrastructure is in place, the bindings are in place, and the processes are
+in place.  DT does exactly what Linux needs it to when working with vertically
+integrated devices, but there are no good processes for supporting what the
+server vendors need.  Linux could potentially get there with DT, but doing so
+really just duplicates something that already works.  ACPI already does what
+the hardware vendors need, Microsoft won’t collaborate on DT, and hardware
+vendors would still end up providing two completely separate firmware
+interfaces -- one for Linux and one for Windows.
+
+
+Kernel Compatibility
+--------------------
+One of the primary motivations for ACPI is standardization, and using that
+to provide backward compatibility for Linux kernels.  In the server market,
+software and hardware are often used for long periods.  ACPI allows the
+kernel and firmware to agree on a consistent abstraction that can be
+maintained over time, even as hardware or software change.  As long as the
+abstraction is supported, systems can be updated without necessarily having
+to replace the kernel.
+
+When a Linux driver or subsystem is first implemented using ACPI, it by
+definition ends up requiring a specific version of the ACPI specification
+-- it's baseline.  ACPI firmware must continue to work, even though it may
+not be optimal, with the earliest kernel version that first provides support
+for that baseline version of ACPI.  There may be a need for additional drivers,
+but adding new functionality (e.g., CPU power management) should not break
+older kernel versions.  Further, ACPI firmware must also work with the most
+recent version of the kernel.
+
+
+Relationship with Device Tree
+-----------------------------
+ACPI support in drivers and subsystems for ARMv8 should never be mutually
+exclusive with DT support at compile time.
+
+At boot time the kernel will only use one description method depending on
+parameters passed from the bootloader (including kernel bootargs).
+
+Regardless of whether DT or ACPI is used, the kernel must always be capable
+of booting with either scheme (in kernels with both schemes enabled at compile
+time).
+
+
+Booting using ACPI tables
+-------------------------
+The only defined method for passing ACPI tables to the kernel on ARMv8
+is via the UEFI system configuration table.  Just so it is explicit, this
+means that ACPI is only supported on platforms that boot via UEFI.
+
+When an ARMv8 system boots, it can either have DT information, ACPI tables,
+or in some very unusual cases, both.  If no command line parameters are used,
+the kernel will try to use DT for device enumeration; if there is no DT
+present, the kernel will try to use ACPI tables, but only if they are present.
+In neither is available, the kernel will not boot.  If acpi=force is used
+on the command line, the kernel will attempt to use ACPI tables first, but
+fall back to DT if there are no ACPI tables present.  The basic idea is that
+the kernel will not fail to boot unless it absolutely has no other choice.
+
+Processing of ACPI tables may be disabled by passing acpi=off on the kernel
+command line; this is the default behavior.
+
+In order for the kernel to load and use ACPI tables, the UEFI implementation
+MUST set the ACPI_20_TABLE_GUID to point to the RSDP table (the table with
+the ACPI signature "RSD PTR ").  If this pointer is incorrect and acpi=force
+is used, the kernel will disable ACPI and try to use DT to boot instead; the
+kernel has, in effect, determined that ACPI tables are not present at that
+point.
+
+If the pointer to the RSDP table is correct, the table will be mapped into
+the kernel by the ACPI core, using the address provided by UEFI.
+
+The ACPI core will then locate and map in all other ACPI tables provided by
+using the addresses in the RSDP table to find the XSDT (eXtended System
+Description Table).  The XSDT in turn provides the addresses to all other
+ACPI tables provided by the system firmware; the ACPI core will then traverse
+this table and map in the tables listed.
+
+The ACPI core will ignore any provided RSDT (Root System Description Table).
+RSDTs have been deprecated and are ignored on arm64 since they only allow
+for 32-bit addresses.
+
+Further, the ACPI core will only use the 64-bit address fields in the FADT
+(Fixed ACPI Description Table).  Any 32-bit address fields in the FADT will
+be ignored on arm64.
+
+Hardware reduced mode (see Section 4.1 of the ACPI 5.1 specification) will
+be enforced by the ACPI core on arm64.  Doing so allows the ACPI core to
+run less complex code since it no longer has to provide support for legacy
+hardware from other architectures.  Any fields that are not to be used for
+hardware reduced mode must be set to zero.
+
+For the ACPI core to operate properly, and in turn provide the information
+the kernel needs to configure devices, it expects to find the following
+tables (all section numbers refer to the ACPI 5.1 specfication):
+
+    -- RSDP (Root System Description Pointer), section 5.2.5
+
+    -- XSDT (eXtended System Description Table), section 5.2.8
+
+    -- FADT (Fixed ACPI Description Table), section 5.2.9
+
+    -- DSDT (Differentiated System Description Table), section
+       5.2.11.1
+
+    -- MADT (Multiple APIC Description Table), section 5.2.12
+
+    -- GTDT (Generic Timer Description Table), section 5.2.24
+
+    -- If PCI is supported, the MCFG (Memory mapped ConFiGuration
+       Table), section 5.2.6, specifically Table 5-31.
+
+If the above tables are not all present, the kernel may or may not be
+able to boot properly since it may not be able to configure all of the
+devices available.
+
+
+ACPI Detection
+--------------
+Drivers should determine their probe() type by checking for a null
+value for ACPI_HANDLE, or checking .of_node, or other information in
+the device structure.  This is detailed further in the "Driver
+Recommendations" section.
+
+In non-driver code, if the presence of ACPI needs to be detected at
+runtime, then check the value of acpi_disabled. If CONFIG_ACPI is not
+set, acpi_disabled will always be 1.
+
+
+Device Enumeration
+------------------
+Device descriptions in ACPI should use standard recognized ACPI interfaces.
+These may contain less information than is typically provided via a Device
+Tree description for the same device.  This is also one of the reasons that
+ACPI can be useful -- the driver takes into account that it may have less
+detailed information about the device and uses sensible defaults instead.
+If done properly in the driver, the hardware can change and improve over
+time without the driver having to change at all.
+
+Clocks provide an excellent example.  In DT, clocks need to be specified
+and the drivers need to take them into account.  In ACPI, the assumption
+is that UEFI will leave the device in a reasonable default state, including
+any clock settings.  If for some reason the driver needs to change a clock
+value, this can be done in an ACPI method; all the driver needs to do is
+invoke the method and not concern itself with what the method needs to do
+to change the clock.  Changing the hardware can then take place over time
+by changing what the ACPI method does, and not the driver.
+
+In DT, the parameters needed by the driver to set up clocks as in the example
+above are known as "bindings"; in ACPI, these are known as "Device Properties"
+and provided to a driver via the _DSD object.
+
+ACPI tables are described with a formal language called ASL, the ACPI
+Source Language (section 19 of the specification).  This means that there
+are always multiple ways to describe the same thing -- including device
+properties.  For example, device properties could use an ASL construct
+that looks like this: Name(KEY0, "value0").  An ACPI device driver would
+then retrieve the value of the property by evaluating the KEY0 object.
+However, using Name() this way has multiple problems: (1) ACPI limits
+names ("KEY0") to four characters unlike DT; (2) there is no industry
+wide registry that maintains a list of names, minimzing re-use; (3)
+there is also no registry for the definition of property values ("value0"),
+again making re-use difficult; and (4) how does one maintain backward
+compatibility as new hardware comes out?  The _DSD method was created
+to solve precisely these sorts of problems; Linux drivers should ALWAYS
+use the _DSD method for device properties and nothing else.
+
+The _DSM object (ACPI Section 9.14.1) could also be used for conveying
+device properties to a driver.  Linux drivers should only expect it to
+be used if _DSD cannot represent the data required, and there is no way
+to create a new UUID for the _DSD object.  Note that there is even less
+regulation of the use of _DSM than there is of _DSD.  Drivers that depend
+on the contents of _DSM objects will be more difficult to maintain over
+time because of this; as of this writing, the use of _DSM is the cause
+of quite a few firmware problems and is not recommended.
+
+Drivers should look for device properties in the _DSD object ONLY; the _DSD
+object is described in the ACPI specification section 6.2.5, but this only
+describes how to define the structure of an object returned via _DSD, and
+how specific data structures are defined by specific UUIDs.  Linux should
+only use the _DSD Device Properties UUID [5]:
+
+   -- UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301
+
+   -- http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
+
+The UEFI Forum provides a mechanism for registering device properties [4]
+so that they may be used across all operating systems supporting ACPI.
+Device properties that have not been registered with the UEFI Forum should
+not be used.
+
+Before creating new device properties, check to be sure that they have not
+been defined before and either registered in the Linux kernel documentation
+as DT bindings, or the UEFI Forum as device properties.  While we do not want
+to simply move all DT bindings into ACPI device properties, we can learn from
+what has been previously defined.
+
+If it is necessary to define a new device property, or if it makes sense to
+synthesize the definition of a binding so it can be used in any firmware,
+both DT bindings and ACPI device properties for device drivers have review
+processes.  Use them both.  When the driver itself is submitted for review
+to the Linux mailing lists, the device property definitions needed must be
+submitted at the same time.  A driver that supports ACPI and uses device
+properties will not be considered complete without their definitions.  Once
+the device property has been accepted by the Linux community, it must be
+registered with the UEFI Forum [4], which will review it again for consistency
+within the registry.  This may require iteration.  The UEFI Forum, though,
+will always be the canonical site for device property definitions.
+
+It may make sense to provide notice to the UEFI Forum that there is the
+intent to register a previously unused device property name as a means of
+reserving the name for later use.  Other operating system vendors will
+also be submitting registration requests and this may help smooth the
+process.
+
+Once registration and review have been completed, the kernel provides an
+interface for looking up device properties in a manner independent of
+whether DT or ACPI is being used.  This API should be used [6]; it can
+eliminate some duplication of code paths in driver probing functions and
+discourage divergence between DT bindings and ACPI device properties.
+
+
+Programmable Power Control Resources
+------------------------------------
+Programmable power control resources include such resources as voltage/current
+providers (regulators) and clock sources.
+
+With ACPI, the kernel clock and regulator framework is not expected to be used
+at all.
+
+The kernel assumes that power control of these resources is represented with
+Power Resource Objects (ACPI section 7.1).  The ACPI core will then handle
+correctly enabling and disabling resources as they are needed.  In order to
+get that to work, ACPI assumes each device has defined D-states and that these
+can be controlled through the optional ACPI methods _PS0, _PS1, _PS2, and _PS3;
+in ACPI, _PS0 is the method to invoke to turn a device full on, and _PS3 is for
+turning a device full off.
+
+There are two options for using those Power Resources.  They can:
+
+   -- be managed in a _PSx method which gets called on entry to power
+      state Dx.
+
+   -- be declared separately as power resources with their own _ON and _OFF
+      methods.  They are then tied back to D-states for a particular device
+      via _PRx which specifies which power resources a device needs to be on
+      while in Dx.  Kernel then tracks number of devices using a power resource
+      and calls _ON/_OFF as needed.
+
+The kernel ACPI code will also assume that the _PSx methods follow the normal
+ACPI rules for such methods:
+
+   -- If either _PS0 or _PS3 is implemented, then the other method must also
+      be implemented.
+
+   -- If a device requires usage or setup of a power resource when on, the ASL
+      should organize that it is allocated/enabled using the _PS0 method.
+
+   -- Resources allocated or enabled in the _PS0 method should be disabled
+      or de-allocated in the _PS3 method.
+
+   -- Firmware will leave the resources in a reasonable state before handing
+      over control to the kernel.
+
+Such code in _PSx methods will of course be very platform specific.  But,
+this allows the driver to abstract out the interface for operating the device
+and avoid having to read special non-standard values from ACPI tables. Further,
+abstracting the use of these resources allows the hardware to change over time
+without requiring updates to the driver.
+
+
+Clocks
+------
+ACPI makes the assumption that clocks are initialized by the firmware --
+UEFI, in this case -- to some working value before control is handed over
+to the kernel.  This has implications for devices such as UARTs, or SoC-driven
+LCD displays, for example.
+
+When the kernel boots, the clocks are assumed to be set to reasonable
+working values.  If for some reason the frequency needs to change -- e.g.,
+throttling for power management -- the device driver should expect that
+process to be abstracted out into some ACPI method that can be invoked
+(please see the ACPI specification for further recommendations on standard
+methods to be expected).  The only exceptions to this are CPU clocks where
+CPPC provides a much richer interface than ACPI methods.  If the clocks
+are not set, there is no direct way for Linux to control them.
+
+If an SoC vendor wants to provide fine-grained control of the system clocks,
+they could do so by providing ACPI methods that could be invoked by Linux
+drivers.  However, this is NOT recommended and Linux drivers should NOT use
+such methods, even if they are provided.  Such methods are not currently
+standardized in the ACPI specification, and using them could tie a kernel
+to a very specific SoC, or tie an SoC to a very specific version of the
+kernel, both of which we are trying to avoid.
+
+
+Driver Recommendations
+----------------------
+DO NOT remove any DT handling when adding ACPI support for a driver.  The
+same device may be used on many different systems.
+
+DO try to structure the driver so that it is data-driven.  That is, set up
+a struct containing internal per-device state based on defaults and whatever
+else must be discovered by the driver probe function.  Then, have the rest
+of the driver operate off of the contents of that struct.  Doing so should
+allow most divergence between ACPI and DT functionality to be kept local to
+the probe function instead of being scattered throughout the driver.  For
+example:
+
+static int device_probe_dt(struct platform_device *pdev)
+{
+       /* DT specific functionality */
+       ...
+}
+
+static int device_probe_acpi(struct platform_device *pdev)
+{
+       /* ACPI specific functionality */
+       ...
+}
+
+static int device_probe(struct platform_device *pdev)
+{
+       ...
+       struct device_node node = pdev->dev.of_node;
+       ...
+
+       if (node)
+               ret = device_probe_dt(pdev);
+       else if (ACPI_HANDLE(&pdev->dev))
+               ret = device_probe_acpi(pdev);
+       else
+               /* other initialization */
+               ...
+       /* Continue with any generic probe operations */
+       ...
+}
+
+DO keep the MODULE_DEVICE_TABLE entries together in the driver to make it
+clear the different names the driver is probed for, both from DT and from
+ACPI:
+
+static struct of_device_id virtio_mmio_match[] = {
+        { .compatible = "virtio,mmio", },
+        { }
+};
+MODULE_DEVICE_TABLE(of, virtio_mmio_match);
+
+static const struct acpi_device_id virtio_mmio_acpi_match[] = {
+        { "LNRO0005", },
+        { }
+};
+MODULE_DEVICE_TABLE(acpi, virtio_mmio_acpi_match);
+
+
+ASWG
+----
+The ACPI specification changes regularly.  During the year 2014, for instance,
+version 5.1 was released and version 6.0 substantially completed, with most of
+the changes being driven by ARM-specific requirements.  Proposed changes are
+presented and discussed in the ASWG (ACPI Specification Working Group) which
+is a part of the UEFI Forum.
+
+Participation in this group is open to all UEFI members.  Please see
+http://www.uefi.org/workinggroup for details on group membership.
+
+It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification
+as closely as possible, and to only implement functionality that complies with
+the released standards from UEFI ASWG.  As a practical matter, there will be
+vendors that provide bad ACPI tables or violate the standards in some way.
+If this is because of errors, quirks and fixups may be necessary, but will
+be avoided if possible.  If there are features missing from ACPI that preclude
+it from being used on a platform, ECRs (Engineering Change Requests) should be
+submitted to ASWG and go through the normal approval process; for those that
+are not UEFI members, many other members of the Linux community are and would
+likely be willing to assist in submitting ECRs.
+
+
+Linux Code
+----------
+Individual items specific to Linux on ARM, contained in the the Linux
+source code, are in the list that follows:
+
+ACPI_OS_NAME           This macro defines the string to be returned when
+                       an ACPI method invokes the _OS method.  On ARM64
+                       systems, this macro will be "Linux" by default.
+                       The command line parameter acpi_os=<string>
+                       can be used to set it to some other value.  The
+                       default value for other architectures is "Microsoft
+                       Windows NT", for example.
+
+ACPI Objects
+------------
+Detailed expectations for ACPI tables and object are listed in the file
+Documentation/arm64/acpi_object_usage.txt.
+
+
+References
+----------
+[0] http://silver.arm.com -- document ARM-DEN-0029, or newer
+    "Server Base System Architecture", version 2.3, dated 27 Mar 2014
+
+[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf
+    Document ARM-DEN-0044A, or newer: "Server Base Boot Requirements, System
+    Software on ARM Platforms", dated 16 Aug 2014
+
+[2] http://www.secretlab.ca/archives/151, 10 Jan 2015, Copyright (c) 2015,
+    Linaro Ltd., written by Grant Likely.  A copy of the verbatim text (apart
+    from formatting) is also in Documentation/arm64/why_use_acpi.txt.
+
+[3] AMD ACPI for Seattle platform documentation:
+    http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf
+
+[4] http://www.uefi.org/acpi -- please see the link for the "ACPI _DSD Device
+    Property Registry Instructions"
+
+[5] http://www.uefi.org/acpi -- please see the link for the "_DSD (Device
+    Specific Data) Implementation Guide"
+
+[6] Kernel code for the unified device property interface can be found in
+    include/linux/property.h and drivers/base/property.c.
+
+
+Authors
+-------
+Al Stone <al.stone@linaro.org>
+Graeme Gregory <graeme.gregory@linaro.org>
+Hanjun Guo <hanjun.guo@linaro.org>
+
+Grant Likely <grant.likely@linaro.org>, for the "Why ACPI on ARM?" section
diff --git a/Documentation/devicetree/bindings/arc/pct.txt b/Documentation/devicetree/bindings/arc/pct.txt
new file mode 100644 (file)
index 0000000..7b95884
--- /dev/null
@@ -0,0 +1,20 @@
+* ARC Performance Counters
+
+The ARC700 can be configured with a pipeline performance monitor for counting
+CPU and cache events like cache misses and hits. Like conventional PCT there
+are 100+ hardware conditions dynamically mapped to upto 32 counters
+
+Note that:
+ * The ARC 700 PCT does not support interrupts; although HW events may be
+   counted, the HW events themselves cannot serve as a trigger for a sample.
+
+Required properties:
+
+- compatible : should contain
+       "snps,arc700-pct"
+
+Example:
+
+pmu {
+        compatible = "snps,arc700-pct";
+};
diff --git a/Documentation/devicetree/bindings/arc/pmu.txt b/Documentation/devicetree/bindings/arc/pmu.txt
deleted file mode 100644 (file)
index 49d5173..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-* ARC Performance Monitor Unit
-
-The ARC 700 can be configured with a pipeline performance monitor for counting
-CPU and cache events like cache misses and hits.
-
-Note that:
- * ARC 700 refers to a family of ARC processor cores;
-   - There is only one type of PMU available for the whole family;
-   - The PMU may support different sets of events; supported events are probed
-     at boot time, as required by the reference manual.
-
- * The ARC 700 PMU does not support interrupts; although HW events may be
-   counted, the HW events themselves cannot serve as a trigger for a sample.
-
-Required properties:
-
-- compatible : should contain
-       "snps,arc700-pmu"
-
-Example:
-
-pmu {
-        compatible = "snps,arc700-pmu";
-};
diff --git a/Documentation/devicetree/bindings/arm/altera.txt b/Documentation/devicetree/bindings/arm/altera.txt
new file mode 100644 (file)
index 0000000..558735a
--- /dev/null
@@ -0,0 +1,14 @@
+Altera's SoCFPGA platform device tree bindings
+---------------------------------------------
+
+Boards with Cyclone 5 SoC:
+Required root node properties:
+compatible = "altr,socfpga-cyclone5", "altr,socfpga";
+
+Boards with Arria 5 SoC:
+Required root node properties:
+compatible = "altr,socfpga-arria5", "altr,socfpga";
+
+Boards with Arria 10 SoC:
+Required root node properties:
+compatible = "altr,socfpga-arria10", "altr,socfpga";
index 256b4d8..e774128 100644 (file)
@@ -17,7 +17,10 @@ to deliver its interrupts via SPIs.
 - interrupts : Interrupt list for secure, non-secure, virtual and
   hypervisor timers, in that order.
 
-- clock-frequency : The frequency of the main counter, in Hz. Optional.
+- clock-frequency : The frequency of the main counter, in Hz. Should be present
+  only where necessary to work around broken firmware which does not configure
+  CNTFRQ on all CPUs to a uniform correct value. Use of this property is
+  strongly discouraged; fix your firmware unless absolutely impossible.
 
 - always-on : a boolean property. If present, the timer is powered through an
   always-on power domain, therefore it never loses context.
@@ -46,7 +49,8 @@ Example:
 
 - compatible : Should at least contain "arm,armv7-timer-mem".
 
-- clock-frequency : The frequency of the main counter, in Hz. Optional.
+- clock-frequency : The frequency of the main counter, in Hz. Should be present
+  only when firmware has not configured the MMIO CNTFRQ registers.
 
 - reg : The control frame base address.
 
index 74607b6..5e10c34 100644 (file)
@@ -9,11 +9,17 @@ Properties:
                "qcom,scss-timer" - scorpion subsystem
 
 - interrupts : Interrupts for the debug timer, the first general purpose
-               timer, and optionally a second general purpose timer in that
-               order.
+               timer, and optionally a second general purpose timer, and
+               optionally as well, 2 watchdog interrupts, in that order.
 
 - reg : Specifies the base address of the timer registers.
 
+- clocks: Reference to the parent clocks, one per output clock. The parents
+          must appear in the same order as the clock names.
+
+- clock-names: The name of the clocks as free-form strings. They should be in
+               the same order as the clocks.
+
 - clock-frequency : The frequency of the debug timer and the general purpose
                     timer(s) in Hz in that order.
 
@@ -29,9 +35,13 @@ Example:
                compatible = "qcom,scss-timer", "qcom,msm-timer";
                interrupts = <1 1 0x301>,
                             <1 2 0x301>,
-                            <1 3 0x301>;
+                            <1 3 0x301>,
+                            <1 4 0x301>,
+                            <1 5 0x301>;
                reg = <0x0200a000 0x100>;
                clock-frequency = <19200000>,
                                  <32768>;
+               clocks = <&sleep_clk>;
+               clock-names = "sleep";
                cpu-offset = <0x40000>;
        };
diff --git a/Documentation/devicetree/bindings/common-properties.txt b/Documentation/devicetree/bindings/common-properties.txt
new file mode 100644 (file)
index 0000000..3193979
--- /dev/null
@@ -0,0 +1,60 @@
+Common properties
+
+The ePAPR specification does not define any properties related to hardware
+byteswapping, but endianness issues show up frequently in porting Linux to
+different machine types.  This document attempts to provide a consistent
+way of handling byteswapping across drivers.
+
+Optional properties:
+ - big-endian: Boolean; force big endian register accesses
+   unconditionally (e.g. ioread32be/iowrite32be).  Use this if you
+   know the peripheral always needs to be accessed in BE mode.
+ - little-endian: Boolean; force little endian register accesses
+   unconditionally (e.g. readl/writel).  Use this if you know the
+   peripheral always needs to be accessed in LE mode.
+ - native-endian: Boolean; always use register accesses matched to the
+   endianness of the kernel binary (e.g. LE vmlinux -> readl/writel,
+   BE vmlinux -> ioread32be/iowrite32be).  In this case no byteswaps
+   will ever be performed.  Use this if the hardware "self-adjusts"
+   register endianness based on the CPU's configured endianness.
+
+If a binding supports these properties, then the binding should also
+specify the default behavior if none of these properties are present.
+In such cases, little-endian is the preferred default, but it is not
+a requirement.  The of_device_is_big_endian() and of_fdt_is_big_endian()
+helper functions do assume that little-endian is the default, because
+most existing (PCI-based) drivers implicitly default to LE by using
+readl/writel for MMIO accesses.
+
+Examples:
+Scenario 1 : CPU in LE mode & device in LE mode.
+dev: dev@40031000 {
+             compatible = "name";
+             reg = <0x40031000 0x1000>;
+             ...
+             native-endian;
+};
+
+Scenario 2 : CPU in LE mode & device in BE mode.
+dev: dev@40031000 {
+             compatible = "name";
+             reg = <0x40031000 0x1000>;
+             ...
+             big-endian;
+};
+
+Scenario 3 : CPU in BE mode & device in BE mode.
+dev: dev@40031000 {
+             compatible = "name";
+             reg = <0x40031000 0x1000>;
+             ...
+             native-endian;
+};
+
+Scenario 4 : CPU in BE mode & device in LE mode.
+dev: dev@40031000 {
+             compatible = "name";
+             reg = <0x40031000 0x1000>;
+             ...
+             little-endian;
+};
diff --git a/Documentation/devicetree/bindings/cris/axis.txt b/Documentation/devicetree/bindings/cris/axis.txt
new file mode 100644 (file)
index 0000000..d209ca2
--- /dev/null
@@ -0,0 +1,9 @@
+Axis Communications AB
+ARTPEC series SoC Device Tree Bindings
+
+
+CRISv32 based SoCs are ETRAX FS and ARTPEC-3:
+
+    - compatible = "axis,crisv32";
+
+
diff --git a/Documentation/devicetree/bindings/cris/boards.txt b/Documentation/devicetree/bindings/cris/boards.txt
new file mode 100644 (file)
index 0000000..533dd27
--- /dev/null
@@ -0,0 +1,8 @@
+Boards based on the CRIS SoCs:
+
+Required root node properties:
+    - compatible = should be one or more of the following:
+       - "axis,dev88"  - for Axis devboard 88 with ETRAX FS
+
+Optional:
+
diff --git a/Documentation/devicetree/bindings/cris/interrupts.txt b/Documentation/devicetree/bindings/cris/interrupts.txt
new file mode 100644 (file)
index 0000000..e8b123b
--- /dev/null
@@ -0,0 +1,23 @@
+* CRISv32 Interrupt Controller
+
+Interrupt controller for the CRISv32 SoCs.
+
+Main node required properties:
+
+- compatible : should be:
+       "axis,crisv32-intc"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. The type shall be a <u32> and the value shall be 1.
+- reg: physical base address and size of the intc registers map.
+
+Example:
+
+       intc: interrupt-controller {
+               compatible = "axis,crisv32-intc";
+               reg = <0xb001c000 0x1000>;
+               interrupt-controller;
+               #interrupt-cells = <1>;
+       };
+
+
diff --git a/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt
new file mode 100644 (file)
index 0000000..d305876
--- /dev/null
@@ -0,0 +1,47 @@
+Applied Micro X-Gene SoC DMA nodes
+
+DMA nodes are defined to describe on-chip DMA interfaces in
+APM X-Gene SoC.
+
+Required properties for DMA interfaces:
+- compatible: Should be "apm,xgene-dma".
+- device_type: set to "dma".
+- reg: Address and length of the register set for the device.
+  It contains the information of registers in the following order:
+  1st - DMA control and status register address space.
+  2nd - Descriptor ring control and status register address space.
+  3rd - Descriptor ring command register address space.
+  4th - Soc efuse register address space.
+- interrupts: DMA has 5 interrupts sources. 1st interrupt is
+  DMA error reporting interrupt. 2nd, 3rd, 4th and 5th interrupts
+  are completion interrupts for each DMA channels.
+- clocks: Reference to the clock entry.
+
+Optional properties:
+- dma-coherent : Present if dma operations are coherent
+
+Example:
+       dmaclk: dmaclk@1f27c000 {
+               compatible = "apm,xgene-device-clock";
+               #clock-cells = <1>;
+               clocks = <&socplldiv2 0>;
+               reg = <0x0 0x1f27c000 0x0 0x1000>;
+               reg-names = "csr-reg";
+               clock-output-names = "dmaclk";
+       };
+
+       dma: dma@1f270000 {
+                       compatible = "apm,xgene-storm-dma";
+                       device_type = "dma";
+                       reg = <0x0 0x1f270000 0x0 0x10000>,
+                             <0x0 0x1f200000 0x0 0x10000>,
+                             <0x0 0x1b008000 0x0 0x2000>,
+                             <0x0 0x1054a000 0x0 0x100>;
+                       interrupts = <0x0 0x82 0x4>,
+                                    <0x0 0xb8 0x4>,
+                                    <0x0 0xb9 0x4>,
+                                    <0x0 0xba 0x4>,
+                                    <0x0 0xbb 0x4>;
+                       dma-coherent;
+                       clocks = <&dmaclk 0>;
+       };
diff --git a/Documentation/devicetree/bindings/dma/jz4780-dma.txt b/Documentation/devicetree/bindings/dma/jz4780-dma.txt
new file mode 100644 (file)
index 0000000..f25feee
--- /dev/null
@@ -0,0 +1,56 @@
+* Ingenic JZ4780 DMA Controller
+
+Required properties:
+
+- compatible: Should be "ingenic,jz4780-dma"
+- reg: Should contain the DMA controller registers location and length.
+- interrupts: Should contain the interrupt specifier of the DMA controller.
+- interrupt-parent: Should be the phandle of the interrupt controller that
+- clocks: Should contain a clock specifier for the JZ4780 PDMA clock.
+- #dma-cells: Must be <2>. Number of integer cells in the dmas property of
+  DMA clients (see below).
+
+Optional properties:
+
+- ingenic,reserved-channels: Bitmask of channels to reserve for devices that
+  need a specific channel. These channels will only be assigned when explicitly
+  requested by a client. The primary use for this is channels 0 and 1, which
+  can be configured to have special behaviour for NAND/BCH when using
+  programmable firmware.
+
+Example:
+
+dma: dma@13420000 {
+       compatible = "ingenic,jz4780-dma";
+       reg = <0x13420000 0x10000>;
+
+       interrupt-parent = <&intc>;
+       interrupts = <10>;
+
+       clocks = <&cgu JZ4780_CLK_PDMA>;
+
+       #dma-cells = <2>;
+
+       ingenic,reserved-channels = <0x3>;
+};
+
+DMA clients must use the format described in dma.txt, giving a phandle to the
+DMA controller plus the following 2 integer cells:
+
+1. Request type: The DMA request type for transfers to/from the device on
+   the allocated channel, as defined in the SoC documentation.
+
+2. Channel: If set to 0xffffffff, any available channel will be allocated for
+   the client. Otherwise, the exact channel specified will be used. The channel
+   should be reserved on the DMA controller using the ingenic,reserved-channels
+   property.
+
+Example:
+
+uart0: serial@10030000 {
+       ...
+       dmas = <&dma 0x14 0xffffffff
+               &dma 0x15 0xffffffff>;
+       dma-names = "tx", "rx";
+       ...
+};
index f8c3311..1c9d48e 100644 (file)
@@ -4,6 +4,7 @@ Required properties:
 - compatible: must be one of the following:
  * "qcom,bam-v1.4.0" for MSM8974, APQ8074 and APQ8084
  * "qcom,bam-v1.3.0" for APQ8064, IPQ8064 and MSM8960
+ * "qcom,bam-v1.7.0" for MSM8916
 - reg: Address range for DMA registers
 - interrupts: Should contain the one interrupt shared by all channels
 - #dma-cells: must be <1>, the cell in the dmas property of the client device
diff --git a/Documentation/devicetree/bindings/dma/rcar-audmapp.txt b/Documentation/devicetree/bindings/dma/rcar-audmapp.txt
deleted file mode 100644 (file)
index 61bca50..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-* R-Car Audio DMAC peri peri Device Tree bindings
-
-Required properties:
-- compatible:  should be "renesas,rcar-audmapp"
-- #dma-cells:  should be <1>, see "dmas" property below
-
-Example:
-       audmapp: audio-dma-pp@0xec740000 {
-               compatible = "renesas,rcar-audmapp";
-               #dma-cells = <1>;
-
-               reg = <0 0xec740000 0 0x200>;
-       };
-
-
-* DMA client
-
-Required properties:
-- dmas:                a list of <[DMA multiplexer phandle] [SRS << 8 | DRS]> pairs.
-               where SRS/DRS are specified in the SoC manual.
-               It will be written into PDMACHCR as high 16-bit parts.
-- dma-names:   a list of DMA channel names, one per "dmas" entry
-
-Example:
-
-       dmas = <&audmapp 0x2d00
-               &audmapp 0x3700>;
-       dma-names =  "src0_ssiu0",
-                    "dvc0_ssiu0";
diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
new file mode 100644 (file)
index 0000000..040f365
--- /dev/null
@@ -0,0 +1,37 @@
+* Renesas USB DMA Controller Device Tree bindings
+
+Required Properties:
+- compatible: must contain "renesas,usb-dmac"
+- reg: base address and length of the registers block for the DMAC
+- interrupts: interrupt specifiers for the DMAC, one for each entry in
+  interrupt-names.
+- interrupt-names: one entry per channel, named "ch%u", where %u is the
+  channel number ranging from zero to the number of channels minus one.
+- clocks: a list of phandle + clock-specifier pairs.
+- #dma-cells: must be <1>, the cell specifies the channel number of the DMAC
+  port connected to the DMA client.
+- dma-channels: number of DMA channels
+
+Example: R8A7790 (R-Car H2) USB-DMACs
+
+       usb_dmac0: dma-controller@e65a0000 {
+               compatible = "renesas,usb-dmac";
+               reg = <0 0xe65a0000 0 0x100>;
+               interrupts = <0 109 IRQ_TYPE_LEVEL_HIGH
+                             0 109 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "ch0", "ch1";
+               clocks = <&mstp3_clks R8A7790_CLK_USBDMAC0>;
+               #dma-cells = <1>;
+               dma-channels = <2>;
+       };
+
+       usb_dmac1: dma-controller@e65b0000 {
+               compatible = "renesas,usb-dmac";
+               reg = <0 0xe65b0000 0 0x100>;
+               interrupts = <0 110 IRQ_TYPE_LEVEL_HIGH
+                             0 110 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "ch0", "ch1";
+               clocks = <&mstp3_clks R8A7790_CLK_USBDMAC1>;
+               #dma-cells = <1>;
+               dma-channels = <2>;
+       };
index 4611aa8..f20b111 100644 (file)
@@ -3,10 +3,13 @@
 Required properties:
 - #address-cells, #size-cells : Must be present if the device has sub-nodes
   representing partitions.
-- compatible : Should be the manufacturer and the name of the chip. Bear in mind
-               the DT binding is not Linux-only, but in case of Linux, see the
-               "spi_nor_ids" table in drivers/mtd/spi-nor/spi-nor.c for the list
-               of supported chips.
+- compatible : May include a device-specific string consisting of the
+               manufacturer and name of the chip. Bear in mind the DT binding
+               is not Linux-only, but in case of Linux, see the "m25p_ids"
+               table in drivers/mtd/devices/m25p80.c for the list of supported
+               chips.
+               Must also include "nor-jedec" for any SPI NOR flash that can be
+               identified by the JEDEC READ ID opcode (0x9F).
 - reg : Chip-Select number
 - spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
 
@@ -22,7 +25,7 @@ Example:
        flash: m25p80@0 {
                #address-cells = <1>;
                #size-cells = <1>;
-               compatible = "spansion,m25p80";
+               compatible = "spansion,m25p80", "nor-jedec";
                reg = <0>;
                spi-max-frequency = <40000000>;
                m25p,fast-read;
index de8b517..4f833e3 100644 (file)
@@ -14,7 +14,7 @@ Optional properties:
  - marvell,nand-enable-arbiter:        Set to enable the bus arbiter
  - marvell,nand-keep-config:   Set to keep the NAND controller config as set
                                by the bootloader
- - num-cs:                     Number of chipselect lines to usw
+ - num-cs:                     Number of chipselect lines to use
  - nand-on-flash-bbt:          boolean to enable on flash bbt option if
                                not present false
  - nand-ecc-strength:           number of bits to correct per ECC step
index 0273adb..086d6f4 100644 (file)
@@ -21,7 +21,7 @@ Optional properties:
 - nand-ecc-mode : one of the supported ECC modes ("hw", "hw_syndrome", "soft",
   "soft_bch" or "none")
 
-see Documentation/devicetree/mtd/nand.txt for generic bindings.
+see Documentation/devicetree/bindings/mtd/nand.txt for generic bindings.
 
 
 Examples:
index b50d7a6..e00c2e9 100644 (file)
@@ -1,10 +1,17 @@
 Freescale i.MX PWM controller
 
 Required properties:
-- compatible: should be "fsl,<soc>-pwm"
+- compatible : should be "fsl,<soc>-pwm" and one of the following
+   compatible strings:
+  - "fsl,imx1-pwm" for PWM compatible with the one integrated on i.MX1
+  - "fsl,imx27-pwm" for PWM compatible with the one integrated on i.MX27
 - reg: physical base address and length of the controller's registers
 - #pwm-cells: should be 2. See pwm.txt in this directory for a description of
   the cells format.
+- clocks : Clock specifiers for both ipg and per clocks.
+- clock-names : Clock names should include both "ipg" and "per"
+See the clock consumer binding,
+       Documentation/devicetree/bindings/clock/clock-bindings.txt
 - interrupts: The interrupt for the pwm controller
 
 Example:
@@ -13,5 +20,8 @@ pwm1: pwm@53fb4000 {
        #pwm-cells = <2>;
        compatible = "fsl,imx53-pwm", "fsl,imx27-pwm";
        reg = <0x53fb4000 0x4000>;
+       clocks = <&clks IMX5_CLK_PWM1_IPG_GATE>,
+                <&clks IMX5_CLK_PWM1_HF_GATE>;
+       clock-names = "ipg", "per";
        interrupts = <61>;
 };
index 83737a3..8033919 100644 (file)
@@ -26,6 +26,7 @@ aptina        Aptina Imaging
 arasan Arasan Chip Systems
 arm    ARM Ltd.
 armadeus       ARMadeus Systems SARL
+artesyn        Artesyn Embedded Technologies Inc.
 asahi-kasei    Asahi Kasei Corp.
 atmel  Atmel Corporation
 auo    AU Optronics Corporation
index bb9753b..480c8de 100644 (file)
@@ -49,25 +49,26 @@ The dma_buf buffer sharing API usage contains the following steps:
    The buffer exporter announces its wish to export a buffer. In this, it
    connects its own private buffer data, provides implementation for operations
    that can be performed on the exported dma_buf, and flags for the file
-   associated with this buffer.
+   associated with this buffer. All these fields are filled in struct
+   dma_buf_export_info, defined via the DEFINE_DMA_BUF_EXPORT_INFO macro.
 
    Interface:
-      struct dma_buf *dma_buf_export_named(void *priv, struct dma_buf_ops *ops,
-                                    size_t size, int flags,
-                                    const char *exp_name)
+      DEFINE_DMA_BUF_EXPORT_INFO(exp_info)
+      struct dma_buf *dma_buf_export(struct dma_buf_export_info *exp_info)
 
-   If this succeeds, dma_buf_export_named allocates a dma_buf structure, and
+   If this succeeds, dma_buf_export allocates a dma_buf structure, and
    returns a pointer to the same. It also associates an anonymous file with this
    buffer, so it can be exported. On failure to allocate the dma_buf object,
    it returns NULL.
 
-   'exp_name' is the name of exporter - to facilitate information while
-   debugging.
+   'exp_name' in struct dma_buf_export_info is the name of exporter - to
+   facilitate information while debugging. It is set to KBUILD_MODNAME by
+   default, so exporters don't have to provide a specific name, if they don't
+   wish to.
+
+   DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct dma_buf_export_info,
+   zeroes it out and pre-populates exp_name in it.
 
-   Exporting modules which do not wish to provide any specific name may use the
-   helper define 'dma_buf_export()', with the same arguments as above, but
-   without the last argument; a KBUILD_MODNAME pre-processor directive will be
-   inserted in place of 'exp_name' instead.
 
 2. Userspace gets a handle to pass around to potential buffer-users
 
index 7240438..95c13aa 100644 (file)
@@ -187,8 +187,10 @@ Check RDMA and NFS Setup
     To further test the InfiniBand software stack, use IPoIB (this
     assumes you have two IB hosts named host1 and host2):
 
-    host1$ ifconfig ib0 a.b.c.x
-    host2$ ifconfig ib0 a.b.c.y
+    host1$ ip link set dev ib0 up
+    host1$ ip address add dev ib0 a.b.c.x
+    host2$ ip link set dev ib0 up
+    host2$ ip address add dev ib0 a.b.c.y
     host1$ ping a.b.c.y
     host2$ ping a.b.c.x
 
@@ -229,7 +231,8 @@ NFS/RDMA Setup
 
     $ modprobe ib_mthca
     $ modprobe ib_ipoib
-    $ ifconfig ib0 a.b.c.d
+    $ ip li set dev ib0 up
+    $ ip addr add dev ib0 a.b.c.d
 
     NOTE: use unique addresses for the client and server
 
index 0bfafe1..5a5a055 100644 (file)
@@ -228,30 +228,19 @@ default behaviour.
 Deprecated Mount Options
 ========================
 
-  delaylog/nodelaylog
-       Delayed logging is the only logging method that XFS supports
-       now, so these mount options are now ignored.
-
-       Due for removal in 3.12.
-
-  ihashsize=value
-       In memory inode hashes have been removed, so this option has
-       no function as of August 2007. Option is deprecated.
-
-       Due for removal in 3.12.
+None at present.
 
-  irixsgid
-       This behaviour is now controlled by a sysctl, so the mount
-       option is ignored.
 
-       Due for removal in 3.12.
+Removed Mount Options
+=====================
 
-  osyncisdsync
-  osyncisosync
-       O_SYNC and O_DSYNC are fully supported, so there is no need
-       for these options any more.
+  Name                         Removed
+  ----                         -------
+  delaylog/nodelaylog          v3.20
+  ihashsize                    v3.20
+  irixsgid                     v3.20
+  osyncisdsync/osyncisosync    v3.20
 
-       Due for removal in 3.12.
 
 sysctls
 =======
index 8136e1f..51f4221 100644 (file)
@@ -321,6 +321,7 @@ Code  Seq#(hex)     Include File            Comments
 0xDB   00-0F   drivers/char/mwave/mwavepub.h
 0xDD   00-3F   ZFCP device driver      see drivers/s390/scsi/
                                        <mailto:aherrman@de.ibm.com>
+0xEC   00-01   drivers/platform/chrome/cros_ec_dev.h   ChromeOS EC driver
 0xF3   00-3F   drivers/usb/misc/sisusbvga/sisusb.h     sisfb (in development)
                                        <mailto:thomas@winischhofer.net>
 0xF4   00-1F   video/mbxfb.h           mbxfb
index 84960c6..f6befa9 100644 (file)
@@ -165,7 +165,7 @@ multipliers 'Kilo', 'Mega', and 'Giga', equalling 2^10, 2^20, and 2^30
 bytes respectively. Such letter suffixes can also be entirely omitted.
 
 
-       acpi=           [HW,ACPI,X86]
+       acpi=           [HW,ACPI,X86,ARM64]
                        Advanced Configuration and Power Interface
                        Format: { force | off | strict | noirq | rsdt }
                        force -- enable ACPI if default was off
@@ -175,6 +175,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                strictly ACPI specification compliant.
                        rsdt -- prefer RSDT over (default) XSDT
                        copy_dsdt -- copy DSDT to memory
+                       For ARM64, ONLY "acpi=off" or "acpi=force" are available
 
                        See also Documentation/power/runtime_pm.txt, pci=noacpi
 
index fc04c14..72a150d 100644 (file)
@@ -1355,6 +1355,24 @@ Sysfs notes:
        rfkill controller switch "tpacpi_uwb_sw": refer to
        Documentation/rfkill.txt for details.
 
+Adaptive keyboard
+-----------------
+
+sysfs device attribute: adaptive_kbd_mode
+
+This sysfs attribute controls the keyboard "face" that will be shown on the
+Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
+and set.
+
+1 = Home mode
+2 = Web-browser mode
+3 = Web-conference mode
+4 = Function mode
+5 = Layflat mode
+
+For more details about which buttons will appear depending on the mode, please
+review the laptop's user guide:
+http://www.lenovo.com/shop/americas/content/user_guides/x1carbon_2_ug_en.pdf
 
 Multiple Commands, Module Parameters
 ------------------------------------
diff --git a/Documentation/md-cluster.txt b/Documentation/md-cluster.txt
new file mode 100644 (file)
index 0000000..de1af7d
--- /dev/null
@@ -0,0 +1,176 @@
+The cluster MD is a shared-device RAID for a cluster.
+
+
+1. On-disk format
+
+Separate write-intent-bitmap are used for each cluster node.
+The bitmaps record all writes that may have been started on that node,
+and may not yet have finished. The on-disk layout is:
+
+0                    4k                     8k                    12k
+-------------------------------------------------------------------
+| idle                | md super            | bm super [0] + bits |
+| bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
+| bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
+| bm bits [3, contd]  |                     |                     |
+
+During "normal" functioning we assume the filesystem ensures that only one
+node writes to any given block at a time, so a write
+request will
+ - set the appropriate bit (if not already set)
+ - commit the write to all mirrors
+ - schedule the bit to be cleared after a timeout.
+
+Reads are just handled normally.  It is up to the filesystem to
+ensure one node doesn't read from a location where another node (or the same
+node) is writing.
+
+
+2. DLM Locks for management
+
+There are two locks for managing the device:
+
+2.1 Bitmap lock resource (bm_lockres)
+
+ The bm_lockres protects individual node bitmaps. They are named in the
+ form bitmap001 for node 1, bitmap002 for node and so on. When a node
+ joins the cluster, it acquires the lock in PW mode and it stays so
+ during the lifetime the node is part of the cluster. The lock resource
+ number is based on the slot number returned by the DLM subsystem. Since
+ DLM starts node count from one and bitmap slots start from zero, one is
+ subtracted from the DLM slot number to arrive at the bitmap slot number.
+
+3. Communication
+
+Each node has to communicate with other nodes when starting or ending
+resync, and metadata superblock updates.
+
+3.1 Message Types
+
+ There are 3 types, of messages which are passed
+
+ 3.1.1 METADATA_UPDATED: informs other nodes that the metadata has been
+   updated, and the node must re-read the md superblock. This is performed
+   synchronously.
+
+ 3.1.2 RESYNC: informs other nodes that a resync is initiated or ended
+   so that each node may suspend or resume the region.
+
+3.2 Communication mechanism
+
+ The DLM LVB is used to communicate within nodes of the cluster. There
+ are three resources used for the purpose:
+
+  3.2.1 Token: The resource which protects the entire communication
+   system. The node having the token resource is allowed to
+   communicate.
+
+  3.2.2 Message: The lock resource which carries the data to
+   communicate.
+
+  3.2.3 Ack: The resource, acquiring which means the message has been
+   acknowledged by all nodes in the cluster. The BAST of the resource
+   is used to inform the receive node that a node wants to communicate.
+
+The algorithm is:
+
+ 1. receive status
+
+   sender                         receiver                   receiver
+   ACK:CR                          ACK:CR                     ACK:CR
+
+ 2. sender get EX of TOKEN
+    sender get EX of MESSAGE
+    sender                        receiver                 receiver
+    TOKEN:EX                       ACK:CR                   ACK:CR
+    MESSAGE:EX
+    ACK:CR
+
+    Sender checks that it still needs to send a message. Messages received
+    or other events that happened while waiting for the TOKEN may have made
+    this message inappropriate or redundant.
+
+ 3. sender write LVB.
+    sender down-convert MESSAGE from EX to CR
+    sender try to get EX of ACK
+    [ wait until all receiver has *processed* the MESSAGE ]
+
+                                     [ triggered by bast of ACK ]
+                                     receiver get CR of MESSAGE
+                                     receiver read LVB
+                                     receiver processes the message
+                                     [ wait finish ]
+                                     receiver release ACK
+
+   sender                         receiver                   receiver
+   TOKEN:EX                       MESSAGE:CR                 MESSAGE:CR
+   MESSAGE:CR
+   ACK:EX
+
+ 4. triggered by grant of EX on ACK (indicating all receivers have processed
+    message)
+    sender down-convert ACK from EX to CR
+    sender release MESSAGE
+    sender release TOKEN
+                               receiver upconvert to EX of MESSAGE
+                               receiver get CR of ACK
+                               receiver release MESSAGE
+
+   sender                      receiver                   receiver
+   ACK:CR                       ACK:CR                     ACK:CR
+
+
+4. Handling Failures
+
+4.1 Node Failure
+ When a node fails, the DLM informs the cluster with the slot. The node
+ starts a cluster recovery thread. The cluster recovery thread:
+       - acquires the bitmap<number> lock of the failed node
+       - opens the bitmap
+       - reads the bitmap of the failed node
+       - copies the set bitmap to local node
+       - cleans the bitmap of the failed node
+       - releases bitmap<number> lock of the failed node
+       - initiates resync of the bitmap on the current node
+
+ The resync process, is the regular md resync. However, in a clustered
+ environment when a resync is performed, it needs to tell other nodes
+ of the areas which are suspended. Before a resync starts, the node
+ send out RESYNC_START with the (lo,hi) range of the area which needs
+ to be suspended. Each node maintains a suspend_list, which contains
+ the list  of ranges which are currently suspended. On receiving
+ RESYNC_START, the node adds the range to the suspend_list. Similarly,
+ when the node performing resync finishes, it send RESYNC_FINISHED
+ to other nodes and other nodes remove the corresponding entry from
+ the suspend_list.
+
+ A helper function, should_suspend() can be used to check if a particular
+ I/O range should be suspended or not.
+
+4.2 Device Failure
+ Device failures are handled and communicated with the metadata update
+ routine.
+
+5. Adding a new Device
+For adding a new device, it is necessary that all nodes "see" the new device
+to be added. For this, the following algorithm is used:
+
+    1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
+       ioctl(ADD_NEW_DISC with disc.state set to MD_DISK_CLUSTER_ADD)
+    2. Node 1 sends NEWDISK with uuid and slot number
+    3. Other nodes issue kobject_uevent_env with uuid and slot number
+       (Steps 4,5 could be a udev rule)
+    4. In userspace, the node searches for the disk, perhaps
+       using blkid -t SUB_UUID=""
+    5. Other nodes issue either of the following depending on whether the disk
+       was found:
+       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
+                disc.number set to slot number)
+       ioctl(CLUSTERED_DISK_NACK)
+    6. Other nodes drop lock on no-new-devs (CR) if device is found
+    7. Node 1 attempts EX lock on no-new-devs
+    8. If node 1 gets the lock, it sends METADATA_UPDATED after unmarking the disk
+       as SpareLocal
+    9. If not (get no-new-dev lock), it fails the operation and sends METADATA_UPDATED
+    10. Other nodes get the information whether a disk is added or not
+       by the following METADATA_UPDATED.
index 2b47704..2ba71ce 100755 (executable)
@@ -237,8 +237,7 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
        buf += "#include \"" + fabric_mod_name + "_base.h\"\n"
        buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n"
 
-       buf += "/* Local pointer to allocated TCM configfs fabric module */\n"
-       buf += "struct target_fabric_configfs *" + fabric_mod_name + "_fabric_configfs;\n\n"
+       buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops;\n\n"
 
        buf += "static struct se_node_acl *" + fabric_mod_name + "_make_nodeacl(\n"
        buf += "        struct se_portal_group *se_tpg,\n"
@@ -309,8 +308,8 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
        buf += "        }\n"
        buf += "        tpg->" + fabric_mod_port + " = " + fabric_mod_port + ";\n"
        buf += "        tpg->" + fabric_mod_port + "_tpgt = tpgt;\n\n"
-       buf += "        ret = core_tpg_register(&" + fabric_mod_name + "_fabric_configfs->tf_ops, wwn,\n"
-       buf += "                                &tpg->se_tpg, (void *)tpg,\n"
+       buf += "        ret = core_tpg_register(&" + fabric_mod_name + "_ops, wwn,\n"
+       buf += "                                &tpg->se_tpg, tpg,\n"
        buf += "                                TRANSPORT_TPG_TYPE_NORMAL);\n"
        buf += "        if (ret < 0) {\n"
        buf += "                kfree(tpg);\n"
@@ -370,7 +369,10 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
        buf += "        NULL,\n"
        buf += "};\n\n"
 
-       buf += "static struct target_core_fabric_ops " + fabric_mod_name + "_ops = {\n"
+       buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops = {\n"
+       buf += "        .module                         = THIS_MODULE,\n"
+       buf += "        .name                           = " + fabric_mod_name + ",\n"
+       buf += "        .get_fabric_proto_ident         = " + fabric_mod_name + "_get_fabric_proto_ident,\n"
        buf += "        .get_fabric_name                = " + fabric_mod_name + "_get_fabric_name,\n"
        buf += "        .get_fabric_proto_ident         = " + fabric_mod_name + "_get_fabric_proto_ident,\n"
        buf += "        .tpg_get_wwn                    = " + fabric_mod_name + "_get_fabric_wwn,\n"
@@ -413,75 +415,18 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
        buf += "        .fabric_drop_np                 = NULL,\n"
        buf += "        .fabric_make_nodeacl            = " + fabric_mod_name + "_make_nodeacl,\n"
        buf += "        .fabric_drop_nodeacl            = " + fabric_mod_name + "_drop_nodeacl,\n"
-       buf += "};\n\n"
-
-       buf += "static int " + fabric_mod_name + "_register_configfs(void)\n"
-       buf += "{\n"
-       buf += "        struct target_fabric_configfs *fabric;\n"
-       buf += "        int ret;\n\n"
-       buf += "        printk(KERN_INFO \"" + fabric_mod_name.upper() + " fabric module %s on %s/%s\"\n"
-       buf += "                \" on \"UTS_RELEASE\"\\n\"," + fabric_mod_name.upper() + "_VERSION, utsname()->sysname,\n"
-       buf += "                utsname()->machine);\n"
-       buf += "        /*\n"
-       buf += "         * Register the top level struct config_item_type with TCM core\n"
-       buf += "         */\n"
-       buf += "        fabric = target_fabric_configfs_init(THIS_MODULE, \"" + fabric_mod_name + "\");\n"
-       buf += "        if (IS_ERR(fabric)) {\n"
-       buf += "                printk(KERN_ERR \"target_fabric_configfs_init() failed\\n\");\n"
-       buf += "                return PTR_ERR(fabric);\n"
-       buf += "        }\n"
-       buf += "        /*\n"
-       buf += "         * Setup fabric->tf_ops from our local " + fabric_mod_name + "_ops\n"
-       buf += "         */\n"
-       buf += "        fabric->tf_ops = " + fabric_mod_name + "_ops;\n"
-       buf += "        /*\n"
-       buf += "         * Setup default attribute lists for various fabric->tf_cit_tmpl\n"
-       buf += "         */\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = " + fabric_mod_name + "_wwn_attrs;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;\n"
-       buf += "        /*\n"
-       buf += "         * Register the fabric for use within TCM\n"
-       buf += "         */\n"
-       buf += "        ret = target_fabric_configfs_register(fabric);\n"
-       buf += "        if (ret < 0) {\n"
-       buf += "                printk(KERN_ERR \"target_fabric_configfs_register() failed\"\n"
-       buf += "                                \" for " + fabric_mod_name.upper() + "\\n\");\n"
-       buf += "                return ret;\n"
-       buf += "        }\n"
-       buf += "        /*\n"
-       buf += "         * Setup our local pointer to *fabric\n"
-       buf += "         */\n"
-       buf += "        " + fabric_mod_name + "_fabric_configfs = fabric;\n"
-       buf += "        printk(KERN_INFO \"" +  fabric_mod_name.upper() + "[0] - Set fabric -> " + fabric_mod_name + "_fabric_configfs\\n\");\n"
-       buf += "        return 0;\n"
-       buf += "};\n\n"
-       buf += "static void __exit " + fabric_mod_name + "_deregister_configfs(void)\n"
-       buf += "{\n"
-       buf += "        if (!" + fabric_mod_name + "_fabric_configfs)\n"
-       buf += "                return;\n\n"
-       buf += "        target_fabric_configfs_deregister(" + fabric_mod_name + "_fabric_configfs);\n"
-       buf += "        " + fabric_mod_name + "_fabric_configfs = NULL;\n"
-       buf += "        printk(KERN_INFO \"" +  fabric_mod_name.upper() + "[0] - Cleared " + fabric_mod_name + "_fabric_configfs\\n\");\n"
+       buf += "\n"
+       buf += "        .tfc_wwn_attrs                  = " + fabric_mod_name + "_wwn_attrs;\n"
        buf += "};\n\n"
 
        buf += "static int __init " + fabric_mod_name + "_init(void)\n"
        buf += "{\n"
-       buf += "        int ret;\n\n"
-       buf += "        ret = " + fabric_mod_name + "_register_configfs();\n"
-       buf += "        if (ret < 0)\n"
-       buf += "                return ret;\n\n"
-       buf += "        return 0;\n"
+       buf += "        return target_register_template(" + fabric_mod_name + "_ops);\n"
        buf += "};\n\n"
+
        buf += "static void __exit " + fabric_mod_name + "_exit(void)\n"
        buf += "{\n"
-       buf += "        " + fabric_mod_name + "_deregister_configfs();\n"
+       buf += "        target_unregister_template(" + fabric_mod_name + "_ops);\n"
        buf += "};\n\n"
 
        buf += "MODULE_DESCRIPTION(\"" + fabric_mod_name.upper() + " series fabric driver\");\n"
index 5518465..43e94ea 100644 (file)
@@ -138,27 +138,40 @@ signals the kernel via a 4-byte write(). When cmd_head equals
 cmd_tail, the ring is empty -- no commands are currently waiting to be
 processed by userspace.
 
-TCMU commands start with a common header containing "len_op", a 32-bit
-value that stores the length, as well as the opcode in the lowest
-unused bits. Currently only two opcodes are defined, TCMU_OP_PAD and
-TCMU_OP_CMD. When userspace encounters a command with PAD opcode, it
-should skip ahead by the bytes in "length". (The kernel inserts PAD
-entries to ensure each CMD entry fits contigously into the circular
-buffer.)
-
-When userspace handles a CMD, it finds the SCSI CDB (Command Data
-Block) via tcmu_cmd_entry.req.cdb_off. This is an offset from the
-start of the overall shared memory region, not the entry. The data
-in/out buffers are accessible via tht req.iov[] array. Note that
-each iov.iov_base is also an offset from the start of the region.
-
-TCMU currently does not support BIDI operations.
+TCMU commands are 8-byte aligned. They start with a common header
+containing "len_op", a 32-bit value that stores the length, as well as
+the opcode in the lowest unused bits. It also contains cmd_id and
+flags fields for setting by the kernel (kflags) and userspace
+(uflags).
+
+Currently only two opcodes are defined, TCMU_OP_CMD and TCMU_OP_PAD.
+
+When the opcode is CMD, the entry in the command ring is a struct
+tcmu_cmd_entry. Userspace finds the SCSI CDB (Command Data Block) via
+tcmu_cmd_entry.req.cdb_off. This is an offset from the start of the
+overall shared memory region, not the entry. The data in/out buffers
+are accessible via tht req.iov[] array. iov_cnt contains the number of
+entries in iov[] needed to describe either the Data-In or Data-Out
+buffers. For bidirectional commands, iov_cnt specifies how many iovec
+entries cover the Data-Out area, and iov_bidi_count specifies how many
+iovec entries immediately after that in iov[] cover the Data-In
+area. Just like other fields, iov.iov_base is an offset from the start
+of the region.
 
 When completing a command, userspace sets rsp.scsi_status, and
 rsp.sense_buffer if necessary. Userspace then increments
 mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the
 kernel via the UIO method, a 4-byte write to the file descriptor.
 
+When the opcode is PAD, userspace only updates cmd_tail as above --
+it's a no-op. (The kernel inserts PAD entries to ensure each CMD entry
+is contiguous within the command ring.)
+
+More opcodes may be added in the future. If userspace encounters an
+opcode it does not handle, it must set UNKNOWN_OP bit (bit 0) in
+hdr.uflags, update cmd_tail, and proceed with processing additional
+commands, if any.
+
 The Data Area:
 
 This is shared-memory space after the command ring. The organization
index bc9f6fe..9fa2bf8 100644 (file)
@@ -3573,3 +3573,20 @@ struct {
 @ar   - access register number
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
+
+
+8. Other capabilities.
+----------------------
+
+This section lists capabilities that give information about other
+features of the KVM implementation.
+
+8.1 KVM_CAP_PPC_HWRNG
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel has an implementation of the
+H_RANDOM hypercall backed by a hardware random-number generator.
+If present, the kernel H_RANDOM handler can be enabled for guest use
+with the KVM_CAP_PPC_ENABLE_HCALL capability.
index f6f5950..2e5bbc0 100644 (file)
@@ -3066,10 +3066,16 @@ F:      drivers/net/fddi/defxx.*
 
 DELL LAPTOP DRIVER
 M:     Matthew Garrett <mjg59@srcf.ucam.org>
+M:     Pali Rohár <pali.rohar@gmail.com>
 L:     platform-driver-x86@vger.kernel.org
 S:     Maintained
 F:     drivers/platform/x86/dell-laptop.c
 
+DELL LAPTOP FREEFALL DRIVER
+M:     Pali Rohár <pali.rohar@gmail.com>
+S:     Maintained
+F:     drivers/platform/x86/dell-smo8800.c
+
 DELL LAPTOP SMM DRIVER
 M:     Guenter Roeck <linux@roeck-us.net>
 S:     Maintained
@@ -3084,6 +3090,7 @@ F:        drivers/firmware/dcdbas.*
 
 DELL WMI EXTRAS DRIVER
 M:     Matthew Garrett <mjg59@srcf.ucam.org>
+M:     Pali Rohár <pali.rohar@gmail.com>
 S:     Maintained
 F:     drivers/platform/x86/dell-wmi.c
 
@@ -3271,12 +3278,6 @@ F:       drivers/firmware/dmi-id.c
 F:     drivers/firmware/dmi_scan.c
 F:     include/linux/dmi.h
 
-DOCKING STATION DRIVER
-M:     Shaohua Li <shaohua.li@intel.com>
-L:     linux-acpi@vger.kernel.org
-S:     Supported
-F:     drivers/acpi/dock.c
-
 DOCUMENTATION
 M:     Jonathan Corbet <corbet@lwn.net>
 L:     linux-doc@vger.kernel.org
@@ -5009,6 +5010,11 @@ W:       http://industrypack.sourceforge.net
 S:     Maintained
 F:     drivers/ipack/
 
+INGENIC JZ4780 DMA Driver
+M:     Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
+S:     Maintained
+F:     drivers/dma/dma-jz4780.c
+
 INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
 M:     Mimi Zohar <zohar@linux.vnet.ibm.com>
 M:     Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
@@ -7533,7 +7539,6 @@ S:        Maintained
 F:     drivers/pci/host/pci-exynos.c
 
 PCI DRIVER FOR SYNOPSIS DESIGNWARE
-M:     Mohit Kumar <mohit.kumar@st.com>
 M:     Jingoo Han <jg1.han@samsung.com>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
@@ -7548,9 +7553,8 @@ F:        Documentation/devicetree/bindings/pci/host-generic-pci.txt
 F:     drivers/pci/host/pci-host-generic.c
 
 PCIE DRIVER FOR ST SPEAR13XX
-M:     Mohit Kumar <mohit.kumar@st.com>
 L:     linux-pci@vger.kernel.org
-S:     Maintained
+S:     Orphan
 F:     drivers/pci/host/*spear*
 
 PCMCIA SUBSYSTEM
@@ -8805,6 +8809,15 @@ W:       http://www.emulex.com
 S:     Supported
 F:     drivers/net/ethernet/emulex/benet/
 
+EMULEX ONECONNECT ROCE DRIVER
+M:     Selvin Xavier <selvin.xavier@emulex.com>
+M:     Devesh Sharma <devesh.sharma@emulex.com>
+M:     Mitesh Ahuja <mitesh.ahuja@emulex.com>
+L:     linux-rdma@vger.kernel.org
+W:     http://www.emulex.com
+S:     Supported
+F:     drivers/infiniband/hw/ocrdma/
+
 SFC NETWORK DRIVER
 M:     Solarflare linux maintainers <linux-net-drivers@solarflare.com>
 M:     Shradha Shah <sshah@solarflare.com>
@@ -9937,10 +9950,23 @@ S:      Maintained
 F:     drivers/platform/x86/topstar-laptop.c
 
 TOSHIBA ACPI EXTRAS DRIVER
+M:     Azael Avalos <coproscefalo@gmail.com>
 L:     platform-driver-x86@vger.kernel.org
-S:     Orphan
+S:     Maintained
 F:     drivers/platform/x86/toshiba_acpi.c
 
+TOSHIBA BLUETOOTH DRIVER
+M:     Azael Avalos <coproscefalo@gmail.com>
+L:     platform-driver-x86@vger.kernel.org
+S:     Maintained
+F:     drivers/platform/x86/toshiba_bluetooth.c
+
+TOSHIBA HDD ACTIVE PROTECTION SENSOR DRIVER
+M:     Azael Avalos <coproscefalo@gmail.com>
+L:     platform-driver-x86@vger.kernel.org
+S:     Maintained
+F:     drivers/platform/x86/toshiba_haps.c
+
 TOSHIBA SMM DRIVER
 M:     Jonathan Buzzard <jonathan@buzzard.org.uk>
 L:     tlinux-users@tce.toshiba-dme.co.jp
@@ -10517,6 +10543,12 @@ S:     Maintained
 F:     drivers/vhost/
 F:     include/uapi/linux/vhost.h
 
+VIRTIO INPUT DRIVER
+M:     Gerd Hoffmann <kraxel@redhat.com>
+S:     Maintained
+F:     drivers/virtio/virtio_input.c
+F:     include/uapi/linux/virtio_input.h
+
 VIA RHINE NETWORK DRIVER
 M:     Roger Luethi <rl@hellgate.ch>
 S:     Maintained
index 6cc5b24..7ff1239 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
-PATCHLEVEL = 0
+PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Hurr durr I'ma sheep
 
 # *DOCUMENTATION*
index 757e0c6..3b076fb 100644 (file)
@@ -64,7 +64,7 @@
                };
 
                arcpmu0: pmu {
-                       compatible = "snps,arc700-pmu";
+                       compatible = "snps,arc700-pct";
                };
        };
 };
index 278dacf..d2ac4e5 100644 (file)
@@ -2,6 +2,9 @@ CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -9,7 +12,7 @@ CONFIG_NAMESPACES=y
 # CONFIG_UTS_NS is not set
 # CONFIG_PID_NS is not set
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs"
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
 CONFIG_KALLSYMS_ALL=y
 CONFIG_EMBEDDED=y
 # CONFIG_SLUB_DEBUG is not set
@@ -21,12 +24,9 @@ CONFIG_MODULES=y
 # CONFIG_IOSCHED_DEADLINE is not set
 # CONFIG_IOSCHED_CFQ is not set
 CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_IDE is not set
-# CONFIG_ARCTANGENT_EMAC is not set
 # CONFIG_ARC_HAS_RTSC is not set
 CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
 # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -39,23 +39,23 @@ CONFIG_INET=y
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 # CONFIG_BLK_DEV is not set
 CONFIG_NETDEVICES=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_EVDEV=y
 # CONFIG_MOUSE_PS2_ALPS is not set
 # CONFIG_MOUSE_PS2_LOGIPS2PP is not set
 # CONFIG_MOUSE_PS2_SYNAPTICS is not set
+# CONFIG_MOUSE_PS2_CYPRESS is not set
 # CONFIG_MOUSE_PS2_TRACKPOINT is not set
 CONFIG_MOUSE_PS2_TOUCHKIT=y
-# CONFIG_SERIO_I8042 is not set
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_SERIO_ARC_PS2=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
 CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
 CONFIG_FB=y
@@ -72,4 +72,3 @@ CONFIG_TMPFS=y
 CONFIG_NFS_FS=y
 # CONFIG_ENABLE_WARN_DEPRECATED is not set
 # CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y
index be33db8..e2b1b12 100644 (file)
@@ -30,6 +30,7 @@
 #define ARC_REG_D_UNCACH_BCR   0x6A
 #define ARC_REG_BPU_BCR                0xc0
 #define ARC_REG_ISA_CFG_BCR    0xc1
+#define ARC_REG_RTT_BCR                0xF2
 #define ARC_REG_SMART_BCR      0xFF
 
 /* status32 Bits Positions */
  * [15: 8] = Exception Cause Code
  * [ 7: 0] = Exception Parameters (for certain types only)
  */
-#define ECR_VEC_MASK                   0xff0000
-#define ECR_CODE_MASK                  0x00ff00
-#define ECR_PARAM_MASK                 0x0000ff
-
-/* Exception Cause Vector Values */
+#define ECR_V_MEM_ERR                  0x01
 #define ECR_V_INSN_ERR                 0x02
 #define ECR_V_MACH_CHK                 0x20
 #define ECR_V_ITLB_MISS                        0x21
@@ -62,7 +59,8 @@
 #define ECR_V_PROTV                    0x23
 #define ECR_V_TRAP                     0x25
 
-/* Protection Violation Exception Cause Code Values */
+/* DTLB Miss and Protection Violation Cause Codes */
+
 #define ECR_C_PROTV_INST_FETCH         0x00
 #define ECR_C_PROTV_LOAD               0x01
 #define ECR_C_PROTV_STORE              0x02
        }                                               \
 }
 
-#define WRITE_BCR(reg, into)                           \
+#define WRITE_AUX(reg, into)                           \
 {                                                      \
        unsigned int tmp;                               \
        if (sizeof(tmp) == sizeof(into)) {              \
-               tmp = (*(unsigned int *)(into));        \
+               tmp = (*(unsigned int *)&(into));       \
                write_aux_reg(reg, tmp);                \
        } else  {                                       \
                extern void bogus_undefined(void);      \
index 1a5bf07..4051e95 100644 (file)
@@ -32,6 +32,20 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *m)
 
        m += nr >> 5;
 
+       /*
+        * ARC ISA micro-optimization:
+        *
+        * Instructions dealing with bitpos only consider lower 5 bits (0-31)
+        * e.g (x << 33) is handled like (x << 1) by ASL instruction
+        *  (mem pointer still needs adjustment to point to next word)
+        *
+        * Hence the masking to clamp @nr arg can be elided in general.
+        *
+        * However if @nr is a constant (above assumed it in a register),
+        * and greater than 31, gcc can optimize away (x << 33) to 0,
+        * as overflow, given the 32-bit ISA. Thus masking needs to be done
+        * for constant @nr, but no code is generated due to const prop.
+        */
        if (__builtin_constant_p(nr))
                nr &= 0x1f;
 
@@ -374,29 +388,20 @@ __test_and_change_bit(unsigned long nr, volatile unsigned long *m)
  * This routine doesn't need to be atomic.
  */
 static inline int
-__constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
-{
-       return ((1UL << (nr & 31)) &
-               (((const volatile unsigned int *)addr)[nr >> 5])) != 0;
-}
-
-static inline int
-__test_bit(unsigned int nr, const volatile unsigned long *addr)
+test_bit(unsigned int nr, const volatile unsigned long *addr)
 {
        unsigned long mask;
 
        addr += nr >> 5;
 
-       /* ARC700 only considers 5 bits in bit-fiddling insn */
+       if (__builtin_constant_p(nr))
+               nr &= 0x1f;
+
        mask = 1 << nr;
 
        return ((mask & *addr) != 0);
 }
 
-#define test_bit(nr, addr)     (__builtin_constant_p(nr) ? \
-                                       __constant_test_bit((nr), (addr)) : \
-                                       __test_bit((nr), (addr)))
-
 /*
  * Count the number of zeros, starting from MSB
  * Helper for fls( ) friends
index cbf755e..2b8880e 100644 (file)
@@ -54,29 +54,13 @@ struct arc_reg_cc_build {
 #define PERF_COUNT_ARC_BPOK    (PERF_COUNT_HW_MAX + 3)
 #define PERF_COUNT_ARC_EDTLB   (PERF_COUNT_HW_MAX + 4)
 #define PERF_COUNT_ARC_EITLB   (PERF_COUNT_HW_MAX + 5)
-#define PERF_COUNT_ARC_HW_MAX  (PERF_COUNT_HW_MAX + 6)
+#define PERF_COUNT_ARC_LDC     (PERF_COUNT_HW_MAX + 6)
+#define PERF_COUNT_ARC_STC     (PERF_COUNT_HW_MAX + 7)
+
+#define PERF_COUNT_ARC_HW_MAX  (PERF_COUNT_HW_MAX + 8)
 
 /*
- * The "generalized" performance events seem to really be a copy
- * of the available events on x86 processors; the mapping to ARC
- * events is not always possible 1-to-1. Fortunately, there doesn't
- * seem to be an exact definition for these events, so we can cheat
- * a bit where necessary.
- *
- * In particular, the following PERF events may behave a bit differently
- * compared to other architectures:
- *
- * PERF_COUNT_HW_CPU_CYCLES
- *     Cycles not in halted state
- *
- * PERF_COUNT_HW_REF_CPU_CYCLES
- *     Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES
- *     for now as we don't do Dynamic Voltage/Frequency Scaling (yet)
- *
- * PERF_COUNT_HW_BUS_CYCLES
- *     Unclear what this means, Intel uses 0x013c, which according to
- *     their datasheet means "unhalted reference cycles". It sounds similar
- *     to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it.
+ * Some ARC pct quirks:
  *
  * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
  * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
@@ -91,21 +75,38 @@ struct arc_reg_cc_build {
  *     Note that I$ cache misses aren't counted by either of the two!
  */
 
+/*
+ * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
+ * (based on a specific RTL build)
+ * Below is the static map between perf generic/arc specific event_id and
+ * h/w condition names.
+ * At the time of probe, we loop thru each index and find it's name to
+ * complete the mapping of perf event_id to h/w index as latter is needed
+ * to program the counter really
+ */
 static const char * const arc_pmu_ev_hw_map[] = {
+       /* count cycles */
        [PERF_COUNT_HW_CPU_CYCLES] = "crun",
        [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
        [PERF_COUNT_HW_BUS_CYCLES] = "crun",
-       [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
-       [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
-       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+
        [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
        [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
-       [PERF_COUNT_ARC_DCLM] = "dclm",
-       [PERF_COUNT_ARC_DCSM] = "dcsm",
-       [PERF_COUNT_ARC_ICM] = "icm",
-       [PERF_COUNT_ARC_BPOK] = "bpok",
-       [PERF_COUNT_ARC_EDTLB] = "edtlb",
-       [PERF_COUNT_ARC_EITLB] = "eitlb",
+
+       /* counts condition */
+       [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+       [PERF_COUNT_ARC_BPOK]         = "bpok",   /* NP-NT, PT-T, PNT-NT */
+       [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+
+       [PERF_COUNT_ARC_LDC] = "imemrdc",       /* Instr: mem read cached */
+       [PERF_COUNT_ARC_STC] = "imemwrc",       /* Instr: mem write cached */
+
+       [PERF_COUNT_ARC_DCLM] = "dclm",         /* D-cache Load Miss */
+       [PERF_COUNT_ARC_DCSM] = "dcsm",         /* D-cache Store Miss */
+       [PERF_COUNT_ARC_ICM] = "icm",           /* I-cache Miss */
+       [PERF_COUNT_ARC_EDTLB] = "edtlb",       /* D-TLB Miss */
+       [PERF_COUNT_ARC_EITLB] = "eitlb",       /* I-TLB Miss */
 };
 
 #define C(_x)                  PERF_COUNT_HW_CACHE_##_x
@@ -114,11 +115,11 @@ static const char * const arc_pmu_ev_hw_map[] = {
 static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        [C(L1D)] = {
                [C(OP_READ)] = {
-                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_ACCESS)]      = PERF_COUNT_ARC_LDC,
                        [C(RESULT_MISS)]        = PERF_COUNT_ARC_DCLM,
                },
                [C(OP_WRITE)] = {
-                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_ACCESS)]      = PERF_COUNT_ARC_STC,
                        [C(RESULT_MISS)]        = PERF_COUNT_ARC_DCSM,
                },
                [C(OP_PREFETCH)] = {
@@ -128,7 +129,7 @@ static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        },
        [C(L1I)] = {
                [C(OP_READ)] = {
-                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_ACCESS)]      = PERF_COUNT_HW_INSTRUCTIONS,
                        [C(RESULT_MISS)]        = PERF_COUNT_ARC_ICM,
                },
                [C(OP_WRITE)] = {
@@ -156,9 +157,10 @@ static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
        },
        [C(DTLB)] = {
                [C(OP_READ)] = {
-                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_ACCESS)]      = PERF_COUNT_ARC_LDC,
                        [C(RESULT_MISS)]        = PERF_COUNT_ARC_EDTLB,
                },
+                       /* DTLB LD/ST Miss not segregated by h/w*/
                [C(OP_WRITE)] = {
                        [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
                        [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
index ae1c485..fd2ec50 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <asm/arcregs.h>
+#include <asm/stacktrace.h>
 
 struct arc_pmu {
        struct pmu      pmu;
@@ -25,6 +26,46 @@ struct arc_pmu {
        int             ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
 };
 
+struct arc_callchain_trace {
+       int depth;
+       void *perf_stuff;
+};
+
+static int callchain_trace(unsigned int addr, void *data)
+{
+       struct arc_callchain_trace *ctrl = data;
+       struct perf_callchain_entry *entry = ctrl->perf_stuff;
+       perf_callchain_store(entry, addr);
+
+       if (ctrl->depth++ < 3)
+               return 0;
+
+       return -1;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       struct arc_callchain_trace ctrl = {
+               .depth = 0,
+               .perf_stuff = entry,
+       };
+
+       arc_unwind_core(NULL, regs, callchain_trace, &ctrl);
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       /*
+        * User stack can't be unwound trivially with kernel dwarf unwinder
+        * So for now just record the user PC
+        */
+       perf_callchain_store(entry, instruction_pointer(regs));
+}
+
+static struct arc_pmu *arc_pmu;
+
 /* read counter #idx; note that counter# != event# on ARC! */
 static uint64_t arc_pmu_read_counter(int idx)
 {
@@ -47,7 +88,6 @@ static uint64_t arc_pmu_read_counter(int idx)
 static void arc_perf_event_update(struct perf_event *event,
                                  struct hw_perf_event *hwc, int idx)
 {
-       struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
        uint64_t prev_raw_count, new_raw_count;
        int64_t delta;
 
@@ -89,13 +129,16 @@ static int arc_pmu_cache_event(u64 config)
        if (ret == CACHE_OP_UNSUPPORTED)
                return -ENOENT;
 
+       pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n",
+                cache_type, cache_op, cache_result, ret,
+                arc_pmu_ev_hw_map[ret]);
+
        return ret;
 }
 
 /* initializes hw_perf_event structure if event is supported */
 static int arc_pmu_event_init(struct perf_event *event)
 {
-       struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
        struct hw_perf_event *hwc = &event->hw;
        int ret;
 
@@ -106,8 +149,9 @@ static int arc_pmu_event_init(struct perf_event *event)
                if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
                        return -ENOENT;
                hwc->config = arc_pmu->ev_hw_idx[event->attr.config];
-               pr_debug("initializing event %d with cfg %d\n",
-                        (int) event->attr.config, (int) hwc->config);
+               pr_debug("init event %d with h/w %d \'%s\'\n",
+                        (int) event->attr.config, (int) hwc->config,
+                        arc_pmu_ev_hw_map[event->attr.config]);
                return 0;
        case PERF_TYPE_HW_CACHE:
                ret = arc_pmu_cache_event(event->attr.config);
@@ -183,8 +227,6 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
 
 static void arc_pmu_del(struct perf_event *event, int flags)
 {
-       struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
-
        arc_pmu_stop(event, PERF_EF_UPDATE);
        __clear_bit(event->hw.idx, arc_pmu->used_mask);
 
@@ -194,7 +236,6 @@ static void arc_pmu_del(struct perf_event *event, int flags)
 /* allocate hardware counter and optionally start counting */
 static int arc_pmu_add(struct perf_event *event, int flags)
 {
-       struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;
 
@@ -247,10 +288,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
        BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS);
 
        READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
-       if (!cc_bcr.v) {
-               pr_err("Performance counters exist, but no countable conditions?\n");
-               return -ENODEV;
-       }
+       BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */
 
        arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
        if (!arc_pmu)
@@ -263,19 +301,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
                arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c);
 
        cc_name.str[8] = 0;
-       for (i = 0; i < PERF_COUNT_HW_MAX; i++)
+       for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
                arc_pmu->ev_hw_idx[i] = -1;
 
+       /* loop thru all available h/w condition indexes */
        for (j = 0; j < cc_bcr.c; j++) {
                write_aux_reg(ARC_REG_CC_INDEX, j);
                cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
                cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
+
+               /* See if it has been mapped to a perf event_id */
                for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
                        if (arc_pmu_ev_hw_map[i] &&
                            !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
                            strlen(arc_pmu_ev_hw_map[i])) {
-                               pr_debug("mapping %d to idx %d with name %s\n",
-                                        i, j, cc_name.str);
+                               pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
+                                        i, cc_name.str, j);
                                arc_pmu->ev_hw_idx[i] = j;
                        }
                }
@@ -302,7 +343,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 
 #ifdef CONFIG_OF
 static const struct of_device_id arc_pmu_match[] = {
-       { .compatible = "snps,arc700-pmu" },
+       { .compatible = "snps,arc700-pct" },
        {},
 };
 MODULE_DEVICE_TABLE(of, arc_pmu_match);
@@ -310,7 +351,7 @@ MODULE_DEVICE_TABLE(of, arc_pmu_match);
 
 static struct platform_driver arc_pmu_driver = {
        .driver = {
-               .name           = "arc700-pmu",
+               .name           = "arc700-pct",
                .of_match_table = of_match_ptr(arc_pmu_match),
        },
        .probe          = arc_pmu_device_probe,
index f46efd1..e095c55 100644 (file)
@@ -49,7 +49,10 @@ void arch_cpu_idle(void)
 
 asmlinkage void ret_from_fork(void);
 
-/* Layout of Child kernel mode stack as setup at the end of this function is
+/*
+ * Copy architecture-specific thread state
+ *
+ * Layout of Child kernel mode stack as setup at the end of this function is
  *
  * |     ...        |
  * |     ...        |
@@ -81,7 +84,7 @@ asmlinkage void ret_from_fork(void);
  * ------------------  <===== END of PAGE
  */
 int copy_thread(unsigned long clone_flags,
-               unsigned long usp, unsigned long arg,
+               unsigned long usp, unsigned long kthread_arg,
                struct task_struct *p)
 {
        struct pt_regs *c_regs;        /* child's pt_regs */
@@ -112,7 +115,7 @@ int copy_thread(unsigned long clone_flags,
        if (unlikely(p->flags & PF_KTHREAD)) {
                memset(c_regs, 0, sizeof(struct pt_regs));
 
-               c_callee->r13 = arg; /* argument to kernel thread */
+               c_callee->r13 = kthread_arg;
                c_callee->r14 = usp;  /* function */
 
                return 0;
index 900f68a..1d167c6 100644 (file)
@@ -120,7 +120,10 @@ static void read_arc_build_cfg_regs(void)
        READ_BCR(ARC_REG_SMART_BCR, bcr);
        cpu->extn.smart = bcr.ver ? 1 : 0;
 
-       cpu->extn.debug = cpu->extn.ap | cpu->extn.smart;
+       READ_BCR(ARC_REG_RTT_BCR, bcr);
+       cpu->extn.rtt = bcr.ver ? 1 : 0;
+
+       cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
 }
 
 static const struct cpuinfo_data arc_cpu_tbl[] = {
index 3eadfda..c927aa8 100644 (file)
@@ -42,7 +42,7 @@ void die(const char *str, struct pt_regs *regs, unsigned long address)
  *  -for kernel, chk if due to copy_(to|from)_user, otherwise die()
  */
 static noinline int
-handle_exception(const char *str, struct pt_regs *regs, siginfo_t *info)
+unhandled_exception(const char *str, struct pt_regs *regs, siginfo_t *info)
 {
        if (user_mode(regs)) {
                struct task_struct *tsk = current;
@@ -71,7 +71,7 @@ int name(unsigned long address, struct pt_regs *regs) \
                .si_code  = sicode,             \
                .si_addr = (void __user *)address,      \
        };                                      \
-       return handle_exception(str, regs, &info);\
+       return unhandled_exception(str, regs, &info);\
 }
 
 /*
index 5234123..d44eedd 100644 (file)
@@ -71,7 +71,7 @@ early_param("initrd", early_initrd);
  */
 void __init setup_arch_memory(void)
 {
-       unsigned long zones_size[MAX_NR_ZONES] = { 0, 0 };
+       unsigned long zones_size[MAX_NR_ZONES];
        unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz;
 
        init_mm.start_code = (unsigned long)_text;
@@ -90,7 +90,7 @@ void __init setup_arch_memory(void)
        /*------------- externs in mm need setting up ---------------*/
 
        /* first page of system - kernel .vector starts here */
-       min_low_pfn = PFN_DOWN(CONFIG_LINUX_LINK_BASE);
+       min_low_pfn = ARCH_PFN_OFFSET;
 
        /* Last usable page of low mem (no HIGHMEM yet for ARC port) */
        max_low_pfn = max_pfn = PFN_DOWN(end_mem);
@@ -111,7 +111,7 @@ void __init setup_arch_memory(void)
 
        /*-------------- node setup --------------------------------*/
        memset(zones_size, 0, sizeof(zones_size));
-       zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn;
+       zones_size[ZONE_NORMAL] = max_mapnr;
 
        /*
         * We can't use the helper free_area_init(zones[]) because it uses
@@ -123,6 +123,8 @@ void __init setup_arch_memory(void)
                            zones_size,         /* num pages per zone */
                            min_low_pfn,        /* first pfn of node */
                            NULL);              /* NO holes */
+
+       high_memory = (void *)end_mem;
 }
 
 /*
@@ -133,7 +135,6 @@ void __init setup_arch_memory(void)
  */
 void __init mem_init(void)
 {
-       high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz);
        free_all_bootmem();
        mem_init_print_info(NULL);
 }
index 1bc5fdd..9f727d8 100644 (file)
                };
        };
 
+       clocks {
+               sleep_clk: sleep_clk {
+                       compatible = "fixed-clock";
+                       clock-frequency = <32768>;
+                       #clock-cells = <0>;
+               };
+       };
+
        soc: soc {
                #address-cells = <1>;
                #size-cells = <1>;
                        compatible = "qcom,kpss-timer", "qcom,msm-timer";
                        interrupts = <1 1 0x301>,
                                     <1 2 0x301>,
-                                    <1 3 0x301>;
+                                    <1 3 0x301>,
+                                    <1 4 0x301>,
+                                    <1 5 0x301>;
                        reg = <0x0200a000 0x100>;
                        clock-frequency = <25000000>,
                                          <32768>;
+                       clocks = <&sleep_clk>;
+                       clock-names = "sleep";
                        cpu-offset = <0x80000>;
                };
 
index 2499867..df3f60c 100644 (file)
@@ -195,8 +195,14 @@ struct kvm_arch_memory_slot {
 #define KVM_ARM_IRQ_CPU_IRQ            0
 #define KVM_ARM_IRQ_CPU_FIQ            1
 
-/* Highest supported SPI, from VGIC_NR_IRQS */
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
 #define KVM_ARM_IRQ_GIC_MAX            127
+#endif
 
 /* One single KVM irqchip, ie. the VGIC */
 #define KVM_NR_IRQCHIPS          1
index cc176b6..aebfbf7 100644 (file)
@@ -80,9 +80,9 @@ ENTRY(stext)
        ldr     r13, =__mmap_switched           @ address to jump to after
                                                @ initialising sctlr
        adr     lr, BSYM(1f)                    @ return (PIC) address
- ARM(  add     pc, r10, #PROCINFO_INITFUNC     )
- THUMB(        add     r12, r10, #PROCINFO_INITFUNC    )
- THUMB(        ret     r12                             )
+       ldr     r12, [r10, #PROCINFO_INITFUNC]
+       add     r12, r12, r10
+       ret     r12
  1:    b       __after_proc_init
 ENDPROC(stext)
 
@@ -117,9 +117,9 @@ ENTRY(secondary_startup)
 
        adr     lr, BSYM(__after_proc_init)     @ return address
        mov     r13, r12                        @ __secondary_switched address
- ARM(  add     pc, r10, #PROCINFO_INITFUNC     )
- THUMB(        add     r12, r10, #PROCINFO_INITFUNC    )
- THUMB(        ret     r12                             )
+       ldr     r12, [r10, #PROCINFO_INITFUNC]
+       add     r12, r12, r10
+       ret     r12
 ENDPROC(secondary_startup)
 
 ENTRY(__secondary_switched)
index 6f53645..d9631ec 100644 (file)
@@ -671,8 +671,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
                if (!irqchip_in_kernel(kvm))
                        return -ENXIO;
 
-               if (irq_num < VGIC_NR_PRIVATE_IRQS ||
-                   irq_num > KVM_ARM_IRQ_GIC_MAX)
+               if (irq_num < VGIC_NR_PRIVATE_IRQS)
                        return -EINVAL;
 
                return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
index 36aaeb1..bf37e3c 100644 (file)
@@ -754,12 +754,12 @@ static struct platform_device vcc_sdhi1 = {
 };
 
 /* SDHI0 */
-static struct sh_mobile_sdhi_info sdhi0_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
+static struct tmio_mmc_data sdhi0_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
                          MMC_CAP_POWER_OFF_CARD,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
        .cd_gpio        = 167,
 };
 
@@ -796,12 +796,12 @@ static struct platform_device sdhi0_device = {
 };
 
 /* SDHI1 */
-static struct sh_mobile_sdhi_info sdhi1_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI1_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI1_RX,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
+static struct tmio_mmc_data sdhi1_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI1_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI1_RX,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
                          MMC_CAP_POWER_OFF_CARD,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
        /* Port72 cannot generate IRQs, will be used in polling mode. */
        .cd_gpio        = 72,
 };
index f27b5a8..25558d1 100644 (file)
@@ -201,12 +201,12 @@ static struct rcar_phy_platform_data usb_phy_platform_data __initdata =
 
 
 /* SDHI */
-static struct sh_mobile_sdhi_info sdhi0_info __initdata = {
-       .dma_slave_tx   = HPBDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = HPBDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED,
-       .tmio_ocr_mask  = MMC_VDD_165_195 | MMC_VDD_32_33 | MMC_VDD_33_34,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT,
+static struct tmio_mmc_data sdhi0_info __initdata = {
+       .chan_priv_tx   = (void *)HPBDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)HPBDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED,
+       .ocr_mask       = MMC_VDD_165_195 | MMC_VDD_32_33 | MMC_VDD_33_34,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT,
 };
 
 static struct resource sdhi0_resources[] __initdata = {
@@ -683,7 +683,7 @@ static void __init bockw_init(void)
                platform_device_register_resndata(
                        NULL, "sh_mobile_sdhi", 0,
                        sdhi0_resources, ARRAY_SIZE(sdhi0_resources),
-                       &sdhi0_info, sizeof(struct sh_mobile_sdhi_info));
+                       &sdhi0_info, sizeof(struct tmio_mmc_data));
        }
 
        /* for Audio */
index 7c9b63b..260d831 100644 (file)
@@ -442,11 +442,11 @@ static struct platform_device vcc_sdhi2 = {
 };
 
 /* SDHI */
-static struct sh_mobile_sdhi_info sdhi0_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
+static struct tmio_mmc_data sdhi0_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
                          MMC_CAP_POWER_OFF_CARD,
 };
 
@@ -484,13 +484,13 @@ static struct platform_device sdhi0_device = {
 };
 
 /* Micro SD */
-static struct sh_mobile_sdhi_info sdhi2_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI2_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI2_RX,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT |
+static struct tmio_mmc_data sdhi2_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI2_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI2_RX,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT |
                          TMIO_MMC_USE_GPIO_CD |
                          TMIO_MMC_WRPROTECT_DISABLE,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED | MMC_CAP_POWER_OFF_CARD,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED | MMC_CAP_POWER_OFF_CARD,
        .cd_gpio        = 13,
 };
 
index 598f704..51db288 100644 (file)
@@ -122,11 +122,11 @@ static struct resource sdhi0_resources[] = {
        },
 };
 
-static struct sh_mobile_sdhi_info sdhi0_platform_data = {
-       .dma_slave_tx = HPBDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx = HPBDMA_SLAVE_SDHI0_RX,
-       .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
-       .tmio_caps = MMC_CAP_SD_HIGHSPEED,
+static struct tmio_mmc_data sdhi0_platform_data = {
+       .chan_priv_tx = (void *)HPBDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx = (void *)HPBDMA_SLAVE_SDHI0_RX,
+       .flags        = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
+       .capabilities = MMC_CAP_SD_HIGHSPEED,
 };
 
 static struct platform_device sdhi0_device = {
index b764431..b4f92b9 100644 (file)
@@ -827,7 +827,7 @@ config KUSER_HELPERS
 
 config VDSO
        bool "Enable VDSO for acceleration of some system calls"
-       depends on AEABI && MMU
+       depends on AEABI && MMU && CPU_V7
        default y if ARM_ARCH_TIMER
        select GENERIC_TIME_VSYSCALL
        help
index f8b69d8..6b47f6e 100644 (file)
@@ -1 +1,3 @@
 vdso.lds
+vdso.so.raw
+vdsomunge
index bab0a8b..8aa7910 100644 (file)
@@ -10,8 +10,8 @@ ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector
 ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING
 ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
 
-obj-y += vdso.o
-extra-y += vdso.lds
+obj-$(CONFIG_VDSO) += vdso.o
+extra-$(CONFIG_VDSO) += vdso.lds
 CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 
 CFLAGS_REMOVE_vdso.o = -pg
index da5f20e..4269dba 100644 (file)
@@ -1,5 +1,7 @@
 config ARM64
        def_bool y
+       select ACPI_GENERIC_GSI if ACPI
+       select ACPI_REDUCED_HARDWARE_ONLY if ACPI
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_GCOV_PROFILE_ALL
@@ -758,6 +760,8 @@ source "drivers/Kconfig"
 
 source "drivers/firmware/Kconfig"
 
+source "drivers/acpi/Kconfig"
+
 source "fs/Kconfig"
 
 source "arch/arm64/kvm/Kconfig"
index e74f6e0..c8d3e0e 100644 (file)
                #address-cells = <2>;
                #size-cells = <2>;
                ranges;
+               dma-ranges = <0x0 0x0 0x0 0x0 0x400 0x0>;
 
                clocks {
                        #address-cells = <2>;
                                reg-names = "csr-reg";
                                clock-output-names = "pcie4clk";
                        };
+
+                       dmaclk: dmaclk@1f27c000 {
+                               compatible = "apm,xgene-device-clock";
+                               #clock-cells = <1>;
+                               clocks = <&socplldiv2 0>;
+                               reg = <0x0 0x1f27c000 0x0 0x1000>;
+                               reg-names = "csr-reg";
+                               clock-output-names = "dmaclk";
+                       };
                };
 
                pcie0: pcie@1f2b0000 {
                        interrupts = <0x0 0x41 0x4>;
                        clocks = <&rngpkaclk 0>;
                };
+
+               dma: dma@1f270000 {
+                       compatible = "apm,xgene-storm-dma";
+                       device_type = "dma";
+                       reg = <0x0 0x1f270000 0x0 0x10000>,
+                             <0x0 0x1f200000 0x0 0x10000>,
+                             <0x0 0x1b008000 0x0 0x2000>,
+                             <0x0 0x1054a000 0x0 0x100>;
+                       interrupts = <0x0 0x82 0x4>,
+                                    <0x0 0xb8 0x4>,
+                                    <0x0 0xb9 0x4>,
+                                    <0x0 0xba 0x4>,
+                                    <0x0 0xbb 0x4>;
+                       dma-coherent;
+                       clocks = <&dmaclk 0>;
+               };
        };
 };
diff --git a/arch/arm64/include/asm/acenv.h b/arch/arm64/include/asm/acenv.h
new file mode 100644 (file)
index 0000000..b49166f
--- /dev/null
@@ -0,0 +1,18 @@
+/*
+ * ARM64 specific ACPICA environments and implementation
+ *
+ * Copyright (C) 2014, Linaro Ltd.
+ *   Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *   Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_ACENV_H
+#define _ASM_ACENV_H
+
+/* It is required unconditionally by ACPI core, update it when needed. */
+
+#endif /* _ASM_ACENV_H */
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
new file mode 100644 (file)
index 0000000..59c05d8
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (C) 2013-2014, Linaro Ltd.
+ *     Author: Al Stone <al.stone@linaro.org>
+ *     Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *     Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation;
+ */
+
+#ifndef _ASM_ACPI_H
+#define _ASM_ACPI_H
+
+#include <linux/mm.h>
+#include <linux/irqchip/arm-gic-acpi.h>
+
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+
+/* Basic configuration for ACPI */
+#ifdef CONFIG_ACPI
+/* ACPI table mapping after acpi_gbl_permanent_mmap is set */
+static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
+                                           acpi_size size)
+{
+       if (!page_is_ram(phys >> PAGE_SHIFT))
+               return ioremap(phys, size);
+
+       return ioremap_cache(phys, size);
+}
+#define acpi_os_ioremap acpi_os_ioremap
+
+typedef u64 phys_cpuid_t;
+#define PHYS_CPUID_INVALID INVALID_HWID
+
+#define acpi_strict 1  /* No out-of-spec workarounds on ARM64 */
+extern int acpi_disabled;
+extern int acpi_noirq;
+extern int acpi_pci_disabled;
+
+/* 1 to indicate PSCI 0.2+ is implemented */
+static inline bool acpi_psci_present(void)
+{
+       return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_COMPLIANT;
+}
+
+/* 1 to indicate HVC must be used instead of SMC as the PSCI conduit */
+static inline bool acpi_psci_use_hvc(void)
+{
+       return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC;
+}
+
+static inline void disable_acpi(void)
+{
+       acpi_disabled = 1;
+       acpi_pci_disabled = 1;
+       acpi_noirq = 1;
+}
+
+static inline void enable_acpi(void)
+{
+       acpi_disabled = 0;
+       acpi_pci_disabled = 0;
+       acpi_noirq = 0;
+}
+
+/*
+ * The ACPI processor driver for ACPI core code needs this macro
+ * to find out this cpu was already mapped (mapping from CPU hardware
+ * ID to CPU logical ID) or not.
+ */
+#define cpu_physical_id(cpu) cpu_logical_map(cpu)
+
+/*
+ * It's used from ACPI core in kdump to boot UP system with SMP kernel,
+ * with this check the ACPI core will not override the CPU index
+ * obtained from GICC with 0 and not print some error message as well.
+ * Since MADT must provide at least one GICC structure for GIC
+ * initialization, CPU will be always available in MADT on ARM64.
+ */
+static inline bool acpi_has_cpu_in_madt(void)
+{
+       return true;
+}
+
+static inline void arch_fix_phys_package_id(int num, u32 slot) { }
+void __init acpi_init_cpus(void);
+
+#else
+static inline bool acpi_psci_present(void) { return false; }
+static inline bool acpi_psci_use_hvc(void) { return false; }
+static inline void acpi_init_cpus(void) { }
+#endif /* CONFIG_ACPI */
+
+#endif /*_ASM_ACPI_H*/
index da301ee..5a31d67 100644 (file)
@@ -66,5 +66,6 @@ struct cpu_operations {
 extern const struct cpu_operations *cpu_ops[NR_CPUS];
 int __init cpu_read_ops(struct device_node *dn, int cpu);
 void __init cpu_read_bootcpu_ops(void);
+const struct cpu_operations *cpu_get_ops(const char *name);
 
 #endif /* ifndef __ASM_CPU_OPS_H */
index 9264956..95e6b6d 100644 (file)
@@ -62,6 +62,9 @@ void __init early_fixmap_init(void);
 
 #define __early_set_fixmap __set_fixmap
 
+#define __late_set_fixmap __set_fixmap
+#define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR)
+
 extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
 
 #include <asm-generic/fixmap.h>
index 94c5367..bbb251b 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef __ASM_IRQ_H
 #define __ASM_IRQ_H
 
+#include <linux/irqchip/arm-gic-acpi.h>
+
 #include <asm-generic/irq.h>
 
 struct pt_regs;
@@ -8,4 +10,15 @@ struct pt_regs;
 extern void migrate_irqs(void);
 extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
 
+static inline void acpi_irq_init(void)
+{
+       /*
+        * Hardcode ACPI IRQ chip initialization to GICv2 for now.
+        * Proper irqchip infrastructure will be implemented along with
+        * incoming  GICv2m|GICv3|ITS bits.
+        */
+       acpi_gic_init();
+}
+#define acpi_irq_init acpi_irq_init
+
 #endif
index 872ba93..b008a72 100644 (file)
 extern int isa_dma_bridge_buggy;
 
 #ifdef CONFIG_PCI
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+       /* no legacy IRQ on arm64 */
+       return -ENODEV;
+}
+
 static inline int pci_proc_domain(struct pci_bus *bus)
 {
        return 1;
index e5312ea..2454bc5 100644 (file)
@@ -14,6 +14,7 @@
 #ifndef __ASM_PSCI_H
 #define __ASM_PSCI_H
 
-int psci_init(void);
+int psci_dt_init(void);
+int psci_acpi_init(void);
 
 #endif /* __ASM_PSCI_H */
index 780f82c..bf22650 100644 (file)
@@ -39,9 +39,10 @@ extern void show_ipi_list(struct seq_file *p, int prec);
 extern void handle_IPI(int ipinr, struct pt_regs *regs);
 
 /*
- * Setup the set of possible CPUs (via set_cpu_possible)
+ * Discover the set of possible CPUs and determine their
+ * SMP operations.
  */
-extern void smp_init_cpus(void);
+extern void of_smp_init_cpus(void);
 
 /*
  * Provide a function to raise an IPI cross call on CPUs in callmap.
index c154c0b..d268320 100644 (file)
@@ -188,8 +188,14 @@ struct kvm_arch_memory_slot {
 #define KVM_ARM_IRQ_CPU_IRQ            0
 #define KVM_ARM_IRQ_CPU_FIQ            1
 
-/* Highest supported SPI, from VGIC_NR_IRQS */
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
 #define KVM_ARM_IRQ_GIC_MAX            127
+#endif
 
 /* One single KVM irqchip, ie. the VGIC */
 #define KVM_NR_IRQCHIPS          1
index b12e15b..426d076 100644 (file)
@@ -35,6 +35,7 @@ arm64-obj-$(CONFIG_KGDB)              += kgdb.o
 arm64-obj-$(CONFIG_EFI)                        += efi.o efi-stub.o efi-entry.o
 arm64-obj-$(CONFIG_PCI)                        += pci.o
 arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
+arm64-obj-$(CONFIG_ACPI)               += acpi.o
 
 obj-y                                  += $(arm64-obj-y) vdso/
 obj-m                                  += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
new file mode 100644 (file)
index 0000000..8b83955
--- /dev/null
@@ -0,0 +1,345 @@
+/*
+ *  ARM64 Specific Low-Level ACPI Boot Support
+ *
+ *  Copyright (C) 2013-2014, Linaro Ltd.
+ *     Author: Al Stone <al.stone@linaro.org>
+ *     Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *     Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *     Author: Tomasz Nowicki <tomasz.nowicki@linaro.org>
+ *     Author: Naresh Bhat <naresh.bhat@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "ACPI: " fmt
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/smp.h>
+
+#include <asm/cputype.h>
+#include <asm/cpu_ops.h>
+#include <asm/smp_plat.h>
+
+int acpi_noirq = 1;            /* skip ACPI IRQ initialization */
+int acpi_disabled = 1;
+EXPORT_SYMBOL(acpi_disabled);
+
+int acpi_pci_disabled = 1;     /* skip ACPI PCI scan and IRQ initialization */
+EXPORT_SYMBOL(acpi_pci_disabled);
+
+/* Processors with enabled flag and sane MPIDR */
+static int enabled_cpus;
+
+/* Boot CPU is valid or not in MADT */
+static bool bootcpu_valid  __initdata;
+
+static bool param_acpi_off __initdata;
+static bool param_acpi_force __initdata;
+
+static int __init parse_acpi(char *arg)
+{
+       if (!arg)
+               return -EINVAL;
+
+       /* "acpi=off" disables both ACPI table parsing and interpreter */
+       if (strcmp(arg, "off") == 0)
+               param_acpi_off = true;
+       else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
+               param_acpi_force = true;
+       else
+               return -EINVAL; /* Core will print when we return error */
+
+       return 0;
+}
+early_param("acpi", parse_acpi);
+
+static int __init dt_scan_depth1_nodes(unsigned long node,
+                                      const char *uname, int depth,
+                                      void *data)
+{
+       /*
+        * Return 1 as soon as we encounter a node at depth 1 that is
+        * not the /chosen node.
+        */
+       if (depth == 1 && (strcmp(uname, "chosen") != 0))
+               return 1;
+       return 0;
+}
+
+/*
+ * __acpi_map_table() will be called before page_init(), so early_ioremap()
+ * or early_memremap() should be called here to for ACPI table mapping.
+ */
+char *__init __acpi_map_table(unsigned long phys, unsigned long size)
+{
+       if (!size)
+               return NULL;
+
+       return early_memremap(phys, size);
+}
+
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+       if (!map || !size)
+               return;
+
+       early_memunmap(map, size);
+}
+
+/**
+ * acpi_map_gic_cpu_interface - generates a logical cpu number
+ * and map to MPIDR represented by GICC structure
+ */
+static void __init
+acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
+{
+       int i;
+       u64 mpidr = processor->arm_mpidr & MPIDR_HWID_BITMASK;
+       bool enabled = !!(processor->flags & ACPI_MADT_ENABLED);
+
+       if (mpidr == INVALID_HWID) {
+               pr_info("Skip MADT cpu entry with invalid MPIDR\n");
+               return;
+       }
+
+       total_cpus++;
+       if (!enabled)
+               return;
+
+       if (enabled_cpus >=  NR_CPUS) {
+               pr_warn("NR_CPUS limit of %d reached, Processor %d/0x%llx ignored.\n",
+                       NR_CPUS, total_cpus, mpidr);
+               return;
+       }
+
+       /* Check if GICC structure of boot CPU is available in the MADT */
+       if (cpu_logical_map(0) == mpidr) {
+               if (bootcpu_valid) {
+                       pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n",
+                              mpidr);
+                       return;
+               }
+
+               bootcpu_valid = true;
+       }
+
+       /*
+        * Duplicate MPIDRs are a recipe for disaster. Scan
+        * all initialized entries and check for
+        * duplicates. If any is found just ignore the CPU.
+        */
+       for (i = 1; i < enabled_cpus; i++) {
+               if (cpu_logical_map(i) == mpidr) {
+                       pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n",
+                              mpidr);
+                       return;
+               }
+       }
+
+       if (!acpi_psci_present())
+               return;
+
+       cpu_ops[enabled_cpus] = cpu_get_ops("psci");
+       /* CPU 0 was already initialized */
+       if (enabled_cpus) {
+               if (!cpu_ops[enabled_cpus])
+                       return;
+
+               if (cpu_ops[enabled_cpus]->cpu_init(NULL, enabled_cpus))
+                       return;
+
+               /* map the logical cpu id to cpu MPIDR */
+               cpu_logical_map(enabled_cpus) = mpidr;
+       }
+
+       enabled_cpus++;
+}
+
+static int __init
+acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
+                               const unsigned long end)
+{
+       struct acpi_madt_generic_interrupt *processor;
+
+       processor = (struct acpi_madt_generic_interrupt *)header;
+
+       if (BAD_MADT_ENTRY(processor, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+       acpi_map_gic_cpu_interface(processor);
+       return 0;
+}
+
+/* Parse GIC cpu interface entries in MADT for SMP init */
+void __init acpi_init_cpus(void)
+{
+       int count, i;
+
+       /*
+        * do a partial walk of MADT to determine how many CPUs
+        * we have including disabled CPUs, and get information
+        * we need for SMP init
+        */
+       count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+                       acpi_parse_gic_cpu_interface, 0);
+
+       if (!count) {
+               pr_err("No GIC CPU interface entries present\n");
+               return;
+       } else if (count < 0) {
+               pr_err("Error parsing GIC CPU interface entry\n");
+               return;
+       }
+
+       if (!bootcpu_valid) {
+               pr_err("MADT missing boot CPU MPIDR, not enabling secondaries\n");
+               return;
+       }
+
+       for (i = 0; i < enabled_cpus; i++)
+               set_cpu_possible(i, true);
+
+       /* Make boot-up look pretty */
+       pr_info("%d CPUs enabled, %d CPUs total\n", enabled_cpus, total_cpus);
+}
+
+/*
+ * acpi_fadt_sanity_check() - Check FADT presence and carry out sanity
+ *                           checks on it
+ *
+ * Return 0 on success,  <0 on failure
+ */
+static int __init acpi_fadt_sanity_check(void)
+{
+       struct acpi_table_header *table;
+       struct acpi_table_fadt *fadt;
+       acpi_status status;
+       acpi_size tbl_size;
+       int ret = 0;
+
+       /*
+        * FADT is required on arm64; retrieve it to check its presence
+        * and carry out revision and ACPI HW reduced compliancy tests
+        */
+       status = acpi_get_table_with_size(ACPI_SIG_FADT, 0, &table, &tbl_size);
+       if (ACPI_FAILURE(status)) {
+               const char *msg = acpi_format_exception(status);
+
+               pr_err("Failed to get FADT table, %s\n", msg);
+               return -ENODEV;
+       }
+
+       fadt = (struct acpi_table_fadt *)table;
+
+       /*
+        * Revision in table header is the FADT Major revision, and there
+        * is a minor revision of FADT which was introduced by ACPI 5.1,
+        * we only deal with ACPI 5.1 or newer revision to get GIC and SMP
+        * boot protocol configuration data.
+        */
+       if (table->revision < 5 ||
+          (table->revision == 5 && fadt->minor_revision < 1)) {
+               pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n",
+                      table->revision, fadt->minor_revision);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) {
+               pr_err("FADT not ACPI hardware reduced compliant\n");
+               ret = -EINVAL;
+       }
+
+out:
+       /*
+        * acpi_get_table_with_size() creates FADT table mapping that
+        * should be released after parsing and before resuming boot
+        */
+       early_acpi_os_unmap_memory(table, tbl_size);
+       return ret;
+}
+
+/*
+ * acpi_boot_table_init() called from setup_arch(), always.
+ *     1. find RSDP and get its address, and then find XSDT
+ *     2. extract all tables and checksums them all
+ *     3. check ACPI FADT revision
+ *     4. check ACPI FADT HW reduced flag
+ *
+ * We can parse ACPI boot-time tables such as MADT after
+ * this function is called.
+ *
+ * On return ACPI is enabled if either:
+ *
+ * - ACPI tables are initialized and sanity checks passed
+ * - acpi=force was passed in the command line and ACPI was not disabled
+ *   explicitly through acpi=off command line parameter
+ *
+ * ACPI is disabled on function return otherwise
+ */
+void __init acpi_boot_table_init(void)
+{
+       /*
+        * Enable ACPI instead of device tree unless
+        * - ACPI has been disabled explicitly (acpi=off), or
+        * - the device tree is not empty (it has more than just a /chosen node)
+        *   and ACPI has not been force enabled (acpi=force)
+        */
+       if (param_acpi_off ||
+           (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
+               return;
+
+       /*
+        * ACPI is disabled at this point. Enable it in order to parse
+        * the ACPI tables and carry out sanity checks
+        */
+       enable_acpi();
+
+       /*
+        * If ACPI tables are initialized and FADT sanity checks passed,
+        * leave ACPI enabled and carry on booting; otherwise disable ACPI
+        * on initialization error.
+        * If acpi=force was passed on the command line it forces ACPI
+        * to be enabled even if its initialization failed.
+        */
+       if (acpi_table_init() || acpi_fadt_sanity_check()) {
+               pr_err("Failed to init ACPI tables\n");
+               if (!param_acpi_force)
+                       disable_acpi();
+       }
+}
+
+void __init acpi_gic_init(void)
+{
+       struct acpi_table_header *table;
+       acpi_status status;
+       acpi_size tbl_size;
+       int err;
+
+       if (acpi_disabled)
+               return;
+
+       status = acpi_get_table_with_size(ACPI_SIG_MADT, 0, &table, &tbl_size);
+       if (ACPI_FAILURE(status)) {
+               const char *msg = acpi_format_exception(status);
+
+               pr_err("Failed to get MADT table, %s\n", msg);
+               return;
+       }
+
+       err = gic_v2_acpi_init(table);
+       if (err)
+               pr_err("Failed to initialize GIC IRQ controller");
+
+       early_acpi_os_unmap_memory((char *)table, tbl_size);
+}
index cce9524..fb8ff9b 100644 (file)
@@ -35,7 +35,7 @@ static const struct cpu_operations *supported_cpu_ops[] __initconst = {
        NULL,
 };
 
-static const struct cpu_operations * __init cpu_get_ops(const char *name)
+const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
        const struct cpu_operations **ops = supported_cpu_ops;
 
index 6f93c24..4095379 100644 (file)
@@ -10,6 +10,7 @@
  *
  */
 
+#include <linux/acpi.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
@@ -46,3 +47,27 @@ int pcibios_add_device(struct pci_dev *dev)
 
        return 0;
 }
+
+/*
+ * raw_pci_read/write - Platform-specific PCI config space access.
+ */
+int raw_pci_read(unsigned int domain, unsigned int bus,
+                 unsigned int devfn, int reg, int len, u32 *val)
+{
+       return -ENXIO;
+}
+
+int raw_pci_write(unsigned int domain, unsigned int bus,
+               unsigned int devfn, int reg, int len, u32 val)
+{
+       return -ENXIO;
+}
+
+#ifdef CONFIG_ACPI
+/* Root bridge scanning */
+struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
+{
+       /* TODO: Should be revisited when implementing PCI on ACPI */
+       return NULL;
+}
+#endif
index 9b8a70a..ea18cb5 100644 (file)
@@ -15,6 +15,7 @@
 
 #define pr_fmt(fmt) "psci: " fmt
 
+#include <linux/acpi.h>
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/smp.h>
@@ -24,6 +25,7 @@
 #include <linux/slab.h>
 #include <uapi/linux/psci.h>
 
+#include <asm/acpi.h>
 #include <asm/compiler.h>
 #include <asm/cpu_ops.h>
 #include <asm/errno.h>
@@ -273,39 +275,8 @@ static void psci_sys_poweroff(void)
        invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
 }
 
-/*
- * PSCI Function IDs for v0.2+ are well defined so use
- * standard values.
- */
-static int __init psci_0_2_init(struct device_node *np)
+static void __init psci_0_2_set_functions(void)
 {
-       int err, ver;
-
-       err = get_set_conduit_method(np);
-
-       if (err)
-               goto out_put_node;
-
-       ver = psci_get_version();
-
-       if (ver == PSCI_RET_NOT_SUPPORTED) {
-               /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
-               pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
-               err = -EOPNOTSUPP;
-               goto out_put_node;
-       } else {
-               pr_info("PSCIv%d.%d detected in firmware.\n",
-                               PSCI_VERSION_MAJOR(ver),
-                               PSCI_VERSION_MINOR(ver));
-
-               if (PSCI_VERSION_MAJOR(ver) == 0 &&
-                               PSCI_VERSION_MINOR(ver) < 2) {
-                       err = -EINVAL;
-                       pr_err("Conflicting PSCI version detected.\n");
-                       goto out_put_node;
-               }
-       }
-
        pr_info("Using standard PSCI v0.2 function IDs\n");
        psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND;
        psci_ops.cpu_suspend = psci_cpu_suspend;
@@ -329,6 +300,60 @@ static int __init psci_0_2_init(struct device_node *np)
        arm_pm_restart = psci_sys_reset;
 
        pm_power_off = psci_sys_poweroff;
+}
+
+/*
+ * Probe function for PSCI firmware versions >= 0.2
+ */
+static int __init psci_probe(void)
+{
+       int ver = psci_get_version();
+
+       if (ver == PSCI_RET_NOT_SUPPORTED) {
+               /*
+                * PSCI versions >=0.2 mandates implementation of
+                * PSCI_VERSION.
+                */
+               pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+               return -EOPNOTSUPP;
+       } else {
+               pr_info("PSCIv%d.%d detected in firmware.\n",
+                               PSCI_VERSION_MAJOR(ver),
+                               PSCI_VERSION_MINOR(ver));
+
+               if (PSCI_VERSION_MAJOR(ver) == 0 &&
+                               PSCI_VERSION_MINOR(ver) < 2) {
+                       pr_err("Conflicting PSCI version detected.\n");
+                       return -EINVAL;
+               }
+       }
+
+       psci_0_2_set_functions();
+
+       return 0;
+}
+
+/*
+ * PSCI init function for PSCI versions >=0.2
+ *
+ * Probe based on PSCI PSCI_VERSION function
+ */
+static int __init psci_0_2_init(struct device_node *np)
+{
+       int err;
+
+       err = get_set_conduit_method(np);
+
+       if (err)
+               goto out_put_node;
+       /*
+        * Starting with v0.2, the PSCI specification introduced a call
+        * (PSCI_VERSION) that allows probing the firmware version, so
+        * that PSCI function IDs and version specific initialization
+        * can be carried out according to the specific version reported
+        * by firmware
+        */
+       err = psci_probe();
 
 out_put_node:
        of_node_put(np);
@@ -381,7 +406,7 @@ static const struct of_device_id psci_of_match[] __initconst = {
        {},
 };
 
-int __init psci_init(void)
+int __init psci_dt_init(void)
 {
        struct device_node *np;
        const struct of_device_id *matched_np;
@@ -396,6 +421,27 @@ int __init psci_init(void)
        return init_fn(np);
 }
 
+/*
+ * We use PSCI 0.2+ when ACPI is deployed on ARM64 and it's
+ * explicitly clarified in SBBR
+ */
+int __init psci_acpi_init(void)
+{
+       if (!acpi_psci_present()) {
+               pr_info("is not implemented in ACPI.\n");
+               return -EOPNOTSUPP;
+       }
+
+       pr_info("probing for conduit method from ACPI.\n");
+
+       if (acpi_psci_use_hvc())
+               invoke_psci_fn = __invoke_psci_fn_hvc;
+       else
+               invoke_psci_fn = __invoke_psci_fn_smc;
+
+       return psci_probe();
+}
+
 #ifdef CONFIG_SMP
 
 static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu)
index 51ef972..7475313 100644 (file)
@@ -17,6 +17,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/acpi.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
 #include <linux/stddef.h>
@@ -46,6 +47,7 @@
 #include <linux/efi.h>
 #include <linux/personality.h>
 
+#include <asm/acpi.h>
 #include <asm/fixmap.h>
 #include <asm/cpu.h>
 #include <asm/cputype.h>
@@ -395,18 +397,27 @@ void __init setup_arch(char **cmdline_p)
        efi_init();
        arm64_memblock_init();
 
+       /* Parse the ACPI tables for possible boot-time configuration */
+       acpi_boot_table_init();
+
        paging_init();
        request_standard_resources();
 
        early_ioremap_reset();
 
-       unflatten_device_tree();
-
-       psci_init();
+       if (acpi_disabled) {
+               unflatten_device_tree();
+               psci_dt_init();
+               cpu_read_bootcpu_ops();
+#ifdef CONFIG_SMP
+               of_smp_init_cpus();
+#endif
+       } else {
+               psci_acpi_init();
+               acpi_init_cpus();
+       }
 
-       cpu_read_bootcpu_ops();
 #ifdef CONFIG_SMP
-       smp_init_cpus();
        smp_build_mpidr_hash();
 #endif
 
index 714411f..2cb0081 100644 (file)
@@ -323,7 +323,7 @@ void __init smp_prepare_boot_cpu(void)
  * cpu logical map array containing MPIDR values related to logical
  * cpus. Assumes that cpu_logical_map(0) has already been initialized.
  */
-void __init smp_init_cpus(void)
+void __init of_smp_init_cpus(void)
 {
        struct device_node *dn = NULL;
        unsigned int i, cpu = 1;
index 1a7125c..42f9195 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/delay.h>
 #include <linux/clocksource.h>
 #include <linux/clk-provider.h>
+#include <linux/acpi.h>
 
 #include <clocksource/arm_arch_timer.h>
 
@@ -72,6 +73,12 @@ void __init time_init(void)
 
        tick_setup_hrtimer_broadcast();
 
+       /*
+        * Since ACPI or FDT will only one be available in the system,
+        * we can use acpi_generic_timer_init() here safely
+        */
+       acpi_generic_timer_init();
+
        arch_timer_rate = arch_timer_get_rate();
        if (!arch_timer_rate)
                panic("Unable to initialise architected timer.\n");
index 3830078..99c00d8 100644 (file)
@@ -48,7 +48,6 @@ CONFIG_IP_PNP=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=m
 CONFIG_MTD_RAM=y
index cd0636b..cdeb518 100644 (file)
@@ -67,7 +67,6 @@ CONFIG_BFIN_SIR0=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
index 16273a9..ed7d2c0 100644 (file)
@@ -50,7 +50,6 @@ CONFIG_IRTTY_SIR=m
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=y
 CONFIG_MTD_CFI_AMDSTD=y
index 0df2f92..0c241f4 100644 (file)
@@ -50,7 +50,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=m
 CONFIG_MTD_CFI_AMDSTD=m
index 91d3eda..e5360b3 100644 (file)
@@ -55,13 +55,14 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=m
 CONFIG_MTD_CFI_AMDSTD=m
 CONFIG_MTD_RAM=y
 CONFIG_MTD_ROM=m
 CONFIG_MTD_PHYSMAP=m
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_SPI_NOR=y
 CONFIG_BLK_DEV_RAM=y
 CONFIG_NETDEVICES=y
 CONFIG_NET_BFIN=y
index be03be6..60f6fb8 100644 (file)
@@ -60,7 +60,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=m
 CONFIG_MTD_CFI_AMDSTD=m
index 802f9c4..78f6bc7 100644 (file)
@@ -50,7 +50,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_PLATRAM=y
 CONFIG_MTD_PHRAM=y
index e2a2fa5..fac8bb5 100644 (file)
@@ -52,7 +52,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
index 680730e..2a2e4d0 100644 (file)
@@ -54,7 +54,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
index fcec5ce..ba4267f 100644 (file)
@@ -105,6 +105,7 @@ CONFIG_SPI=y
 CONFIG_SPI_ADI_V3=y
 CONFIG_GPIOLIB=y
 CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_MCP23S08=y
 # CONFIG_HWMON is not set
 CONFIG_WATCHDOG=y
 CONFIG_BFIN_WDT=y
index 05108b8..1902bb0 100644 (file)
@@ -55,7 +55,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
index 5e0db82..9a5716d 100644 (file)
@@ -37,7 +37,6 @@ CONFIG_UNIX=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
index 2e47df7..6845928 100644 (file)
@@ -52,7 +52,6 @@ CONFIG_IP_PNP=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
index 6da629f..d9915e9 100644 (file)
@@ -48,7 +48,6 @@ CONFIG_INET=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
index 9ff79df..92d8130 100644 (file)
@@ -54,7 +54,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
index d6dd98e..fa8d911 100644 (file)
@@ -52,7 +52,6 @@ CONFIG_INET=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
index 2b58cb2..8860059 100644 (file)
@@ -36,7 +36,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_DEBUG=y
 CONFIG_MTD_DEBUG_VERBOSE=1
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_NFTL=y
 CONFIG_NFTL_RW=y
index 5adf0da..9e3ae4b 100644 (file)
@@ -43,7 +43,6 @@ CONFIG_IP_NF_TARGET_REJECT=y
 CONFIG_IP_NF_MANGLE=y
 # CONFIG_WIRELESS is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_AMDSTD=y
index a6a7298..c792681 100644 (file)
@@ -46,7 +46,6 @@ CONFIG_IP_PNP=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_RAM=y
 CONFIG_MTD_COMPLEX_MAPPINGS=y
index bc21664..23fdc57 100644 (file)
@@ -38,7 +38,6 @@ CONFIG_IRTTY_SIR=m
 # CONFIG_WIRELESS is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_JEDECPROBE=m
 CONFIG_MTD_RAM=y
index ea88158..e289594 100644 (file)
@@ -55,7 +55,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_ADV_OPTIONS=y
index c1f45f1..39e85cc 100644 (file)
@@ -44,7 +44,6 @@ CONFIG_INET=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
index dccae26..4e8ad05 100644 (file)
 #include <linux/types.h>
 #include <asm/byteorder.h>
 
-#define DECLARE_BFIN_RAW_READX(size, type, asm, asm_sign) \
-static inline type __raw_read##size(const volatile void __iomem *addr) \
-{ \
-       unsigned int val; \
-       int tmp; \
-       __asm__ __volatile__ ( \
-               "cli %1;" \
-               "NOP; NOP; SSYNC;" \
-               "%0 = "#asm" [%2] "#asm_sign";" \
-               "sti %1;" \
-               : "=d"(val), "=d"(tmp) \
-               : "a"(addr) \
-       ); \
-       return (type) val; \
-}
-DECLARE_BFIN_RAW_READX(b, u8, b, (z))
-#define __raw_readb __raw_readb
-DECLARE_BFIN_RAW_READX(w, u16, w, (z))
-#define __raw_readw __raw_readw
-DECLARE_BFIN_RAW_READX(l, u32, , )
-#define __raw_readl __raw_readl
+#define __raw_readb bfin_read8
+#define __raw_readw bfin_read16
+#define __raw_readl bfin_read32
+#define __raw_writeb(val, addr) bfin_write8(addr, val)
+#define __raw_writew(val, addr) bfin_write16(addr, val)
+#define __raw_writel(val, addr) bfin_write32(addr, val)
 
 extern void outsb(unsigned long port, const void *addr, unsigned long count);
 extern void outsw(unsigned long port, const void *addr, unsigned long count);
@@ -50,14 +35,6 @@ extern void insl_16(unsigned long port, void *addr, unsigned long count);
 #define insw insw
 #define insl insl
 
-extern void dma_outsb(unsigned long port, const void *addr, unsigned short count);
-extern void dma_outsw(unsigned long port, const void *addr, unsigned short count);
-extern void dma_outsl(unsigned long port, const void *addr, unsigned short count);
-
-extern void dma_insb(unsigned long port, void *addr, unsigned short count);
-extern void dma_insw(unsigned long port, void *addr, unsigned short count);
-extern void dma_insl(unsigned long port, void *addr, unsigned short count);
-
 /**
  * I/O write barrier
  *
index a451164..0cb9078 100644 (file)
 #define __NR_sendmmsg          380
 #define __NR_process_vm_readv  381
 #define __NR_process_vm_writev 382
+#define __NR_kcmp              383
+#define __NR_finit_module      384
+#define __NR_sched_setattr     385
+#define __NR_sched_getattr     386
+#define __NR_renameat2         387
+#define __NR_seccomp           388
+#define __NR_getrandom         389
+#define __NR_memfd_create      390
+#define __NR_bpf               391
+#define __NR_execveat          392
 
-#define __NR_syscall           383
+#define __NR_syscall           393  /* For internal using, not implemented */
 #define NR_syscalls            __NR_syscall
 
 /* Old optional stuff no one actually uses */
index 947ad08..86b1cd3 100644 (file)
@@ -1620,7 +1620,6 @@ static int __init bfin_debug_mmrs_init(void)
        D16(USB_APHY_CNTRL);
        D16(USB_APHY_CALIB);
        D16(USB_APHY_CNTRL2);
-       D16(USB_PHY_TEST);
        D16(USB_PLLOSC_CTRL);
        D16(USB_SRP_CLKDIV);
        D16(USB_EP_NI0_TXMAXP);
index fa53fae..cf773f0 100644 (file)
@@ -330,9 +330,6 @@ static void bfin_disable_hw_debug(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_SMP
-extern void generic_exec_single(int cpu, struct call_single_data *data, int wait);
-static struct call_single_data kgdb_smp_ipi_data[NR_CPUS];
-
 void kgdb_passive_cpu_callback(void *info)
 {
        kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
@@ -343,15 +340,14 @@ void kgdb_roundup_cpus(unsigned long flags)
        unsigned int cpu;
 
        for (cpu = cpumask_first(cpu_online_mask); cpu < nr_cpu_ids;
-               cpu = cpumask_next(cpu, cpu_online_mask)) {
-               kgdb_smp_ipi_data[cpu].func = kgdb_passive_cpu_callback;
-               generic_exec_single(cpu, &kgdb_smp_ipi_data[cpu], 0);
-       }
+               cpu = cpumask_next(cpu, cpu_online_mask))
+               smp_call_function_single(cpu, kgdb_passive_cpu_callback,
+                                        NULL, 0);
 }
 
 void kgdb_roundup_cpu(int cpu, unsigned long flags)
 {
-       generic_exec_single(cpu, &kgdb_smp_ipi_data[cpu], 0);
+       smp_call_function_single(cpu, kgdb_passive_cpu_callback, NULL, 0);
 }
 #endif
 
@@ -359,19 +355,6 @@ void kgdb_roundup_cpu(int cpu, unsigned long flags)
 static unsigned long kgdb_arch_imask;
 #endif
 
-void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
-{
-       if (kgdb_single_step)
-               preempt_enable();
-
-#ifdef CONFIG_IPIPE
-       if (kgdb_arch_imask) {
-               cpu_pda[raw_smp_processor_id()].ex_imask = kgdb_arch_imask;
-               kgdb_arch_imask = 0;
-       }
-#endif
-}
-
 int kgdb_arch_handle_exception(int vector, int signo,
                               int err_code, char *remcom_in_buffer,
                               char *remcom_out_buffer,
index 4f424ae..ad82468 100644 (file)
@@ -1464,5 +1464,5 @@ void __init cmdline_init(const char *r0)
 {
        early_shadow_stamp();
        if (r0)
-               strncpy(command_line, r0, COMMAND_LINE_SIZE);
+               strlcpy(command_line, r0, COMMAND_LINE_SIZE);
 }
index d90a85b..bd04531 100644 (file)
 #define bfin_read_USB_APHY_CNTRL2()            bfin_read16(USB_APHY_CNTRL2)
 #define bfin_write_USB_APHY_CNTRL2(val)                bfin_write16(USB_APHY_CNTRL2, val)
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define bfin_read_USB_PHY_TEST()               bfin_read16(USB_PHY_TEST)
-#define bfin_write_USB_PHY_TEST(val)           bfin_write16(USB_PHY_TEST, val)
-
 #define bfin_read_USB_PLLOSC_CTRL()            bfin_read16(USB_PLLOSC_CTRL)
 #define bfin_write_USB_PLLOSC_CTRL(val)                bfin_write16(USB_PLLOSC_CTRL, val)
 #define bfin_read_USB_SRP_CLKDIV()             bfin_read16(USB_SRP_CLKDIV)
index 71578d9..591e00f 100644 (file)
 
 #define                  USB_APHY_CNTRL2  0xffc039e8   /* Register used to prevent re-enumeration once Moab goes into hibernate mode */
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define                     USB_PHY_TEST  0xffc039ec   /* Used for reducing simulation time and simplifies FIFO testability */
-
 #define                  USB_PLLOSC_CTRL  0xffc039f0   /* Used to program different parameters for USB PLL and Oscillator */
 #define                   USB_SRP_CLKDIV  0xffc039f4   /* Used to program clock divide value for the clock fed to the SRP detection logic */
 
index d09c19c..9163479 100644 (file)
 #define bfin_read_USB_APHY_CNTRL2()            bfin_read16(USB_APHY_CNTRL2)
 #define bfin_write_USB_APHY_CNTRL2(val)                bfin_write16(USB_APHY_CNTRL2, val)
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define bfin_read_USB_PHY_TEST()               bfin_read16(USB_PHY_TEST)
-#define bfin_write_USB_PHY_TEST(val)           bfin_write16(USB_PHY_TEST, val)
 #define bfin_read_USB_PLLOSC_CTRL()            bfin_read16(USB_PLLOSC_CTRL)
 #define bfin_write_USB_PLLOSC_CTRL(val)                bfin_write16(USB_PLLOSC_CTRL, val)
 #define bfin_read_USB_SRP_CLKDIV()             bfin_read16(USB_SRP_CLKDIV)
index bcb9726..be83f64 100644 (file)
 #define bfin_read_USB_APHY_CNTRL2()            bfin_read16(USB_APHY_CNTRL2)
 #define bfin_write_USB_APHY_CNTRL2(val)                bfin_write16(USB_APHY_CNTRL2, val)
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define bfin_read_USB_PHY_TEST()               bfin_read16(USB_PHY_TEST)
-#define bfin_write_USB_PHY_TEST(val)           bfin_write16(USB_PHY_TEST, val)
 #define bfin_read_USB_PLLOSC_CTRL()            bfin_read16(USB_PLLOSC_CTRL)
 #define bfin_write_USB_PLLOSC_CTRL(val)                bfin_write16(USB_PLLOSC_CTRL, val)
 #define bfin_read_USB_SRP_CLKDIV()             bfin_read16(USB_SRP_CLKDIV)
index 5116157..ae4b889 100644 (file)
 #define                   USB_APHY_CALIB  0xffc03de4   /* Register used to set some calibration values */
 #define                  USB_APHY_CNTRL2  0xffc03de8   /* Register used to prevent re-enumeration once Moab goes into hibernate mode */
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define                     USB_PHY_TEST  0xffc03dec   /* Used for reducing simulation time and simplifies FIFO testability */
 #define                  USB_PLLOSC_CTRL  0xffc03df0   /* Used to program different parameters for USB PLL and Oscillator */
 #define                   USB_SRP_CLKDIV  0xffc03df4   /* Used to program clock divide value for the clock fed to the SRP detection logic */
 
index d55dcc0..7cc7928 100644 (file)
 #define                   USB_APHY_CALIB  0xffc03de4   /* Register used to set some calibration values */
 #define                  USB_APHY_CNTRL2  0xffc03de8   /* Register used to prevent re-enumeration once Moab goes into hibernate mode */
 
-/* (PHY_TEST is for ADI usage only) */
-
-#define                     USB_PHY_TEST  0xffc03dec   /* Used for reducing simulation time and simplifies FIFO testability */
 #define                  USB_PLLOSC_CTRL  0xffc03df0   /* Used to program different parameters for USB PLL and Oscillator */
 #define                   USB_SRP_CLKDIV  0xffc03df4   /* Used to program clock divide value for the clock fed to the SRP detection logic */
 
index 7f9fc27..2c61fc0 100644 (file)
@@ -780,8 +780,8 @@ static struct adi_spi3_chip spidev_chip_info = {
 };
 #endif
 
-#if IS_ENABLED(CONFIG_SND_BF5XX_I2S)
-static struct platform_device bfin_i2s_pcm = {
+#if IS_ENABLED(CONFIG_SND_BF6XX_PCM)
+static struct platform_device bfin_pcm = {
        .name = "bfin-i2s-pcm-audio",
        .id = -1,
 };
@@ -1034,7 +1034,6 @@ static struct adv7842_platform_data adv7842_data = {
        .i2c_infoframe = 0x48,
        .i2c_cec = 0x49,
        .i2c_avlink = 0x4a,
-       .i2c_ex = 0x26,
 };
 
 static struct bfin_capture_config bfin_capture_data = {
@@ -1104,7 +1103,6 @@ static struct disp_route adv7511_routes[] = {
 
 static struct adv7511_platform_data adv7511_data = {
        .edid_addr = 0x7e,
-       .i2c_ex = 0x25,
 };
 
 static struct bfin_display_config bfin_display_data = {
@@ -1209,6 +1207,35 @@ static struct platform_device bfin_display_device = {
 };
 #endif
 
+#if defined(CONFIG_FB_BF609_NL8048) \
+       || defined(CONFIG_FB_BF609_NL8048_MODULE)
+static struct resource nl8048_resources[] = {
+       {
+               .start = EPPI2_STAT,
+               .end = EPPI2_STAT,
+               .flags = IORESOURCE_MEM,
+       },
+       {
+               .start = CH_EPPI2_CH0,
+               .end = CH_EPPI2_CH0,
+               .flags = IORESOURCE_DMA,
+       },
+       {
+               .start = IRQ_EPPI2_STAT,
+               .end = IRQ_EPPI2_STAT,
+               .flags = IORESOURCE_IRQ,
+       },
+};
+static struct platform_device bfin_fb_device = {
+       .name = "bf609_nl8048",
+       .num_resources = ARRAY_SIZE(nl8048_resources),
+       .resource = nl8048_resources,
+       .dev = {
+               .platform_data = (void *)GPIO_PC15,
+       },
+};
+#endif
+
 #if defined(CONFIG_BFIN_CRC)
 #define BFIN_CRC_NAME "bfin-crc"
 
@@ -1862,6 +1889,29 @@ static struct platform_device i2c_bfin_twi1_device = {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+#include <linux/spi/mcp23s08.h>
+static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch0 = {
+       .base = 120,
+};
+static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch1 = {
+       .base = 130,
+};
+static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch2 = {
+       .base = 140,
+};
+# if IS_ENABLED(CONFIG_VIDEO_ADV7842)
+static const struct mcp23s08_platform_data bfin_adv7842_soft_switch = {
+       .base = 150,
+};
+# endif
+# if IS_ENABLED(CONFIG_VIDEO_ADV7511) || IS_ENABLED(CONFIG_VIDEO_ADV7343)
+static const struct mcp23s08_platform_data bfin_adv7511_soft_switch = {
+       .base = 160,
+};
+# endif
+#endif
+
 static struct i2c_board_info __initdata bfin_i2c_board_info0[] = {
 #if IS_ENABLED(CONFIG_INPUT_ADXL34X_I2C)
        {
@@ -1881,6 +1931,32 @@ static struct i2c_board_info __initdata bfin_i2c_board_info0[] = {
                I2C_BOARD_INFO("ssm2602", 0x1b),
        },
 #endif
+#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+       {
+               I2C_BOARD_INFO("mcp23017", 0x21),
+               .platform_data = (void *)&bfin_mcp23s08_soft_switch0
+       },
+       {
+               I2C_BOARD_INFO("mcp23017", 0x22),
+               .platform_data = (void *)&bfin_mcp23s08_soft_switch1
+       },
+       {
+               I2C_BOARD_INFO("mcp23017", 0x23),
+               .platform_data = (void *)&bfin_mcp23s08_soft_switch2
+       },
+# if IS_ENABLED(CONFIG_VIDEO_ADV7842)
+       {
+               I2C_BOARD_INFO("mcp23017", 0x26),
+               .platform_data = (void *)&bfin_adv7842_soft_switch
+       },
+# endif
+# if IS_ENABLED(CONFIG_VIDEO_ADV7511) || IS_ENABLED(CONFIG_VIDEO_ADV7343)
+       {
+               I2C_BOARD_INFO("mcp23017", 0x25),
+               .platform_data = (void *)&bfin_adv7511_soft_switch
+       },
+# endif
+#endif
 };
 
 static struct i2c_board_info __initdata bfin_i2c_board_info1[] = {
@@ -2023,8 +2099,8 @@ static struct platform_device *ezkit_devices[] __initdata = {
 #if IS_ENABLED(CONFIG_MTD_PHYSMAP)
        &ezkit_flash_device,
 #endif
-#if IS_ENABLED(CONFIG_SND_BF5XX_I2S)
-       &bfin_i2s_pcm,
+#if IS_ENABLED(CONFIG_SND_BF6XX_PCM)
+       &bfin_pcm,
 #endif
 #if IS_ENABLED(CONFIG_SND_BF6XX_SOC_I2S)
        &bfin_i2s,
@@ -2060,7 +2136,7 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = {
        PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary",  "pinctrl-adi2.0", NULL, "rotary"),
        PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.0",  "pinctrl-adi2.0", NULL, "can0"),
        PIN_MAP_MUX_GROUP_DEFAULT("physmap-flash.0",  "pinctrl-adi2.0", NULL, "smc0"),
-       PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.2",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
+       PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.0",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
        PIN_MAP_MUX_GROUP("bfin_display.0", "8bit",  "pinctrl-adi2.0", "ppi2_8bgrp", "ppi2"),
        PIN_MAP_MUX_GROUP_DEFAULT("bfin_display.0",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
        PIN_MAP_MUX_GROUP("bfin_display.0", "16bit",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
index 244fa4a..3783058 100644 (file)
@@ -363,6 +363,12 @@ static struct clk ethclk = {
        .ops        = &dummy_clk_ops,
 };
 
+static struct clk ethpclk = {
+       .name       = "pclk",
+       .parent     = &sclk0,
+       .ops        = &dummy_clk_ops,
+};
+
 static struct clk spiclk = {
        .name       = "spi",
        .parent     = &sclk1,
@@ -381,6 +387,7 @@ static struct clk_lookup bf609_clks[] = {
        CLK(dclk, NULL, "DCLK"),
        CLK(oclk, NULL, "OCLK"),
        CLK(ethclk, NULL, "stmmaceth"),
+       CLK(ethpclk, NULL, "pclk"),
        CLK(spiclk, NULL, "spi"),
 };
 
index 86b5a09..8d9431e 100644 (file)
@@ -1694,6 +1694,16 @@ ENTRY(_sys_call_table)
        .long _sys_sendmmsg             /* 380 */
        .long _sys_process_vm_readv
        .long _sys_process_vm_writev
+       .long _sys_kcmp
+       .long _sys_finit_module
+       .long _sys_sched_setattr        /* 385 */
+       .long _sys_sched_getattr
+       .long _sys_renameat2
+       .long _sys_seccomp
+       .long _sys_getrandom
+       .long _sys_memfd_create         /* 390 */
+       .long _sys_bpf
+       .long _sys_execveat
 
        .rept NR_syscalls-(.-_sys_call_table)/4
        .long _sys_ni_syscall
index 1387a94..a66d979 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/irq.h>
+#include <linux/delay.h>
 
 #include <asm/cplb.h>
 #include <asm/gpio.h>
@@ -180,6 +181,7 @@ int bfin_pm_suspend_mem_enter(void)
 
 #if defined(CONFIG_BFIN_EXTMEM_WRITEBACK) || defined(CONFIG_BFIN_L2_WRITEBACK)
        flushinv_all_dcache();
+       udelay(1);
 #endif
        _disable_dcplb();
        _disable_icplb();
index 4a03911..0314e32 100644 (file)
@@ -46,12 +46,18 @@ config CRIS
        select ARCH_WANT_IPC_PARSE_VERSION
        select GENERIC_IRQ_SHOW
        select GENERIC_IOMAP
-       select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32
        select GENERIC_CMOS_UPDATE
        select MODULES_USE_ELF_RELA
        select CLONE_BACKWARDS2
        select OLD_SIGSUSPEND
        select OLD_SIGACTION
+       select ARCH_REQUIRE_GPIOLIB
+       select IRQ_DOMAIN if ETRAX_ARCH_V32
+       select OF if ETRAX_ARCH_V32
+       select OF_EARLY_FLATTREE if ETRAX_ARCH_V32
+       select CLKSRC_MMIO if ETRAX_ARCH_V32
+       select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32
+       select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32
 
 config HZ
        int
@@ -61,6 +67,10 @@ config NR_CPUS
        int
        default "1"
 
+config BUILTIN_DTB
+       string "DTB to build into the kernel image"
+       depends on OF
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
index 39dc7d0..4a5404b 100644 (file)
@@ -40,6 +40,10 @@ else
 MACH :=
 endif
 
+ifneq ($(CONFIG_BUILTIN_DTB),"")
+core-$(CONFIG_OF) += arch/cris/boot/dts/
+endif
+
 LD = $(CROSS_COMPILE)ld -mcrislinux
 
 OBJCOPYFLAGS := -O binary -R .note -R .comment -S
index 4035835..d9fc617 100644 (file)
@@ -9,7 +9,6 @@ obj-y   := entry.o traps.o irq.o debugport.o \
           process.o ptrace.o setup.o signal.o traps.o time.o \
           cache.o cacheflush.o
 
-obj-$(CONFIG_SMP) += smp.o
 obj-$(CONFIG_ETRAX_KGDB) += kgdb.o kgdb_asm.o
 obj-$(CONFIG_ETRAX_FAST_TIMER) += fasttimer.o
 obj-$(CONFIG_MODULES)    += crisksyms.o
index 2f19ac6..026a0b2 100644 (file)
@@ -99,6 +99,8 @@ ret_from_kernel_thread:
 
        .type   ret_from_intr,@function
 ret_from_intr:
+       moveq   0, $r9                  ; not a syscall
+
        ;; Check for resched if preemptive kernel, or if we're going back to
        ;; user-mode. This test matches the user_regs(regs) macro. Don't simply
        ;; test CCS since that doesn't necessarily reflect what mode we'll
@@ -145,7 +147,7 @@ system_call:
        ;; Stack-frame similar to the irq heads, which is reversed in
        ;; ret_from_sys_call.
 
-       sub.d   92, $sp         ; Skip EXS and EDA.
+       sub.d   92, $sp         ; Skip EDA.
        movem   $r13, [$sp]
        move.d  $sp, $r8
        addq    14*4, $r8
@@ -156,8 +158,9 @@ system_call:
        move    $ccs, $r4
        move    $srp, $r5
        move    $erp, $r6
+       move.d  $r9, $r7        ; Store syscall number in EXS
        subq    4, $sp
-       movem   $r6, [$r8]
+       movem   $r7, [$r8]
        ei                      ; Enable interrupts while processing syscalls.
        move.d  $r10, [$sp]
 
@@ -277,44 +280,15 @@ _syscall_exit_work:
 
        .type   _work_pending,@function
 _work_pending:
-       addoq   +TI_flags, $r0, $acr
-       move.d  [$acr], $r10
-       btstq   TIF_NEED_RESCHED, $r10  ; Need resched?
-       bpl     _work_notifysig         ; No, must be signal/notify.
-       nop
-       .size   _work_pending, . - _work_pending
-
-       .type   _work_resched,@function
-_work_resched:
-       move.d  $r9, $r1                ; Preserve R9.
-       jsr     schedule
-       nop
-       move.d  $r1, $r9
-       di
-
-       addoq   +TI_flags, $r0, $acr
-       move.d  [$acr], $r1
-       and.d   _TIF_WORK_MASK, $r1     ; Ignore sycall trace counter.
-       beq     _Rexit
-       nop
-       btstq   TIF_NEED_RESCHED, $r1
-       bmi     _work_resched           ; current->work.need_resched.
-       nop
-       .size   _work_resched, . - _work_resched
-
-       .type   _work_notifysig,@function
-_work_notifysig:
-       ;; Deal with pending signals and notify-resume requests.
-
        addoq   +TI_flags, $r0, $acr
        move.d  [$acr], $r12            ; The thread_info_flags parameter.
        move.d  $sp, $r11               ; The regs param.
-       jsr     do_notify_resume
-       move.d  $r9, $r10               ; do_notify_resume syscall/irq param.
+       jsr     do_work_pending
+       move.d  $r9, $r10               ; The syscall/irq param.
 
        ba _Rexit
        nop
-       .size   _work_notifysig, . - _work_notifysig
+       .size   _work_pending, . - _work_pending
 
        ;; We get here as a sidetrack when we've entered a syscall with the
        ;; trace-bit set. We need to call do_syscall_trace and then continue
index 51e3416..74a66e0 100644 (file)
@@ -52,11 +52,6 @@ tstart:
 
        GIO_INIT
 
-#ifdef CONFIG_SMP
-secondary_cpu_entry: /* Entry point for secondary CPUs */
-       di
-#endif
-
        ;; Setup and enable the MMU. Use same configuration for both the data
        ;; and the instruction MMU.
        ;;
@@ -164,33 +159,6 @@ secondary_cpu_entry: /* Entry point for secondary CPUs */
        nop
        nop
 
-#ifdef CONFIG_SMP
-       ;; Read CPU ID
-       move    0, $srs
-       nop
-       nop
-       nop
-       move    $s12, $r0
-       cmpq    0, $r0
-       beq     master_cpu
-       nop
-slave_cpu:
-       ; Time to boot-up. Get stack location provided by master CPU.
-       move.d  smp_init_current_idle_thread, $r1
-       move.d  [$r1], $sp
-       add.d   8192, $sp
-       move.d  ebp_start, $r0  ; Defined in linker-script.
-       move    $r0, $ebp
-       jsr     smp_callin
-       nop
-master_cpu:
-       /* Set up entry point for secondary CPUs. The boot ROM has set up
-        * EBP at start of internal memory. The CPU will get there
-        * later when we issue an IPI to them... */
-       move.d MEM_INTMEM_START + IPI_INTR_VECT * 4, $r0
-       move.d secondary_cpu_entry, $r1
-       move.d $r1, [$r0]
-#endif
        ; Check if starting from DRAM (network->RAM boot or unpacked
        ; compressed kernel), or directly from flash.
        lapcq   ., $r0
index 25437ae..6a881e0 100644 (file)
@@ -10,6 +10,8 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/profile.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/threads.h>
@@ -56,9 +58,6 @@ struct cris_irq_allocation irq_allocations[NR_REAL_IRQS] =
 static unsigned long irq_regs[NR_CPUS] =
 {
   regi_irq,
-#ifdef CONFIG_SMP
-  regi_irq2,
-#endif
 };
 
 #if NR_REAL_IRQS > 32
@@ -431,6 +430,19 @@ crisv32_do_multiple(struct pt_regs* regs)
        irq_exit();
 }
 
+static int crisv32_irq_map(struct irq_domain *h, unsigned int virq,
+                          irq_hw_number_t hw_irq_num)
+{
+       irq_set_chip_and_handler(virq, &crisv32_irq_type, handle_simple_irq);
+
+       return 0;
+}
+
+static struct irq_domain_ops crisv32_irq_ops = {
+       .map    = crisv32_irq_map,
+       .xlate  = irq_domain_xlate_onecell,
+};
+
 /*
  * This is called by start_kernel. It fixes the IRQ masks and setup the
  * interrupt vector table to point to bad_interrupt pointers.
@@ -441,6 +453,8 @@ init_IRQ(void)
        int i;
        int j;
        reg_intr_vect_rw_mask vect_mask = {0};
+       struct device_node *np;
+       struct irq_domain *domain;
 
        /* Clear all interrupts masks. */
        for (i = 0; i < NBR_REGS; i++)
@@ -449,10 +463,15 @@ init_IRQ(void)
        for (i = 0; i < 256; i++)
                etrax_irv->v[i] = weird_irq;
 
-       /* Point all IRQ's to bad handlers. */
+       np = of_find_compatible_node(NULL, NULL, "axis,crisv32-intc");
+       domain = irq_domain_add_legacy(np, NR_IRQS - FIRST_IRQ,
+                                      FIRST_IRQ, FIRST_IRQ,
+                                      &crisv32_irq_ops, NULL);
+       BUG_ON(!domain);
+       irq_set_default_host(domain);
+       of_node_put(np);
+
        for (i = FIRST_IRQ, j = 0; j < NR_IRQS; i++, j++) {
-               irq_set_chip_and_handler(j, &crisv32_irq_type,
-                                        handle_simple_irq);
                set_exception_vector(i, interrupt[j]);
        }
 
index 81715c6..cd1865d 100644 (file)
@@ -63,11 +63,6 @@ int show_cpuinfo(struct seq_file *m, void *v)
 
        info = &cpinfo[ARRAY_SIZE(cpinfo) - 1];
 
-#ifdef CONFIG_SMP
-       if (!cpu_online(cpu))
-               return 0;
-#endif
-
        revision = rdvr();
 
        for (i = 0; i < ARRAY_SIZE(cpinfo); i++) {
index 0c9ce9e..3a36ae6 100644 (file)
@@ -72,6 +72,9 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
        /* Make that the user-mode flag is set. */
        regs->ccs |= (1 << (U_CCS_BITNR + CCS_SHIFT));
 
+       /* Don't perform syscall restarting */
+       regs->exs = -1;
+
        /* Restore the old USP. */
        err |= __get_user(old_usp, &sc->usp);
        wrusp(old_usp);
@@ -425,6 +428,8 @@ do_signal(int canrestart, struct pt_regs *regs)
 {
        struct ksignal ksig;
 
+       canrestart = canrestart && ((int)regs->exs >= 0);
+
        /*
         * The common case should go fast, which is why this point is
         * reached from kernel-mode. If that's the case, just return
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c
deleted file mode 100644 (file)
index 0698582..0000000
+++ /dev/null
@@ -1,358 +0,0 @@
-#include <linux/types.h>
-#include <asm/delay.h>
-#include <irq.h>
-#include <hwregs/intr_vect.h>
-#include <hwregs/intr_vect_defs.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <hwregs/asm/mmu_defs_asm.h>
-#include <hwregs/supp_reg.h>
-#include <linux/atomic.h>
-
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/cpumask.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-
-#define IPI_SCHEDULE 1
-#define IPI_CALL 2
-#define IPI_FLUSH_TLB 4
-#define IPI_BOOT 8
-
-#define FLUSH_ALL (void*)0xffffffff
-
-/* Vector of locks used for various atomic operations */
-spinlock_t cris_atomic_locks[] = {
-       [0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(cris_atomic_locks)
-};
-
-/* CPU masks */
-cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(phys_cpu_present_map);
-
-/* Variables used during SMP boot */
-volatile int cpu_now_booting = 0;
-volatile struct thread_info *smp_init_current_idle_thread;
-
-/* Variables used during IPI */
-static DEFINE_SPINLOCK(call_lock);
-static DEFINE_SPINLOCK(tlbstate_lock);
-
-struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       int wait;
-};
-
-static struct call_data_struct * call_data;
-
-static struct mm_struct* flush_mm;
-static struct vm_area_struct* flush_vma;
-static unsigned long flush_addr;
-
-/* Mode registers */
-static unsigned long irq_regs[NR_CPUS] = {
-  regi_irq,
-  regi_irq2
-};
-
-static irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id);
-static int send_ipi(int vector, int wait, cpumask_t cpu_mask);
-static struct irqaction irq_ipi  = {
-       .handler = crisv32_ipi_interrupt,
-       .flags = 0,
-       .name = "ipi",
-};
-
-extern void cris_mmu_init(void);
-extern void cris_timer_init(void);
-
-/* SMP initialization */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
-       int i;
-
-       /* From now on we can expect IPIs so set them up */
-       setup_irq(IPI_INTR_VECT, &irq_ipi);
-
-       /* Mark all possible CPUs as present */
-       for (i = 0; i < max_cpus; i++)
-               cpumask_set_cpu(i, &phys_cpu_present_map);
-}
-
-void smp_prepare_boot_cpu(void)
-{
-       /* PGD pointer has moved after per_cpu initialization so
-        * update the MMU.
-        */
-       pgd_t **pgd;
-       pgd = (pgd_t**)&per_cpu(current_pgd, smp_processor_id());
-
-       SUPP_BANK_SEL(1);
-       SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-       SUPP_BANK_SEL(2);
-       SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-
-       set_cpu_online(0, true);
-       cpumask_set_cpu(0, &phys_cpu_present_map);
-       set_cpu_possible(0, true);
-}
-
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-}
-
-/* Bring one cpu online.*/
-static int __init
-smp_boot_one_cpu(int cpuid, struct task_struct idle)
-{
-       unsigned timeout;
-       cpumask_t cpu_mask;
-
-       cpumask_clear(&cpu_mask);
-       task_thread_info(idle)->cpu = cpuid;
-
-       /* Information to the CPU that is about to boot */
-       smp_init_current_idle_thread = task_thread_info(idle);
-       cpu_now_booting = cpuid;
-
-       /* Kick it */
-       set_cpu_online(cpuid, true);
-       cpumask_set_cpu(cpuid, &cpu_mask);
-       send_ipi(IPI_BOOT, 0, cpu_mask);
-       set_cpu_online(cpuid, false);
-
-       /* Wait for CPU to come online */
-       for (timeout = 0; timeout < 10000; timeout++) {
-               if(cpu_online(cpuid)) {
-                       cpu_now_booting = 0;
-                       smp_init_current_idle_thread = NULL;
-                       return 0; /* CPU online */
-               }
-               udelay(100);
-               barrier();
-       }
-
-       printk(KERN_CRIT "SMP: CPU:%d is stuck.\n", cpuid);
-       return -1;
-}
-
-/* Secondary CPUs starts using C here. Here we need to setup CPU
- * specific stuff such as the local timer and the MMU. */
-void __init smp_callin(void)
-{
-       int cpu = cpu_now_booting;
-       reg_intr_vect_rw_mask vect_mask = {0};
-
-       /* Initialise the idle task for this CPU */
-       atomic_inc(&init_mm.mm_count);
-       current->active_mm = &init_mm;
-
-       /* Set up MMU */
-       cris_mmu_init();
-       __flush_tlb_all();
-
-       /* Setup local timer. */
-       cris_timer_init();
-
-       /* Enable IRQ and idle */
-       REG_WR(intr_vect, irq_regs[cpu], rw_mask, vect_mask);
-       crisv32_unmask_irq(IPI_INTR_VECT);
-       crisv32_unmask_irq(TIMER0_INTR_VECT);
-       preempt_disable();
-       notify_cpu_starting(cpu);
-       local_irq_enable();
-
-       set_cpu_online(cpu, true);
-       cpu_startup_entry(CPUHP_ONLINE);
-}
-
-/* Stop execution on this CPU.*/
-void stop_this_cpu(void* dummy)
-{
-       local_irq_disable();
-       asm volatile("halt");
-}
-
-/* Other calls */
-void smp_send_stop(void)
-{
-       smp_call_function(stop_this_cpu, NULL, 0);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-       return -EINVAL;
-}
-
-
-/* cache_decay_ticks is used by the scheduler to decide if a process
- * is "hot" on one CPU. A higher value means a higher penalty to move
- * a process to another CPU. Our cache is rather small so we report
- * 1 tick.
- */
-unsigned long cache_decay_ticks = 1;
-
-int __cpu_up(unsigned int cpu, struct task_struct *tidle)
-{
-       smp_boot_one_cpu(cpu, tidle);
-       return cpu_online(cpu) ? 0 : -ENOSYS;
-}
-
-void smp_send_reschedule(int cpu)
-{
-       cpumask_t cpu_mask;
-       cpumask_clear(&cpu_mask);
-       cpumask_set_cpu(cpu, &cpu_mask);
-       send_ipi(IPI_SCHEDULE, 0, cpu_mask);
-}
-
-/* TLB flushing
- *
- * Flush needs to be done on the local CPU and on any other CPU that
- * may have the same mapping. The mm->cpu_vm_mask is used to keep track
- * of which CPUs that a specific process has been executed on.
- */
-void flush_tlb_common(struct mm_struct* mm, struct vm_area_struct* vma, unsigned long addr)
-{
-       unsigned long flags;
-       cpumask_t cpu_mask;
-
-       spin_lock_irqsave(&tlbstate_lock, flags);
-       cpu_mask = (mm == FLUSH_ALL ? cpu_all_mask : *mm_cpumask(mm));
-       cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-       flush_mm = mm;
-       flush_vma = vma;
-       flush_addr = addr;
-       send_ipi(IPI_FLUSH_TLB, 1, cpu_mask);
-       spin_unlock_irqrestore(&tlbstate_lock, flags);
-}
-
-void flush_tlb_all(void)
-{
-       __flush_tlb_all();
-       flush_tlb_common(FLUSH_ALL, FLUSH_ALL, 0);
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-       __flush_tlb_mm(mm);
-       flush_tlb_common(mm, FLUSH_ALL, 0);
-       /* No more mappings in other CPUs */
-       cpumask_clear(mm_cpumask(mm));
-       cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-}
-
-void flush_tlb_page(struct vm_area_struct *vma,
-                          unsigned long addr)
-{
-       __flush_tlb_page(vma, addr);
-       flush_tlb_common(vma->vm_mm, vma, addr);
-}
-
-/* Inter processor interrupts
- *
- * The IPIs are used for:
- *   * Force a schedule on a CPU
- *   * FLush TLB on other CPUs
- *   * Call a function on other CPUs
- */
-
-int send_ipi(int vector, int wait, cpumask_t cpu_mask)
-{
-       int i = 0;
-       reg_intr_vect_rw_ipi ipi = REG_RD(intr_vect, irq_regs[i], rw_ipi);
-       int ret = 0;
-
-       /* Calculate CPUs to send to. */
-       cpumask_and(&cpu_mask, &cpu_mask, cpu_online_mask);
-
-       /* Send the IPI. */
-       for_each_cpu(i, &cpu_mask)
-       {
-               ipi.vector |= vector;
-               REG_WR(intr_vect, irq_regs[i], rw_ipi, ipi);
-       }
-
-       /* Wait for IPI to finish on other CPUS */
-       if (wait) {
-               for_each_cpu(i, &cpu_mask) {
-                        int j;
-                        for (j = 0 ; j < 1000; j++) {
-                               ipi = REG_RD(intr_vect, irq_regs[i], rw_ipi);
-                               if (!ipi.vector)
-                                       break;
-                               udelay(100);
-                       }
-
-                       /* Timeout? */
-                       if (ipi.vector) {
-                               printk("SMP call timeout from %d to %d\n", smp_processor_id(), i);
-                               ret = -ETIMEDOUT;
-                               dump_stack();
-                       }
-               }
-       }
-       return ret;
-}
-
-/*
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func)(void *info), void *info, int wait)
-{
-       cpumask_t cpu_mask;
-       struct call_data_struct data;
-       int ret;
-
-       cpumask_setall(&cpu_mask);
-       cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-
-       WARN_ON(irqs_disabled());
-
-       data.func = func;
-       data.info = info;
-       data.wait = wait;
-
-       spin_lock(&call_lock);
-       call_data = &data;
-       ret = send_ipi(IPI_CALL, wait, cpu_mask);
-       spin_unlock(&call_lock);
-
-       return ret;
-}
-
-irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id)
-{
-       void (*func) (void *info) = call_data->func;
-       void *info = call_data->info;
-       reg_intr_vect_rw_ipi ipi;
-
-       ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi);
-
-       if (ipi.vector & IPI_SCHEDULE) {
-               scheduler_ipi();
-       }
-       if (ipi.vector & IPI_CALL) {
-               func(info);
-       }
-       if (ipi.vector & IPI_FLUSH_TLB) {
-               if (flush_mm == FLUSH_ALL)
-                       __flush_tlb_all();
-               else if (flush_vma == FLUSH_ALL)
-                       __flush_tlb_mm(flush_mm);
-               else
-                       __flush_tlb_page(flush_vma, flush_addr);
-       }
-
-       ipi.vector = 0;
-       REG_WR(intr_vect, irq_regs[smp_processor_id()], rw_ipi, ipi);
-
-       return IRQ_HANDLED;
-}
-
index c17b01a..4fce9f1 100644 (file)
@@ -8,12 +8,14 @@
 #include <linux/timex.h>
 #include <linux/time.h>
 #include <linux/clocksource.h>
+#include <linux/clockchips.h>
 #include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/threads.h>
 #include <linux/cpufreq.h>
+#include <linux/sched_clock.h>
 #include <linux/mm.h>
 #include <asm/types.h>
 #include <asm/signal.h>
 /* Number of 763 counts before watchdog bites */
 #define ETRAX_WD_CNT           ((2*ETRAX_WD_HZ)/HZ + 1)
 
-/* Register the continuos readonly timer available in FS and ARTPEC-3.  */
-static cycle_t read_cont_rotime(struct clocksource *cs)
-{
-       return (u32)REG_RD(timer, regi_timer0, r_time);
-}
-
-static struct clocksource cont_rotime = {
-       .name   = "crisv32_rotime",
-       .rating = 300,
-       .read   = read_cont_rotime,
-       .mask   = CLOCKSOURCE_MASK(32),
-       .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static int __init etrax_init_cont_rotime(void)
-{
-       clocksource_register_khz(&cont_rotime, 100000);
-       return 0;
-}
-arch_initcall(etrax_init_cont_rotime);
+#define CRISV32_TIMER_FREQ     (100000000lu)
 
 unsigned long timer_regs[NR_CPUS] =
 {
        regi_timer0,
-#ifdef CONFIG_SMP
-       regi_timer2
-#endif
 };
 
 extern int set_rtc_mmss(unsigned long nowtime);
@@ -189,81 +169,104 @@ void handle_watchdog_bite(struct pt_regs *regs)
 #endif
 }
 
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "xtime_update()" routine every clocktick.
- */
-extern void cris_do_profile(struct pt_regs *regs);
+extern void cris_profile_sample(struct pt_regs *regs);
+static void __iomem *timer_base;
 
-static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
+static void crisv32_clkevt_mode(enum clock_event_mode mode,
+                               struct clock_event_device *dev)
 {
-       struct pt_regs *regs = get_irq_regs();
-       int cpu = smp_processor_id();
-       reg_timer_r_masked_intr masked_intr;
-       reg_timer_rw_ack_intr ack_intr = { 0 };
-
-       /* Check if the timer interrupt is for us (a tmr0 int) */
-       masked_intr = REG_RD(timer, timer_regs[cpu], r_masked_intr);
-       if (!masked_intr.tmr0)
-               return IRQ_NONE;
+       reg_timer_rw_tmr0_ctrl ctrl = {
+               .op = regk_timer_hold,
+               .freq = regk_timer_f100,
+       };
 
-       /* Acknowledge the timer irq. */
-       ack_intr.tmr0 = 1;
-       REG_WR(timer, timer_regs[cpu], rw_ack_intr, ack_intr);
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+}
 
-       /* Reset watchdog otherwise it resets us! */
-       reset_watchdog();
+static int crisv32_clkevt_next_event(unsigned long evt,
+                                    struct clock_event_device *dev)
+{
+       reg_timer_rw_tmr0_ctrl ctrl = {
+               .op = regk_timer_ld,
+               .freq = regk_timer_f100,
+       };
+
+       REG_WR(timer, timer_base, rw_tmr0_div, evt);
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+
+       ctrl.op = regk_timer_run;
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+
+       return 0;
+}
+
+static irqreturn_t crisv32_timer_interrupt(int irq, void *dev_id)
+{
+       struct clock_event_device *evt = dev_id;
+       reg_timer_rw_tmr0_ctrl ctrl = {
+               .op = regk_timer_hold,
+               .freq = regk_timer_f100,
+       };
+       reg_timer_rw_ack_intr ack = { .tmr0 = 1 };
+       reg_timer_r_masked_intr intr;
+
+       intr = REG_RD(timer, timer_base, r_masked_intr);
+       if (!intr.tmr0)
+               return IRQ_NONE;
 
-       /* Update statistics. */
-       update_process_times(user_mode(regs));
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+       REG_WR(timer, timer_base, rw_ack_intr, ack);
 
-       cris_do_profile(regs); /* Save profiling information */
+       reset_watchdog();
+#ifdef CONFIG_SYSTEM_PROFILER
+       cris_profile_sample(get_irq_regs());
+#endif
 
-       /* The master CPU is responsible for the time keeping. */
-       if (cpu != 0)
-               return IRQ_HANDLED;
+       evt->event_handler(evt);
 
-       /* Call the real timer interrupt handler */
-       xtime_update(1);
        return IRQ_HANDLED;
 }
 
+static struct clock_event_device crisv32_clockevent = {
+       .name = "crisv32-timer",
+       .rating = 300,
+       .features = CLOCK_EVT_FEAT_ONESHOT,
+       .set_mode = crisv32_clkevt_mode,
+       .set_next_event = crisv32_clkevt_next_event,
+};
+
 /* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */
 static struct irqaction irq_timer = {
-       .handler = timer_interrupt,
-       .flags = IRQF_SHARED,
-       .name = "timer"
+       .handler = crisv32_timer_interrupt,
+       .flags = IRQF_TIMER | IRQF_SHARED,
+       .name = "crisv32-timer",
+       .dev_id = &crisv32_clockevent,
 };
 
-void __init cris_timer_init(void)
+static u64 notrace crisv32_timer_sched_clock(void)
 {
-       int cpu = smp_processor_id();
-       reg_timer_rw_tmr0_ctrl tmr0_ctrl = { 0 };
-       reg_timer_rw_tmr0_div tmr0_div = TIMER0_DIV;
-       reg_timer_rw_intr_mask timer_intr_mask;
+       return REG_RD(timer, timer_base, r_time);
+}
 
-       /* Setup the etrax timers.
-        * Base frequency is 100MHz, divider 1000000 -> 100 HZ
-        * We use timer0, so timer1 is free.
-        * The trig timer is used by the fasttimer API if enabled.
-        */
+static void __init crisv32_timer_init(void)
+{
+       reg_timer_rw_intr_mask timer_intr_mask;
+       reg_timer_rw_tmr0_ctrl ctrl = {
+               .op = regk_timer_hold,
+               .freq = regk_timer_f100,
+       };
 
-       tmr0_ctrl.op = regk_timer_ld;
-       tmr0_ctrl.freq = regk_timer_f100;
-       REG_WR(timer, timer_regs[cpu], rw_tmr0_div, tmr0_div);
-       REG_WR(timer, timer_regs[cpu], rw_tmr0_ctrl, tmr0_ctrl); /* Load */
-       tmr0_ctrl.op = regk_timer_run;
-       REG_WR(timer, timer_regs[cpu], rw_tmr0_ctrl, tmr0_ctrl); /* Start */
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
 
-       /* Enable the timer irq. */
-       timer_intr_mask = REG_RD(timer, timer_regs[cpu], rw_intr_mask);
+       timer_intr_mask = REG_RD(timer, timer_base, rw_intr_mask);
        timer_intr_mask.tmr0 = 1;
-       REG_WR(timer, timer_regs[cpu], rw_intr_mask, timer_intr_mask);
+       REG_WR(timer, timer_base, rw_intr_mask, timer_intr_mask);
 }
 
 void __init time_init(void)
 {
-       reg_intr_vect_rw_mask intr_mask;
+       int irq;
+       int ret;
 
        /* Probe for the RTC and read it if it exists.
         * Before the RTC can be probed the loops_per_usec variable needs
@@ -273,17 +276,28 @@ void __init time_init(void)
         */
        loops_per_usec = 50;
 
-       /* Start CPU local timer. */
-       cris_timer_init();
+       irq = TIMER0_INTR_VECT;
+       timer_base = (void __iomem *) regi_timer0;
+
+       crisv32_timer_init();
+
+       sched_clock_register(crisv32_timer_sched_clock, 32,
+                            CRISV32_TIMER_FREQ);
+
+       clocksource_mmio_init(timer_base + REG_RD_ADDR_timer_r_time,
+                             "crisv32-timer", CRISV32_TIMER_FREQ,
+                             300, 32, clocksource_mmio_readl_up);
+
+       crisv32_clockevent.cpumask = cpu_possible_mask;
+       crisv32_clockevent.irq = irq;
 
-       /* Enable the timer irq in global config. */
-       intr_mask = REG_RD_VECT(intr_vect, regi_irq, rw_mask, 1);
-       intr_mask.timer0 = 1;
-       REG_WR_VECT(intr_vect, regi_irq, rw_mask, 1, intr_mask);
+       ret = setup_irq(irq, &irq_timer);
+       if (ret)
+               pr_warn("failed to setup irq %d\n", irq);
 
-       /* Now actually register the timer irq handler that calls
-        * timer_interrupt(). */
-       setup_irq(TIMER0_INTR_VECT, &irq_timer);
+       clockevents_config_and_register(&crisv32_clockevent,
+                                       CRISV32_TIMER_FREQ,
+                                       2, 0xffffffff);
 
        /* Enable watchdog if we should use one. */
 
index dd296b9..e91cf02 100644 (file)
@@ -3,5 +3,5 @@
 #
 
 lib-y  = checksum.o checksumcopy.o string.o usercopy.o memset.o \
-       csumcpfruser.o spinlock.o delay.o strcmp.o
+       csumcpfruser.o delay.o strcmp.o
 
diff --git a/arch/cris/arch-v32/lib/spinlock.S b/arch/cris/arch-v32/lib/spinlock.S
deleted file mode 100644 (file)
index fe610b9..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-;; Core of the spinlock implementation
-;;
-;; Copyright (C) 2004 Axis Communications AB.
-;;
-;; Author: Mikael Starvik
-
-
-       .global cris_spin_lock
-       .type   cris_spin_lock,@function
-       .global cris_spin_trylock
-       .type   cris_spin_trylock,@function
-
-       .text
-
-cris_spin_lock:
-       clearf  p
-1:     test.b  [$r10]
-       beq     1b
-       clearf  p
-       ax
-       clear.b [$r10]
-       bcs     1b
-       clearf  p
-       ret
-       nop
-
-       .size   cris_spin_lock, . - cris_spin_lock
-
-cris_spin_trylock:
-       clearf  p
-1:     move.b  [$r10], $r11
-       ax
-       clear.b [$r10]
-        bcs    1b
-        clearf p
-       ret
-       movu.b  $r11,$r10
-
-       .size   cris_spin_trylock, . - cris_spin_trylock
-
index 3deca52..f5438ca 100644 (file)
@@ -40,17 +40,6 @@ void __init cris_mmu_init(void)
         */
        per_cpu(current_pgd, smp_processor_id()) = init_mm.pgd;
 
-#ifdef CONFIG_SMP
-       {
-               pgd_t **pgd;
-               pgd = (pgd_t**)&per_cpu(current_pgd, smp_processor_id());
-               SUPP_BANK_SEL(1);
-               SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-               SUPP_BANK_SEL(2);
-               SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-       }
-#endif
-
        /* Initialise the TLB. Function found in tlb.c. */
        tlb_init();
 
index 72727c1..c098104 100644 (file)
        move.d  $r0, [$r1]      ; last_refill_cause = rw_mm_cause
 
 3:     ; Probably not in a loop, continue normal processing
-#ifdef CONFIG_SMP
-       move    $s7, $acr       ; PGD
-#else
        move.d  current_pgd, $acr ; PGD
-#endif
        ; Look up PMD in PGD
        lsrq    24, $r0 ; Get PMD index into PGD (bit 24-31)
        move.d  [$acr], $acr    ; PGD for the current process
diff --git a/arch/cris/boot/dts/Makefile b/arch/cris/boot/dts/Makefile
new file mode 100644 (file)
index 0000000..faf69fb
--- /dev/null
@@ -0,0 +1,6 @@
+BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB)).dtb.o
+ifneq ($(CONFIG_BUILTIN_DTB),"")
+obj-$(CONFIG_OF) += $(BUILTIN_DTB)
+endif
+
+clean-files := *.dtb.S
diff --git a/arch/cris/boot/dts/dev88.dts b/arch/cris/boot/dts/dev88.dts
new file mode 100644 (file)
index 0000000..4fa5a3f
--- /dev/null
@@ -0,0 +1,18 @@
+/dts-v1/;
+
+/include/ "etraxfs.dtsi"
+
+/ {
+       model = "Axis 88 Developer Board";
+       compatible = "axis,dev88";
+
+       aliases {
+               serial0 = &uart0;
+       };
+
+       soc {
+               uart0: serial@b00260000 {
+                       status = "okay";
+               };
+       };
+};
diff --git a/arch/cris/boot/dts/etraxfs.dtsi b/arch/cris/boot/dts/etraxfs.dtsi
new file mode 100644 (file)
index 0000000..909bced
--- /dev/null
@@ -0,0 +1,38 @@
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       interrupt-parent = <&intc>;
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       device_type = "cpu";
+                       model = "axis,crisv32";
+                       reg = <0>;
+               };
+       };
+
+       soc {
+               compatible = "simple-bus";
+               model = "etraxfs";
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges;
+
+               intc: interrupt-controller {
+                       compatible = "axis,crisv32-intc";
+                       reg = <0xb001c000 0x1000>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               serial@b00260000 {
+                       compatible = "axis,etraxfs-uart";
+                       reg = <0xb0026000 0x1000>;
+                       interrupts = <68>;
+                       status = "disabled";
+               };
+       };
+};
diff --git a/arch/cris/include/arch-v10/arch/atomic.h b/arch/cris/include/arch-v10/arch/atomic.h
deleted file mode 100644 (file)
index 6ef5e7d..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_CRIS_ARCH_ATOMIC__
-#define __ASM_CRIS_ARCH_ATOMIC__
-
-#define cris_atomic_save(addr, flags) local_irq_save(flags);
-#define cris_atomic_restore(addr, flags) local_irq_restore(flags);
-
-#endif
index 935fde3..9b5580f 100644 (file)
@@ -36,12 +36,4 @@ static inline unsigned long _get_base(char * addr)
   return 0;
 }
 
-#define nop() __asm__ __volatile__ ("nop");
-
-#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
-#define tas(ptr) (xchg((ptr),1))
-
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((struct __xchg_dummy *)(x))
-
 #endif
diff --git a/arch/cris/include/arch-v32/arch/atomic.h b/arch/cris/include/arch-v32/arch/atomic.h
deleted file mode 100644 (file)
index 852ceff..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __ASM_CRIS_ARCH_ATOMIC__
-#define __ASM_CRIS_ARCH_ATOMIC__
-
-#include <linux/spinlock_types.h>
-
-extern void cris_spin_unlock(void *l, int val);
-extern void cris_spin_lock(void *l);
-extern int cris_spin_trylock(void* l);
-
-#ifndef CONFIG_SMP
-#define cris_atomic_save(addr, flags) local_irq_save(flags);
-#define cris_atomic_restore(addr, flags) local_irq_restore(flags);
-#else
-
-extern spinlock_t cris_atomic_locks[];
-#define LOCK_COUNT 128
-#define HASH_ADDR(a) (((int)a) & 127)
-
-#define cris_atomic_save(addr, flags) \
-  local_irq_save(flags); \
-  cris_spin_lock((void *)&cris_atomic_locks[HASH_ADDR(addr)].raw_lock.slock);
-
-#define cris_atomic_restore(addr, flags) \
-  { \
-    spinlock_t *lock = (void*)&cris_atomic_locks[HASH_ADDR(addr)]; \
-    __asm__ volatile ("move.d %1,%0" \
-                       : "=m" (lock->raw_lock.slock) \
-                       : "r" (1) \
-                       : "memory"); \
-    local_irq_restore(flags); \
-  }
-
-#endif
-
-#endif
-
index a024b7d..5687592 100644 (file)
@@ -25,8 +25,7 @@ struct thread_struct {
  */
 #define TASK_SIZE      (0xB0000000UL)
 
-/* CCS I=1, enable interrupts. */
-#define INIT_THREAD { 0, 0, (1 << I_CCS_BITNR) }
+#define INIT_THREAD { }
 
 #define KSTK_EIP(tsk)          \
 ({                             \
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h
deleted file mode 100644 (file)
index f132755..0000000
+++ /dev/null
@@ -1,131 +0,0 @@
-#ifndef __ASM_ARCH_SPINLOCK_H
-#define __ASM_ARCH_SPINLOCK_H
-
-#include <linux/spinlock_types.h>
-
-#define RW_LOCK_BIAS 0x01000000
-
-extern void cris_spin_unlock(void *l, int val);
-extern void cris_spin_lock(void *l);
-extern int cris_spin_trylock(void *l);
-
-static inline int arch_spin_is_locked(arch_spinlock_t *x)
-{
-       return *(volatile signed char *)(&(x)->slock) <= 0;
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-       __asm__ volatile ("move.d %1,%0" \
-                         : "=m" (lock->slock) \
-                         : "r" (1) \
-                         : "memory");
-}
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-       while (arch_spin_is_locked(lock))
-               cpu_relax();
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-       return cris_spin_trylock((void *)&lock->slock);
-}
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-       cris_spin_lock((void *)&lock->slock);
-}
-
-static inline void
-arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-       arch_spin_lock(lock);
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- *
- */
-
-static inline int arch_read_can_lock(arch_rwlock_t *x)
-{
-       return (int)(x)->lock > 0;
-}
-
-static inline int arch_write_can_lock(arch_rwlock_t *x)
-{
-       return (x)->lock == RW_LOCK_BIAS;
-}
-
-static  inline void arch_read_lock(arch_rwlock_t *rw)
-{
-       arch_spin_lock(&rw->slock);
-       while (rw->lock == 0);
-       rw->lock--;
-       arch_spin_unlock(&rw->slock);
-}
-
-static  inline void arch_write_lock(arch_rwlock_t *rw)
-{
-       arch_spin_lock(&rw->slock);
-       while (rw->lock != RW_LOCK_BIAS);
-       rw->lock = 0;
-       arch_spin_unlock(&rw->slock);
-}
-
-static  inline void arch_read_unlock(arch_rwlock_t *rw)
-{
-       arch_spin_lock(&rw->slock);
-       rw->lock++;
-       arch_spin_unlock(&rw->slock);
-}
-
-static  inline void arch_write_unlock(arch_rwlock_t *rw)
-{
-       arch_spin_lock(&rw->slock);
-       while (rw->lock != RW_LOCK_BIAS);
-       rw->lock = RW_LOCK_BIAS;
-       arch_spin_unlock(&rw->slock);
-}
-
-static  inline int arch_read_trylock(arch_rwlock_t *rw)
-{
-       int ret = 0;
-       arch_spin_lock(&rw->slock);
-       if (rw->lock != 0) {
-               rw->lock--;
-               ret = 1;
-       }
-       arch_spin_unlock(&rw->slock);
-       return ret;
-}
-
-static  inline int arch_write_trylock(arch_rwlock_t *rw)
-{
-       int ret = 0;
-       arch_spin_lock(&rw->slock);
-       if (rw->lock == RW_LOCK_BIAS) {
-               rw->lock = 0;
-               ret = 1;
-       }
-       arch_spin_unlock(&rw->slock);
-       return ret;
-}
-
-#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
-#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock)
-
-#define arch_spin_relax(lock)  cpu_relax()
-#define arch_read_relax(lock)  cpu_relax()
-#define arch_write_relax(lock) cpu_relax()
-
-#endif /* __ASM_ARCH_SPINLOCK_H */
index 889f2de..057e518 100644 (file)
@@ -1,16 +1,29 @@
-
+generic-y += atomic.h
 generic-y += barrier.h
 generic-y += clkdev.h
+generic-y += cmpxchg.h
 generic-y += cputime.h
+generic-y += device.h
+generic-y += div64.h
 generic-y += exec.h
+generic-y += emergency-restart.h
+generic-y += futex.h
+generic-y += hardirq.h
+generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
 generic-y += kvm_para.h
 generic-y += linkage.h
+generic-y += local.h
+generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += module.h
+generic-y += percpu.h
 generic-y += preempt.h
 generic-y += scatterlist.h
 generic-y += sections.h
+generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += vga.h
 generic-y += xor.h
diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h
deleted file mode 100644 (file)
index 279766a..0000000
+++ /dev/null
@@ -1,149 +0,0 @@
-/* $Id: atomic.h,v 1.3 2001/07/25 16:15:19 bjornw Exp $ */
-
-#ifndef __ASM_CRIS_ATOMIC__
-#define __ASM_CRIS_ATOMIC__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/cmpxchg.h>
-#include <arch/atomic.h>
-#include <arch/system.h>
-#include <asm/barrier.h>
-
-/*
- * Atomic operations that C can't guarantee us.  Useful for
- * resource counting etc..
- */
-
-#define ATOMIC_INIT(i)  { (i) }
-
-#define atomic_read(v) ACCESS_ONCE((v)->counter)
-#define atomic_set(v,i) (((v)->counter) = (i))
-
-/* These should be written in asm but we do it in C for now. */
-
-#define ATOMIC_OP(op, c_op)                                            \
-static inline void atomic_##op(int i, volatile atomic_t *v)            \
-{                                                                      \
-       unsigned long flags;                                            \
-       cris_atomic_save(v, flags);                                     \
-       v->counter c_op i;                                              \
-       cris_atomic_restore(v, flags);                                  \
-}                                                                      \
-
-#define ATOMIC_OP_RETURN(op, c_op)                                     \
-static inline int atomic_##op##_return(int i, volatile atomic_t *v)    \
-{                                                                      \
-       unsigned long flags;                                            \
-       int retval;                                                     \
-       cris_atomic_save(v, flags);                                     \
-       retval = (v->counter c_op i);                                   \
-       cris_atomic_restore(v, flags);                                  \
-       return retval;                                                  \
-}
-
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
-
-ATOMIC_OPS(add, +=)
-ATOMIC_OPS(sub, -=)
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
-#define atomic_add_negative(a, v)      (atomic_add_return((a), (v)) < 0)
-
-static inline int atomic_sub_and_test(int i, volatile atomic_t *v)
-{
-       int retval;
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       retval = (v->counter -= i) == 0;
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-
-static inline void atomic_inc(volatile atomic_t *v)
-{
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       (v->counter)++;
-       cris_atomic_restore(v, flags);
-}
-
-static inline void atomic_dec(volatile atomic_t *v)
-{
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       (v->counter)--;
-       cris_atomic_restore(v, flags);
-}
-
-static inline int atomic_inc_return(volatile atomic_t *v)
-{
-       unsigned long flags;
-       int retval;
-       cris_atomic_save(v, flags);
-       retval = ++(v->counter);
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-
-static inline int atomic_dec_return(volatile atomic_t *v)
-{
-       unsigned long flags;
-       int retval;
-       cris_atomic_save(v, flags);
-       retval = --(v->counter);
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-static inline int atomic_dec_and_test(volatile atomic_t *v)
-{
-       int retval;
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       retval = --(v->counter) == 0;
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-
-static inline int atomic_inc_and_test(volatile atomic_t *v)
-{
-       int retval;
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       retval = ++(v->counter) == 0;
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
-{
-       int ret;
-       unsigned long flags;
-
-       cris_atomic_save(v, flags);
-       ret = v->counter;
-       if (likely(ret == old))
-               v->counter = new;
-       cris_atomic_restore(v, flags);
-       return ret;
-}
-
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
-       int ret;
-       unsigned long flags;
-
-       cris_atomic_save(v, flags);
-       ret = v->counter;
-       if (ret != u)
-               v->counter += a;
-       cris_atomic_restore(v, flags);
-       return ret;
-}
-
-#endif
index bd49a54..8062cb5 100644 (file)
 #endif
 
 #include <arch/bitops.h>
-#include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <asm/barrier.h>
 
-/*
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered.  See __set_bit()
- * if you do not require the atomic guarantees.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-
-#define set_bit(nr, addr)    (void)test_and_set_bit(nr, addr)
-
-/*
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered.  However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
- * in order to ensure changes are visible on other processors.
- */
-
-#define clear_bit(nr, addr)  (void)test_and_clear_bit(nr, addr)
-
-/*
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-
-#define change_bit(nr, addr) (void)test_and_change_bit(nr, addr)
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned int mask, retval;
-       unsigned long flags;
-       unsigned int *adr = (unsigned int *)addr;
-       
-       adr += nr >> 5;
-       mask = 1 << (nr & 0x1f);
-       cris_atomic_save(addr, flags);
-       retval = (mask & *adr) != 0;
-       *adr |= mask;
-       cris_atomic_restore(addr, flags);
-       return retval;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned int mask, retval;
-       unsigned long flags;
-       unsigned int *adr = (unsigned int *)addr;
-       
-       adr += nr >> 5;
-       mask = 1 << (nr & 0x1f);
-       cris_atomic_save(addr, flags);
-       retval = (mask & *adr) != 0;
-       *adr &= ~mask;
-       cris_atomic_restore(addr, flags);
-       return retval;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned int mask, retval;
-       unsigned long flags;
-       unsigned int *adr = (unsigned int *)addr;
-       adr += nr >> 5;
-       mask = 1 << (nr & 0x1f);
-       cris_atomic_save(addr, flags);
-       retval = (mask & *adr) != 0;
-       *adr ^= mask;
-       cris_atomic_restore(addr, flags);
-       return retval;
-}
-
+#include <asm-generic/bitops/atomic.h>
 #include <asm-generic/bitops/non-atomic.h>
 
 /*
diff --git a/arch/cris/include/asm/cmpxchg.h b/arch/cris/include/asm/cmpxchg.h
deleted file mode 100644 (file)
index b756dac..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef __ASM_CRIS_CMPXCHG__
-#define __ASM_CRIS_CMPXCHG__
-
-#include <linux/irqflags.h>
-
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
-{
-  /* since Etrax doesn't have any atomic xchg instructions, we need to disable
-     irq's (if enabled) and do it with move.d's */
-  unsigned long flags,temp;
-  local_irq_save(flags); /* save flags, including irq enable bit and shut off irqs */
-  switch (size) {
-  case 1:
-    *((unsigned char *)&temp) = x;
-    x = *(unsigned char *)ptr;
-    *(unsigned char *)ptr = *((unsigned char *)&temp);
-    break;
-  case 2:
-    *((unsigned short *)&temp) = x;
-    x = *(unsigned short *)ptr;
-    *(unsigned short *)ptr = *((unsigned short *)&temp);
-    break;
-  case 4:
-    temp = x;
-    x = *(unsigned long *)ptr;
-    *(unsigned long *)ptr = temp;
-    break;
-  }
-  local_irq_restore(flags); /* restore irq enable bit */
-  return x;
-}
-
-#define xchg(ptr,x) \
-       ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
-
-#define tas(ptr) (xchg((ptr),1))
-
-#include <asm-generic/cmpxchg-local.h>
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)                                              \
-       ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
-                       (unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-
-#ifndef CONFIG_SMP
-#include <asm-generic/cmpxchg.h>
-#endif
-
-#endif /* __ASM_CRIS_CMPXCHG__ */
diff --git a/arch/cris/include/asm/device.h b/arch/cris/include/asm/device.h
deleted file mode 100644 (file)
index d8f9872..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/arch/cris/include/asm/div64.h b/arch/cris/include/asm/div64.h
deleted file mode 100644 (file)
index 6cd978c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
index 30ded8f..c2a394f 100644 (file)
@@ -71,7 +71,7 @@ typedef unsigned long elf_fpregset_t;
    the loader.  We need to make sure that it is out of the way of the program
    that it will "exec", and that there is sufficient room for the brk.  */
 
-#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports.  This could be done in user space,
diff --git a/arch/cris/include/asm/emergency-restart.h b/arch/cris/include/asm/emergency-restart.h
deleted file mode 100644 (file)
index 108d8c4..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/cris/include/asm/futex.h b/arch/cris/include/asm/futex.h
deleted file mode 100644 (file)
index 6a332a9..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#include <asm-generic/futex.h>
-
-#endif
diff --git a/arch/cris/include/asm/hardirq.h b/arch/cris/include/asm/hardirq.h
deleted file mode 100644 (file)
index 04126f7..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
-
-#include <asm/irq.h>
-#include <asm-generic/hardirq.h>
-
-#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/cris/include/asm/irq_regs.h b/arch/cris/include/asm/irq_regs.h
deleted file mode 100644 (file)
index 3dd9c0b..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/cris/include/asm/kdebug.h b/arch/cris/include/asm/kdebug.h
deleted file mode 100644 (file)
index 6ece1b0..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/cris/include/asm/kmap_types.h b/arch/cris/include/asm/kmap_types.h
deleted file mode 100644 (file)
index d2d643c..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-/* Dummy header just to define km_type.  None of this
- * is actually used on cris. 
- */
-
-#include <asm-generic/kmap_types.h>
-
-#endif
diff --git a/arch/cris/include/asm/local.h b/arch/cris/include/asm/local.h
deleted file mode 100644 (file)
index c11c530..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
deleted file mode 100644 (file)
index 36c93b5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/percpu.h b/arch/cris/include/asm/percpu.h
deleted file mode 100644 (file)
index 6db9b43..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _CRIS_PERCPU_H
-#define _CRIS_PERCPU_H
-
-#include <asm-generic/percpu.h>
-
-#endif /* _CRIS_PERCPU_H */
diff --git a/arch/cris/include/asm/smp.h b/arch/cris/include/asm/smp.h
deleted file mode 100644 (file)
index c615a06..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __ASM_SMP_H
-#define __ASM_SMP_H
-
-#include <linux/cpumask.h>
-
-extern cpumask_t phys_cpu_present_map;
-
-#define raw_smp_processor_id() (current_thread_info()->cpu)
-
-#endif
diff --git a/arch/cris/include/asm/spinlock.h b/arch/cris/include/asm/spinlock.h
deleted file mode 100644 (file)
index ed816b5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <arch/spinlock.h>
index 20697e7..b424f43 100644 (file)
@@ -22,16 +22,9 @@ extern void __flush_tlb_mm(struct mm_struct *mm);
 extern void __flush_tlb_page(struct vm_area_struct *vma,
                           unsigned long addr);
 
-#ifdef CONFIG_SMP
-extern void flush_tlb_all(void);
-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, 
-                          unsigned long addr);
-#else
 #define flush_tlb_all __flush_tlb_all
 #define flush_tlb_mm __flush_tlb_mm
 #define flush_tlb_page __flush_tlb_page
-#endif
 
 static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
 {
diff --git a/arch/cris/include/asm/topology.h b/arch/cris/include/asm/topology.h
deleted file mode 100644 (file)
index 2ac613d..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_CRIS_TOPOLOGY_H
-#define _ASM_CRIS_TOPOLOGY_H
-
-#include <asm-generic/topology.h>
-
-#endif /* _ASM_CRIS_TOPOLOGY_H */
index b45640b..edef71f 100644 (file)
@@ -7,6 +7,7 @@ CPPFLAGS_vmlinux.lds := -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
 extra-y        := vmlinux.lds
 
 obj-y   := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
+obj-y += devicetree.o
 
 obj-$(CONFIG_MODULES)    += crisksyms.o
 obj-$(CONFIG_MODULES)   += module.o
diff --git a/arch/cris/kernel/devicetree.c b/arch/cris/kernel/devicetree.c
new file mode 100644 (file)
index 0000000..53ff8d7
--- /dev/null
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/printk.h>
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+       pr_err("%s(%llx, %llx)\n",
+              __func__, base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+       return alloc_bootmem_align(size, align);
+}
index 58d44ee..fd3427e 100644 (file)
@@ -42,3 +42,26 @@ void do_notify_resume(int canrestart, struct pt_regs *regs,
                tracehook_notify_resume(regs);
        }
 }
+
+void do_work_pending(int syscall, struct pt_regs *regs,
+                    unsigned int thread_flags)
+{
+       do {
+               if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+                       schedule();
+               } else {
+                       if (unlikely(!user_mode(regs)))
+                               return;
+                       local_irq_enable();
+                       if (thread_flags & _TIF_SIGPENDING) {
+                               do_signal(syscall, regs);
+                               syscall = 0;
+                       } else {
+                               clear_thread_flag(TIF_NOTIFY_RESUME);
+                               tracehook_notify_resume(regs);
+                       }
+               }
+               local_irq_disable();
+               thread_flags = current_thread_info()->flags;
+       } while (thread_flags & _TIF_WORK_MASK);
+}
index 905b70e..bb12aa9 100644 (file)
@@ -19,6 +19,9 @@
 #include <linux/utsname.h>
 #include <linux/pfn.h>
 #include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
 #include <asm/setup.h>
 #include <arch/system.h>
 
@@ -64,6 +67,10 @@ void __init setup_arch(char **cmdline_p)
        unsigned long start_pfn, max_pfn;
        unsigned long memory_start;
 
+#ifdef CONFIG_OF
+       early_init_dt_scan(__dtb_start);
+#endif
+
        /* register an initial console printing routine for printk's */
 
        init_etrax_debug();
@@ -141,6 +148,8 @@ void __init setup_arch(char **cmdline_p)
 
        reserve_bootmem(PFN_PHYS(start_pfn), bootmap_size, BOOTMEM_DEFAULT);
 
+       unflatten_and_copy_device_tree();
+
        /* paging_init() sets up the MMU and marks all pages as reserved */
 
        paging_init();
@@ -204,3 +213,9 @@ static int __init topology_init(void)
 
 subsys_initcall(topology_init);
 
+static int __init cris_of_init(void)
+{
+       of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+       return 0;
+}
+core_initcall(cris_of_init);
index fe6acda..7780d37 100644 (file)
@@ -79,11 +79,13 @@ cris_do_profile(struct pt_regs* regs)
 #endif
 }
 
+#ifndef CONFIG_GENERIC_SCHED_CLOCK
 unsigned long long sched_clock(void)
 {
        return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ) +
                get_ns_in_jiffie();
 }
+#endif
 
 static int
 __init init_udelay(void)
index 99bb7ef..0b78bc8 100644 (file)
@@ -342,6 +342,11 @@ static inline void iowrite32(u32 val, void __iomem *p)
                __flush_PCI_writes();
 }
 
+#define ioread16be(addr)       be16_to_cpu(ioread16(addr))
+#define ioread32be(addr)       be32_to_cpu(ioread32(addr))
+#define iowrite16be(v, addr)   iowrite16(cpu_to_be16(v), (addr))
+#define iowrite32be(v, addr)   iowrite32(cpu_to_be32(v), (addr))
+
 static inline void ioread8_rep(void __iomem *p, void *dst, unsigned long count)
 {
        io_insb((unsigned long) p, dst, count);
index 4f9a666..76d25b2 100644 (file)
@@ -15,6 +15,7 @@ config IA64
        select ARCH_MIGHT_HAVE_PC_SERIO
        select PCI if (!IA64_HP_SIM)
        select ACPI if (!IA64_HP_SIM)
+       select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
        select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_IDE
index 35bf22c..b1698bc 100644 (file)
@@ -887,7 +887,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 }
 
 /* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
 {
        return _acpi_map_lsapic(handle, physid, pcpu);
 }
index 5f4243f..60e02f7 100644 (file)
@@ -2159,7 +2159,7 @@ static const struct file_operations pfm_file_ops = {
 static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
        return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
-                            dentry->d_inode->i_ino);
+                            d_inode(dentry)->i_ino);
 }
 
 static const struct dentry_operations pfmfs_dentry_operations = {
index 48cc657..d4e162d 100644 (file)
@@ -240,15 +240,12 @@ static acpi_status resource_to_window(struct acpi_resource *resource,
         * We're only interested in _CRS descriptors that are
         *      - address space descriptors for memory or I/O space
         *      - non-zero size
-        *      - producers, i.e., the address space is routed downstream,
-        *        not consumed by the bridge itself
         */
        status = acpi_resource_to_address64(resource, addr);
        if (ACPI_SUCCESS(status) &&
            (addr->resource_type == ACPI_MEMORY_RANGE ||
             addr->resource_type == ACPI_IO_RANGE) &&
-           addr->address.address_length &&
-           addr->producer_consumer == ACPI_PRODUCER)
+           addr->address.address_length)
                return AE_OK;
 
        return AE_ERROR;
index 483dff9..7f54618 100644 (file)
@@ -174,8 +174,11 @@ void show_regs(struct pt_regs *regs)
        show_trace(NULL, (unsigned long *)regs->ctx.AX[0].U0, regs);
 }
 
+/*
+ * Copy architecture-specific thread state
+ */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-               unsigned long arg, struct task_struct *tsk)
+               unsigned long kthread_arg, struct task_struct *tsk)
 {
        struct pt_regs *childregs = task_pt_regs(tsk);
        void *kernel_context = ((void *) childregs +
@@ -202,12 +205,13 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
                global_base = __core_reg_get(A1GbP);
                childregs->ctx.AX[0].U1 = (unsigned long) global_base;
                childregs->ctx.AX[0].U0 = (unsigned long) kernel_context;
-               /* Set D1Ar1=arg and D1RtP=usp (fn) */
+               /* Set D1Ar1=kthread_arg and D1RtP=usp (fn) */
                childregs->ctx.DX[4].U1 = usp;
-               childregs->ctx.DX[3].U1 = arg;
+               childregs->ctx.DX[3].U1 = kthread_arg;
                tsk->thread.int_depth = 2;
                return 0;
        }
+
        /*
         * Get a pointer to where the new child's register block should have
         * been pushed.
index 897ba3c..cc4a2ba 100644 (file)
@@ -197,6 +197,11 @@ static inline void outsl(unsigned long addr, const void *buffer, int count)
 #define iowrite16(v, addr)     writew((v), (addr))
 #define iowrite32(v, addr)     writel((v), (addr))
 
+#define ioread16be(addr)       be16_to_cpu(readw(addr))
+#define ioread32be(addr)       be32_to_cpu(readl(addr))
+#define iowrite16be(v, addr)   writew(cpu_to_be16(v), (addr))
+#define iowrite32be(v, addr)   writel(cpu_to_be32(v), (addr))
+
 #define ioread8_rep(p, dst, count) \
        insb((unsigned long) (p), (dst), (count))
 #define ioread16_rep(p, dst, count) \
index 01c75f3..24b3d89 100644 (file)
@@ -46,7 +46,6 @@ generic-y += segment.h
 generic-y += sembuf.h
 generic-y += serial.h
 generic-y += shmbuf.h
-generic-y += shmparam.h
 generic-y += siginfo.h
 generic-y += signal.h
 generic-y += socket.h
diff --git a/arch/nios2/include/asm/shmparam.h b/arch/nios2/include/asm/shmparam.h
new file mode 100644 (file)
index 0000000..6078429
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright Altera Corporation (C) <2015>. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_NIOS2_SHMPARAM_H
+#define _ASM_NIOS2_SHMPARAM_H
+
+#define        SHMLBA  CONFIG_NIOS2_DCACHE_SIZE
+
+#endif /* _ASM_NIOS2_SHMPARAM_H */
index eff00e6..1d35de9 100644 (file)
@@ -14,6 +14,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/types.h>
+
 /*
  * Register numbers used by 'ptrace' system call interface.
  */
index 27b006c..1e515cc 100644 (file)
@@ -92,35 +92,35 @@ exception_table:
 
 trap_table:
        .word   handle_system_call      /* 0  */
-       .word   instruction_trap        /* 1  */
-       .word   instruction_trap        /* 2  */
-       .word   instruction_trap        /* 3  */
-       .word   instruction_trap        /* 4  */
-       .word   instruction_trap        /* 5  */
-       .word   instruction_trap        /* 6  */
-       .word   instruction_trap        /* 7  */
-       .word   instruction_trap        /* 8  */
-       .word   instruction_trap        /* 9  */
-       .word   instruction_trap        /* 10 */
-       .word   instruction_trap        /* 11 */
-       .word   instruction_trap        /* 12 */
-       .word   instruction_trap        /* 13 */
-       .word   instruction_trap        /* 14 */
-       .word   instruction_trap        /* 15 */
-       .word   instruction_trap        /* 16 */
-       .word   instruction_trap        /* 17 */
-       .word   instruction_trap        /* 18 */
-       .word   instruction_trap        /* 19 */
-       .word   instruction_trap        /* 20 */
-       .word   instruction_trap        /* 21 */
-       .word   instruction_trap        /* 22 */
-       .word   instruction_trap        /* 23 */
-       .word   instruction_trap        /* 24 */
-       .word   instruction_trap        /* 25 */
-       .word   instruction_trap        /* 26 */
-       .word   instruction_trap        /* 27 */
-       .word   instruction_trap        /* 28 */
-       .word   instruction_trap        /* 29 */
+       .word   handle_trap_1           /* 1  */
+       .word   handle_trap_2           /* 2  */
+       .word   handle_trap_3           /* 3  */
+       .word   handle_trap_reserved    /* 4  */
+       .word   handle_trap_reserved    /* 5  */
+       .word   handle_trap_reserved    /* 6  */
+       .word   handle_trap_reserved    /* 7  */
+       .word   handle_trap_reserved    /* 8  */
+       .word   handle_trap_reserved    /* 9  */
+       .word   handle_trap_reserved    /* 10 */
+       .word   handle_trap_reserved    /* 11 */
+       .word   handle_trap_reserved    /* 12 */
+       .word   handle_trap_reserved    /* 13 */
+       .word   handle_trap_reserved    /* 14 */
+       .word   handle_trap_reserved    /* 15 */
+       .word   handle_trap_reserved    /* 16 */
+       .word   handle_trap_reserved    /* 17 */
+       .word   handle_trap_reserved    /* 18 */
+       .word   handle_trap_reserved    /* 19 */
+       .word   handle_trap_reserved    /* 20 */
+       .word   handle_trap_reserved    /* 21 */
+       .word   handle_trap_reserved    /* 22 */
+       .word   handle_trap_reserved    /* 23 */
+       .word   handle_trap_reserved    /* 24 */
+       .word   handle_trap_reserved    /* 25 */
+       .word   handle_trap_reserved    /* 26 */
+       .word   handle_trap_reserved    /* 27 */
+       .word   handle_trap_reserved    /* 28 */
+       .word   handle_trap_reserved    /* 29 */
 #ifdef CONFIG_KGDB
        .word   handle_kgdb_breakpoint  /* 30 KGDB breakpoint */
 #else
@@ -455,6 +455,19 @@ handle_kgdb_breakpoint:
        br      ret_from_exception
 #endif
 
+handle_trap_1:
+       call    handle_trap_1_c
+       br      ret_from_exception
+
+handle_trap_2:
+       call    handle_trap_2_c
+       br      ret_from_exception
+
+handle_trap_3:
+handle_trap_reserved:
+       call    handle_trap_3_c
+       br      ret_from_exception
+
 /*
  * Beware - when entering resume, prev (the current task) is
  * in r4, next (the new task) is in r5, don't change these
index b7b9764..81f7da7 100644 (file)
 
 static DEFINE_SPINLOCK(die_lock);
 
+static void _send_sig(int signo, int code, unsigned long addr)
+{
+       siginfo_t info;
+
+       info.si_signo = signo;
+       info.si_errno = 0;
+       info.si_code = code;
+       info.si_addr = (void __user *) addr;
+       force_sig_info(signo, &info, current);
+}
+
 void die(const char *str, struct pt_regs *regs, long err)
 {
        console_verbose();
@@ -39,16 +50,10 @@ void die(const char *str, struct pt_regs *regs, long err)
 
 void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr)
 {
-       siginfo_t info;
-
        if (!user_mode(regs))
                die("Exception in kernel mode", regs, signo);
 
-       info.si_signo = signo;
-       info.si_errno = 0;
-       info.si_code = code;
-       info.si_addr = (void __user *) addr;
-       force_sig_info(signo, &info, current);
+       _send_sig(signo, code, addr);
 }
 
 /*
@@ -183,3 +188,18 @@ asmlinkage void unhandled_exception(struct pt_regs *regs, int cause)
 
        pr_emerg("opcode: 0x%08lx\n", *(unsigned long *)(regs->ea));
 }
+
+asmlinkage void handle_trap_1_c(struct pt_regs *fp)
+{
+       _send_sig(SIGUSR1, 0, fp->ea);
+}
+
+asmlinkage void handle_trap_2_c(struct pt_regs *fp)
+{
+       _send_sig(SIGUSR2, 0, fp->ea);
+}
+
+asmlinkage void handle_trap_3_c(struct pt_regs *fp)
+{
+       _send_sig(SIGILL, ILL_ILLTRP, fp->ea);
+}
index 7966429..223cdcc 100644 (file)
@@ -58,9 +58,6 @@ static void __invalidate_dcache(unsigned long start, unsigned long end)
        end += (cpuinfo.dcache_line_size - 1);
        end &= ~(cpuinfo.dcache_line_size - 1);
 
-       if (end > start + cpuinfo.dcache_size)
-               end = start + cpuinfo.dcache_size;
-
        for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
                __asm__ __volatile__ ("   initda 0(%0)\n"
                                        : /* Outputs */
@@ -131,12 +128,14 @@ void flush_cache_dup_mm(struct mm_struct *mm)
 
 void flush_icache_range(unsigned long start, unsigned long end)
 {
+       __flush_dcache(start, end);
        __flush_icache(start, end);
 }
 
 void flush_dcache_range(unsigned long start, unsigned long end)
 {
        __flush_dcache(start, end);
+       __flush_icache(start, end);
 }
 EXPORT_SYMBOL(flush_dcache_range);
 
@@ -159,6 +158,7 @@ void flush_icache_page(struct vm_area_struct *vma, struct page *page)
        unsigned long start = (unsigned long) page_address(page);
        unsigned long end = start + PAGE_SIZE;
 
+       __flush_dcache(start, end);
        __flush_icache(start, end);
 }
 
@@ -173,6 +173,18 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
                __flush_icache(start, end);
 }
 
+void __flush_dcache_page(struct address_space *mapping, struct page *page)
+{
+       /*
+        * Writeback any data associated with the kernel mapping of this
+        * page.  This ensures that data in the physical page is mutually
+        * coherent with the kernels mapping.
+        */
+       unsigned long start = (unsigned long)page_address(page);
+
+       __flush_dcache_all(start, start + PAGE_SIZE);
+}
+
 void flush_dcache_page(struct page *page)
 {
        struct address_space *mapping;
@@ -190,11 +202,12 @@ void flush_dcache_page(struct page *page)
        if (mapping && !mapping_mapped(mapping)) {
                clear_bit(PG_dcache_clean, &page->flags);
        } else {
-               unsigned long start = (unsigned long)page_address(page);
-
-               __flush_dcache_all(start, start + PAGE_SIZE);
-               if (mapping)
+               __flush_dcache_page(mapping, page);
+               if (mapping) {
+                       unsigned long start = (unsigned long)page_address(page);
                        flush_aliases(mapping,  page);
+                       flush_icache_range(start, start + PAGE_SIZE);
+               }
                set_bit(PG_dcache_clean, &page->flags);
        }
 }
@@ -205,6 +218,7 @@ void update_mmu_cache(struct vm_area_struct *vma,
 {
        unsigned long pfn = pte_pfn(*pte);
        struct page *page;
+       struct address_space *mapping;
 
        if (!pfn_valid(pfn))
                return;
@@ -217,16 +231,15 @@ void update_mmu_cache(struct vm_area_struct *vma,
        if (page == ZERO_PAGE(0))
                return;
 
-       if (!PageReserved(page) &&
-            !test_and_set_bit(PG_dcache_clean, &page->flags)) {
-               unsigned long start = page_to_virt(page);
-               struct address_space *mapping;
-
-               __flush_dcache(start, start + PAGE_SIZE);
-
-               mapping = page_mapping(page);
-               if (mapping)
-                       flush_aliases(mapping, page);
+       mapping = page_mapping(page);
+       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+               __flush_dcache_page(mapping, page);
+
+       if(mapping)
+       {
+               flush_aliases(mapping, page);
+               if (vma->vm_flags & VM_EXEC)
+                       flush_icache_page(vma, page);
        }
 }
 
@@ -234,15 +247,19 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
                    struct page *to)
 {
        __flush_dcache(vaddr, vaddr + PAGE_SIZE);
+       __flush_icache(vaddr, vaddr + PAGE_SIZE);
        copy_page(vto, vfrom);
        __flush_dcache((unsigned long)vto, (unsigned long)vto + PAGE_SIZE);
+       __flush_icache((unsigned long)vto, (unsigned long)vto + PAGE_SIZE);
 }
 
 void clear_user_page(void *addr, unsigned long vaddr, struct page *page)
 {
        __flush_dcache(vaddr, vaddr + PAGE_SIZE);
+       __flush_icache(vaddr, vaddr + PAGE_SIZE);
        clear_page(addr);
        __flush_dcache((unsigned long)addr, (unsigned long)addr + PAGE_SIZE);
+       __flush_icache((unsigned long)addr, (unsigned long)addr + PAGE_SIZE);
 }
 
 void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
@@ -251,7 +268,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
 {
        flush_cache_page(vma, user_vaddr, page_to_pfn(page));
        memcpy(dst, src, len);
-       __flush_dcache((unsigned long)src, (unsigned long)src + len);
+       __flush_dcache_all((unsigned long)src, (unsigned long)src + len);
        if (vma->vm_flags & VM_EXEC)
                __flush_icache((unsigned long)src, (unsigned long)src + len);
 }
@@ -262,7 +279,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 {
        flush_cache_page(vma, user_vaddr, page_to_pfn(page));
        memcpy(dst, src, len);
-       __flush_dcache((unsigned long)dst, (unsigned long)dst + len);
+       __flush_dcache_all((unsigned long)dst, (unsigned long)dst + len);
        if (vma->vm_flags & VM_EXEC)
                __flush_icache((unsigned long)dst, (unsigned long)dst + len);
 }
index bde5311..0cc6eed 100644 (file)
@@ -30,8 +30,6 @@ static inline int arch_has_random(void)
        return !!ppc_md.get_random_long;
 }
 
-int powernv_get_random_long(unsigned long *v);
-
 static inline int arch_get_random_seed_long(unsigned long *v)
 {
        return 0;
@@ -47,4 +45,13 @@ static inline int arch_has_random_seed(void)
 
 #endif /* CONFIG_ARCH_RANDOM */
 
+#ifdef CONFIG_PPC_POWERNV
+int powernv_hwrng_present(void);
+int powernv_get_random_long(unsigned long *v);
+int powernv_get_random_real_mode(unsigned long *v);
+#else
+static inline int powernv_hwrng_present(void) { return 0; }
+static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
+#endif
+
 #endif /* _ASM_POWERPC_ARCHRANDOM_H */
index 9930904..b91e74a 100644 (file)
@@ -288,6 +288,9 @@ static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
        return !is_kvmppc_hv_enabled(vcpu->kvm);
 }
 
+extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
+extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
+
 /* Magic register values loaded into r3 and r4 before the 'sc' assembly
  * instruction for the OSI hypercalls */
 #define OSI_SC_MAGIC_R3                        0x113724FA
index 14619a5..3536d12 100644 (file)
@@ -85,6 +85,20 @@ static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
        return old == 0;
 }
 
+static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
+{
+       hpte_v &= ~HPTE_V_HVLOCK;
+       asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+       hpte[0] = cpu_to_be64(hpte_v);
+}
+
+/* Without barrier */
+static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
+{
+       hpte_v &= ~HPTE_V_HVLOCK;
+       hpte[0] = cpu_to_be64(hpte_v);
+}
+
 static inline int __hpte_actual_psize(unsigned int lp, int psize)
 {
        int i, shift;
@@ -281,16 +295,17 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 
 /*
  * If it's present and writable, atomically set dirty and referenced bits and
- * return the PTE, otherwise return 0. If we find a transparent hugepage
- * and if it is marked splitting we return 0;
+ * return the PTE, otherwise return 0.
  */
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
-                                                unsigned int hugepage)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
 {
        pte_t old_pte, new_pte = __pte(0);
 
        while (1) {
-               old_pte = *ptep;
+               /*
+                * Make sure we don't reload from ptep
+                */
+               old_pte = READ_ONCE(*ptep);
                /*
                 * wait until _PAGE_BUSY is clear then set it atomically
                 */
@@ -298,12 +313,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
                        cpu_relax();
                        continue;
                }
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               /* If hugepage and is trans splitting return None */
-               if (unlikely(hugepage &&
-                            pmd_trans_splitting(pte_pmd(old_pte))))
-                       return __pte(0);
-#endif
                /* If pte is not present return None */
                if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
                        return __pte(0);
@@ -424,6 +433,10 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
        return rcu_dereference_raw_notrace(kvm->memslots);
 }
 
+extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
+
+extern void kvmhv_rm_send_ipi(int cpu);
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
index c610961..a193a13 100644 (file)
@@ -227,10 +227,8 @@ struct kvm_arch {
        unsigned long host_sdr1;
        int tlbie_lock;
        unsigned long lpcr;
-       unsigned long rmor;
-       struct kvm_rma_info *rma;
        unsigned long vrma_slb_v;
-       int rma_setup_done;
+       int hpte_setup_done;
        u32 hpt_order;
        atomic_t vcpus_running;
        u32 online_vcores;
@@ -239,6 +237,8 @@ struct kvm_arch {
        atomic_t hpte_mod_interest;
        cpumask_t need_tlb_flush;
        int hpt_cma_alloc;
+       struct dentry *debugfs_dir;
+       struct dentry *htab_dentry;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
        struct mutex hpt_mutex;
@@ -263,18 +263,15 @@ struct kvm_arch {
 
 /*
  * Struct for a virtual core.
- * Note: entry_exit_count combines an entry count in the bottom 8 bits
- * and an exit count in the next 8 bits.  This is so that we can
- * atomically increment the entry count iff the exit count is 0
- * without taking the lock.
+ * Note: entry_exit_map combines a bitmap of threads that have entered
+ * in the bottom 8 bits and a bitmap of threads that have exited in the
+ * next 8 bits.  This is so that we can atomically set the entry bit
+ * iff the exit map is 0 without taking a lock.
  */
 struct kvmppc_vcore {
        int n_runnable;
-       int n_busy;
        int num_threads;
-       int entry_exit_count;
-       int n_woken;
-       int nap_count;
+       int entry_exit_map;
        int napping_threads;
        int first_vcpuid;
        u16 pcpu;
@@ -299,13 +296,14 @@ struct kvmppc_vcore {
        ulong conferring_threads;
 };
 
-#define VCORE_ENTRY_COUNT(vc)  ((vc)->entry_exit_count & 0xff)
-#define VCORE_EXIT_COUNT(vc)   ((vc)->entry_exit_count >> 8)
+#define VCORE_ENTRY_MAP(vc)    ((vc)->entry_exit_map & 0xff)
+#define VCORE_EXIT_MAP(vc)     ((vc)->entry_exit_map >> 8)
+#define VCORE_IS_EXITING(vc)   (VCORE_EXIT_MAP(vc) != 0)
 
 /* Values for vcore_state */
 #define VCORE_INACTIVE 0
 #define VCORE_SLEEPING 1
-#define VCORE_STARTING 2
+#define VCORE_PREEMPT  2
 #define VCORE_RUNNING  3
 #define VCORE_EXITING  4
 
@@ -368,6 +366,14 @@ struct kvmppc_slb {
        u8 base_page_size;      /* MMU_PAGE_xxx */
 };
 
+/* Struct used to accumulate timing information in HV real mode code */
+struct kvmhv_tb_accumulator {
+       u64     seqcount;       /* used to synchronize access, also count * 2 */
+       u64     tb_total;       /* total time in timebase ticks */
+       u64     tb_min;         /* min time */
+       u64     tb_max;         /* max time */
+};
+
 # ifdef CONFIG_PPC_FSL_BOOK3E
 #define KVMPPC_BOOKE_IAC_NUM   2
 #define KVMPPC_BOOKE_DAC_NUM   2
@@ -656,6 +662,19 @@ struct kvm_vcpu_arch {
 
        u32 emul_inst;
 #endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       struct kvmhv_tb_accumulator *cur_activity;      /* What we're timing */
+       u64     cur_tb_start;                   /* when it started */
+       struct kvmhv_tb_accumulator rm_entry;   /* real-mode entry code */
+       struct kvmhv_tb_accumulator rm_intr;    /* real-mode intr handling */
+       struct kvmhv_tb_accumulator rm_exit;    /* real-mode exit code */
+       struct kvmhv_tb_accumulator guest_time; /* guest execution */
+       struct kvmhv_tb_accumulator cede_time;  /* time napping inside guest */
+
+       struct dentry *debugfs_dir;
+       struct dentry *debugfs_timings;
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
 };
 
 #define VCPU_FPR(vcpu, i)      (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
index 46bf652..b8475da 100644 (file)
@@ -302,6 +302,8 @@ static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
        return kvm->arch.kvm_ops == kvmppc_hv_ops;
 }
 
+extern int kvmppc_hwrng_present(void);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
index 9835ac4..11a3863 100644 (file)
@@ -247,28 +247,16 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 #define pmd_large(pmd)         0
 #define has_transparent_hugepage() 0
 #endif
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
                                 unsigned *shift);
-
-static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva,
-                                    unsigned long *pte_sizep)
+static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+                                              unsigned *shift)
 {
-       pte_t *ptep;
-       unsigned long ps = *pte_sizep;
-       unsigned int shift;
-
-       ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
-       if (!ptep)
-               return NULL;
-       if (shift)
-               *pte_sizep = 1ul << shift;
-       else
-               *pte_sizep = PAGE_SIZE;
-
-       if (ps > *pte_sizep)
-               return NULL;
-
-       return ptep;
+       if (!arch_irqs_disabled()) {
+               pr_info("%s called with irq enabled\n", __func__);
+               dump_stack();
+       }
+       return __find_linux_pte_or_hugepte(pgdir, ea, shift);
 }
 #endif /* __ASSEMBLY__ */
 
index 03cbada..10fc784 100644 (file)
@@ -211,5 +211,8 @@ extern void secondary_cpu_time_init(void);
 
 DECLARE_PER_CPU(u64, decrementers_next_tb);
 
+/* Convert timebase ticks to nanoseconds */
+unsigned long long tb_to_ns(unsigned long long tb_ticks);
+
 #endif /* __KERNEL__ */
 #endif /* __POWERPC_TIME_H */
index 4717859..0034b6b 100644 (file)
@@ -37,6 +37,7 @@
 #include <asm/thread_info.h>
 #include <asm/rtas.h>
 #include <asm/vdso_datapage.h>
+#include <asm/dbell.h>
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #include <asm/lppaca.h>
@@ -458,6 +459,19 @@ int main(void)
        DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
        DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
        DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
+#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry));
+       DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr));
+       DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit));
+       DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time));
+       DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time));
+       DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity));
+       DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start));
+       DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount));
+       DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total));
+       DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min));
+       DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max));
 #endif
        DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
        DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
@@ -492,7 +506,6 @@ int main(void)
        DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
        DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
        DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
-       DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
        DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
        DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
        DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
@@ -550,8 +563,7 @@ int main(void)
        DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
        DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
        DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
-       DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
-       DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
+       DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map));
        DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
        DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
        DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
@@ -748,5 +760,7 @@ int main(void)
                        offsetof(struct paca_struct, subcore_sibling_mask));
 #endif
 
+       DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
+
        return 0;
 }
index a4c62eb..44b480e 100644 (file)
@@ -334,9 +334,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
        int hugepage_shift;
 
        /*
-        * We won't find hugepages here, iomem
+        * We won't find hugepages here(this is iomem). Hence we are not
+        * worried about _PAGE_SPLITTING/collapse. Also we will not hit
+        * page table free, because of init_mm.
         */
-       ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
+       ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
        if (!ptep)
                return token;
        WARN_ON(hugepage_shift);
index 24b968f..63d9cc4 100644 (file)
@@ -71,15 +71,15 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
                vaddr = (unsigned long)PCI_FIX_ADDR(addr);
                if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
                        return NULL;
-
-               ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
+               /*
+                * We won't find huge pages here (iomem). Also can't hit
+                * a page table free due to init_mm
+                */
+               ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
                                                 &hugepage_shift);
                if (ptep == NULL)
                        paddr = 0;
                else {
-                       /*
-                        * we don't have hugepages backing iomem
-                        */
                        WARN_ON(hugepage_shift);
                        paddr = pte_pfn(*ptep) << PAGE_SHIFT;
                }
index 2d7b33f..56f4484 100644 (file)
@@ -608,6 +608,12 @@ void arch_suspend_enable_irqs(void)
 }
 #endif
 
+unsigned long long tb_to_ns(unsigned long long ticks)
+{
+       return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
+}
+EXPORT_SYMBOL_GPL(tb_to_ns);
+
 /*
  * Scheduler clock - returns current time in nanosec units.
  *
index 11850f3..3caec2c 100644 (file)
@@ -75,7 +75,7 @@ config KVM_BOOK3S_64
 
 config KVM_BOOK3S_64_HV
        tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
-       depends on KVM_BOOK3S_64
+       depends on KVM_BOOK3S_64 && PPC_POWERNV
        select KVM_BOOK3S_HV_POSSIBLE
        select MMU_NOTIFIER
        select CMA
@@ -110,6 +110,20 @@ config KVM_BOOK3S_64_PR
          processor, including emulating 32-bit processors on a 64-bit
          host.
 
+config KVM_BOOK3S_HV_EXIT_TIMING
+       bool "Detailed timing for hypervisor real-mode code"
+       depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
+       ---help---
+         Calculate time taken for each vcpu in the real-mode guest entry,
+         exit, and interrupt handling code, plus time spent in the guest
+         and in nap mode due to idle (cede) while other threads are still
+         in the guest.  The total, minimum and maximum times in nanoseconds
+         together with the number of executions are reported in debugfs in
+         kvm/vm#/vcpu#/timings.  The overhead is of the order of 30 - 40
+         ns per exit on POWER8.
+
+         If unsure, say N.
+
 config KVM_BOOKE_HV
        bool
 
index cfbcdc6..453a8a4 100644 (file)
@@ -821,6 +821,82 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 #endif
 }
 
+int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+       unsigned long size = kvmppc_get_gpr(vcpu, 4);
+       unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+       u64 buf;
+       int ret;
+
+       if (!is_power_of_2(size) || (size > sizeof(buf)))
+               return H_TOO_HARD;
+
+       ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+       if (ret != 0)
+               return H_TOO_HARD;
+
+       switch (size) {
+       case 1:
+               kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
+               break;
+
+       case 2:
+               kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(__be16 *)&buf));
+               break;
+
+       case 4:
+               kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(__be32 *)&buf));
+               break;
+
+       case 8:
+               kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(__be64 *)&buf));
+               break;
+
+       default:
+               BUG();
+       }
+
+       return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load);
+
+int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+       unsigned long size = kvmppc_get_gpr(vcpu, 4);
+       unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+       unsigned long val = kvmppc_get_gpr(vcpu, 6);
+       u64 buf;
+       int ret;
+
+       switch (size) {
+       case 1:
+               *(u8 *)&buf = val;
+               break;
+
+       case 2:
+               *(__be16 *)&buf = cpu_to_be16(val);
+               break;
+
+       case 4:
+               *(__be32 *)&buf = cpu_to_be32(val);
+               break;
+
+       case 8:
+               *(__be64 *)&buf = cpu_to_be64(val);
+               break;
+
+       default:
+               return H_TOO_HARD;
+       }
+
+       ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+       if (ret != 0)
+               return H_TOO_HARD;
+
+       return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_store);
+
 int kvmppc_core_check_processor_compat(void)
 {
        /*
index 534acb3..1a4acf8 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/srcu.h>
 #include <linux/anon_inodes.h>
 #include <linux/file.h>
+#include <linux/debugfs.h>
 
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
@@ -116,12 +117,12 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
        long order;
 
        mutex_lock(&kvm->lock);
-       if (kvm->arch.rma_setup_done) {
-               kvm->arch.rma_setup_done = 0;
-               /* order rma_setup_done vs. vcpus_running */
+       if (kvm->arch.hpte_setup_done) {
+               kvm->arch.hpte_setup_done = 0;
+               /* order hpte_setup_done vs. vcpus_running */
                smp_mb();
                if (atomic_read(&kvm->arch.vcpus_running)) {
-                       kvm->arch.rma_setup_done = 1;
+                       kvm->arch.hpte_setup_done = 1;
                        goto out;
                }
        }
@@ -338,9 +339,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
        v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
        gr = kvm->arch.revmap[index].guest_rpte;
 
-       /* Unlock the HPTE */
-       asm volatile("lwsync" : : : "memory");
-       hptep[0] = cpu_to_be64(v);
+       unlock_hpte(hptep, v);
        preempt_enable();
 
        gpte->eaddr = eaddr;
@@ -469,8 +468,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
        hpte[1] = be64_to_cpu(hptep[1]);
        hpte[2] = r = rev->guest_rpte;
-       asm volatile("lwsync" : : : "memory");
-       hptep[0] = cpu_to_be64(hpte[0]);
+       unlock_hpte(hptep, hpte[0]);
        preempt_enable();
 
        if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
@@ -537,23 +535,21 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                }
                /* if the guest wants write access, see if that is OK */
                if (!writing && hpte_is_writable(r)) {
-                       unsigned int hugepage_shift;
                        pte_t *ptep, pte;
-
+                       unsigned long flags;
                        /*
                         * We need to protect against page table destruction
-                        * while looking up and updating the pte.
+                        * hugepage split and collapse.
                         */
-                       rcu_read_lock_sched();
+                       local_irq_save(flags);
                        ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-                                                        hva, &hugepage_shift);
+                                                        hva, NULL);
                        if (ptep) {
-                               pte = kvmppc_read_update_linux_pte(ptep, 1,
-                                                          hugepage_shift);
+                               pte = kvmppc_read_update_linux_pte(ptep, 1);
                                if (pte_write(pte))
                                        write_ok = 1;
                        }
-                       rcu_read_unlock_sched();
+                       local_irq_restore(flags);
                }
        }
 
@@ -621,7 +617,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        hptep[1] = cpu_to_be64(r);
        eieio();
-       hptep[0] = cpu_to_be64(hpte[0]);
+       __unlock_hpte(hptep, hpte[0]);
        asm volatile("ptesync" : : : "memory");
        preempt_enable();
        if (page && hpte_is_writable(r))
@@ -642,7 +638,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
        return ret;
 
  out_unlock:
-       hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+       __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
        preempt_enable();
        goto out_put;
 }
@@ -771,7 +767,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
                        }
                }
                unlock_rmap(rmapp);
-               hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
        }
        return 0;
 }
@@ -857,7 +853,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
                        }
                        ret = 1;
                }
-               hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
        } while ((i = j) != head);
 
        unlock_rmap(rmapp);
@@ -974,8 +970,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
 
                /* Now check and modify the HPTE */
                if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
-                       /* unlock and continue */
-                       hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+                       __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
                        continue;
                }
 
@@ -996,9 +991,9 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
                                npages_dirty = n;
                        eieio();
                }
-               v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
+               v &= ~HPTE_V_ABSENT;
                v |= HPTE_V_VALID;
-               hptep[0] = cpu_to_be64(v);
+               __unlock_hpte(hptep, v);
        } while ((i = j) != head);
 
        unlock_rmap(rmapp);
@@ -1218,8 +1213,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
                        r &= ~HPTE_GR_MODIFIED;
                        revp->guest_rpte = r;
                }
-               asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
-               hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               unlock_hpte(hptp, be64_to_cpu(hptp[0]));
                preempt_enable();
                if (!(valid == want_valid && (first_pass || dirty)))
                        ok = 0;
@@ -1339,20 +1333,20 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
        unsigned long tmp[2];
        ssize_t nb;
        long int err, ret;
-       int rma_setup;
+       int hpte_setup;
 
        if (!access_ok(VERIFY_READ, buf, count))
                return -EFAULT;
 
        /* lock out vcpus from running while we're doing this */
        mutex_lock(&kvm->lock);
-       rma_setup = kvm->arch.rma_setup_done;
-       if (rma_setup) {
-               kvm->arch.rma_setup_done = 0;   /* temporarily */
-               /* order rma_setup_done vs. vcpus_running */
+       hpte_setup = kvm->arch.hpte_setup_done;
+       if (hpte_setup) {
+               kvm->arch.hpte_setup_done = 0;  /* temporarily */
+               /* order hpte_setup_done vs. vcpus_running */
                smp_mb();
                if (atomic_read(&kvm->arch.vcpus_running)) {
-                       kvm->arch.rma_setup_done = 1;
+                       kvm->arch.hpte_setup_done = 1;
                        mutex_unlock(&kvm->lock);
                        return -EBUSY;
                }
@@ -1405,7 +1399,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
                                       "r=%lx\n", ret, i, v, r);
                                goto out;
                        }
-                       if (!rma_setup && is_vrma_hpte(v)) {
+                       if (!hpte_setup && is_vrma_hpte(v)) {
                                unsigned long psize = hpte_base_page_size(v, r);
                                unsigned long senc = slb_pgsize_encoding(psize);
                                unsigned long lpcr;
@@ -1414,7 +1408,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
                                        (VRMA_VSID << SLB_VSID_SHIFT_1T);
                                lpcr = senc << (LPCR_VRMASD_SH - 4);
                                kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
-                               rma_setup = 1;
+                               hpte_setup = 1;
                        }
                        ++i;
                        hptp += 2;
@@ -1430,9 +1424,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
        }
 
  out:
-       /* Order HPTE updates vs. rma_setup_done */
+       /* Order HPTE updates vs. hpte_setup_done */
        smp_wmb();
-       kvm->arch.rma_setup_done = rma_setup;
+       kvm->arch.hpte_setup_done = hpte_setup;
        mutex_unlock(&kvm->lock);
 
        if (err)
@@ -1495,6 +1489,141 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
        return ret;
 }
 
+struct debugfs_htab_state {
+       struct kvm      *kvm;
+       struct mutex    mutex;
+       unsigned long   hpt_index;
+       int             chars_left;
+       int             buf_index;
+       char            buf[64];
+};
+
+static int debugfs_htab_open(struct inode *inode, struct file *file)
+{
+       struct kvm *kvm = inode->i_private;
+       struct debugfs_htab_state *p;
+
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
+
+       kvm_get_kvm(kvm);
+       p->kvm = kvm;
+       mutex_init(&p->mutex);
+       file->private_data = p;
+
+       return nonseekable_open(inode, file);
+}
+
+static int debugfs_htab_release(struct inode *inode, struct file *file)
+{
+       struct debugfs_htab_state *p = file->private_data;
+
+       kvm_put_kvm(p->kvm);
+       kfree(p);
+       return 0;
+}
+
+static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
+                                size_t len, loff_t *ppos)
+{
+       struct debugfs_htab_state *p = file->private_data;
+       ssize_t ret, r;
+       unsigned long i, n;
+       unsigned long v, hr, gr;
+       struct kvm *kvm;
+       __be64 *hptp;
+
+       ret = mutex_lock_interruptible(&p->mutex);
+       if (ret)
+               return ret;
+
+       if (p->chars_left) {
+               n = p->chars_left;
+               if (n > len)
+                       n = len;
+               r = copy_to_user(buf, p->buf + p->buf_index, n);
+               n -= r;
+               p->chars_left -= n;
+               p->buf_index += n;
+               buf += n;
+               len -= n;
+               ret = n;
+               if (r) {
+                       if (!n)
+                               ret = -EFAULT;
+                       goto out;
+               }
+       }
+
+       kvm = p->kvm;
+       i = p->hpt_index;
+       hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+       for (; len != 0 && i < kvm->arch.hpt_npte; ++i, hptp += 2) {
+               if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
+                       continue;
+
+               /* lock the HPTE so it's stable and read it */
+               preempt_disable();
+               while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+                       cpu_relax();
+               v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
+               hr = be64_to_cpu(hptp[1]);
+               gr = kvm->arch.revmap[i].guest_rpte;
+               unlock_hpte(hptp, v);
+               preempt_enable();
+
+               if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+                       continue;
+
+               n = scnprintf(p->buf, sizeof(p->buf),
+                             "%6lx %.16lx %.16lx %.16lx\n",
+                             i, v, hr, gr);
+               p->chars_left = n;
+               if (n > len)
+                       n = len;
+               r = copy_to_user(buf, p->buf, n);
+               n -= r;
+               p->chars_left -= n;
+               p->buf_index = n;
+               buf += n;
+               len -= n;
+               ret += n;
+               if (r) {
+                       if (!ret)
+                               ret = -EFAULT;
+                       goto out;
+               }
+       }
+       p->hpt_index = i;
+
+ out:
+       mutex_unlock(&p->mutex);
+       return ret;
+}
+
+ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
+                          size_t len, loff_t *ppos)
+{
+       return -EACCES;
+}
+
+static const struct file_operations debugfs_htab_fops = {
+       .owner   = THIS_MODULE,
+       .open    = debugfs_htab_open,
+       .release = debugfs_htab_release,
+       .read    = debugfs_htab_read,
+       .write   = debugfs_htab_write,
+       .llseek  = generic_file_llseek,
+};
+
+void kvmppc_mmu_debugfs_init(struct kvm *kvm)
+{
+       kvm->arch.htab_dentry = debugfs_create_file("htab", 0400,
+                                                   kvm->arch.debugfs_dir, kvm,
+                                                   &debugfs_htab_fops);
+}
+
 void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 {
        struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
index de74756..48d3c5d 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/page-flags.h>
 #include <linux/srcu.h>
 #include <linux/miscdevice.h>
+#include <linux/debugfs.h>
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
@@ -50,6 +51,7 @@
 #include <asm/hvcall.h>
 #include <asm/switch_to.h>
 #include <asm/smp.h>
+#include <asm/dbell.h>
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
@@ -83,9 +85,35 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+static bool kvmppc_ipi_thread(int cpu)
+{
+       /* On POWER8 for IPIs to threads in the same core, use msgsnd */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+               preempt_disable();
+               if (cpu_first_thread_sibling(cpu) ==
+                   cpu_first_thread_sibling(smp_processor_id())) {
+                       unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+                       msg |= cpu_thread_in_core(cpu);
+                       smp_mb();
+                       __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+                       preempt_enable();
+                       return true;
+               }
+               preempt_enable();
+       }
+
+#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
+       if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) {
+               xics_wake_cpu(cpu);
+               return true;
+       }
+#endif
+
+       return false;
+}
+
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
-       int me;
        int cpu = vcpu->cpu;
        wait_queue_head_t *wqp;
 
@@ -95,20 +123,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
                ++vcpu->stat.halt_wakeup;
        }
 
-       me = get_cpu();
+       if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid))
+               return;
 
        /* CPU points to the first thread of the core */
-       if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
-#ifdef CONFIG_PPC_ICP_NATIVE
-               int real_cpu = cpu + vcpu->arch.ptid;
-               if (paca[real_cpu].kvm_hstate.xics_phys)
-                       xics_wake_cpu(real_cpu);
-               else
-#endif
-               if (cpu_online(cpu))
-                       smp_send_reschedule(cpu);
-       }
-       put_cpu();
+       if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
+               smp_send_reschedule(cpu);
 }
 
 /*
@@ -706,6 +726,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
                /* Send the error out to userspace via KVM_RUN */
                return rc;
+       case H_LOGICAL_CI_LOAD:
+               ret = kvmppc_h_logical_ci_load(vcpu);
+               if (ret == H_TOO_HARD)
+                       return RESUME_HOST;
+               break;
+       case H_LOGICAL_CI_STORE:
+               ret = kvmppc_h_logical_ci_store(vcpu);
+               if (ret == H_TOO_HARD)
+                       return RESUME_HOST;
+               break;
        case H_SET_MODE:
                ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
                                        kvmppc_get_gpr(vcpu, 5),
@@ -740,6 +770,8 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
        case H_CONFER:
        case H_REGISTER_VPA:
        case H_SET_MODE:
+       case H_LOGICAL_CI_LOAD:
+       case H_LOGICAL_CI_STORE:
 #ifdef CONFIG_KVM_XICS
        case H_XIRR:
        case H_CPPR:
@@ -1410,6 +1442,154 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
        return vcore;
 }
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static struct debugfs_timings_element {
+       const char *name;
+       size_t offset;
+} timings[] = {
+       {"rm_entry",    offsetof(struct kvm_vcpu, arch.rm_entry)},
+       {"rm_intr",     offsetof(struct kvm_vcpu, arch.rm_intr)},
+       {"rm_exit",     offsetof(struct kvm_vcpu, arch.rm_exit)},
+       {"guest",       offsetof(struct kvm_vcpu, arch.guest_time)},
+       {"cede",        offsetof(struct kvm_vcpu, arch.cede_time)},
+};
+
+#define N_TIMINGS      (sizeof(timings) / sizeof(timings[0]))
+
+struct debugfs_timings_state {
+       struct kvm_vcpu *vcpu;
+       unsigned int    buflen;
+       char            buf[N_TIMINGS * 100];
+};
+
+static int debugfs_timings_open(struct inode *inode, struct file *file)
+{
+       struct kvm_vcpu *vcpu = inode->i_private;
+       struct debugfs_timings_state *p;
+
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
+
+       kvm_get_kvm(vcpu->kvm);
+       p->vcpu = vcpu;
+       file->private_data = p;
+
+       return nonseekable_open(inode, file);
+}
+
+static int debugfs_timings_release(struct inode *inode, struct file *file)
+{
+       struct debugfs_timings_state *p = file->private_data;
+
+       kvm_put_kvm(p->vcpu->kvm);
+       kfree(p);
+       return 0;
+}
+
+static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
+                                   size_t len, loff_t *ppos)
+{
+       struct debugfs_timings_state *p = file->private_data;
+       struct kvm_vcpu *vcpu = p->vcpu;
+       char *s, *buf_end;
+       struct kvmhv_tb_accumulator tb;
+       u64 count;
+       loff_t pos;
+       ssize_t n;
+       int i, loops;
+       bool ok;
+
+       if (!p->buflen) {
+               s = p->buf;
+               buf_end = s + sizeof(p->buf);
+               for (i = 0; i < N_TIMINGS; ++i) {
+                       struct kvmhv_tb_accumulator *acc;
+
+                       acc = (struct kvmhv_tb_accumulator *)
+                               ((unsigned long)vcpu + timings[i].offset);
+                       ok = false;
+                       for (loops = 0; loops < 1000; ++loops) {
+                               count = acc->seqcount;
+                               if (!(count & 1)) {
+                                       smp_rmb();
+                                       tb = *acc;
+                                       smp_rmb();
+                                       if (count == acc->seqcount) {
+                                               ok = true;
+                                               break;
+                                       }
+                               }
+                               udelay(1);
+                       }
+                       if (!ok)
+                               snprintf(s, buf_end - s, "%s: stuck\n",
+                                       timings[i].name);
+                       else
+                               snprintf(s, buf_end - s,
+                                       "%s: %llu %llu %llu %llu\n",
+                                       timings[i].name, count / 2,
+                                       tb_to_ns(tb.tb_total),
+                                       tb_to_ns(tb.tb_min),
+                                       tb_to_ns(tb.tb_max));
+                       s += strlen(s);
+               }
+               p->buflen = s - p->buf;
+       }
+
+       pos = *ppos;
+       if (pos >= p->buflen)
+               return 0;
+       if (len > p->buflen - pos)
+               len = p->buflen - pos;
+       n = copy_to_user(buf, p->buf + pos, len);
+       if (n) {
+               if (n == len)
+                       return -EFAULT;
+               len -= n;
+       }
+       *ppos = pos + len;
+       return len;
+}
+
+static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
+                                    size_t len, loff_t *ppos)
+{
+       return -EACCES;
+}
+
+static const struct file_operations debugfs_timings_ops = {
+       .owner   = THIS_MODULE,
+       .open    = debugfs_timings_open,
+       .release = debugfs_timings_release,
+       .read    = debugfs_timings_read,
+       .write   = debugfs_timings_write,
+       .llseek  = generic_file_llseek,
+};
+
+/* Create a debugfs directory for the vcpu */
+static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+{
+       char buf[16];
+       struct kvm *kvm = vcpu->kvm;
+
+       snprintf(buf, sizeof(buf), "vcpu%u", id);
+       if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
+               return;
+       vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
+       if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
+               return;
+       vcpu->arch.debugfs_timings =
+               debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
+                                   vcpu, &debugfs_timings_ops);
+}
+
+#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+{
+}
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+
 static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
                                                   unsigned int id)
 {
@@ -1479,6 +1659,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
        vcpu->arch.cpu_type = KVM_CPU_3S_64;
        kvmppc_sanity_check(vcpu);
 
+       debugfs_vcpu_init(vcpu, id);
+
        return vcpu;
 
 free_vcpu:
@@ -1566,8 +1748,10 @@ static int kvmppc_grab_hwthread(int cpu)
        tpaca = &paca[cpu];
 
        /* Ensure the thread won't go into the kernel if it wakes */
-       tpaca->kvm_hstate.hwthread_req = 1;
        tpaca->kvm_hstate.kvm_vcpu = NULL;
+       tpaca->kvm_hstate.napping = 0;
+       smp_wmb();
+       tpaca->kvm_hstate.hwthread_req = 1;
 
        /*
         * If the thread is already executing in the kernel (e.g. handling
@@ -1610,35 +1794,41 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
        }
        cpu = vc->pcpu + vcpu->arch.ptid;
        tpaca = &paca[cpu];
-       tpaca->kvm_hstate.kvm_vcpu = vcpu;
        tpaca->kvm_hstate.kvm_vcore = vc;
        tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
        vcpu->cpu = vc->pcpu;
+       /* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
        smp_wmb();
-#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
-       if (cpu != smp_processor_id()) {
-               xics_wake_cpu(cpu);
-               if (vcpu->arch.ptid)
-                       ++vc->n_woken;
-       }
-#endif
+       tpaca->kvm_hstate.kvm_vcpu = vcpu;
+       if (cpu != smp_processor_id())
+               kvmppc_ipi_thread(cpu);
 }
 
-static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+static void kvmppc_wait_for_nap(void)
 {
-       int i;
+       int cpu = smp_processor_id();
+       int i, loops;
 
-       HMT_low();
-       i = 0;
-       while (vc->nap_count < vc->n_woken) {
-               if (++i >= 1000000) {
-                       pr_err("kvmppc_wait_for_nap timeout %d %d\n",
-                              vc->nap_count, vc->n_woken);
-                       break;
+       for (loops = 0; loops < 1000000; ++loops) {
+               /*
+                * Check if all threads are finished.
+                * We set the vcpu pointer when starting a thread
+                * and the thread clears it when finished, so we look
+                * for any threads that still have a non-NULL vcpu ptr.
+                */
+               for (i = 1; i < threads_per_subcore; ++i)
+                       if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+                               break;
+               if (i == threads_per_subcore) {
+                       HMT_medium();
+                       return;
                }
-               cpu_relax();
+               HMT_low();
        }
        HMT_medium();
+       for (i = 1; i < threads_per_subcore; ++i)
+               if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+                       pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
 }
 
 /*
@@ -1700,54 +1890,91 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
        mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
 }
 
+static void prepare_threads(struct kvmppc_vcore *vc)
+{
+       struct kvm_vcpu *vcpu, *vnext;
+
+       list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+                                arch.run_list) {
+               if (signal_pending(vcpu->arch.run_task))
+                       vcpu->arch.ret = -EINTR;
+               else if (vcpu->arch.vpa.update_pending ||
+                        vcpu->arch.slb_shadow.update_pending ||
+                        vcpu->arch.dtl.update_pending)
+                       vcpu->arch.ret = RESUME_GUEST;
+               else
+                       continue;
+               kvmppc_remove_runnable(vc, vcpu);
+               wake_up(&vcpu->arch.cpu_run);
+       }
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc)
+{
+       u64 now;
+       long ret;
+       struct kvm_vcpu *vcpu, *vnext;
+
+       now = get_tb();
+       list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+                                arch.run_list) {
+               /* cancel pending dec exception if dec is positive */
+               if (now < vcpu->arch.dec_expires &&
+                   kvmppc_core_pending_dec(vcpu))
+                       kvmppc_core_dequeue_dec(vcpu);
+
+               trace_kvm_guest_exit(vcpu);
+
+               ret = RESUME_GUEST;
+               if (vcpu->arch.trap)
+                       ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
+                                                   vcpu->arch.run_task);
+
+               vcpu->arch.ret = ret;
+               vcpu->arch.trap = 0;
+
+               if (vcpu->arch.ceded) {
+                       if (!is_kvmppc_resume_guest(ret))
+                               kvmppc_end_cede(vcpu);
+                       else
+                               kvmppc_set_timer(vcpu);
+               }
+               if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+                       kvmppc_remove_runnable(vc, vcpu);
+                       wake_up(&vcpu->arch.cpu_run);
+               }
+       }
+}
+
 /*
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
  */
-static void kvmppc_run_core(struct kvmppc_vcore *vc)
+static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
 {
-       struct kvm_vcpu *vcpu, *vnext;
-       long ret;
-       u64 now;
-       int i, need_vpa_update;
+       struct kvm_vcpu *vcpu;
+       int i;
        int srcu_idx;
-       struct kvm_vcpu *vcpus_to_update[threads_per_core];
 
-       /* don't start if any threads have a signal pending */
-       need_vpa_update = 0;
-       list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-               if (signal_pending(vcpu->arch.run_task))
-                       return;
-               if (vcpu->arch.vpa.update_pending ||
-                   vcpu->arch.slb_shadow.update_pending ||
-                   vcpu->arch.dtl.update_pending)
-                       vcpus_to_update[need_vpa_update++] = vcpu;
-       }
+       /*
+        * Remove from the list any threads that have a signal pending
+        * or need a VPA update done
+        */
+       prepare_threads(vc);
+
+       /* if the runner is no longer runnable, let the caller pick a new one */
+       if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
+               return;
 
        /*
-        * Initialize *vc, in particular vc->vcore_state, so we can
-        * drop the vcore lock if necessary.
+        * Initialize *vc.
         */
-       vc->n_woken = 0;
-       vc->nap_count = 0;
-       vc->entry_exit_count = 0;
+       vc->entry_exit_map = 0;
        vc->preempt_tb = TB_NIL;
-       vc->vcore_state = VCORE_STARTING;
        vc->in_guest = 0;
        vc->napping_threads = 0;
        vc->conferring_threads = 0;
 
-       /*
-        * Updating any of the vpas requires calling kvmppc_pin_guest_page,
-        * which can't be called with any spinlocks held.
-        */
-       if (need_vpa_update) {
-               spin_unlock(&vc->lock);
-               for (i = 0; i < need_vpa_update; ++i)
-                       kvmppc_update_vpas(vcpus_to_update[i]);
-               spin_lock(&vc->lock);
-       }
-
        /*
         * Make sure we are running on primary threads, and that secondary
         * threads are offline.  Also check if the number of threads in this
@@ -1755,8 +1982,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
         */
        if ((threads_per_core > 1) &&
            ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
-               list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+               list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
                        vcpu->arch.ret = -EBUSY;
+                       kvmppc_remove_runnable(vc, vcpu);
+                       wake_up(&vcpu->arch.cpu_run);
+               }
                goto out;
        }
 
@@ -1797,8 +2027,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
        list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                vcpu->cpu = -1;
        /* wait for secondary threads to finish writing their state to memory */
-       if (vc->nap_count < vc->n_woken)
-               kvmppc_wait_for_nap(vc);
+       kvmppc_wait_for_nap();
        for (i = 0; i < threads_per_subcore; ++i)
                kvmppc_release_hwthread(vc->pcpu + i);
        /* prevent other vcpu threads from doing kvmppc_start_thread() now */
@@ -1812,44 +2041,12 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
        kvm_guest_exit();
 
        preempt_enable();
-       cond_resched();
 
        spin_lock(&vc->lock);
-       now = get_tb();
-       list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-               /* cancel pending dec exception if dec is positive */
-               if (now < vcpu->arch.dec_expires &&
-                   kvmppc_core_pending_dec(vcpu))
-                       kvmppc_core_dequeue_dec(vcpu);
-
-               trace_kvm_guest_exit(vcpu);
-
-               ret = RESUME_GUEST;
-               if (vcpu->arch.trap)
-                       ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
-                                                   vcpu->arch.run_task);
-
-               vcpu->arch.ret = ret;
-               vcpu->arch.trap = 0;
-
-               if (vcpu->arch.ceded) {
-                       if (!is_kvmppc_resume_guest(ret))
-                               kvmppc_end_cede(vcpu);
-                       else
-                               kvmppc_set_timer(vcpu);
-               }
-       }
+       post_guest_process(vc);
 
  out:
        vc->vcore_state = VCORE_INACTIVE;
-       list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
-                                arch.run_list) {
-               if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
-                       kvmppc_remove_runnable(vc, vcpu);
-                       wake_up(&vcpu->arch.cpu_run);
-               }
-       }
-
        trace_kvmppc_run_core(vc, 1);
 }
 
@@ -1939,8 +2136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
         * this thread straight away and have it join in.
         */
        if (!signal_pending(current)) {
-               if (vc->vcore_state == VCORE_RUNNING &&
-                   VCORE_EXIT_COUNT(vc) == 0) {
+               if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) {
                        kvmppc_create_dtl_entry(vcpu, vc);
                        kvmppc_start_thread(vcpu);
                        trace_kvm_guest_enter(vcpu);
@@ -1971,7 +2167,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                }
                if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
                        break;
-               vc->runner = vcpu;
                n_ceded = 0;
                list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
                        if (!v->arch.pending_exceptions)
@@ -1979,10 +2174,17 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                        else
                                v->arch.ceded = 0;
                }
-               if (n_ceded == vc->n_runnable)
+               vc->runner = vcpu;
+               if (n_ceded == vc->n_runnable) {
                        kvmppc_vcore_blocked(vc);
-               else
+               } else if (should_resched()) {
+                       vc->vcore_state = VCORE_PREEMPT;
+                       /* Let something else run */
+                       cond_resched_lock(&vc->lock);
+                       vc->vcore_state = VCORE_INACTIVE;
+               } else {
                        kvmppc_run_core(vc);
+               }
                vc->runner = NULL;
        }
 
@@ -2032,11 +2234,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
        }
 
        atomic_inc(&vcpu->kvm->arch.vcpus_running);
-       /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
+       /* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */
        smp_mb();
 
        /* On the first time here, set up HTAB and VRMA */
-       if (!vcpu->kvm->arch.rma_setup_done) {
+       if (!vcpu->kvm->arch.hpte_setup_done) {
                r = kvmppc_hv_setup_htab_rma(vcpu);
                if (r)
                        goto out;
@@ -2238,7 +2440,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
        int srcu_idx;
 
        mutex_lock(&kvm->lock);
-       if (kvm->arch.rma_setup_done)
+       if (kvm->arch.hpte_setup_done)
                goto out;       /* another vcpu beat us to it */
 
        /* Allocate hashed page table (if not done already) and reset it */
@@ -2289,9 +2491,9 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 
        kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
 
-       /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
+       /* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */
        smp_wmb();
-       kvm->arch.rma_setup_done = 1;
+       kvm->arch.hpte_setup_done = 1;
        err = 0;
  out_srcu:
        srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -2307,6 +2509,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 static int kvmppc_core_init_vm_hv(struct kvm *kvm)
 {
        unsigned long lpcr, lpid;
+       char buf[32];
 
        /* Allocate the guest's logical partition ID */
 
@@ -2347,6 +2550,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
         */
        kvm_hv_vm_activated();
 
+       /*
+        * Create a debugfs directory for the VM
+        */
+       snprintf(buf, sizeof(buf), "vm%d", current->pid);
+       kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
+       if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
+               kvmppc_mmu_debugfs_init(kvm);
+
        return 0;
 }
 
@@ -2367,6 +2578,8 @@ static void kvmppc_free_vcores(struct kvm *kvm)
 
 static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
 {
+       debugfs_remove_recursive(kvm->arch.debugfs_dir);
+
        kvm_hv_vm_deactivated();
 
        kvmppc_free_vcores(kvm);
index 1f083ff..ed2589d 100644 (file)
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
+#include <asm/archrandom.h>
+#include <asm/xics.h>
+#include <asm/dbell.h>
+#include <asm/cputhreads.h>
 
 #define KVM_CMA_CHUNK_ORDER    18
 
@@ -114,11 +118,11 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
        int rv = H_SUCCESS; /* => don't yield */
 
        set_bit(vcpu->arch.ptid, &vc->conferring_threads);
-       while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
-               threads_running = VCORE_ENTRY_COUNT(vc);
-               threads_ceded = hweight32(vc->napping_threads);
-               threads_conferring = hweight32(vc->conferring_threads);
-               if (threads_ceded + threads_conferring >= threads_running) {
+       while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
+               threads_running = VCORE_ENTRY_MAP(vc);
+               threads_ceded = vc->napping_threads;
+               threads_conferring = vc->conferring_threads;
+               if ((threads_ceded | threads_conferring) == threads_running) {
                        rv = H_TOO_HARD; /* => do yield */
                        break;
                }
@@ -169,3 +173,89 @@ int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
        return 0;
 }
 EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
+
+int kvmppc_hwrng_present(void)
+{
+       return powernv_hwrng_present();
+}
+EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
+
+long kvmppc_h_random(struct kvm_vcpu *vcpu)
+{
+       if (powernv_get_random_real_mode(&vcpu->arch.gpr[4]))
+               return H_SUCCESS;
+
+       return H_HARDWARE;
+}
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+       __asm__ __volatile__("stbcix %0,0,%1"
+               : : "r" (val), "r" (paddr) : "memory");
+}
+
+/*
+ * Send an interrupt or message to another CPU.
+ * This can only be called in real mode.
+ * The caller needs to include any barrier needed to order writes
+ * to memory vs. the IPI/message.
+ */
+void kvmhv_rm_send_ipi(int cpu)
+{
+       unsigned long xics_phys;
+
+       /* On POWER8 for IPIs to threads in the same core, use msgsnd */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+           cpu_first_thread_sibling(cpu) ==
+           cpu_first_thread_sibling(raw_smp_processor_id())) {
+               unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+               msg |= cpu_thread_in_core(cpu);
+               __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+               return;
+       }
+
+       /* Else poke the target with an IPI */
+       xics_phys = paca[cpu].kvm_hstate.xics_phys;
+       rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+/*
+ * The following functions are called from the assembly code
+ * in book3s_hv_rmhandlers.S.
+ */
+static void kvmhv_interrupt_vcore(struct kvmppc_vcore *vc, int active)
+{
+       int cpu = vc->pcpu;
+
+       /* Order setting of exit map vs. msgsnd/IPI */
+       smp_mb();
+       for (; active; active >>= 1, ++cpu)
+               if (active & 1)
+                       kvmhv_rm_send_ipi(cpu);
+}
+
+void kvmhv_commence_exit(int trap)
+{
+       struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+       int ptid = local_paca->kvm_hstate.ptid;
+       int me, ee;
+
+       /* Set our bit in the threads-exiting-guest map in the 0xff00
+          bits of vcore->entry_exit_map */
+       me = 0x100 << ptid;
+       do {
+               ee = vc->entry_exit_map;
+       } while (cmpxchg(&vc->entry_exit_map, ee, ee | me) != ee);
+
+       /* Are we the first here? */
+       if ((ee >> 8) != 0)
+               return;
+
+       /*
+        * Trigger the other threads in this vcore to exit the guest.
+        * If this is a hypervisor decrementer interrupt then they
+        * will be already on their way out of the guest.
+        */
+       if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
+               kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+}
index 625407e..b027a89 100644 (file)
@@ -26,11 +26,14 @@ static void *real_vmalloc_addr(void *x)
 {
        unsigned long addr = (unsigned long) x;
        pte_t *p;
-
-       p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
+       /*
+        * assume we don't have huge pages in vmalloc space...
+        * So don't worry about THP collapse/split. Called
+        * Only in realmode, hence won't need irq_save/restore.
+        */
+       p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
        if (!p || !pte_present(*p))
                return NULL;
-       /* assume we don't have huge pages in vmalloc space... */
        addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
        return __va(addr);
 }
@@ -131,31 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
        unlock_rmap(rmap);
 }
 
-static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
-                             int writing, unsigned long *pte_sizep)
-{
-       pte_t *ptep;
-       unsigned long ps = *pte_sizep;
-       unsigned int hugepage_shift;
-
-       ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
-       if (!ptep)
-               return __pte(0);
-       if (hugepage_shift)
-               *pte_sizep = 1ul << hugepage_shift;
-       else
-               *pte_sizep = PAGE_SIZE;
-       if (ps > *pte_sizep)
-               return __pte(0);
-       return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
-}
-
-static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
-{
-       asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
-       hpte[0] = cpu_to_be64(hpte_v);
-}
-
 long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                       long pte_index, unsigned long pteh, unsigned long ptel,
                       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
@@ -166,13 +144,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
        struct revmap_entry *rev;
        unsigned long g_ptel;
        struct kvm_memory_slot *memslot;
-       unsigned long pte_size;
+       unsigned hpage_shift;
        unsigned long is_io;
        unsigned long *rmap;
-       pte_t pte;
+       pte_t *ptep;
        unsigned int writing;
        unsigned long mmu_seq;
-       unsigned long rcbits;
+       unsigned long rcbits, irq_flags = 0;
 
        psize = hpte_page_size(pteh, ptel);
        if (!psize)
@@ -208,22 +186,46 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
        /* Translate to host virtual address */
        hva = __gfn_to_hva_memslot(memslot, gfn);
-
-       /* Look up the Linux PTE for the backing page */
-       pte_size = psize;
-       pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
-       if (pte_present(pte) && !pte_protnone(pte)) {
-               if (writing && !pte_write(pte))
-                       /* make the actual HPTE be read-only */
-                       ptel = hpte_make_readonly(ptel);
-               is_io = hpte_cache_bits(pte_val(pte));
-               pa = pte_pfn(pte) << PAGE_SHIFT;
-               pa |= hva & (pte_size - 1);
-               pa |= gpa & ~PAGE_MASK;
+       /*
+        * If we had a page table table change after lookup, we would
+        * retry via mmu_notifier_retry.
+        */
+       if (realmode)
+               ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
+       else {
+               local_irq_save(irq_flags);
+               ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
        }
+       if (ptep) {
+               pte_t pte;
+               unsigned int host_pte_size;
 
-       if (pte_size < psize)
-               return H_PARAMETER;
+               if (hpage_shift)
+                       host_pte_size = 1ul << hpage_shift;
+               else
+                       host_pte_size = PAGE_SIZE;
+               /*
+                * We should always find the guest page size
+                * to <= host page size, if host is using hugepage
+                */
+               if (host_pte_size < psize) {
+                       if (!realmode)
+                               local_irq_restore(flags);
+                       return H_PARAMETER;
+               }
+               pte = kvmppc_read_update_linux_pte(ptep, writing);
+               if (pte_present(pte) && !pte_protnone(pte)) {
+                       if (writing && !pte_write(pte))
+                               /* make the actual HPTE be read-only */
+                               ptel = hpte_make_readonly(ptel);
+                       is_io = hpte_cache_bits(pte_val(pte));
+                       pa = pte_pfn(pte) << PAGE_SHIFT;
+                       pa |= hva & (host_pte_size - 1);
+                       pa |= gpa & ~PAGE_MASK;
+               }
+       }
+       if (!realmode)
+               local_irq_restore(irq_flags);
 
        ptel &= ~(HPTE_R_PP0 - psize);
        ptel |= pa;
@@ -271,10 +273,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                                u64 pte;
                                while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                        cpu_relax();
-                               pte = be64_to_cpu(*hpte);
+                               pte = be64_to_cpu(hpte[0]);
                                if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
                                        break;
-                               *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+                               __unlock_hpte(hpte, pte);
                                hpte += 2;
                        }
                        if (i == 8)
@@ -290,9 +292,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
                        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                cpu_relax();
-                       pte = be64_to_cpu(*hpte);
+                       pte = be64_to_cpu(hpte[0]);
                        if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
-                               *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+                               __unlock_hpte(hpte, pte);
                                return H_PTEG_FULL;
                        }
                }
@@ -331,7 +333,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 
        /* Write the first HPTE dword, unlocking the HPTE and making it valid */
        eieio();
-       hpte[0] = cpu_to_be64(pteh);
+       __unlock_hpte(hpte, pteh);
        asm volatile("ptesync" : : : "memory");
 
        *pte_idx_ret = pte_index;
@@ -412,7 +414,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
            ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
-               hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hpte, pte);
                return H_NOT_FOUND;
        }
 
@@ -548,7 +550,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
                                be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
                        rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
                        args[j] |= rcbits << (56 - 5);
-                       hp[0] = 0;
+                       __unlock_hpte(hp, 0);
                }
        }
 
@@ -574,7 +576,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
        pte = be64_to_cpu(hpte[0]);
        if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
            ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
-               hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hpte, pte);
                return H_NOT_FOUND;
        }
 
@@ -755,8 +757,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
                                /* Return with the HPTE still locked */
                                return (hash << 3) + (i >> 1);
 
-                       /* Unlock and move on */
-                       hpte[i] = cpu_to_be64(v);
+                       __unlock_hpte(&hpte[i], v);
                }
 
                if (val & HPTE_V_SECONDARY)
index 7c22997..00e45b6 100644 (file)
 
 #define DEBUG_PASSUP
 
-static inline void rm_writeb(unsigned long paddr, u8 val)
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+                           u32 new_irq);
+
+/* -- ICS routines -- */
+static void ics_rm_check_resend(struct kvmppc_xics *xics,
+                               struct kvmppc_ics *ics, struct kvmppc_icp *icp)
 {
-       __asm__ __volatile__("sync; stbcix %0,0,%1"
-               : : "r" (val), "r" (paddr) : "memory");
+       int i;
+
+       arch_spin_lock(&ics->lock);
+
+       for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+               struct ics_irq_state *state = &ics->irq_state[i];
+
+               if (!state->resend)
+                       continue;
+
+               arch_spin_unlock(&ics->lock);
+               icp_rm_deliver_irq(xics, icp, state->number);
+               arch_spin_lock(&ics->lock);
+       }
+
+       arch_spin_unlock(&ics->lock);
 }
 
+/* -- ICP routines -- */
+
 static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
                                struct kvm_vcpu *this_vcpu)
 {
        struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
-       unsigned long xics_phys;
        int cpu;
 
        /* Mark the target VCPU as having an interrupt pending */
@@ -56,9 +76,8 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
        /* In SMT cpu will always point to thread 0, we adjust it */
        cpu += vcpu->arch.ptid;
 
-       /* Not too hard, then poke the target */
-       xics_phys = paca[cpu].kvm_hstate.xics_phys;
-       rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+       smp_mb();
+       kvmhv_rm_send_ipi(cpu);
 }
 
 static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
@@ -116,6 +135,180 @@ static inline int check_too_hard(struct kvmppc_xics *xics,
        return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
 }
 
+static void icp_rm_check_resend(struct kvmppc_xics *xics,
+                            struct kvmppc_icp *icp)
+{
+       u32 icsid;
+
+       /* Order this load with the test for need_resend in the caller */
+       smp_rmb();
+       for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+               struct kvmppc_ics *ics = xics->ics[icsid];
+
+               if (!test_and_clear_bit(icsid, icp->resend_map))
+                       continue;
+               if (!ics)
+                       continue;
+               ics_rm_check_resend(xics, ics, icp);
+       }
+}
+
+static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+                              u32 *reject)
+{
+       union kvmppc_icp_state old_state, new_state;
+       bool success;
+
+       do {
+               old_state = new_state = READ_ONCE(icp->state);
+
+               *reject = 0;
+
+               /* See if we can deliver */
+               success = new_state.cppr > priority &&
+                       new_state.mfrr > priority &&
+                       new_state.pending_pri > priority;
+
+               /*
+                * If we can, check for a rejection and perform the
+                * delivery
+                */
+               if (success) {
+                       *reject = new_state.xisr;
+                       new_state.xisr = irq;
+                       new_state.pending_pri = priority;
+               } else {
+                       /*
+                        * If we failed to deliver we set need_resend
+                        * so a subsequent CPPR state change causes us
+                        * to try a new delivery.
+                        */
+                       new_state.need_resend = true;
+               }
+
+       } while (!icp_rm_try_update(icp, old_state, new_state));
+
+       return success;
+}
+
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+                           u32 new_irq)
+{
+       struct ics_irq_state *state;
+       struct kvmppc_ics *ics;
+       u32 reject;
+       u16 src;
+
+       /*
+        * This is used both for initial delivery of an interrupt and
+        * for subsequent rejection.
+        *
+        * Rejection can be racy vs. resends. We have evaluated the
+        * rejection in an atomic ICP transaction which is now complete,
+        * so potentially the ICP can already accept the interrupt again.
+        *
+        * So we need to retry the delivery. Essentially the reject path
+        * boils down to a failed delivery. Always.
+        *
+        * Now the interrupt could also have moved to a different target,
+        * thus we may need to re-do the ICP lookup as well
+        */
+
+ again:
+       /* Get the ICS state and lock it */
+       ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+       if (!ics) {
+               /* Unsafe increment, but this does not need to be accurate */
+               xics->err_noics++;
+               return;
+       }
+       state = &ics->irq_state[src];
+
+       /* Get a lock on the ICS */
+       arch_spin_lock(&ics->lock);
+
+       /* Get our server */
+       if (!icp || state->server != icp->server_num) {
+               icp = kvmppc_xics_find_server(xics->kvm, state->server);
+               if (!icp) {
+                       /* Unsafe increment again*/
+                       xics->err_noicp++;
+                       goto out;
+               }
+       }
+
+       /* Clear the resend bit of that interrupt */
+       state->resend = 0;
+
+       /*
+        * If masked, bail out
+        *
+        * Note: PAPR doesn't mention anything about masked pending
+        * when doing a resend, only when doing a delivery.
+        *
+        * However that would have the effect of losing a masked
+        * interrupt that was rejected and isn't consistent with
+        * the whole masked_pending business which is about not
+        * losing interrupts that occur while masked.
+        *
+        * I don't differentiate normal deliveries and resends, this
+        * implementation will differ from PAPR and not lose such
+        * interrupts.
+        */
+       if (state->priority == MASKED) {
+               state->masked_pending = 1;
+               goto out;
+       }
+
+       /*
+        * Try the delivery, this will set the need_resend flag
+        * in the ICP as part of the atomic transaction if the
+        * delivery is not possible.
+        *
+        * Note that if successful, the new delivery might have itself
+        * rejected an interrupt that was "delivered" before we took the
+        * ics spin lock.
+        *
+        * In this case we do the whole sequence all over again for the
+        * new guy. We cannot assume that the rejected interrupt is less
+        * favored than the new one, and thus doesn't need to be delivered,
+        * because by the time we exit icp_rm_try_to_deliver() the target
+        * processor may well have already consumed & completed it, and thus
+        * the rejected interrupt might actually be already acceptable.
+        */
+       if (icp_rm_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+               /*
+                * Delivery was successful, did we reject somebody else ?
+                */
+               if (reject && reject != XICS_IPI) {
+                       arch_spin_unlock(&ics->lock);
+                       new_irq = reject;
+                       goto again;
+               }
+       } else {
+               /*
+                * We failed to deliver the interrupt we need to set the
+                * resend map bit and mark the ICS state as needing a resend
+                */
+               set_bit(ics->icsid, icp->resend_map);
+               state->resend = 1;
+
+               /*
+                * If the need_resend flag got cleared in the ICP some time
+                * between icp_rm_try_to_deliver() atomic update and now, then
+                * we know it might have missed the resend_map bit. So we
+                * retry
+                */
+               smp_mb();
+               if (!icp->state.need_resend) {
+                       arch_spin_unlock(&ics->lock);
+                       goto again;
+               }
+       }
+ out:
+       arch_spin_unlock(&ics->lock);
+}
+
 static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
                             u8 new_cppr)
 {
@@ -184,8 +377,8 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
         * separately here as well.
         */
        if (resend) {
-               icp->rm_action |= XICS_RM_CHECK_RESEND;
-               icp->rm_resend_icp = icp;
+               icp->n_check_resend++;
+               icp_rm_check_resend(xics, icp);
        }
 }
 
@@ -300,16 +493,16 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
                }
        } while (!icp_rm_try_update(icp, old_state, new_state));
 
-       /* Pass rejects to virtual mode */
+       /* Handle reject in real mode */
        if (reject && reject != XICS_IPI) {
-               this_icp->rm_action |= XICS_RM_REJECT;
-               this_icp->rm_reject = reject;
+               this_icp->n_reject++;
+               icp_rm_deliver_irq(xics, icp, reject);
        }
 
-       /* Pass resends to virtual mode */
+       /* Handle resends in real mode */
        if (resend) {
-               this_icp->rm_action |= XICS_RM_CHECK_RESEND;
-               this_icp->rm_resend_icp = icp;
+               this_icp->n_check_resend++;
+               icp_rm_check_resend(xics, icp);
        }
 
        return check_too_hard(xics, this_icp);
@@ -365,10 +558,13 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 
        } while (!icp_rm_try_update(icp, old_state, new_state));
 
-       /* Pass rejects to virtual mode */
+       /*
+        * Check for rejects. They are handled by doing a new delivery
+        * attempt (see comments in icp_rm_deliver_irq).
+        */
        if (reject && reject != XICS_IPI) {
-               icp->rm_action |= XICS_RM_REJECT;
-               icp->rm_reject = reject;
+               icp->n_reject++;
+               icp_rm_deliver_irq(xics, icp, reject);
        }
  bail:
        return check_too_hard(xics, icp);
@@ -416,10 +612,10 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
                goto bail;
        state = &ics->irq_state[src];
 
-       /* Still asserted, resend it, we make it look like a reject */
+       /* Still asserted, resend it */
        if (state->asserted) {
-               icp->rm_action |= XICS_RM_REJECT;
-               icp->rm_reject = irq;
+               icp->n_reject++;
+               icp_rm_deliver_irq(xics, icp, irq);
        }
 
        if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
index 6cbf163..4d70df2 100644 (file)
@@ -172,6 +172,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
 kvmppc_primary_no_guest:
        /* We handle this much like a ceded vcpu */
+       /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+       mfspr   r3, SPRN_HDEC
+       mtspr   SPRN_DEC, r3
+       /*
+        * Make sure the primary has finished the MMU switch.
+        * We should never get here on a secondary thread, but
+        * check it for robustness' sake.
+        */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+65:    lbz     r0, VCORE_IN_GUEST(r5)
+       cmpwi   r0, 0
+       beq     65b
+       /* Set LPCR. */
+       ld      r8,VCORE_LPCR(r5)
+       mtspr   SPRN_LPCR,r8
+       isync
        /* set our bit in napping_threads */
        ld      r5, HSTATE_KVM_VCORE(r13)
        lbz     r7, HSTATE_PTID(r13)
@@ -182,7 +198,7 @@ kvmppc_primary_no_guest:
        or      r3, r3, r0
        stwcx.  r3, 0, r6
        bne     1b
-       /* order napping_threads update vs testing entry_exit_count */
+       /* order napping_threads update vs testing entry_exit_map */
        isync
        li      r12, 0
        lwz     r7, VCORE_ENTRY_EXIT(r5)
@@ -191,6 +207,7 @@ kvmppc_primary_no_guest:
        li      r3, NAPPING_NOVCPU
        stb     r3, HSTATE_NAPPING(r13)
 
+       li      r3, 0           /* Don't wake on privileged (OS) doorbell */
        b       kvm_do_nap
 
 kvm_novcpu_wakeup:
@@ -202,7 +219,7 @@ kvm_novcpu_wakeup:
 
        /* check the wake reason */
        bl      kvmppc_check_wake_reason
-       
+
        /* see if any other thread is already exiting */
        lwz     r0, VCORE_ENTRY_EXIT(r5)
        cmpwi   r0, 0x100
@@ -222,13 +239,37 @@ kvm_novcpu_wakeup:
        cmpdi   r3, 0
        bge     kvm_novcpu_exit
 
+       /* See if our timeslice has expired (HDEC is negative) */
+       mfspr   r0, SPRN_HDEC
+       li      r12, BOOK3S_INTERRUPT_HV_DECREMENTER
+       cmpwi   r0, 0
+       blt     kvm_novcpu_exit
+
        /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
        ld      r4, HSTATE_KVM_VCPU(r13)
        cmpdi   r4, 0
-       bne     kvmppc_got_guest
+       beq     kvmppc_primary_no_guest
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r4, VCPU_TB_RMENTRY
+       bl      kvmhv_start_timing
+#endif
+       b       kvmppc_got_guest
 
 kvm_novcpu_exit:
-       b       hdec_soon
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       cmpdi   r4, 0
+       beq     13f
+       addi    r3, r4, VCPU_TB_RMEXIT
+       bl      kvmhv_accumulate_time
+#endif
+13:    mr      r3, r12
+       stw     r12, 112-4(r1)
+       bl      kvmhv_commence_exit
+       nop
+       lwz     r12, 112-4(r1)
+       b       kvmhv_switch_to_host
 
 /*
  * We come in here when wakened from nap mode.
@@ -239,9 +280,9 @@ kvm_novcpu_exit:
 kvm_start_guest:
 
        /* Set runlatch bit the minute you wake up from nap */
-       mfspr   r1, SPRN_CTRLF
-       ori     r1, r1, 1
-       mtspr   SPRN_CTRLT, r1
+       mfspr   r0, SPRN_CTRLF
+       ori     r0, r0, 1
+       mtspr   SPRN_CTRLT, r0
 
        ld      r2,PACATOC(r13)
 
@@ -286,26 +327,21 @@ kvm_secondary_got_guest:
        ld      r6, PACA_DSCR(r13)
        std     r6, HSTATE_DSCR(r13)
 
+       /* Order load of vcore, ptid etc. after load of vcpu */
+       lwsync
        bl      kvmppc_hv_entry
 
        /* Back from the guest, go back to nap */
        /* Clear our vcpu pointer so we don't come back in early */
        li      r0, 0
-       std     r0, HSTATE_KVM_VCPU(r13)
        /*
-        * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing
-        * the nap_count, because once the increment to nap_count is
-        * visible we could be given another vcpu.
+        * Once we clear HSTATE_KVM_VCPU(r13), the code in
+        * kvmppc_run_core() is going to assume that all our vcpu
+        * state is visible in memory.  This lwsync makes sure
+        * that that is true.
         */
        lwsync
-
-       /* increment the nap count and then go to nap mode */
-       ld      r4, HSTATE_KVM_VCORE(r13)
-       addi    r4, r4, VCORE_NAP_COUNT
-51:    lwarx   r3, 0, r4
-       addi    r3, r3, 1
-       stwcx.  r3, 0, r4
-       bne     51b
+       std     r0, HSTATE_KVM_VCPU(r13)
 
 /*
  * At this point we have finished executing in the guest.
@@ -376,6 +412,14 @@ kvmppc_hv_entry:
        li      r6, KVM_GUEST_MODE_HOST_HV
        stb     r6, HSTATE_IN_GUEST(r13)
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       /* Store initial timestamp */
+       cmpdi   r4, 0
+       beq     1f
+       addi    r3, r4, VCPU_TB_RMENTRY
+       bl      kvmhv_start_timing
+1:
+#endif
        /* Clear out SLB */
        li      r6,0
        slbmte  r6,r6
@@ -387,21 +431,23 @@ kvmppc_hv_entry:
         * We don't have to lock against concurrent tlbies,
         * but we do have to coordinate across hardware threads.
         */
-       /* Increment entry count iff exit count is zero. */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       addi    r9,r5,VCORE_ENTRY_EXIT
-21:    lwarx   r3,0,r9
-       cmpwi   r3,0x100                /* any threads starting to exit? */
+       /* Set bit in entry map iff exit map is zero. */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       li      r7, 1
+       lbz     r6, HSTATE_PTID(r13)
+       sld     r7, r7, r6
+       addi    r9, r5, VCORE_ENTRY_EXIT
+21:    lwarx   r3, 0, r9
+       cmpwi   r3, 0x100               /* any threads starting to exit? */
        bge     secondary_too_late      /* if so we're too late to the party */
-       addi    r3,r3,1
-       stwcx.  r3,0,r9
+       or      r3, r3, r7
+       stwcx.  r3, 0, r9
        bne     21b
 
        /* Primary thread switches to guest partition. */
        ld      r9,VCORE_KVM(r5)        /* pointer to struct kvm */
-       lbz     r6,HSTATE_PTID(r13)
        cmpwi   r6,0
-       bne     20f
+       bne     10f
        ld      r6,KVM_SDR1(r9)
        lwz     r7,KVM_LPID(r9)
        li      r0,LPID_RSVD            /* switch to reserved LPID */
@@ -472,28 +518,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        li      r0,1
        stb     r0,VCORE_IN_GUEST(r5)   /* signal secondaries to continue */
-       b       10f
-
-       /* Secondary threads wait for primary to have done partition switch */
-20:    lbz     r0,VCORE_IN_GUEST(r5)
-       cmpwi   r0,0
-       beq     20b
-
-       /* Set LPCR and RMOR. */
-10:    ld      r8,VCORE_LPCR(r5)
-       mtspr   SPRN_LPCR,r8
-       ld      r8,KVM_RMOR(r9)
-       mtspr   SPRN_RMOR,r8
-       isync
-
-       /* Check if HDEC expires soon */
-       mfspr   r3,SPRN_HDEC
-       cmpwi   r3,512          /* 1 microsecond */
-       li      r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-       blt     hdec_soon
 
        /* Do we have a guest vcpu to run? */
-       cmpdi   r4, 0
+10:    cmpdi   r4, 0
        beq     kvmppc_primary_no_guest
 kvmppc_got_guest:
 
@@ -818,6 +845,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
        clrrdi  r6,r6,1
        mtspr   SPRN_CTRLT,r6
 4:
+       /* Secondary threads wait for primary to have done partition switch */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       lbz     r6, HSTATE_PTID(r13)
+       cmpwi   r6, 0
+       beq     21f
+       lbz     r0, VCORE_IN_GUEST(r5)
+       cmpwi   r0, 0
+       bne     21f
+       HMT_LOW
+20:    lbz     r0, VCORE_IN_GUEST(r5)
+       cmpwi   r0, 0
+       beq     20b
+       HMT_MEDIUM
+21:
+       /* Set LPCR. */
+       ld      r8,VCORE_LPCR(r5)
+       mtspr   SPRN_LPCR,r8
+       isync
+
+       /* Check if HDEC expires soon */
+       mfspr   r3, SPRN_HDEC
+       cmpwi   r3, 512         /* 1 microsecond */
+       blt     hdec_soon
+
        ld      r6, VCPU_CTR(r4)
        lwz     r7, VCPU_XER(r4)
 
@@ -880,6 +931,12 @@ fast_guest_return:
        li      r9, KVM_GUEST_MODE_GUEST_HV
        stb     r9, HSTATE_IN_GUEST(r13)
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       /* Accumulate timing */
+       addi    r3, r4, VCPU_TB_GUEST
+       bl      kvmhv_accumulate_time
+#endif
+
        /* Enter guest */
 
 BEGIN_FTR_SECTION
@@ -917,6 +974,27 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        hrfid
        b       .
 
+secondary_too_late:
+       li      r12, 0
+       cmpdi   r4, 0
+       beq     11f
+       stw     r12, VCPU_TRAP(r4)
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r4, VCPU_TB_RMEXIT
+       bl      kvmhv_accumulate_time
+#endif
+11:    b       kvmhv_switch_to_host
+
+hdec_soon:
+       li      r12, BOOK3S_INTERRUPT_HV_DECREMENTER
+       stw     r12, VCPU_TRAP(r4)
+       mr      r9, r4
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r4, VCPU_TB_RMEXIT
+       bl      kvmhv_accumulate_time
+#endif
+       b       guest_exit_cont
+
 /******************************************************************************
  *                                                                            *
  *                               Exit code                                    *
@@ -1002,6 +1080,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
        stw     r12,VCPU_TRAP(r9)
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r9, VCPU_TB_RMINTR
+       mr      r4, r9
+       bl      kvmhv_accumulate_time
+       ld      r5, VCPU_GPR(R5)(r9)
+       ld      r6, VCPU_GPR(R6)(r9)
+       ld      r7, VCPU_GPR(R7)(r9)
+       ld      r8, VCPU_GPR(R8)(r9)
+#endif
+
        /* Save HEIR (HV emulation assist reg) in emul_inst
           if this is an HEI (HV emulation interrupt, e40) */
        li      r3,KVM_INST_FETCH_FAILED
@@ -1028,34 +1116,37 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        bne     2f
        mfspr   r3,SPRN_HDEC
        cmpwi   r3,0
-       bge     ignore_hdec
+       mr      r4,r9
+       bge     fast_guest_return
 2:
        /* See if this is an hcall we can handle in real mode */
        cmpwi   r12,BOOK3S_INTERRUPT_SYSCALL
        beq     hcall_try_real_mode
 
+       /* Hypervisor doorbell - exit only if host IPI flag set */
+       cmpwi   r12, BOOK3S_INTERRUPT_H_DOORBELL
+       bne     3f
+       lbz     r0, HSTATE_HOST_IPI(r13)
+       beq     4f
+       b       guest_exit_cont
+3:
        /* External interrupt ? */
        cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
-       bne+    ext_interrupt_to_host
+       bne+    guest_exit_cont
 
        /* External interrupt, first check for host_ipi. If this is
         * set, we know the host wants us out so let's do it now
         */
        bl      kvmppc_read_intr
        cmpdi   r3, 0
-       bgt     ext_interrupt_to_host
+       bgt     guest_exit_cont
 
        /* Check if any CPU is heading out to the host, if so head out too */
-       ld      r5, HSTATE_KVM_VCORE(r13)
+4:     ld      r5, HSTATE_KVM_VCORE(r13)
        lwz     r0, VCORE_ENTRY_EXIT(r5)
        cmpwi   r0, 0x100
-       bge     ext_interrupt_to_host
-
-       /* Return to guest after delivering any pending interrupt */
        mr      r4, r9
-       b       deliver_guest_interrupt
-
-ext_interrupt_to_host:
+       blt     deliver_guest_interrupt
 
 guest_exit_cont:               /* r9 = vcpu, r12 = trap, r13 = paca */
        /* Save more register state  */
@@ -1065,7 +1156,7 @@ guest_exit_cont:          /* r9 = vcpu, r12 = trap, r13 = paca */
        stw     r7, VCPU_DSISR(r9)
        /* don't overwrite fault_dar/fault_dsisr if HDSI */
        cmpwi   r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
-       beq     6f
+       beq     mc_cont
        std     r6, VCPU_FAULT_DAR(r9)
        stw     r7, VCPU_FAULT_DSISR(r9)
 
@@ -1073,9 +1164,20 @@ guest_exit_cont:         /* r9 = vcpu, r12 = trap, r13 = paca */
        cmpwi   r12, BOOK3S_INTERRUPT_MACHINE_CHECK
        beq     machine_check_realmode
 mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r9, VCPU_TB_RMEXIT
+       mr      r4, r9
+       bl      kvmhv_accumulate_time
+#endif
+
+       /* Increment exit count, poke other threads to exit */
+       bl      kvmhv_commence_exit
+       nop
+       ld      r9, HSTATE_KVM_VCPU(r13)
+       lwz     r12, VCPU_TRAP(r9)
 
        /* Save guest CTRL register, set runlatch to 1 */
-6:     mfspr   r6,SPRN_CTRLF
+       mfspr   r6,SPRN_CTRLF
        stw     r6,VCPU_CTRL(r9)
        andi.   r0,r6,1
        bne     4f
@@ -1417,68 +1519,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        slbia
        ptesync
 
-hdec_soon:                     /* r12 = trap, r13 = paca */
        /*
         * POWER7/POWER8 guest -> host partition switch code.
         * We don't have to lock against tlbies but we do
         * have to coordinate the hardware threads.
         */
-       /* Increment the threads-exiting-guest count in the 0xff00
-          bits of vcore->entry_exit_count */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       addi    r6,r5,VCORE_ENTRY_EXIT
-41:    lwarx   r3,0,r6
-       addi    r0,r3,0x100
-       stwcx.  r0,0,r6
-       bne     41b
-       isync           /* order stwcx. vs. reading napping_threads */
-
-       /*
-        * At this point we have an interrupt that we have to pass
-        * up to the kernel or qemu; we can't handle it in real mode.
-        * Thus we have to do a partition switch, so we have to
-        * collect the other threads, if we are the first thread
-        * to take an interrupt.  To do this, we set the HDEC to 0,
-        * which causes an HDEC interrupt in all threads within 2ns
-        * because the HDEC register is shared between all 4 threads.
-        * However, we don't need to bother if this is an HDEC
-        * interrupt, since the other threads will already be on their
-        * way here in that case.
-        */
-       cmpwi   r3,0x100        /* Are we the first here? */
-       bge     43f
-       cmpwi   r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-       beq     40f
-       li      r0,0
-       mtspr   SPRN_HDEC,r0
-40:
-       /*
-        * Send an IPI to any napping threads, since an HDEC interrupt
-        * doesn't wake CPUs up from nap.
-        */
-       lwz     r3,VCORE_NAPPING_THREADS(r5)
-       lbz     r4,HSTATE_PTID(r13)
-       li      r0,1
-       sld     r0,r0,r4
-       andc.   r3,r3,r0                /* no sense IPI'ing ourselves */
-       beq     43f
-       /* Order entry/exit update vs. IPIs */
-       sync
-       mulli   r4,r4,PACA_SIZE         /* get paca for thread 0 */
-       subf    r6,r4,r13
-42:    andi.   r0,r3,1
-       beq     44f
-       ld      r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
-       li      r0,IPI_PRIORITY
-       li      r7,XICS_MFRR
-       stbcix  r0,r7,r8                /* trigger the IPI */
-44:    srdi.   r3,r3,1
-       addi    r6,r6,PACA_SIZE
-       bne     42b
-
-secondary_too_late:
+kvmhv_switch_to_host:
        /* Secondary threads wait for primary to do partition switch */
-43:    ld      r5,HSTATE_KVM_VCORE(r13)
+       ld      r5,HSTATE_KVM_VCORE(r13)
        ld      r4,VCORE_KVM(r5)        /* pointer to struct kvm */
        lbz     r3,HSTATE_PTID(r13)
        cmpwi   r3,0
@@ -1562,6 +1610,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 1:     addi    r8,r8,16
        .endr
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       /* Finish timing, if we have a vcpu */
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       cmpdi   r4, 0
+       li      r3, 0
+       beq     2f
+       bl      kvmhv_accumulate_time
+2:
+#endif
        /* Unset guest mode */
        li      r0, KVM_GUEST_MODE_NONE
        stb     r0, HSTATE_IN_GUEST(r13)
@@ -1696,8 +1753,10 @@ kvmppc_hisi:
  * Returns to the guest if we handle it, or continues on up to
  * the kernel if we can't (i.e. if we don't have a handler for
  * it, or if the handler returns H_TOO_HARD).
+ *
+ * r5 - r8 contain hcall args,
+ * r9 = vcpu, r10 = pc, r11 = msr, r12 = trap, r13 = paca
  */
-       .globl  hcall_try_real_mode
 hcall_try_real_mode:
        ld      r3,VCPU_GPR(R3)(r9)
        andi.   r0,r11,MSR_PR
@@ -1839,13 +1898,124 @@ hcall_real_table:
        .long   0               /* 0x12c */
        .long   0               /* 0x130 */
        .long   DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
+       .long   0               /* 0x138 */
+       .long   0               /* 0x13c */
+       .long   0               /* 0x140 */
+       .long   0               /* 0x144 */
+       .long   0               /* 0x148 */
+       .long   0               /* 0x14c */
+       .long   0               /* 0x150 */
+       .long   0               /* 0x154 */
+       .long   0               /* 0x158 */
+       .long   0               /* 0x15c */
+       .long   0               /* 0x160 */
+       .long   0               /* 0x164 */
+       .long   0               /* 0x168 */
+       .long   0               /* 0x16c */
+       .long   0               /* 0x170 */
+       .long   0               /* 0x174 */
+       .long   0               /* 0x178 */
+       .long   0               /* 0x17c */
+       .long   0               /* 0x180 */
+       .long   0               /* 0x184 */
+       .long   0               /* 0x188 */
+       .long   0               /* 0x18c */
+       .long   0               /* 0x190 */
+       .long   0               /* 0x194 */
+       .long   0               /* 0x198 */
+       .long   0               /* 0x19c */
+       .long   0               /* 0x1a0 */
+       .long   0               /* 0x1a4 */
+       .long   0               /* 0x1a8 */
+       .long   0               /* 0x1ac */
+       .long   0               /* 0x1b0 */
+       .long   0               /* 0x1b4 */
+       .long   0               /* 0x1b8 */
+       .long   0               /* 0x1bc */
+       .long   0               /* 0x1c0 */
+       .long   0               /* 0x1c4 */
+       .long   0               /* 0x1c8 */
+       .long   0               /* 0x1cc */
+       .long   0               /* 0x1d0 */
+       .long   0               /* 0x1d4 */
+       .long   0               /* 0x1d8 */
+       .long   0               /* 0x1dc */
+       .long   0               /* 0x1e0 */
+       .long   0               /* 0x1e4 */
+       .long   0               /* 0x1e8 */
+       .long   0               /* 0x1ec */
+       .long   0               /* 0x1f0 */
+       .long   0               /* 0x1f4 */
+       .long   0               /* 0x1f8 */
+       .long   0               /* 0x1fc */
+       .long   0               /* 0x200 */
+       .long   0               /* 0x204 */
+       .long   0               /* 0x208 */
+       .long   0               /* 0x20c */
+       .long   0               /* 0x210 */
+       .long   0               /* 0x214 */
+       .long   0               /* 0x218 */
+       .long   0               /* 0x21c */
+       .long   0               /* 0x220 */
+       .long   0               /* 0x224 */
+       .long   0               /* 0x228 */
+       .long   0               /* 0x22c */
+       .long   0               /* 0x230 */
+       .long   0               /* 0x234 */
+       .long   0               /* 0x238 */
+       .long   0               /* 0x23c */
+       .long   0               /* 0x240 */
+       .long   0               /* 0x244 */
+       .long   0               /* 0x248 */
+       .long   0               /* 0x24c */
+       .long   0               /* 0x250 */
+       .long   0               /* 0x254 */
+       .long   0               /* 0x258 */
+       .long   0               /* 0x25c */
+       .long   0               /* 0x260 */
+       .long   0               /* 0x264 */
+       .long   0               /* 0x268 */
+       .long   0               /* 0x26c */
+       .long   0               /* 0x270 */
+       .long   0               /* 0x274 */
+       .long   0               /* 0x278 */
+       .long   0               /* 0x27c */
+       .long   0               /* 0x280 */
+       .long   0               /* 0x284 */
+       .long   0               /* 0x288 */
+       .long   0               /* 0x28c */
+       .long   0               /* 0x290 */
+       .long   0               /* 0x294 */
+       .long   0               /* 0x298 */
+       .long   0               /* 0x29c */
+       .long   0               /* 0x2a0 */
+       .long   0               /* 0x2a4 */
+       .long   0               /* 0x2a8 */
+       .long   0               /* 0x2ac */
+       .long   0               /* 0x2b0 */
+       .long   0               /* 0x2b4 */
+       .long   0               /* 0x2b8 */
+       .long   0               /* 0x2bc */
+       .long   0               /* 0x2c0 */
+       .long   0               /* 0x2c4 */
+       .long   0               /* 0x2c8 */
+       .long   0               /* 0x2cc */
+       .long   0               /* 0x2d0 */
+       .long   0               /* 0x2d4 */
+       .long   0               /* 0x2d8 */
+       .long   0               /* 0x2dc */
+       .long   0               /* 0x2e0 */
+       .long   0               /* 0x2e4 */
+       .long   0               /* 0x2e8 */
+       .long   0               /* 0x2ec */
+       .long   0               /* 0x2f0 */
+       .long   0               /* 0x2f4 */
+       .long   0               /* 0x2f8 */
+       .long   0               /* 0x2fc */
+       .long   DOTSYM(kvmppc_h_random) - hcall_real_table
        .globl  hcall_real_table_end
 hcall_real_table_end:
 
-ignore_hdec:
-       mr      r4,r9
-       b       fast_guest_return
-
 _GLOBAL(kvmppc_h_set_xdabr)
        andi.   r0, r5, DABRX_USER | DABRX_KERNEL
        beq     6f
@@ -1884,7 +2054,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        li      r3, 0
        blr
 
-_GLOBAL(kvmppc_h_cede)
+_GLOBAL(kvmppc_h_cede)         /* r3 = vcpu pointer, r11 = msr, r13 = paca */
        ori     r11,r11,MSR_EE
        std     r11,VCPU_MSR(r3)
        li      r0,1
@@ -1893,8 +2063,8 @@ _GLOBAL(kvmppc_h_cede)
        lbz     r5,VCPU_PRODDED(r3)
        cmpwi   r5,0
        bne     kvm_cede_prodded
-       li      r0,0            /* set trap to 0 to say hcall is handled */
-       stw     r0,VCPU_TRAP(r3)
+       li      r12,0           /* set trap to 0 to say hcall is handled */
+       stw     r12,VCPU_TRAP(r3)
        li      r0,H_SUCCESS
        std     r0,VCPU_GPR(R3)(r3)
 
@@ -1912,12 +2082,11 @@ _GLOBAL(kvmppc_h_cede)
        addi    r6,r5,VCORE_NAPPING_THREADS
 31:    lwarx   r4,0,r6
        or      r4,r4,r0
-       PPC_POPCNTW(R7,R4)
-       cmpw    r7,r8
-       bge     kvm_cede_exit
+       cmpw    r4,r8
+       beq     kvm_cede_exit
        stwcx.  r4,0,r6
        bne     31b
-       /* order napping_threads update vs testing entry_exit_count */
+       /* order napping_threads update vs testing entry_exit_map */
        isync
        li      r0,NAPPING_CEDE
        stb     r0,HSTATE_NAPPING(r13)
@@ -1954,22 +2123,53 @@ _GLOBAL(kvmppc_h_cede)
        /* save FP state */
        bl      kvmppc_save_fp
 
+       /*
+        * Set DEC to the smaller of DEC and HDEC, so that we wake
+        * no later than the end of our timeslice (HDEC interrupts
+        * don't wake us from nap).
+        */
+       mfspr   r3, SPRN_DEC
+       mfspr   r4, SPRN_HDEC
+       mftb    r5
+       cmpw    r3, r4
+       ble     67f
+       mtspr   SPRN_DEC, r4
+67:
+       /* save expiry time of guest decrementer */
+       extsw   r3, r3
+       add     r3, r3, r5
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       ld      r6, VCORE_TB_OFFSET(r5)
+       subf    r3, r6, r3      /* convert to host TB value */
+       std     r3, VCPU_DEC_EXPIRES(r4)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       addi    r3, r4, VCPU_TB_CEDE
+       bl      kvmhv_accumulate_time
+#endif
+
+       lis     r3, LPCR_PECEDP@h       /* Do wake on privileged doorbell */
+
        /*
         * Take a nap until a decrementer or external or doobell interrupt
-        * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
-        * runlatch bit before napping.
+        * occurs, with PECE1 and PECE0 set in LPCR.
+        * On POWER8, set PECEDH, and if we are ceding, also set PECEDP.
+        * Also clear the runlatch bit before napping.
         */
 kvm_do_nap:
-       mfspr   r2, SPRN_CTRLF
-       clrrdi  r2, r2, 1
-       mtspr   SPRN_CTRLT, r2
+       mfspr   r0, SPRN_CTRLF
+       clrrdi  r0, r0, 1
+       mtspr   SPRN_CTRLT, r0
 
        li      r0,1
        stb     r0,HSTATE_HWTHREAD_REQ(r13)
        mfspr   r5,SPRN_LPCR
        ori     r5,r5,LPCR_PECE0 | LPCR_PECE1
 BEGIN_FTR_SECTION
-       oris    r5,r5,LPCR_PECEDP@h
+       ori     r5, r5, LPCR_PECEDH
+       rlwimi  r5, r3, 0, LPCR_PECEDP
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
        mtspr   SPRN_LPCR,r5
        isync
@@ -1994,9 +2194,23 @@ kvm_end_cede:
        /* Woken by external or decrementer interrupt */
        ld      r1, HSTATE_HOST_R1(r13)
 
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r4, VCPU_TB_RMINTR
+       bl      kvmhv_accumulate_time
+#endif
+
        /* load up FP state */
        bl      kvmppc_load_fp
 
+       /* Restore guest decrementer */
+       ld      r3, VCPU_DEC_EXPIRES(r4)
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       ld      r6, VCORE_TB_OFFSET(r5)
+       add     r3, r3, r6      /* convert host TB to guest TB value */
+       mftb    r7
+       subf    r3, r7, r3
+       mtspr   SPRN_DEC, r3
+
        /* Load NV GPRS */
        ld      r14, VCPU_GPR(R14)(r4)
        ld      r15, VCPU_GPR(R15)(r4)
@@ -2057,7 +2271,8 @@ kvm_cede_prodded:
 
        /* we've ceded but we want to give control to the host */
 kvm_cede_exit:
-       b       hcall_real_fallback
+       ld      r9, HSTATE_KVM_VCPU(r13)
+       b       guest_exit_cont
 
        /* Try to handle a machine check in real mode */
 machine_check_realmode:
@@ -2089,13 +2304,14 @@ machine_check_realmode:
 
 /*
  * Check the reason we woke from nap, and take appropriate action.
- * Returns:
+ * Returns (in r3):
  *     0 if nothing needs to be done
  *     1 if something happened that needs to be handled by the host
- *     -1 if there was a guest wakeup (IPI)
+ *     -1 if there was a guest wakeup (IPI or msgsnd)
  *
  * Also sets r12 to the interrupt vector for any interrupt that needs
  * to be handled now by the host (0x500 for external interrupt), or zero.
+ * Modifies r0, r6, r7, r8.
  */
 kvmppc_check_wake_reason:
        mfspr   r6, SPRN_SRR1
@@ -2122,7 +2338,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 
        /* hypervisor doorbell */
 3:     li      r12, BOOK3S_INTERRUPT_H_DOORBELL
+       /* see if it's a host IPI */
        li      r3, 1
+       lbz     r0, HSTATE_HOST_IPI(r13)
+       cmpwi   r0, 0
+       bnelr
+       /* if not, clear it and return -1 */
+       lis     r6, (PPC_DBELL_SERVER << (63-36))@h
+       PPC_MSGCLR(6)
+       li      r3, -1
        blr
 
 /*
@@ -2131,6 +2355,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  *     0 if no interrupt is pending
  *     1 if an interrupt is pending that needs to be handled by the host
  *     -1 if there was a guest wakeup IPI (which has now been cleared)
+ * Modifies r0, r6, r7, r8, returns value in r3.
  */
 kvmppc_read_intr:
        /* see if a host IPI is pending */
@@ -2185,6 +2410,7 @@ kvmppc_read_intr:
        bne-    43f
 
        /* OK, it's an IPI for us */
+       li      r12, 0
        li      r3, -1
 1:     blr
 
@@ -2314,3 +2540,62 @@ kvmppc_fix_pmao:
        mtspr   SPRN_PMC6, r3
        isync
        blr
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+/*
+ * Start timing an activity
+ * r3 = pointer to time accumulation struct, r4 = vcpu
+ */
+kvmhv_start_timing:
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       lbz     r6, VCORE_IN_GUEST(r5)
+       cmpwi   r6, 0
+       beq     5f                              /* if in guest, need to */
+       ld      r6, VCORE_TB_OFFSET(r5)         /* subtract timebase offset */
+5:     mftb    r5
+       subf    r5, r6, r5
+       std     r3, VCPU_CUR_ACTIVITY(r4)
+       std     r5, VCPU_ACTIVITY_START(r4)
+       blr
+
+/*
+ * Accumulate time to one activity and start another.
+ * r3 = pointer to new time accumulation struct, r4 = vcpu
+ */
+kvmhv_accumulate_time:
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       lbz     r8, VCORE_IN_GUEST(r5)
+       cmpwi   r8, 0
+       beq     4f                              /* if in guest, need to */
+       ld      r8, VCORE_TB_OFFSET(r5)         /* subtract timebase offset */
+4:     ld      r5, VCPU_CUR_ACTIVITY(r4)
+       ld      r6, VCPU_ACTIVITY_START(r4)
+       std     r3, VCPU_CUR_ACTIVITY(r4)
+       mftb    r7
+       subf    r7, r8, r7
+       std     r7, VCPU_ACTIVITY_START(r4)
+       cmpdi   r5, 0
+       beqlr
+       subf    r3, r6, r7
+       ld      r8, TAS_SEQCOUNT(r5)
+       cmpdi   r8, 0
+       addi    r8, r8, 1
+       std     r8, TAS_SEQCOUNT(r5)
+       lwsync
+       ld      r7, TAS_TOTAL(r5)
+       add     r7, r7, r3
+       std     r7, TAS_TOTAL(r5)
+       ld      r6, TAS_MIN(r5)
+       ld      r7, TAS_MAX(r5)
+       beq     3f
+       cmpd    r3, r6
+       bge     1f
+3:     std     r3, TAS_MIN(r5)
+1:     cmpd    r3, r7
+       ble     2f
+       std     r3, TAS_MAX(r5)
+2:     lwsync
+       addi    r8, r8, 1
+       std     r8, TAS_SEQCOUNT(r5)
+       blr
+#endif
index ce3c893..f2c75a1 100644 (file)
@@ -258,6 +258,28 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
        return EMULATE_DONE;
 }
 
+static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+       long rc;
+
+       rc = kvmppc_h_logical_ci_load(vcpu);
+       if (rc == H_TOO_HARD)
+               return EMULATE_FAIL;
+       kvmppc_set_gpr(vcpu, 3, rc);
+       return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+       long rc;
+
+       rc = kvmppc_h_logical_ci_store(vcpu);
+       if (rc == H_TOO_HARD)
+               return EMULATE_FAIL;
+       kvmppc_set_gpr(vcpu, 3, rc);
+       return EMULATE_DONE;
+}
+
 static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
 {
        long rc = kvmppc_xics_hcall(vcpu, cmd);
@@ -290,6 +312,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
                clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
                vcpu->stat.halt_wakeup++;
                return EMULATE_DONE;
+       case H_LOGICAL_CI_LOAD:
+               return kvmppc_h_pr_logical_ci_load(vcpu);
+       case H_LOGICAL_CI_STORE:
+               return kvmppc_h_pr_logical_ci_store(vcpu);
        case H_XIRR:
        case H_CPPR:
        case H_EOI:
@@ -323,6 +349,8 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
        case H_BULK_REMOVE:
        case H_PUT_TCE:
        case H_CEDE:
+       case H_LOGICAL_CI_LOAD:
+       case H_LOGICAL_CI_STORE:
 #ifdef CONFIG_KVM_XICS
        case H_XIRR:
        case H_CPPR:
index a4a8d9f..8f3e6cc 100644 (file)
@@ -20,6 +20,7 @@
 #include <asm/xics.h>
 #include <asm/debug.h>
 #include <asm/time.h>
+#include <asm/spinlock.h>
 
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -39,7 +40,7 @@
  * LOCKING
  * =======
  *
- * Each ICS has a mutex protecting the information about the IRQ
+ * Each ICS has a spin lock protecting the information about the IRQ
  * sources and avoiding simultaneous deliveries if the same interrupt.
  *
  * ICP operations are done via a single compare & swap transaction
@@ -109,7 +110,10 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
 {
        int i;
 
-       mutex_lock(&ics->lock);
+       unsigned long flags;
+
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
 
        for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
                struct ics_irq_state *state = &ics->irq_state[i];
@@ -120,12 +124,15 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
                XICS_DBG("resend %#x prio %#x\n", state->number,
                              state->priority);
 
-               mutex_unlock(&ics->lock);
+               arch_spin_unlock(&ics->lock);
+               local_irq_restore(flags);
                icp_deliver_irq(xics, icp, state->number);
-               mutex_lock(&ics->lock);
+               local_irq_save(flags);
+               arch_spin_lock(&ics->lock);
        }
 
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
 }
 
 static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
@@ -133,8 +140,10 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
                       u32 server, u32 priority, u32 saved_priority)
 {
        bool deliver;
+       unsigned long flags;
 
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
 
        state->server = server;
        state->priority = priority;
@@ -145,7 +154,8 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
                deliver = true;
        }
 
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
 
        return deliver;
 }
@@ -186,6 +196,7 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
        struct kvmppc_ics *ics;
        struct ics_irq_state *state;
        u16 src;
+       unsigned long flags;
 
        if (!xics)
                return -ENODEV;
@@ -195,10 +206,12 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
                return -EINVAL;
        state = &ics->irq_state[src];
 
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
        *server = state->server;
        *priority = state->priority;
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
 
        return 0;
 }
@@ -365,6 +378,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
        struct kvmppc_ics *ics;
        u32 reject;
        u16 src;
+       unsigned long flags;
 
        /*
         * This is used both for initial delivery of an interrupt and
@@ -391,7 +405,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
        state = &ics->irq_state[src];
 
        /* Get a lock on the ICS */
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
 
        /* Get our server */
        if (!icp || state->server != icp->server_num) {
@@ -434,7 +449,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
         *
         * Note that if successful, the new delivery might have itself
         * rejected an interrupt that was "delivered" before we took the
-        * icp mutex.
+        * ics spin lock.
         *
         * In this case we do the whole sequence all over again for the
         * new guy. We cannot assume that the rejected interrupt is less
@@ -448,7 +463,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
                 * Delivery was successful, did we reject somebody else ?
                 */
                if (reject && reject != XICS_IPI) {
-                       mutex_unlock(&ics->lock);
+                       arch_spin_unlock(&ics->lock);
+                       local_irq_restore(flags);
                        new_irq = reject;
                        goto again;
                }
@@ -468,12 +484,14 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
                 */
                smp_mb();
                if (!icp->state.need_resend) {
-                       mutex_unlock(&ics->lock);
+                       arch_spin_unlock(&ics->lock);
+                       local_irq_restore(flags);
                        goto again;
                }
        }
  out:
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
 }
 
 static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
@@ -802,14 +820,22 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
        XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
                 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
 
-       if (icp->rm_action & XICS_RM_KICK_VCPU)
+       if (icp->rm_action & XICS_RM_KICK_VCPU) {
+               icp->n_rm_kick_vcpu++;
                kvmppc_fast_vcpu_kick(icp->rm_kick_target);
-       if (icp->rm_action & XICS_RM_CHECK_RESEND)
+       }
+       if (icp->rm_action & XICS_RM_CHECK_RESEND) {
+               icp->n_rm_check_resend++;
                icp_check_resend(xics, icp->rm_resend_icp);
-       if (icp->rm_action & XICS_RM_REJECT)
+       }
+       if (icp->rm_action & XICS_RM_REJECT) {
+               icp->n_rm_reject++;
                icp_deliver_irq(xics, icp, icp->rm_reject);
-       if (icp->rm_action & XICS_RM_NOTIFY_EOI)
+       }
+       if (icp->rm_action & XICS_RM_NOTIFY_EOI) {
+               icp->n_rm_notify_eoi++;
                kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
+       }
 
        icp->rm_action = 0;
 
@@ -872,10 +898,21 @@ static int xics_debug_show(struct seq_file *m, void *private)
        struct kvm *kvm = xics->kvm;
        struct kvm_vcpu *vcpu;
        int icsid, i;
+       unsigned long flags;
+       unsigned long t_rm_kick_vcpu, t_rm_check_resend;
+       unsigned long t_rm_reject, t_rm_notify_eoi;
+       unsigned long t_reject, t_check_resend;
 
        if (!kvm)
                return 0;
 
+       t_rm_kick_vcpu = 0;
+       t_rm_notify_eoi = 0;
+       t_rm_check_resend = 0;
+       t_rm_reject = 0;
+       t_check_resend = 0;
+       t_reject = 0;
+
        seq_printf(m, "=========\nICP state\n=========\n");
 
        kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -890,8 +927,19 @@ static int xics_debug_show(struct seq_file *m, void *private)
                           icp->server_num, state.xisr,
                           state.pending_pri, state.cppr, state.mfrr,
                           state.out_ee, state.need_resend);
+               t_rm_kick_vcpu += icp->n_rm_kick_vcpu;
+               t_rm_notify_eoi += icp->n_rm_notify_eoi;
+               t_rm_check_resend += icp->n_rm_check_resend;
+               t_rm_reject += icp->n_rm_reject;
+               t_check_resend += icp->n_check_resend;
+               t_reject += icp->n_reject;
        }
 
+       seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu reject=%lu notify_eoi=%lu\n",
+                       t_rm_kick_vcpu, t_rm_check_resend,
+                       t_rm_reject, t_rm_notify_eoi);
+       seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n",
+                       t_check_resend, t_reject);
        for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
                struct kvmppc_ics *ics = xics->ics[icsid];
 
@@ -901,7 +949,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
                seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
                           icsid);
 
-               mutex_lock(&ics->lock);
+               local_irq_save(flags);
+               arch_spin_lock(&ics->lock);
 
                for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
                        struct ics_irq_state *irq = &ics->irq_state[i];
@@ -912,7 +961,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
                                   irq->resend, irq->masked_pending);
 
                }
-               mutex_unlock(&ics->lock);
+               arch_spin_unlock(&ics->lock);
+               local_irq_restore(flags);
        }
        return 0;
 }
@@ -965,7 +1015,6 @@ static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
        if (!ics)
                goto out;
 
-       mutex_init(&ics->lock);
        ics->icsid = icsid;
 
        for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
@@ -1107,13 +1156,15 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
        u64 __user *ubufp = (u64 __user *) addr;
        u16 idx;
        u64 val, prio;
+       unsigned long flags;
 
        ics = kvmppc_xics_find_ics(xics, irq, &idx);
        if (!ics)
                return -ENOENT;
 
        irqp = &ics->irq_state[idx];
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
        ret = -ENOENT;
        if (irqp->exists) {
                val = irqp->server;
@@ -1129,7 +1180,8 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
                        val |= KVM_XICS_PENDING;
                ret = 0;
        }
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
 
        if (!ret && put_user(val, ubufp))
                ret = -EFAULT;
@@ -1146,6 +1198,7 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
        u64 val;
        u8 prio;
        u32 server;
+       unsigned long flags;
 
        if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
                return -ENOENT;
@@ -1166,7 +1219,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
            kvmppc_xics_find_server(xics->kvm, server) == NULL)
                return -EINVAL;
 
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
        irqp->server = server;
        irqp->saved_priority = prio;
        if (val & KVM_XICS_MASKED)
@@ -1178,7 +1232,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
        if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
                irqp->asserted = 1;
        irqp->exists = 1;
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
 
        if (val & KVM_XICS_PENDING)
                icp_deliver_irq(xics, NULL, irqp->number);
index 73f0f27..56ea44f 100644 (file)
@@ -78,13 +78,22 @@ struct kvmppc_icp {
        u32  rm_reject;
        u32  rm_eoied_irq;
 
+       /* Counters for each reason we exited real mode */
+       unsigned long n_rm_kick_vcpu;
+       unsigned long n_rm_check_resend;
+       unsigned long n_rm_reject;
+       unsigned long n_rm_notify_eoi;
+       /* Counters for handling ICP processing in real mode */
+       unsigned long n_check_resend;
+       unsigned long n_reject;
+
        /* Debug stuff for real mode */
        union kvmppc_icp_state rm_dbgstate;
        struct kvm_vcpu *rm_dbgtgt;
 };
 
 struct kvmppc_ics {
-       struct mutex lock;
+       arch_spinlock_t lock;
        u16 icsid;
        struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
 };
@@ -96,6 +105,8 @@ struct kvmppc_xics {
        u32 max_icsid;
        bool real_mode;
        bool real_mode_dbg;
+       u32 err_noics;
+       u32 err_noicp;
        struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
 };
 
index cc536d4..4d33e19 100644 (file)
@@ -338,6 +338,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
        pte_t *ptep;
        unsigned int wimg = 0;
        pgd_t *pgdir;
+       unsigned long flags;
 
        /* used to check for invalidations in progress */
        mmu_seq = kvm->mmu_notifier_seq;
@@ -468,15 +469,28 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 
        pgdir = vcpu_e500->vcpu.arch.pgdir;
-       ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages);
-       if (pte_present(*ptep))
-               wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
-       else {
-               if (printk_ratelimit())
-                       pr_err("%s: pte not present: gfn %lx, pfn %lx\n",
-                               __func__, (long)gfn, pfn);
-               ret = -EINVAL;
-               goto out;
+       /*
+        * We are just looking at the wimg bits, so we don't
+        * care much about the trans splitting bit.
+        * We are holding kvm->mmu_lock so a notifier invalidate
+        * can't run hence pfn won't change.
+        */
+       local_irq_save(flags);
+       ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL);
+       if (ptep) {
+               pte_t pte = READ_ONCE(*ptep);
+
+               if (pte_present(pte)) {
+                       wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
+                               MAS2_WIMGE_MASK;
+                       local_irq_restore(flags);
+               } else {
+                       local_irq_restore(flags);
+                       pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
+                                          __func__, (long)gfn, pfn);
+                       ret = -EINVAL;
+                       goto out;
+               }
        }
        kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
 
index 91bbc84..ac3ddf1 100644 (file)
@@ -529,6 +529,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_PPC_RMA:
                r = 0;
                break;
+       case KVM_CAP_PPC_HWRNG:
+               r = kvmppc_hwrng_present();
+               break;
 #endif
        case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
index 2c2022d..fda236f 100644 (file)
@@ -1066,7 +1066,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 #endif /* CONFIG_PPC_64K_PAGES */
 
        /* Get PTE and page size from page tables */
-       ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
+       ptep = __find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
        if (ptep == NULL || !pte_present(*ptep)) {
                DBG_LOW(" no PTE !\n");
                rc = 1;
@@ -1394,6 +1394,7 @@ tm_abort:
                tm_abort(TM_CAUSE_TLBI);
        }
 #endif
+       return;
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
index fa9d5c2..0ce968b 100644 (file)
@@ -109,7 +109,7 @@ int pgd_huge(pgd_t pgd)
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
        /* Only called for hugetlbfs pages, hence can ignore THP */
-       return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
+       return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
 }
 
 static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -581,6 +581,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
        pmd = pmd_offset(pud, start);
        pud_clear(pud);
        pmd_free_tlb(tlb, pmd, start);
+       mm_dec_nr_pmds(tlb->mm);
 }
 
 static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -681,28 +682,35 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
        } while (addr = next, addr != end);
 }
 
+/*
+ * We are holding mmap_sem, so a parallel huge page collapse cannot run.
+ * To prevent hugepage split, disable irq.
+ */
 struct page *
 follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
 {
        pte_t *ptep;
        struct page *page;
        unsigned shift;
-       unsigned long mask;
+       unsigned long mask, flags;
        /*
         * Transparent hugepages are handled by generic code. We can skip them
         * here.
         */
+       local_irq_save(flags);
        ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
 
        /* Verify it is a huge page else bail. */
-       if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep))
+       if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) {
+               local_irq_restore(flags);
                return ERR_PTR(-EINVAL);
-
+       }
        mask = (1UL << shift) - 1;
        page = pte_page(*ptep);
        if (page)
                page += (address & mask) / PAGE_SIZE;
 
+       local_irq_restore(flags);
        return page;
 }
 
@@ -949,9 +957,12 @@ void flush_dcache_icache_hugepage(struct page *page)
  *
  * So long as we atomically load page table pointers we are safe against teardown,
  * we can follow the address down to the the page and take a ref on it.
+ * This function need to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
  */
 
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+                                  unsigned *shift)
 {
        pgd_t pgd, *pgdp;
        pud_t pud, *pudp;
@@ -1003,12 +1014,11 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
                         * A hugepage collapse is captured by pmd_none, because
                         * it mark the pmd none and do a hpte invalidate.
                         *
-                        * A hugepage split is captured by pmd_trans_splitting
-                        * because we mark the pmd trans splitting and do a
-                        * hpte invalidate
-                        *
+                        * We don't worry about pmd_trans_splitting here, The
+                        * caller if it needs to handle the splitting case
+                        * should check for that.
                         */
-                       if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+                       if (pmd_none(pmd))
                                return NULL;
 
                        if (pmd_huge(pmd) || pmd_large(pmd)) {
@@ -1030,7 +1040,7 @@ out:
                *shift = pdshift;
        return ret_pte;
 }
-EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
+EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
 
 int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
index ead5535..ff09cde 100644 (file)
@@ -111,41 +111,45 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
  * interrupt context, so if the access faults, we read the page tables
  * to find which page (if any) is mapped and access it directly.
  */
-static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
+static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
 {
+       int ret = -EFAULT;
        pgd_t *pgdir;
        pte_t *ptep, pte;
        unsigned shift;
        unsigned long addr = (unsigned long) ptr;
        unsigned long offset;
-       unsigned long pfn;
+       unsigned long pfn, flags;
        void *kaddr;
 
        pgdir = current->mm->pgd;
        if (!pgdir)
                return -EFAULT;
 
+       local_irq_save(flags);
        ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+       if (!ptep)
+               goto err_out;
        if (!shift)
                shift = PAGE_SHIFT;
 
        /* align address to page boundary */
        offset = addr & ((1UL << shift) - 1);
-       addr -= offset;
 
-       if (ptep == NULL)
-               return -EFAULT;
-       pte = *ptep;
+       pte = READ_ONCE(*ptep);
        if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
-               return -EFAULT;
+               goto err_out;
        pfn = pte_pfn(pte);
        if (!page_is_ram(pfn))
-               return -EFAULT;
+               goto err_out;
 
        /* no highmem to worry about here */
        kaddr = pfn_to_kaddr(pfn);
-       memcpy(ret, kaddr + offset, nb);
-       return 0;
+       memcpy(buf, kaddr + offset, nb);
+       ret = 0;
+err_out:
+       local_irq_restore(flags);
+       return ret;
 }
 
 static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
index 1a3429e..1ba6307 100644 (file)
@@ -111,7 +111,7 @@ out:
 static int
 spufs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if ((attr->ia_valid & ATTR_SIZE) &&
            (attr->ia_size != inode->i_size))
@@ -163,14 +163,14 @@ static void spufs_prune_dir(struct dentry *dir)
 {
        struct dentry *dentry, *tmp;
 
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
        list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
                spin_lock(&dentry->d_lock);
-               if (!(d_unhashed(dentry)) && dentry->d_inode) {
+               if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) {
                        dget_dlock(dentry);
                        __d_drop(dentry);
                        spin_unlock(&dentry->d_lock);
-                       simple_unlink(dir->d_inode, dentry);
+                       simple_unlink(d_inode(dir), dentry);
                        /* XXX: what was dcache_lock protecting here? Other
                         * filesystems (IB, configfs) release dcache_lock
                         * before unlink */
@@ -180,7 +180,7 @@ static void spufs_prune_dir(struct dentry *dir)
                }
        }
        shrink_dcache_parent(dir);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
 }
 
 /* Caller must hold parent->i_mutex */
@@ -192,7 +192,7 @@ static int spufs_rmdir(struct inode *parent, struct dentry *dir)
        d_drop(dir);
        res = simple_rmdir(parent, dir);
        /* We have to give up the mm_struct */
-       spu_forget(SPUFS_I(dir->d_inode)->i_ctx);
+       spu_forget(SPUFS_I(d_inode(dir))->i_ctx);
        return res;
 }
 
@@ -222,8 +222,8 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
        int ret;
 
        dir = file->f_path.dentry;
-       parent = dir->d_parent->d_inode;
-       ctx = SPUFS_I(dir->d_inode)->i_ctx;
+       parent = d_inode(dir->d_parent);
+       ctx = SPUFS_I(d_inode(dir))->i_ctx;
 
        mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
        ret = spufs_rmdir(parent, dir);
@@ -460,7 +460,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
                goto out_aff_unlock;
 
        if (affinity) {
-               spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
+               spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx,
                                                                neighbor);
                if (neighbor)
                        put_spu_context(neighbor);
@@ -504,7 +504,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
 
        d_instantiate(dentry, inode);
        inc_nlink(dir);
-       inc_nlink(dentry->d_inode);
+       inc_nlink(d_inode(dentry));
        return ret;
 
 out_iput:
@@ -561,7 +561,7 @@ static struct file_system_type spufs_type;
 long spufs_create(struct path *path, struct dentry *dentry,
                unsigned int flags, umode_t mode, struct file *filp)
 {
-       struct inode *dir = path->dentry->d_inode;
+       struct inode *dir = d_inode(path->dentry);
        int ret;
 
        /* check if we are on spufs */
index 80db439..6eb808f 100644 (file)
 
 struct powernv_rng {
        void __iomem *regs;
+       void __iomem *regs_real;
        unsigned long mask;
 };
 
 static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
 
 
+int powernv_hwrng_present(void)
+{
+       struct powernv_rng *rng;
+
+       rng = get_cpu_var(powernv_rng);
+       put_cpu_var(rng);
+       return rng != NULL;
+}
+
 static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
 {
        unsigned long parity;
@@ -46,6 +56,17 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
        return val;
 }
 
+int powernv_get_random_real_mode(unsigned long *v)
+{
+       struct powernv_rng *rng;
+
+       rng = raw_cpu_read(powernv_rng);
+
+       *v = rng_whiten(rng, in_rm64(rng->regs_real));
+
+       return 1;
+}
+
 int powernv_get_random_long(unsigned long *v)
 {
        struct powernv_rng *rng;
@@ -80,12 +101,20 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng,
 static __init int rng_create(struct device_node *dn)
 {
        struct powernv_rng *rng;
+       struct resource res;
        unsigned long val;
 
        rng = kzalloc(sizeof(*rng), GFP_KERNEL);
        if (!rng)
                return -ENOMEM;
 
+       if (of_address_to_resource(dn, 0, &res)) {
+               kfree(rng);
+               return -ENXIO;
+       }
+
+       rng->regs_real = (void __iomem *)res.start;
+
        rng->regs = of_iomap(dn, 0);
        if (!rng->regs) {
                kfree(rng);
index 3f5c799..d3f896a 100644 (file)
@@ -48,7 +48,7 @@ static struct dentry *hypfs_last_dentry;
 static void hypfs_update_update(struct super_block *sb)
 {
        struct hypfs_sb_info *sb_info = sb->s_fs_info;
-       struct inode *inode = sb_info->update_file->d_inode;
+       struct inode *inode = d_inode(sb_info->update_file);
 
        sb_info->last_update = get_seconds();
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -64,7 +64,7 @@ static void hypfs_add_dentry(struct dentry *dentry)
 
 static inline int hypfs_positive(struct dentry *dentry)
 {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
 }
 
 static void hypfs_remove(struct dentry *dentry)
@@ -72,16 +72,16 @@ static void hypfs_remove(struct dentry *dentry)
        struct dentry *parent;
 
        parent = dentry->d_parent;
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        if (hypfs_positive(dentry)) {
                if (d_is_dir(dentry))
-                       simple_rmdir(parent->d_inode, dentry);
+                       simple_rmdir(d_inode(parent), dentry);
                else
-                       simple_unlink(parent->d_inode, dentry);
+                       simple_unlink(d_inode(parent), dentry);
        }
        d_delete(dentry);
        dput(dentry);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
 }
 
 static void hypfs_delete_tree(struct dentry *root)
@@ -336,7 +336,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        dentry = lookup_one_len(name, parent, strlen(name));
        if (IS_ERR(dentry)) {
                dentry = ERR_PTR(-ENOMEM);
@@ -357,14 +357,14 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
        } else if (S_ISDIR(mode)) {
                inode->i_op = &simple_dir_inode_operations;
                inode->i_fop = &simple_dir_operations;
-               inc_nlink(parent->d_inode);
+               inc_nlink(d_inode(parent));
        } else
                BUG();
        inode->i_private = data;
        d_instantiate(dentry, inode);
        dget(dentry);
 fail:
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
        return dentry;
 }
 
index afa2bd7..8cd8e7b 100644 (file)
@@ -110,7 +110,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 /* upper facilities limit for kvm */
 unsigned long kvm_s390_fac_list_mask[] = {
        0xffe6fffbfcfdfc40UL,
-       0x205c800000000000UL,
+       0x005c800000000000UL,
 };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
index 669df51..324599b 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
 #include <linux/io.h>
+#include <linux/mfd/tmio.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mmcif.h>
 #include <linux/mmc/sh_mobile_sdhi.h>
@@ -243,10 +244,10 @@ static struct platform_device sh_mmcif_device = {
 };
 
 /* SDHI0 */
-static struct sh_mobile_sdhi_info sdhi_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI_RX,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED,
+static struct tmio_mmc_data sdhi_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI_RX,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED,
 };
 
 static struct resource sdhi_resources[] = {
index d4b01d4..cbd2a9f 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mtd/sh_flctl.h>
+#include <linux/mfd/tmio.h>
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/regulator/fixed.h>
@@ -447,8 +448,8 @@ static struct resource sdhi0_cn3_resources[] = {
        },
 };
 
-static struct sh_mobile_sdhi_info sdhi0_cn3_data = {
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sdhi0_cn3_data = {
+       .capabilities   = MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi0_cn3_device = {
@@ -474,8 +475,8 @@ static struct resource sdhi1_cn7_resources[] = {
        },
 };
 
-static struct sh_mobile_sdhi_info sdhi1_cn7_data = {
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sdhi1_cn7_data = {
+       .capabilities   = MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi1_cn7_device = {
index 0d30492..d531791 100644 (file)
@@ -601,12 +601,12 @@ static struct platform_device sdhi0_power = {
        },
 };
 
-static struct sh_mobile_sdhi_info sdhi0_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
+static struct tmio_mmc_data sdhi0_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
                          MMC_CAP_NEEDS_POLL,
-       .tmio_flags     = TMIO_MMC_USE_GPIO_CD,
+       .flags          = TMIO_MMC_USE_GPIO_CD,
        .cd_gpio        = GPIO_PTY7,
 };
 
@@ -635,12 +635,12 @@ static struct platform_device sdhi0_device = {
 
 #if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE)
 /* SDHI1 */
-static struct sh_mobile_sdhi_info sdhi1_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI1_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI1_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
+static struct tmio_mmc_data sdhi1_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI1_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI1_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
                          MMC_CAP_NEEDS_POLL,
-       .tmio_flags     = TMIO_MMC_USE_GPIO_CD,
+       .flags          = TMIO_MMC_USE_GPIO_CD,
        .cd_gpio        = GPIO_PTW7,
 };
 
index 1df4398..7d997ce 100644 (file)
@@ -373,11 +373,11 @@ static struct resource kfr2r09_sh_sdhi0_resources[] = {
        },
 };
 
-static struct sh_mobile_sdhi_info sh7724_sdhi0_data = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_flags     = TMIO_MMC_WRPROTECT_DISABLE,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi0_data = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .flags          = TMIO_MMC_WRPROTECT_DISABLE,
+       .capabilities   = MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device kfr2r09_sh_sdhi0_device = {
index 8b73194..29b7c0d 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mobile_sdhi.h>
 #include <linux/mtd/physmap.h>
+#include <linux/mfd/tmio.h>
 #include <linux/mtd/nand.h>
 #include <linux/i2c.h>
 #include <linux/regulator/fixed.h>
@@ -408,10 +409,10 @@ static struct resource sdhi_cn9_resources[] = {
        },
 };
 
-static struct sh_mobile_sdhi_info sh7724_sdhi_data = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi_data = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi_cn9_device = {
index 1162bc6..4f6635a 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/sh_mobile_sdhi.h>
+#include <linux/mfd/tmio.h>
 #include <linux/mtd/physmap.h>
 #include <linux/delay.h>
 #include <linux/regulator/fixed.h>
@@ -468,10 +469,10 @@ static struct resource sdhi0_cn7_resources[] = {
        },
 };
 
-static struct sh_mobile_sdhi_info sh7724_sdhi0_data = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi0_data = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi0_cn7_device = {
@@ -497,10 +498,10 @@ static struct resource sdhi1_cn8_resources[] = {
        },
 };
 
-static struct sh_mobile_sdhi_info sh7724_sdhi1_data = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI1_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI1_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi1_data = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI1_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI1_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ,
 };
 
 static struct platform_device sdhi1_cn8_device = {
index 6049d58..226d569 100644 (file)
@@ -22,6 +22,7 @@ config X86_64
 ### Arch settings
 config X86
        def_bool y
+       select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
        select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
        select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
        select ARCH_HAS_FAST_MULTIPLIER
index a4771dc..1f20b35 100644 (file)
@@ -79,7 +79,7 @@ NUM_BLKS    = %rdx
 c           = %rcx
 d           = %r8
 e           = %rdx
-y3          = %rdi
+y3          = %rsi
 
 TBL   = %rbp
 
index a821b1c..72bf268 100644 (file)
@@ -427,6 +427,13 @@ sysretl_from_sys_call:
         * cs and ss are loaded from MSRs.
         * (Note: 32bit->32bit SYSRET is different: since r11
         * does not exist, it merely sets eflags.IF=1).
+        *
+        * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+        * descriptor is not reinitialized.  This means that we must
+        * avoid SYSRET with SS == NULL, which could happen if we schedule,
+        * exit the kernel, and re-enter using an interrupt vector.  (All
+        * interrupt entries on x86_64 set SS to NULL.)  We prevent that
+        * from happening by reloading SS in __switch_to.
         */
        USERGS_SYSRET32
 
index 7ee9b94..3d6606f 100644 (file)
 #define X86_BUG_11AP           X86_BUG(5) /* Bad local APIC aka 11AP */
 #define X86_BUG_FXSAVE_LEAK    X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
 #define X86_BUG_CLFLUSH_MONITOR        X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS        X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
index e2d4a4a..3bbc07a 100644 (file)
@@ -20,13 +20,10 @@ extern unsigned long switcher_addr;
 /* Found in switcher.S */
 extern unsigned long default_idt_entries[];
 
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
+/* Declarations for definitions in arch/x86/lguest/head_32.S */
+extern char lguest_noirq_iret[];
 extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
 extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
 
 extern void lguest_iret(void);
 extern void lguest_init(void);
index 803b684..dbe76a1 100644 (file)
@@ -757,7 +757,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
 }
 
 /* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
 {
        return _acpi_map_lsapic(handle, physid, pcpu);
 }
index fd470eb..e4cf633 100644 (file)
@@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c)
        if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
                if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
                        set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
+
+       /* AMD CPUs don't reset SS attributes on SYSRET */
+       set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
 }
 
 #ifdef CONFIG_X86_32
index c7b2384..02c2eff 100644 (file)
@@ -295,6 +295,15 @@ system_call_fastpath:
         * rflags from r11 (but RF and VM bits are forced to 0),
         * cs and ss are loaded from MSRs.
         * Restoration of rflags re-enables interrupts.
+        *
+        * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+        * descriptor is not reinitialized.  This means that we should
+        * avoid SYSRET with SS == NULL, which could happen if we schedule,
+        * exit the kernel, and re-enter using an interrupt vector.  (All
+        * interrupt entries on x86_64 set SS to NULL.)  We prevent that
+        * from happening by reloading SS in __switch_to.  (Actually
+        * detecting the failure in 64-bit userspace is tricky but can be
+        * done.)
         */
        USERGS_SYSRET64
 
index 4baaa97..ddfdbf7 100644 (file)
@@ -419,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
                     task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                __switch_to_xtra(prev_p, next_p, tss);
 
+       if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
+               /*
+                * AMD CPUs have a misfeature: SYSRET sets the SS selector but
+                * does not update the cached descriptor.  As a result, if we
+                * do SYSRET while SS is NULL, we'll end up in user mode with
+                * SS apparently equal to __USER_DS but actually unusable.
+                *
+                * The straightforward workaround would be to fix it up just
+                * before SYSRET, but that would slow down the system call
+                * fast paths.  Instead, we ensure that SS is never NULL in
+                * system call context.  We do this by replacing NULL SS
+                * selectors at every context switch.  SYSCALL sets up a valid
+                * SS, so the only way to get NULL is to re-enter the kernel
+                * from CPL 3 through an interrupt.  Since that can't happen
+                * in the same task as a running syscall, we are guaranteed to
+                * context switch between every interrupt vector entry and a
+                * subsequent SYSRET.
+                *
+                * We read SS first because SS reads are much faster than
+                * writes.  Out of caution, we force SS to __KERNEL_DS even if
+                * it previously had a different non-NULL value.
+                */
+               unsigned short ss_sel;
+               savesegment(ss, ss_sel);
+               if (ss_sel != __KERNEL_DS)
+                       loadsegment(ss, __KERNEL_DS);
+       }
+
        return prev_p;
 }
 
index 6eb5c20..d090ecf 100644 (file)
@@ -666,7 +666,7 @@ static int probe_sysfs_permissions(struct pci_dev *dev)
                if (r)
                        return r;
 
-               inode = path.dentry->d_inode;
+               inode = d_backing_inode(path.dentry);
 
                r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
                path_put(&path);
index d67206a..629af0f 100644 (file)
@@ -683,8 +683,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
        unsigned long bitmap = 1;
        struct kvm_lapic **dst;
        int i;
-       bool ret = false;
-       bool x2apic_ipi = src && apic_x2apic_mode(src);
+       bool ret, x2apic_ipi;
 
        *r = -1;
 
@@ -696,16 +695,18 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
        if (irq->shorthand)
                return false;
 
+       x2apic_ipi = src && apic_x2apic_mode(src);
        if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
                return false;
 
+       ret = true;
        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);
 
-       if (!map)
+       if (!map) {
+               ret = false;
                goto out;
-
-       ret = true;
+       }
 
        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
                if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
index 146f295..d43867c 100644 (file)
@@ -4481,9 +4481,11 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
                pfn = spte_to_pfn(*sptep);
 
                /*
-                * Only EPT supported for now; otherwise, one would need to
-                * find out efficiently whether the guest page tables are
-                * also using huge pages.
+                * We cannot do huge page mapping for indirect shadow pages,
+                * which are found on the last rmap (level = 1) when not using
+                * tdp; such shadow pages are synced with the page table in
+                * the guest, and the guest page table is using 4K page size
+                * mapping if the indirect sp has level = 1.
                 */
                if (sp->role.direct &&
                        !kvm_is_reserved_pfn(pfn) &&
@@ -4504,19 +4506,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
        bool flush = false;
        unsigned long *rmapp;
        unsigned long last_index, index;
-       gfn_t gfn_start, gfn_end;
 
        spin_lock(&kvm->mmu_lock);
 
-       gfn_start = memslot->base_gfn;
-       gfn_end = memslot->base_gfn + memslot->npages - 1;
-
-       if (gfn_start >= gfn_end)
-               goto out;
-
        rmapp = memslot->arch.rmap[0];
-       last_index = gfn_to_index(gfn_end, memslot->base_gfn,
-                                       PT_PAGE_TABLE_LEVEL);
+       last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1,
+                               memslot->base_gfn, PT_PAGE_TABLE_LEVEL);
 
        for (index = 0; index <= last_index; ++index, ++rmapp) {
                if (*rmapp)
@@ -4534,7 +4529,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
        if (flush)
                kvm_flush_remote_tlbs(kvm);
 
-out:
        spin_unlock(&kvm->mmu_lock);
 }
 
index f5e8dce..f7b6168 100644 (file)
@@ -3622,8 +3622,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
-                   KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+       /*
+        * Pass through host's Machine Check Enable value to hw_cr4, which
+        * is in force while we are in guest mode.  Do not let guests control
+        * this bit, even if host CR4.MCE == 0.
+        */
+       unsigned long hw_cr4 =
+               (cr4_read_shadow() & X86_CR4_MCE) |
+               (cr4 & ~X86_CR4_MCE) |
+               (to_vmx(vcpu)->rmode.vm86_active ?
+                KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
 
        if (cr4 & X86_CR4_VMXE) {
                /*
index e1a8126..ed31c31 100644 (file)
@@ -5799,7 +5799,6 @@ int kvm_arch_init(void *opaque)
        kvm_set_mmio_spte_mask();
 
        kvm_x86_ops = ops;
-       kvm_init_msr_list();
 
        kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                        PT_DIRTY_MASK, PT64_NX_MASK, 0);
@@ -7253,7 +7252,14 @@ void kvm_arch_hardware_disable(void)
 
 int kvm_arch_hardware_setup(void)
 {
-       return kvm_x86_ops->hardware_setup();
+       int r;
+
+       r = kvm_x86_ops->hardware_setup();
+       if (r != 0)
+               return r;
+
+       kvm_init_msr_list();
+       return 0;
 }
 
 void kvm_arch_hardware_unsetup(void)
index 717908b..8f9a133 100644 (file)
@@ -87,8 +87,7 @@
 
 struct lguest_data lguest_data = {
        .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
-       .noirq_start = (u32)lguest_noirq_start,
-       .noirq_end = (u32)lguest_noirq_end,
+       .noirq_iret = (u32)lguest_noirq_iret,
        .kernel_address = PAGE_OFFSET,
        .blocked_interrupts = { 1 }, /* Block timer interrupts */
        .syscall_vec = SYSCALL_VECTOR,
@@ -262,7 +261,7 @@ PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl);
 PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable);
 /*:*/
 
-/* These are in i386_head.S */
+/* These are in head_32.S */
 extern void lg_irq_enable(void);
 extern void lg_restore_fl(unsigned long flags);
 
@@ -1368,7 +1367,7 @@ static void lguest_restart(char *reason)
  * fit comfortably.
  *
  * First we need assembly templates of each of the patchable Guest operations,
- * and these are in i386_head.S.
+ * and these are in head_32.S.
  */
 
 /*G:060 We construct a table from the assembler templates: */
index 6ddfe4f..d5ae63f 100644 (file)
@@ -84,7 +84,7 @@ ENTRY(lg_irq_enable)
         * set lguest_data.irq_pending to X86_EFLAGS_IF.  If it's not zero, we
         * jump to send_interrupts, otherwise we're done.
         */
-       testl $0, lguest_data+LGUEST_DATA_irq_pending
+       cmpl $0, lguest_data+LGUEST_DATA_irq_pending
        jnz send_interrupts
        /*
         * One cool thing about x86 is that you can do many things without using
@@ -133,9 +133,8 @@ ENTRY(lg_restore_fl)
        ret
 /*:*/
 
-/* These demark the EIP range where host should never deliver interrupts. */
-.global lguest_noirq_start
-.global lguest_noirq_end
+/* These demark the EIP where host should never deliver interrupts. */
+.global lguest_noirq_iret
 
 /*M:004
  * When the Host reflects a trap or injects an interrupt into the Guest, it
@@ -168,29 +167,26 @@ ENTRY(lg_restore_fl)
  * So we have to copy eflags from the stack to lguest_data.irq_enabled before
  * we do the "iret".
  *
- * There are two problems with this: firstly, we need to use a register to do
- * the copy and secondly, the whole thing needs to be atomic.  The first
- * problem is easy to solve: push %eax on the stack so we can use it, and then
- * restore it at the end just before the real "iret".
+ * There are two problems with this: firstly, we can't clobber any registers
+ * and secondly, the whole thing needs to be atomic.  The first problem
+ * is solved by using "push memory"/"pop memory" instruction pair for copying.
  *
  * The second is harder: copying eflags to lguest_data.irq_enabled will turn
  * interrupts on before we're finished, so we could be interrupted before we
- * return to userspace or wherever.  Our solution to this is to surround the
- * code with lguest_noirq_start: and lguest_noirq_end: labels.  We tell the
+ * return to userspace or wherever.  Our solution to this is to tell the
  * Host that it is *never* to interrupt us there, even if interrupts seem to be
- * enabled.
+ * enabled. (It's not necessary to protect pop instruction, since
+ * data gets updated only after it completes, so we only need to protect
+ * one instruction, iret).
  */
 ENTRY(lguest_iret)
-       pushl   %eax
-       movl    12(%esp), %eax
-lguest_noirq_start:
+       pushl   2*4(%esp)
        /*
         * Note the %ss: segment prefix here.  Normal data accesses use the
         * "ds" segment, but that will have already been restored for whatever
         * we're returning to (such as userspace): we can't trust it.  The %ss:
         * prefix makes sure we use the stack segment, which is still valid.
         */
-       movl    %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
-       popl    %eax
+       popl    %ss:lguest_data+LGUEST_DATA_irq_enabled
+lguest_noirq_iret:
        iret
-lguest_noirq_end:
index 1f33b3d..0a42327 100644 (file)
@@ -82,7 +82,7 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
        clac();
 
        /* If the destination is a kernel buffer, we always clear the end */
-       if ((unsigned long)to >= TASK_SIZE_MAX)
+       if (!__addr_ok(to))
                memset(to, 0, len);
        return len;
 }
index d05327c..5d355e0 100644 (file)
@@ -124,6 +124,7 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
 {
        void **srcs;
        int i;
+       int start = -1, stop = disks - 3;
 
        if (submit->scribble)
                srcs = submit->scribble;
@@ -134,10 +135,21 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
                if (blocks[i] == NULL) {
                        BUG_ON(i > disks - 3); /* P or Q can't be zero */
                        srcs[i] = (void*)raid6_empty_zero_page;
-               } else
+               } else {
                        srcs[i] = page_address(blocks[i]) + offset;
+                       if (i < disks - 2) {
+                               stop = i;
+                               if (start == -1)
+                                       start = i;
+                       }
+               }
        }
-       raid6_call.gen_syndrome(disks, len, srcs);
+       if (submit->flags & ASYNC_TX_PQ_XOR_DST) {
+               BUG_ON(!raid6_call.xor_syndrome);
+               if (start >= 0)
+                       raid6_call.xor_syndrome(disks, start, stop, len, srcs);
+       } else
+               raid6_call.gen_syndrome(disks, len, srcs);
        async_tx_sync_epilog(submit);
 }
 
@@ -178,7 +190,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
        if (device)
                unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
 
-       if (unmap &&
+       /* XORing P/Q is only implemented in software */
+       if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
            (src_cnt <= dma_maxpq(device, 0) ||
             dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
            is_dma_pq_aligned(device, offset, 0, len)) {
index e6c3ddd..ab2cbb5 100644 (file)
@@ -5,7 +5,7 @@
 menuconfig ACPI
        bool "ACPI (Advanced Configuration and Power Interface) Support"
        depends on !IA64_HP_SIM
-       depends on IA64 || X86
+       depends on IA64 || X86 || (ARM64 && EXPERT)
        depends on PCI
        select PNP
        default y
@@ -48,9 +48,16 @@ config ACPI_LEGACY_TABLES_LOOKUP
 config ARCH_MIGHT_HAVE_ACPI_PDC
        bool
 
+config ACPI_GENERIC_GSI
+       bool
+
+config ACPI_SYSTEM_POWER_STATES_SUPPORT
+       bool
+
 config ACPI_SLEEP
        bool
        depends on SUSPEND || HIBERNATION
+       depends on ACPI_SYSTEM_POWER_STATES_SUPPORT
        default y
 
 config ACPI_PROCFS_POWER
@@ -163,6 +170,7 @@ config ACPI_PROCESSOR
        tristate "Processor"
        select THERMAL
        select CPU_IDLE
+       depends on X86 || IA64
        default y
        help
          This driver installs ACPI as the idle handler for Linux and uses
index 623b117..8a063e2 100644 (file)
@@ -23,7 +23,7 @@ acpi-y                                += nvs.o
 
 # Power management related files
 acpi-y                         += wakeup.o
-acpi-y                         += sleep.o
+acpi-$(CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT) += sleep.o
 acpi-y                         += device_pm.o
 acpi-$(CONFIG_ACPI_SLEEP)      += proc.o
 
@@ -56,6 +56,7 @@ ifdef CONFIG_ACPI_VIDEO
 acpi-y                         += video_detect.o
 endif
 acpi-y                         += acpi_lpat.o
+acpi-$(CONFIG_ACPI_GENERIC_GSI) += gsi.o
 
 # These are (potentially) separate modules
 
index 1020b1b..58f335c 100644 (file)
@@ -170,7 +170,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr)
        acpi_status status;
        int ret;
 
-       if (pr->phys_id == -1)
+       if (pr->phys_id == PHYS_CPUID_INVALID)
                return -ENODEV;
 
        status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta);
@@ -215,7 +215,8 @@ static int acpi_processor_get_info(struct acpi_device *device)
        union acpi_object object = { 0 };
        struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
        struct acpi_processor *pr = acpi_driver_data(device);
-       int phys_id, cpu_index, device_declaration = 0;
+       phys_cpuid_t phys_id;
+       int cpu_index, device_declaration = 0;
        acpi_status status = AE_OK;
        static int cpu0_initialized;
        unsigned long long value;
@@ -263,7 +264,7 @@ static int acpi_processor_get_info(struct acpi_device *device)
        }
 
        phys_id = acpi_get_phys_id(pr->handle, device_declaration, pr->acpi_id);
-       if (phys_id < 0)
+       if (phys_id == PHYS_CPUID_INVALID)
                acpi_handle_debug(pr->handle, "failed to get CPU physical ID.\n");
        pr->phys_id = phys_id;
 
index 8b67bd0..c412fdb 100644 (file)
@@ -448,6 +448,9 @@ static int __init acpi_bus_init_irq(void)
        case ACPI_IRQ_MODEL_IOSAPIC:
                message = "IOSAPIC";
                break;
+       case ACPI_IRQ_MODEL_GIC:
+               message = "GIC";
+               break;
        case ACPI_IRQ_MODEL_PLATFORM:
                message = "platform specific model";
                break;
index 220d640..5e8fed4 100644 (file)
@@ -861,7 +861,7 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
                }
        }
        mutex_unlock(&ec->mutex);
-       list_for_each_entry(handler, &free_list, node)
+       list_for_each_entry_safe(handler, tmp, &free_list, node)
                acpi_ec_put_query_handler(handler);
 }
 EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler);
diff --git a/drivers/acpi/gsi.c b/drivers/acpi/gsi.c
new file mode 100644 (file)
index 0000000..38208f2
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * ACPI GSI IRQ layer
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/acpi.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+
+enum acpi_irq_model_id acpi_irq_model;
+
+static unsigned int acpi_gsi_get_irq_type(int trigger, int polarity)
+{
+       switch (polarity) {
+       case ACPI_ACTIVE_LOW:
+               return trigger == ACPI_EDGE_SENSITIVE ?
+                      IRQ_TYPE_EDGE_FALLING :
+                      IRQ_TYPE_LEVEL_LOW;
+       case ACPI_ACTIVE_HIGH:
+               return trigger == ACPI_EDGE_SENSITIVE ?
+                      IRQ_TYPE_EDGE_RISING :
+                      IRQ_TYPE_LEVEL_HIGH;
+       case ACPI_ACTIVE_BOTH:
+               if (trigger == ACPI_EDGE_SENSITIVE)
+                       return IRQ_TYPE_EDGE_BOTH;
+       default:
+               return IRQ_TYPE_NONE;
+       }
+}
+
+/**
+ * acpi_gsi_to_irq() - Retrieve the linux irq number for a given GSI
+ * @gsi: GSI IRQ number to map
+ * @irq: pointer where linux IRQ number is stored
+ *
+ * irq location updated with irq value [>0 on success, 0 on failure]
+ *
+ * Returns: linux IRQ number on success (>0)
+ *          -EINVAL on failure
+ */
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+       /*
+        * Only default domain is supported at present, always find
+        * the mapping corresponding to default domain by passing NULL
+        * as irq_domain parameter
+        */
+       *irq = irq_find_mapping(NULL, gsi);
+       /*
+        * *irq == 0 means no mapping, that should
+        * be reported as a failure
+        */
+       return (*irq > 0) ? *irq : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
+
+/**
+ * acpi_register_gsi() - Map a GSI to a linux IRQ number
+ * @dev: device for which IRQ has to be mapped
+ * @gsi: GSI IRQ number
+ * @trigger: trigger type of the GSI number to be mapped
+ * @polarity: polarity of the GSI to be mapped
+ *
+ * Returns: a valid linux IRQ number on success
+ *          -EINVAL on failure
+ */
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger,
+                     int polarity)
+{
+       unsigned int irq;
+       unsigned int irq_type = acpi_gsi_get_irq_type(trigger, polarity);
+
+       /*
+        * There is no way at present to look-up the IRQ domain on ACPI,
+        * hence always create mapping referring to the default domain
+        * by passing NULL as irq_domain parameter
+        */
+       irq = irq_create_mapping(NULL, gsi);
+       if (!irq)
+               return -EINVAL;
+
+       /* Set irq type if specified and different than the current one */
+       if (irq_type != IRQ_TYPE_NONE &&
+               irq_type != irq_get_trigger_type(irq))
+               irq_set_irq_type(irq, irq_type);
+       return irq;
+}
+EXPORT_SYMBOL_GPL(acpi_register_gsi);
+
+/**
+ * acpi_unregister_gsi() - Free a GSI<->linux IRQ number mapping
+ * @gsi: GSI IRQ number
+ */
+void acpi_unregister_gsi(u32 gsi)
+{
+       int irq = irq_find_mapping(NULL, gsi);
+
+       irq_dispose_mapping(irq);
+}
+EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
index 56b321a..ba4a61e 100644 (file)
@@ -161,7 +161,11 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit);
 /*--------------------------------------------------------------------------
                                   Suspend/Resume
   -------------------------------------------------------------------------- */
+#ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT
 extern int acpi_sleep_init(void);
+#else
+static inline int acpi_sleep_init(void) { return -ENXIO; }
+#endif
 
 #ifdef CONFIG_ACPI_SLEEP
 int acpi_sleep_proc_init(void);
index f9eeae8..39748bb 100644 (file)
@@ -336,11 +336,11 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size)
        return NULL;
 }
 
-#ifndef CONFIG_IA64
-#define should_use_kmap(pfn)   page_is_ram(pfn)
-#else
+#if defined(CONFIG_IA64) || defined(CONFIG_ARM64)
 /* ioremap will take care of cache attributes */
 #define should_use_kmap(pfn)   0
+#else
+#define should_use_kmap(pfn)   page_is_ram(pfn)
 #endif
 
 static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz)
index 7962651..b1ec78b 100644 (file)
@@ -32,7 +32,7 @@ static struct acpi_table_madt *get_madt_table(void)
 }
 
 static int map_lapic_id(struct acpi_subtable_header *entry,
-                u32 acpi_id, int *apic_id)
+                u32 acpi_id, phys_cpuid_t *apic_id)
 {
        struct acpi_madt_local_apic *lapic =
                container_of(entry, struct acpi_madt_local_apic, header);
@@ -48,7 +48,7 @@ static int map_lapic_id(struct acpi_subtable_header *entry,
 }
 
 static int map_x2apic_id(struct acpi_subtable_header *entry,
-                        int device_declaration, u32 acpi_id, int *apic_id)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
 {
        struct acpi_madt_local_x2apic *apic =
                container_of(entry, struct acpi_madt_local_x2apic, header);
@@ -65,7 +65,7 @@ static int map_x2apic_id(struct acpi_subtable_header *entry,
 }
 
 static int map_lsapic_id(struct acpi_subtable_header *entry,
-               int device_declaration, u32 acpi_id, int *apic_id)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
 {
        struct acpi_madt_local_sapic *lsapic =
                container_of(entry, struct acpi_madt_local_sapic, header);
@@ -83,10 +83,35 @@ static int map_lsapic_id(struct acpi_subtable_header *entry,
        return 0;
 }
 
-static int map_madt_entry(int type, u32 acpi_id)
+/*
+ * Retrieve the ARM CPU physical identifier (MPIDR)
+ */
+static int map_gicc_mpidr(struct acpi_subtable_header *entry,
+               int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr)
+{
+       struct acpi_madt_generic_interrupt *gicc =
+           container_of(entry, struct acpi_madt_generic_interrupt, header);
+
+       if (!(gicc->flags & ACPI_MADT_ENABLED))
+               return -ENODEV;
+
+       /* device_declaration means Device object in DSDT, in the
+        * GIC interrupt model, logical processors are required to
+        * have a Processor Device object in the DSDT, so we should
+        * check device_declaration here
+        */
+       if (device_declaration && (gicc->uid == acpi_id)) {
+               *mpidr = gicc->arm_mpidr;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static phys_cpuid_t map_madt_entry(int type, u32 acpi_id)
 {
        unsigned long madt_end, entry;
-       int phys_id = -1;       /* CPU hardware ID */
+       phys_cpuid_t phys_id = PHYS_CPUID_INVALID;      /* CPU hardware ID */
        struct acpi_table_madt *madt;
 
        madt = get_madt_table();
@@ -111,18 +136,21 @@ static int map_madt_entry(int type, u32 acpi_id)
                } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
                        if (!map_lsapic_id(header, type, acpi_id, &phys_id))
                                break;
+               } else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
+                       if (!map_gicc_mpidr(header, type, acpi_id, &phys_id))
+                               break;
                }
                entry += header->length;
        }
        return phys_id;
 }
 
-static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
+static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
 {
        struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
        union acpi_object *obj;
        struct acpi_subtable_header *header;
-       int phys_id = -1;
+       phys_cpuid_t phys_id = PHYS_CPUID_INVALID;
 
        if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
                goto exit;
@@ -143,33 +171,35 @@ static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
                map_lsapic_id(header, type, acpi_id, &phys_id);
        else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
                map_x2apic_id(header, type, acpi_id, &phys_id);
+       else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
+               map_gicc_mpidr(header, type, acpi_id, &phys_id);
 
 exit:
        kfree(buffer.pointer);
        return phys_id;
 }
 
-int acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
+phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
 {
-       int phys_id;
+       phys_cpuid_t phys_id;
 
        phys_id = map_mat_entry(handle, type, acpi_id);
-       if (phys_id == -1)
+       if (phys_id == PHYS_CPUID_INVALID)
                phys_id = map_madt_entry(type, acpi_id);
 
        return phys_id;
 }
 
-int acpi_map_cpuid(int phys_id, u32 acpi_id)
+int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id)
 {
 #ifdef CONFIG_SMP
        int i;
 #endif
 
-       if (phys_id == -1) {
+       if (phys_id == PHYS_CPUID_INVALID) {
                /*
                 * On UP processor, there is no _MAT or MADT table.
-                * So above phys_id is always set to -1.
+                * So above phys_id is always set to PHYS_CPUID_INVALID.
                 *
                 * BIOS may define multiple CPU handles even for UP processor.
                 * For example,
@@ -190,7 +220,7 @@ int acpi_map_cpuid(int phys_id, u32 acpi_id)
                if (nr_cpu_ids <= 1 && acpi_id == 0)
                        return acpi_id;
                else
-                       return phys_id;
+                       return -1;
        }
 
 #ifdef CONFIG_SMP
@@ -208,7 +238,7 @@ int acpi_map_cpuid(int phys_id, u32 acpi_id)
 
 int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
 {
-       int phys_id;
+       phys_cpuid_t phys_id;
 
        phys_id = acpi_get_phys_id(handle, type, acpi_id);
 
index 69bc0d8..03141aa 100644 (file)
@@ -375,7 +375,11 @@ bool acpi_scan_is_offline(struct acpi_device *adev, bool uevent)
        struct acpi_device_physical_node *pn;
        bool offline = true;
 
-       mutex_lock(&adev->physical_node_lock);
+       /*
+        * acpi_container_offline() calls this for all of the container's
+        * children under the container's physical_node_lock lock.
+        */
+       mutex_lock_nested(&adev->physical_node_lock, SINGLE_DEPTH_NESTING);
 
        list_for_each_entry(pn, &adev->physical_node_list, node)
                if (device_supports_offline(pn->dev) && !pn->dev->offline) {
@@ -2388,9 +2392,6 @@ static void acpi_default_enumeration(struct acpi_device *device)
        struct list_head resource_list;
        bool is_spi_i2c_slave = false;
 
-       if (!device->pnp.type.platform_id || device->handler)
-               return;
-
        /*
         * Do not enemerate SPI/I2C slaves as they will be enuerated by their
         * respective parents.
@@ -2403,6 +2404,29 @@ static void acpi_default_enumeration(struct acpi_device *device)
                acpi_create_platform_device(device);
 }
 
+static const struct acpi_device_id generic_device_ids[] = {
+       {"PRP0001", },
+       {"", },
+};
+
+static int acpi_generic_device_attach(struct acpi_device *adev,
+                                     const struct acpi_device_id *not_used)
+{
+       /*
+        * Since PRP0001 is the only ID handled here, the test below can be
+        * unconditional.
+        */
+       if (adev->data.of_compatible)
+               acpi_default_enumeration(adev);
+
+       return 1;
+}
+
+static struct acpi_scan_handler generic_device_handler = {
+       .ids = generic_device_ids,
+       .attach = acpi_generic_device_attach,
+};
+
 static int acpi_scan_attach_handler(struct acpi_device *device)
 {
        struct acpi_hardware_id *hwid;
@@ -2428,8 +2452,6 @@ static int acpi_scan_attach_handler(struct acpi_device *device)
                                break;
                }
        }
-       if (!ret)
-               acpi_default_enumeration(device);
 
        return ret;
 }
@@ -2471,6 +2493,9 @@ static void acpi_bus_attach(struct acpi_device *device)
                ret = device_attach(&device->dev);
                if (ret < 0)
                        return;
+
+               if (!ret && device->pnp.type.platform_id)
+                       acpi_default_enumeration(device);
        }
        device->flags.visited = true;
 
@@ -2629,6 +2654,8 @@ int __init acpi_scan_init(void)
        acpi_pnp_init();
        acpi_int340x_thermal_init();
 
+       acpi_scan_add_handler(&generic_device_handler);
+
        mutex_lock(&acpi_scan_lock);
        /*
         * Enumerate devices in the ACPI namespace.
index 93b8152..2e19189 100644 (file)
@@ -23,6 +23,8 @@
  *
  */
 
+/* Uncomment next line to get verbose printout */
+/* #define DEBUG */
 #define pr_fmt(fmt) "ACPI: " fmt
 
 #include <linux/init.h>
@@ -61,9 +63,9 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                {
                        struct acpi_madt_local_apic *p =
                            (struct acpi_madt_local_apic *)header;
-                       pr_info("LAPIC (acpi_id[0x%02x] lapic_id[0x%02x] %s)\n",
-                               p->processor_id, p->id,
-                               (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
+                       pr_debug("LAPIC (acpi_id[0x%02x] lapic_id[0x%02x] %s)\n",
+                                p->processor_id, p->id,
+                                (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
                }
                break;
 
@@ -71,9 +73,9 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                {
                        struct acpi_madt_local_x2apic *p =
                            (struct acpi_madt_local_x2apic *)header;
-                       pr_info("X2APIC (apic_id[0x%02x] uid[0x%02x] %s)\n",
-                               p->local_apic_id, p->uid,
-                               (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
+                       pr_debug("X2APIC (apic_id[0x%02x] uid[0x%02x] %s)\n",
+                                p->local_apic_id, p->uid,
+                                (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
                }
                break;
 
@@ -81,8 +83,8 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                {
                        struct acpi_madt_io_apic *p =
                            (struct acpi_madt_io_apic *)header;
-                       pr_info("IOAPIC (id[0x%02x] address[0x%08x] gsi_base[%d])\n",
-                               p->id, p->address, p->global_irq_base);
+                       pr_debug("IOAPIC (id[0x%02x] address[0x%08x] gsi_base[%d])\n",
+                                p->id, p->address, p->global_irq_base);
                }
                break;
 
@@ -155,9 +157,9 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                {
                        struct acpi_madt_io_sapic *p =
                            (struct acpi_madt_io_sapic *)header;
-                       pr_info("IOSAPIC (id[0x%x] address[%p] gsi_base[%d])\n",
-                               p->id, (void *)(unsigned long)p->address,
-                               p->global_irq_base);
+                       pr_debug("IOSAPIC (id[0x%x] address[%p] gsi_base[%d])\n",
+                                p->id, (void *)(unsigned long)p->address,
+                                p->global_irq_base);
                }
                break;
 
@@ -165,9 +167,9 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                {
                        struct acpi_madt_local_sapic *p =
                            (struct acpi_madt_local_sapic *)header;
-                       pr_info("LSAPIC (acpi_id[0x%02x] lsapic_id[0x%02x] lsapic_eid[0x%02x] %s)\n",
-                               p->processor_id, p->id, p->eid,
-                               (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
+                       pr_debug("LSAPIC (acpi_id[0x%02x] lsapic_id[0x%02x] lsapic_eid[0x%02x] %s)\n",
+                                p->processor_id, p->id, p->eid,
+                                (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
                }
                break;
 
@@ -183,6 +185,28 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                }
                break;
 
+       case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
+               {
+                       struct acpi_madt_generic_interrupt *p =
+                               (struct acpi_madt_generic_interrupt *)header;
+                       pr_debug("GICC (acpi_id[0x%04x] address[%llx] MPIDR[0x%llx] %s)\n",
+                                p->uid, p->base_address,
+                                p->arm_mpidr,
+                                (p->flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
+
+               }
+               break;
+
+       case ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR:
+               {
+                       struct acpi_madt_generic_distributor *p =
+                               (struct acpi_madt_generic_distributor *)header;
+                       pr_debug("GIC Distributor (gic_id[0x%04x] address[%llx] gsi_base[%d])\n",
+                                p->gic_id, p->base_address,
+                                p->global_irq_base);
+               }
+               break;
+
        default:
                pr_warn("Found unsupported MADT entry (type = 0x%x)\n",
                        header->type);
index 25798db..68f0314 100644 (file)
@@ -157,10 +157,10 @@ static int dev_mkdir(const char *name, umode_t mode)
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       err = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+       err = vfs_mkdir(d_inode(path.dentry), dentry, mode);
        if (!err)
                /* mark as kernel-created inode */
-               dentry->d_inode->i_private = &thread;
+               d_inode(dentry)->i_private = &thread;
        done_path_create(&path, dentry);
        return err;
 }
@@ -207,7 +207,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       err = vfs_mknod(path.dentry->d_inode, dentry, mode, dev->devt);
+       err = vfs_mknod(d_inode(path.dentry), dentry, mode, dev->devt);
        if (!err) {
                struct iattr newattrs;
 
@@ -215,12 +215,12 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
                newattrs.ia_uid = uid;
                newattrs.ia_gid = gid;
                newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
-               mutex_lock(&dentry->d_inode->i_mutex);
+               mutex_lock(&d_inode(dentry)->i_mutex);
                notify_change(dentry, &newattrs, NULL);
-               mutex_unlock(&dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dentry)->i_mutex);
 
                /* mark as kernel-created inode */
-               dentry->d_inode->i_private = &thread;
+               d_inode(dentry)->i_private = &thread;
        }
        done_path_create(&path, dentry);
        return err;
@@ -235,16 +235,16 @@ static int dev_rmdir(const char *name)
        dentry = kern_path_locked(name, &parent);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
-       if (dentry->d_inode) {
-               if (dentry->d_inode->i_private == &thread)
-                       err = vfs_rmdir(parent.dentry->d_inode, dentry);
+       if (d_really_is_positive(dentry)) {
+               if (d_inode(dentry)->i_private == &thread)
+                       err = vfs_rmdir(d_inode(parent.dentry), dentry);
                else
                        err = -EPERM;
        } else {
                err = -ENOENT;
        }
        dput(dentry);
-       mutex_unlock(&parent.dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
        path_put(&parent);
        return err;
 }
@@ -306,11 +306,11 @@ static int handle_remove(const char *nodename, struct device *dev)
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                struct kstat stat;
                struct path p = {.mnt = parent.mnt, .dentry = dentry};
                err = vfs_getattr(&p, &stat);
-               if (!err && dev_mynode(dev, dentry->d_inode, &stat)) {
+               if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
                        struct iattr newattrs;
                        /*
                         * before unlinking this node, reset permissions
@@ -321,10 +321,10 @@ static int handle_remove(const char *nodename, struct device *dev)
                        newattrs.ia_mode = stat.mode & ~0777;
                        newattrs.ia_valid =
                                ATTR_UID|ATTR_GID|ATTR_MODE;
-                       mutex_lock(&dentry->d_inode->i_mutex);
+                       mutex_lock(&d_inode(dentry)->i_mutex);
                        notify_change(dentry, &newattrs, NULL);
-                       mutex_unlock(&dentry->d_inode->i_mutex);
-                       err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
+                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       err = vfs_unlink(d_inode(parent.dentry), dentry, NULL);
                        if (!err || err == -ENOENT)
                                deleted = 1;
                }
@@ -332,7 +332,7 @@ static int handle_remove(const char *nodename, struct device *dev)
                err = -ENOENT;
        }
        dput(dentry);
-       mutex_unlock(&parent.dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
 
        path_put(&parent);
        if (deleted && strchr(nodename, '/'))
index 9a95002..a6ee3d7 100644 (file)
@@ -424,7 +424,7 @@ static int in_flight_summary_show(struct seq_file *m, void *pos)
  * So we have our own inline version of it above.  :-( */
 static inline int debugfs_positive(struct dentry *dentry)
 {
-        return dentry->d_inode && !d_unhashed(dentry);
+        return d_really_is_positive(dentry) && !d_unhashed(dentry);
 }
 
 /* make sure at *open* time that the respective object won't go away. */
@@ -439,15 +439,15 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo
         * or has debugfs_remove() already been called? */
        parent = file->f_path.dentry->d_parent;
        /* not sure if this can happen: */
-       if (!parent || !parent->d_inode)
+       if (!parent || d_really_is_negative(parent))
                goto out;
        /* serialize with d_delete() */
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        /* Make sure the object is still alive */
        if (debugfs_positive(file->f_path.dentry)
        && kref_get_unless_zero(kref))
                ret = 0;
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
        if (!ret) {
                ret = single_open(file, show, data);
                if (ret)
index b40af32..8125233 100644 (file)
@@ -3762,8 +3762,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
                goto out_tag_set;
        }
 
-       /* We use the default size, but let's be explicit about it. */
-       blk_queue_physical_block_size(q, SECTOR_SIZE);
+       queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+       /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
 
        /* set io sizes to object size */
        segment_size = rbd_obj_bytes(&rbd_dev->header);
@@ -5301,8 +5301,13 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
 
        if (mapping) {
                ret = rbd_dev_header_watch_sync(rbd_dev);
-               if (ret)
+               if (ret) {
+                       if (ret == -ENOENT)
+                               pr_info("image %s/%s does not exist\n",
+                                       rbd_dev->spec->pool_name,
+                                       rbd_dev->spec->image_name);
                        goto out_header_name;
+               }
        }
 
        ret = rbd_dev_header_info(rbd_dev);
@@ -5319,8 +5324,14 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
                ret = rbd_spec_fill_snap_id(rbd_dev);
        else
                ret = rbd_spec_fill_names(rbd_dev);
-       if (ret)
+       if (ret) {
+               if (ret == -ENOENT)
+                       pr_info("snap %s/%s@%s does not exist\n",
+                               rbd_dev->spec->pool_name,
+                               rbd_dev->spec->image_name,
+                               rbd_dev->spec->snap_name);
                goto err_out_probe;
+       }
 
        if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
                ret = rbd_dev_v2_parent_info(rbd_dev);
@@ -5390,8 +5401,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,
 
        /* pick the pool */
        rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
-       if (rc < 0)
+       if (rc < 0) {
+               if (rc == -ENOENT)
+                       pr_info("pool %s does not exist\n", spec->pool_name);
                goto err_out_client;
+       }
        spec->pool_id = (u64)rc;
 
        /* The ceph file layout needs to fit pool id in 32 bits */
@@ -5673,7 +5687,7 @@ static int __init rbd_init(void)
 
        /*
         * The number of active work items is limited by the number of
-        * rbd devices, so leave @max_active at default.
+        * rbd devices * queue depth, so leave @max_active at default.
         */
        rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
        if (!rbd_wq) {
index 2664696..0aa135d 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/sched_clock.h>
+#include <linux/acpi.h>
 
 #include <asm/arch_timer.h>
 #include <asm/virt.h>
@@ -371,8 +372,12 @@ arch_timer_detect_rate(void __iomem *cntbase, struct device_node *np)
        if (arch_timer_rate)
                return;
 
-       /* Try to determine the frequency from the device tree or CNTFRQ */
-       if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) {
+       /*
+        * Try to determine the frequency from the device tree or CNTFRQ,
+        * if ACPI is enabled, get the frequency from CNTFRQ ONLY.
+        */
+       if (!acpi_disabled ||
+           of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) {
                if (cntbase)
                        arch_timer_rate = readl_relaxed(cntbase + CNTFRQ);
                else
@@ -691,28 +696,8 @@ static void __init arch_timer_common_init(void)
        arch_timer_arch_init();
 }
 
-static void __init arch_timer_init(struct device_node *np)
+static void __init arch_timer_init(void)
 {
-       int i;
-
-       if (arch_timers_present & ARCH_CP15_TIMER) {
-               pr_warn("arch_timer: multiple nodes in dt, skipping\n");
-               return;
-       }
-
-       arch_timers_present |= ARCH_CP15_TIMER;
-       for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
-               arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
-       arch_timer_detect_rate(NULL, np);
-
-       /*
-        * If we cannot rely on firmware initializing the timer registers then
-        * we should use the physical timers instead.
-        */
-       if (IS_ENABLED(CONFIG_ARM) &&
-           of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
-                       arch_timer_use_virtual = false;
-
        /*
         * If HYP mode is available, we know that the physical timer
         * has been configured to be accessible from PL1. Use it, so
@@ -731,13 +716,39 @@ static void __init arch_timer_init(struct device_node *np)
                }
        }
 
-       arch_timer_c3stop = !of_property_read_bool(np, "always-on");
-
        arch_timer_register();
        arch_timer_common_init();
 }
-CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_init);
-CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_init);
+
+static void __init arch_timer_of_init(struct device_node *np)
+{
+       int i;
+
+       if (arch_timers_present & ARCH_CP15_TIMER) {
+               pr_warn("arch_timer: multiple nodes in dt, skipping\n");
+               return;
+       }
+
+       arch_timers_present |= ARCH_CP15_TIMER;
+       for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
+               arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
+
+       arch_timer_detect_rate(NULL, np);
+
+       arch_timer_c3stop = !of_property_read_bool(np, "always-on");
+
+       /*
+        * If we cannot rely on firmware initializing the timer registers then
+        * we should use the physical timers instead.
+        */
+       if (IS_ENABLED(CONFIG_ARM) &&
+           of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
+                       arch_timer_use_virtual = false;
+
+       arch_timer_init();
+}
+CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init);
+CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init);
 
 static void __init arch_timer_mem_init(struct device_node *np)
 {
@@ -804,3 +815,70 @@ static void __init arch_timer_mem_init(struct device_node *np)
 }
 CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
                       arch_timer_mem_init);
+
+#ifdef CONFIG_ACPI
+static int __init map_generic_timer_interrupt(u32 interrupt, u32 flags)
+{
+       int trigger, polarity;
+
+       if (!interrupt)
+               return 0;
+
+       trigger = (flags & ACPI_GTDT_INTERRUPT_MODE) ? ACPI_EDGE_SENSITIVE
+                       : ACPI_LEVEL_SENSITIVE;
+
+       polarity = (flags & ACPI_GTDT_INTERRUPT_POLARITY) ? ACPI_ACTIVE_LOW
+                       : ACPI_ACTIVE_HIGH;
+
+       return acpi_register_gsi(NULL, interrupt, trigger, polarity);
+}
+
+/* Initialize per-processor generic timer */
+static int __init arch_timer_acpi_init(struct acpi_table_header *table)
+{
+       struct acpi_table_gtdt *gtdt;
+
+       if (arch_timers_present & ARCH_CP15_TIMER) {
+               pr_warn("arch_timer: already initialized, skipping\n");
+               return -EINVAL;
+       }
+
+       gtdt = container_of(table, struct acpi_table_gtdt, header);
+
+       arch_timers_present |= ARCH_CP15_TIMER;
+
+       arch_timer_ppi[PHYS_SECURE_PPI] =
+               map_generic_timer_interrupt(gtdt->secure_el1_interrupt,
+               gtdt->secure_el1_flags);
+
+       arch_timer_ppi[PHYS_NONSECURE_PPI] =
+               map_generic_timer_interrupt(gtdt->non_secure_el1_interrupt,
+               gtdt->non_secure_el1_flags);
+
+       arch_timer_ppi[VIRT_PPI] =
+               map_generic_timer_interrupt(gtdt->virtual_timer_interrupt,
+               gtdt->virtual_timer_flags);
+
+       arch_timer_ppi[HYP_PPI] =
+               map_generic_timer_interrupt(gtdt->non_secure_el2_interrupt,
+               gtdt->non_secure_el2_flags);
+
+       /* Get the frequency from CNTFRQ */
+       arch_timer_detect_rate(NULL, NULL);
+
+       /* Always-on capability */
+       arch_timer_c3stop = !(gtdt->non_secure_el1_flags & ACPI_GTDT_ALWAYS_ON);
+
+       arch_timer_init();
+       return 0;
+}
+
+/* Initialize all the generic timers presented in GTDT */
+void __init acpi_generic_timer_init(void)
+{
+       if (acpi_disabled)
+               return;
+
+       acpi_table_parse(ACPI_SIG_GTDT, arch_timer_acpi_init);
+}
+#endif
index c5b81be..6414661 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/div64.h>
 #include <asm/msr.h>
 #include <asm/cpu_device_id.h>
+#include <asm/cpufeature.h>
 
 #define BYT_RATIOS             0x66a
 #define BYT_VIDS               0x66b
@@ -649,7 +650,7 @@ static struct cpu_defaults byt_params = {
        .pid_policy = {
                .sample_rate_ms = 10,
                .deadband = 0,
-               .setpoint = 97,
+               .setpoint = 60,
                .p_gain_pct = 14,
                .d_gain_pct = 0,
                .i_gain_pct = 4,
@@ -1200,8 +1201,7 @@ static int __init intel_pstate_init(void)
 {
        int cpu, rc = 0;
        const struct x86_cpu_id *id;
-       struct cpu_defaults *cpu_info;
-       struct cpuinfo_x86 *c = &boot_cpu_data;
+       struct cpu_defaults *cpu_def;
 
        if (no_load)
                return -ENODEV;
@@ -1217,10 +1217,10 @@ static int __init intel_pstate_init(void)
        if (intel_pstate_platform_pwr_mgmt_exists())
                return -ENODEV;
 
-       cpu_info = (struct cpu_defaults *)id->driver_data;
+       cpu_def = (struct cpu_defaults *)id->driver_data;
 
-       copy_pid_params(&cpu_info->pid_policy);
-       copy_cpu_funcs(&cpu_info->funcs);
+       copy_pid_params(&cpu_def->pid_policy);
+       copy_cpu_funcs(&cpu_def->funcs);
 
        if (intel_pstate_msrs_not_valid())
                return -ENODEV;
@@ -1231,7 +1231,7 @@ static int __init intel_pstate_init(void)
        if (!all_cpu_data)
                return -ENOMEM;
 
-       if (cpu_has(c,X86_FEATURE_HWP) && !no_hwp)
+       if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp)
                intel_pstate_hwp_enable();
 
        if (!hwp_active && hwp_only)
index 800bf41..033c0c8 100644 (file)
@@ -446,8 +446,9 @@ config CRYPTO_DEV_VMX
 source "drivers/crypto/vmx/Kconfig"
 
 config CRYPTO_DEV_IMGTEC_HASH
-       depends on MIPS || COMPILE_TEST
        tristate "Imagination Technologies hardware hash accelerator"
+       depends on MIPS || COMPILE_TEST
+       depends on HAS_DMA
        select CRYPTO_ALGAPI
        select CRYPTO_MD5
        select CRYPTO_SHA1
index 5be225c..c5a9138 100644 (file)
@@ -265,43 +265,40 @@ static inline int is_dma_buf_file(struct file *file)
 }
 
 /**
- * dma_buf_export_named - Creates a new dma_buf, and associates an anon file
+ * dma_buf_export - Creates a new dma_buf, and associates an anon file
  * with this buffer, so it can be exported.
  * Also connect the allocator specific data and ops to the buffer.
  * Additionally, provide a name string for exporter; useful in debugging.
  *
- * @priv:      [in]    Attach private data of allocator to this buffer
- * @ops:       [in]    Attach allocator-defined dma buf ops to the new buffer.
- * @size:      [in]    Size of the buffer
- * @flags:     [in]    mode flags for the file.
- * @exp_name:  [in]    name of the exporting module - useful for debugging.
- * @resv:      [in]    reservation-object, NULL to allocate default one.
+ * @exp_info:  [in]    holds all the export related information provided
+ *                     by the exporter. see struct dma_buf_export_info
+ *                     for further details.
  *
  * Returns, on success, a newly created dma_buf object, which wraps the
  * supplied private data and operations for dma_buf_ops. On either missing
  * ops, or error in allocating struct dma_buf, will return negative error.
  *
  */
-struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
-                               size_t size, int flags, const char *exp_name,
-                               struct reservation_object *resv)
+struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 {
        struct dma_buf *dmabuf;
+       struct reservation_object *resv = exp_info->resv;
        struct file *file;
        size_t alloc_size = sizeof(struct dma_buf);
-       if (!resv)
+       if (!exp_info->resv)
                alloc_size += sizeof(struct reservation_object);
        else
                /* prevent &dma_buf[1] == dma_buf->resv */
                alloc_size += 1;
 
-       if (WARN_ON(!priv || !ops
-                         || !ops->map_dma_buf
-                         || !ops->unmap_dma_buf
-                         || !ops->release
-                         || !ops->kmap_atomic
-                         || !ops->kmap
-                         || !ops->mmap)) {
+       if (WARN_ON(!exp_info->priv
+                         || !exp_info->ops
+                         || !exp_info->ops->map_dma_buf
+                         || !exp_info->ops->unmap_dma_buf
+                         || !exp_info->ops->release
+                         || !exp_info->ops->kmap_atomic
+                         || !exp_info->ops->kmap
+                         || !exp_info->ops->mmap)) {
                return ERR_PTR(-EINVAL);
        }
 
@@ -309,10 +306,10 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
        if (dmabuf == NULL)
                return ERR_PTR(-ENOMEM);
 
-       dmabuf->priv = priv;
-       dmabuf->ops = ops;
-       dmabuf->size = size;
-       dmabuf->exp_name = exp_name;
+       dmabuf->priv = exp_info->priv;
+       dmabuf->ops = exp_info->ops;
+       dmabuf->size = exp_info->size;
+       dmabuf->exp_name = exp_info->exp_name;
        init_waitqueue_head(&dmabuf->poll);
        dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll;
        dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0;
@@ -323,7 +320,8 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
        }
        dmabuf->resv = resv;
 
-       file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf, flags);
+       file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf,
+                                       exp_info->flags);
        if (IS_ERR(file)) {
                kfree(dmabuf);
                return ERR_CAST(file);
@@ -341,8 +339,7 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
 
        return dmabuf;
 }
-EXPORT_SYMBOL_GPL(dma_buf_export_named);
-
+EXPORT_SYMBOL_GPL(dma_buf_export);
 
 /**
  * dma_buf_fd - returns a file descriptor for the given dma_buf
index 91eced0..fd7ac13 100644 (file)
@@ -112,6 +112,17 @@ config FSL_DMA
          EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on
          some Txxx and Bxxx parts.
 
+config FSL_RAID
+        tristate "Freescale RAID engine Support"
+        depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH
+        select DMA_ENGINE
+        select DMA_ENGINE_RAID
+        ---help---
+          Enable support for Freescale RAID Engine. RAID Engine is
+          available on some QorIQ SoCs (like P5020/P5040). It has
+          the capability to offload memcpy, xor and pq computation
+         for raid5/6.
+
 source "drivers/dma/hsu/Kconfig"
 
 config MPC512X_DMA
@@ -347,6 +358,16 @@ config DMA_JZ4740
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
 
+config DMA_JZ4780
+       tristate "JZ4780 DMA support"
+       depends on MACH_JZ4780
+       select DMA_ENGINE
+       select DMA_VIRTUAL_CHANNELS
+       help
+         This selects support for the DMA controller in Ingenic JZ4780 SoCs.
+         If you have a board based on such a SoC and wish to use DMA for
+         devices which can use the DMA controller, say Y or M here.
+
 config K3_DMA
        tristate "Hisilicon K3 DMA support"
        depends on ARCH_HI3xxx
@@ -414,6 +435,14 @@ config IMG_MDC_DMA
        help
          Enable support for the IMG multi-threaded DMA controller (MDC).
 
+config XGENE_DMA
+       tristate "APM X-Gene DMA support"
+       select DMA_ENGINE
+       select DMA_ENGINE_RAID
+       select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+       help
+         Enable support for the APM X-Gene SoC DMA engine.
+
 config DMA_ENGINE
        bool
 
index 7e8301c..69f77d5 100644 (file)
@@ -41,9 +41,11 @@ obj-$(CONFIG_DMA_OMAP) += omap-dma.o
 obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
 obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
 obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
+obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o
 obj-$(CONFIG_TI_CPPI41) += cppi41.o
 obj-$(CONFIG_K3_DMA) += k3dma.o
 obj-$(CONFIG_MOXART_DMA) += moxart-dma.o
+obj-$(CONFIG_FSL_RAID) += fsl_raid.o
 obj-$(CONFIG_FSL_EDMA) += fsl-edma.o
 obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
 obj-y += xilinx/
@@ -51,3 +53,4 @@ obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o
 obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o
 obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o
 obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o
+obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
index 83aa55d..49d396e 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  * The full GNU General Public License is in this distribution in the file
  * called COPYING.
  *
@@ -1195,11 +1191,6 @@ static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x,
 /*
  * The DMA ENGINE API
  */
-static int pl08x_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
 static void pl08x_free_chan_resources(struct dma_chan *chan)
 {
        /* Ensure all queued descriptors are freed */
@@ -2066,7 +2057,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
        /* Initialize memcpy engine */
        dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
        pl08x->memcpy.dev = &adev->dev;
-       pl08x->memcpy.device_alloc_chan_resources = pl08x_alloc_chan_resources;
        pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources;
        pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy;
        pl08x->memcpy.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
@@ -2085,7 +2075,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
        dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
        dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
        pl08x->slave.dev = &adev->dev;
-       pl08x->slave.device_alloc_chan_resources = pl08x_alloc_chan_resources;
        pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources;
        pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
        pl08x->slave.device_tx_status = pl08x_dma_tx_status;
index 0b4fc6f..57b2141 100644 (file)
@@ -65,6 +65,21 @@ static void atc_issue_pending(struct dma_chan *chan);
 
 /*----------------------------------------------------------------------*/
 
+static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst,
+                                               size_t len)
+{
+       unsigned int width;
+
+       if (!((src | dst  | len) & 3))
+               width = 2;
+       else if (!((src | dst | len) & 1))
+               width = 1;
+       else
+               width = 0;
+
+       return width;
+}
+
 static struct at_desc *atc_first_active(struct at_dma_chan *atchan)
 {
        return list_first_entry(&atchan->active_list,
@@ -659,16 +674,10 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
         * We can be a lot more clever here, but this should take care
         * of the most common optimization.
         */
-       if (!((src | dest  | len) & 3)) {
-               ctrla = ATC_SRC_WIDTH_WORD | ATC_DST_WIDTH_WORD;
-               src_width = dst_width = 2;
-       } else if (!((src | dest | len) & 1)) {
-               ctrla = ATC_SRC_WIDTH_HALFWORD | ATC_DST_WIDTH_HALFWORD;
-               src_width = dst_width = 1;
-       } else {
-               ctrla = ATC_SRC_WIDTH_BYTE | ATC_DST_WIDTH_BYTE;
-               src_width = dst_width = 0;
-       }
+       src_width = dst_width = atc_get_xfer_width(src, dest, len);
+
+       ctrla = ATC_SRC_WIDTH(src_width) |
+               ATC_DST_WIDTH(dst_width);
 
        for (offset = 0; offset < len; offset += xfer_count << src_width) {
                xfer_count = min_t(size_t, (len - offset) >> src_width,
@@ -861,6 +870,144 @@ err:
        return NULL;
 }
 
+/**
+ * atc_prep_dma_sg - prepare memory to memory scather-gather operation
+ * @chan: the channel to prepare operation on
+ * @dst_sg: destination scatterlist
+ * @dst_nents: number of destination scatterlist entries
+ * @src_sg: source scatterlist
+ * @src_nents: number of source scatterlist entries
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_sg(struct dma_chan *chan,
+               struct scatterlist *dst_sg, unsigned int dst_nents,
+               struct scatterlist *src_sg, unsigned int src_nents,
+               unsigned long flags)
+{
+       struct at_dma_chan      *atchan = to_at_dma_chan(chan);
+       struct at_desc          *desc = NULL;
+       struct at_desc          *first = NULL;
+       struct at_desc          *prev = NULL;
+       unsigned int            src_width;
+       unsigned int            dst_width;
+       size_t                  xfer_count;
+       u32                     ctrla;
+       u32                     ctrlb;
+       size_t                  dst_len = 0, src_len = 0;
+       dma_addr_t              dst = 0, src = 0;
+       size_t                  len = 0, total_len = 0;
+
+       if (unlikely(dst_nents == 0 || src_nents == 0))
+               return NULL;
+
+       if (unlikely(dst_sg == NULL || src_sg == NULL))
+               return NULL;
+
+       ctrlb =   ATC_DEFAULT_CTRLB | ATC_IEN
+               | ATC_SRC_ADDR_MODE_INCR
+               | ATC_DST_ADDR_MODE_INCR
+               | ATC_FC_MEM2MEM;
+
+       /*
+        * loop until there is either no more source or no more destination
+        * scatterlist entry
+        */
+       while (true) {
+
+               /* prepare the next transfer */
+               if (dst_len == 0) {
+
+                       /* no more destination scatterlist entries */
+                       if (!dst_sg || !dst_nents)
+                               break;
+
+                       dst = sg_dma_address(dst_sg);
+                       dst_len = sg_dma_len(dst_sg);
+
+                       dst_sg = sg_next(dst_sg);
+                       dst_nents--;
+               }
+
+               if (src_len == 0) {
+
+                       /* no more source scatterlist entries */
+                       if (!src_sg || !src_nents)
+                               break;
+
+                       src = sg_dma_address(src_sg);
+                       src_len = sg_dma_len(src_sg);
+
+                       src_sg = sg_next(src_sg);
+                       src_nents--;
+               }
+
+               len = min_t(size_t, src_len, dst_len);
+               if (len == 0)
+                       continue;
+
+               /* take care for the alignment */
+               src_width = dst_width = atc_get_xfer_width(src, dst, len);
+
+               ctrla = ATC_SRC_WIDTH(src_width) |
+                       ATC_DST_WIDTH(dst_width);
+
+               /*
+                * The number of transfers to set up refer to the source width
+                * that depends on the alignment.
+                */
+               xfer_count = len >> src_width;
+               if (xfer_count > ATC_BTSIZE_MAX) {
+                       xfer_count = ATC_BTSIZE_MAX;
+                       len = ATC_BTSIZE_MAX << src_width;
+               }
+
+               /* create the transfer */
+               desc = atc_desc_get(atchan);
+               if (!desc)
+                       goto err_desc_get;
+
+               desc->lli.saddr = src;
+               desc->lli.daddr = dst;
+               desc->lli.ctrla = ctrla | xfer_count;
+               desc->lli.ctrlb = ctrlb;
+
+               desc->txd.cookie = 0;
+               desc->len = len;
+
+               /*
+                * Although we only need the transfer width for the first and
+                * the last descriptor, its easier to set it to all descriptors.
+                */
+               desc->tx_width = src_width;
+
+               atc_desc_chain(&first, &prev, desc);
+
+               /* update the lengths and addresses for the next loop cycle */
+               dst_len -= len;
+               src_len -= len;
+               dst += len;
+               src += len;
+
+               total_len += len;
+       }
+
+       /* First descriptor of the chain embedds additional information */
+       first->txd.cookie = -EBUSY;
+       first->total_len = total_len;
+
+       /* set end-of-link to the last link descriptor of list*/
+       set_desc_eol(desc);
+
+       first->txd.flags = flags; /* client is in control of this ack */
+
+       return &first->txd;
+
+err_desc_get:
+       atc_desc_put(atchan, first);
+       return NULL;
+}
+
 /**
  * atc_dma_cyclic_check_values
  * Check for too big/unaligned periods and unaligned DMA buffer
@@ -1461,8 +1608,10 @@ static int __init at_dma_probe(struct platform_device *pdev)
 
        /* setup platform data for each SoC */
        dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask);
+       dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask);
        dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask);
        dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask);
+       dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask);
 
        /* get DMA parameters from controller type */
        plat_dat = at_dma_get_driver_data(pdev);
@@ -1582,11 +1731,15 @@ static int __init at_dma_probe(struct platform_device *pdev)
                atdma->dma_common.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
        }
 
+       if (dma_has_cap(DMA_SG, atdma->dma_common.cap_mask))
+               atdma->dma_common.device_prep_dma_sg = atc_prep_dma_sg;
+
        dma_writel(atdma, EN, AT_DMA_ENABLE);
 
-       dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n",
+       dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n",
          dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "",
          dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)  ? "slave " : "",
+         dma_has_cap(DMA_SG, atdma->dma_common.cap_mask)  ? "sg-cpy " : "",
          plat_dat->nr_channels);
 
        dma_async_device_register(&atdma->dma_common);
index d9891d3..933e4b3 100644 (file)
@@ -1154,8 +1154,10 @@ static int at_xdmac_device_resume(struct dma_chan *chan)
        dev_dbg(chan2dev(chan), "%s\n", __func__);
 
        spin_lock_bh(&atchan->lock);
-       if (!at_xdmac_chan_is_paused(atchan))
+       if (!at_xdmac_chan_is_paused(atchan)) {
+               spin_unlock_bh(&atchan->lock);
                return 0;
+       }
 
        at_xdmac_write(atxdmac, AT_XDMAC_GRWR, atchan->mask);
        clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
index fa378d8..180fedb 100644 (file)
@@ -30,7 +30,7 @@
 #define DRIVER_NAME "bestcomm-core"
 
 /* MPC5200 device tree match tables */
-static struct of_device_id mpc52xx_sram_ids[] = {
+static const struct of_device_id mpc52xx_sram_ids[] = {
        { .compatible = "fsl,mpc5200-sram", },
        { .compatible = "mpc5200-sram", },
        {}
@@ -481,7 +481,7 @@ static int mpc52xx_bcom_remove(struct platform_device *op)
        return 0;
 }
 
-static struct of_device_id mpc52xx_bcom_of_match[] = {
+static const struct of_device_id mpc52xx_bcom_of_match[] = {
        { .compatible = "fsl,mpc5200-bestcomm", },
        { .compatible = "mpc5200-bestcomm", },
        {},
index 8488441..7638b24 100644 (file)
@@ -7,10 +7,6 @@
  *  Free Software Foundation;  either version 2 of the License, or (at your
  *  option) any later version.
  *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
  */
 
 #include <linux/dmaengine.h>
@@ -343,7 +339,7 @@ static void jz4740_dma_chan_irq(struct jz4740_dmaengine_chan *chan)
 {
        spin_lock(&chan->vchan.lock);
        if (chan->desc) {
-               if (chan->desc && chan->desc->cyclic) {
+               if (chan->desc->cyclic) {
                        vchan_cyclic_callback(&chan->desc->vdesc);
                } else {
                        if (chan->next_sg == chan->desc->num_sgs) {
@@ -496,11 +492,6 @@ static enum dma_status jz4740_dma_tx_status(struct dma_chan *c,
        return status;
 }
 
-static int jz4740_dma_alloc_chan_resources(struct dma_chan *c)
-{
-       return 0;
-}
-
 static void jz4740_dma_free_chan_resources(struct dma_chan *c)
 {
        vchan_free_chan_resources(to_virt_chan(c));
@@ -543,7 +534,6 @@ static int jz4740_dma_probe(struct platform_device *pdev)
 
        dma_cap_set(DMA_SLAVE, dd->cap_mask);
        dma_cap_set(DMA_CYCLIC, dd->cap_mask);
-       dd->device_alloc_chan_resources = jz4740_dma_alloc_chan_resources;
        dd->device_free_chan_resources = jz4740_dma_free_chan_resources;
        dd->device_tx_status = jz4740_dma_tx_status;
        dd->device_issue_pending = jz4740_dma_issue_pending;
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
new file mode 100644 (file)
index 0000000..26d2f0e
--- /dev/null
@@ -0,0 +1,877 @@
+/*
+ * Ingenic JZ4780 DMA controller
+ *
+ * Copyright (c) 2015 Imagination Technologies
+ * Author: Alex Smith <alex@alex-smith.me.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define JZ_DMA_NR_CHANNELS     32
+
+/* Global registers. */
+#define JZ_DMA_REG_DMAC                0x1000
+#define JZ_DMA_REG_DIRQP       0x1004
+#define JZ_DMA_REG_DDR         0x1008
+#define JZ_DMA_REG_DDRS                0x100c
+#define JZ_DMA_REG_DMACP       0x101c
+#define JZ_DMA_REG_DSIRQP      0x1020
+#define JZ_DMA_REG_DSIRQM      0x1024
+#define JZ_DMA_REG_DCIRQP      0x1028
+#define JZ_DMA_REG_DCIRQM      0x102c
+
+/* Per-channel registers. */
+#define JZ_DMA_REG_CHAN(n)     (n * 0x20)
+#define JZ_DMA_REG_DSA(n)      (0x00 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DTA(n)      (0x04 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DTC(n)      (0x08 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DRT(n)      (0x0c + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DCS(n)      (0x10 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DCM(n)      (0x14 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DDA(n)      (0x18 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DSD(n)      (0x1c + JZ_DMA_REG_CHAN(n))
+
+#define JZ_DMA_DMAC_DMAE       BIT(0)
+#define JZ_DMA_DMAC_AR         BIT(2)
+#define JZ_DMA_DMAC_HLT                BIT(3)
+#define JZ_DMA_DMAC_FMSC       BIT(31)
+
+#define JZ_DMA_DRT_AUTO                0x8
+
+#define JZ_DMA_DCS_CTE         BIT(0)
+#define JZ_DMA_DCS_HLT         BIT(2)
+#define JZ_DMA_DCS_TT          BIT(3)
+#define JZ_DMA_DCS_AR          BIT(4)
+#define JZ_DMA_DCS_DES8                BIT(30)
+
+#define JZ_DMA_DCM_LINK                BIT(0)
+#define JZ_DMA_DCM_TIE         BIT(1)
+#define JZ_DMA_DCM_STDE                BIT(2)
+#define JZ_DMA_DCM_TSZ_SHIFT   8
+#define JZ_DMA_DCM_TSZ_MASK    (0x7 << JZ_DMA_DCM_TSZ_SHIFT)
+#define JZ_DMA_DCM_DP_SHIFT    12
+#define JZ_DMA_DCM_SP_SHIFT    14
+#define JZ_DMA_DCM_DAI         BIT(22)
+#define JZ_DMA_DCM_SAI         BIT(23)
+
+#define JZ_DMA_SIZE_4_BYTE     0x0
+#define JZ_DMA_SIZE_1_BYTE     0x1
+#define JZ_DMA_SIZE_2_BYTE     0x2
+#define JZ_DMA_SIZE_16_BYTE    0x3
+#define JZ_DMA_SIZE_32_BYTE    0x4
+#define JZ_DMA_SIZE_64_BYTE    0x5
+#define JZ_DMA_SIZE_128_BYTE   0x6
+
+#define JZ_DMA_WIDTH_32_BIT    0x0
+#define JZ_DMA_WIDTH_8_BIT     0x1
+#define JZ_DMA_WIDTH_16_BIT    0x2
+
+#define JZ_DMA_BUSWIDTHS       (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE)  | \
+                                BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+                                BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+/**
+ * struct jz4780_dma_hwdesc - descriptor structure read by the DMA controller.
+ * @dcm: value for the DCM (channel command) register
+ * @dsa: source address
+ * @dta: target address
+ * @dtc: transfer count (number of blocks of the transfer size specified in DCM
+ * to transfer) in the low 24 bits, offset of the next descriptor from the
+ * descriptor base address in the upper 8 bits.
+ * @sd: target/source stride difference (in stride transfer mode).
+ * @drt: request type
+ */
+struct jz4780_dma_hwdesc {
+       uint32_t dcm;
+       uint32_t dsa;
+       uint32_t dta;
+       uint32_t dtc;
+       uint32_t sd;
+       uint32_t drt;
+       uint32_t reserved[2];
+};
+
+/* Size of allocations for hardware descriptor blocks. */
+#define JZ_DMA_DESC_BLOCK_SIZE PAGE_SIZE
+#define JZ_DMA_MAX_DESC                \
+       (JZ_DMA_DESC_BLOCK_SIZE / sizeof(struct jz4780_dma_hwdesc))
+
+struct jz4780_dma_desc {
+       struct virt_dma_desc vdesc;
+
+       struct jz4780_dma_hwdesc *desc;
+       dma_addr_t desc_phys;
+       unsigned int count;
+       enum dma_transaction_type type;
+       uint32_t status;
+};
+
+struct jz4780_dma_chan {
+       struct virt_dma_chan vchan;
+       unsigned int id;
+       struct dma_pool *desc_pool;
+
+       uint32_t transfer_type;
+       uint32_t transfer_shift;
+       struct dma_slave_config config;
+
+       struct jz4780_dma_desc *desc;
+       unsigned int curr_hwdesc;
+};
+
+struct jz4780_dma_dev {
+       struct dma_device dma_device;
+       void __iomem *base;
+       struct clk *clk;
+       unsigned int irq;
+
+       uint32_t chan_reserved;
+       struct jz4780_dma_chan chan[JZ_DMA_NR_CHANNELS];
+};
+
+struct jz4780_dma_data {
+       uint32_t transfer_type;
+       int channel;
+};
+
+static inline struct jz4780_dma_chan *to_jz4780_dma_chan(struct dma_chan *chan)
+{
+       return container_of(chan, struct jz4780_dma_chan, vchan.chan);
+}
+
+static inline struct jz4780_dma_desc *to_jz4780_dma_desc(
+       struct virt_dma_desc *vdesc)
+{
+       return container_of(vdesc, struct jz4780_dma_desc, vdesc);
+}
+
+static inline struct jz4780_dma_dev *jz4780_dma_chan_parent(
+       struct jz4780_dma_chan *jzchan)
+{
+       return container_of(jzchan->vchan.chan.device, struct jz4780_dma_dev,
+                           dma_device);
+}
+
+static inline uint32_t jz4780_dma_readl(struct jz4780_dma_dev *jzdma,
+       unsigned int reg)
+{
+       return readl(jzdma->base + reg);
+}
+
+static inline void jz4780_dma_writel(struct jz4780_dma_dev *jzdma,
+       unsigned int reg, uint32_t val)
+{
+       writel(val, jzdma->base + reg);
+}
+
+static struct jz4780_dma_desc *jz4780_dma_desc_alloc(
+       struct jz4780_dma_chan *jzchan, unsigned int count,
+       enum dma_transaction_type type)
+{
+       struct jz4780_dma_desc *desc;
+
+       if (count > JZ_DMA_MAX_DESC)
+               return NULL;
+
+       desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+       if (!desc)
+               return NULL;
+
+       desc->desc = dma_pool_alloc(jzchan->desc_pool, GFP_NOWAIT,
+                                   &desc->desc_phys);
+       if (!desc->desc) {
+               kfree(desc);
+               return NULL;
+       }
+
+       desc->count = count;
+       desc->type = type;
+       return desc;
+}
+
+static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+       struct jz4780_dma_desc *desc = to_jz4780_dma_desc(vdesc);
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(vdesc->tx.chan);
+
+       dma_pool_free(jzchan->desc_pool, desc->desc, desc->desc_phys);
+       kfree(desc);
+}
+
+static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord)
+{
+       *ord = ffs(val) - 1;
+
+       switch (*ord) {
+       case 0:
+               return JZ_DMA_SIZE_1_BYTE;
+       case 1:
+               return JZ_DMA_SIZE_2_BYTE;
+       case 2:
+               return JZ_DMA_SIZE_4_BYTE;
+       case 4:
+               return JZ_DMA_SIZE_16_BYTE;
+       case 5:
+               return JZ_DMA_SIZE_32_BYTE;
+       case 6:
+               return JZ_DMA_SIZE_64_BYTE;
+       case 7:
+               return JZ_DMA_SIZE_128_BYTE;
+       default:
+               return -EINVAL;
+       }
+}
+
+static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan,
+       struct jz4780_dma_hwdesc *desc, dma_addr_t addr, size_t len,
+       enum dma_transfer_direction direction)
+{
+       struct dma_slave_config *config = &jzchan->config;
+       uint32_t width, maxburst, tsz;
+       int ord;
+
+       if (direction == DMA_MEM_TO_DEV) {
+               desc->dcm = JZ_DMA_DCM_SAI;
+               desc->dsa = addr;
+               desc->dta = config->dst_addr;
+               desc->drt = jzchan->transfer_type;
+
+               width = config->dst_addr_width;
+               maxburst = config->dst_maxburst;
+       } else {
+               desc->dcm = JZ_DMA_DCM_DAI;
+               desc->dsa = config->src_addr;
+               desc->dta = addr;
+               desc->drt = jzchan->transfer_type;
+
+               width = config->src_addr_width;
+               maxburst = config->src_maxburst;
+       }
+
+       /*
+        * This calculates the maximum transfer size that can be used with the
+        * given address, length, width and maximum burst size. The address
+        * must be aligned to the transfer size, the total length must be
+        * divisible by the transfer size, and we must not use more than the
+        * maximum burst specified by the user.
+        */
+       tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), &ord);
+       jzchan->transfer_shift = ord;
+
+       switch (width) {
+       case DMA_SLAVE_BUSWIDTH_1_BYTE:
+       case DMA_SLAVE_BUSWIDTH_2_BYTES:
+               break;
+       case DMA_SLAVE_BUSWIDTH_4_BYTES:
+               width = JZ_DMA_WIDTH_32_BIT;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       desc->dcm |= tsz << JZ_DMA_DCM_TSZ_SHIFT;
+       desc->dcm |= width << JZ_DMA_DCM_SP_SHIFT;
+       desc->dcm |= width << JZ_DMA_DCM_DP_SHIFT;
+
+       desc->dtc = len >> ord;
+}
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg(
+       struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+       enum dma_transfer_direction direction, unsigned long flags)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct jz4780_dma_desc *desc;
+       unsigned int i;
+       int err;
+
+       desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE);
+       if (!desc)
+               return NULL;
+
+       for (i = 0; i < sg_len; i++) {
+               err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i],
+                                       sg_dma_address(&sgl[i]),
+                                       sg_dma_len(&sgl[i]),
+                                       direction);
+               if (err < 0)
+                       return ERR_PTR(err);
+
+
+               desc->desc[i].dcm |= JZ_DMA_DCM_TIE;
+
+               if (i != (sg_len - 1)) {
+                       /* Automatically proceeed to the next descriptor. */
+                       desc->desc[i].dcm |= JZ_DMA_DCM_LINK;
+
+                       /*
+                        * The upper 8 bits of the DTC field in the descriptor
+                        * must be set to (offset from descriptor base of next
+                        * descriptor >> 4).
+                        */
+                       desc->desc[i].dtc |=
+                               (((i + 1) * sizeof(*desc->desc)) >> 4) << 24;
+               }
+       }
+
+       return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic(
+       struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+       size_t period_len, enum dma_transfer_direction direction,
+       unsigned long flags)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct jz4780_dma_desc *desc;
+       unsigned int periods, i;
+       int err;
+
+       if (buf_len % period_len)
+               return NULL;
+
+       periods = buf_len / period_len;
+
+       desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC);
+       if (!desc)
+               return NULL;
+
+       for (i = 0; i < periods; i++) {
+               err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], buf_addr,
+                                       period_len, direction);
+               if (err < 0)
+                       return ERR_PTR(err);
+
+               buf_addr += period_len;
+
+               /*
+                * Set the link bit to indicate that the controller should
+                * automatically proceed to the next descriptor. In
+                * jz4780_dma_begin(), this will be cleared if we need to issue
+                * an interrupt after each period.
+                */
+               desc->desc[i].dcm |= JZ_DMA_DCM_TIE | JZ_DMA_DCM_LINK;
+
+               /*
+                * The upper 8 bits of the DTC field in the descriptor must be
+                * set to (offset from descriptor base of next descriptor >> 4).
+                * If this is the last descriptor, link it back to the first,
+                * i.e. leave offset set to 0, otherwise point to the next one.
+                */
+               if (i != (periods - 1)) {
+                       desc->desc[i].dtc |=
+                               (((i + 1) * sizeof(*desc->desc)) >> 4) << 24;
+               }
+       }
+
+       return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy(
+       struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+       size_t len, unsigned long flags)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct jz4780_dma_desc *desc;
+       uint32_t tsz;
+       int ord;
+
+       desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY);
+       if (!desc)
+               return NULL;
+
+       tsz = jz4780_dma_transfer_size(dest | src | len, &ord);
+       if (tsz < 0)
+               return ERR_PTR(tsz);
+
+       desc->desc[0].dsa = src;
+       desc->desc[0].dta = dest;
+       desc->desc[0].drt = JZ_DMA_DRT_AUTO;
+       desc->desc[0].dcm = JZ_DMA_DCM_TIE | JZ_DMA_DCM_SAI | JZ_DMA_DCM_DAI |
+                           tsz << JZ_DMA_DCM_TSZ_SHIFT |
+                           JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_SP_SHIFT |
+                           JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_DP_SHIFT;
+       desc->desc[0].dtc = len >> ord;
+
+       return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+static void jz4780_dma_begin(struct jz4780_dma_chan *jzchan)
+{
+       struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+       struct virt_dma_desc *vdesc;
+       unsigned int i;
+       dma_addr_t desc_phys;
+
+       if (!jzchan->desc) {
+               vdesc = vchan_next_desc(&jzchan->vchan);
+               if (!vdesc)
+                       return;
+
+               list_del(&vdesc->node);
+
+               jzchan->desc = to_jz4780_dma_desc(vdesc);
+               jzchan->curr_hwdesc = 0;
+
+               if (jzchan->desc->type == DMA_CYCLIC && vdesc->tx.callback) {
+                       /*
+                        * The DMA controller doesn't support triggering an
+                        * interrupt after processing each descriptor, only
+                        * after processing an entire terminated list of
+                        * descriptors. For a cyclic DMA setup the list of
+                        * descriptors is not terminated so we can never get an
+                        * interrupt.
+                        *
+                        * If the user requested a callback for a cyclic DMA
+                        * setup then we workaround this hardware limitation
+                        * here by degrading to a set of unlinked descriptors
+                        * which we will submit in sequence in response to the
+                        * completion of processing the previous descriptor.
+                        */
+                       for (i = 0; i < jzchan->desc->count; i++)
+                               jzchan->desc->desc[i].dcm &= ~JZ_DMA_DCM_LINK;
+               }
+       } else {
+               /*
+                * There is an existing transfer, therefore this must be one
+                * for which we unlinked the descriptors above. Advance to the
+                * next one in the list.
+                */
+               jzchan->curr_hwdesc =
+                       (jzchan->curr_hwdesc + 1) % jzchan->desc->count;
+       }
+
+       /* Use 8-word descriptors. */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id), JZ_DMA_DCS_DES8);
+
+       /* Write descriptor address and initiate descriptor fetch. */
+       desc_phys = jzchan->desc->desc_phys +
+                   (jzchan->curr_hwdesc * sizeof(*jzchan->desc->desc));
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DDA(jzchan->id), desc_phys);
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DDRS, BIT(jzchan->id));
+
+       /* Enable the channel. */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id),
+                         JZ_DMA_DCS_DES8 | JZ_DMA_DCS_CTE);
+}
+
+static void jz4780_dma_issue_pending(struct dma_chan *chan)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       unsigned long flags;
+
+       spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+       if (vchan_issue_pending(&jzchan->vchan) && !jzchan->desc)
+               jz4780_dma_begin(jzchan);
+
+       spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+}
+
+static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan)
+{
+       struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+       unsigned long flags;
+       LIST_HEAD(head);
+
+       spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+       /* Clear the DMA status and stop the transfer. */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id), 0);
+       if (jzchan->desc) {
+               jz4780_dma_desc_free(&jzchan->desc->vdesc);
+               jzchan->desc = NULL;
+       }
+
+       vchan_get_all_descriptors(&jzchan->vchan, &head);
+
+       spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+
+       vchan_dma_desc_free_list(&jzchan->vchan, &head);
+       return 0;
+}
+
+static int jz4780_dma_slave_config(struct jz4780_dma_chan *jzchan,
+       const struct dma_slave_config *config)
+{
+       if ((config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+          || (config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES))
+               return -EINVAL;
+
+       /* Copy the reset of the slave configuration, it is used later. */
+       memcpy(&jzchan->config, config, sizeof(jzchan->config));
+
+       return 0;
+}
+
+static size_t jz4780_dma_desc_residue(struct jz4780_dma_chan *jzchan,
+       struct jz4780_dma_desc *desc, unsigned int next_sg)
+{
+       struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+       unsigned int residue, count;
+       unsigned int i;
+
+       residue = 0;
+
+       for (i = next_sg; i < desc->count; i++)
+               residue += desc->desc[i].dtc << jzchan->transfer_shift;
+
+       if (next_sg != 0) {
+               count = jz4780_dma_readl(jzdma,
+                                        JZ_DMA_REG_DTC(jzchan->id));
+               residue += count << jzchan->transfer_shift;
+       }
+
+       return residue;
+}
+
+static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan,
+       dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct virt_dma_desc *vdesc;
+       enum dma_status status;
+       unsigned long flags;
+
+       status = dma_cookie_status(chan, cookie, txstate);
+       if ((status == DMA_COMPLETE) || (txstate == NULL))
+               return status;
+
+       spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+       vdesc = vchan_find_desc(&jzchan->vchan, cookie);
+       if (vdesc) {
+               /* On the issued list, so hasn't been processed yet */
+               txstate->residue = jz4780_dma_desc_residue(jzchan,
+                                       to_jz4780_dma_desc(vdesc), 0);
+       } else if (cookie == jzchan->desc->vdesc.tx.cookie) {
+               txstate->residue = jz4780_dma_desc_residue(jzchan, jzchan->desc,
+                         (jzchan->curr_hwdesc + 1) % jzchan->desc->count);
+       } else
+               txstate->residue = 0;
+
+       if (vdesc && jzchan->desc && vdesc == &jzchan->desc->vdesc
+               && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT))
+                       status = DMA_ERROR;
+
+       spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+       return status;
+}
+
+static void jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma,
+       struct jz4780_dma_chan *jzchan)
+{
+       uint32_t dcs;
+
+       spin_lock(&jzchan->vchan.lock);
+
+       dcs = jz4780_dma_readl(jzdma, JZ_DMA_REG_DCS(jzchan->id));
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id), 0);
+
+       if (dcs & JZ_DMA_DCS_AR) {
+               dev_warn(&jzchan->vchan.chan.dev->device,
+                        "address error (DCS=0x%x)\n", dcs);
+       }
+
+       if (dcs & JZ_DMA_DCS_HLT) {
+               dev_warn(&jzchan->vchan.chan.dev->device,
+                        "channel halt (DCS=0x%x)\n", dcs);
+       }
+
+       if (jzchan->desc) {
+               jzchan->desc->status = dcs;
+
+               if ((dcs & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) == 0) {
+                       if (jzchan->desc->type == DMA_CYCLIC) {
+                               vchan_cyclic_callback(&jzchan->desc->vdesc);
+                       } else {
+                               vchan_cookie_complete(&jzchan->desc->vdesc);
+                               jzchan->desc = NULL;
+                       }
+
+                       jz4780_dma_begin(jzchan);
+               }
+       } else {
+               dev_err(&jzchan->vchan.chan.dev->device,
+                       "channel IRQ with no active transfer\n");
+       }
+
+       spin_unlock(&jzchan->vchan.lock);
+}
+
+static irqreturn_t jz4780_dma_irq_handler(int irq, void *data)
+{
+       struct jz4780_dma_dev *jzdma = data;
+       uint32_t pending, dmac;
+       int i;
+
+       pending = jz4780_dma_readl(jzdma, JZ_DMA_REG_DIRQP);
+
+       for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) {
+               if (!(pending & (1<<i)))
+                       continue;
+
+               jz4780_dma_chan_irq(jzdma, &jzdma->chan[i]);
+       }
+
+       /* Clear halt and address error status of all channels. */
+       dmac = jz4780_dma_readl(jzdma, JZ_DMA_REG_DMAC);
+       dmac &= ~(JZ_DMA_DMAC_HLT | JZ_DMA_DMAC_AR);
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DMAC, dmac);
+
+       /* Clear interrupt pending status. */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DIRQP, 0);
+
+       return IRQ_HANDLED;
+}
+
+static int jz4780_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+       jzchan->desc_pool = dma_pool_create(dev_name(&chan->dev->device),
+                                           chan->device->dev,
+                                           JZ_DMA_DESC_BLOCK_SIZE,
+                                           PAGE_SIZE, 0);
+       if (!jzchan->desc_pool) {
+               dev_err(&chan->dev->device,
+                       "failed to allocate descriptor pool\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void jz4780_dma_free_chan_resources(struct dma_chan *chan)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+       vchan_free_chan_resources(&jzchan->vchan);
+       dma_pool_destroy(jzchan->desc_pool);
+       jzchan->desc_pool = NULL;
+}
+
+static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+       struct jz4780_dma_data *data = param;
+
+       if (data->channel > -1) {
+               if (data->channel != jzchan->id)
+                       return false;
+       } else if (jzdma->chan_reserved & BIT(jzchan->id)) {
+               return false;
+       }
+
+       jzchan->transfer_type = data->transfer_type;
+
+       return true;
+}
+
+static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec,
+       struct of_dma *ofdma)
+{
+       struct jz4780_dma_dev *jzdma = ofdma->of_dma_data;
+       dma_cap_mask_t mask = jzdma->dma_device.cap_mask;
+       struct jz4780_dma_data data;
+
+       if (dma_spec->args_count != 2)
+               return NULL;
+
+       data.transfer_type = dma_spec->args[0];
+       data.channel = dma_spec->args[1];
+
+       if (data.channel > -1) {
+               if (data.channel >= JZ_DMA_NR_CHANNELS) {
+                       dev_err(jzdma->dma_device.dev,
+                               "device requested non-existent channel %u\n",
+                               data.channel);
+                       return NULL;
+               }
+
+               /* Can only select a channel marked as reserved. */
+               if (!(jzdma->chan_reserved & BIT(data.channel))) {
+                       dev_err(jzdma->dma_device.dev,
+                               "device requested unreserved channel %u\n",
+                               data.channel);
+                       return NULL;
+               }
+       }
+
+       return dma_request_channel(mask, jz4780_dma_filter_fn, &data);
+}
+
+static int jz4780_dma_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct jz4780_dma_dev *jzdma;
+       struct jz4780_dma_chan *jzchan;
+       struct dma_device *dd;
+       struct resource *res;
+       int i, ret;
+
+       jzdma = devm_kzalloc(dev, sizeof(*jzdma), GFP_KERNEL);
+       if (!jzdma)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, jzdma);
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(dev, "failed to get I/O memory\n");
+               return -EINVAL;
+       }
+
+       jzdma->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(jzdma->base))
+               return PTR_ERR(jzdma->base);
+
+       jzdma->irq = platform_get_irq(pdev, 0);
+       if (jzdma->irq < 0) {
+               dev_err(dev, "failed to get IRQ: %d\n", ret);
+               return jzdma->irq;
+       }
+
+       ret = devm_request_irq(dev, jzdma->irq, jz4780_dma_irq_handler, 0,
+                              dev_name(dev), jzdma);
+       if (ret) {
+               dev_err(dev, "failed to request IRQ %u!\n", jzdma->irq);
+               return -EINVAL;
+       }
+
+       jzdma->clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(jzdma->clk)) {
+               dev_err(dev, "failed to get clock\n");
+               return PTR_ERR(jzdma->clk);
+       }
+
+       clk_prepare_enable(jzdma->clk);
+
+       /* Property is optional, if it doesn't exist the value will remain 0. */
+       of_property_read_u32_index(dev->of_node, "ingenic,reserved-channels",
+                                  0, &jzdma->chan_reserved);
+
+       dd = &jzdma->dma_device;
+
+       dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+       dma_cap_set(DMA_SLAVE, dd->cap_mask);
+       dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+
+       dd->dev = dev;
+       dd->copy_align = 2; /* 2^2 = 4 byte alignment */
+       dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources;
+       dd->device_free_chan_resources = jz4780_dma_free_chan_resources;
+       dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg;
+       dd->device_prep_dma_cyclic = jz4780_dma_prep_dma_cyclic;
+       dd->device_prep_dma_memcpy = jz4780_dma_prep_dma_memcpy;
+       dd->device_config = jz4780_dma_slave_config;
+       dd->device_terminate_all = jz4780_dma_terminate_all;
+       dd->device_tx_status = jz4780_dma_tx_status;
+       dd->device_issue_pending = jz4780_dma_issue_pending;
+       dd->src_addr_widths = JZ_DMA_BUSWIDTHS;
+       dd->dst_addr_widths = JZ_DMA_BUSWIDTHS;
+       dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+       dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+
+       /*
+        * Enable DMA controller, mark all channels as not programmable.
+        * Also set the FMSC bit - it increases MSC performance, so it makes
+        * little sense not to enable it.
+        */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DMAC,
+                         JZ_DMA_DMAC_DMAE | JZ_DMA_DMAC_FMSC);
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DMACP, 0);
+
+       INIT_LIST_HEAD(&dd->channels);
+
+       for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) {
+               jzchan = &jzdma->chan[i];
+               jzchan->id = i;
+
+               vchan_init(&jzchan->vchan, dd);
+               jzchan->vchan.desc_free = jz4780_dma_desc_free;
+       }
+
+       ret = dma_async_device_register(dd);
+       if (ret) {
+               dev_err(dev, "failed to register device\n");
+               goto err_disable_clk;
+       }
+
+       /* Register with OF DMA helpers. */
+       ret = of_dma_controller_register(dev->of_node, jz4780_of_dma_xlate,
+                                        jzdma);
+       if (ret) {
+               dev_err(dev, "failed to register OF DMA controller\n");
+               goto err_unregister_dev;
+       }
+
+       dev_info(dev, "JZ4780 DMA controller initialised\n");
+       return 0;
+
+err_unregister_dev:
+       dma_async_device_unregister(dd);
+
+err_disable_clk:
+       clk_disable_unprepare(jzdma->clk);
+       return ret;
+}
+
+static int jz4780_dma_remove(struct platform_device *pdev)
+{
+       struct jz4780_dma_dev *jzdma = platform_get_drvdata(pdev);
+
+       of_dma_controller_free(pdev->dev.of_node);
+       devm_free_irq(&pdev->dev, jzdma->irq, jzdma);
+       dma_async_device_unregister(&jzdma->dma_device);
+       return 0;
+}
+
+static const struct of_device_id jz4780_dma_dt_match[] = {
+       { .compatible = "ingenic,jz4780-dma", .data = NULL },
+       {},
+};
+MODULE_DEVICE_TABLE(of, jz4780_dma_dt_match);
+
+static struct platform_driver jz4780_dma_driver = {
+       .probe          = jz4780_dma_probe,
+       .remove         = jz4780_dma_remove,
+       .driver = {
+               .name   = "jz4780-dma",
+               .of_match_table = of_match_ptr(jz4780_dma_dt_match),
+       },
+};
+
+static int __init jz4780_dma_init(void)
+{
+       return platform_driver_register(&jz4780_dma_driver);
+}
+subsys_initcall(jz4780_dma_init);
+
+static void __exit jz4780_dma_exit(void)
+{
+       platform_driver_unregister(&jz4780_dma_driver);
+}
+module_exit(jz4780_dma_exit);
+
+MODULE_AUTHOR("Alex Smith <alex@alex-smith.me.uk>");
+MODULE_DESCRIPTION("Ingenic JZ4780 DMA controller driver");
+MODULE_LICENSE("GPL");
index ac336a9..0e035a8 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  * The full GNU General Public License is included in this distribution in the
  * file called COPYING.
  */
@@ -355,20 +351,6 @@ struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
 }
 EXPORT_SYMBOL(dma_find_channel);
 
-/*
- * net_dma_find_channel - find a channel for net_dma
- * net_dma has alignment requirements
- */
-struct dma_chan *net_dma_find_channel(void)
-{
-       struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);
-       if (chan && !is_dma_copy_aligned(chan->device, 1, 1, 1))
-               return NULL;
-
-       return chan;
-}
-EXPORT_SYMBOL(net_dma_find_channel);
-
 /**
  * dma_issue_pending_all - flush all pending operations across all channels
  */
index dcfe964..36e02f0 100644 (file)
@@ -3,7 +3,7 @@
 #
 
 config DW_DMAC_CORE
-       tristate "Synopsys DesignWare AHB DMA support"
+       tristate
        select DMA_ENGINE
 
 config DW_DMAC
index a8ad052..1022c2e 100644 (file)
@@ -230,7 +230,8 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
        /* ASSERT:  channel is idle */
        if (dma_readl(dw, CH_EN) & dwc->mask) {
                dev_err(chan2dev(&dwc->chan),
-                       "BUG: Attempted to start non-idle channel\n");
+                       "%s: BUG: Attempted to start non-idle channel\n",
+                       __func__);
                dwc_dump_chan_regs(dwc);
 
                /* The tasklet will hopefully advance the queue... */
@@ -814,11 +815,8 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 slave_sg_todev_fill_desc:
                        desc = dwc_desc_get(dwc);
-                       if (!desc) {
-                               dev_err(chan2dev(chan),
-                                       "not enough descriptors available\n");
+                       if (!desc)
                                goto err_desc_get;
-                       }
 
                        desc->lli.sar = mem;
                        desc->lli.dar = reg;
@@ -874,11 +872,8 @@ slave_sg_todev_fill_desc:
 
 slave_sg_fromdev_fill_desc:
                        desc = dwc_desc_get(dwc);
-                       if (!desc) {
-                               dev_err(chan2dev(chan),
-                                               "not enough descriptors available\n");
+                       if (!desc)
                                goto err_desc_get;
-                       }
 
                        desc->lli.sar = reg;
                        desc->lli.dar = mem;
@@ -922,6 +917,8 @@ slave_sg_fromdev_fill_desc:
        return &first->txd;
 
 err_desc_get:
+       dev_err(chan2dev(chan),
+               "not enough descriptors available. Direction %d\n", direction);
        dwc_desc_put(dwc, first);
        return NULL;
 }
@@ -1261,7 +1258,8 @@ int dw_dma_cyclic_start(struct dma_chan *chan)
        /* Assert channel is idle */
        if (dma_readl(dw, CH_EN) & dwc->mask) {
                dev_err(chan2dev(&dwc->chan),
-                       "BUG: Attempted to start non-idle channel\n");
+                       "%s: BUG: Attempted to start non-idle channel\n",
+                       __func__);
                dwc_dump_chan_regs(dwc);
                spin_unlock_irqrestore(&dwc->lock, flags);
                return -EBUSY;
index 53dbd3b..bf09db7 100644 (file)
@@ -812,7 +812,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan)
        LIST_HEAD(descs);
 
        a_ch_num = edma_alloc_channel(echan->ch_num, edma_callback,
-                                       chan, EVENTQ_DEFAULT);
+                                       echan, EVENTQ_DEFAULT);
 
        if (a_ch_num < 0) {
                ret = -ENODEV;
diff --git a/drivers/dma/fsl_raid.c b/drivers/dma/fsl_raid.c
new file mode 100644 (file)
index 0000000..4d9470f
--- /dev/null
@@ -0,0 +1,904 @@
+/*
+ * drivers/dma/fsl_raid.c
+ *
+ * Freescale RAID Engine device driver
+ *
+ * Author:
+ *     Harninder Rai <harninder.rai@freescale.com>
+ *     Naveen Burmi <naveenburmi@freescale.com>
+ *
+ * Rewrite:
+ *     Xuelin Shi <xuelin.shi@freescale.com>
+ *
+ * Copyright (c) 2010-2014 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Theory of operation:
+ *
+ * General capabilities:
+ *     RAID Engine (RE) block is capable of offloading XOR, memcpy and P/Q
+ *     calculations required in RAID5 and RAID6 operations. RE driver
+ *     registers with Linux's ASYNC layer as dma driver. RE hardware
+ *     maintains strict ordering of the requests through chained
+ *     command queueing.
+ *
+ * Data flow:
+ *     Software RAID layer of Linux (MD layer) maintains RAID partitions,
+ *     strips, stripes etc. It sends requests to the underlying ASYNC layer
+ *     which further passes it to RE driver. ASYNC layer decides which request
+ *     goes to which job ring of RE hardware. For every request processed by
+ *     RAID Engine, driver gets an interrupt unless coalescing is set. The
+ *     per job ring interrupt handler checks the status register for errors,
+ *     clears the interrupt and leave the post interrupt processing to the irq
+ *     thread.
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "fsl_raid.h"
+
+#define FSL_RE_MAX_XOR_SRCS    16
+#define FSL_RE_MAX_PQ_SRCS     16
+#define FSL_RE_MIN_DESCS       256
+#define FSL_RE_MAX_DESCS       (4 * FSL_RE_MIN_DESCS)
+#define FSL_RE_FRAME_FORMAT    0x1
+#define FSL_RE_MAX_DATA_LEN    (1024*1024)
+
+#define to_fsl_re_dma_desc(tx) container_of(tx, struct fsl_re_desc, async_tx)
+
+/* Add descriptors into per chan software queue - submit_q */
+static dma_cookie_t fsl_re_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct fsl_re_desc *desc;
+       struct fsl_re_chan *re_chan;
+       dma_cookie_t cookie;
+       unsigned long flags;
+
+       desc = to_fsl_re_dma_desc(tx);
+       re_chan = container_of(tx->chan, struct fsl_re_chan, chan);
+
+       spin_lock_irqsave(&re_chan->desc_lock, flags);
+       cookie = dma_cookie_assign(tx);
+       list_add_tail(&desc->node, &re_chan->submit_q);
+       spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+
+       return cookie;
+}
+
+/* Copy descriptor from per chan software queue into hardware job ring */
+static void fsl_re_issue_pending(struct dma_chan *chan)
+{
+       struct fsl_re_chan *re_chan;
+       int avail;
+       struct fsl_re_desc *desc, *_desc;
+       unsigned long flags;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+
+       spin_lock_irqsave(&re_chan->desc_lock, flags);
+       avail = FSL_RE_SLOT_AVAIL(
+               in_be32(&re_chan->jrregs->inbring_slot_avail));
+
+       list_for_each_entry_safe(desc, _desc, &re_chan->submit_q, node) {
+               if (!avail)
+                       break;
+
+               list_move_tail(&desc->node, &re_chan->active_q);
+
+               memcpy(&re_chan->inb_ring_virt_addr[re_chan->inb_count],
+                      &desc->hwdesc, sizeof(struct fsl_re_hw_desc));
+
+               re_chan->inb_count = (re_chan->inb_count + 1) &
+                                               FSL_RE_RING_SIZE_MASK;
+               out_be32(&re_chan->jrregs->inbring_add_job, FSL_RE_ADD_JOB(1));
+               avail--;
+       }
+       spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+}
+
+static void fsl_re_desc_done(struct fsl_re_desc *desc)
+{
+       dma_async_tx_callback callback;
+       void *callback_param;
+
+       dma_cookie_complete(&desc->async_tx);
+
+       callback = desc->async_tx.callback;
+       callback_param = desc->async_tx.callback_param;
+       if (callback)
+               callback(callback_param);
+
+       dma_descriptor_unmap(&desc->async_tx);
+}
+
+static void fsl_re_cleanup_descs(struct fsl_re_chan *re_chan)
+{
+       struct fsl_re_desc *desc, *_desc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&re_chan->desc_lock, flags);
+       list_for_each_entry_safe(desc, _desc, &re_chan->ack_q, node) {
+               if (async_tx_test_ack(&desc->async_tx))
+                       list_move_tail(&desc->node, &re_chan->free_q);
+       }
+       spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+
+       fsl_re_issue_pending(&re_chan->chan);
+}
+
+static void fsl_re_dequeue(unsigned long data)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc, *_desc;
+       struct fsl_re_hw_desc *hwdesc;
+       unsigned long flags;
+       unsigned int count, oub_count;
+       int found;
+
+       re_chan = dev_get_drvdata((struct device *)data);
+
+       fsl_re_cleanup_descs(re_chan);
+
+       spin_lock_irqsave(&re_chan->desc_lock, flags);
+       count = FSL_RE_SLOT_FULL(in_be32(&re_chan->jrregs->oubring_slot_full));
+       while (count--) {
+               found = 0;
+               hwdesc = &re_chan->oub_ring_virt_addr[re_chan->oub_count];
+               list_for_each_entry_safe(desc, _desc, &re_chan->active_q,
+                                        node) {
+                       /* compare the hw dma addr to find the completed */
+                       if (desc->hwdesc.lbea32 == hwdesc->lbea32 &&
+                           desc->hwdesc.addr_low == hwdesc->addr_low) {
+                               found = 1;
+                               break;
+                       }
+               }
+
+               if (found) {
+                       fsl_re_desc_done(desc);
+                       list_move_tail(&desc->node, &re_chan->ack_q);
+               } else {
+                       dev_err(re_chan->dev,
+                               "found hwdesc not in sw queue, discard it\n");
+               }
+
+               oub_count = (re_chan->oub_count + 1) & FSL_RE_RING_SIZE_MASK;
+               re_chan->oub_count = oub_count;
+
+               out_be32(&re_chan->jrregs->oubring_job_rmvd,
+                        FSL_RE_RMVD_JOB(1));
+       }
+       spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+}
+
+/* Per Job Ring interrupt handler */
+static irqreturn_t fsl_re_isr(int irq, void *data)
+{
+       struct fsl_re_chan *re_chan;
+       u32 irqstate, status;
+
+       re_chan = dev_get_drvdata((struct device *)data);
+
+       irqstate = in_be32(&re_chan->jrregs->jr_interrupt_status);
+       if (!irqstate)
+               return IRQ_NONE;
+
+       /*
+        * There's no way in upper layer (read MD layer) to recover from
+        * error conditions except restart everything. In long term we
+        * need to do something more than just crashing
+        */
+       if (irqstate & FSL_RE_ERROR) {
+               status = in_be32(&re_chan->jrregs->jr_status);
+               dev_err(re_chan->dev, "chan error irqstate: %x, status: %x\n",
+                       irqstate, status);
+       }
+
+       /* Clear interrupt */
+       out_be32(&re_chan->jrregs->jr_interrupt_status, FSL_RE_CLR_INTR);
+
+       tasklet_schedule(&re_chan->irqtask);
+
+       return IRQ_HANDLED;
+}
+
+static enum dma_status fsl_re_tx_status(struct dma_chan *chan,
+                                       dma_cookie_t cookie,
+                                       struct dma_tx_state *txstate)
+{
+       return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void fill_cfd_frame(struct fsl_re_cmpnd_frame *cf, u8 index,
+                          size_t length, dma_addr_t addr, bool final)
+{
+       u32 efrl = length & FSL_RE_CF_LENGTH_MASK;
+
+       efrl |= final << FSL_RE_CF_FINAL_SHIFT;
+       cf[index].efrl32 = efrl;
+       cf[index].addr_high = upper_32_bits(addr);
+       cf[index].addr_low = lower_32_bits(addr);
+}
+
+static struct fsl_re_desc *fsl_re_init_desc(struct fsl_re_chan *re_chan,
+                                           struct fsl_re_desc *desc,
+                                           void *cf, dma_addr_t paddr)
+{
+       desc->re_chan = re_chan;
+       desc->async_tx.tx_submit = fsl_re_tx_submit;
+       dma_async_tx_descriptor_init(&desc->async_tx, &re_chan->chan);
+       INIT_LIST_HEAD(&desc->node);
+
+       desc->hwdesc.fmt32 = FSL_RE_FRAME_FORMAT << FSL_RE_HWDESC_FMT_SHIFT;
+       desc->hwdesc.lbea32 = upper_32_bits(paddr);
+       desc->hwdesc.addr_low = lower_32_bits(paddr);
+       desc->cf_addr = cf;
+       desc->cf_paddr = paddr;
+
+       desc->cdb_addr = (void *)(cf + FSL_RE_CF_DESC_SIZE);
+       desc->cdb_paddr = paddr + FSL_RE_CF_DESC_SIZE;
+
+       return desc;
+}
+
+static struct fsl_re_desc *fsl_re_chan_alloc_desc(struct fsl_re_chan *re_chan,
+                                                 unsigned long flags)
+{
+       struct fsl_re_desc *desc = NULL;
+       void *cf;
+       dma_addr_t paddr;
+       unsigned long lock_flag;
+
+       fsl_re_cleanup_descs(re_chan);
+
+       spin_lock_irqsave(&re_chan->desc_lock, lock_flag);
+       if (!list_empty(&re_chan->free_q)) {
+               /* take one desc from free_q */
+               desc = list_first_entry(&re_chan->free_q,
+                                       struct fsl_re_desc, node);
+               list_del(&desc->node);
+
+               desc->async_tx.flags = flags;
+       }
+       spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag);
+
+       if (!desc) {
+               desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+               if (!desc)
+                       return NULL;
+
+               cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_NOWAIT,
+                                   &paddr);
+               if (!cf) {
+                       kfree(desc);
+                       return NULL;
+               }
+
+               desc = fsl_re_init_desc(re_chan, desc, cf, paddr);
+               desc->async_tx.flags = flags;
+
+               spin_lock_irqsave(&re_chan->desc_lock, lock_flag);
+               re_chan->alloc_count++;
+               spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag);
+       }
+
+       return desc;
+}
+
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_genq(
+               struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+               unsigned int src_cnt, const unsigned char *scf, size_t len,
+               unsigned long flags)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+       struct fsl_re_xor_cdb *xor;
+       struct fsl_re_cmpnd_frame *cf;
+       u32 cdb;
+       unsigned int i, j;
+       unsigned int save_src_cnt = src_cnt;
+       int cont_q = 0;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+       if (len > FSL_RE_MAX_DATA_LEN) {
+               dev_err(re_chan->dev, "genq tx length %lu, max length %d\n",
+                       len, FSL_RE_MAX_DATA_LEN);
+               return NULL;
+       }
+
+       desc = fsl_re_chan_alloc_desc(re_chan, flags);
+       if (desc <= 0)
+               return NULL;
+
+       if (scf && (flags & DMA_PREP_CONTINUE)) {
+               cont_q = 1;
+               src_cnt += 1;
+       }
+
+       /* Filling xor CDB */
+       cdb = FSL_RE_XOR_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+       cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT;
+       cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+       cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT;
+       cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+       xor = desc->cdb_addr;
+       xor->cdb32 = cdb;
+
+       if (scf) {
+               /* compute q = src0*coef0^src1*coef1^..., * is GF(8) mult */
+               for (i = 0; i < save_src_cnt; i++)
+                       xor->gfm[i] = scf[i];
+               if (cont_q)
+                       xor->gfm[i++] = 1;
+       } else {
+               /* compute P, that is XOR all srcs */
+               for (i = 0; i < src_cnt; i++)
+                       xor->gfm[i] = 1;
+       }
+
+       /* Filling frame 0 of compound frame descriptor with CDB */
+       cf = desc->cf_addr;
+       fill_cfd_frame(cf, 0, sizeof(*xor), desc->cdb_paddr, 0);
+
+       /* Fill CFD's 1st frame with dest buffer */
+       fill_cfd_frame(cf, 1, len, dest, 0);
+
+       /* Fill CFD's rest of the frames with source buffers */
+       for (i = 2, j = 0; j < save_src_cnt; i++, j++)
+               fill_cfd_frame(cf, i, len, src[j], 0);
+
+       if (cont_q)
+               fill_cfd_frame(cf, i++, len, dest, 0);
+
+       /* Setting the final bit in the last source buffer frame in CFD */
+       cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT;
+
+       return &desc->async_tx;
+}
+
+/*
+ * Prep function for P parity calculation.In RAID Engine terminology,
+ * XOR calculation is called GenQ calculation done through GenQ command
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_xor(
+               struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+               unsigned int src_cnt, size_t len, unsigned long flags)
+{
+       /* NULL let genq take all coef as 1 */
+       return fsl_re_prep_dma_genq(chan, dest, src, src_cnt, NULL, len, flags);
+}
+
+/*
+ * Prep function for P/Q parity calculation.In RAID Engine terminology,
+ * P/Q calculation is called GenQQ done through GenQQ command
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_pq(
+               struct dma_chan *chan, dma_addr_t *dest, dma_addr_t *src,
+               unsigned int src_cnt, const unsigned char *scf, size_t len,
+               unsigned long flags)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+       struct fsl_re_pq_cdb *pq;
+       struct fsl_re_cmpnd_frame *cf;
+       u32 cdb;
+       u8 *p;
+       int gfmq_len, i, j;
+       unsigned int save_src_cnt = src_cnt;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+       if (len > FSL_RE_MAX_DATA_LEN) {
+               dev_err(re_chan->dev, "pq tx length is %lu, max length is %d\n",
+                       len, FSL_RE_MAX_DATA_LEN);
+               return NULL;
+       }
+
+       /*
+        * RE requires at least 2 sources, if given only one source, we pass the
+        * second source same as the first one.
+        * With only one source, generating P is meaningless, only generate Q.
+        */
+       if (src_cnt == 1) {
+               struct dma_async_tx_descriptor *tx;
+               dma_addr_t dma_src[2];
+               unsigned char coef[2];
+
+               dma_src[0] = *src;
+               coef[0] = *scf;
+               dma_src[1] = *src;
+               coef[1] = 0;
+               tx = fsl_re_prep_dma_genq(chan, dest[1], dma_src, 2, coef, len,
+                                         flags);
+               if (tx)
+                       desc = to_fsl_re_dma_desc(tx);
+
+               return tx;
+       }
+
+       /*
+        * During RAID6 array creation, Linux's MD layer gets P and Q
+        * calculated separately in two steps. But our RAID Engine has
+        * the capability to calculate both P and Q with a single command
+        * Hence to merge well with MD layer, we need to provide a hook
+        * here and call re_jq_prep_dma_genq() function
+        */
+
+       if (flags & DMA_PREP_PQ_DISABLE_P)
+               return fsl_re_prep_dma_genq(chan, dest[1], src, src_cnt,
+                               scf, len, flags);
+
+       if (flags & DMA_PREP_CONTINUE)
+               src_cnt += 3;
+
+       desc = fsl_re_chan_alloc_desc(re_chan, flags);
+       if (desc <= 0)
+               return NULL;
+
+       /* Filling GenQQ CDB */
+       cdb = FSL_RE_PQ_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+       cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT;
+       cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+       cdb |= FSL_RE_BUFFER_OUTPUT << FSL_RE_CDB_BUFFER_SHIFT;
+       cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+
+       pq = desc->cdb_addr;
+       pq->cdb32 = cdb;
+
+       p = pq->gfm_q1;
+       /* Init gfm_q1[] */
+       for (i = 0; i < src_cnt; i++)
+               p[i] = 1;
+
+       /* Align gfm[] to 32bit */
+       gfmq_len = ALIGN(src_cnt, 4);
+
+       /* Init gfm_q2[] */
+       p += gfmq_len;
+       for (i = 0; i < src_cnt; i++)
+               p[i] = scf[i];
+
+       /* Filling frame 0 of compound frame descriptor with CDB */
+       cf = desc->cf_addr;
+       fill_cfd_frame(cf, 0, sizeof(struct fsl_re_pq_cdb), desc->cdb_paddr, 0);
+
+       /* Fill CFD's 1st & 2nd frame with dest buffers */
+       for (i = 1, j = 0; i < 3; i++, j++)
+               fill_cfd_frame(cf, i, len, dest[j], 0);
+
+       /* Fill CFD's rest of the frames with source buffers */
+       for (i = 3, j = 0; j < save_src_cnt; i++, j++)
+               fill_cfd_frame(cf, i, len, src[j], 0);
+
+       /* PQ computation continuation */
+       if (flags & DMA_PREP_CONTINUE) {
+               if (src_cnt - save_src_cnt == 3) {
+                       p[save_src_cnt] = 0;
+                       p[save_src_cnt + 1] = 0;
+                       p[save_src_cnt + 2] = 1;
+                       fill_cfd_frame(cf, i++, len, dest[0], 0);
+                       fill_cfd_frame(cf, i++, len, dest[1], 0);
+                       fill_cfd_frame(cf, i++, len, dest[1], 0);
+               } else {
+                       dev_err(re_chan->dev, "PQ tx continuation error!\n");
+                       return NULL;
+               }
+       }
+
+       /* Setting the final bit in the last source buffer frame in CFD */
+       cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT;
+
+       return &desc->async_tx;
+}
+
+/*
+ * Prep function for memcpy. In RAID Engine, memcpy is done through MOVE
+ * command. Logic of this function will need to be modified once multipage
+ * support is added in Linux's MD/ASYNC Layer
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_memcpy(
+               struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+               size_t len, unsigned long flags)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+       size_t length;
+       struct fsl_re_cmpnd_frame *cf;
+       struct fsl_re_move_cdb *move;
+       u32 cdb;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+
+       if (len > FSL_RE_MAX_DATA_LEN) {
+               dev_err(re_chan->dev, "cp tx length is %lu, max length is %d\n",
+                       len, FSL_RE_MAX_DATA_LEN);
+               return NULL;
+       }
+
+       desc = fsl_re_chan_alloc_desc(re_chan, flags);
+       if (desc <= 0)
+               return NULL;
+
+       /* Filling move CDB */
+       cdb = FSL_RE_MOVE_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+       cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+       cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT;
+       cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+
+       move = desc->cdb_addr;
+       move->cdb32 = cdb;
+
+       /* Filling frame 0 of CFD with move CDB */
+       cf = desc->cf_addr;
+       fill_cfd_frame(cf, 0, sizeof(*move), desc->cdb_paddr, 0);
+
+       length = min_t(size_t, len, FSL_RE_MAX_DATA_LEN);
+
+       /* Fill CFD's 1st frame with dest buffer */
+       fill_cfd_frame(cf, 1, length, dest, 0);
+
+       /* Fill CFD's 2nd frame with src buffer */
+       fill_cfd_frame(cf, 2, length, src, 1);
+
+       return &desc->async_tx;
+}
+
+static int fsl_re_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+       void *cf;
+       dma_addr_t paddr;
+       int i;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+       for (i = 0; i < FSL_RE_MIN_DESCS; i++) {
+               desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+               if (!desc)
+                       break;
+
+               cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_KERNEL,
+                                   &paddr);
+               if (!cf) {
+                       kfree(desc);
+                       break;
+               }
+
+               INIT_LIST_HEAD(&desc->node);
+               fsl_re_init_desc(re_chan, desc, cf, paddr);
+
+               list_add_tail(&desc->node, &re_chan->free_q);
+               re_chan->alloc_count++;
+       }
+       return re_chan->alloc_count;
+}
+
+static void fsl_re_free_chan_resources(struct dma_chan *chan)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+       while (re_chan->alloc_count--) {
+               desc = list_first_entry(&re_chan->free_q,
+                                       struct fsl_re_desc,
+                                       node);
+
+               list_del(&desc->node);
+               dma_pool_free(re_chan->re_dev->cf_desc_pool, desc->cf_addr,
+                             desc->cf_paddr);
+               kfree(desc);
+       }
+
+       if (!list_empty(&re_chan->free_q))
+               dev_err(re_chan->dev, "chan resource cannot be cleaned!\n");
+}
+
+static int fsl_re_chan_probe(struct platform_device *ofdev,
+                     struct device_node *np, u8 q, u32 off)
+{
+       struct device *dev, *chandev;
+       struct fsl_re_drv_private *re_priv;
+       struct fsl_re_chan *chan;
+       struct dma_device *dma_dev;
+       u32 ptr;
+       u32 status;
+       int ret = 0, rc;
+       struct platform_device *chan_ofdev;
+
+       dev = &ofdev->dev;
+       re_priv = dev_get_drvdata(dev);
+       dma_dev = &re_priv->dma_dev;
+
+       chan = devm_kzalloc(dev, sizeof(*chan), GFP_KERNEL);
+       if (!chan)
+               return -ENOMEM;
+
+       /* create platform device for chan node */
+       chan_ofdev = of_platform_device_create(np, NULL, dev);
+       if (!chan_ofdev) {
+               dev_err(dev, "Not able to create ofdev for jr %d\n", q);
+               ret = -EINVAL;
+               goto err_free;
+       }
+
+       /* read reg property from dts */
+       rc = of_property_read_u32(np, "reg", &ptr);
+       if (rc) {
+               dev_err(dev, "Reg property not found in jr %d\n", q);
+               ret = -ENODEV;
+               goto err_free;
+       }
+
+       chan->jrregs = (struct fsl_re_chan_cfg *)((u8 *)re_priv->re_regs +
+                       off + ptr);
+
+       /* read irq property from dts */
+       chan->irq = irq_of_parse_and_map(np, 0);
+       if (chan->irq == NO_IRQ) {
+               dev_err(dev, "No IRQ defined for JR %d\n", q);
+               ret = -ENODEV;
+               goto err_free;
+       }
+
+       snprintf(chan->name, sizeof(chan->name), "re_jr%02d", q);
+
+       chandev = &chan_ofdev->dev;
+       tasklet_init(&chan->irqtask, fsl_re_dequeue, (unsigned long)chandev);
+
+       ret = request_irq(chan->irq, fsl_re_isr, 0, chan->name, chandev);
+       if (ret) {
+               dev_err(dev, "Unable to register interrupt for JR %d\n", q);
+               ret = -EINVAL;
+               goto err_free;
+       }
+
+       re_priv->re_jrs[q] = chan;
+       chan->chan.device = dma_dev;
+       chan->chan.private = chan;
+       chan->dev = chandev;
+       chan->re_dev = re_priv;
+
+       spin_lock_init(&chan->desc_lock);
+       INIT_LIST_HEAD(&chan->ack_q);
+       INIT_LIST_HEAD(&chan->active_q);
+       INIT_LIST_HEAD(&chan->submit_q);
+       INIT_LIST_HEAD(&chan->free_q);
+
+       chan->inb_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool,
+               GFP_KERNEL, &chan->inb_phys_addr);
+       if (!chan->inb_ring_virt_addr) {
+               dev_err(dev, "No dma memory for inb_ring_virt_addr\n");
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       chan->oub_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool,
+               GFP_KERNEL, &chan->oub_phys_addr);
+       if (!chan->oub_ring_virt_addr) {
+               dev_err(dev, "No dma memory for oub_ring_virt_addr\n");
+               ret = -ENOMEM;
+               goto err_free_1;
+       }
+
+       /* Program the Inbound/Outbound ring base addresses and size */
+       out_be32(&chan->jrregs->inbring_base_h,
+                chan->inb_phys_addr & FSL_RE_ADDR_BIT_MASK);
+       out_be32(&chan->jrregs->oubring_base_h,
+                chan->oub_phys_addr & FSL_RE_ADDR_BIT_MASK);
+       out_be32(&chan->jrregs->inbring_base_l,
+                chan->inb_phys_addr >> FSL_RE_ADDR_BIT_SHIFT);
+       out_be32(&chan->jrregs->oubring_base_l,
+                chan->oub_phys_addr >> FSL_RE_ADDR_BIT_SHIFT);
+       out_be32(&chan->jrregs->inbring_size,
+                FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT);
+       out_be32(&chan->jrregs->oubring_size,
+                FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT);
+
+       /* Read LIODN value from u-boot */
+       status = in_be32(&chan->jrregs->jr_config_1) & FSL_RE_REG_LIODN_MASK;
+
+       /* Program the CFG reg */
+       out_be32(&chan->jrregs->jr_config_1,
+                FSL_RE_CFG1_CBSI | FSL_RE_CFG1_CBS0 | status);
+
+       dev_set_drvdata(chandev, chan);
+
+       /* Enable RE/CHAN */
+       out_be32(&chan->jrregs->jr_command, FSL_RE_ENABLE);
+
+       return 0;
+
+err_free_1:
+       dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr,
+                     chan->inb_phys_addr);
+err_free:
+       return ret;
+}
+
+/* Probe function for RAID Engine */
+static int fsl_re_probe(struct platform_device *ofdev)
+{
+       struct fsl_re_drv_private *re_priv;
+       struct device_node *np;
+       struct device_node *child;
+       u32 off;
+       u8 ridx = 0;
+       struct dma_device *dma_dev;
+       struct resource *res;
+       int rc;
+       struct device *dev = &ofdev->dev;
+
+       re_priv = devm_kzalloc(dev, sizeof(*re_priv), GFP_KERNEL);
+       if (!re_priv)
+               return -ENOMEM;
+
+       res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
+
+       /* IOMAP the entire RAID Engine region */
+       re_priv->re_regs = devm_ioremap(dev, res->start, resource_size(res));
+       if (!re_priv->re_regs)
+               return -EBUSY;
+
+       /* Program the RE mode */
+       out_be32(&re_priv->re_regs->global_config, FSL_RE_NON_DPAA_MODE);
+
+       /* Program Galois Field polynomial */
+       out_be32(&re_priv->re_regs->galois_field_config, FSL_RE_GFM_POLY);
+
+       dev_info(dev, "version %x, mode %x, gfp %x\n",
+                in_be32(&re_priv->re_regs->re_version_id),
+                in_be32(&re_priv->re_regs->global_config),
+                in_be32(&re_priv->re_regs->galois_field_config));
+
+       dma_dev = &re_priv->dma_dev;
+       dma_dev->dev = dev;
+       INIT_LIST_HEAD(&dma_dev->channels);
+       dma_set_mask(dev, DMA_BIT_MASK(40));
+
+       dma_dev->device_alloc_chan_resources = fsl_re_alloc_chan_resources;
+       dma_dev->device_tx_status = fsl_re_tx_status;
+       dma_dev->device_issue_pending = fsl_re_issue_pending;
+
+       dma_dev->max_xor = FSL_RE_MAX_XOR_SRCS;
+       dma_dev->device_prep_dma_xor = fsl_re_prep_dma_xor;
+       dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+
+       dma_dev->max_pq = FSL_RE_MAX_PQ_SRCS;
+       dma_dev->device_prep_dma_pq = fsl_re_prep_dma_pq;
+       dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+
+       dma_dev->device_prep_dma_memcpy = fsl_re_prep_dma_memcpy;
+       dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+
+       dma_dev->device_free_chan_resources = fsl_re_free_chan_resources;
+
+       re_priv->total_chans = 0;
+
+       re_priv->cf_desc_pool = dmam_pool_create("fsl_re_cf_desc_pool", dev,
+                                       FSL_RE_CF_CDB_SIZE,
+                                       FSL_RE_CF_CDB_ALIGN, 0);
+
+       if (!re_priv->cf_desc_pool) {
+               dev_err(dev, "No memory for fsl re_cf desc pool\n");
+               return -ENOMEM;
+       }
+
+       re_priv->hw_desc_pool = dmam_pool_create("fsl_re_hw_desc_pool", dev,
+                       sizeof(struct fsl_re_hw_desc) * FSL_RE_RING_SIZE,
+                       FSL_RE_FRAME_ALIGN, 0);
+       if (!re_priv->hw_desc_pool) {
+               dev_err(dev, "No memory for fsl re_hw desc pool\n");
+               return -ENOMEM;
+       }
+
+       dev_set_drvdata(dev, re_priv);
+
+       /* Parse Device tree to find out the total number of JQs present */
+       for_each_compatible_node(np, NULL, "fsl,raideng-v1.0-job-queue") {
+               rc = of_property_read_u32(np, "reg", &off);
+               if (rc) {
+                       dev_err(dev, "Reg property not found in JQ node\n");
+                       return -ENODEV;
+               }
+               /* Find out the Job Rings present under each JQ */
+               for_each_child_of_node(np, child) {
+                       rc = of_device_is_compatible(child,
+                                            "fsl,raideng-v1.0-job-ring");
+                       if (rc) {
+                               fsl_re_chan_probe(ofdev, child, ridx++, off);
+                               re_priv->total_chans++;
+                       }
+               }
+       }
+
+       dma_async_device_register(dma_dev);
+
+       return 0;
+}
+
+static void fsl_re_remove_chan(struct fsl_re_chan *chan)
+{
+       dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr,
+                     chan->inb_phys_addr);
+
+       dma_pool_free(chan->re_dev->hw_desc_pool, chan->oub_ring_virt_addr,
+                     chan->oub_phys_addr);
+}
+
+static int fsl_re_remove(struct platform_device *ofdev)
+{
+       struct fsl_re_drv_private *re_priv;
+       struct device *dev;
+       int i;
+
+       dev = &ofdev->dev;
+       re_priv = dev_get_drvdata(dev);
+
+       /* Cleanup chan related memory areas */
+       for (i = 0; i < re_priv->total_chans; i++)
+               fsl_re_remove_chan(re_priv->re_jrs[i]);
+
+       /* Unregister the driver */
+       dma_async_device_unregister(&re_priv->dma_dev);
+
+       return 0;
+}
+
+static struct of_device_id fsl_re_ids[] = {
+       { .compatible = "fsl,raideng-v1.0", },
+       {}
+};
+
+static struct platform_driver fsl_re_driver = {
+       .driver = {
+               .name = "fsl-raideng",
+               .owner = THIS_MODULE,
+               .of_match_table = fsl_re_ids,
+       },
+       .probe = fsl_re_probe,
+       .remove = fsl_re_remove,
+};
+
+module_platform_driver(fsl_re_driver);
+
+MODULE_AUTHOR("Harninder Rai <harninder.rai@freescale.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Freescale RAID Engine Device Driver");
diff --git a/drivers/dma/fsl_raid.h b/drivers/dma/fsl_raid.h
new file mode 100644 (file)
index 0000000..69d743c
--- /dev/null
@@ -0,0 +1,306 @@
+/*
+ * drivers/dma/fsl_raid.h
+ *
+ * Freescale RAID Engine device driver
+ *
+ * Author:
+ *     Harninder Rai <harninder.rai@freescale.com>
+ *     Naveen Burmi <naveenburmi@freescale.com>
+ *
+ * Rewrite:
+ *     Xuelin Shi <xuelin.shi@freescale.com>
+
+ * Copyright (c) 2010-2012 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define FSL_RE_MAX_CHANS               4
+#define FSL_RE_DPAA_MODE               BIT(30)
+#define FSL_RE_NON_DPAA_MODE           BIT(31)
+#define FSL_RE_GFM_POLY                        0x1d000000
+#define FSL_RE_ADD_JOB(x)              ((x) << 16)
+#define FSL_RE_RMVD_JOB(x)             ((x) << 16)
+#define FSL_RE_CFG1_CBSI               0x08000000
+#define FSL_RE_CFG1_CBS0               0x00080000
+#define FSL_RE_SLOT_FULL_SHIFT         8
+#define FSL_RE_SLOT_FULL(x)            ((x) >> FSL_RE_SLOT_FULL_SHIFT)
+#define FSL_RE_SLOT_AVAIL_SHIFT                8
+#define FSL_RE_SLOT_AVAIL(x)           ((x) >> FSL_RE_SLOT_AVAIL_SHIFT)
+#define FSL_RE_PQ_OPCODE               0x1B
+#define FSL_RE_XOR_OPCODE              0x1A
+#define FSL_RE_MOVE_OPCODE             0x8
+#define FSL_RE_FRAME_ALIGN             16
+#define FSL_RE_BLOCK_SIZE              0x3 /* 4096 bytes */
+#define FSL_RE_CACHEABLE_IO            0x0
+#define FSL_RE_BUFFER_OUTPUT           0x0
+#define FSL_RE_INTR_ON_ERROR           0x1
+#define FSL_RE_DATA_DEP                        0x1
+#define FSL_RE_ENABLE_DPI              0x0
+#define FSL_RE_RING_SIZE               0x400
+#define FSL_RE_RING_SIZE_MASK          (FSL_RE_RING_SIZE - 1)
+#define FSL_RE_RING_SIZE_SHIFT         8
+#define FSL_RE_ADDR_BIT_SHIFT          4
+#define FSL_RE_ADDR_BIT_MASK           (BIT(FSL_RE_ADDR_BIT_SHIFT) - 1)
+#define FSL_RE_ERROR                   0x40000000
+#define FSL_RE_INTR                    0x80000000
+#define FSL_RE_CLR_INTR                        0x80000000
+#define FSL_RE_PAUSE                   0x80000000
+#define FSL_RE_ENABLE                  0x80000000
+#define FSL_RE_REG_LIODN_MASK          0x00000FFF
+
+#define FSL_RE_CDB_OPCODE_MASK         0xF8000000
+#define FSL_RE_CDB_OPCODE_SHIFT                27
+#define FSL_RE_CDB_EXCLEN_MASK         0x03000000
+#define FSL_RE_CDB_EXCLEN_SHIFT                24
+#define FSL_RE_CDB_EXCLQ1_MASK         0x00F00000
+#define FSL_RE_CDB_EXCLQ1_SHIFT                20
+#define FSL_RE_CDB_EXCLQ2_MASK         0x000F0000
+#define FSL_RE_CDB_EXCLQ2_SHIFT                16
+#define FSL_RE_CDB_BLKSIZE_MASK                0x0000C000
+#define FSL_RE_CDB_BLKSIZE_SHIFT       14
+#define FSL_RE_CDB_CACHE_MASK          0x00003000
+#define FSL_RE_CDB_CACHE_SHIFT         12
+#define FSL_RE_CDB_BUFFER_MASK         0x00000800
+#define FSL_RE_CDB_BUFFER_SHIFT                11
+#define FSL_RE_CDB_ERROR_MASK          0x00000400
+#define FSL_RE_CDB_ERROR_SHIFT         10
+#define FSL_RE_CDB_NRCS_MASK           0x0000003C
+#define FSL_RE_CDB_NRCS_SHIFT          6
+#define FSL_RE_CDB_DEPEND_MASK         0x00000008
+#define FSL_RE_CDB_DEPEND_SHIFT                3
+#define FSL_RE_CDB_DPI_MASK            0x00000004
+#define FSL_RE_CDB_DPI_SHIFT           2
+
+/*
+ * the largest cf block is 19*sizeof(struct cmpnd_frame), which is 304 bytes.
+ * here 19 = 1(cdb)+2(dest)+16(src), align to 64bytes, that is 320 bytes.
+ * the largest cdb block: struct pq_cdb which is 180 bytes, adding to cf block
+ * 320+180=500, align to 64bytes, that is 512 bytes.
+ */
+#define FSL_RE_CF_DESC_SIZE            320
+#define FSL_RE_CF_CDB_SIZE             512
+#define FSL_RE_CF_CDB_ALIGN            64
+
+struct fsl_re_ctrl {
+       /* General Configuration Registers */
+       __be32 global_config;   /* Global Configuration Register */
+       u8     rsvd1[4];
+       __be32 galois_field_config; /* Galois Field Configuration Register */
+       u8     rsvd2[4];
+       __be32 jq_wrr_config;   /* WRR Configuration register */
+       u8     rsvd3[4];
+       __be32 crc_config;      /* CRC Configuration register */
+       u8     rsvd4[228];
+       __be32 system_reset;    /* System Reset Register */
+       u8     rsvd5[252];
+       __be32 global_status;   /* Global Status Register */
+       u8     rsvd6[832];
+       __be32 re_liodn_base;   /* LIODN Base Register */
+       u8     rsvd7[1712];
+       __be32 re_version_id;   /* Version ID register of RE */
+       __be32 re_version_id_2; /* Version ID 2 register of RE */
+       u8     rsvd8[512];
+       __be32 host_config;     /* Host I/F Configuration Register */
+};
+
+struct fsl_re_chan_cfg {
+       /* Registers for JR interface */
+       __be32 jr_config_0;     /* Job Queue Configuration 0 Register */
+       __be32 jr_config_1;     /* Job Queue Configuration 1 Register */
+       __be32 jr_interrupt_status; /* Job Queue Interrupt Status Register */
+       u8     rsvd1[4];
+       __be32 jr_command;      /* Job Queue Command Register */
+       u8     rsvd2[4];
+       __be32 jr_status;       /* Job Queue Status Register */
+       u8     rsvd3[228];
+
+       /* Input Ring */
+       __be32 inbring_base_h;  /* Inbound Ring Base Address Register - High */
+       __be32 inbring_base_l;  /* Inbound Ring Base Address Register - Low */
+       __be32 inbring_size;    /* Inbound Ring Size Register */
+       u8     rsvd4[4];
+       __be32 inbring_slot_avail; /* Inbound Ring Slot Available Register */
+       u8     rsvd5[4];
+       __be32 inbring_add_job; /* Inbound Ring Add Job Register */
+       u8     rsvd6[4];
+       __be32 inbring_cnsmr_indx; /* Inbound Ring Consumer Index Register */
+       u8     rsvd7[220];
+
+       /* Output Ring */
+       __be32 oubring_base_h;  /* Outbound Ring Base Address Register - High */
+       __be32 oubring_base_l;  /* Outbound Ring Base Address Register - Low */
+       __be32 oubring_size;    /* Outbound Ring Size Register */
+       u8     rsvd8[4];
+       __be32 oubring_job_rmvd; /* Outbound Ring Job Removed Register */
+       u8     rsvd9[4];
+       __be32 oubring_slot_full; /* Outbound Ring Slot Full Register */
+       u8     rsvd10[4];
+       __be32 oubring_prdcr_indx; /* Outbound Ring Producer Index */
+};
+
+/*
+ * Command Descriptor Block (CDB) for unicast move command.
+ * In RAID Engine terms, memcpy is done through move command
+ */
+struct fsl_re_move_cdb {
+       __be32 cdb32;
+};
+
+/* Data protection/integrity related fields */
+#define FSL_RE_DPI_APPS_MASK           0xC0000000
+#define FSL_RE_DPI_APPS_SHIFT          30
+#define FSL_RE_DPI_REF_MASK            0x30000000
+#define FSL_RE_DPI_REF_SHIFT           28
+#define FSL_RE_DPI_GUARD_MASK          0x0C000000
+#define FSL_RE_DPI_GUARD_SHIFT         26
+#define FSL_RE_DPI_ATTR_MASK           0x03000000
+#define FSL_RE_DPI_ATTR_SHIFT          24
+#define FSL_RE_DPI_META_MASK           0x0000FFFF
+
+struct fsl_re_dpi {
+       __be32 dpi32;
+       __be32 ref;
+};
+
+/*
+ * CDB for GenQ command. In RAID Engine terminology, XOR is
+ * done through this command
+ */
+struct fsl_re_xor_cdb {
+       __be32 cdb32;
+       u8 gfm[16];
+       struct fsl_re_dpi dpi_dest_spec;
+       struct fsl_re_dpi dpi_src_spec[16];
+};
+
+/* CDB for no-op command */
+struct fsl_re_noop_cdb {
+       __be32 cdb32;
+};
+
+/*
+ * CDB for GenQQ command. In RAID Engine terminology, P/Q is
+ * done through this command
+ */
+struct fsl_re_pq_cdb {
+       __be32 cdb32;
+       u8 gfm_q1[16];
+       u8 gfm_q2[16];
+       struct fsl_re_dpi dpi_dest_spec[2];
+       struct fsl_re_dpi dpi_src_spec[16];
+};
+
+/* Compound frame */
+#define FSL_RE_CF_ADDR_HIGH_MASK       0x000000FF
+#define FSL_RE_CF_EXT_MASK             0x80000000
+#define FSL_RE_CF_EXT_SHIFT            31
+#define FSL_RE_CF_FINAL_MASK           0x40000000
+#define FSL_RE_CF_FINAL_SHIFT          30
+#define FSL_RE_CF_LENGTH_MASK          0x000FFFFF
+#define FSL_RE_CF_BPID_MASK            0x00FF0000
+#define FSL_RE_CF_BPID_SHIFT           16
+#define FSL_RE_CF_OFFSET_MASK          0x00001FFF
+
+struct fsl_re_cmpnd_frame {
+       __be32 addr_high;
+       __be32 addr_low;
+       __be32 efrl32;
+       __be32 rbro32;
+};
+
+/* Frame descriptor */
+#define FSL_RE_HWDESC_LIODN_MASK       0x3F000000
+#define FSL_RE_HWDESC_LIODN_SHIFT      24
+#define FSL_RE_HWDESC_BPID_MASK                0x00FF0000
+#define FSL_RE_HWDESC_BPID_SHIFT       16
+#define FSL_RE_HWDESC_ELIODN_MASK      0x0000F000
+#define FSL_RE_HWDESC_ELIODN_SHIFT     12
+#define FSL_RE_HWDESC_FMT_SHIFT                29
+#define FSL_RE_HWDESC_FMT_MASK         (0x3 << FSL_RE_HWDESC_FMT_SHIFT)
+
+struct fsl_re_hw_desc {
+       __be32 lbea32;
+       __be32 addr_low;
+       __be32 fmt32;
+       __be32 status;
+};
+
+/* Raid Engine device private data */
+struct fsl_re_drv_private {
+       u8 total_chans;
+       struct dma_device dma_dev;
+       struct fsl_re_ctrl *re_regs;
+       struct fsl_re_chan *re_jrs[FSL_RE_MAX_CHANS];
+       struct dma_pool *cf_desc_pool;
+       struct dma_pool *hw_desc_pool;
+};
+
+/* Per job ring data structure */
+struct fsl_re_chan {
+       char name[16];
+       spinlock_t desc_lock; /* queue lock */
+       struct list_head ack_q;  /* wait to acked queue */
+       struct list_head active_q; /* already issued on hw, not completed */
+       struct list_head submit_q;
+       struct list_head free_q; /* alloc available queue */
+       struct device *dev;
+       struct fsl_re_drv_private *re_dev;
+       struct dma_chan chan;
+       struct fsl_re_chan_cfg *jrregs;
+       int irq;
+       struct tasklet_struct irqtask;
+       u32 alloc_count;
+
+       /* hw descriptor ring for inbound queue*/
+       dma_addr_t inb_phys_addr;
+       struct fsl_re_hw_desc *inb_ring_virt_addr;
+       u32 inb_count;
+
+       /* hw descriptor ring for outbound queue */
+       dma_addr_t oub_phys_addr;
+       struct fsl_re_hw_desc *oub_ring_virt_addr;
+       u32 oub_count;
+};
+
+/* Async transaction descriptor */
+struct fsl_re_desc {
+       struct dma_async_tx_descriptor async_tx;
+       struct list_head node;
+       struct fsl_re_hw_desc hwdesc;
+       struct fsl_re_chan *re_chan;
+
+       /* hwdesc will point to cf_addr */
+       void *cf_addr;
+       dma_addr_t cf_paddr;
+
+       void *cdb_addr;
+       dma_addr_t cdb_paddr;
+       int status;
+};
index ed045a9..9ca5683 100644 (file)
@@ -689,11 +689,6 @@ static int mdc_slave_config(struct dma_chan *chan,
        return 0;
 }
 
-static int mdc_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
 static void mdc_free_chan_resources(struct dma_chan *chan)
 {
        struct mdc_chan *mchan = to_mdc_chan(chan);
@@ -910,7 +905,6 @@ static int mdc_dma_probe(struct platform_device *pdev)
        mdma->dma_dev.device_prep_slave_sg = mdc_prep_slave_sg;
        mdma->dma_dev.device_prep_dma_cyclic = mdc_prep_dma_cyclic;
        mdma->dma_dev.device_prep_dma_memcpy = mdc_prep_dma_memcpy;
-       mdma->dma_dev.device_alloc_chan_resources = mdc_alloc_chan_resources;
        mdma->dma_dev.device_free_chan_resources = mdc_free_chan_resources;
        mdma->dma_dev.device_tx_status = mdc_tx_status;
        mdma->dma_dev.device_issue_pending = mdc_issue_pending;
index 66a0efb..62bbd79 100644 (file)
@@ -1260,6 +1260,7 @@ static void sdma_issue_pending(struct dma_chan *chan)
 
 #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1        34
 #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2        38
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3        41
 
 static void sdma_add_scripts(struct sdma_engine *sdma,
                const struct sdma_script_start_addrs *addr)
@@ -1306,6 +1307,9 @@ static void sdma_load_firmware(const struct firmware *fw, void *context)
        case 2:
                sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2;
                break;
+       case 3:
+               sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3;
+               break;
        default:
                dev_err(sdma->dev, "unknown firmware version\n");
                goto err_firmware;
index 3b55bb8..ea1e107 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
index 940c150..ee0aa9f 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
index d63f68b..30f5c7e 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  * The full GNU General Public License is included in this distribution in the
  * file called COPYING.
  */
index 695483e..69c7dfc 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
index 4702927..bf24ebe 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  * The full GNU General Public License is included in this distribution in the
  * file called COPYING.
  */
index 194ec20..64790a4 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
index 02177ec..a3e731e 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  * The full GNU General Public License is included in this distribution in the
  * file called COPYING.
  */
index 5501eb0..76f0dc6 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
  * The full GNU General Public License is included in this distribution in
  * the file called "COPYING".
  *
index 2f1cfa0..909352f 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  * The full GNU General Public License is included in this distribution in the
  * file called COPYING.
  */
index 263d9f6..9988268 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
  */
 
 /*
index 6f7f435..647e362 100644 (file)
@@ -313,11 +313,6 @@ static void k3_dma_tasklet(unsigned long arg)
        }
 }
 
-static int k3_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
 static void k3_dma_free_chan_resources(struct dma_chan *chan)
 {
        struct k3_dma_chan *c = to_k3_chan(chan);
@@ -654,7 +649,7 @@ static void k3_dma_free_desc(struct virt_dma_desc *vd)
        kfree(ds);
 }
 
-static struct of_device_id k3_pdma_dt_ids[] = {
+static const struct of_device_id k3_pdma_dt_ids[] = {
        { .compatible = "hisilicon,k3-dma-1.0", },
        {}
 };
@@ -728,7 +723,6 @@ static int k3_dma_probe(struct platform_device *op)
        dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
        dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
        d->slave.dev = &op->dev;
-       d->slave.device_alloc_chan_resources = k3_dma_alloc_chan_resources;
        d->slave.device_free_chan_resources = k3_dma_free_chan_resources;
        d->slave.device_tx_status = k3_dma_tx_status;
        d->slave.device_prep_dma_memcpy = k3_dma_prep_memcpy;
index eb41004..462a022 100644 (file)
@@ -973,7 +973,7 @@ static int mmp_pdma_chan_init(struct mmp_pdma_device *pdev, int idx, int irq)
        return 0;
 }
 
-static struct of_device_id mmp_pdma_dt_ids[] = {
+static const struct of_device_id mmp_pdma_dt_ids[] = {
        { .compatible = "marvell,pdma-1.0", },
        {}
 };
index b6f4e1f..449e785 100644 (file)
@@ -613,7 +613,7 @@ struct dma_chan *mmp_tdma_xlate(struct of_phandle_args *dma_spec,
        return dma_request_channel(mask, mmp_tdma_filter_fn, &param);
 }
 
-static struct of_device_id mmp_tdma_dt_ids[] = {
+static const struct of_device_id mmp_tdma_dt_ids[] = {
        { .compatible = "marvell,adma-1.0", .data = (void *)MMP_AUD_TDMA},
        { .compatible = "marvell,pxa910-squ", .data = (void *)PXA910_SQU},
        {}
index 57d2457..e6281e7 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  * The full GNU General Public License is included in this distribution in the
  * file called COPYING.
  */
@@ -1072,7 +1068,7 @@ static int mpc_dma_remove(struct platform_device *op)
        return 0;
 }
 
-static struct of_device_id mpc_dma_match[] = {
+static const struct of_device_id mpc_dma_match[] = {
        { .compatible = "fsl,mpc5121-dma", },
        { .compatible = "fsl,mpc8308-dma", },
        {},
index b03e813..1c56001 100644 (file)
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #include <linux/init.h>
@@ -1249,7 +1245,7 @@ static int mv_xor_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_OF
-static struct of_device_id mv_xor_dt_ids[] = {
+static const struct of_device_id mv_xor_dt_ids[] = {
        { .compatible = "marvell,orion-xor", },
        {},
 };
index 78edc7e..91958db 100644 (file)
@@ -9,10 +9,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #ifndef MV_XOR_H
index 35c143c..b859792 100644 (file)
@@ -949,6 +949,7 @@ err_free_res:
 err_disable_pdev:
        pci_disable_device(pdev);
 err_free_mem:
+       kfree(pd);
        return err;
 }
 
index 0e1f567..a7d9d30 100644 (file)
@@ -556,7 +556,7 @@ static inline u32 _emit_ADDH(unsigned dry_run, u8 buf[],
 
        buf[0] = CMD_DMAADDH;
        buf[0] |= (da << 1);
-       *((u16 *)&buf[1]) = val;
+       *((__le16 *)&buf[1]) = cpu_to_le16(val);
 
        PL330_DBGCMD_DUMP(SZ_DMAADDH, "\tDMAADDH %s %u\n",
                da == 1 ? "DA" : "SA", val);
@@ -710,7 +710,7 @@ static inline u32 _emit_MOV(unsigned dry_run, u8 buf[],
 
        buf[0] = CMD_DMAMOV;
        buf[1] = dst;
-       *((u32 *)&buf[2]) = val;
+       *((__le32 *)&buf[2]) = cpu_to_le32(val);
 
        PL330_DBGCMD_DUMP(SZ_DMAMOV, "\tDMAMOV %s 0x%x\n",
                dst == SAR ? "SAR" : (dst == DAR ? "DAR" : "CCR"), val);
@@ -888,7 +888,7 @@ static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
 
        buf[1] = chan & 0x7;
 
-       *((u32 *)&buf[2]) = addr;
+       *((__le32 *)&buf[2]) = cpu_to_le32(addr);
 
        return SZ_DMAGO;
 }
@@ -928,7 +928,7 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd,
        }
        writel(val, regs + DBGINST0);
 
-       val = *((u32 *)&insn[2]);
+       val = le32_to_cpu(*((__le32 *)&insn[2]));
        writel(val, regs + DBGINST1);
 
        /* If timed out due to halted state-machine */
@@ -2162,7 +2162,7 @@ static int pl330_terminate_all(struct dma_chan *chan)
  * DMA transfer again. This pause feature was implemented to
  * allow safely read residue before channel termination.
  */
-int pl330_pause(struct dma_chan *chan)
+static int pl330_pause(struct dma_chan *chan)
 {
        struct dma_pl330_chan *pch = to_pchan(chan);
        struct pl330_dmac *pl330 = pch->dmac;
@@ -2203,8 +2203,8 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
        pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
 }
 
-int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
-               struct dma_pl330_desc *desc)
+static int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
+                                          struct dma_pl330_desc *desc)
 {
        struct pl330_thread *thrd = pch->thread;
        struct pl330_dmac *pl330 = pch->dmac;
@@ -2259,7 +2259,17 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                        transferred = 0;
                residual += desc->bytes_requested - transferred;
                if (desc->txd.cookie == cookie) {
-                       ret = desc->status;
+                       switch (desc->status) {
+                       case DONE:
+                               ret = DMA_COMPLETE;
+                               break;
+                       case PREP:
+                       case BUSY:
+                               ret = DMA_IN_PROGRESS;
+                               break;
+                       default:
+                               WARN_ON(1);
+                       }
                        break;
                }
                if (desc->last)
index fa764a3..9217f89 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  * The full GNU General Public License is included in this distribution in the
  * file called COPYING.
  */
index 9c914d6..5a250cd 100644 (file)
@@ -171,6 +171,35 @@ static const struct reg_offset_data bam_v1_4_reg_info[] = {
        [BAM_P_FIFO_SIZES]      = { 0x1820, 0x00, 0x1000, 0x00 },
 };
 
+static const struct reg_offset_data bam_v1_7_reg_info[] = {
+       [BAM_CTRL]              = { 0x00000, 0x00, 0x00, 0x00 },
+       [BAM_REVISION]          = { 0x01000, 0x00, 0x00, 0x00 },
+       [BAM_NUM_PIPES]         = { 0x01008, 0x00, 0x00, 0x00 },
+       [BAM_DESC_CNT_TRSHLD]   = { 0x00008, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS]          = { 0x03010, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_MSK]      = { 0x03014, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_UNMASKED] = { 0x03018, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_STTS]          = { 0x00014, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_CLR]           = { 0x00018, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_EN]            = { 0x0001C, 0x00, 0x00, 0x00 },
+       [BAM_CNFG_BITS]         = { 0x0007C, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_EE]       = { 0x03000, 0x00, 0x00, 0x1000 },
+       [BAM_IRQ_SRCS_MSK_EE]   = { 0x03004, 0x00, 0x00, 0x1000 },
+       [BAM_P_CTRL]            = { 0x13000, 0x1000, 0x00, 0x00 },
+       [BAM_P_RST]             = { 0x13004, 0x1000, 0x00, 0x00 },
+       [BAM_P_HALT]            = { 0x13008, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_STTS]        = { 0x13010, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_CLR]         = { 0x13014, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_EN]          = { 0x13018, 0x1000, 0x00, 0x00 },
+       [BAM_P_EVNT_DEST_ADDR]  = { 0x1382C, 0x00, 0x1000, 0x00 },
+       [BAM_P_EVNT_REG]        = { 0x13818, 0x00, 0x1000, 0x00 },
+       [BAM_P_SW_OFSTS]        = { 0x13800, 0x00, 0x1000, 0x00 },
+       [BAM_P_DATA_FIFO_ADDR]  = { 0x13824, 0x00, 0x1000, 0x00 },
+       [BAM_P_DESC_FIFO_ADDR]  = { 0x1381C, 0x00, 0x1000, 0x00 },
+       [BAM_P_EVNT_GEN_TRSHLD] = { 0x13828, 0x00, 0x1000, 0x00 },
+       [BAM_P_FIFO_SIZES]      = { 0x13820, 0x00, 0x1000, 0x00 },
+};
+
 /* BAM CTRL */
 #define BAM_SW_RST                     BIT(0)
 #define BAM_EN                         BIT(1)
@@ -1051,6 +1080,7 @@ static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
 static const struct of_device_id bam_of_match[] = {
        { .compatible = "qcom,bam-v1.3.0", .data = &bam_v1_3_reg_info },
        { .compatible = "qcom,bam-v1.4.0", .data = &bam_v1_4_reg_info },
+       { .compatible = "qcom,bam-v1.7.0", .data = &bam_v1_7_reg_info },
        {}
 };
 
@@ -1113,7 +1143,7 @@ static int bam_dma_probe(struct platform_device *pdev)
 
        if (!bdev->channels) {
                ret = -ENOMEM;
-               goto err_disable_clk;
+               goto err_tasklet_kill;
        }
 
        /* allocate and initialize channels */
@@ -1125,7 +1155,7 @@ static int bam_dma_probe(struct platform_device *pdev)
        ret = devm_request_irq(bdev->dev, bdev->irq, bam_dma_irq,
                        IRQF_TRIGGER_HIGH, "bam_dma", bdev);
        if (ret)
-               goto err_disable_clk;
+               goto err_bam_channel_exit;
 
        /* set max dma segment size */
        bdev->common.dev = bdev->dev;
@@ -1133,7 +1163,7 @@ static int bam_dma_probe(struct platform_device *pdev)
        ret = dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE);
        if (ret) {
                dev_err(bdev->dev, "cannot set maximum segment size\n");
-               goto err_disable_clk;
+               goto err_bam_channel_exit;
        }
 
        platform_set_drvdata(pdev, bdev);
@@ -1161,7 +1191,7 @@ static int bam_dma_probe(struct platform_device *pdev)
        ret = dma_async_device_register(&bdev->common);
        if (ret) {
                dev_err(bdev->dev, "failed to register dma async device\n");
-               goto err_disable_clk;
+               goto err_bam_channel_exit;
        }
 
        ret = of_dma_controller_register(pdev->dev.of_node, bam_dma_xlate,
@@ -1173,8 +1203,14 @@ static int bam_dma_probe(struct platform_device *pdev)
 
 err_unregister_dma:
        dma_async_device_unregister(&bdev->common);
+err_bam_channel_exit:
+       for (i = 0; i < bdev->num_channels; i++)
+               tasklet_kill(&bdev->channels[i].vc.task);
+err_tasklet_kill:
+       tasklet_kill(&bdev->task);
 err_disable_clk:
        clk_disable_unprepare(bdev->bamclk);
+
        return ret;
 }
 
index 2f91da3..01dcaf2 100644 (file)
@@ -749,11 +749,6 @@ unlock:
        return ret;
 }
 
-static int s3c24xx_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
 static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan)
 {
        /* Ensure all queued descriptors are freed */
@@ -1238,7 +1233,7 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
        if (!s3cdma->phy_chans)
                return -ENOMEM;
 
-       /* aquire irqs and clocks for all physical channels */
+       /* acquire irqs and clocks for all physical channels */
        for (i = 0; i < pdata->num_phy_channels; i++) {
                struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
                char clk_name[6];
@@ -1266,7 +1261,7 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
                        sprintf(clk_name, "dma.%d", i);
                        phy->clk = devm_clk_get(&pdev->dev, clk_name);
                        if (IS_ERR(phy->clk) && sdata->has_clocks) {
-                               dev_err(&pdev->dev, "unable to aquire clock for channel %d, error %lu",
+                               dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %lu\n",
                                        i, PTR_ERR(phy->clk));
                                continue;
                        }
@@ -1290,8 +1285,6 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
        dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask);
        dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask);
        s3cdma->memcpy.dev = &pdev->dev;
-       s3cdma->memcpy.device_alloc_chan_resources =
-                                       s3c24xx_dma_alloc_chan_resources;
        s3cdma->memcpy.device_free_chan_resources =
                                        s3c24xx_dma_free_chan_resources;
        s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy;
@@ -1305,8 +1298,6 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
        dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask);
        dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
        s3cdma->slave.dev = &pdev->dev;
-       s3cdma->slave.device_alloc_chan_resources =
-                                       s3c24xx_dma_alloc_chan_resources;
        s3cdma->slave.device_free_chan_resources =
                                        s3c24xx_dma_free_chan_resources;
        s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
index 5adf540..43db255 100644 (file)
@@ -389,11 +389,6 @@ static void sa11x0_dma_tasklet(unsigned long arg)
 }
 
 
-static int sa11x0_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
 static void sa11x0_dma_free_chan_resources(struct dma_chan *chan)
 {
        struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
@@ -835,7 +830,6 @@ static int sa11x0_dma_init_dmadev(struct dma_device *dmadev,
 
        INIT_LIST_HEAD(&dmadev->channels);
        dmadev->dev = dev;
-       dmadev->device_alloc_chan_resources = sa11x0_dma_alloc_chan_resources;
        dmadev->device_free_chan_resources = sa11x0_dma_free_chan_resources;
        dmadev->device_config = sa11x0_dma_device_config;
        dmadev->device_pause = sa11x0_dma_device_pause;
@@ -948,6 +942,12 @@ static int sa11x0_dma_probe(struct platform_device *pdev)
        dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
        d->slave.device_prep_slave_sg = sa11x0_dma_prep_slave_sg;
        d->slave.device_prep_dma_cyclic = sa11x0_dma_prep_dma_cyclic;
+       d->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+       d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+       d->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+                                  BIT(DMA_SLAVE_BUSWIDTH_2_BYTES);
+       d->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+                                  BIT(DMA_SLAVE_BUSWIDTH_2_BYTES);
        ret = sa11x0_dma_init_dmadev(&d->slave, &pdev->dev);
        if (ret) {
                dev_warn(d->slave.dev, "failed to register slave async device: %d\n",
index 8190ad2..0f37152 100644 (file)
@@ -51,12 +51,6 @@ config RCAR_HPB_DMAE
        help
          Enable support for the Renesas R-Car series DMA controllers.
 
-config RCAR_AUDMAC_PP
-       tristate "Renesas R-Car Audio DMAC Peripheral Peripheral support"
-       depends on SH_DMAE_BASE
-       help
-         Enable support for the Renesas R-Car Audio DMAC Peripheral Peripheral controllers.
-
 config RCAR_DMAC
        tristate "Renesas R-Car Gen2 DMA Controller"
        depends on ARCH_SHMOBILE || COMPILE_TEST
@@ -64,3 +58,12 @@ config RCAR_DMAC
        help
          This driver supports the general purpose DMA controller found in the
          Renesas R-Car second generation SoCs.
+
+config RENESAS_USB_DMAC
+       tristate "Renesas USB-DMA Controller"
+       depends on ARCH_SHMOBILE || COMPILE_TEST
+       select RENESAS_DMA
+       select DMA_VIRTUAL_CHANNELS
+       help
+         This driver supports the USB-DMA controller found in the Renesas
+         SoCs.
index 2852f9d..b8a5980 100644 (file)
@@ -15,5 +15,5 @@ obj-$(CONFIG_SH_DMAE) += shdma.o
 
 obj-$(CONFIG_SUDMAC) += sudmac.o
 obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o
-obj-$(CONFIG_RCAR_AUDMAC_PP) += rcar-audmapp.o
 obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o
+obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o
diff --git a/drivers/dma/sh/rcar-audmapp.c b/drivers/dma/sh/rcar-audmapp.c
deleted file mode 100644 (file)
index d95bbdd..0000000
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- * This is for Renesas R-Car Audio-DMAC-peri-peri.
- *
- * Copyright (C) 2014 Renesas Electronics Corporation
- * Copyright (C) 2014 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * based on the drivers/dma/sh/shdma.c
- *
- * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
- * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
- * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
- * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/dmaengine.h>
-#include <linux/of_dma.h>
-#include <linux/platform_data/dma-rcar-audmapp.h>
-#include <linux/platform_device.h>
-#include <linux/shdma-base.h>
-
-/*
- * DMA register
- */
-#define PDMASAR                0x00
-#define PDMADAR                0x04
-#define PDMACHCR       0x0c
-
-/* PDMACHCR */
-#define PDMACHCR_DE            (1 << 0)
-
-#define AUDMAPP_MAX_CHANNELS   29
-
-/* Default MEMCPY transfer size = 2^2 = 4 bytes */
-#define LOG2_DEFAULT_XFER_SIZE 2
-#define AUDMAPP_SLAVE_NUMBER   256
-#define AUDMAPP_LEN_MAX                (16 * 1024 * 1024)
-
-struct audmapp_chan {
-       struct shdma_chan shdma_chan;
-       void __iomem *base;
-       dma_addr_t slave_addr;
-       u32 chcr;
-};
-
-struct audmapp_device {
-       struct shdma_dev shdma_dev;
-       struct audmapp_pdata *pdata;
-       struct device *dev;
-       void __iomem *chan_reg;
-};
-
-struct audmapp_desc {
-       struct shdma_desc shdma_desc;
-       dma_addr_t src;
-       dma_addr_t dst;
-};
-
-#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan)
-
-#define to_chan(chan) container_of(chan, struct audmapp_chan, shdma_chan)
-#define to_desc(sdesc) container_of(sdesc, struct audmapp_desc, shdma_desc)
-#define to_dev(chan) container_of(chan->shdma_chan.dma_chan.device,    \
-                                 struct audmapp_device, shdma_dev.dma_dev)
-
-static void audmapp_write(struct audmapp_chan *auchan, u32 data, u32 reg)
-{
-       struct audmapp_device *audev = to_dev(auchan);
-       struct device *dev = audev->dev;
-
-       dev_dbg(dev, "w %p : %08x\n", auchan->base + reg, data);
-
-       iowrite32(data, auchan->base + reg);
-}
-
-static u32 audmapp_read(struct audmapp_chan *auchan, u32 reg)
-{
-       return ioread32(auchan->base + reg);
-}
-
-static void audmapp_halt(struct shdma_chan *schan)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-       int i;
-
-       audmapp_write(auchan, 0, PDMACHCR);
-
-       for (i = 0; i < 1024; i++) {
-               if (0 == audmapp_read(auchan, PDMACHCR))
-                       return;
-               udelay(1);
-       }
-}
-
-static void audmapp_start_xfer(struct shdma_chan *schan,
-                              struct shdma_desc *sdesc)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-       struct audmapp_device *audev = to_dev(auchan);
-       struct audmapp_desc *desc = to_desc(sdesc);
-       struct device *dev = audev->dev;
-       u32 chcr = auchan->chcr | PDMACHCR_DE;
-
-       dev_dbg(dev, "src/dst/chcr = %pad/%pad/%08x\n",
-               &desc->src, &desc->dst, chcr);
-
-       audmapp_write(auchan, desc->src,        PDMASAR);
-       audmapp_write(auchan, desc->dst,        PDMADAR);
-       audmapp_write(auchan, chcr,     PDMACHCR);
-}
-
-static int audmapp_get_config(struct audmapp_chan *auchan, int slave_id,
-                             u32 *chcr, dma_addr_t *dst)
-{
-       struct audmapp_device *audev = to_dev(auchan);
-       struct audmapp_pdata *pdata = audev->pdata;
-       struct audmapp_slave_config *cfg;
-       int i;
-
-       *chcr   = 0;
-       *dst    = 0;
-
-       if (!pdata) { /* DT */
-               *chcr = ((u32)slave_id) << 16;
-               auchan->shdma_chan.slave_id = (slave_id) >> 8;
-               return 0;
-       }
-
-       /* non-DT */
-
-       if (slave_id >= AUDMAPP_SLAVE_NUMBER)
-               return -ENXIO;
-
-       for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
-               if (cfg->slave_id == slave_id) {
-                       *chcr   = cfg->chcr;
-                       *dst    = cfg->dst;
-                       return 0;
-               }
-
-       return -ENXIO;
-}
-
-static int audmapp_set_slave(struct shdma_chan *schan, int slave_id,
-                            dma_addr_t slave_addr, bool try)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-       u32 chcr;
-       dma_addr_t dst;
-       int ret;
-
-       ret = audmapp_get_config(auchan, slave_id, &chcr, &dst);
-       if (ret < 0)
-               return ret;
-
-       if (try)
-               return 0;
-
-       auchan->chcr            = chcr;
-       auchan->slave_addr      = slave_addr ? : dst;
-
-       return 0;
-}
-
-static int audmapp_desc_setup(struct shdma_chan *schan,
-                             struct shdma_desc *sdesc,
-                             dma_addr_t src, dma_addr_t dst, size_t *len)
-{
-       struct audmapp_desc *desc = to_desc(sdesc);
-
-       if (*len > (size_t)AUDMAPP_LEN_MAX)
-               *len = (size_t)AUDMAPP_LEN_MAX;
-
-       desc->src = src;
-       desc->dst = dst;
-
-       return 0;
-}
-
-static void audmapp_setup_xfer(struct shdma_chan *schan,
-                              int slave_id)
-{
-}
-
-static dma_addr_t audmapp_slave_addr(struct shdma_chan *schan)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-
-       return auchan->slave_addr;
-}
-
-static bool audmapp_channel_busy(struct shdma_chan *schan)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-       u32 chcr = audmapp_read(auchan, PDMACHCR);
-
-       return chcr & ~PDMACHCR_DE;
-}
-
-static bool audmapp_desc_completed(struct shdma_chan *schan,
-                                  struct shdma_desc *sdesc)
-{
-       return true;
-}
-
-static struct shdma_desc *audmapp_embedded_desc(void *buf, int i)
-{
-       return &((struct audmapp_desc *)buf)[i].shdma_desc;
-}
-
-static const struct shdma_ops audmapp_shdma_ops = {
-       .halt_channel   = audmapp_halt,
-       .desc_setup     = audmapp_desc_setup,
-       .set_slave      = audmapp_set_slave,
-       .start_xfer     = audmapp_start_xfer,
-       .embedded_desc  = audmapp_embedded_desc,
-       .setup_xfer     = audmapp_setup_xfer,
-       .slave_addr     = audmapp_slave_addr,
-       .channel_busy   = audmapp_channel_busy,
-       .desc_completed = audmapp_desc_completed,
-};
-
-static int audmapp_chan_probe(struct platform_device *pdev,
-                             struct audmapp_device *audev, int id)
-{
-       struct shdma_dev *sdev = &audev->shdma_dev;
-       struct audmapp_chan *auchan;
-       struct shdma_chan *schan;
-       struct device *dev = audev->dev;
-
-       auchan = devm_kzalloc(dev, sizeof(*auchan), GFP_KERNEL);
-       if (!auchan)
-               return -ENOMEM;
-
-       schan = &auchan->shdma_chan;
-       schan->max_xfer_len = AUDMAPP_LEN_MAX;
-
-       shdma_chan_probe(sdev, schan, id);
-
-       auchan->base = audev->chan_reg + 0x20 + (0x10 * id);
-       dev_dbg(dev, "%02d : %p / %p", id, auchan->base, audev->chan_reg);
-
-       return 0;
-}
-
-static void audmapp_chan_remove(struct audmapp_device *audev)
-{
-       struct shdma_chan *schan;
-       int i;
-
-       shdma_for_each_chan(schan, &audev->shdma_dev, i) {
-               BUG_ON(!schan);
-               shdma_chan_remove(schan);
-       }
-}
-
-static struct dma_chan *audmapp_of_xlate(struct of_phandle_args *dma_spec,
-                                        struct of_dma *ofdma)
-{
-       dma_cap_mask_t mask;
-       struct dma_chan *chan;
-       u32 chcr = dma_spec->args[0];
-
-       if (dma_spec->args_count != 1)
-               return NULL;
-
-       dma_cap_zero(mask);
-       dma_cap_set(DMA_SLAVE, mask);
-
-       chan = dma_request_channel(mask, shdma_chan_filter, NULL);
-       if (chan)
-               to_shdma_chan(chan)->hw_req = chcr;
-
-       return chan;
-}
-
-static int audmapp_probe(struct platform_device *pdev)
-{
-       struct audmapp_pdata *pdata = pdev->dev.platform_data;
-       struct device_node *np = pdev->dev.of_node;
-       struct audmapp_device *audev;
-       struct shdma_dev *sdev;
-       struct dma_device *dma_dev;
-       struct resource *res;
-       int err, i;
-
-       if (np)
-               of_dma_controller_register(np, audmapp_of_xlate, pdev);
-       else if (!pdata)
-               return -ENODEV;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-       audev = devm_kzalloc(&pdev->dev, sizeof(*audev), GFP_KERNEL);
-       if (!audev)
-               return -ENOMEM;
-
-       audev->dev      = &pdev->dev;
-       audev->pdata    = pdata;
-       audev->chan_reg = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(audev->chan_reg))
-               return PTR_ERR(audev->chan_reg);
-
-       sdev            = &audev->shdma_dev;
-       sdev->ops       = &audmapp_shdma_ops;
-       sdev->desc_size = sizeof(struct audmapp_desc);
-
-       dma_dev                 = &sdev->dma_dev;
-       dma_dev->copy_align     = LOG2_DEFAULT_XFER_SIZE;
-       dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
-
-       err = shdma_init(&pdev->dev, sdev, AUDMAPP_MAX_CHANNELS);
-       if (err < 0)
-               return err;
-
-       platform_set_drvdata(pdev, audev);
-
-       /* Create DMA Channel */
-       for (i = 0; i < AUDMAPP_MAX_CHANNELS; i++) {
-               err = audmapp_chan_probe(pdev, audev, i);
-               if (err)
-                       goto chan_probe_err;
-       }
-
-       err = dma_async_device_register(dma_dev);
-       if (err < 0)
-               goto chan_probe_err;
-
-       return err;
-
-chan_probe_err:
-       audmapp_chan_remove(audev);
-       shdma_cleanup(sdev);
-
-       return err;
-}
-
-static int audmapp_remove(struct platform_device *pdev)
-{
-       struct audmapp_device *audev = platform_get_drvdata(pdev);
-       struct dma_device *dma_dev = &audev->shdma_dev.dma_dev;
-
-       dma_async_device_unregister(dma_dev);
-
-       audmapp_chan_remove(audev);
-       shdma_cleanup(&audev->shdma_dev);
-
-       return 0;
-}
-
-static const struct of_device_id audmapp_of_match[] = {
-       { .compatible = "renesas,rcar-audmapp", },
-       {},
-};
-
-static struct platform_driver audmapp_driver = {
-       .probe          = audmapp_probe,
-       .remove         = audmapp_remove,
-       .driver         = {
-               .name   = "rcar-audmapp-engine",
-               .of_match_table = audmapp_of_match,
-       },
-};
-module_platform_driver(audmapp_driver);
-
-MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>");
-MODULE_DESCRIPTION("Renesas R-Car Audio DMAC peri-peri driver");
-MODULE_LICENSE("GPL");
index 8ee383d..10fcaba 100644 (file)
@@ -171,8 +171,7 @@ static struct shdma_desc *shdma_get_desc(struct shdma_chan *schan)
        return NULL;
 }
 
-static int shdma_setup_slave(struct shdma_chan *schan, int slave_id,
-                            dma_addr_t slave_addr)
+static int shdma_setup_slave(struct shdma_chan *schan, dma_addr_t slave_addr)
 {
        struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
        const struct shdma_ops *ops = sdev->ops;
@@ -183,25 +182,23 @@ static int shdma_setup_slave(struct shdma_chan *schan, int slave_id,
                ret = ops->set_slave(schan, match, slave_addr, true);
                if (ret < 0)
                        return ret;
-
-               slave_id = schan->slave_id;
        } else {
-               match = slave_id;
+               match = schan->real_slave_id;
        }
 
-       if (slave_id < 0 || slave_id >= slave_num)
+       if (schan->real_slave_id < 0 || schan->real_slave_id >= slave_num)
                return -EINVAL;
 
-       if (test_and_set_bit(slave_id, shdma_slave_used))
+       if (test_and_set_bit(schan->real_slave_id, shdma_slave_used))
                return -EBUSY;
 
        ret = ops->set_slave(schan, match, slave_addr, false);
        if (ret < 0) {
-               clear_bit(slave_id, shdma_slave_used);
+               clear_bit(schan->real_slave_id, shdma_slave_used);
                return ret;
        }
 
-       schan->slave_id = slave_id;
+       schan->slave_id = schan->real_slave_id;
 
        return 0;
 }
@@ -221,10 +218,12 @@ static int shdma_alloc_chan_resources(struct dma_chan *chan)
         */
        if (slave) {
                /* Legacy mode: .private is set in filter */
-               ret = shdma_setup_slave(schan, slave->slave_id, 0);
+               schan->real_slave_id = slave->slave_id;
+               ret = shdma_setup_slave(schan, 0);
                if (ret < 0)
                        goto esetslave;
        } else {
+               /* Normal mode: real_slave_id was set by filter */
                schan->slave_id = -EINVAL;
        }
 
@@ -258,11 +257,14 @@ esetslave:
 
 /*
  * This is the standard shdma filter function to be used as a replacement to the
- * "old" method, using the .private pointer. If for some reason you allocate a
- * channel without slave data, use something like ERR_PTR(-EINVAL) as a filter
+ * "old" method, using the .private pointer.
+ * You always have to pass a valid slave id as the argument, old drivers that
+ * pass ERR_PTR(-EINVAL) as a filter parameter and set it up in dma_slave_config
+ * need to be updated so we can remove the slave_id field from dma_slave_config.
  * parameter. If this filter is used, the slave driver, after calling
  * dma_request_channel(), will also have to call dmaengine_slave_config() with
- * .slave_id, .direction, and either .src_addr or .dst_addr set.
+ * .direction, and either .src_addr or .dst_addr set.
+ *
  * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE
  * capability! If this becomes a requirement, hardware glue drivers, using this
  * services would have to provide their own filters, which first would check
@@ -276,7 +278,7 @@ bool shdma_chan_filter(struct dma_chan *chan, void *arg)
 {
        struct shdma_chan *schan;
        struct shdma_dev *sdev;
-       int match = (long)arg;
+       int slave_id = (long)arg;
        int ret;
 
        /* Only support channels handled by this driver. */
@@ -284,19 +286,39 @@ bool shdma_chan_filter(struct dma_chan *chan, void *arg)
            shdma_alloc_chan_resources)
                return false;
 
-       if (match < 0)
+       schan = to_shdma_chan(chan);
+       sdev = to_shdma_dev(chan->device);
+
+       /*
+        * For DT, the schan->slave_id field is generated by the
+        * set_slave function from the slave ID that is passed in
+        * from xlate. For the non-DT case, the slave ID is
+        * directly passed into the filter function by the driver
+        */
+       if (schan->dev->of_node) {
+               ret = sdev->ops->set_slave(schan, slave_id, 0, true);
+               if (ret < 0)
+                       return false;
+
+               schan->real_slave_id = schan->slave_id;
+               return true;
+       }
+
+       if (slave_id < 0) {
                /* No slave requested - arbitrary channel */
+               dev_warn(sdev->dma_dev.dev, "invalid slave ID passed to dma_request_slave\n");
                return true;
+       }
 
-       schan = to_shdma_chan(chan);
-       if (!schan->dev->of_node && match >= slave_num)
+       if (slave_id >= slave_num)
                return false;
 
-       sdev = to_shdma_dev(schan->dma_chan.device);
-       ret = sdev->ops->set_slave(schan, match, 0, true);
+       ret = sdev->ops->set_slave(schan, slave_id, 0, true);
        if (ret < 0)
                return false;
 
+       schan->real_slave_id = slave_id;
+
        return true;
 }
 EXPORT_SYMBOL(shdma_chan_filter);
@@ -452,6 +474,8 @@ static void shdma_free_chan_resources(struct dma_chan *chan)
                chan->private = NULL;
        }
 
+       schan->real_slave_id = 0;
+
        spin_lock_irq(&schan->chan_lock);
 
        list_splice_init(&schan->ld_free, &list);
@@ -764,11 +788,20 @@ static int shdma_config(struct dma_chan *chan,
         */
        if (!config)
                return -EINVAL;
+
+       /*
+        * overriding the slave_id through dma_slave_config is deprecated,
+        * but possibly some out-of-tree drivers still do it.
+        */
+       if (WARN_ON_ONCE(config->slave_id &&
+                        config->slave_id != schan->real_slave_id))
+               schan->real_slave_id = config->slave_id;
+
        /*
         * We could lock this, but you shouldn't be configuring the
         * channel, while using it...
         */
-       return shdma_setup_slave(schan, config->slave_id,
+       return shdma_setup_slave(schan,
                                 config->direction == DMA_DEV_TO_MEM ?
                                 config->src_addr : config->dst_addr);
 }
index 9f1d4c7..11707df 100644 (file)
@@ -443,7 +443,7 @@ static bool sh_dmae_reset(struct sh_dmae_device *shdev)
        return ret;
 }
 
-#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARM)
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
 static irqreturn_t sh_dmae_err(int irq, void *data)
 {
        struct sh_dmae_device *shdev = data;
@@ -689,7 +689,7 @@ static int sh_dmae_probe(struct platform_device *pdev)
        const struct sh_dmae_pdata *pdata;
        unsigned long chan_flag[SH_DMAE_MAX_CHANNELS] = {};
        int chan_irq[SH_DMAE_MAX_CHANNELS];
-#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARM)
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
        unsigned long irqflags = 0;
        int errirq;
 #endif
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
new file mode 100644 (file)
index 0000000..f705798
--- /dev/null
@@ -0,0 +1,910 @@
+/*
+ * Renesas USB DMA Controller Driver
+ *
+ * Copyright (C) 2015 Renesas Electronics Corporation
+ *
+ * based on rcar-dmac.c
+ * Copyright (C) 2014 Renesas Electronics Inc.
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/*
+ * struct usb_dmac_sg - Descriptor for a hardware transfer
+ * @mem_addr: memory address
+ * @size: transfer size in bytes
+ */
+struct usb_dmac_sg {
+       dma_addr_t mem_addr;
+       u32 size;
+};
+
+/*
+ * struct usb_dmac_desc - USB DMA Transfer Descriptor
+ * @vd: base virtual channel DMA transaction descriptor
+ * @direction: direction of the DMA transfer
+ * @sg_allocated_len: length of allocated sg
+ * @sg_len: length of sg
+ * @sg_index: index of sg
+ * @residue: residue after the DMAC completed a transfer
+ * @node: node for desc_got and desc_freed
+ * @done_cookie: cookie after the DMAC completed a transfer
+ * @sg: information for the transfer
+ */
+struct usb_dmac_desc {
+       struct virt_dma_desc vd;
+       enum dma_transfer_direction direction;
+       unsigned int sg_allocated_len;
+       unsigned int sg_len;
+       unsigned int sg_index;
+       u32 residue;
+       struct list_head node;
+       dma_cookie_t done_cookie;
+       struct usb_dmac_sg sg[0];
+};
+
+#define to_usb_dmac_desc(vd)   container_of(vd, struct usb_dmac_desc, vd)
+
+/*
+ * struct usb_dmac_chan - USB DMA Controller Channel
+ * @vc: base virtual DMA channel object
+ * @iomem: channel I/O memory base
+ * @index: index of this channel in the controller
+ * @irq: irq number of this channel
+ * @desc: the current descriptor
+ * @descs_allocated: number of descriptors allocated
+ * @desc_got: got descriptors
+ * @desc_freed: freed descriptors after the DMAC completed a transfer
+ */
+struct usb_dmac_chan {
+       struct virt_dma_chan vc;
+       void __iomem *iomem;
+       unsigned int index;
+       int irq;
+       struct usb_dmac_desc *desc;
+       int descs_allocated;
+       struct list_head desc_got;
+       struct list_head desc_freed;
+};
+
+#define to_usb_dmac_chan(c) container_of(c, struct usb_dmac_chan, vc.chan)
+
+/*
+ * struct usb_dmac - USB DMA Controller
+ * @engine: base DMA engine object
+ * @dev: the hardware device
+ * @iomem: remapped I/O memory base
+ * @n_channels: number of available channels
+ * @channels: array of DMAC channels
+ */
+struct usb_dmac {
+       struct dma_device engine;
+       struct device *dev;
+       void __iomem *iomem;
+
+       unsigned int n_channels;
+       struct usb_dmac_chan *channels;
+};
+
+#define to_usb_dmac(d)         container_of(d, struct usb_dmac, engine)
+
+/* -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define USB_DMAC_CHAN_OFFSET(i)                (0x20 + 0x20 * (i))
+
+#define USB_DMASWR                     0x0008
+#define USB_DMASWR_SWR                 (1 << 0)
+#define USB_DMAOR                      0x0060
+#define USB_DMAOR_AE                   (1 << 2)
+#define USB_DMAOR_DME                  (1 << 0)
+
+#define USB_DMASAR                     0x0000
+#define USB_DMADAR                     0x0004
+#define USB_DMATCR                     0x0008
+#define USB_DMATCR_MASK                        0x00ffffff
+#define USB_DMACHCR                    0x0014
+#define USB_DMACHCR_FTE                        (1 << 24)
+#define USB_DMACHCR_NULLE              (1 << 16)
+#define USB_DMACHCR_NULL               (1 << 12)
+#define USB_DMACHCR_TS_8B              ((0 << 7) | (0 << 6))
+#define USB_DMACHCR_TS_16B             ((0 << 7) | (1 << 6))
+#define USB_DMACHCR_TS_32B             ((1 << 7) | (0 << 6))
+#define USB_DMACHCR_IE                 (1 << 5)
+#define USB_DMACHCR_SP                 (1 << 2)
+#define USB_DMACHCR_TE                 (1 << 1)
+#define USB_DMACHCR_DE                 (1 << 0)
+#define USB_DMATEND                    0x0018
+
+/* Hardcode the xfer_shift to 5 (32bytes) */
+#define USB_DMAC_XFER_SHIFT    5
+#define USB_DMAC_XFER_SIZE     (1 << USB_DMAC_XFER_SHIFT)
+#define USB_DMAC_CHCR_TS       USB_DMACHCR_TS_32B
+#define USB_DMAC_SLAVE_BUSWIDTH        DMA_SLAVE_BUSWIDTH_32_BYTES
+
+/* for descriptors */
+#define USB_DMAC_INITIAL_NR_DESC       16
+#define USB_DMAC_INITIAL_NR_SG         8
+
+/* -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void usb_dmac_write(struct usb_dmac *dmac, u32 reg, u32 data)
+{
+       writel(data, dmac->iomem + reg);
+}
+
+static u32 usb_dmac_read(struct usb_dmac *dmac, u32 reg)
+{
+       return readl(dmac->iomem + reg);
+}
+
+static u32 usb_dmac_chan_read(struct usb_dmac_chan *chan, u32 reg)
+{
+       return readl(chan->iomem + reg);
+}
+
+static void usb_dmac_chan_write(struct usb_dmac_chan *chan, u32 reg, u32 data)
+{
+       writel(data, chan->iomem + reg);
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization and configuration
+ */
+
+static bool usb_dmac_chan_is_busy(struct usb_dmac_chan *chan)
+{
+       u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+
+       return (chcr & (USB_DMACHCR_DE | USB_DMACHCR_TE)) == USB_DMACHCR_DE;
+}
+
+static u32 usb_dmac_calc_tend(u32 size)
+{
+       /*
+        * Please refer to the Figure "Example of Final Transaction Valid
+        * Data Transfer Enable (EDTEN) Setting" in the data sheet.
+        */
+       return 0xffffffff << (32 - (size % USB_DMAC_XFER_SIZE ? :
+                                               USB_DMAC_XFER_SIZE));
+}
+
+/* This function is already held by vc.lock */
+static void usb_dmac_chan_start_sg(struct usb_dmac_chan *chan,
+                                  unsigned int index)
+{
+       struct usb_dmac_desc *desc = chan->desc;
+       struct usb_dmac_sg *sg = desc->sg + index;
+       dma_addr_t src_addr = 0, dst_addr = 0;
+
+       WARN_ON_ONCE(usb_dmac_chan_is_busy(chan));
+
+       if (desc->direction == DMA_DEV_TO_MEM)
+               dst_addr = sg->mem_addr;
+       else
+               src_addr = sg->mem_addr;
+
+       dev_dbg(chan->vc.chan.device->dev,
+               "chan%u: queue sg %p: %u@%pad -> %pad\n",
+               chan->index, sg, sg->size, &src_addr, &dst_addr);
+
+       usb_dmac_chan_write(chan, USB_DMASAR, src_addr & 0xffffffff);
+       usb_dmac_chan_write(chan, USB_DMADAR, dst_addr & 0xffffffff);
+       usb_dmac_chan_write(chan, USB_DMATCR,
+                           DIV_ROUND_UP(sg->size, USB_DMAC_XFER_SIZE));
+       usb_dmac_chan_write(chan, USB_DMATEND, usb_dmac_calc_tend(sg->size));
+
+       usb_dmac_chan_write(chan, USB_DMACHCR, USB_DMAC_CHCR_TS |
+                       USB_DMACHCR_NULLE | USB_DMACHCR_IE | USB_DMACHCR_DE);
+}
+
+/* This function is already held by vc.lock */
+static void usb_dmac_chan_start_desc(struct usb_dmac_chan *chan)
+{
+       struct virt_dma_desc *vd;
+
+       vd = vchan_next_desc(&chan->vc);
+       if (!vd) {
+               chan->desc = NULL;
+               return;
+       }
+
+       /*
+        * Remove this request from vc->desc_issued. Otherwise, this driver
+        * will get the previous value from vchan_next_desc() after a transfer
+        * was completed.
+        */
+       list_del(&vd->node);
+
+       chan->desc = to_usb_dmac_desc(vd);
+       chan->desc->sg_index = 0;
+       usb_dmac_chan_start_sg(chan, 0);
+}
+
+static int usb_dmac_init(struct usb_dmac *dmac)
+{
+       u16 dmaor;
+
+       /* Clear all channels and enable the DMAC globally. */
+       usb_dmac_write(dmac, USB_DMAOR, USB_DMAOR_DME);
+
+       dmaor = usb_dmac_read(dmac, USB_DMAOR);
+       if ((dmaor & (USB_DMAOR_AE | USB_DMAOR_DME)) != USB_DMAOR_DME) {
+               dev_warn(dmac->dev, "DMAOR initialization failed.\n");
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors allocation and free
+ */
+static int usb_dmac_desc_alloc(struct usb_dmac_chan *chan, unsigned int sg_len,
+                              gfp_t gfp)
+{
+       struct usb_dmac_desc *desc;
+       unsigned long flags;
+
+       desc = kzalloc(sizeof(*desc) + sg_len * sizeof(desc->sg[0]), gfp);
+       if (!desc)
+               return -ENOMEM;
+
+       desc->sg_allocated_len = sg_len;
+       INIT_LIST_HEAD(&desc->node);
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       list_add_tail(&desc->node, &chan->desc_freed);
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       return 0;
+}
+
+static void usb_dmac_desc_free(struct usb_dmac_chan *chan)
+{
+       struct usb_dmac_desc *desc, *_desc;
+       LIST_HEAD(list);
+
+       list_splice_init(&chan->desc_freed, &list);
+       list_splice_init(&chan->desc_got, &list);
+
+       list_for_each_entry_safe(desc, _desc, &list, node) {
+               list_del(&desc->node);
+               kfree(desc);
+       }
+       chan->descs_allocated = 0;
+}
+
+static struct usb_dmac_desc *usb_dmac_desc_get(struct usb_dmac_chan *chan,
+                                              unsigned int sg_len, gfp_t gfp)
+{
+       struct usb_dmac_desc *desc = NULL;
+       unsigned long flags;
+
+       /* Get a freed descritpor */
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       list_for_each_entry(desc, &chan->desc_freed, node) {
+               if (sg_len <= desc->sg_allocated_len) {
+                       list_move_tail(&desc->node, &chan->desc_got);
+                       spin_unlock_irqrestore(&chan->vc.lock, flags);
+                       return desc;
+               }
+       }
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       /* Allocate a new descriptor */
+       if (!usb_dmac_desc_alloc(chan, sg_len, gfp)) {
+               /* If allocated the desc, it was added to tail of the list */
+               spin_lock_irqsave(&chan->vc.lock, flags);
+               desc = list_last_entry(&chan->desc_freed, struct usb_dmac_desc,
+                                      node);
+               list_move_tail(&desc->node, &chan->desc_got);
+               spin_unlock_irqrestore(&chan->vc.lock, flags);
+               return desc;
+       }
+
+       return NULL;
+}
+
+static void usb_dmac_desc_put(struct usb_dmac_chan *chan,
+                             struct usb_dmac_desc *desc)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       list_move_tail(&desc->node, &chan->desc_freed);
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+/* -----------------------------------------------------------------------------
+ * Stop and reset
+ */
+
+static void usb_dmac_soft_reset(struct usb_dmac_chan *uchan)
+{
+       struct dma_chan *chan = &uchan->vc.chan;
+       struct usb_dmac *dmac = to_usb_dmac(chan->device);
+       int i;
+
+       /* Don't issue soft reset if any one of channels is busy */
+       for (i = 0; i < dmac->n_channels; ++i) {
+               if (usb_dmac_chan_is_busy(uchan))
+                       return;
+       }
+
+       usb_dmac_write(dmac, USB_DMAOR, 0);
+       usb_dmac_write(dmac, USB_DMASWR, USB_DMASWR_SWR);
+       udelay(100);
+       usb_dmac_write(dmac, USB_DMASWR, 0);
+       usb_dmac_write(dmac, USB_DMAOR, 1);
+}
+
+static void usb_dmac_chan_halt(struct usb_dmac_chan *chan)
+{
+       u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+
+       chcr &= ~(USB_DMACHCR_IE | USB_DMACHCR_TE | USB_DMACHCR_DE);
+       usb_dmac_chan_write(chan, USB_DMACHCR, chcr);
+
+       usb_dmac_soft_reset(chan);
+}
+
+static void usb_dmac_stop(struct usb_dmac *dmac)
+{
+       usb_dmac_write(dmac, USB_DMAOR, 0);
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int usb_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       int ret;
+
+       while (uchan->descs_allocated < USB_DMAC_INITIAL_NR_DESC) {
+               ret = usb_dmac_desc_alloc(uchan, USB_DMAC_INITIAL_NR_SG,
+                                         GFP_KERNEL);
+               if (ret < 0) {
+                       usb_dmac_desc_free(uchan);
+                       return ret;
+               }
+               uchan->descs_allocated++;
+       }
+
+       return pm_runtime_get_sync(chan->device->dev);
+}
+
+static void usb_dmac_free_chan_resources(struct dma_chan *chan)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       unsigned long flags;
+
+       /* Protect against ISR */
+       spin_lock_irqsave(&uchan->vc.lock, flags);
+       usb_dmac_chan_halt(uchan);
+       spin_unlock_irqrestore(&uchan->vc.lock, flags);
+
+       usb_dmac_desc_free(uchan);
+       vchan_free_chan_resources(&uchan->vc);
+
+       pm_runtime_put(chan->device->dev);
+}
+
+static struct dma_async_tx_descriptor *
+usb_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+                      unsigned int sg_len, enum dma_transfer_direction dir,
+                      unsigned long dma_flags, void *context)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       struct usb_dmac_desc *desc;
+       struct scatterlist *sg;
+       int i;
+
+       if (!sg_len) {
+               dev_warn(chan->device->dev,
+                        "%s: bad parameter: len=%d\n", __func__, sg_len);
+               return NULL;
+       }
+
+       desc = usb_dmac_desc_get(uchan, sg_len, GFP_NOWAIT);
+       if (!desc)
+               return NULL;
+
+       desc->direction = dir;
+       desc->sg_len = sg_len;
+       for_each_sg(sgl, sg, sg_len, i) {
+               desc->sg[i].mem_addr = sg_dma_address(sg);
+               desc->sg[i].size = sg_dma_len(sg);
+       }
+
+       return vchan_tx_prep(&uchan->vc, &desc->vd, dma_flags);
+}
+
+static int usb_dmac_chan_terminate_all(struct dma_chan *chan)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       struct usb_dmac_desc *desc;
+       unsigned long flags;
+       LIST_HEAD(head);
+       LIST_HEAD(list);
+
+       spin_lock_irqsave(&uchan->vc.lock, flags);
+       usb_dmac_chan_halt(uchan);
+       vchan_get_all_descriptors(&uchan->vc, &head);
+       if (uchan->desc)
+               uchan->desc = NULL;
+       list_splice_init(&uchan->desc_got, &list);
+       list_for_each_entry(desc, &list, node)
+               list_move_tail(&desc->node, &uchan->desc_freed);
+       spin_unlock_irqrestore(&uchan->vc.lock, flags);
+       vchan_dma_desc_free_list(&uchan->vc, &head);
+
+       return 0;
+}
+
+static unsigned int usb_dmac_get_current_residue(struct usb_dmac_chan *chan,
+                                                struct usb_dmac_desc *desc,
+                                                int sg_index)
+{
+       struct usb_dmac_sg *sg = desc->sg + sg_index;
+       u32 mem_addr = sg->mem_addr & 0xffffffff;
+       unsigned int residue = sg->size;
+
+       /*
+        * We cannot use USB_DMATCR to calculate residue because USB_DMATCR
+        * has unsuited value to calculate.
+        */
+       if (desc->direction == DMA_DEV_TO_MEM)
+               residue -= usb_dmac_chan_read(chan, USB_DMADAR) - mem_addr;
+       else
+               residue -= usb_dmac_chan_read(chan, USB_DMASAR) - mem_addr;
+
+       return residue;
+}
+
+static u32 usb_dmac_chan_get_residue_if_complete(struct usb_dmac_chan *chan,
+                                                dma_cookie_t cookie)
+{
+       struct usb_dmac_desc *desc;
+       u32 residue = 0;
+
+       list_for_each_entry_reverse(desc, &chan->desc_freed, node) {
+               if (desc->done_cookie == cookie) {
+                       residue = desc->residue;
+                       break;
+               }
+       }
+
+       return residue;
+}
+
+static u32 usb_dmac_chan_get_residue(struct usb_dmac_chan *chan,
+                                    dma_cookie_t cookie)
+{
+       u32 residue = 0;
+       struct virt_dma_desc *vd;
+       struct usb_dmac_desc *desc = chan->desc;
+       int i;
+
+       if (!desc) {
+               vd = vchan_find_desc(&chan->vc, cookie);
+               if (!vd)
+                       return 0;
+               desc = to_usb_dmac_desc(vd);
+       }
+
+       /* Compute the size of all usb_dmac_sg still to be transferred */
+       for (i = desc->sg_index + 1; i < desc->sg_len; i++)
+               residue += desc->sg[i].size;
+
+       /* Add the residue for the current sg */
+       residue += usb_dmac_get_current_residue(chan, desc, desc->sg_index);
+
+       return residue;
+}
+
+static enum dma_status usb_dmac_tx_status(struct dma_chan *chan,
+                                         dma_cookie_t cookie,
+                                         struct dma_tx_state *txstate)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       enum dma_status status;
+       unsigned int residue = 0;
+       unsigned long flags;
+
+       status = dma_cookie_status(chan, cookie, txstate);
+       /* a client driver will get residue after DMA_COMPLETE */
+       if (!txstate)
+               return status;
+
+       spin_lock_irqsave(&uchan->vc.lock, flags);
+       if (status == DMA_COMPLETE)
+               residue = usb_dmac_chan_get_residue_if_complete(uchan, cookie);
+       else
+               residue = usb_dmac_chan_get_residue(uchan, cookie);
+       spin_unlock_irqrestore(&uchan->vc.lock, flags);
+
+       dma_set_residue(txstate, residue);
+
+       return status;
+}
+
+static void usb_dmac_issue_pending(struct dma_chan *chan)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       unsigned long flags;
+
+       spin_lock_irqsave(&uchan->vc.lock, flags);
+       if (vchan_issue_pending(&uchan->vc) && !uchan->desc)
+               usb_dmac_chan_start_desc(uchan);
+       spin_unlock_irqrestore(&uchan->vc.lock, flags);
+}
+
+static void usb_dmac_virt_desc_free(struct virt_dma_desc *vd)
+{
+       struct usb_dmac_desc *desc = to_usb_dmac_desc(vd);
+       struct usb_dmac_chan *chan = to_usb_dmac_chan(vd->tx.chan);
+
+       usb_dmac_desc_put(chan, desc);
+}
+
+/* -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static void usb_dmac_isr_transfer_end(struct usb_dmac_chan *chan)
+{
+       struct usb_dmac_desc *desc = chan->desc;
+
+       BUG_ON(!desc);
+
+       if (++desc->sg_index < desc->sg_len) {
+               usb_dmac_chan_start_sg(chan, desc->sg_index);
+       } else {
+               desc->residue = usb_dmac_get_current_residue(chan, desc,
+                                                       desc->sg_index - 1);
+               desc->done_cookie = desc->vd.tx.cookie;
+               vchan_cookie_complete(&desc->vd);
+
+               /* Restart the next transfer if this driver has a next desc */
+               usb_dmac_chan_start_desc(chan);
+       }
+}
+
+static irqreturn_t usb_dmac_isr_channel(int irq, void *dev)
+{
+       struct usb_dmac_chan *chan = dev;
+       irqreturn_t ret = IRQ_NONE;
+       u32 mask = USB_DMACHCR_TE;
+       u32 check_bits = USB_DMACHCR_TE | USB_DMACHCR_SP;
+       u32 chcr;
+
+       spin_lock(&chan->vc.lock);
+
+       chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+       if (chcr & check_bits)
+               mask |= USB_DMACHCR_DE | check_bits;
+       if (chcr & USB_DMACHCR_NULL) {
+               /* An interruption of TE will happen after we set FTE */
+               mask |= USB_DMACHCR_NULL;
+               chcr |= USB_DMACHCR_FTE;
+               ret |= IRQ_HANDLED;
+       }
+       usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
+
+       if (chcr & check_bits) {
+               usb_dmac_isr_transfer_end(chan);
+               ret |= IRQ_HANDLED;
+       }
+
+       spin_unlock(&chan->vc.lock);
+
+       return ret;
+}
+
+/* -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool usb_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       struct of_phandle_args *dma_spec = arg;
+
+       if (dma_spec->np != chan->device->dev->of_node)
+               return false;
+
+       /* USB-DMAC should be used with fixed usb controller's FIFO */
+       if (uchan->index != dma_spec->args[0])
+               return false;
+
+       return true;
+}
+
+static struct dma_chan *usb_dmac_of_xlate(struct of_phandle_args *dma_spec,
+                                         struct of_dma *ofdma)
+{
+       struct usb_dmac_chan *uchan;
+       struct dma_chan *chan;
+       dma_cap_mask_t mask;
+
+       if (dma_spec->args_count != 1)
+               return NULL;
+
+       /* Only slave DMA channels can be allocated via DT */
+       dma_cap_zero(mask);
+       dma_cap_set(DMA_SLAVE, mask);
+
+       chan = dma_request_channel(mask, usb_dmac_chan_filter, dma_spec);
+       if (!chan)
+               return NULL;
+
+       uchan = to_usb_dmac_chan(chan);
+
+       return chan;
+}
+
+/* -----------------------------------------------------------------------------
+ * Power management
+ */
+
+static int usb_dmac_runtime_suspend(struct device *dev)
+{
+       struct usb_dmac *dmac = dev_get_drvdata(dev);
+       int i;
+
+       for (i = 0; i < dmac->n_channels; ++i)
+               usb_dmac_chan_halt(&dmac->channels[i]);
+
+       return 0;
+}
+
+static int usb_dmac_runtime_resume(struct device *dev)
+{
+       struct usb_dmac *dmac = dev_get_drvdata(dev);
+
+       return usb_dmac_init(dmac);
+}
+
+static const struct dev_pm_ops usb_dmac_pm = {
+       SET_RUNTIME_PM_OPS(usb_dmac_runtime_suspend, usb_dmac_runtime_resume,
+                          NULL)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int usb_dmac_chan_probe(struct usb_dmac *dmac,
+                              struct usb_dmac_chan *uchan,
+                              unsigned int index)
+{
+       struct platform_device *pdev = to_platform_device(dmac->dev);
+       char pdev_irqname[5];
+       char *irqname;
+       int ret;
+
+       uchan->index = index;
+       uchan->iomem = dmac->iomem + USB_DMAC_CHAN_OFFSET(index);
+
+       /* Request the channel interrupt. */
+       sprintf(pdev_irqname, "ch%u", index);
+       uchan->irq = platform_get_irq_byname(pdev, pdev_irqname);
+       if (uchan->irq < 0) {
+               dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
+               return -ENODEV;
+       }
+
+       irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
+                                dev_name(dmac->dev), index);
+       if (!irqname)
+               return -ENOMEM;
+
+       ret = devm_request_irq(dmac->dev, uchan->irq, usb_dmac_isr_channel,
+                              IRQF_SHARED, irqname, uchan);
+       if (ret) {
+               dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+                       uchan->irq, ret);
+               return ret;
+       }
+
+       uchan->vc.desc_free = usb_dmac_virt_desc_free;
+       vchan_init(&uchan->vc, &dmac->engine);
+       INIT_LIST_HEAD(&uchan->desc_freed);
+       INIT_LIST_HEAD(&uchan->desc_got);
+
+       return 0;
+}
+
+static int usb_dmac_parse_of(struct device *dev, struct usb_dmac *dmac)
+{
+       struct device_node *np = dev->of_node;
+       int ret;
+
+       ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+       if (ret < 0) {
+               dev_err(dev, "unable to read dma-channels property\n");
+               return ret;
+       }
+
+       if (dmac->n_channels <= 0 || dmac->n_channels >= 100) {
+               dev_err(dev, "invalid number of channels %u\n",
+                       dmac->n_channels);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int usb_dmac_probe(struct platform_device *pdev)
+{
+       const enum dma_slave_buswidth widths = USB_DMAC_SLAVE_BUSWIDTH;
+       struct dma_device *engine;
+       struct usb_dmac *dmac;
+       struct resource *mem;
+       unsigned int i;
+       int ret;
+
+       dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+       if (!dmac)
+               return -ENOMEM;
+
+       dmac->dev = &pdev->dev;
+       platform_set_drvdata(pdev, dmac);
+
+       ret = usb_dmac_parse_of(&pdev->dev, dmac);
+       if (ret < 0)
+               return ret;
+
+       dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+                                     sizeof(*dmac->channels), GFP_KERNEL);
+       if (!dmac->channels)
+               return -ENOMEM;
+
+       /* Request resources. */
+       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
+       if (IS_ERR(dmac->iomem))
+               return PTR_ERR(dmac->iomem);
+
+       /* Enable runtime PM and initialize the device. */
+       pm_runtime_enable(&pdev->dev);
+       ret = pm_runtime_get_sync(&pdev->dev);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
+               return ret;
+       }
+
+       ret = usb_dmac_init(dmac);
+       pm_runtime_put(&pdev->dev);
+
+       if (ret) {
+               dev_err(&pdev->dev, "failed to reset device\n");
+               goto error;
+       }
+
+       /* Initialize the channels. */
+       INIT_LIST_HEAD(&dmac->engine.channels);
+
+       for (i = 0; i < dmac->n_channels; ++i) {
+               ret = usb_dmac_chan_probe(dmac, &dmac->channels[i], i);
+               if (ret < 0)
+                       goto error;
+       }
+
+       /* Register the DMAC as a DMA provider for DT. */
+       ret = of_dma_controller_register(pdev->dev.of_node, usb_dmac_of_xlate,
+                                        NULL);
+       if (ret < 0)
+               goto error;
+
+       /*
+        * Register the DMA engine device.
+        *
+        * Default transfer size of 32 bytes requires 32-byte alignment.
+        */
+       engine = &dmac->engine;
+       dma_cap_set(DMA_SLAVE, engine->cap_mask);
+
+       engine->dev = &pdev->dev;
+
+       engine->src_addr_widths = widths;
+       engine->dst_addr_widths = widths;
+       engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+       engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+       engine->device_alloc_chan_resources = usb_dmac_alloc_chan_resources;
+       engine->device_free_chan_resources = usb_dmac_free_chan_resources;
+       engine->device_prep_slave_sg = usb_dmac_prep_slave_sg;
+       engine->device_terminate_all = usb_dmac_chan_terminate_all;
+       engine->device_tx_status = usb_dmac_tx_status;
+       engine->device_issue_pending = usb_dmac_issue_pending;
+
+       ret = dma_async_device_register(engine);
+       if (ret < 0)
+               goto error;
+
+       return 0;
+
+error:
+       of_dma_controller_free(pdev->dev.of_node);
+       pm_runtime_disable(&pdev->dev);
+       return ret;
+}
+
+static void usb_dmac_chan_remove(struct usb_dmac *dmac,
+                                struct usb_dmac_chan *uchan)
+{
+       usb_dmac_chan_halt(uchan);
+       devm_free_irq(dmac->dev, uchan->irq, uchan);
+}
+
+static int usb_dmac_remove(struct platform_device *pdev)
+{
+       struct usb_dmac *dmac = platform_get_drvdata(pdev);
+       int i;
+
+       for (i = 0; i < dmac->n_channels; ++i)
+               usb_dmac_chan_remove(dmac, &dmac->channels[i]);
+       of_dma_controller_free(pdev->dev.of_node);
+       dma_async_device_unregister(&dmac->engine);
+
+       pm_runtime_disable(&pdev->dev);
+
+       return 0;
+}
+
+static void usb_dmac_shutdown(struct platform_device *pdev)
+{
+       struct usb_dmac *dmac = platform_get_drvdata(pdev);
+
+       usb_dmac_stop(dmac);
+}
+
+static const struct of_device_id usb_dmac_of_ids[] = {
+       { .compatible = "renesas,usb-dmac", },
+       { /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, usb_dmac_of_ids);
+
+static struct platform_driver usb_dmac_driver = {
+       .driver         = {
+               .pm     = &usb_dmac_pm,
+               .name   = "usb-dmac",
+               .of_match_table = usb_dmac_of_ids,
+       },
+       .probe          = usb_dmac_probe,
+       .remove         = usb_dmac_remove,
+       .shutdown       = usb_dmac_shutdown,
+};
+
+module_platform_driver(usb_dmac_driver);
+
+MODULE_DESCRIPTION("Renesas USB DMA Controller Driver");
+MODULE_AUTHOR("Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>");
+MODULE_LICENSE("GPL v2");
index d0086e9..a1afda4 100644 (file)
@@ -896,7 +896,7 @@ static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume)
 };
 
-static struct of_device_id sirfsoc_dma_match[] = {
+static const struct of_device_id sirfsoc_dma_match[] = {
        { .compatible = "sirf,prima2-dmac", },
        { .compatible = "sirf,marco-dmac", },
        {},
index 1332b1d..3c10f03 100644 (file)
@@ -2514,7 +2514,8 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
        sg_dma_len(&dst_sg) = size;
        sg_dma_len(&src_sg) = size;
 
-       return d40_prep_sg(chan, &src_sg, &dst_sg, 1, DMA_NONE, dma_flags);
+       return d40_prep_sg(chan, &src_sg, &dst_sg, 1,
+                          DMA_MEM_TO_MEM, dma_flags);
 }
 
 static struct dma_async_tx_descriptor *
@@ -2526,7 +2527,8 @@ d40_prep_memcpy_sg(struct dma_chan *chan,
        if (dst_nents != src_nents)
                return NULL;
 
-       return d40_prep_sg(chan, src_sg, dst_sg, src_nents, DMA_NONE, dma_flags);
+       return d40_prep_sg(chan, src_sg, dst_sg, src_nents,
+                          DMA_MEM_TO_MEM, dma_flags);
 }
 
 static struct dma_async_tx_descriptor *
index 7ebcf9b..11e5365 100644 (file)
@@ -796,11 +796,6 @@ static void sun6i_dma_issue_pending(struct dma_chan *chan)
        spin_unlock_irqrestore(&vchan->vc.lock, flags);
 }
 
-static int sun6i_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
 static void sun6i_dma_free_chan_resources(struct dma_chan *chan)
 {
        struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
@@ -896,7 +891,7 @@ static struct sun6i_dma_config sun8i_a23_dma_cfg = {
        .nr_max_vchans   = 37,
 };
 
-static struct of_device_id sun6i_dma_match[] = {
+static const struct of_device_id sun6i_dma_match[] = {
        { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
        { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
        { /* sentinel */ }
@@ -957,7 +952,6 @@ static int sun6i_dma_probe(struct platform_device *pdev)
        dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask);
 
        INIT_LIST_HEAD(&sdc->slave.channels);
-       sdc->slave.device_alloc_chan_resources  = sun6i_dma_alloc_chan_resources;
        sdc->slave.device_free_chan_resources   = sun6i_dma_free_chan_resources;
        sdc->slave.device_tx_status             = sun6i_dma_tx_status;
        sdc->slave.device_issue_pending         = sun6i_dma_issue_pending;
diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c
new file mode 100755 (executable)
index 0000000..f52e375
--- /dev/null
@@ -0,0 +1,2089 @@
+/*
+ * Applied Micro X-Gene SoC DMA engine Driver
+ *
+ * Copyright (c) 2015, Applied Micro Circuits Corporation
+ * Authors: Rameshwar Prasad Sahu <rsahu@apm.com>
+ *         Loc Ho <lho@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * NOTE: PM support is currently not available.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include "dmaengine.h"
+
+/* X-Gene DMA ring csr registers and bit definations */
+#define XGENE_DMA_RING_CONFIG                  0x04
+#define XGENE_DMA_RING_ENABLE                  BIT(31)
+#define XGENE_DMA_RING_ID                      0x08
+#define XGENE_DMA_RING_ID_SETUP(v)             ((v) | BIT(31))
+#define XGENE_DMA_RING_ID_BUF                  0x0C
+#define XGENE_DMA_RING_ID_BUF_SETUP(v)         (((v) << 9) | BIT(21))
+#define XGENE_DMA_RING_THRESLD0_SET1           0x30
+#define XGENE_DMA_RING_THRESLD0_SET1_VAL       0X64
+#define XGENE_DMA_RING_THRESLD1_SET1           0x34
+#define XGENE_DMA_RING_THRESLD1_SET1_VAL       0xC8
+#define XGENE_DMA_RING_HYSTERESIS              0x68
+#define XGENE_DMA_RING_HYSTERESIS_VAL          0xFFFFFFFF
+#define XGENE_DMA_RING_STATE                   0x6C
+#define XGENE_DMA_RING_STATE_WR_BASE           0x70
+#define XGENE_DMA_RING_NE_INT_MODE             0x017C
+#define XGENE_DMA_RING_NE_INT_MODE_SET(m, v)   \
+       ((m) = ((m) & ~BIT(31 - (v))) | BIT(31 - (v)))
+#define XGENE_DMA_RING_NE_INT_MODE_RESET(m, v) \
+       ((m) &= (~BIT(31 - (v))))
+#define XGENE_DMA_RING_CLKEN                   0xC208
+#define XGENE_DMA_RING_SRST                    0xC200
+#define XGENE_DMA_RING_MEM_RAM_SHUTDOWN                0xD070
+#define XGENE_DMA_RING_BLK_MEM_RDY             0xD074
+#define XGENE_DMA_RING_BLK_MEM_RDY_VAL         0xFFFFFFFF
+#define XGENE_DMA_RING_DESC_CNT(v)             (((v) & 0x0001FFFE) >> 1)
+#define XGENE_DMA_RING_ID_GET(owner, num)      (((owner) << 6) | (num))
+#define XGENE_DMA_RING_DST_ID(v)               ((1 << 10) | (v))
+#define XGENE_DMA_RING_CMD_OFFSET              0x2C
+#define XGENE_DMA_RING_CMD_BASE_OFFSET(v)      ((v) << 6)
+#define XGENE_DMA_RING_COHERENT_SET(m)         \
+       (((u32 *)(m))[2] |= BIT(4))
+#define XGENE_DMA_RING_ADDRL_SET(m, v)         \
+       (((u32 *)(m))[2] |= (((v) >> 8) << 5))
+#define XGENE_DMA_RING_ADDRH_SET(m, v)         \
+       (((u32 *)(m))[3] |= ((v) >> 35))
+#define XGENE_DMA_RING_ACCEPTLERR_SET(m)       \
+       (((u32 *)(m))[3] |= BIT(19))
+#define XGENE_DMA_RING_SIZE_SET(m, v)          \
+       (((u32 *)(m))[3] |= ((v) << 23))
+#define XGENE_DMA_RING_RECOMBBUF_SET(m)                \
+       (((u32 *)(m))[3] |= BIT(27))
+#define XGENE_DMA_RING_RECOMTIMEOUTL_SET(m)    \
+       (((u32 *)(m))[3] |= (0x7 << 28))
+#define XGENE_DMA_RING_RECOMTIMEOUTH_SET(m)    \
+       (((u32 *)(m))[4] |= 0x3)
+#define XGENE_DMA_RING_SELTHRSH_SET(m)         \
+       (((u32 *)(m))[4] |= BIT(3))
+#define XGENE_DMA_RING_TYPE_SET(m, v)          \
+       (((u32 *)(m))[4] |= ((v) << 19))
+
+/* X-Gene DMA device csr registers and bit definitions */
+#define XGENE_DMA_IPBRR                                0x0
+#define XGENE_DMA_DEV_ID_RD(v)                 ((v) & 0x00000FFF)
+#define XGENE_DMA_BUS_ID_RD(v)                 (((v) >> 12) & 3)
+#define XGENE_DMA_REV_NO_RD(v)                 (((v) >> 14) & 3)
+#define XGENE_DMA_GCR                          0x10
+#define XGENE_DMA_CH_SETUP(v)                  \
+       ((v) = ((v) & ~0x000FFFFF) | 0x000AAFFF)
+#define XGENE_DMA_ENABLE(v)                    ((v) |= BIT(31))
+#define XGENE_DMA_DISABLE(v)                   ((v) &= ~BIT(31))
+#define XGENE_DMA_RAID6_CONT                   0x14
+#define XGENE_DMA_RAID6_MULTI_CTRL(v)          ((v) << 24)
+#define XGENE_DMA_INT                          0x70
+#define XGENE_DMA_INT_MASK                     0x74
+#define XGENE_DMA_INT_ALL_MASK                 0xFFFFFFFF
+#define XGENE_DMA_INT_ALL_UNMASK               0x0
+#define XGENE_DMA_INT_MASK_SHIFT               0x14
+#define XGENE_DMA_RING_INT0_MASK               0x90A0
+#define XGENE_DMA_RING_INT1_MASK               0x90A8
+#define XGENE_DMA_RING_INT2_MASK               0x90B0
+#define XGENE_DMA_RING_INT3_MASK               0x90B8
+#define XGENE_DMA_RING_INT4_MASK               0x90C0
+#define XGENE_DMA_CFG_RING_WQ_ASSOC            0x90E0
+#define XGENE_DMA_ASSOC_RING_MNGR1             0xFFFFFFFF
+#define XGENE_DMA_MEM_RAM_SHUTDOWN             0xD070
+#define XGENE_DMA_BLK_MEM_RDY                  0xD074
+#define XGENE_DMA_BLK_MEM_RDY_VAL              0xFFFFFFFF
+
+/* X-Gene SoC EFUSE csr register and bit defination */
+#define XGENE_SOC_JTAG1_SHADOW                 0x18
+#define XGENE_DMA_PQ_DISABLE_MASK              BIT(13)
+
+/* X-Gene DMA Descriptor format */
+#define XGENE_DMA_DESC_NV_BIT                  BIT_ULL(50)
+#define XGENE_DMA_DESC_IN_BIT                  BIT_ULL(55)
+#define XGENE_DMA_DESC_C_BIT                   BIT_ULL(63)
+#define XGENE_DMA_DESC_DR_BIT                  BIT_ULL(61)
+#define XGENE_DMA_DESC_ELERR_POS               46
+#define XGENE_DMA_DESC_RTYPE_POS               56
+#define XGENE_DMA_DESC_LERR_POS                        60
+#define XGENE_DMA_DESC_FLYBY_POS               4
+#define XGENE_DMA_DESC_BUFLEN_POS              48
+#define XGENE_DMA_DESC_HOENQ_NUM_POS           48
+
+#define XGENE_DMA_DESC_NV_SET(m)               \
+       (((u64 *)(m))[0] |= XGENE_DMA_DESC_NV_BIT)
+#define XGENE_DMA_DESC_IN_SET(m)               \
+       (((u64 *)(m))[0] |= XGENE_DMA_DESC_IN_BIT)
+#define XGENE_DMA_DESC_RTYPE_SET(m, v)         \
+       (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_RTYPE_POS))
+#define XGENE_DMA_DESC_BUFADDR_SET(m, v)       \
+       (((u64 *)(m))[0] |= (v))
+#define XGENE_DMA_DESC_BUFLEN_SET(m, v)                \
+       (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_BUFLEN_POS))
+#define XGENE_DMA_DESC_C_SET(m)                        \
+       (((u64 *)(m))[1] |= XGENE_DMA_DESC_C_BIT)
+#define XGENE_DMA_DESC_FLYBY_SET(m, v)         \
+       (((u64 *)(m))[2] |= ((v) << XGENE_DMA_DESC_FLYBY_POS))
+#define XGENE_DMA_DESC_MULTI_SET(m, v, i)      \
+       (((u64 *)(m))[2] |= ((u64)(v) << (((i) + 1) * 8)))
+#define XGENE_DMA_DESC_DR_SET(m)               \
+       (((u64 *)(m))[2] |= XGENE_DMA_DESC_DR_BIT)
+#define XGENE_DMA_DESC_DST_ADDR_SET(m, v)      \
+       (((u64 *)(m))[3] |= (v))
+#define XGENE_DMA_DESC_H0ENQ_NUM_SET(m, v)     \
+       (((u64 *)(m))[3] |= ((u64)(v) << XGENE_DMA_DESC_HOENQ_NUM_POS))
+#define XGENE_DMA_DESC_ELERR_RD(m)             \
+       (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3)
+#define XGENE_DMA_DESC_LERR_RD(m)              \
+       (((m) >> XGENE_DMA_DESC_LERR_POS) & 0x7)
+#define XGENE_DMA_DESC_STATUS(elerr, lerr)     \
+       (((elerr) << 4) | (lerr))
+
+/* X-Gene DMA descriptor empty s/w signature */
+#define XGENE_DMA_DESC_EMPTY_INDEX             0
+#define XGENE_DMA_DESC_EMPTY_SIGNATURE         ~0ULL
+#define XGENE_DMA_DESC_SET_EMPTY(m)            \
+       (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] =     \
+        XGENE_DMA_DESC_EMPTY_SIGNATURE)
+#define XGENE_DMA_DESC_IS_EMPTY(m)             \
+       (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] ==    \
+        XGENE_DMA_DESC_EMPTY_SIGNATURE)
+
+/* X-Gene DMA configurable parameters defines */
+#define XGENE_DMA_RING_NUM             512
+#define XGENE_DMA_BUFNUM               0x0
+#define XGENE_DMA_CPU_BUFNUM           0x18
+#define XGENE_DMA_RING_OWNER_DMA       0x03
+#define XGENE_DMA_RING_OWNER_CPU       0x0F
+#define XGENE_DMA_RING_TYPE_REGULAR    0x01
+#define XGENE_DMA_RING_WQ_DESC_SIZE    32      /* 32 Bytes */
+#define XGENE_DMA_RING_NUM_CONFIG      5
+#define XGENE_DMA_MAX_CHANNEL          4
+#define XGENE_DMA_XOR_CHANNEL          0
+#define XGENE_DMA_PQ_CHANNEL           1
+#define XGENE_DMA_MAX_BYTE_CNT         0x4000  /* 16 KB */
+#define XGENE_DMA_MAX_64B_DESC_BYTE_CNT        0x14000 /* 80 KB */
+#define XGENE_DMA_XOR_ALIGNMENT                6       /* 64 Bytes */
+#define XGENE_DMA_MAX_XOR_SRC          5
+#define XGENE_DMA_16K_BUFFER_LEN_CODE  0x0
+#define XGENE_DMA_INVALID_LEN_CODE     0x7800
+
+/* X-Gene DMA descriptor error codes */
+#define ERR_DESC_AXI                   0x01
+#define ERR_BAD_DESC                   0x02
+#define ERR_READ_DATA_AXI              0x03
+#define ERR_WRITE_DATA_AXI             0x04
+#define ERR_FBP_TIMEOUT                        0x05
+#define ERR_ECC                                0x06
+#define ERR_DIFF_SIZE                  0x08
+#define ERR_SCT_GAT_LEN                        0x09
+#define ERR_CRC_ERR                    0x11
+#define ERR_CHKSUM                     0x12
+#define ERR_DIF                                0x13
+
+/* X-Gene DMA error interrupt codes */
+#define ERR_DIF_SIZE_INT               0x0
+#define ERR_GS_ERR_INT                 0x1
+#define ERR_FPB_TIMEO_INT              0x2
+#define ERR_WFIFO_OVF_INT              0x3
+#define ERR_RFIFO_OVF_INT              0x4
+#define ERR_WR_TIMEO_INT               0x5
+#define ERR_RD_TIMEO_INT               0x6
+#define ERR_WR_ERR_INT                 0x7
+#define ERR_RD_ERR_INT                 0x8
+#define ERR_BAD_DESC_INT               0x9
+#define ERR_DESC_DST_INT               0xA
+#define ERR_DESC_SRC_INT               0xB
+
+/* X-Gene DMA flyby operation code */
+#define FLYBY_2SRC_XOR                 0x8
+#define FLYBY_3SRC_XOR                 0x9
+#define FLYBY_4SRC_XOR                 0xA
+#define FLYBY_5SRC_XOR                 0xB
+
+/* X-Gene DMA SW descriptor flags */
+#define XGENE_DMA_FLAG_64B_DESC                BIT(0)
+
+/* Define to dump X-Gene DMA descriptor */
+#define XGENE_DMA_DESC_DUMP(desc, m)   \
+       print_hex_dump(KERN_ERR, (m),   \
+                       DUMP_PREFIX_ADDRESS, 16, 8, (desc), 32, 0)
+
+#define to_dma_desc_sw(tx)             \
+       container_of(tx, struct xgene_dma_desc_sw, tx)
+#define to_dma_chan(dchan)             \
+       container_of(dchan, struct xgene_dma_chan, dma_chan)
+
+#define chan_dbg(chan, fmt, arg...)    \
+       dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg)
+#define chan_err(chan, fmt, arg...)    \
+       dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)
+
+struct xgene_dma_desc_hw {
+       u64 m0;
+       u64 m1;
+       u64 m2;
+       u64 m3;
+};
+
+enum xgene_dma_ring_cfgsize {
+       XGENE_DMA_RING_CFG_SIZE_512B,
+       XGENE_DMA_RING_CFG_SIZE_2KB,
+       XGENE_DMA_RING_CFG_SIZE_16KB,
+       XGENE_DMA_RING_CFG_SIZE_64KB,
+       XGENE_DMA_RING_CFG_SIZE_512KB,
+       XGENE_DMA_RING_CFG_SIZE_INVALID
+};
+
+struct xgene_dma_ring {
+       struct xgene_dma *pdma;
+       u8 buf_num;
+       u16 id;
+       u16 num;
+       u16 head;
+       u16 owner;
+       u16 slots;
+       u16 dst_ring_num;
+       u32 size;
+       void __iomem *cmd;
+       void __iomem *cmd_base;
+       dma_addr_t desc_paddr;
+       u32 state[XGENE_DMA_RING_NUM_CONFIG];
+       enum xgene_dma_ring_cfgsize cfgsize;
+       union {
+               void *desc_vaddr;
+               struct xgene_dma_desc_hw *desc_hw;
+       };
+};
+
+struct xgene_dma_desc_sw {
+       struct xgene_dma_desc_hw desc1;
+       struct xgene_dma_desc_hw desc2;
+       u32 flags;
+       struct list_head node;
+       struct list_head tx_list;
+       struct dma_async_tx_descriptor tx;
+};
+
+/**
+ * struct xgene_dma_chan - internal representation of an X-Gene DMA channel
+ * @dma_chan: dmaengine channel object member
+ * @pdma: X-Gene DMA device structure reference
+ * @dev: struct device reference for dma mapping api
+ * @id: raw id of this channel
+ * @rx_irq: channel IRQ
+ * @name: name of X-Gene DMA channel
+ * @lock: serializes enqueue/dequeue operations to the descriptor pool
+ * @pending: number of transaction request pushed to DMA controller for
+ *     execution, but still waiting for completion,
+ * @max_outstanding: max number of outstanding request we can push to channel
+ * @ld_pending: descriptors which are queued to run, but have not yet been
+ *     submitted to the hardware for execution
+ * @ld_running: descriptors which are currently being executing by the hardware
+ * @ld_completed: descriptors which have finished execution by the hardware.
+ *     These descriptors have already had their cleanup actions run. They
+ *     are waiting for the ACK bit to be set by the async tx API.
+ * @desc_pool: descriptor pool for DMA operations
+ * @tasklet: bottom half where all completed descriptors cleans
+ * @tx_ring: transmit ring descriptor that we use to prepare actual
+ *     descriptors for further executions
+ * @rx_ring: receive ring descriptor that we use to get completed DMA
+ *     descriptors during cleanup time
+ */
+struct xgene_dma_chan {
+       struct dma_chan dma_chan;
+       struct xgene_dma *pdma;
+       struct device *dev;
+       int id;
+       int rx_irq;
+       char name[10];
+       spinlock_t lock;
+       int pending;
+       int max_outstanding;
+       struct list_head ld_pending;
+       struct list_head ld_running;
+       struct list_head ld_completed;
+       struct dma_pool *desc_pool;
+       struct tasklet_struct tasklet;
+       struct xgene_dma_ring tx_ring;
+       struct xgene_dma_ring rx_ring;
+};
+
+/**
+ * struct xgene_dma - internal representation of an X-Gene DMA device
+ * @err_irq: DMA error irq number
+ * @ring_num: start id number for DMA ring
+ * @csr_dma: base for DMA register access
+ * @csr_ring: base for DMA ring register access
+ * @csr_ring_cmd: base for DMA ring command register access
+ * @csr_efuse: base for efuse register access
+ * @dma_dev: embedded struct dma_device
+ * @chan: reference to X-Gene DMA channels
+ */
+struct xgene_dma {
+       struct device *dev;
+       struct clk *clk;
+       int err_irq;
+       int ring_num;
+       void __iomem *csr_dma;
+       void __iomem *csr_ring;
+       void __iomem *csr_ring_cmd;
+       void __iomem *csr_efuse;
+       struct dma_device dma_dev[XGENE_DMA_MAX_CHANNEL];
+       struct xgene_dma_chan chan[XGENE_DMA_MAX_CHANNEL];
+};
+
+static const char * const xgene_dma_desc_err[] = {
+       [ERR_DESC_AXI] = "AXI error when reading src/dst link list",
+       [ERR_BAD_DESC] = "ERR or El_ERR fields not set to zero in desc",
+       [ERR_READ_DATA_AXI] = "AXI error when reading data",
+       [ERR_WRITE_DATA_AXI] = "AXI error when writing data",
+       [ERR_FBP_TIMEOUT] = "Timeout on bufpool fetch",
+       [ERR_ECC] = "ECC double bit error",
+       [ERR_DIFF_SIZE] = "Bufpool too small to hold all the DIF result",
+       [ERR_SCT_GAT_LEN] = "Gather and scatter data length not same",
+       [ERR_CRC_ERR] = "CRC error",
+       [ERR_CHKSUM] = "Checksum error",
+       [ERR_DIF] = "DIF error",
+};
+
+static const char * const xgene_dma_err[] = {
+       [ERR_DIF_SIZE_INT] = "DIF size error",
+       [ERR_GS_ERR_INT] = "Gather scatter not same size error",
+       [ERR_FPB_TIMEO_INT] = "Free pool time out error",
+       [ERR_WFIFO_OVF_INT] = "Write FIFO over flow error",
+       [ERR_RFIFO_OVF_INT] = "Read FIFO over flow error",
+       [ERR_WR_TIMEO_INT] = "Write time out error",
+       [ERR_RD_TIMEO_INT] = "Read time out error",
+       [ERR_WR_ERR_INT] = "HBF bus write error",
+       [ERR_RD_ERR_INT] = "HBF bus read error",
+       [ERR_BAD_DESC_INT] = "Ring descriptor HE0 not set error",
+       [ERR_DESC_DST_INT] = "HFB reading dst link address error",
+       [ERR_DESC_SRC_INT] = "HFB reading src link address error",
+};
+
+static bool is_pq_enabled(struct xgene_dma *pdma)
+{
+       u32 val;
+
+       val = ioread32(pdma->csr_efuse + XGENE_SOC_JTAG1_SHADOW);
+       return !(val & XGENE_DMA_PQ_DISABLE_MASK);
+}
+
+static void xgene_dma_cpu_to_le64(u64 *desc, int count)
+{
+       int i;
+
+       for (i = 0; i < count; i++)
+               desc[i] = cpu_to_le64(desc[i]);
+}
+
+static u16 xgene_dma_encode_len(u32 len)
+{
+       return (len < XGENE_DMA_MAX_BYTE_CNT) ?
+               len : XGENE_DMA_16K_BUFFER_LEN_CODE;
+}
+
+static u8 xgene_dma_encode_xor_flyby(u32 src_cnt)
+{
+       static u8 flyby_type[] = {
+               FLYBY_2SRC_XOR, /* Dummy */
+               FLYBY_2SRC_XOR, /* Dummy */
+               FLYBY_2SRC_XOR,
+               FLYBY_3SRC_XOR,
+               FLYBY_4SRC_XOR,
+               FLYBY_5SRC_XOR
+       };
+
+       return flyby_type[src_cnt];
+}
+
+static u32 xgene_dma_ring_desc_cnt(struct xgene_dma_ring *ring)
+{
+       u32 __iomem *cmd_base = ring->cmd_base;
+       u32 ring_state = ioread32(&cmd_base[1]);
+
+       return XGENE_DMA_RING_DESC_CNT(ring_state);
+}
+
+static void xgene_dma_set_src_buffer(void *ext8, size_t *len,
+                                    dma_addr_t *paddr)
+{
+       size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ?
+                       *len : XGENE_DMA_MAX_BYTE_CNT;
+
+       XGENE_DMA_DESC_BUFADDR_SET(ext8, *paddr);
+       XGENE_DMA_DESC_BUFLEN_SET(ext8, xgene_dma_encode_len(nbytes));
+       *len -= nbytes;
+       *paddr += nbytes;
+}
+
+static void xgene_dma_invalidate_buffer(void *ext8)
+{
+       XGENE_DMA_DESC_BUFLEN_SET(ext8, XGENE_DMA_INVALID_LEN_CODE);
+}
+
+static void *xgene_dma_lookup_ext8(u64 *desc, int idx)
+{
+       return (idx % 2) ? (desc + idx - 1) : (desc + idx + 1);
+}
+
+static void xgene_dma_init_desc(void *desc, u16 dst_ring_num)
+{
+       XGENE_DMA_DESC_C_SET(desc); /* Coherent IO */
+       XGENE_DMA_DESC_IN_SET(desc);
+       XGENE_DMA_DESC_H0ENQ_NUM_SET(desc, dst_ring_num);
+       XGENE_DMA_DESC_RTYPE_SET(desc, XGENE_DMA_RING_OWNER_DMA);
+}
+
+static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan,
+                                   struct xgene_dma_desc_sw *desc_sw,
+                                   dma_addr_t dst, dma_addr_t src,
+                                   size_t len)
+{
+       void *desc1, *desc2;
+       int i;
+
+       /* Get 1st descriptor */
+       desc1 = &desc_sw->desc1;
+       xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num);
+
+       /* Set destination address */
+       XGENE_DMA_DESC_DR_SET(desc1);
+       XGENE_DMA_DESC_DST_ADDR_SET(desc1, dst);
+
+       /* Set 1st source address */
+       xgene_dma_set_src_buffer(desc1 + 8, &len, &src);
+
+       if (len <= 0) {
+               desc2 = NULL;
+               goto skip_additional_src;
+       }
+
+       /*
+        * We need to split this source buffer,
+        * and need to use 2nd descriptor
+        */
+       desc2 = &desc_sw->desc2;
+       XGENE_DMA_DESC_NV_SET(desc1);
+
+       /* Set 2nd to 5th source address */
+       for (i = 0; i < 4 && len; i++)
+               xgene_dma_set_src_buffer(xgene_dma_lookup_ext8(desc2, i),
+                                        &len, &src);
+
+       /* Invalidate unused source address field */
+       for (; i < 4; i++)
+               xgene_dma_invalidate_buffer(xgene_dma_lookup_ext8(desc2, i));
+
+       /* Updated flag that we have prepared 64B descriptor */
+       desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC;
+
+skip_additional_src:
+       /* Hardware stores descriptor in little endian format */
+       xgene_dma_cpu_to_le64(desc1, 4);
+       if (desc2)
+               xgene_dma_cpu_to_le64(desc2, 4);
+}
+
+static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan,
+                                   struct xgene_dma_desc_sw *desc_sw,
+                                   dma_addr_t *dst, dma_addr_t *src,
+                                   u32 src_cnt, size_t *nbytes,
+                                   const u8 *scf)
+{
+       void *desc1, *desc2;
+       size_t len = *nbytes;
+       int i;
+
+       desc1 = &desc_sw->desc1;
+       desc2 = &desc_sw->desc2;
+
+       /* Initialize DMA descriptor */
+       xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num);
+
+       /* Set destination address */
+       XGENE_DMA_DESC_DR_SET(desc1);
+       XGENE_DMA_DESC_DST_ADDR_SET(desc1, *dst);
+
+       /* We have multiple source addresses, so need to set NV bit*/
+       XGENE_DMA_DESC_NV_SET(desc1);
+
+       /* Set flyby opcode */
+       XGENE_DMA_DESC_FLYBY_SET(desc1, xgene_dma_encode_xor_flyby(src_cnt));
+
+       /* Set 1st to 5th source addresses */
+       for (i = 0; i < src_cnt; i++) {
+               len = *nbytes;
+               xgene_dma_set_src_buffer((i == 0) ? (desc1 + 8) :
+                                        xgene_dma_lookup_ext8(desc2, i - 1),
+                                        &len, &src[i]);
+               XGENE_DMA_DESC_MULTI_SET(desc1, scf[i], i);
+       }
+
+       /* Hardware stores descriptor in little endian format */
+       xgene_dma_cpu_to_le64(desc1, 4);
+       xgene_dma_cpu_to_le64(desc2, 4);
+
+       /* Update meta data */
+       *nbytes = len;
+       *dst += XGENE_DMA_MAX_BYTE_CNT;
+
+       /* We need always 64B descriptor to perform xor or pq operations */
+       desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC;
+}
+
+static dma_cookie_t xgene_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct xgene_dma_desc_sw *desc;
+       struct xgene_dma_chan *chan;
+       dma_cookie_t cookie;
+
+       if (unlikely(!tx))
+               return -EINVAL;
+
+       chan = to_dma_chan(tx->chan);
+       desc = to_dma_desc_sw(tx);
+
+       spin_lock_bh(&chan->lock);
+
+       cookie = dma_cookie_assign(tx);
+
+       /* Add this transaction list onto the tail of the pending queue */
+       list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
+
+       spin_unlock_bh(&chan->lock);
+
+       return cookie;
+}
+
+static void xgene_dma_clean_descriptor(struct xgene_dma_chan *chan,
+                                      struct xgene_dma_desc_sw *desc)
+{
+       list_del(&desc->node);
+       chan_dbg(chan, "LD %p free\n", desc);
+       dma_pool_free(chan->desc_pool, desc, desc->tx.phys);
+}
+
+static struct xgene_dma_desc_sw *xgene_dma_alloc_descriptor(
+                                struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_desc_sw *desc;
+       dma_addr_t phys;
+
+       desc = dma_pool_alloc(chan->desc_pool, GFP_NOWAIT, &phys);
+       if (!desc) {
+               chan_err(chan, "Failed to allocate LDs\n");
+               return NULL;
+       }
+
+       memset(desc, 0, sizeof(*desc));
+
+       INIT_LIST_HEAD(&desc->tx_list);
+       desc->tx.phys = phys;
+       desc->tx.tx_submit = xgene_dma_tx_submit;
+       dma_async_tx_descriptor_init(&desc->tx, &chan->dma_chan);
+
+       chan_dbg(chan, "LD %p allocated\n", desc);
+
+       return desc;
+}
+
+/**
+ * xgene_dma_clean_completed_descriptor - free all descriptors which
+ * has been completed and acked
+ * @chan: X-Gene DMA channel
+ *
+ * This function is used on all completed and acked descriptors.
+ */
+static void xgene_dma_clean_completed_descriptor(struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_desc_sw *desc, *_desc;
+
+       /* Run the callback for each descriptor, in order */
+       list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) {
+               if (async_tx_test_ack(&desc->tx))
+                       xgene_dma_clean_descriptor(chan, desc);
+       }
+}
+
+/**
+ * xgene_dma_run_tx_complete_actions - cleanup a single link descriptor
+ * @chan: X-Gene DMA channel
+ * @desc: descriptor to cleanup and free
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies.
+ */
+static void xgene_dma_run_tx_complete_actions(struct xgene_dma_chan *chan,
+                                             struct xgene_dma_desc_sw *desc)
+{
+       struct dma_async_tx_descriptor *tx = &desc->tx;
+
+       /*
+        * If this is not the last transaction in the group,
+        * then no need to complete cookie and run any callback as
+        * this is not the tx_descriptor which had been sent to caller
+        * of this DMA request
+        */
+
+       if (tx->cookie == 0)
+               return;
+
+       dma_cookie_complete(tx);
+
+       /* Run the link descriptor callback function */
+       if (tx->callback)
+               tx->callback(tx->callback_param);
+
+       dma_descriptor_unmap(tx);
+
+       /* Run any dependencies */
+       dma_run_dependencies(tx);
+}
+
+/**
+ * xgene_dma_clean_running_descriptor - move the completed descriptor from
+ * ld_running to ld_completed
+ * @chan: X-Gene DMA channel
+ * @desc: the descriptor which is completed
+ *
+ * Free the descriptor directly if acked by async_tx api,
+ * else move it to queue ld_completed.
+ */
+static void xgene_dma_clean_running_descriptor(struct xgene_dma_chan *chan,
+                                              struct xgene_dma_desc_sw *desc)
+{
+       /* Remove from the list of running transactions */
+       list_del(&desc->node);
+
+       /*
+        * the client is allowed to attach dependent operations
+        * until 'ack' is set
+        */
+       if (!async_tx_test_ack(&desc->tx)) {
+               /*
+                * Move this descriptor to the list of descriptors which is
+                * completed, but still awaiting the 'ack' bit to be set.
+                */
+               list_add_tail(&desc->node, &chan->ld_completed);
+               return;
+       }
+
+       chan_dbg(chan, "LD %p free\n", desc);
+       dma_pool_free(chan->desc_pool, desc, desc->tx.phys);
+}
+
+static int xgene_chan_xfer_request(struct xgene_dma_ring *ring,
+                                  struct xgene_dma_desc_sw *desc_sw)
+{
+       struct xgene_dma_desc_hw *desc_hw;
+
+       /* Check if can push more descriptor to hw for execution */
+       if (xgene_dma_ring_desc_cnt(ring) > (ring->slots - 2))
+               return -EBUSY;
+
+       /* Get hw descriptor from DMA tx ring */
+       desc_hw = &ring->desc_hw[ring->head];
+
+       /*
+        * Increment the head count to point next
+        * descriptor for next time
+        */
+       if (++ring->head == ring->slots)
+               ring->head = 0;
+
+       /* Copy prepared sw descriptor data to hw descriptor */
+       memcpy(desc_hw, &desc_sw->desc1, sizeof(*desc_hw));
+
+       /*
+        * Check if we have prepared 64B descriptor,
+        * in this case we need one more hw descriptor
+        */
+       if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) {
+               desc_hw = &ring->desc_hw[ring->head];
+
+               if (++ring->head == ring->slots)
+                       ring->head = 0;
+
+               memcpy(desc_hw, &desc_sw->desc2, sizeof(*desc_hw));
+       }
+
+       /* Notify the hw that we have descriptor ready for execution */
+       iowrite32((desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ?
+                 2 : 1, ring->cmd);
+
+       return 0;
+}
+
+/**
+ * xgene_chan_xfer_ld_pending - push any pending transactions to hw
+ * @chan : X-Gene DMA channel
+ *
+ * LOCKING: must hold chan->desc_lock
+ */
+static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_desc_sw *desc_sw, *_desc_sw;
+       int ret;
+
+       /*
+        * If the list of pending descriptors is empty, then we
+        * don't need to do any work at all
+        */
+       if (list_empty(&chan->ld_pending)) {
+               chan_dbg(chan, "No pending LDs\n");
+               return;
+       }
+
+       /*
+        * Move elements from the queue of pending transactions onto the list
+        * of running transactions and push it to hw for further executions
+        */
+       list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_pending, node) {
+               /*
+                * Check if have pushed max number of transactions to hw
+                * as capable, so let's stop here and will push remaining
+                * elements from pening ld queue after completing some
+                * descriptors that we have already pushed
+                */
+               if (chan->pending >= chan->max_outstanding)
+                       return;
+
+               ret = xgene_chan_xfer_request(&chan->tx_ring, desc_sw);
+               if (ret)
+                       return;
+
+               /*
+                * Delete this element from ld pending queue and append it to
+                * ld running queue
+                */
+               list_move_tail(&desc_sw->node, &chan->ld_running);
+
+               /* Increment the pending transaction count */
+               chan->pending++;
+       }
+}
+
+/**
+ * xgene_dma_cleanup_descriptors - cleanup link descriptors which are completed
+ * and move them to ld_completed to free until flag 'ack' is set
+ * @chan: X-Gene DMA channel
+ *
+ * This function is used on descriptors which have been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, then
+ * free these descriptors if flag 'ack' is set.
+ */
+static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_ring *ring = &chan->rx_ring;
+       struct xgene_dma_desc_sw *desc_sw, *_desc_sw;
+       struct xgene_dma_desc_hw *desc_hw;
+       u8 status;
+
+       /* Clean already completed and acked descriptors */
+       xgene_dma_clean_completed_descriptor(chan);
+
+       /* Run the callback for each descriptor, in order */
+       list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) {
+               /* Get subsequent hw descriptor from DMA rx ring */
+               desc_hw = &ring->desc_hw[ring->head];
+
+               /* Check if this descriptor has been completed */
+               if (unlikely(XGENE_DMA_DESC_IS_EMPTY(desc_hw)))
+                       break;
+
+               if (++ring->head == ring->slots)
+                       ring->head = 0;
+
+               /* Check if we have any error with DMA transactions */
+               status = XGENE_DMA_DESC_STATUS(
+                               XGENE_DMA_DESC_ELERR_RD(le64_to_cpu(
+                                                       desc_hw->m0)),
+                               XGENE_DMA_DESC_LERR_RD(le64_to_cpu(
+                                                      desc_hw->m0)));
+               if (status) {
+                       /* Print the DMA error type */
+                       chan_err(chan, "%s\n", xgene_dma_desc_err[status]);
+
+                       /*
+                        * We have DMA transactions error here. Dump DMA Tx
+                        * and Rx descriptors for this request */
+                       XGENE_DMA_DESC_DUMP(&desc_sw->desc1,
+                                           "X-Gene DMA TX DESC1: ");
+
+                       if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC)
+                               XGENE_DMA_DESC_DUMP(&desc_sw->desc2,
+                                                   "X-Gene DMA TX DESC2: ");
+
+                       XGENE_DMA_DESC_DUMP(desc_hw,
+                                           "X-Gene DMA RX ERR DESC: ");
+               }
+
+               /* Notify the hw about this completed descriptor */
+               iowrite32(-1, ring->cmd);
+
+               /* Mark this hw descriptor as processed */
+               XGENE_DMA_DESC_SET_EMPTY(desc_hw);
+
+               xgene_dma_run_tx_complete_actions(chan, desc_sw);
+
+               xgene_dma_clean_running_descriptor(chan, desc_sw);
+
+               /*
+                * Decrement the pending transaction count
+                * as we have processed one
+                */
+               chan->pending--;
+       }
+
+       /*
+        * Start any pending transactions automatically
+        * In the ideal case, we keep the DMA controller busy while we go
+        * ahead and free the descriptors below.
+        */
+       xgene_chan_xfer_ld_pending(chan);
+}
+
+static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+       struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+       /* Has this channel already been allocated? */
+       if (chan->desc_pool)
+               return 1;
+
+       chan->desc_pool = dma_pool_create(chan->name, chan->dev,
+                                         sizeof(struct xgene_dma_desc_sw),
+                                         0, 0);
+       if (!chan->desc_pool) {
+               chan_err(chan, "Failed to allocate descriptor pool\n");
+               return -ENOMEM;
+       }
+
+       chan_dbg(chan, "Allocate descripto pool\n");
+
+       return 1;
+}
+
+/**
+ * xgene_dma_free_desc_list - Free all descriptors in a queue
+ * @chan: X-Gene DMA channel
+ * @list: the list to free
+ *
+ * LOCKING: must hold chan->desc_lock
+ */
+static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan,
+                                    struct list_head *list)
+{
+       struct xgene_dma_desc_sw *desc, *_desc;
+
+       list_for_each_entry_safe(desc, _desc, list, node)
+               xgene_dma_clean_descriptor(chan, desc);
+}
+
+static void xgene_dma_free_tx_desc_list(struct xgene_dma_chan *chan,
+                                       struct list_head *list)
+{
+       struct xgene_dma_desc_sw *desc, *_desc;
+
+       list_for_each_entry_safe(desc, _desc, list, node)
+               xgene_dma_clean_descriptor(chan, desc);
+}
+
+static void xgene_dma_free_chan_resources(struct dma_chan *dchan)
+{
+       struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+       chan_dbg(chan, "Free all resources\n");
+
+       if (!chan->desc_pool)
+               return;
+
+       spin_lock_bh(&chan->lock);
+
+       /* Process all running descriptor */
+       xgene_dma_cleanup_descriptors(chan);
+
+       /* Clean all link descriptor queues */
+       xgene_dma_free_desc_list(chan, &chan->ld_pending);
+       xgene_dma_free_desc_list(chan, &chan->ld_running);
+       xgene_dma_free_desc_list(chan, &chan->ld_completed);
+
+       spin_unlock_bh(&chan->lock);
+
+       /* Delete this channel DMA pool */
+       dma_pool_destroy(chan->desc_pool);
+       chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_memcpy(
+       struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src,
+       size_t len, unsigned long flags)
+{
+       struct xgene_dma_desc_sw *first = NULL, *new;
+       struct xgene_dma_chan *chan;
+       size_t copy;
+
+       if (unlikely(!dchan || !len))
+               return NULL;
+
+       chan = to_dma_chan(dchan);
+
+       do {
+               /* Allocate the link descriptor from DMA pool */
+               new = xgene_dma_alloc_descriptor(chan);
+               if (!new)
+                       goto fail;
+
+               /* Create the largest transaction possible */
+               copy = min_t(size_t, len, XGENE_DMA_MAX_64B_DESC_BYTE_CNT);
+
+               /* Prepare DMA descriptor */
+               xgene_dma_prep_cpy_desc(chan, new, dst, src, copy);
+
+               if (!first)
+                       first = new;
+
+               new->tx.cookie = 0;
+               async_tx_ack(&new->tx);
+
+               /* Update metadata */
+               len -= copy;
+               dst += copy;
+               src += copy;
+
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+       } while (len);
+
+       new->tx.flags = flags; /* client is in control of this ack */
+       new->tx.cookie = -EBUSY;
+       list_splice(&first->tx_list, &new->tx_list);
+
+       return &new->tx;
+
+fail:
+       if (!first)
+               return NULL;
+
+       xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_sg(
+       struct dma_chan *dchan, struct scatterlist *dst_sg,
+       u32 dst_nents, struct scatterlist *src_sg,
+       u32 src_nents, unsigned long flags)
+{
+       struct xgene_dma_desc_sw *first = NULL, *new = NULL;
+       struct xgene_dma_chan *chan;
+       size_t dst_avail, src_avail;
+       dma_addr_t dst, src;
+       size_t len;
+
+       if (unlikely(!dchan))
+               return NULL;
+
+       if (unlikely(!dst_nents || !src_nents))
+               return NULL;
+
+       if (unlikely(!dst_sg || !src_sg))
+               return NULL;
+
+       chan = to_dma_chan(dchan);
+
+       /* Get prepared for the loop */
+       dst_avail = sg_dma_len(dst_sg);
+       src_avail = sg_dma_len(src_sg);
+       dst_nents--;
+       src_nents--;
+
+       /* Run until we are out of scatterlist entries */
+       while (true) {
+               /* Create the largest transaction possible */
+               len = min_t(size_t, src_avail, dst_avail);
+               len = min_t(size_t, len, XGENE_DMA_MAX_64B_DESC_BYTE_CNT);
+               if (len == 0)
+                       goto fetch;
+
+               dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail;
+               src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail;
+
+               /* Allocate the link descriptor from DMA pool */
+               new = xgene_dma_alloc_descriptor(chan);
+               if (!new)
+                       goto fail;
+
+               /* Prepare DMA descriptor */
+               xgene_dma_prep_cpy_desc(chan, new, dst, src, len);
+
+               if (!first)
+                       first = new;
+
+               new->tx.cookie = 0;
+               async_tx_ack(&new->tx);
+
+               /* update metadata */
+               dst_avail -= len;
+               src_avail -= len;
+
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+
+fetch:
+               /* fetch the next dst scatterlist entry */
+               if (dst_avail == 0) {
+                       /* no more entries: we're done */
+                       if (dst_nents == 0)
+                               break;
+
+                       /* fetch the next entry: if there are no more: done */
+                       dst_sg = sg_next(dst_sg);
+                       if (!dst_sg)
+                               break;
+
+                       dst_nents--;
+                       dst_avail = sg_dma_len(dst_sg);
+               }
+
+               /* fetch the next src scatterlist entry */
+               if (src_avail == 0) {
+                       /* no more entries: we're done */
+                       if (src_nents == 0)
+                               break;
+
+                       /* fetch the next entry: if there are no more: done */
+                       src_sg = sg_next(src_sg);
+                       if (!src_sg)
+                               break;
+
+                       src_nents--;
+                       src_avail = sg_dma_len(src_sg);
+               }
+       }
+
+       if (!new)
+               return NULL;
+
+       new->tx.flags = flags; /* client is in control of this ack */
+       new->tx.cookie = -EBUSY;
+       list_splice(&first->tx_list, &new->tx_list);
+
+       return &new->tx;
+fail:
+       if (!first)
+               return NULL;
+
+       xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_xor(
+       struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src,
+       u32 src_cnt, size_t len, unsigned long flags)
+{
+       struct xgene_dma_desc_sw *first = NULL, *new;
+       struct xgene_dma_chan *chan;
+       static u8 multi[XGENE_DMA_MAX_XOR_SRC] = {
+                               0x01, 0x01, 0x01, 0x01, 0x01};
+
+       if (unlikely(!dchan || !len))
+               return NULL;
+
+       chan = to_dma_chan(dchan);
+
+       do {
+               /* Allocate the link descriptor from DMA pool */
+               new = xgene_dma_alloc_descriptor(chan);
+               if (!new)
+                       goto fail;
+
+               /* Prepare xor DMA descriptor */
+               xgene_dma_prep_xor_desc(chan, new, &dst, src,
+                                       src_cnt, &len, multi);
+
+               if (!first)
+                       first = new;
+
+               new->tx.cookie = 0;
+               async_tx_ack(&new->tx);
+
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+       } while (len);
+
+       new->tx.flags = flags; /* client is in control of this ack */
+       new->tx.cookie = -EBUSY;
+       list_splice(&first->tx_list, &new->tx_list);
+
+       return &new->tx;
+
+fail:
+       if (!first)
+               return NULL;
+
+       xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_pq(
+       struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src,
+       u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+       struct xgene_dma_desc_sw *first = NULL, *new;
+       struct xgene_dma_chan *chan;
+       size_t _len = len;
+       dma_addr_t _src[XGENE_DMA_MAX_XOR_SRC];
+       static u8 multi[XGENE_DMA_MAX_XOR_SRC] = {0x01, 0x01, 0x01, 0x01, 0x01};
+
+       if (unlikely(!dchan || !len))
+               return NULL;
+
+       chan = to_dma_chan(dchan);
+
+       /*
+        * Save source addresses on local variable, may be we have to
+        * prepare two descriptor to generate P and Q if both enabled
+        * in the flags by client
+        */
+       memcpy(_src, src, sizeof(*src) * src_cnt);
+
+       if (flags & DMA_PREP_PQ_DISABLE_P)
+               len = 0;
+
+       if (flags & DMA_PREP_PQ_DISABLE_Q)
+               _len = 0;
+
+       do {
+               /* Allocate the link descriptor from DMA pool */
+               new = xgene_dma_alloc_descriptor(chan);
+               if (!new)
+                       goto fail;
+
+               if (!first)
+                       first = new;
+
+               new->tx.cookie = 0;
+               async_tx_ack(&new->tx);
+
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+
+               /*
+                * Prepare DMA descriptor to generate P,
+                * if DMA_PREP_PQ_DISABLE_P flag is not set
+                */
+               if (len) {
+                       xgene_dma_prep_xor_desc(chan, new, &dst[0], src,
+                                               src_cnt, &len, multi);
+                       continue;
+               }
+
+               /*
+                * Prepare DMA descriptor to generate Q,
+                * if DMA_PREP_PQ_DISABLE_Q flag is not set
+                */
+               if (_len) {
+                       xgene_dma_prep_xor_desc(chan, new, &dst[1], _src,
+                                               src_cnt, &_len, scf);
+               }
+       } while (len || _len);
+
+       new->tx.flags = flags; /* client is in control of this ack */
+       new->tx.cookie = -EBUSY;
+       list_splice(&first->tx_list, &new->tx_list);
+
+       return &new->tx;
+
+fail:
+       if (!first)
+               return NULL;
+
+       xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+       return NULL;
+}
+
+static void xgene_dma_issue_pending(struct dma_chan *dchan)
+{
+       struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+       spin_lock_bh(&chan->lock);
+       xgene_chan_xfer_ld_pending(chan);
+       spin_unlock_bh(&chan->lock);
+}
+
+static enum dma_status xgene_dma_tx_status(struct dma_chan *dchan,
+                                          dma_cookie_t cookie,
+                                          struct dma_tx_state *txstate)
+{
+       return dma_cookie_status(dchan, cookie, txstate);
+}
+
+static void xgene_dma_tasklet_cb(unsigned long data)
+{
+       struct xgene_dma_chan *chan = (struct xgene_dma_chan *)data;
+
+       spin_lock_bh(&chan->lock);
+
+       /* Run all cleanup for descriptors which have been completed */
+       xgene_dma_cleanup_descriptors(chan);
+
+       /* Re-enable DMA channel IRQ */
+       enable_irq(chan->rx_irq);
+
+       spin_unlock_bh(&chan->lock);
+}
+
+static irqreturn_t xgene_dma_chan_ring_isr(int irq, void *id)
+{
+       struct xgene_dma_chan *chan = (struct xgene_dma_chan *)id;
+
+       BUG_ON(!chan);
+
+       /*
+        * Disable DMA channel IRQ until we process completed
+        * descriptors
+        */
+       disable_irq_nosync(chan->rx_irq);
+
+       /*
+        * Schedule the tasklet to handle all cleanup of the current
+        * transaction. It will start a new transaction if there is
+        * one pending.
+        */
+       tasklet_schedule(&chan->tasklet);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t xgene_dma_err_isr(int irq, void *id)
+{
+       struct xgene_dma *pdma = (struct xgene_dma *)id;
+       unsigned long int_mask;
+       u32 val, i;
+
+       val = ioread32(pdma->csr_dma + XGENE_DMA_INT);
+
+       /* Clear DMA interrupts */
+       iowrite32(val, pdma->csr_dma + XGENE_DMA_INT);
+
+       /* Print DMA error info */
+       int_mask = val >> XGENE_DMA_INT_MASK_SHIFT;
+       for_each_set_bit(i, &int_mask, ARRAY_SIZE(xgene_dma_err))
+               dev_err(pdma->dev,
+                       "Interrupt status 0x%08X %s\n", val, xgene_dma_err[i]);
+
+       return IRQ_HANDLED;
+}
+
+static void xgene_dma_wr_ring_state(struct xgene_dma_ring *ring)
+{
+       int i;
+
+       iowrite32(ring->num, ring->pdma->csr_ring + XGENE_DMA_RING_STATE);
+
+       for (i = 0; i < XGENE_DMA_RING_NUM_CONFIG; i++)
+               iowrite32(ring->state[i], ring->pdma->csr_ring +
+                         XGENE_DMA_RING_STATE_WR_BASE + (i * 4));
+}
+
+static void xgene_dma_clr_ring_state(struct xgene_dma_ring *ring)
+{
+       memset(ring->state, 0, sizeof(u32) * XGENE_DMA_RING_NUM_CONFIG);
+       xgene_dma_wr_ring_state(ring);
+}
+
+static void xgene_dma_setup_ring(struct xgene_dma_ring *ring)
+{
+       void *ring_cfg = ring->state;
+       u64 addr = ring->desc_paddr;
+       void *desc;
+       u32 i, val;
+
+       ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE;
+
+       /* Clear DMA ring state */
+       xgene_dma_clr_ring_state(ring);
+
+       /* Set DMA ring type */
+       XGENE_DMA_RING_TYPE_SET(ring_cfg, XGENE_DMA_RING_TYPE_REGULAR);
+
+       if (ring->owner == XGENE_DMA_RING_OWNER_DMA) {
+               /* Set recombination buffer and timeout */
+               XGENE_DMA_RING_RECOMBBUF_SET(ring_cfg);
+               XGENE_DMA_RING_RECOMTIMEOUTL_SET(ring_cfg);
+               XGENE_DMA_RING_RECOMTIMEOUTH_SET(ring_cfg);
+       }
+
+       /* Initialize DMA ring state */
+       XGENE_DMA_RING_SELTHRSH_SET(ring_cfg);
+       XGENE_DMA_RING_ACCEPTLERR_SET(ring_cfg);
+       XGENE_DMA_RING_COHERENT_SET(ring_cfg);
+       XGENE_DMA_RING_ADDRL_SET(ring_cfg, addr);
+       XGENE_DMA_RING_ADDRH_SET(ring_cfg, addr);
+       XGENE_DMA_RING_SIZE_SET(ring_cfg, ring->cfgsize);
+
+       /* Write DMA ring configurations */
+       xgene_dma_wr_ring_state(ring);
+
+       /* Set DMA ring id */
+       iowrite32(XGENE_DMA_RING_ID_SETUP(ring->id),
+                 ring->pdma->csr_ring + XGENE_DMA_RING_ID);
+
+       /* Set DMA ring buffer */
+       iowrite32(XGENE_DMA_RING_ID_BUF_SETUP(ring->num),
+                 ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);
+
+       if (ring->owner != XGENE_DMA_RING_OWNER_CPU)
+               return;
+
+       /* Set empty signature to DMA Rx ring descriptors */
+       for (i = 0; i < ring->slots; i++) {
+               desc = &ring->desc_hw[i];
+               XGENE_DMA_DESC_SET_EMPTY(desc);
+       }
+
+       /* Enable DMA Rx ring interrupt */
+       val = ioread32(ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE);
+       XGENE_DMA_RING_NE_INT_MODE_SET(val, ring->buf_num);
+       iowrite32(val, ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE);
+}
+
+static void xgene_dma_clear_ring(struct xgene_dma_ring *ring)
+{
+       u32 ring_id, val;
+
+       if (ring->owner == XGENE_DMA_RING_OWNER_CPU) {
+               /* Disable DMA Rx ring interrupt */
+               val = ioread32(ring->pdma->csr_ring +
+                              XGENE_DMA_RING_NE_INT_MODE);
+               XGENE_DMA_RING_NE_INT_MODE_RESET(val, ring->buf_num);
+               iowrite32(val, ring->pdma->csr_ring +
+                         XGENE_DMA_RING_NE_INT_MODE);
+       }
+
+       /* Clear DMA ring state */
+       ring_id = XGENE_DMA_RING_ID_SETUP(ring->id);
+       iowrite32(ring_id, ring->pdma->csr_ring + XGENE_DMA_RING_ID);
+
+       iowrite32(0, ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);
+       xgene_dma_clr_ring_state(ring);
+}
+
+static void xgene_dma_set_ring_cmd(struct xgene_dma_ring *ring)
+{
+       ring->cmd_base = ring->pdma->csr_ring_cmd +
+                               XGENE_DMA_RING_CMD_BASE_OFFSET((ring->num -
+                                                         XGENE_DMA_RING_NUM));
+
+       ring->cmd = ring->cmd_base + XGENE_DMA_RING_CMD_OFFSET;
+}
+
+static int xgene_dma_get_ring_size(struct xgene_dma_chan *chan,
+                                  enum xgene_dma_ring_cfgsize cfgsize)
+{
+       int size;
+
+       switch (cfgsize) {
+       case XGENE_DMA_RING_CFG_SIZE_512B:
+               size = 0x200;
+               break;
+       case XGENE_DMA_RING_CFG_SIZE_2KB:
+               size = 0x800;
+               break;
+       case XGENE_DMA_RING_CFG_SIZE_16KB:
+               size = 0x4000;
+               break;
+       case XGENE_DMA_RING_CFG_SIZE_64KB:
+               size = 0x10000;
+               break;
+       case XGENE_DMA_RING_CFG_SIZE_512KB:
+               size = 0x80000;
+               break;
+       default:
+               chan_err(chan, "Unsupported cfg ring size %d\n", cfgsize);
+               return -EINVAL;
+       }
+
+       return size;
+}
+
+static void xgene_dma_delete_ring_one(struct xgene_dma_ring *ring)
+{
+       /* Clear DMA ring configurations */
+       xgene_dma_clear_ring(ring);
+
+       /* De-allocate DMA ring descriptor */
+       if (ring->desc_vaddr) {
+               dma_free_coherent(ring->pdma->dev, ring->size,
+                                 ring->desc_vaddr, ring->desc_paddr);
+               ring->desc_vaddr = NULL;
+       }
+}
+
+static void xgene_dma_delete_chan_rings(struct xgene_dma_chan *chan)
+{
+       xgene_dma_delete_ring_one(&chan->rx_ring);
+       xgene_dma_delete_ring_one(&chan->tx_ring);
+}
+
+static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan,
+                                    struct xgene_dma_ring *ring,
+                                    enum xgene_dma_ring_cfgsize cfgsize)
+{
+       /* Setup DMA ring descriptor variables */
+       ring->pdma = chan->pdma;
+       ring->cfgsize = cfgsize;
+       ring->num = chan->pdma->ring_num++;
+       ring->id = XGENE_DMA_RING_ID_GET(ring->owner, ring->buf_num);
+
+       ring->size = xgene_dma_get_ring_size(chan, cfgsize);
+       if (ring->size <= 0)
+               return ring->size;
+
+       /* Allocate memory for DMA ring descriptor */
+       ring->desc_vaddr = dma_zalloc_coherent(chan->dev, ring->size,
+                                              &ring->desc_paddr, GFP_KERNEL);
+       if (!ring->desc_vaddr) {
+               chan_err(chan, "Failed to allocate ring desc\n");
+               return -ENOMEM;
+       }
+
+       /* Configure and enable DMA ring */
+       xgene_dma_set_ring_cmd(ring);
+       xgene_dma_setup_ring(ring);
+
+       return 0;
+}
+
+static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_ring *rx_ring = &chan->rx_ring;
+       struct xgene_dma_ring *tx_ring = &chan->tx_ring;
+       int ret;
+
+       /* Create DMA Rx ring descriptor */
+       rx_ring->owner = XGENE_DMA_RING_OWNER_CPU;
+       rx_ring->buf_num = XGENE_DMA_CPU_BUFNUM + chan->id;
+
+       ret = xgene_dma_create_ring_one(chan, rx_ring,
+                                       XGENE_DMA_RING_CFG_SIZE_64KB);
+       if (ret)
+               return ret;
+
+       chan_dbg(chan, "Rx ring id 0x%X num %d desc 0x%p\n",
+                rx_ring->id, rx_ring->num, rx_ring->desc_vaddr);
+
+       /* Create DMA Tx ring descriptor */
+       tx_ring->owner = XGENE_DMA_RING_OWNER_DMA;
+       tx_ring->buf_num = XGENE_DMA_BUFNUM + chan->id;
+
+       ret = xgene_dma_create_ring_one(chan, tx_ring,
+                                       XGENE_DMA_RING_CFG_SIZE_64KB);
+       if (ret) {
+               xgene_dma_delete_ring_one(rx_ring);
+               return ret;
+       }
+
+       tx_ring->dst_ring_num = XGENE_DMA_RING_DST_ID(rx_ring->num);
+
+       chan_dbg(chan,
+                "Tx ring id 0x%X num %d desc 0x%p\n",
+                tx_ring->id, tx_ring->num, tx_ring->desc_vaddr);
+
+       /* Set the max outstanding request possible to this channel */
+       chan->max_outstanding = rx_ring->slots;
+
+       return ret;
+}
+
+static int xgene_dma_init_rings(struct xgene_dma *pdma)
+{
+       int ret, i, j;
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               ret = xgene_dma_create_chan_rings(&pdma->chan[i]);
+               if (ret) {
+                       for (j = 0; j < i; j++)
+                               xgene_dma_delete_chan_rings(&pdma->chan[j]);
+                       return ret;
+               }
+       }
+
+       return ret;
+}
+
+static void xgene_dma_enable(struct xgene_dma *pdma)
+{
+       u32 val;
+
+       /* Configure and enable DMA engine */
+       val = ioread32(pdma->csr_dma + XGENE_DMA_GCR);
+       XGENE_DMA_CH_SETUP(val);
+       XGENE_DMA_ENABLE(val);
+       iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR);
+}
+
+static void xgene_dma_disable(struct xgene_dma *pdma)
+{
+       u32 val;
+
+       val = ioread32(pdma->csr_dma + XGENE_DMA_GCR);
+       XGENE_DMA_DISABLE(val);
+       iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR);
+}
+
+static void xgene_dma_mask_interrupts(struct xgene_dma *pdma)
+{
+       /*
+        * Mask DMA ring overflow, underflow and
+        * AXI write/read error interrupts
+        */
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT0_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT1_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT2_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT3_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT4_MASK);
+
+       /* Mask DMA error interrupts */
+       iowrite32(XGENE_DMA_INT_ALL_MASK, pdma->csr_dma + XGENE_DMA_INT_MASK);
+}
+
+static void xgene_dma_unmask_interrupts(struct xgene_dma *pdma)
+{
+       /*
+        * Unmask DMA ring overflow, underflow and
+        * AXI write/read error interrupts
+        */
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT0_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT1_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT2_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT3_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT4_MASK);
+
+       /* Unmask DMA error interrupts */
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_INT_MASK);
+}
+
+static void xgene_dma_init_hw(struct xgene_dma *pdma)
+{
+       u32 val;
+
+       /* Associate DMA ring to corresponding ring HW */
+       iowrite32(XGENE_DMA_ASSOC_RING_MNGR1,
+                 pdma->csr_dma + XGENE_DMA_CFG_RING_WQ_ASSOC);
+
+       /* Configure RAID6 polynomial control setting */
+       if (is_pq_enabled(pdma))
+               iowrite32(XGENE_DMA_RAID6_MULTI_CTRL(0x1D),
+                         pdma->csr_dma + XGENE_DMA_RAID6_CONT);
+       else
+               dev_info(pdma->dev, "PQ is disabled in HW\n");
+
+       xgene_dma_enable(pdma);
+       xgene_dma_unmask_interrupts(pdma);
+
+       /* Get DMA id and version info */
+       val = ioread32(pdma->csr_dma + XGENE_DMA_IPBRR);
+
+       /* DMA device info */
+       dev_info(pdma->dev,
+                "X-Gene DMA v%d.%02d.%02d driver registered %d channels",
+                XGENE_DMA_REV_NO_RD(val), XGENE_DMA_BUS_ID_RD(val),
+                XGENE_DMA_DEV_ID_RD(val), XGENE_DMA_MAX_CHANNEL);
+}
+
+static int xgene_dma_init_ring_mngr(struct xgene_dma *pdma)
+{
+       if (ioread32(pdma->csr_ring + XGENE_DMA_RING_CLKEN) &&
+           (!ioread32(pdma->csr_ring + XGENE_DMA_RING_SRST)))
+               return 0;
+
+       iowrite32(0x3, pdma->csr_ring + XGENE_DMA_RING_CLKEN);
+       iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_SRST);
+
+       /* Bring up memory */
+       iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN);
+
+       /* Force a barrier */
+       ioread32(pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN);
+
+       /* reset may take up to 1ms */
+       usleep_range(1000, 1100);
+
+       if (ioread32(pdma->csr_ring + XGENE_DMA_RING_BLK_MEM_RDY)
+               != XGENE_DMA_RING_BLK_MEM_RDY_VAL) {
+               dev_err(pdma->dev,
+                       "Failed to release ring mngr memory from shutdown\n");
+               return -ENODEV;
+       }
+
+       /* program threshold set 1 and all hysteresis */
+       iowrite32(XGENE_DMA_RING_THRESLD0_SET1_VAL,
+                 pdma->csr_ring + XGENE_DMA_RING_THRESLD0_SET1);
+       iowrite32(XGENE_DMA_RING_THRESLD1_SET1_VAL,
+                 pdma->csr_ring + XGENE_DMA_RING_THRESLD1_SET1);
+       iowrite32(XGENE_DMA_RING_HYSTERESIS_VAL,
+                 pdma->csr_ring + XGENE_DMA_RING_HYSTERESIS);
+
+       /* Enable QPcore and assign error queue */
+       iowrite32(XGENE_DMA_RING_ENABLE,
+                 pdma->csr_ring + XGENE_DMA_RING_CONFIG);
+
+       return 0;
+}
+
+static int xgene_dma_init_mem(struct xgene_dma *pdma)
+{
+       int ret;
+
+       ret = xgene_dma_init_ring_mngr(pdma);
+       if (ret)
+               return ret;
+
+       /* Bring up memory */
+       iowrite32(0x0, pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN);
+
+       /* Force a barrier */
+       ioread32(pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN);
+
+       /* reset may take up to 1ms */
+       usleep_range(1000, 1100);
+
+       if (ioread32(pdma->csr_dma + XGENE_DMA_BLK_MEM_RDY)
+               != XGENE_DMA_BLK_MEM_RDY_VAL) {
+               dev_err(pdma->dev,
+                       "Failed to release DMA memory from shutdown\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static int xgene_dma_request_irqs(struct xgene_dma *pdma)
+{
+       struct xgene_dma_chan *chan;
+       int ret, i, j;
+
+       /* Register DMA error irq */
+       ret = devm_request_irq(pdma->dev, pdma->err_irq, xgene_dma_err_isr,
+                              0, "dma_error", pdma);
+       if (ret) {
+               dev_err(pdma->dev,
+                       "Failed to register error IRQ %d\n", pdma->err_irq);
+               return ret;
+       }
+
+       /* Register DMA channel rx irq */
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               chan = &pdma->chan[i];
+               ret = devm_request_irq(chan->dev, chan->rx_irq,
+                                      xgene_dma_chan_ring_isr,
+                                      0, chan->name, chan);
+               if (ret) {
+                       chan_err(chan, "Failed to register Rx IRQ %d\n",
+                                chan->rx_irq);
+                       devm_free_irq(pdma->dev, pdma->err_irq, pdma);
+
+                       for (j = 0; j < i; j++) {
+                               chan = &pdma->chan[i];
+                               devm_free_irq(chan->dev, chan->rx_irq, chan);
+                       }
+
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static void xgene_dma_free_irqs(struct xgene_dma *pdma)
+{
+       struct xgene_dma_chan *chan;
+       int i;
+
+       /* Free DMA device error irq */
+       devm_free_irq(pdma->dev, pdma->err_irq, pdma);
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               chan = &pdma->chan[i];
+               devm_free_irq(chan->dev, chan->rx_irq, chan);
+       }
+}
+
+static void xgene_dma_set_caps(struct xgene_dma_chan *chan,
+                              struct dma_device *dma_dev)
+{
+       /* Initialize DMA device capability mask */
+       dma_cap_zero(dma_dev->cap_mask);
+
+       /* Set DMA device capability */
+       dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+       dma_cap_set(DMA_SG, dma_dev->cap_mask);
+
+       /* Basically here, the X-Gene SoC DMA engine channel 0 supports XOR
+        * and channel 1 supports XOR, PQ both. First thing here is we have
+        * mechanism in hw to enable/disable PQ/XOR supports on channel 1,
+        * we can make sure this by reading SoC Efuse register.
+        * Second thing, we have hw errata that if we run channel 0 and
+        * channel 1 simultaneously with executing XOR and PQ request,
+        * suddenly DMA engine hangs, So here we enable XOR on channel 0 only
+        * if XOR and PQ supports on channel 1 is disabled.
+        */
+       if ((chan->id == XGENE_DMA_PQ_CHANNEL) &&
+           is_pq_enabled(chan->pdma)) {
+               dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+               dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+       } else if ((chan->id == XGENE_DMA_XOR_CHANNEL) &&
+                  !is_pq_enabled(chan->pdma)) {
+               dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+       }
+
+       /* Set base and prep routines */
+       dma_dev->dev = chan->dev;
+       dma_dev->device_alloc_chan_resources = xgene_dma_alloc_chan_resources;
+       dma_dev->device_free_chan_resources = xgene_dma_free_chan_resources;
+       dma_dev->device_issue_pending = xgene_dma_issue_pending;
+       dma_dev->device_tx_status = xgene_dma_tx_status;
+       dma_dev->device_prep_dma_memcpy = xgene_dma_prep_memcpy;
+       dma_dev->device_prep_dma_sg = xgene_dma_prep_sg;
+
+       if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+               dma_dev->device_prep_dma_xor = xgene_dma_prep_xor;
+               dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC;
+               dma_dev->xor_align = XGENE_DMA_XOR_ALIGNMENT;
+       }
+
+       if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+               dma_dev->device_prep_dma_pq = xgene_dma_prep_pq;
+               dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC;
+               dma_dev->pq_align = XGENE_DMA_XOR_ALIGNMENT;
+       }
+}
+
+static int xgene_dma_async_register(struct xgene_dma *pdma, int id)
+{
+       struct xgene_dma_chan *chan = &pdma->chan[id];
+       struct dma_device *dma_dev = &pdma->dma_dev[id];
+       int ret;
+
+       chan->dma_chan.device = dma_dev;
+
+       spin_lock_init(&chan->lock);
+       INIT_LIST_HEAD(&chan->ld_pending);
+       INIT_LIST_HEAD(&chan->ld_running);
+       INIT_LIST_HEAD(&chan->ld_completed);
+       tasklet_init(&chan->tasklet, xgene_dma_tasklet_cb,
+                    (unsigned long)chan);
+
+       chan->pending = 0;
+       chan->desc_pool = NULL;
+       dma_cookie_init(&chan->dma_chan);
+
+       /* Setup dma device capabilities and prep routines */
+       xgene_dma_set_caps(chan, dma_dev);
+
+       /* Initialize DMA device list head */
+       INIT_LIST_HEAD(&dma_dev->channels);
+       list_add_tail(&chan->dma_chan.device_node, &dma_dev->channels);
+
+       /* Register with Linux async DMA framework*/
+       ret = dma_async_device_register(dma_dev);
+       if (ret) {
+               chan_err(chan, "Failed to register async device %d", ret);
+               tasklet_kill(&chan->tasklet);
+
+               return ret;
+       }
+
+       /* DMA capability info */
+       dev_info(pdma->dev,
+                "%s: CAPABILITY ( %s%s%s%s)\n", dma_chan_name(&chan->dma_chan),
+                dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "MEMCPY " : "",
+                dma_has_cap(DMA_SG, dma_dev->cap_mask) ? "SGCPY " : "",
+                dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "XOR " : "",
+                dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : "");
+
+       return 0;
+}
+
+static int xgene_dma_init_async(struct xgene_dma *pdma)
+{
+       int ret, i, j;
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL ; i++) {
+               ret = xgene_dma_async_register(pdma, i);
+               if (ret) {
+                       for (j = 0; j < i; j++) {
+                               dma_async_device_unregister(&pdma->dma_dev[j]);
+                               tasklet_kill(&pdma->chan[j].tasklet);
+                       }
+
+                       return ret;
+               }
+       }
+
+       return ret;
+}
+
+static void xgene_dma_async_unregister(struct xgene_dma *pdma)
+{
+       int i;
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++)
+               dma_async_device_unregister(&pdma->dma_dev[i]);
+}
+
+static void xgene_dma_init_channels(struct xgene_dma *pdma)
+{
+       struct xgene_dma_chan *chan;
+       int i;
+
+       pdma->ring_num = XGENE_DMA_RING_NUM;
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               chan = &pdma->chan[i];
+               chan->dev = pdma->dev;
+               chan->pdma = pdma;
+               chan->id = i;
+               snprintf(chan->name, sizeof(chan->name), "dmachan%d", chan->id);
+       }
+}
+
+static int xgene_dma_get_resources(struct platform_device *pdev,
+                                  struct xgene_dma *pdma)
+{
+       struct resource *res;
+       int irq, i;
+
+       /* Get DMA csr region */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get csr region\n");
+               return -ENXIO;
+       }
+
+       pdma->csr_dma = devm_ioremap(&pdev->dev, res->start,
+                                    resource_size(res));
+       if (!pdma->csr_dma) {
+               dev_err(&pdev->dev, "Failed to ioremap csr region");
+               return -ENOMEM;
+       }
+
+       /* Get DMA ring csr region */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get ring csr region\n");
+               return -ENXIO;
+       }
+
+       pdma->csr_ring =  devm_ioremap(&pdev->dev, res->start,
+                                      resource_size(res));
+       if (!pdma->csr_ring) {
+               dev_err(&pdev->dev, "Failed to ioremap ring csr region");
+               return -ENOMEM;
+       }
+
+       /* Get DMA ring cmd csr region */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get ring cmd csr region\n");
+               return -ENXIO;
+       }
+
+       pdma->csr_ring_cmd = devm_ioremap(&pdev->dev, res->start,
+                                         resource_size(res));
+       if (!pdma->csr_ring_cmd) {
+               dev_err(&pdev->dev, "Failed to ioremap ring cmd csr region");
+               return -ENOMEM;
+       }
+
+       /* Get efuse csr region */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get efuse csr region\n");
+               return -ENXIO;
+       }
+
+       pdma->csr_efuse = devm_ioremap(&pdev->dev, res->start,
+                                      resource_size(res));
+       if (!pdma->csr_efuse) {
+               dev_err(&pdev->dev, "Failed to ioremap efuse csr region");
+               return -ENOMEM;
+       }
+
+       /* Get DMA error interrupt */
+       irq = platform_get_irq(pdev, 0);
+       if (irq <= 0) {
+               dev_err(&pdev->dev, "Failed to get Error IRQ\n");
+               return -ENXIO;
+       }
+
+       pdma->err_irq = irq;
+
+       /* Get DMA Rx ring descriptor interrupts for all DMA channels */
+       for (i = 1; i <= XGENE_DMA_MAX_CHANNEL; i++) {
+               irq = platform_get_irq(pdev, i);
+               if (irq <= 0) {
+                       dev_err(&pdev->dev, "Failed to get Rx IRQ\n");
+                       return -ENXIO;
+               }
+
+               pdma->chan[i - 1].rx_irq = irq;
+       }
+
+       return 0;
+}
+
+static int xgene_dma_probe(struct platform_device *pdev)
+{
+       struct xgene_dma *pdma;
+       int ret, i;
+
+       pdma = devm_kzalloc(&pdev->dev, sizeof(*pdma), GFP_KERNEL);
+       if (!pdma)
+               return -ENOMEM;
+
+       pdma->dev = &pdev->dev;
+       platform_set_drvdata(pdev, pdma);
+
+       ret = xgene_dma_get_resources(pdev, pdma);
+       if (ret)
+               return ret;
+
+       pdma->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(pdma->clk)) {
+               dev_err(&pdev->dev, "Failed to get clk\n");
+               return PTR_ERR(pdma->clk);
+       }
+
+       /* Enable clk before accessing registers */
+       ret = clk_prepare_enable(pdma->clk);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to enable clk %d\n", ret);
+               return ret;
+       }
+
+       /* Remove DMA RAM out of shutdown */
+       ret = xgene_dma_init_mem(pdma);
+       if (ret)
+               goto err_clk_enable;
+
+       ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(42));
+       if (ret) {
+               dev_err(&pdev->dev, "No usable DMA configuration\n");
+               goto err_dma_mask;
+       }
+
+       /* Initialize DMA channels software state */
+       xgene_dma_init_channels(pdma);
+
+       /* Configue DMA rings */
+       ret = xgene_dma_init_rings(pdma);
+       if (ret)
+               goto err_clk_enable;
+
+       ret = xgene_dma_request_irqs(pdma);
+       if (ret)
+               goto err_request_irq;
+
+       /* Configure and enable DMA engine */
+       xgene_dma_init_hw(pdma);
+
+       /* Register DMA device with linux async framework */
+       ret = xgene_dma_init_async(pdma);
+       if (ret)
+               goto err_async_init;
+
+       return 0;
+
+err_async_init:
+       xgene_dma_free_irqs(pdma);
+
+err_request_irq:
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++)
+               xgene_dma_delete_chan_rings(&pdma->chan[i]);
+
+err_dma_mask:
+err_clk_enable:
+       clk_disable_unprepare(pdma->clk);
+
+       return ret;
+}
+
+static int xgene_dma_remove(struct platform_device *pdev)
+{
+       struct xgene_dma *pdma = platform_get_drvdata(pdev);
+       struct xgene_dma_chan *chan;
+       int i;
+
+       xgene_dma_async_unregister(pdma);
+
+       /* Mask interrupts and disable DMA engine */
+       xgene_dma_mask_interrupts(pdma);
+       xgene_dma_disable(pdma);
+       xgene_dma_free_irqs(pdma);
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               chan = &pdma->chan[i];
+               tasklet_kill(&chan->tasklet);
+               xgene_dma_delete_chan_rings(chan);
+       }
+
+       clk_disable_unprepare(pdma->clk);
+
+       return 0;
+}
+
+static const struct of_device_id xgene_dma_of_match_ptr[] = {
+       {.compatible = "apm,xgene-storm-dma",},
+       {},
+};
+MODULE_DEVICE_TABLE(of, xgene_dma_of_match_ptr);
+
+static struct platform_driver xgene_dma_driver = {
+       .probe = xgene_dma_probe,
+       .remove = xgene_dma_remove,
+       .driver = {
+               .name = "X-Gene-DMA",
+               .of_match_table = xgene_dma_of_match_ptr,
+       },
+};
+
+module_platform_driver(xgene_dma_driver);
+
+MODULE_DESCRIPTION("APM X-Gene SoC DMA driver");
+MODULE_AUTHOR("Rameshwar Prasad Sahu <rsahu@apm.com>");
+MODULE_AUTHOR("Loc Ho <lho@apm.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
index bdd2a5d..d8434d4 100644 (file)
@@ -22,9 +22,9 @@
  * (at your option) any later version.
  */
 
-#include <linux/amba/xilinx_dma.h>
 #include <linux/bitops.h>
 #include <linux/dmapool.h>
+#include <linux/dma/xilinx_dma.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
index ef5feee..580e10a 100644 (file)
@@ -538,8 +538,14 @@ struct dma_buf *
 armada_gem_prime_export(struct drm_device *dev, struct drm_gem_object *obj,
        int flags)
 {
-       return dma_buf_export(obj, &armada_gem_prime_dmabuf_ops, obj->size,
-                             O_RDWR, NULL);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &armada_gem_prime_dmabuf_ops;
+       exp_info.size = obj->size;
+       exp_info.flags = O_RDWR;
+       exp_info.priv = obj;
+
+       return dma_buf_export(&exp_info);
 }
 
 struct drm_gem_object *
index 7482b06..7fec191 100644 (file)
@@ -339,13 +339,17 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops =  {
 struct dma_buf *drm_gem_prime_export(struct drm_device *dev,
                                     struct drm_gem_object *obj, int flags)
 {
-       struct reservation_object *robj = NULL;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &drm_gem_prime_dmabuf_ops;
+       exp_info.size = obj->size;
+       exp_info.flags = flags;
+       exp_info.priv = obj;
 
        if (dev->driver->gem_prime_res_obj)
-               robj = dev->driver->gem_prime_res_obj(obj);
+               exp_info.resv = dev->driver->gem_prime_res_obj(obj);
 
-       return dma_buf_export(obj, &drm_gem_prime_dmabuf_ops, obj->size,
-                             flags, robj);
+       return dma_buf_export(&exp_info);
 }
 EXPORT_SYMBOL(drm_gem_prime_export);
 
index 3833bf8..cd485c0 100644 (file)
@@ -185,9 +185,14 @@ struct dma_buf *exynos_dmabuf_prime_export(struct drm_device *drm_dev,
                                struct drm_gem_object *obj, int flags)
 {
        struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 
-       return dma_buf_export(obj, &exynos_dmabuf_ops,
-                               exynos_gem_obj->base.size, flags, NULL);
+       exp_info.ops = &exynos_dmabuf_ops;
+       exp_info.size = exynos_gem_obj->base.size;
+       exp_info.flags = flags;
+       exp_info.priv = obj;
+
+       return dma_buf_export(&exp_info);
 }
 
 struct drm_gem_object *exynos_dmabuf_prime_import(struct drm_device *drm_dev,
index c24c3f1..c302ffb 100644 (file)
@@ -1038,7 +1038,7 @@ static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv)
                s->lra_limits[i] = I915_READ(GEN7_LRA_LIMITS_BASE + i * 4);
 
        s->media_max_req_count  = I915_READ(GEN7_MEDIA_MAX_REQ_COUNT);
-       s->gfx_max_req_count    = I915_READ(GEN7_MEDIA_MAX_REQ_COUNT);
+       s->gfx_max_req_count    = I915_READ(GEN7_GFX_MAX_REQ_COUNT);
 
        s->render_hwsp          = I915_READ(RENDER_HWS_PGA_GEN7);
        s->ecochk               = I915_READ(GAM_ECOCHK);
@@ -1120,7 +1120,7 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv)
                I915_WRITE(GEN7_LRA_LIMITS_BASE + i * 4, s->lra_limits[i]);
 
        I915_WRITE(GEN7_MEDIA_MAX_REQ_COUNT, s->media_max_req_count);
-       I915_WRITE(GEN7_MEDIA_MAX_REQ_COUNT, s->gfx_max_req_count);
+       I915_WRITE(GEN7_GFX_MAX_REQ_COUNT, s->gfx_max_req_count);
 
        I915_WRITE(RENDER_HWS_PGA_GEN7, s->render_hwsp);
        I915_WRITE(GAM_ECOCHK,          s->ecochk);
index d07c0b1..53394f9 100644 (file)
@@ -2377,10 +2377,11 @@ int __i915_add_request(struct intel_engine_cs *ring,
                ret = ring->add_request(ring);
                if (ret)
                        return ret;
+
+               request->tail = intel_ring_get_tail(ringbuf);
        }
 
        request->head = request_start;
-       request->tail = intel_ring_get_tail(ringbuf);
 
        /* Whilst this request exists, batch_obj will be on the
         * active_list, and so will hold the active reference. Only when this
index 82a1f4b..7998da2 100644 (file)
@@ -230,6 +230,13 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
                                      struct drm_gem_object *gem_obj, int flags)
 {
        struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &i915_dmabuf_ops;
+       exp_info.size = gem_obj->size;
+       exp_info.flags = flags;
+       exp_info.priv = gem_obj;
+
 
        if (obj->ops->dmabuf_export) {
                int ret = obj->ops->dmabuf_export(obj);
@@ -237,8 +244,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
                        return ERR_PTR(ret);
        }
 
-       return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags,
-                             NULL);
+       return dma_buf_export(&exp_info);
 }
 
 static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
index b522eb6..3da1af4 100644 (file)
@@ -1807,6 +1807,7 @@ enum skl_disp_power_wells {
 #define   GMBUS_CYCLE_INDEX    (2<<25)
 #define   GMBUS_CYCLE_STOP     (4<<25)
 #define   GMBUS_BYTE_COUNT_SHIFT 16
+#define   GMBUS_BYTE_COUNT_MAX   256U
 #define   GMBUS_SLAVE_INDEX_SHIFT 8
 #define   GMBUS_SLAVE_ADDR_SHIFT 1
 #define   GMBUS_SLAVE_READ     (1<<0)
index b31088a..56e437e 100644 (file)
@@ -270,18 +270,17 @@ gmbus_wait_idle(struct drm_i915_private *dev_priv)
 }
 
 static int
-gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg,
-               u32 gmbus1_index)
+gmbus_xfer_read_chunk(struct drm_i915_private *dev_priv,
+                     unsigned short addr, u8 *buf, unsigned int len,
+                     u32 gmbus1_index)
 {
        int reg_offset = dev_priv->gpio_mmio_base;
-       u16 len = msg->len;
-       u8 *buf = msg->buf;
 
        I915_WRITE(GMBUS1 + reg_offset,
                   gmbus1_index |
                   GMBUS_CYCLE_WAIT |
                   (len << GMBUS_BYTE_COUNT_SHIFT) |
-                  (msg->addr << GMBUS_SLAVE_ADDR_SHIFT) |
+                  (addr << GMBUS_SLAVE_ADDR_SHIFT) |
                   GMBUS_SLAVE_READ | GMBUS_SW_RDY);
        while (len) {
                int ret;
@@ -303,11 +302,35 @@ gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg,
 }
 
 static int
-gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg)
+gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg,
+               u32 gmbus1_index)
 {
-       int reg_offset = dev_priv->gpio_mmio_base;
-       u16 len = msg->len;
        u8 *buf = msg->buf;
+       unsigned int rx_size = msg->len;
+       unsigned int len;
+       int ret;
+
+       do {
+               len = min(rx_size, GMBUS_BYTE_COUNT_MAX);
+
+               ret = gmbus_xfer_read_chunk(dev_priv, msg->addr,
+                                           buf, len, gmbus1_index);
+               if (ret)
+                       return ret;
+
+               rx_size -= len;
+               buf += len;
+       } while (rx_size != 0);
+
+       return 0;
+}
+
+static int
+gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv,
+                      unsigned short addr, u8 *buf, unsigned int len)
+{
+       int reg_offset = dev_priv->gpio_mmio_base;
+       unsigned int chunk_size = len;
        u32 val, loop;
 
        val = loop = 0;
@@ -319,8 +342,8 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg)
        I915_WRITE(GMBUS3 + reg_offset, val);
        I915_WRITE(GMBUS1 + reg_offset,
                   GMBUS_CYCLE_WAIT |
-                  (msg->len << GMBUS_BYTE_COUNT_SHIFT) |
-                  (msg->addr << GMBUS_SLAVE_ADDR_SHIFT) |
+                  (chunk_size << GMBUS_BYTE_COUNT_SHIFT) |
+                  (addr << GMBUS_SLAVE_ADDR_SHIFT) |
                   GMBUS_SLAVE_WRITE | GMBUS_SW_RDY);
        while (len) {
                int ret;
@@ -337,6 +360,29 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg)
                if (ret)
                        return ret;
        }
+
+       return 0;
+}
+
+static int
+gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg)
+{
+       u8 *buf = msg->buf;
+       unsigned int tx_size = msg->len;
+       unsigned int len;
+       int ret;
+
+       do {
+               len = min(tx_size, GMBUS_BYTE_COUNT_MAX);
+
+               ret = gmbus_xfer_write_chunk(dev_priv, msg->addr, buf, len);
+               if (ret)
+                       return ret;
+
+               buf += len;
+               tx_size -= len;
+       } while (tx_size != 0);
+
        return 0;
 }
 
index fcb074b..09df74b 100644 (file)
@@ -393,6 +393,26 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
                }
        }
 
+       if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
+               /*
+                * WaIdleLiteRestore: make sure we never cause a lite
+                * restore with HEAD==TAIL
+                */
+               if (req0 && req0->elsp_submitted) {
+                       /*
+                        * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
+                        * as we resubmit the request. See gen8_emit_request()
+                        * for where we prepare the padding after the end of the
+                        * request.
+                        */
+                       struct intel_ringbuffer *ringbuf;
+
+                       ringbuf = req0->ctx->engine[ring->id].ringbuf;
+                       req0->tail += 8;
+                       req0->tail &= ringbuf->size - 1;
+               }
+       }
+
        WARN_ON(req1 && req1->elsp_submitted);
 
        execlists_submit_contexts(ring, req0->ctx, req0->tail,
@@ -1315,7 +1335,12 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf,
        u32 cmd;
        int ret;
 
-       ret = intel_logical_ring_begin(ringbuf, request->ctx, 6);
+       /*
+        * Reserve space for 2 NOOPs at the end of each request to be
+        * used as a workaround for not being allowed to do lite
+        * restore with HEAD==TAIL (WaIdleLiteRestore).
+        */
+       ret = intel_logical_ring_begin(ringbuf, request->ctx, 8);
        if (ret)
                return ret;
 
@@ -1333,6 +1358,14 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf,
        intel_logical_ring_emit(ringbuf, MI_NOOP);
        intel_logical_ring_advance_and_submit(ringbuf, request->ctx, request);
 
+       /*
+        * Here we add two extra NOOPs as padding to avoid
+        * lite restore of a context with HEAD==TAIL.
+        */
+       intel_logical_ring_emit(ringbuf, MI_NOOP);
+       intel_logical_ring_emit(ringbuf, MI_NOOP);
+       intel_logical_ring_advance(ringbuf);
+
        return 0;
 }
 
index b46dabd..344fd78 100644 (file)
@@ -171,7 +171,14 @@ static struct dma_buf_ops omap_dmabuf_ops = {
 struct dma_buf *omap_gem_prime_export(struct drm_device *dev,
                struct drm_gem_object *obj, int flags)
 {
-       return dma_buf_export(obj, &omap_dmabuf_ops, obj->size, flags, NULL);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &omap_dmabuf_ops;
+       exp_info.size = obj->size;
+       exp_info.flags = flags;
+       exp_info.priv = obj;
+
+       return dma_buf_export(&exp_info);
 }
 
 struct drm_gem_object *omap_gem_prime_import(struct drm_device *dev,
index cfb4819..1217272 100644 (file)
@@ -627,8 +627,14 @@ struct dma_buf *tegra_gem_prime_export(struct drm_device *drm,
                                       struct drm_gem_object *gem,
                                       int flags)
 {
-       return dma_buf_export(gem, &tegra_gem_prime_dmabuf_ops, gem->size,
-                             flags, NULL);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &tegra_gem_prime_dmabuf_ops;
+       exp_info.size = gem->size;
+       exp_info.flags = flags;
+       exp_info.priv = gem;
+
+       return dma_buf_export(&exp_info);
 }
 
 struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm,
index 12c8711..4f5fa8d 100644 (file)
@@ -683,6 +683,12 @@ int ttm_prime_handle_to_fd(struct ttm_object_file *tfile,
 
        dma_buf = prime->dma_buf;
        if (!dma_buf || !get_dma_buf_unless_doomed(dma_buf)) {
+               DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+               exp_info.ops = &tdev->ops;
+               exp_info.size = prime->size;
+               exp_info.flags = flags;
+               exp_info.priv = prime;
 
                /*
                 * Need to create a new dma_buf, with memory accounting.
@@ -694,8 +700,7 @@ int ttm_prime_handle_to_fd(struct ttm_object_file *tfile,
                        goto out_unref;
                }
 
-               dma_buf = dma_buf_export(prime, &tdev->ops,
-                                        prime->size, flags, NULL);
+               dma_buf = dma_buf_export(&exp_info);
                if (IS_ERR(dma_buf)) {
                        ret = PTR_ERR(dma_buf);
                        ttm_mem_global_free(tdev->mem_glob,
index ac8a66b..e2243ed 100644 (file)
@@ -202,7 +202,14 @@ static struct dma_buf_ops udl_dmabuf_ops = {
 struct dma_buf *udl_gem_prime_export(struct drm_device *dev,
                                     struct drm_gem_object *obj, int flags)
 {
-       return dma_buf_export(obj, &udl_dmabuf_ops, obj->size, flags, NULL);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &udl_dmabuf_ops;
+       exp_info.size = obj->size;
+       exp_info.flags = flags;
+       exp_info.priv = obj;
+
+       return dma_buf_export(&exp_info);
 }
 
 static int udl_prime_create(struct drm_device *dev,
index 875c22a..fa8dedd 100644 (file)
@@ -182,72 +182,41 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
        const u16 bus_num = bus->remote_bus;
        int request_len;
        int response_len;
-       u8 *request = NULL;
-       u8 *response = NULL;
        int result;
-       struct cros_ec_command msg;
+       struct cros_ec_command msg = { };
 
        request_len = ec_i2c_count_message(i2c_msgs, num);
        if (request_len < 0) {
                dev_warn(dev, "Error constructing message %d\n", request_len);
-               result = request_len;
-               goto exit;
+               return request_len;
        }
+
        response_len = ec_i2c_count_response(i2c_msgs, num);
        if (response_len < 0) {
                /* Unexpected; no errors should come when NULL response */
                dev_warn(dev, "Error preparing response %d\n", response_len);
-               result = response_len;
-               goto exit;
-       }
-
-       if (request_len <= ARRAY_SIZE(bus->request_buf)) {
-               request = bus->request_buf;
-       } else {
-               request = kzalloc(request_len, GFP_KERNEL);
-               if (request == NULL) {
-                       result = -ENOMEM;
-                       goto exit;
-               }
-       }
-       if (response_len <= ARRAY_SIZE(bus->response_buf)) {
-               response = bus->response_buf;
-       } else {
-               response = kzalloc(response_len, GFP_KERNEL);
-               if (response == NULL) {
-                       result = -ENOMEM;
-                       goto exit;
-               }
+               return response_len;
        }
 
-       result = ec_i2c_construct_message(request, i2c_msgs, num, bus_num);
+       result = ec_i2c_construct_message(msg.outdata, i2c_msgs, num, bus_num);
        if (result)
-               goto exit;
+               return result;
 
        msg.version = 0;
        msg.command = EC_CMD_I2C_PASSTHRU;
-       msg.outdata = request;
        msg.outsize = request_len;
-       msg.indata = response;
        msg.insize = response_len;
 
        result = cros_ec_cmd_xfer(bus->ec, &msg);
        if (result < 0)
-               goto exit;
+               return result;
 
-       result = ec_i2c_parse_response(response, i2c_msgs, &num);
+       result = ec_i2c_parse_response(msg.indata, i2c_msgs, &num);
        if (result < 0)
-               goto exit;
+               return result;
 
        /* Indicate success by saying how many messages were sent */
-       result = num;
-exit:
-       if (request != bus->request_buf)
-               kfree(request);
-       if (response != bus->response_buf)
-               kfree(response);
-
-       return result;
+       return num;
 }
 
 static u32 ec_i2c_functionality(struct i2c_adapter *adap)
index 03f1e55..9604024 100644 (file)
 
 #include <linux/clk.h>
 #include <linux/completion.h>
+#include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
index 56fceff..3e84f6c 100644 (file)
@@ -913,7 +913,7 @@ static void __exit mxs_i2c_exit(void)
 module_exit(mxs_i2c_exit);
 
 MODULE_AUTHOR("Marek Vasut <marex@denx.de>");
-MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_AUTHOR("Wolfram Sang <kernel@pengutronix.de>");
 MODULE_DESCRIPTION("MXS I2C Bus Driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" DRIVER_NAME);
index 6336f02..3bd2e7d 100644 (file)
@@ -285,6 +285,6 @@ static struct platform_driver i2c_pca_pf_driver = {
 
 module_platform_driver(i2c_pca_pf_driver);
 
-MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_AUTHOR("Wolfram Sang <kernel@pengutronix.de>");
 MODULE_DESCRIPTION("I2C-PCA9564/PCA9665 platform driver");
 MODULE_LICENSE("GPL");
index 5f96b1b..019d542 100644 (file)
@@ -833,7 +833,7 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap,
        clk_disable(i2c->clk);
        spin_unlock_irqrestore(&i2c->lock, flags);
 
-       return ret;
+       return ret < 0 ? ret : num;
 }
 
 static u32 rk3x_i2c_func(struct i2c_adapter *adap)
index 88057fa..ea72dca 100644 (file)
  * published by the Free Software Foundation.
  */
 
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/i2c.h>
 #include <linux/clk.h>
-#include <linux/io.h>
 #include <linux/delay.h>
-#include <linux/interrupt.h>
 #include <linux/err.h>
-#include <linux/of.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/of.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
 
 /* SSC registers */
 #define SSC_BRG                                0x000
index 098f698..987c124 100644 (file)
@@ -1413,6 +1413,8 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 
        dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name);
 
+       pm_runtime_no_callbacks(&adap->dev);
+
 #ifdef CONFIG_I2C_COMPAT
        res = class_compat_create_link(i2c_adapter_compat_class, &adap->dev,
                                       adap->dev.parent);
index 593f7ca..06cc1ff 100644 (file)
@@ -32,8 +32,9 @@ struct i2c_mux_priv {
        struct i2c_algorithm algo;
 
        struct i2c_adapter *parent;
-       void *mux_priv; /* the mux chip/device */
-       u32  chan_id;   /* the channel id */
+       struct device *mux_dev;
+       void *mux_priv;
+       u32 chan_id;
 
        int (*select)(struct i2c_adapter *, void *mux_priv, u32 chan_id);
        int (*deselect)(struct i2c_adapter *, void *mux_priv, u32 chan_id);
@@ -119,6 +120,7 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent,
 
        /* Set up private adapter data */
        priv->parent = parent;
+       priv->mux_dev = mux_dev;
        priv->mux_priv = mux_priv;
        priv->chan_id = chan_id;
        priv->select = select;
@@ -203,7 +205,7 @@ void i2c_del_mux_adapter(struct i2c_adapter *adap)
        char symlink_name[20];
 
        snprintf(symlink_name, sizeof(symlink_name), "channel-%u", priv->chan_id);
-       sysfs_remove_link(&adap->dev.parent->kobj, symlink_name);
+       sysfs_remove_link(&priv->mux_dev->kobj, symlink_name);
 
        sysfs_remove_link(&priv->adap.dev.kobj, "mux_device");
        i2c_del_adapter(adap);
index 8c014b5..38acb3c 100644 (file)
@@ -99,12 +99,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        if (dmasync)
                dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
 
+       if (!size)
+               return ERR_PTR(-EINVAL);
+
        /*
         * If the combination of the addr and size requested for this memory
         * region causes an integer overflow, return error.
         */
-       if ((PAGE_ALIGN(addr + size) <= size) ||
-           (PAGE_ALIGN(addr + size) <= addr))
+       if (((addr + size) < addr) ||
+           PAGE_ALIGN(addr + size) < (addr + size))
                return ERR_PTR(-EINVAL);
 
        if (!can_do_mlock())
index 259dcc7..88cce9b 100644 (file)
@@ -246,6 +246,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                kfree(uqp);
        }
 
+       list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
+               struct ib_srq *srq = uobj->object;
+               struct ib_uevent_object *uevent =
+                       container_of(uobj, struct ib_uevent_object, uobject);
+
+               idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
+               ib_destroy_srq(srq);
+               ib_uverbs_release_uevent(file, uevent);
+               kfree(uevent);
+       }
+
        list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
                struct ib_cq *cq = uobj->object;
                struct ib_uverbs_event_file *ev_file = cq->cq_context;
@@ -258,17 +269,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                kfree(ucq);
        }
 
-       list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
-               struct ib_srq *srq = uobj->object;
-               struct ib_uevent_object *uevent =
-                       container_of(uobj, struct ib_uevent_object, uobject);
-
-               idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-               ib_destroy_srq(srq);
-               ib_uverbs_release_uevent(file, uevent);
-               kfree(uevent);
-       }
-
        list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
                struct ib_mr *mr = uobj->object;
 
index 33c45df..1ca8e32 100644 (file)
@@ -82,14 +82,14 @@ static int create_file(const char *name, umode_t mode,
 {
        int error;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        *dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(*dentry))
-               error = ipathfs_mknod(parent->d_inode, *dentry,
+               error = ipathfs_mknod(d_inode(parent), *dentry,
                                      mode, fops, data);
        else
                error = PTR_ERR(*dentry);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
 
        return error;
 }
@@ -277,11 +277,11 @@ static int remove_file(struct dentry *parent, char *name)
        }
 
        spin_lock(&tmp->d_lock);
-       if (!d_unhashed(tmp) && tmp->d_inode) {
+       if (!d_unhashed(tmp) && d_really_is_positive(tmp)) {
                dget_dlock(tmp);
                __d_drop(tmp);
                spin_unlock(&tmp->d_lock);
-               simple_unlink(parent->d_inode, tmp);
+               simple_unlink(d_inode(parent), tmp);
        } else
                spin_unlock(&tmp->d_lock);
 
@@ -302,7 +302,7 @@ static int remove_device_files(struct super_block *sb,
        int ret;
 
        root = dget(sb->s_root);
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
        snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
        dir = lookup_one_len(unit, root, strlen(unit));
 
@@ -315,10 +315,10 @@ static int remove_device_files(struct super_block *sb,
        remove_file(dir, "flash");
        remove_file(dir, "atomic_counters");
        d_delete(dir);
-       ret = simple_rmdir(root->d_inode, dir);
+       ret = simple_rmdir(d_inode(root), dir);
 
 bail:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
        dput(root);
        return ret;
 }
index a31e031..0f00204 100644 (file)
@@ -58,14 +58,19 @@ struct mlx4_alias_guid_work_context {
        int                     query_id;
        struct list_head        list;
        int                     block_num;
+       ib_sa_comp_mask         guid_indexes;
+       u8                      method;
 };
 
 struct mlx4_next_alias_guid_work {
        u8 port;
        u8 block_num;
+       u8 method;
        struct mlx4_sriov_alias_guid_info_rec_det rec_det;
 };
 
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+                                    int *resched_delay_sec);
 
 void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
                                         u8 port_num, u8 *p_data)
@@ -118,6 +123,57 @@ ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
        return IB_SA_COMP_MASK(4 + index);
 }
 
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+                                   int port,  int slave_init)
+{
+       __be64 curr_guid, required_guid;
+       int record_num = slave / 8;
+       int index = slave % 8;
+       int port_index = port - 1;
+       unsigned long flags;
+       int do_work = 0;
+
+       spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+       if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
+           GUID_STATE_NEED_PORT_INIT)
+               goto unlock;
+       if (!slave_init) {
+               curr_guid = *(__be64 *)&dev->sriov.
+                       alias_guid.ports_guid[port_index].
+                       all_rec_per_port[record_num].
+                       all_recs[GUID_REC_SIZE * index];
+               if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
+                   !curr_guid)
+                       goto unlock;
+               required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
+       } else {
+               required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
+               if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                       goto unlock;
+       }
+       *(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].
+               all_recs[GUID_REC_SIZE * index] = required_guid;
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].guid_indexes
+               |= mlx4_ib_get_aguid_comp_mask_from_ix(index);
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].status
+               = MLX4_GUID_INFO_STATUS_IDLE;
+       /* set to run immediately */
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].time_to_run = 0;
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].
+               guids_retry_schedule[index] = 0;
+       do_work = 1;
+unlock:
+       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+
+       if (do_work)
+               mlx4_ib_init_alias_guid_work(dev, port_index);
+}
+
 /*
  * Whenever new GUID is set/unset (guid table change) create event and
  * notify the relevant slave (master also should be notified).
@@ -138,10 +194,15 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
        enum slave_port_state prev_state;
        __be64 tmp_cur_ag, form_cache_ag;
        enum slave_port_gen_event gen_event;
+       struct mlx4_sriov_alias_guid_info_rec_det *rec;
+       unsigned long flags;
+       __be64 required_value;
 
        if (!mlx4_is_master(dev->dev))
                return;
 
+       rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
+                       all_rec_per_port[block_num];
        guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
                                   ports_guid[port_num - 1].
                                   all_rec_per_port[block_num].guid_indexes);
@@ -166,8 +227,27 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
                 */
                if (tmp_cur_ag != form_cache_ag)
                        continue;
-               mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
 
+               spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+               required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+
+               if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                       required_value = 0;
+
+               if (tmp_cur_ag == required_value) {
+                       rec->guid_indexes = rec->guid_indexes &
+                              ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
+               } else {
+                       /* may notify port down if value is 0 */
+                       if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
+                               spin_unlock_irqrestore(&dev->sriov.
+                                       alias_guid.ag_work_lock, flags);
+                               continue;
+                       }
+               }
+               spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
+                                      flags);
+               mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
                /*2 cases: Valid GUID, and Invalid Guid*/
 
                if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
@@ -188,10 +268,14 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
                        set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
                                                      MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
                                                      &gen_event);
-                       pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
-                                slave_id, port_num);
-                       mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
-                                                      MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+                       if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
+                               pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+                                        slave_id, port_num);
+                               mlx4_gen_port_state_change_eqe(dev->dev,
+                                                              slave_id,
+                                                              port_num,
+                                                              MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+                       }
                }
        }
 }
@@ -206,6 +290,9 @@ static void aliasguid_query_handler(int status,
        int i;
        struct mlx4_sriov_alias_guid_info_rec_det *rec;
        unsigned long flags, flags1;
+       ib_sa_comp_mask declined_guid_indexes = 0;
+       ib_sa_comp_mask applied_guid_indexes = 0;
+       unsigned int resched_delay_sec = 0;
 
        if (!context)
                return;
@@ -216,9 +303,9 @@ static void aliasguid_query_handler(int status,
                all_rec_per_port[cb_ctx->block_num];
 
        if (status) {
-               rec->status = MLX4_GUID_INFO_STATUS_IDLE;
                pr_debug("(port: %d) failed: status = %d\n",
                         cb_ctx->port, status);
+               rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC;
                goto out;
        }
 
@@ -235,57 +322,101 @@ static void aliasguid_query_handler(int status,
        rec = &dev->sriov.alias_guid.ports_guid[port_index].
                all_rec_per_port[guid_rec->block_num];
 
-       rec->status = MLX4_GUID_INFO_STATUS_SET;
-       rec->method = MLX4_GUID_INFO_RECORD_SET;
-
+       spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
        for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
-               __be64 tmp_cur_ag;
-               tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
+               __be64 sm_response, required_val;
+
+               if (!(cb_ctx->guid_indexes &
+                       mlx4_ib_get_aguid_comp_mask_from_ix(i)))
+                       continue;
+               sm_response = *(__be64 *)&guid_rec->guid_info_list
+                               [i * GUID_REC_SIZE];
+               required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+               if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
+                       if (required_val ==
+                           cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                               goto next_entry;
+
+                       /* A new value was set till we got the response */
+                       pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
+                                be64_to_cpu(required_val),
+                                i, guid_rec->block_num);
+                       goto entry_declined;
+               }
+
                /* check if the SM didn't assign one of the records.
-                * if it didn't, if it was not sysadmin request:
-                * ask the SM to give a new GUID, (instead of the driver request).
+                * if it didn't, re-ask for.
                 */
-               if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
-                       mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
-                                    "block_num: %d was declined by SM, "
-                                    "ownership by %d (0 = driver, 1=sysAdmin,"
-                                    " 2=None)\n", __func__, i,
-                                    guid_rec->block_num, rec->ownership);
-                       if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
-                               /* if it is driver assign, asks for new GUID from SM*/
-                               *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
-                                       MLX4_NOT_SET_GUID;
-
-                               /* Mark the record as not assigned, and let it
-                                * be sent again in the next work sched.*/
-                               rec->status = MLX4_GUID_INFO_STATUS_IDLE;
-                               rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
-                       }
+               if (sm_response == MLX4_NOT_SET_GUID) {
+                       if (rec->guids_retry_schedule[i] == 0)
+                               mlx4_ib_warn(&dev->ib_dev,
+                                            "%s:Record num %d in  block_num: %d was declined by SM\n",
+                                            __func__, i,
+                                            guid_rec->block_num);
+                       goto entry_declined;
                } else {
                       /* properly assigned record. */
                       /* We save the GUID we just got from the SM in the
                        * admin_guid in order to be persistent, and in the
                        * request from the sm the process will ask for the same GUID */
-                       if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
-                           tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
-                               /* the sysadmin assignment failed.*/
-                               mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
-                                            " admin guid after SysAdmin "
-                                            "configuration. "
-                                            "Record num %d in block_num:%d "
-                                            "was declined by SM, "
-                                            "new val(0x%llx) was kept\n",
-                                             __func__, i,
-                                            guid_rec->block_num,
-                                            be64_to_cpu(*(__be64 *) &
-                                                        rec->all_recs[i * GUID_REC_SIZE]));
+                       if (required_val &&
+                           sm_response != required_val) {
+                               /* Warn only on first retry */
+                               if (rec->guids_retry_schedule[i] == 0)
+                                       mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
+                                                    " admin guid after SysAdmin "
+                                                    "configuration. "
+                                                    "Record num %d in block_num:%d "
+                                                    "was declined by SM, "
+                                                    "new val(0x%llx) was kept, SM returned (0x%llx)\n",
+                                                     __func__, i,
+                                                    guid_rec->block_num,
+                                                    be64_to_cpu(required_val),
+                                                    be64_to_cpu(sm_response));
+                               goto entry_declined;
                        } else {
-                               memcpy(&rec->all_recs[i * GUID_REC_SIZE],
-                                      &guid_rec->guid_info_list[i * GUID_REC_SIZE],
-                                      GUID_REC_SIZE);
+                               *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
+                                       sm_response;
+                               if (required_val == 0)
+                                       mlx4_set_admin_guid(dev->dev,
+                                                           sm_response,
+                                                           (guid_rec->block_num
+                                                           * NUM_ALIAS_GUID_IN_REC) + i,
+                                                           cb_ctx->port);
+                               goto next_entry;
                        }
                }
+entry_declined:
+               declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+               rec->guids_retry_schedule[i] =
+                       (rec->guids_retry_schedule[i] == 0) ?  1 :
+                       min((unsigned int)60,
+                           rec->guids_retry_schedule[i] * 2);
+               /* using the minimum value among all entries in that record */
+               resched_delay_sec = (resched_delay_sec == 0) ?
+                               rec->guids_retry_schedule[i] :
+                               min(resched_delay_sec,
+                                   rec->guids_retry_schedule[i]);
+               continue;
+
+next_entry:
+               rec->guids_retry_schedule[i] = 0;
        }
+
+       applied_guid_indexes =  cb_ctx->guid_indexes & ~declined_guid_indexes;
+       if (declined_guid_indexes ||
+           rec->guid_indexes & ~(applied_guid_indexes)) {
+               pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
+                        guid_rec->block_num,
+                        be64_to_cpu((__force __be64)rec->guid_indexes),
+                        be64_to_cpu((__force __be64)applied_guid_indexes),
+                        be64_to_cpu((__force __be64)declined_guid_indexes));
+               rec->time_to_run = ktime_get_real_ns() +
+                       resched_delay_sec * NSEC_PER_SEC;
+       } else {
+               rec->status = MLX4_GUID_INFO_STATUS_SET;
+       }
+       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
        /*
        The func is call here to close the cases when the
        sm doesn't send smp, so in the sa response the driver
@@ -297,10 +428,13 @@ static void aliasguid_query_handler(int status,
 out:
        spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
        spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
-       if (!dev->sriov.is_going_down)
+       if (!dev->sriov.is_going_down) {
+               get_low_record_time_index(dev, port_index, &resched_delay_sec);
                queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
                                   &dev->sriov.alias_guid.ports_guid[port_index].
-                                  alias_guid_work, 0);
+                                  alias_guid_work,
+                                  msecs_to_jiffies(resched_delay_sec * 1000));
+       }
        if (cb_ctx->sa_query) {
                list_del(&cb_ctx->list);
                kfree(cb_ctx);
@@ -317,9 +451,7 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
        ib_sa_comp_mask comp_mask = 0;
 
        dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
-               = MLX4_GUID_INFO_STATUS_IDLE;
-       dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
-               = MLX4_GUID_INFO_RECORD_SET;
+               = MLX4_GUID_INFO_STATUS_SET;
 
        /* calculate the comp_mask for that record.*/
        for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@@ -333,19 +465,21 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
                need to assign GUIDs, then don't put it up for assignment.
                */
                if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
-                   (!index && !i) ||
-                   MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
-                   ports_guid[port - 1].all_rec_per_port[index].ownership)
+                   (!index && !i))
                        continue;
                comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
        }
        dev->sriov.alias_guid.ports_guid[port - 1].
-               all_rec_per_port[index].guid_indexes = comp_mask;
+               all_rec_per_port[index].guid_indexes |= comp_mask;
+       if (dev->sriov.alias_guid.ports_guid[port - 1].
+           all_rec_per_port[index].guid_indexes)
+               dev->sriov.alias_guid.ports_guid[port - 1].
+               all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
+
 }
 
 static int set_guid_rec(struct ib_device *ibdev,
-                       u8 port, int index,
-                       struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+                       struct mlx4_next_alias_guid_work *rec)
 {
        int err;
        struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -354,6 +488,9 @@ static int set_guid_rec(struct ib_device *ibdev,
        struct ib_port_attr attr;
        struct mlx4_alias_guid_work_context *callback_context;
        unsigned long resched_delay, flags, flags1;
+       u8 port = rec->port + 1;
+       int index = rec->block_num;
+       struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
        struct list_head *head =
                &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
 
@@ -380,6 +517,8 @@ static int set_guid_rec(struct ib_device *ibdev,
        callback_context->port = port;
        callback_context->dev = dev;
        callback_context->block_num = index;
+       callback_context->guid_indexes = rec_det->guid_indexes;
+       callback_context->method = rec->method;
 
        memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
 
@@ -399,7 +538,7 @@ static int set_guid_rec(struct ib_device *ibdev,
        callback_context->query_id =
                ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
                                          ibdev, port, &guid_info_rec,
-                                         comp_mask, rec_det->method, 1000,
+                                         comp_mask, rec->method, 1000,
                                          GFP_KERNEL, aliasguid_query_handler,
                                          callback_context,
                                          &callback_context->sa_query);
@@ -434,6 +573,30 @@ out:
        return err;
 }
 
+static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
+{
+       int j, k, entry;
+       __be64 guid;
+
+       /*Check if the SM doesn't need to assign the GUIDs*/
+       for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+               for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
+                       entry = j * NUM_ALIAS_GUID_IN_REC + k;
+                       /* no request for the 0 entry (hw guid) */
+                       if (!entry || entry > dev->dev->persist->num_vfs ||
+                           !mlx4_is_slave_active(dev->dev, entry))
+                               continue;
+                       guid = mlx4_get_admin_guid(dev->dev, entry, port);
+                       *(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
+                               all_rec_per_port[j].all_recs
+                               [GUID_REC_SIZE * k] = guid;
+                       pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
+                                entry,
+                                be64_to_cpu(guid),
+                                port);
+               }
+       }
+}
 void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
 {
        int i;
@@ -443,6 +606,13 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
 
        spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
        spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+
+       if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
+               GUID_STATE_NEED_PORT_INIT) {
+               mlx4_ib_guid_port_init(dev, port);
+               dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
+                       (~GUID_STATE_NEED_PORT_INIT);
+       }
        for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
                invalidate_guid_record(dev, port, i);
 
@@ -462,60 +632,107 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
        spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
 }
 
-/* The function returns the next record that was
- * not configured (or failed to be configured) */
-static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
-                                    struct mlx4_next_alias_guid_work *rec)
+static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
+                               struct mlx4_next_alias_guid_work *next_rec,
+                               int record_index)
 {
-       int j;
-       unsigned long flags;
+       int i;
+       int lowset_time_entry = -1;
+       int lowest_time = 0;
+       ib_sa_comp_mask delete_guid_indexes = 0;
+       ib_sa_comp_mask set_guid_indexes = 0;
+       struct mlx4_sriov_alias_guid_info_rec_det *rec =
+                       &dev->sriov.alias_guid.ports_guid[port].
+                       all_rec_per_port[record_index];
 
-       for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
-               spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
-               if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
-                   MLX4_GUID_INFO_STATUS_IDLE) {
-                       memcpy(&rec->rec_det,
-                              &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
-                              sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
-                       rec->port = port;
-                       rec->block_num = j;
-                       dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
-                               MLX4_GUID_INFO_STATUS_PENDING;
-                       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
-                       return 0;
+       for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+               if (!(rec->guid_indexes &
+                       mlx4_ib_get_aguid_comp_mask_from_ix(i)))
+                       continue;
+
+               if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] ==
+                               cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                       delete_guid_indexes |=
+                               mlx4_ib_get_aguid_comp_mask_from_ix(i);
+               else
+                       set_guid_indexes |=
+                               mlx4_ib_get_aguid_comp_mask_from_ix(i);
+
+               if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <=
+                       lowest_time) {
+                       lowset_time_entry = i;
+                       lowest_time = rec->guids_retry_schedule[i];
                }
-               spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
        }
-       return -ENOENT;
+
+       memcpy(&next_rec->rec_det, rec, sizeof(*rec));
+       next_rec->port = port;
+       next_rec->block_num = record_index;
+
+       if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] ==
+                               cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) {
+               next_rec->rec_det.guid_indexes = delete_guid_indexes;
+               next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
+       } else {
+               next_rec->rec_det.guid_indexes = set_guid_indexes;
+               next_rec->method = MLX4_GUID_INFO_RECORD_SET;
+       }
 }
 
-static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
-                                            int rec_index,
-                                            struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+/* return index of record that should be updated based on lowest
+ * rescheduled time
+ */
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+                                    int *resched_delay_sec)
 {
-       dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
-               rec_det->guid_indexes;
-       memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
-              rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
-       dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
-               rec_det->status;
+       int record_index = -1;
+       u64 low_record_time = 0;
+       struct mlx4_sriov_alias_guid_info_rec_det rec;
+       int j;
+
+       for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+               rec = dev->sriov.alias_guid.ports_guid[port].
+                       all_rec_per_port[j];
+               if (rec.status == MLX4_GUID_INFO_STATUS_IDLE &&
+                   rec.guid_indexes) {
+                       if (record_index == -1 ||
+                           rec.time_to_run < low_record_time) {
+                               record_index = j;
+                               low_record_time = rec.time_to_run;
+                       }
+               }
+       }
+       if (resched_delay_sec) {
+               u64 curr_time = ktime_get_real_ns();
+
+               *resched_delay_sec = (low_record_time < curr_time) ? 0 :
+                       div_u64((low_record_time - curr_time), NSEC_PER_SEC);
+       }
+
+       return record_index;
 }
 
-static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
+/* The function returns the next record that was
+ * not configured (or failed to be configured) */
+static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
+                                    struct mlx4_next_alias_guid_work *rec)
 {
-       int j;
-       struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
-
-       for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
-               memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
-               rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
-                       IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
-                       IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
-                       IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
-                       IB_SA_GUIDINFO_REC_GID7;
-               rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
-               set_administratively_guid_record(dev, port, j, &rec_det);
+       unsigned long flags;
+       int record_index;
+       int ret = 0;
+
+       spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+       record_index = get_low_record_time_index(dev, port, NULL);
+
+       if (record_index < 0) {
+               ret = -ENOENT;
+               goto out;
        }
+
+       set_required_record(dev, port, rec, record_index);
+out:
+       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+       return ret;
 }
 
 static void alias_guid_work(struct work_struct *work)
@@ -545,9 +762,7 @@ static void alias_guid_work(struct work_struct *work)
                goto out;
        }
 
-       set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
-                    &rec->rec_det);
-
+       set_guid_rec(&dev->ib_dev, rec);
 out:
        kfree(rec);
 }
@@ -562,6 +777,12 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
        spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
        spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
        if (!dev->sriov.is_going_down) {
+               /* If there is pending one should cancell then run, otherwise
+                 * won't run till previous one is ended as same work
+                 * struct is used.
+                 */
+               cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
+                                   alias_guid_work);
                queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
                           &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
        }
@@ -609,7 +830,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
 {
        char alias_wq_name[15];
        int ret = 0;
-       int i, j, k;
+       int i, j;
        union ib_gid gid;
 
        if (!mlx4_is_master(dev->dev))
@@ -633,33 +854,25 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
        for (i = 0 ; i < dev->num_ports; i++) {
                memset(&dev->sriov.alias_guid.ports_guid[i], 0,
                       sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
-               /*Check if the SM doesn't need to assign the GUIDs*/
+               dev->sriov.alias_guid.ports_guid[i].state_flags |=
+                               GUID_STATE_NEED_PORT_INIT;
                for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
-                       if (mlx4_ib_sm_guid_assign) {
-                               dev->sriov.alias_guid.ports_guid[i].
-                                       all_rec_per_port[j].
-                                       ownership = MLX4_GUID_DRIVER_ASSIGN;
-                               continue;
-                       }
-                       dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
-                                       ownership = MLX4_GUID_NONE_ASSIGN;
-                       /*mark each val as it was deleted,
-                         till the sysAdmin will give it valid val*/
-                       for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
-                               *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
-                                       all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
-                                               cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
-                       }
+                       /* mark each val as it was deleted */
+                       memset(dev->sriov.alias_guid.ports_guid[i].
+                               all_rec_per_port[j].all_recs, 0xFF,
+                               sizeof(dev->sriov.alias_guid.ports_guid[i].
+                               all_rec_per_port[j].all_recs));
                }
                INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
                /*prepare the records, set them to be allocated by sm*/
+               if (mlx4_ib_sm_guid_assign)
+                       for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
+                               mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
                for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
                        invalidate_guid_record(dev, i + 1, j);
 
                dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
                dev->sriov.alias_guid.ports_guid[i].port  = i;
-               if (mlx4_ib_sm_guid_assign)
-                       set_all_slaves_guids(dev, i);
 
                snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
                dev->sriov.alias_guid.ports_guid[i].wq =
index 5904026..9cd2b00 100644 (file)
@@ -1430,6 +1430,10 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                                                        tun_qp->ring[i].addr,
                                                        rx_buf_size,
                                                        DMA_FROM_DEVICE);
+               if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
+                       kfree(tun_qp->ring[i].addr);
+                       goto err;
+               }
        }
 
        for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
@@ -1442,6 +1446,11 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                                          tun_qp->tx_ring[i].buf.addr,
                                          tx_buf_size,
                                          DMA_TO_DEVICE);
+               if (ib_dma_mapping_error(ctx->ib_dev,
+                                        tun_qp->tx_ring[i].buf.map)) {
+                       kfree(tun_qp->tx_ring[i].buf.addr);
+                       goto tx_err;
+               }
                tun_qp->tx_ring[i].ah = NULL;
        }
        spin_lock_init(&tun_qp->tx_lock);
index 976bea7..57070c5 100644 (file)
@@ -66,9 +66,9 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
-int mlx4_ib_sm_guid_assign = 1;
+int mlx4_ib_sm_guid_assign = 0;
 module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
-MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
+MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
 
 static const char mlx4_ib_version[] =
        DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
@@ -2791,9 +2791,31 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
        case MLX4_DEV_EVENT_SLAVE_INIT:
                /* here, p is the slave id */
                do_slave_init(ibdev, p, 1);
+               if (mlx4_is_master(dev)) {
+                       int i;
+
+                       for (i = 1; i <= ibdev->num_ports; i++) {
+                               if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+                                       == IB_LINK_LAYER_INFINIBAND)
+                                       mlx4_ib_slave_alias_guid_event(ibdev,
+                                                                      p, i,
+                                                                      1);
+                       }
+               }
                return;
 
        case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+               if (mlx4_is_master(dev)) {
+                       int i;
+
+                       for (i = 1; i <= ibdev->num_ports; i++) {
+                               if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+                                       == IB_LINK_LAYER_INFINIBAND)
+                                       mlx4_ib_slave_alias_guid_event(ibdev,
+                                                                      p, i,
+                                                                      0);
+                       }
+               }
                /* here, p is the slave id */
                do_slave_init(ibdev, p, 0);
                return;
index f829fd9..fce3934 100644 (file)
@@ -342,14 +342,9 @@ struct mlx4_ib_ah {
 enum mlx4_guid_alias_rec_status {
        MLX4_GUID_INFO_STATUS_IDLE,
        MLX4_GUID_INFO_STATUS_SET,
-       MLX4_GUID_INFO_STATUS_PENDING,
 };
 
-enum mlx4_guid_alias_rec_ownership {
-       MLX4_GUID_DRIVER_ASSIGN,
-       MLX4_GUID_SYSADMIN_ASSIGN,
-       MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
-};
+#define GUID_STATE_NEED_PORT_INIT 0x01
 
 enum mlx4_guid_alias_rec_method {
        MLX4_GUID_INFO_RECORD_SET       = IB_MGMT_METHOD_SET,
@@ -360,8 +355,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
        u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
        ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
        enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
-       u8 method; /*set or delete*/
-       enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
+       unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
+       u64 time_to_run;
 };
 
 struct mlx4_sriov_alias_guid_port_rec_det {
@@ -369,6 +364,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
        struct workqueue_struct *wq;
        struct delayed_work alias_guid_work;
        u8 port;
+       u32 state_flags;
        struct mlx4_sriov_alias_guid *parent;
        struct list_head cb_list;
 };
@@ -802,6 +798,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
 void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
                             struct attribute *attr);
 ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+                                   int port, int slave_init);
 
 int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
 
index ed2bd67..02fc91c 100644 (file)
@@ -566,6 +566,10 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
                        ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
                                          sizeof (struct mlx4_ib_proxy_sqp_hdr),
                                          DMA_FROM_DEVICE);
+               if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
+                       kfree(qp->sqp_proxy_rcv[i].addr);
+                       goto err;
+               }
        }
        return 0;
 
@@ -2605,8 +2609,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 
        memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
 
-       *lso_hdr_sz  = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
-                                  wr->wr.ud.hlen);
+       *lso_hdr_sz  = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
        *lso_seg_len = halign;
        return 0;
 }
index d10c2b8..6797108 100644 (file)
 static ssize_t show_admin_alias_guid(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
-       int record_num;/*0-15*/
-       int guid_index_in_rec; /*0 - 7*/
        struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
                container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
        struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
        struct mlx4_ib_dev *mdev = port->dev;
+       __be64 sysadmin_ag_val;
 
-       record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
-       guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
+       sysadmin_ag_val = mlx4_get_admin_guid(mdev->dev,
+                                             mlx4_ib_iov_dentry->entry_num,
+                                             port->num);
 
-       return sprintf(buf, "%llx\n",
-                      be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
-                                  ports_guid[port->num - 1].
-                                  all_rec_per_port[record_num].
-                                  all_recs[8 * guid_index_in_rec]));
+       return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
 }
 
 /* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -80,6 +76,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
        struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
        struct mlx4_ib_dev *mdev = port->dev;
        u64 sysadmin_ag_val;
+       unsigned long flags;
 
        record_num = mlx4_ib_iov_dentry->entry_num / 8;
        guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
@@ -87,6 +84,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
                pr_err("GUID 0 block 0 is RO\n");
                return count;
        }
+       spin_lock_irqsave(&mdev->sriov.alias_guid.ag_work_lock, flags);
        sscanf(buf, "%llx", &sysadmin_ag_val);
        *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
                all_rec_per_port[record_num].
@@ -96,33 +94,15 @@ static ssize_t store_admin_alias_guid(struct device *dev,
        /* Change the state to be pending for update */
        mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
                = MLX4_GUID_INFO_STATUS_IDLE ;
-
-       mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-               = MLX4_GUID_INFO_RECORD_SET;
-
-       switch (sysadmin_ag_val) {
-       case MLX4_GUID_FOR_DELETE_VAL:
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-                       = MLX4_GUID_INFO_RECORD_DELETE;
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-                       = MLX4_GUID_SYSADMIN_ASSIGN;
-               break;
-       /* The sysadmin requests the SM to re-assign */
-       case MLX4_NOT_SET_GUID:
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-                       = MLX4_GUID_DRIVER_ASSIGN;
-               break;
-       /* The sysadmin requests a specific value.*/
-       default:
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-                       = MLX4_GUID_SYSADMIN_ASSIGN;
-               break;
-       }
+       mlx4_set_admin_guid(mdev->dev, cpu_to_be64(sysadmin_ag_val),
+                           mlx4_ib_iov_dentry->entry_num,
+                           port->num);
 
        /* set the record index */
        mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
-               = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+               |= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
 
+       spin_unlock_irqrestore(&mdev->sriov.alias_guid.ag_work_lock, flags);
        mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
 
        return count;
index 650897a..bdd5d38 100644 (file)
@@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode,
 {
        int error;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        *dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(*dentry))
-               error = qibfs_mknod(parent->d_inode, *dentry,
+               error = qibfs_mknod(d_inode(parent), *dentry,
                                    mode, fops, data);
        else
                error = PTR_ERR(*dentry);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
 
        return error;
 }
@@ -455,10 +455,10 @@ static int remove_file(struct dentry *parent, char *name)
        }
 
        spin_lock(&tmp->d_lock);
-       if (!d_unhashed(tmp) && tmp->d_inode) {
+       if (!d_unhashed(tmp) && d_really_is_positive(tmp)) {
                __d_drop(tmp);
                spin_unlock(&tmp->d_lock);
-               simple_unlink(parent->d_inode, tmp);
+               simple_unlink(d_inode(parent), tmp);
        } else {
                spin_unlock(&tmp->d_lock);
        }
@@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb,
        int ret, i;
 
        root = dget(sb->s_root);
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
        snprintf(unit, sizeof(unit), "%u", dd->unit);
        dir = lookup_one_len(unit, root, strlen(unit));
 
@@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb,
                goto bail;
        }
 
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
        remove_file(dir, "counters");
        remove_file(dir, "counter_names");
        remove_file(dir, "portcounter_names");
@@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb,
                }
        }
        remove_file(dir, "flash");
-       mutex_unlock(&dir->d_inode->i_mutex);
-       ret = simple_rmdir(root->d_inode, dir);
+       mutex_unlock(&d_inode(dir)->i_mutex);
+       ret = simple_rmdir(d_inode(root), dir);
        d_delete(dir);
        dput(dir);
 
 bail:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
        dput(root);
        return ret;
 }
index d7562be..bd94b0a 100644 (file)
@@ -87,7 +87,6 @@ enum {
        IPOIB_FLAG_ADMIN_UP       = 2,
        IPOIB_PKEY_ASSIGNED       = 3,
        IPOIB_FLAG_SUBINTERFACE   = 5,
-       IPOIB_MCAST_RUN           = 6,
        IPOIB_STOP_REAPER         = 7,
        IPOIB_FLAG_ADMIN_CM       = 9,
        IPOIB_FLAG_UMCAST         = 10,
@@ -98,9 +97,15 @@ enum {
 
        IPOIB_MCAST_FLAG_FOUND    = 0,  /* used in set_multicast_list */
        IPOIB_MCAST_FLAG_SENDONLY = 1,
-       IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
+       /*
+        * For IPOIB_MCAST_FLAG_BUSY
+        * When set, in flight join and mcast->mc is unreliable
+        * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+        *   haven't started yet
+        * When clear and mcast->mc is valid pointer, join was successful
+        */
+       IPOIB_MCAST_FLAG_BUSY     = 2,
        IPOIB_MCAST_FLAG_ATTACHED = 3,
-       IPOIB_MCAST_JOIN_STARTED  = 4,
 
        MAX_SEND_CQE              = 16,
        IPOIB_CM_COPYBREAK        = 256,
@@ -148,6 +153,7 @@ struct ipoib_mcast {
 
        unsigned long created;
        unsigned long backoff;
+       unsigned long delay_until;
 
        unsigned long flags;
        unsigned char logcount;
@@ -292,6 +298,11 @@ struct ipoib_neigh_table {
        struct completion               deleted;
 };
 
+struct ipoib_qp_state_validate {
+       struct work_struct work;
+       struct ipoib_dev_priv   *priv;
+};
+
 /*
  * Device private locking: network stack tx_lock protects members used
  * in TX fast path, lock protects everything else.  lock nests inside
@@ -317,6 +328,7 @@ struct ipoib_dev_priv {
        struct list_head multicast_list;
        struct rb_root multicast_tree;
 
+       struct workqueue_struct *wq;
        struct delayed_work mcast_task;
        struct work_struct carrier_on_task;
        struct work_struct flush_light;
@@ -426,11 +438,6 @@ struct ipoib_neigh {
 #define IPOIB_UD_MTU(ib_mtu)           (ib_mtu - IPOIB_ENCAP_LEN)
 #define IPOIB_UD_BUF_SIZE(ib_mtu)      (ib_mtu + IB_GRH_BYTES)
 
-static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
-{
-       return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
-}
-
 void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
 static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
 {
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
 void ipoib_pkey_dev_check_presence(struct net_device *dev);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
 
 void ipoib_mcast_dev_down(struct net_device *dev);
 void ipoib_mcast_dev_flush(struct net_device *dev);
index 933efce..56959ad 100644 (file)
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
        }
 
        spin_lock_irq(&priv->lock);
-       queue_delayed_work(ipoib_workqueue,
+       queue_delayed_work(priv->wq,
                           &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
        /* Add this entry to passive ids list head, but do not re-add it
         * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                        spin_lock_irqsave(&priv->lock, flags);
                        list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
                        ipoib_cm_start_rx_drain(priv);
-                       queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+                       queue_work(priv->wq, &priv->cm.rx_reap_task);
                        spin_unlock_irqrestore(&priv->lock, flags);
                } else
                        ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                                spin_lock_irqsave(&priv->lock, flags);
                                list_move(&p->list, &priv->cm.rx_reap_list);
                                spin_unlock_irqrestore(&priv->lock, flags);
-                               queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+                               queue_work(priv->wq, &priv->cm.rx_reap_task);
                        }
                        return;
                }
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
                if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                        list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
+                       queue_work(priv->wq, &priv->cm.reap_task);
                }
 
                clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 
                if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                        list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
+                       queue_work(priv->wq, &priv->cm.reap_task);
                }
 
                spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
        tx->dev = dev;
        list_add(&tx->list, &priv->cm.start_list);
        set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-       queue_work(ipoib_workqueue, &priv->cm.start_task);
+       queue_work(priv->wq, &priv->cm.start_task);
        return tx;
 }
 
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
        if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                spin_lock_irqsave(&priv->lock, flags);
                list_move(&tx->list, &priv->cm.reap_list);
-               queue_work(ipoib_workqueue, &priv->cm.reap_task);
+               queue_work(priv->wq, &priv->cm.reap_task);
                ipoib_dbg(priv, "Reap connection for gid %pI6\n",
                          tx->neigh->daddr + 4);
                tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 
        skb_queue_tail(&priv->cm.skb_queue, skb);
        if (e)
-               queue_work(ipoib_workqueue, &priv->cm.skb_task);
+               queue_work(priv->wq, &priv->cm.skb_task);
 }
 
 static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
        }
 
        if (!list_empty(&priv->cm.passive_ids))
-               queue_delayed_work(ipoib_workqueue,
+               queue_delayed_work(priv->wq,
                                   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
        spin_unlock_irq(&priv->lock);
 }
index 72626c3..63b92cb 100644 (file)
@@ -94,39 +94,9 @@ void ipoib_free_ah(struct kref *kref)
 static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
                                  u64 mapping[IPOIB_UD_RX_SG])
 {
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE,
-                                   DMA_FROM_DEVICE);
-               ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
-                                 DMA_FROM_DEVICE);
-       } else
-               ib_dma_unmap_single(priv->ca, mapping[0],
-                                   IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
-                                   DMA_FROM_DEVICE);
-}
-
-static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
-                                  struct sk_buff *skb,
-                                  unsigned int length)
-{
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
-               unsigned int size;
-               /*
-                * There is only two buffers needed for max_payload = 4K,
-                * first buf size is IPOIB_UD_HEAD_SIZE
-                */
-               skb->tail += IPOIB_UD_HEAD_SIZE;
-               skb->len  += length;
-
-               size = length - IPOIB_UD_HEAD_SIZE;
-
-               skb_frag_size_set(frag, size);
-               skb->data_len += size;
-               skb->truesize += PAGE_SIZE;
-       } else
-               skb_put(skb, length);
-
+       ib_dma_unmap_single(priv->ca, mapping[0],
+                           IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
+                           DMA_FROM_DEVICE);
 }
 
 static int ipoib_ib_post_receive(struct net_device *dev, int id)
@@ -156,18 +126,11 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct sk_buff *skb;
        int buf_size;
-       int tailroom;
        u64 *mapping;
 
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               buf_size = IPOIB_UD_HEAD_SIZE;
-               tailroom = 128; /* reserve some tailroom for IP/TCP headers */
-       } else {
-               buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-               tailroom = 0;
-       }
+       buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
 
-       skb = dev_alloc_skb(buf_size + tailroom + 4);
+       skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
        if (unlikely(!skb))
                return NULL;
 
@@ -184,23 +147,8 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
        if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
                goto error;
 
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               struct page *page = alloc_page(GFP_ATOMIC);
-               if (!page)
-                       goto partial_error;
-               skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
-               mapping[1] =
-                       ib_dma_map_page(priv->ca, page,
-                                       0, PAGE_SIZE, DMA_FROM_DEVICE);
-               if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
-                       goto partial_error;
-       }
-
        priv->rx_ring[id].skb = skb;
        return skb;
-
-partial_error:
-       ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
 error:
        dev_kfree_skb_any(skb);
        return NULL;
@@ -278,7 +226,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                       wc->byte_len, wc->slid);
 
        ipoib_ud_dma_unmap_rx(priv, mapping);
-       ipoib_ud_skb_put_frags(priv, skb, wc->byte_len);
+
+       skb_put(skb, wc->byte_len);
 
        /* First byte of dgid signals multicast when 0xff */
        dgid = &((struct ib_grh *)skb->data)->dgid;
@@ -296,6 +245,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        skb_reset_mac_header(skb);
        skb_pull(skb, IPOIB_ENCAP_LEN);
 
+       skb->truesize = SKB_TRUESIZE(skb->len);
+
        ++dev->stats.rx_packets;
        dev->stats.rx_bytes += skb->len;
 
@@ -376,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
        }
 }
 
+/*
+ * As the result of a completion error the QP Can be transferred to SQE states.
+ * The function checks if the (send)QP is in SQE state and
+ * moves it back to RTS state, that in order to have it functional again.
+ */
+static void ipoib_qp_state_validate_work(struct work_struct *work)
+{
+       struct ipoib_qp_state_validate *qp_work =
+               container_of(work, struct ipoib_qp_state_validate, work);
+
+       struct ipoib_dev_priv *priv = qp_work->priv;
+       struct ib_qp_attr qp_attr;
+       struct ib_qp_init_attr query_init_attr;
+       int ret;
+
+       ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
+       if (ret) {
+               ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
+                          __func__, ret);
+               goto free_res;
+       }
+       pr_info("%s: QP: 0x%x is in state: %d\n",
+               __func__, priv->qp->qp_num, qp_attr.qp_state);
+
+       /* currently support only in SQE->RTS transition*/
+       if (qp_attr.qp_state == IB_QPS_SQE) {
+               qp_attr.qp_state = IB_QPS_RTS;
+
+               ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
+               if (ret) {
+                       pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
+                               ret, priv->qp->qp_num);
+                       goto free_res;
+               }
+               pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
+                       __func__, priv->qp->qp_num);
+       } else {
+               pr_warn("QP (%d) will stay in state: %d\n",
+                       priv->qp->qp_num, qp_attr.qp_state);
+       }
+
+free_res:
+       kfree(qp_work);
+}
+
 static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -407,10 +403,22 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
                netif_wake_queue(dev);
 
        if (wc->status != IB_WC_SUCCESS &&
-           wc->status != IB_WC_WR_FLUSH_ERR)
+           wc->status != IB_WC_WR_FLUSH_ERR) {
+               struct ipoib_qp_state_validate *qp_work;
                ipoib_warn(priv, "failed send event "
                           "(status=%d, wrid=%d vend_err %x)\n",
                           wc->status, wr_id, wc->vendor_err);
+               qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
+               if (!qp_work) {
+                       ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
+                                  __func__, priv->qp->qp_num);
+                       return;
+               }
+
+               INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
+               qp_work->priv = priv;
+               queue_work(priv->wq, &qp_work->work);
+       }
 }
 
 static int poll_tx(struct ipoib_dev_priv *priv)
@@ -655,16 +663,33 @@ void ipoib_reap_ah(struct work_struct *work)
        __ipoib_reap_ah(dev);
 
        if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+               queue_delayed_work(priv->wq, &priv->ah_reap_task,
                                   round_jiffies_relative(HZ));
 }
 
+static void ipoib_flush_ah(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       cancel_delayed_work(&priv->ah_reap_task);
+       flush_workqueue(priv->wq);
+       ipoib_reap_ah(&priv->ah_reap_task.work);
+}
+
+static void ipoib_stop_ah(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       set_bit(IPOIB_STOP_REAPER, &priv->flags);
+       ipoib_flush_ah(dev);
+}
+
 static void ipoib_ib_tx_timer_func(unsigned long ctx)
 {
        drain_tx_cq((struct net_device *)ctx);
 }
 
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret;
@@ -696,7 +721,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
        }
 
        clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-       queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+       queue_delayed_work(priv->wq, &priv->ah_reap_task,
                           round_jiffies_relative(HZ));
 
        if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +731,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
 dev_stop:
        if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
                napi_enable(&priv->napi);
-       ipoib_ib_dev_stop(dev, flush);
+       ipoib_ib_dev_stop(dev);
        return -1;
 }
 
@@ -738,7 +763,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
        return ipoib_mcast_start_thread(dev);
 }
 
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -747,7 +772,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
        clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
        netif_carrier_off(dev);
 
-       ipoib_mcast_stop_thread(dev, flush);
+       ipoib_mcast_stop_thread(dev);
        ipoib_mcast_dev_flush(dev);
 
        ipoib_flush_paths(dev);
@@ -807,7 +832,7 @@ void ipoib_drain_cq(struct net_device *dev)
        local_bh_enable();
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_attr qp_attr;
@@ -877,24 +902,7 @@ timeout:
        if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
                ipoib_warn(priv, "Failed to modify QP to RESET state\n");
 
-       /* Wait for all AHs to be reaped */
-       set_bit(IPOIB_STOP_REAPER, &priv->flags);
-       cancel_delayed_work(&priv->ah_reap_task);
-       if (flush)
-               flush_workqueue(ipoib_workqueue);
-
-       begin = jiffies;
-
-       while (!list_empty(&priv->dead_ahs)) {
-               __ipoib_reap_ah(dev);
-
-               if (time_after(jiffies, begin + HZ)) {
-                       ipoib_warn(priv, "timing out; will leak address handles\n");
-                       break;
-               }
-
-               msleep(1);
-       }
+       ipoib_flush_ah(dev);
 
        ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
 
@@ -918,7 +926,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
                    (unsigned long) dev);
 
        if (dev->flags & IFF_UP) {
-               if (ipoib_ib_dev_open(dev, 1)) {
+               if (ipoib_ib_dev_open(dev)) {
                        ipoib_transport_dev_cleanup(dev);
                        return -ENODEV;
                }
@@ -1037,15 +1045,16 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
        if (level == IPOIB_FLUSH_LIGHT) {
                ipoib_mark_paths_invalid(dev);
                ipoib_mcast_dev_flush(dev);
+               ipoib_flush_ah(dev);
        }
 
        if (level >= IPOIB_FLUSH_NORMAL)
-               ipoib_ib_dev_down(dev, 0);
+               ipoib_ib_dev_down(dev);
 
        if (level == IPOIB_FLUSH_HEAVY) {
                if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-                       ipoib_ib_dev_stop(dev, 0);
-               if (ipoib_ib_dev_open(dev, 0) != 0)
+                       ipoib_ib_dev_stop(dev);
+               if (ipoib_ib_dev_open(dev) != 0)
                        return;
                if (netif_queue_stopped(dev))
                        netif_start_queue(dev);
@@ -1097,9 +1106,17 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
         */
        ipoib_flush_paths(dev);
 
-       ipoib_mcast_stop_thread(dev, 1);
+       ipoib_mcast_stop_thread(dev);
        ipoib_mcast_dev_flush(dev);
 
+       /*
+        * All of our ah references aren't free until after
+        * ipoib_mcast_dev_flush(), ipoib_flush_paths, and
+        * the neighbor garbage collection is stopped and reaped.
+        * That should all be done now, so make a final ah flush.
+        */
+       ipoib_stop_ah(dev);
+
        ipoib_transport_dev_cleanup(dev);
 }
 
index 915ad04..9e1b203 100644 (file)
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
 
        set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
-       if (ipoib_ib_dev_open(dev, 1)) {
+       if (ipoib_ib_dev_open(dev)) {
                if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
                        return 0;
                goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
        return 0;
 
 err_stop:
-       ipoib_ib_dev_stop(dev, 1);
+       ipoib_ib_dev_stop(dev);
 
 err_disable:
        clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
 
        netif_stop_queue(dev);
 
-       ipoib_ib_dev_down(dev, 1);
-       ipoib_ib_dev_stop(dev, 0);
+       ipoib_ib_dev_down(dev);
+       ipoib_ib_dev_stop(dev);
 
        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                struct ipoib_dev_priv *cpriv;
@@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 
                if (!path->query && path_rec_start(dev, path))
                        goto err_path;
-
-               __skb_queue_tail(&neigh->queue, skb);
+               if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+                       __skb_queue_tail(&neigh->queue, skb);
+               else
+                       goto err_drop;
        }
 
        spin_unlock_irqrestore(&priv->lock, flags);
@@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                        new_path = 1;
                }
                if (path) {
-                       __skb_queue_tail(&path->queue, skb);
+                       if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+                               __skb_queue_tail(&path->queue, skb);
+                       } else {
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb);
+                       }
 
                        if (!path->query && path_rec_start(dev, path)) {
                                spin_unlock_irqrestore(&priv->lock, flags);
@@ -839,7 +846,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
                return;
        }
 
-       queue_work(ipoib_workqueue, &priv->restart_task);
+       queue_work(priv->wq, &priv->restart_task);
 }
 
 static int ipoib_get_iflink(const struct net_device *dev)
@@ -966,7 +973,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
        __ipoib_reap_neigh(priv);
 
        if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+               queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                                   arp_tbl.gc_interval);
 }
 
@@ -1145,7 +1152,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 
        /* start garbage collection */
        clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-       queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+       queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                           arp_tbl.gc_interval);
 
        return 0;
@@ -1274,15 +1281,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-       if (ipoib_neigh_hash_init(priv) < 0)
-               goto out;
        /* Allocate RX/TX "rings" to hold queued skbs */
        priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
                                GFP_KERNEL);
        if (!priv->rx_ring) {
                printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
                       ca->name, ipoib_recvq_size);
-               goto out_neigh_hash_cleanup;
+               goto out;
        }
 
        priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1297,16 +1302,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
        if (ipoib_ib_dev_init(dev, ca, port))
                goto out_tx_ring_cleanup;
 
+       /*
+        * Must be after ipoib_ib_dev_init so we can allocate a per
+        * device wq there and use it here
+        */
+       if (ipoib_neigh_hash_init(priv) < 0)
+               goto out_dev_uninit;
+
        return 0;
 
+out_dev_uninit:
+       ipoib_ib_dev_cleanup(dev);
+
 out_tx_ring_cleanup:
        vfree(priv->tx_ring);
 
 out_rx_ring_cleanup:
        kfree(priv->rx_ring);
 
-out_neigh_hash_cleanup:
-       ipoib_neigh_hash_uninit(dev);
 out:
        return -ENOMEM;
 }
@@ -1329,6 +1342,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
        }
        unregister_netdevice_many(&head);
 
+       /*
+        * Must be before ipoib_ib_dev_cleanup or we delete an in use
+        * work queue
+        */
+       ipoib_neigh_hash_uninit(dev);
+
        ipoib_ib_dev_cleanup(dev);
 
        kfree(priv->rx_ring);
@@ -1336,8 +1355,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
 
        priv->rx_ring = NULL;
        priv->tx_ring = NULL;
-
-       ipoib_neigh_hash_uninit(dev);
 }
 
 static const struct header_ops ipoib_header_ops = {
@@ -1646,10 +1663,11 @@ sysfs_failed:
 
 register_failed:
        ib_unregister_event_handler(&priv->event_handler);
+       flush_workqueue(ipoib_workqueue);
        /* Stop GC if started before flush */
        set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
        cancel_delayed_work(&priv->neigh_reap_task);
-       flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
 
 event_failed:
        ipoib_dev_cleanup(priv->dev);
@@ -1712,6 +1730,7 @@ static void ipoib_remove_one(struct ib_device *device)
 
        list_for_each_entry_safe(priv, tmp, dev_list, list) {
                ib_unregister_event_handler(&priv->event_handler);
+               flush_workqueue(ipoib_workqueue);
 
                rtnl_lock();
                dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
@@ -1720,7 +1739,7 @@ static void ipoib_remove_one(struct ib_device *device)
                /* Stop GC */
                set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
                cancel_delayed_work(&priv->neigh_reap_task);
-               flush_workqueue(ipoib_workqueue);
+               flush_workqueue(priv->wq);
 
                unregister_netdev(priv->dev);
                free_netdev(priv->dev);
@@ -1755,14 +1774,16 @@ static int __init ipoib_init_module(void)
                return ret;
 
        /*
-        * We create our own workqueue mainly because we want to be
-        * able to flush it when devices are being removed.  We can't
-        * use schedule_work()/flush_scheduled_work() because both
-        * unregister_netdev() and linkwatch_event take the rtnl lock,
-        * so flush_scheduled_work() can deadlock during device
-        * removal.
+        * We create a global workqueue here that is used for all flush
+        * operations.  However, if you attempt to flush a workqueue
+        * from a task on that same workqueue, it deadlocks the system.
+        * We want to be able to flush the tasks associated with a
+        * specific net device, so we also create a workqueue for each
+        * netdevice.  We queue up the tasks for that device only on
+        * its private workqueue, and we only queue up flush events
+        * on our global flush workqueue.  This avoids the deadlocks.
         */
-       ipoib_workqueue = create_singlethread_workqueue("ipoib");
+       ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
        if (!ipoib_workqueue) {
                ret = -ENOMEM;
                goto err_fs;
index ffb83b5..0d23e05 100644 (file)
@@ -55,8 +55,6 @@ MODULE_PARM_DESC(mcast_debug_level,
                 "Enable multicast debug tracing if > 0");
 #endif
 
-static DEFINE_MUTEX(mcast_mutex);
-
 struct ipoib_mcast_iter {
        struct net_device *dev;
        union ib_gid       mgid;
@@ -66,6 +64,48 @@ struct ipoib_mcast_iter {
        unsigned int       send_only;
 };
 
+/*
+ * This should be called with the priv->lock held
+ */
+static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
+                                              struct ipoib_mcast *mcast,
+                                              bool delay)
+{
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               return;
+
+       /*
+        * We will be scheduling *something*, so cancel whatever is
+        * currently scheduled first
+        */
+       cancel_delayed_work(&priv->mcast_task);
+       if (mcast && delay) {
+               /*
+                * We had a failure and want to schedule a retry later
+                */
+               mcast->backoff *= 2;
+               if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+                       mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+               mcast->delay_until = jiffies + (mcast->backoff * HZ);
+               /*
+                * Mark this mcast for its delay, but restart the
+                * task immediately.  The join task will make sure to
+                * clear out all entries without delays, and then
+                * schedule itself to run again when the earliest
+                * delay expires
+                */
+               queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+       } else if (delay) {
+               /*
+                * Special case of retrying after a failure to
+                * allocate the broadcast multicast group, wait
+                * 1 second and try again
+                */
+               queue_delayed_work(priv->wq, &priv->mcast_task, HZ);
+       } else
+               queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+}
+
 static void ipoib_mcast_free(struct ipoib_mcast *mcast)
 {
        struct net_device *dev = mcast->dev;
@@ -103,6 +143,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
 
        mcast->dev = dev;
        mcast->created = jiffies;
+       mcast->delay_until = jiffies;
        mcast->backoff = 1;
 
        INIT_LIST_HEAD(&mcast->list);
@@ -185,17 +226,27 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                        spin_unlock_irq(&priv->lock);
                        return -EAGAIN;
                }
-               priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+               /*update priv member according to the new mcast*/
+               priv->broadcast->mcmember.qkey = mcmember->qkey;
+               priv->broadcast->mcmember.mtu = mcmember->mtu;
+               priv->broadcast->mcmember.traffic_class = mcmember->traffic_class;
+               priv->broadcast->mcmember.rate = mcmember->rate;
+               priv->broadcast->mcmember.sl = mcmember->sl;
+               priv->broadcast->mcmember.flow_label = mcmember->flow_label;
+               priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;
+               /* assume if the admin and the mcast are the same both can be changed */
+               if (priv->mcast_mtu == priv->admin_mtu)
+                       priv->admin_mtu =
+                       priv->mcast_mtu =
+                       IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+               else
+                       priv->mcast_mtu =
+                       IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+
                priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
                spin_unlock_irq(&priv->lock);
                priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
                set_qkey = 1;
-
-               if (!ipoib_cm_admin_enabled(dev)) {
-                       rtnl_lock();
-                       dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
-                       rtnl_unlock();
-               }
        }
 
        if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -270,107 +321,35 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
        return 0;
 }
 
-static int
-ipoib_mcast_sendonly_join_complete(int status,
-                                  struct ib_sa_multicast *multicast)
-{
-       struct ipoib_mcast *mcast = multicast->context;
-       struct net_device *dev = mcast->dev;
-
-       /* We trap for port events ourselves. */
-       if (status == -ENETRESET)
-               return 0;
-
-       if (!status)
-               status = ipoib_mcast_join_finish(mcast, &multicast->rec);
-
-       if (status) {
-               if (mcast->logcount++ < 20)
-                       ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
-                                       mcast->mcmember.mgid.raw, status);
-
-               /* Flush out any queued packets */
-               netif_tx_lock_bh(dev);
-               while (!skb_queue_empty(&mcast->pkt_queue)) {
-                       ++dev->stats.tx_dropped;
-                       dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
-               }
-               netif_tx_unlock_bh(dev);
-
-               /* Clear the busy flag so we try again */
-               status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
-                                           &mcast->flags);
-       }
-       return status;
-}
-
-static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
-{
-       struct net_device *dev = mcast->dev;
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ib_sa_mcmember_rec rec = {
-#if 0                          /* Some SMs don't support send-only yet */
-               .join_state = 4
-#else
-               .join_state = 1
-#endif
-       };
-       int ret = 0;
-
-       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-               ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
-               return -ENODEV;
-       }
-
-       if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
-               ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
-               return -EBUSY;
-       }
-
-       rec.mgid     = mcast->mcmember.mgid;
-       rec.port_gid = priv->local_gid;
-       rec.pkey     = cpu_to_be16(priv->pkey);
-
-       mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
-                                        priv->port, &rec,
-                                        IB_SA_MCMEMBER_REC_MGID        |
-                                        IB_SA_MCMEMBER_REC_PORT_GID    |
-                                        IB_SA_MCMEMBER_REC_PKEY        |
-                                        IB_SA_MCMEMBER_REC_JOIN_STATE,
-                                        GFP_ATOMIC,
-                                        ipoib_mcast_sendonly_join_complete,
-                                        mcast);
-       if (IS_ERR(mcast->mc)) {
-               ret = PTR_ERR(mcast->mc);
-               clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-               ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
-                          ret);
-       } else {
-               ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
-                               mcast->mcmember.mgid.raw);
-       }
-
-       return ret;
-}
-
 void ipoib_mcast_carrier_on_task(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
                                                   carrier_on_task);
        struct ib_port_attr attr;
 
-       /*
-        * Take rtnl_lock to avoid racing with ipoib_stop() and
-        * turning the carrier back on while a device is being
-        * removed.
-        */
        if (ib_query_port(priv->ca, priv->port, &attr) ||
            attr.state != IB_PORT_ACTIVE) {
                ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
                return;
        }
 
-       rtnl_lock();
+       /*
+        * Take rtnl_lock to avoid racing with ipoib_stop() and
+        * turning the carrier back on while a device is being
+        * removed.  However, ipoib_stop() will attempt to flush
+        * the workqueue while holding the rtnl lock, so loop
+        * on trylock until either we get the lock or we see
+        * FLAG_OPER_UP go away as that signals that we are bailing
+        * and can safely ignore the carrier on work.
+        */
+       while (!rtnl_trylock()) {
+               if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+                       return;
+               else
+                       msleep(20);
+       }
+       if (!ipoib_cm_admin_enabled(priv->dev))
+               dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
        netif_carrier_on(priv->dev);
        rtnl_unlock();
 }
@@ -382,7 +361,9 @@ static int ipoib_mcast_join_complete(int status,
        struct net_device *dev = mcast->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-       ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
+       ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
+                       test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
+                       "sendonly " : "",
                        mcast->mcmember.mgid.raw, status);
 
        /* We trap for port events ourselves. */
@@ -396,49 +377,74 @@ static int ipoib_mcast_join_complete(int status,
 
        if (!status) {
                mcast->backoff = 1;
-               mutex_lock(&mcast_mutex);
-               if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(ipoib_workqueue,
-                                          &priv->mcast_task, 0);
-               mutex_unlock(&mcast_mutex);
+               mcast->delay_until = jiffies;
 
                /*
-                * Defer carrier on work to ipoib_workqueue to avoid a
-                * deadlock on rtnl_lock here.
+                * Defer carrier on work to priv->wq to avoid a
+                * deadlock on rtnl_lock here.  Requeue our multicast
+                * work too, which will end up happening right after
+                * our carrier on task work and will allow us to
+                * send out all of the non-broadcast joins
                 */
-               if (mcast == priv->broadcast)
-                       queue_work(ipoib_workqueue, &priv->carrier_on_task);
-
-               status = 0;
-               goto out;
-       }
+               if (mcast == priv->broadcast) {
+                       spin_lock_irq(&priv->lock);
+                       queue_work(priv->wq, &priv->carrier_on_task);
+                       __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+                       goto out_locked;
+               }
+       } else {
+               if (mcast->logcount++ < 20) {
+                       if (status == -ETIMEDOUT || status == -EAGAIN) {
+                               ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
+                                               test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
+                                               mcast->mcmember.mgid.raw, status);
+                       } else {
+                               ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
+                                               test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
+                                          mcast->mcmember.mgid.raw, status);
+                       }
+               }
 
-       if (mcast->logcount++ < 20) {
-               if (status == -ETIMEDOUT || status == -EAGAIN) {
-                       ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
-                                       mcast->mcmember.mgid.raw, status);
+               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+                   mcast->backoff >= 2) {
+                       /*
+                        * We only retry sendonly joins once before we drop
+                        * the packet and quit trying to deal with the
+                        * group.  However, we leave the group in the
+                        * mcast list as an unjoined group.  If we want to
+                        * try joining again, we simply queue up a packet
+                        * and restart the join thread.  The empty queue
+                        * is why the join thread ignores this group.
+                        */
+                       mcast->backoff = 1;
+                       netif_tx_lock_bh(dev);
+                       while (!skb_queue_empty(&mcast->pkt_queue)) {
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
+                       }
+                       netif_tx_unlock_bh(dev);
                } else {
-                       ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
-                                  mcast->mcmember.mgid.raw, status);
+                       spin_lock_irq(&priv->lock);
+                       /* Requeue this join task with a backoff delay */
+                       __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
+                       goto out_locked;
                }
        }
-
-       mcast->backoff *= 2;
-       if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-               mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
-
-       /* Clear the busy flag so we try again */
-       status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-
-       mutex_lock(&mcast_mutex);
+out:
        spin_lock_irq(&priv->lock);
-       if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
-                                  mcast->backoff * HZ);
+out_locked:
+       /*
+        * Make sure to set mcast->mc before we clear the busy flag to avoid
+        * racing with code that checks for BUSY before checking mcast->mc
+        */
+       if (status)
+               mcast->mc = NULL;
+       else
+               mcast->mc = multicast;
+       clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
        spin_unlock_irq(&priv->lock);
-       mutex_unlock(&mcast_mutex);
-out:
        complete(&mcast->done);
+
        return status;
 }
 
@@ -446,6 +452,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                             int create)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ib_sa_multicast *multicast;
        struct ib_sa_mcmember_rec rec = {
                .join_state = 1
        };
@@ -487,29 +494,18 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
        }
 
-       set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-       init_completion(&mcast->done);
-       set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
-
-       mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
+       multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
                                         &rec, comp_mask, GFP_KERNEL,
                                         ipoib_mcast_join_complete, mcast);
-       if (IS_ERR(mcast->mc)) {
+       if (IS_ERR(multicast)) {
+               ret = PTR_ERR(multicast);
+               ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
+               spin_lock_irq(&priv->lock);
+               /* Requeue this join task with a backoff delay */
+               __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
                clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+               spin_unlock_irq(&priv->lock);
                complete(&mcast->done);
-               ret = PTR_ERR(mcast->mc);
-               ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
-
-               mcast->backoff *= 2;
-               if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-                       mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
-
-               mutex_lock(&mcast_mutex);
-               if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(ipoib_workqueue,
-                                          &priv->mcast_task,
-                                          mcast->backoff * HZ);
-               mutex_unlock(&mcast_mutex);
        }
 }
 
@@ -519,8 +515,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
                container_of(work, struct ipoib_dev_priv, mcast_task.work);
        struct net_device *dev = priv->dev;
        struct ib_port_attr port_attr;
+       unsigned long delay_until = 0;
+       struct ipoib_mcast *mcast = NULL;
+       int create = 1;
 
-       if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
                return;
 
        if (ib_query_port(priv->ca, priv->port, &port_attr) ||
@@ -536,93 +535,118 @@ void ipoib_mcast_join_task(struct work_struct *work)
        else
                memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
+       spin_lock_irq(&priv->lock);
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               goto out;
+
        if (!priv->broadcast) {
                struct ipoib_mcast *broadcast;
 
-               if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-                       return;
-
-               broadcast = ipoib_mcast_alloc(dev, 1);
+               broadcast = ipoib_mcast_alloc(dev, 0);
                if (!broadcast) {
                        ipoib_warn(priv, "failed to allocate broadcast group\n");
-                       mutex_lock(&mcast_mutex);
-                       if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                               queue_delayed_work(ipoib_workqueue,
-                                                  &priv->mcast_task, HZ);
-                       mutex_unlock(&mcast_mutex);
-                       return;
+                       /*
+                        * Restart us after a 1 second delay to retry
+                        * creating our broadcast group and attaching to
+                        * it.  Until this succeeds, this ipoib dev is
+                        * completely stalled (multicast wise).
+                        */
+                       __ipoib_mcast_schedule_join_thread(priv, NULL, 1);
+                       goto out;
                }
 
-               spin_lock_irq(&priv->lock);
                memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
                       sizeof (union ib_gid));
                priv->broadcast = broadcast;
 
                __ipoib_mcast_add(dev, priv->broadcast);
-               spin_unlock_irq(&priv->lock);
        }
 
        if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-               if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
-                       ipoib_mcast_join(dev, priv->broadcast, 0);
-               return;
-       }
-
-       while (1) {
-               struct ipoib_mcast *mcast = NULL;
-
-               spin_lock_irq(&priv->lock);
-               list_for_each_entry(mcast, &priv->multicast_list, list) {
-                       if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
-                           && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
-                           && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
-                               /* Found the next unjoined group */
-                               break;
+               if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
+                   !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
+                       mcast = priv->broadcast;
+                       create = 0;
+                       if (mcast->backoff > 1 &&
+                           time_before(jiffies, mcast->delay_until)) {
+                               delay_until = mcast->delay_until;
+                               mcast = NULL;
                        }
                }
-               spin_unlock_irq(&priv->lock);
+               goto out;
+       }
 
-               if (&mcast->list == &priv->multicast_list) {
-                       /* All done */
-                       break;
+       /*
+        * We'll never get here until the broadcast group is both allocated
+        * and attached
+        */
+       list_for_each_entry(mcast, &priv->multicast_list, list) {
+               if (IS_ERR_OR_NULL(mcast->mc) &&
+                   !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
+                   (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
+                    !skb_queue_empty(&mcast->pkt_queue))) {
+                       if (mcast->backoff == 1 ||
+                           time_after_eq(jiffies, mcast->delay_until)) {
+                               /* Found the next unjoined group */
+                               init_completion(&mcast->done);
+                               set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+                               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+                                       create = 0;
+                               else
+                                       create = 1;
+                               spin_unlock_irq(&priv->lock);
+                               ipoib_mcast_join(dev, mcast, create);
+                               spin_lock_irq(&priv->lock);
+                       } else if (!delay_until ||
+                                time_before(mcast->delay_until, delay_until))
+                               delay_until = mcast->delay_until;
                }
-
-               ipoib_mcast_join(dev, mcast, 1);
-               return;
        }
 
-       ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
+       mcast = NULL;
+       ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");
 
-       clear_bit(IPOIB_MCAST_RUN, &priv->flags);
+out:
+       if (delay_until) {
+               cancel_delayed_work(&priv->mcast_task);
+               queue_delayed_work(priv->wq, &priv->mcast_task,
+                                  delay_until - jiffies);
+       }
+       if (mcast) {
+               init_completion(&mcast->done);
+               set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+       }
+       spin_unlock_irq(&priv->lock);
+       if (mcast)
+               ipoib_mcast_join(dev, mcast, create);
 }
 
 int ipoib_mcast_start_thread(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
+       unsigned long flags;
 
        ipoib_dbg_mcast(priv, "starting multicast thread\n");
 
-       mutex_lock(&mcast_mutex);
-       if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
-       mutex_unlock(&mcast_mutex);
+       spin_lock_irqsave(&priv->lock, flags);
+       __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+       spin_unlock_irqrestore(&priv->lock, flags);
 
        return 0;
 }
 
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+int ipoib_mcast_stop_thread(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
+       unsigned long flags;
 
        ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
-       mutex_lock(&mcast_mutex);
-       clear_bit(IPOIB_MCAST_RUN, &priv->flags);
+       spin_lock_irqsave(&priv->lock, flags);
        cancel_delayed_work(&priv->mcast_task);
-       mutex_unlock(&mcast_mutex);
+       spin_unlock_irqrestore(&priv->lock, flags);
 
-       if (flush)
-               flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
 
        return 0;
 }
@@ -633,6 +657,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
        int ret = 0;
 
        if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+               ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
+
+       if (!IS_ERR_OR_NULL(mcast->mc))
                ib_sa_free_multicast(mcast->mc);
 
        if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -644,7 +671,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
                                      be16_to_cpu(mcast->mcmember.mlid));
                if (ret)
                        ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
-       }
+       } else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+               ipoib_dbg(priv, "leaving with no mcmember but not a "
+                         "SENDONLY join\n");
 
        return 0;
 }
@@ -667,49 +696,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
        }
 
        mcast = __ipoib_mcast_find(dev, mgid);
-       if (!mcast) {
-               /* Let's create a new send only group now */
-               ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
-                               mgid);
-
-               mcast = ipoib_mcast_alloc(dev, 0);
+       if (!mcast || !mcast->ah) {
                if (!mcast) {
-                       ipoib_warn(priv, "unable to allocate memory for "
-                                  "multicast structure\n");
-                       ++dev->stats.tx_dropped;
-                       dev_kfree_skb_any(skb);
-                       goto out;
-               }
-
-               set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
-               memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
-               __ipoib_mcast_add(dev, mcast);
-               list_add_tail(&mcast->list, &priv->multicast_list);
-       }
+                       /* Let's create a new send only group now */
+                       ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
+                                       mgid);
+
+                       mcast = ipoib_mcast_alloc(dev, 0);
+                       if (!mcast) {
+                               ipoib_warn(priv, "unable to allocate memory "
+                                          "for multicast structure\n");
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb);
+                               goto unlock;
+                       }
 
-       if (!mcast->ah) {
+                       set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
+                       memcpy(mcast->mcmember.mgid.raw, mgid,
+                              sizeof (union ib_gid));
+                       __ipoib_mcast_add(dev, mcast);
+                       list_add_tail(&mcast->list, &priv->multicast_list);
+               }
                if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
                        skb_queue_tail(&mcast->pkt_queue, skb);
                else {
                        ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
                }
-
-               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-                       ipoib_dbg_mcast(priv, "no address vector, "
-                                       "but multicast join already started\n");
-               else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-                       ipoib_mcast_sendonly_join(mcast);
-
-               /*
-                * If lookup completes between here and out:, don't
-                * want to send packet twice.
-                */
-               mcast = NULL;
-       }
-
-out:
-       if (mcast && mcast->ah) {
+               if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+                       __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+               }
+       } else {
                struct ipoib_neigh *neigh;
 
                spin_unlock_irqrestore(&priv->lock, flags);
@@ -759,9 +776,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       /* seperate between the wait to the leave*/
+       /*
+        * make sure the in-flight joins have finished before we attempt
+        * to leave
+        */
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-               if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        wait_for_completion(&mcast->done);
 
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -792,9 +812,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
        unsigned long flags;
        struct ib_sa_mcmember_rec rec;
 
-       ipoib_dbg_mcast(priv, "restarting multicast task\n");
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               /*
+                * shortcut...on shutdown flush is called next, just
+                * let it do all the work
+                */
+               return;
 
-       ipoib_mcast_stop_thread(dev, 0);
+       ipoib_dbg_mcast(priv, "restarting multicast task\n");
 
        local_irq_save(flags);
        netif_addr_lock(dev);
@@ -880,14 +905,27 @@ void ipoib_mcast_restart_task(struct work_struct *work)
        netif_addr_unlock(dev);
        local_irq_restore(flags);
 
-       /* We have to cancel outside of the spinlock */
+       /*
+        * make sure the in-flight joins have finished before we attempt
+        * to leave
+        */
+       list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+                       wait_for_completion(&mcast->done);
+
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
                ipoib_mcast_leave(mcast->dev, mcast);
                ipoib_mcast_free(mcast);
        }
 
-       if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-               ipoib_mcast_start_thread(dev);
+       /*
+        * Double check that we are still up
+        */
+       if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
+               spin_lock_irqsave(&priv->lock, flags);
+               __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+               spin_unlock_irqrestore(&priv->lock, flags);
+       }
 }
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
index c56d5d4..e5cc430 100644 (file)
@@ -157,6 +157,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                goto out_free_pd;
        }
 
+       /*
+        * the various IPoIB tasks assume they will never race against
+        * themselves, so always use a single thread workqueue
+        */
+       priv->wq = create_singlethread_workqueue("ipoib_wq");
+       if (!priv->wq) {
+               printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+               goto out_free_mr;
+       }
+
        size = ipoib_recvq_size + 1;
        ret = ipoib_cm_dev_init(dev);
        if (!ret) {
@@ -165,12 +175,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                        size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
                else
                        size += ipoib_recvq_size * ipoib_max_conn_qp;
-       }
+       } else
+               goto out_free_wq;
 
        priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
        if (IS_ERR(priv->recv_cq)) {
                printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
-               goto out_free_mr;
+               goto out_cm_dev_cleanup;
        }
 
        priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
@@ -216,15 +227,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
        priv->tx_wr.send_flags  = IB_SEND_SIGNALED;
 
        priv->rx_sge[0].lkey = priv->mr->lkey;
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
-               priv->rx_sge[1].length = PAGE_SIZE;
-               priv->rx_sge[1].lkey = priv->mr->lkey;
-               priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
-       } else {
-               priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-               priv->rx_wr.num_sge = 1;
-       }
+
+       priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+       priv->rx_wr.num_sge = 1;
+
        priv->rx_wr.next = NULL;
        priv->rx_wr.sg_list = priv->rx_sge;
 
@@ -236,12 +242,19 @@ out_free_send_cq:
 out_free_recv_cq:
        ib_destroy_cq(priv->recv_cq);
 
+out_cm_dev_cleanup:
+       ipoib_cm_dev_cleanup(dev);
+
+out_free_wq:
+       destroy_workqueue(priv->wq);
+       priv->wq = NULL;
+
 out_free_mr:
        ib_dereg_mr(priv->mr);
-       ipoib_cm_dev_cleanup(dev);
 
 out_free_pd:
        ib_dealloc_pd(priv->pd);
+
        return -ENODEV;
 }
 
@@ -265,11 +278,18 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
 
        ipoib_cm_dev_cleanup(dev);
 
+       if (priv->wq) {
+               flush_workqueue(priv->wq);
+               destroy_workqueue(priv->wq);
+               priv->wq = NULL;
+       }
+
        if (ib_dereg_mr(priv->mr))
                ipoib_warn(priv, "ib_dereg_mr failed\n");
 
        if (ib_dealloc_pd(priv->pd))
                ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
 }
 
 void ipoib_event(struct ib_event_handler *handler,
index b47aea1..262ba1f 100644 (file)
@@ -69,7 +69,7 @@
 
 #define DRV_NAME       "iser"
 #define PFX            DRV_NAME ": "
-#define DRV_VER                "1.5"
+#define DRV_VER                "1.6"
 
 #define iser_dbg(fmt, arg...)                           \
        do {                                             \
@@ -218,22 +218,21 @@ enum iser_data_dir {
 /**
  * struct iser_data_buf - iSER data buffer
  *
- * @buf:          pointer to the sg list
+ * @sg:           pointer to the sg list
  * @size:         num entries of this sg
  * @data_len:     total beffer byte len
  * @dma_nents:    returned by dma_map_sg
- * @copy_buf:     allocated copy buf for SGs unaligned
- *                for rdma which are copied
- * @sg_single:    SG-ified clone of a non SG SC or
- *                unaligned SG
+ * @orig_sg:      pointer to the original sg list (in case
+ *                we used a copy)
+ * @orig_size:    num entris of orig sg list
  */
 struct iser_data_buf {
-       void               *buf;
+       struct scatterlist *sg;
        unsigned int       size;
        unsigned long      data_len;
        unsigned int       dma_nents;
-       char               *copy_buf;
-       struct scatterlist sg_single;
+       struct scatterlist *orig_sg;
+       unsigned int       orig_size;
   };
 
 /* fwd declarations */
@@ -244,35 +243,14 @@ struct iscsi_endpoint;
 /**
  * struct iser_mem_reg - iSER memory registration info
  *
- * @lkey:         MR local key
- * @rkey:         MR remote key
- * @va:           MR start address (buffer va)
- * @len:          MR length
+ * @sge:          memory region sg element
+ * @rkey:         memory region remote key
  * @mem_h:        pointer to registration context (FMR/Fastreg)
  */
 struct iser_mem_reg {
-       u32  lkey;
-       u32  rkey;
-       u64  va;
-       u64  len;
-       void *mem_h;
-};
-
-/**
- * struct iser_regd_buf - iSER buffer registration desc
- *
- * @reg:          memory registration info
- * @virt_addr:    virtual address of buffer
- * @device:       reference to iser device
- * @direction:    dma direction (for dma_unmap)
- * @data_size:    data buffer size in bytes
- */
-struct iser_regd_buf {
-       struct iser_mem_reg     reg;
-       void                    *virt_addr;
-       struct iser_device      *device;
-       enum dma_data_direction direction;
-       unsigned int            data_size;
+       struct ib_sge    sge;
+       u32              rkey;
+       void            *mem_h;
 };
 
 enum iser_desc_type {
@@ -534,11 +512,9 @@ struct iser_conn {
  * @sc:               link to scsi command
  * @command_sent:     indicate if command was sent
  * @dir:              iser data direction
- * @rdma_regd:        task rdma registration desc
+ * @rdma_reg        task rdma registration desc
  * @data:             iser data buffer desc
- * @data_copy:        iser data copy buffer desc (bounce buffer)
  * @prot:             iser protection buffer desc
- * @prot_copy:        iser protection copy buffer desc (bounce buffer)
  */
 struct iscsi_iser_task {
        struct iser_tx_desc          desc;
@@ -547,11 +523,9 @@ struct iscsi_iser_task {
        struct scsi_cmnd             *sc;
        int                          command_sent;
        int                          dir[ISER_DIRS_NUM];
-       struct iser_regd_buf         rdma_regd[ISER_DIRS_NUM];
+       struct iser_mem_reg          rdma_reg[ISER_DIRS_NUM];
        struct iser_data_buf         data[ISER_DIRS_NUM];
-       struct iser_data_buf         data_copy[ISER_DIRS_NUM];
        struct iser_data_buf         prot[ISER_DIRS_NUM];
-       struct iser_data_buf         prot_copy[ISER_DIRS_NUM];
 };
 
 struct iser_page_vec {
@@ -621,7 +595,6 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn);
 
 void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                     struct iser_data_buf *mem,
-                                    struct iser_data_buf *mem_copy,
                                     enum iser_data_dir cmd_dir);
 
 int  iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
@@ -634,10 +607,6 @@ int  iser_connect(struct iser_conn *iser_conn,
                  struct sockaddr *dst_addr,
                  int non_blocking);
 
-int  iser_reg_page_vec(struct ib_conn *ib_conn,
-                      struct iser_page_vec *page_vec,
-                      struct iser_mem_reg *mem_reg);
-
 void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
                        enum iser_data_dir cmd_dir);
 void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
@@ -667,4 +636,9 @@ int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
 void iser_free_fastreg_pool(struct ib_conn *ib_conn);
 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
                             enum iser_data_dir cmd_dir, sector_t *sector);
+struct fast_reg_descriptor *
+iser_reg_desc_get(struct ib_conn *ib_conn);
+void
+iser_reg_desc_put(struct ib_conn *ib_conn,
+                 struct fast_reg_descriptor *desc);
 #endif
index 20e859a..3e2118e 100644 (file)
@@ -50,7 +50,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_device  *device = iser_task->iser_conn->ib_conn.device;
-       struct iser_regd_buf *regd_buf;
+       struct iser_mem_reg *mem_reg;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
@@ -78,15 +78,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
                iser_err("Failed to set up Data-IN RDMA\n");
                return err;
        }
-       regd_buf = &iser_task->rdma_regd[ISER_DIR_IN];
+       mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];
 
        hdr->flags    |= ISER_RSV;
-       hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
-       hdr->read_va   = cpu_to_be64(regd_buf->reg.va);
+       hdr->read_stag = cpu_to_be32(mem_reg->rkey);
+       hdr->read_va   = cpu_to_be64(mem_reg->sge.addr);
 
        iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
-                task->itt, regd_buf->reg.rkey,
-                (unsigned long long)regd_buf->reg.va);
+                task->itt, mem_reg->rkey,
+                (unsigned long long)mem_reg->sge.addr);
 
        return 0;
 }
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
 {
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_device  *device = iser_task->iser_conn->ib_conn.device;
-       struct iser_regd_buf *regd_buf;
+       struct iser_mem_reg *mem_reg;
        int err;
        struct iser_hdr *hdr = &iser_task->desc.iser_header;
        struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
@@ -134,25 +134,25 @@ iser_prepare_write_cmd(struct iscsi_task *task,
                return err;
        }
 
-       regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
+       mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
 
        if (unsol_sz < edtl) {
                hdr->flags     |= ISER_WSV;
-               hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
-               hdr->write_va   = cpu_to_be64(regd_buf->reg.va + unsol_sz);
+               hdr->write_stag = cpu_to_be32(mem_reg->rkey);
+               hdr->write_va   = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
 
                iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
                         "VA:%#llX + unsol:%d\n",
-                        task->itt, regd_buf->reg.rkey,
-                        (unsigned long long)regd_buf->reg.va, unsol_sz);
+                        task->itt, mem_reg->rkey,
+                        (unsigned long long)mem_reg->sge.addr, unsol_sz);
        }
 
        if (imm_sz > 0) {
                iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
                         task->itt, imm_sz);
-               tx_dsg->addr   = regd_buf->reg.va;
+               tx_dsg->addr = mem_reg->sge.addr;
                tx_dsg->length = imm_sz;
-               tx_dsg->lkey   = regd_buf->reg.lkey;
+               tx_dsg->lkey = mem_reg->sge.lkey;
                iser_task->desc.num_sge = 2;
        }
 
@@ -401,16 +401,16 @@ int iser_send_command(struct iscsi_conn *conn,
        }
 
        if (scsi_sg_count(sc)) { /* using a scatter list */
-               data_buf->buf  = scsi_sglist(sc);
+               data_buf->sg = scsi_sglist(sc);
                data_buf->size = scsi_sg_count(sc);
        }
        data_buf->data_len = scsi_bufflen(sc);
 
        if (scsi_prot_sg_count(sc)) {
-               prot_buf->buf  = scsi_prot_sglist(sc);
+               prot_buf->sg  = scsi_prot_sglist(sc);
                prot_buf->size = scsi_prot_sg_count(sc);
-               prot_buf->data_len = data_buf->data_len >>
-                                    ilog2(sc->device->sector_size) * 8;
+               prot_buf->data_len = (data_buf->data_len >>
+                                    ilog2(sc->device->sector_size)) * 8;
        }
 
        if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -450,7 +450,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
        struct iser_conn *iser_conn = conn->dd_data;
        struct iscsi_iser_task *iser_task = task->dd_data;
        struct iser_tx_desc *tx_desc = NULL;
-       struct iser_regd_buf *regd_buf;
+       struct iser_mem_reg *mem_reg;
        unsigned long buf_offset;
        unsigned long data_seg_len;
        uint32_t itt;
@@ -477,11 +477,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
        /* build the tx desc */
        iser_initialize_task_headers(task, tx_desc);
 
-       regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
+       mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
        tx_dsg = &tx_desc->tx_sg[1];
-       tx_dsg->addr    = regd_buf->reg.va + buf_offset;
-       tx_dsg->length  = data_seg_len;
-       tx_dsg->lkey    = regd_buf->reg.lkey;
+       tx_dsg->addr = mem_reg->sge.addr + buf_offset;
+       tx_dsg->length = data_seg_len;
+       tx_dsg->lkey = mem_reg->sge.lkey;
        tx_desc->num_sge = 2;
 
        if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
@@ -658,10 +658,10 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
        iser_task->prot[ISER_DIR_IN].data_len  = 0;
        iser_task->prot[ISER_DIR_OUT].data_len = 0;
 
-       memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
-              sizeof(struct iser_regd_buf));
-       memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
-              sizeof(struct iser_regd_buf));
+       memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
+              sizeof(struct iser_mem_reg));
+       memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
+              sizeof(struct iser_mem_reg));
 }
 
 void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
@@ -674,35 +674,31 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
        /* if we were reading, copy back to unaligned sglist,
         * anyway dma_unmap and free the copy
         */
-       if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
+       if (iser_task->data[ISER_DIR_IN].orig_sg) {
                is_rdma_data_aligned = 0;
                iser_finalize_rdma_unaligned_sg(iser_task,
                                                &iser_task->data[ISER_DIR_IN],
-                                               &iser_task->data_copy[ISER_DIR_IN],
                                                ISER_DIR_IN);
        }
 
-       if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
+       if (iser_task->data[ISER_DIR_OUT].orig_sg) {
                is_rdma_data_aligned = 0;
                iser_finalize_rdma_unaligned_sg(iser_task,
                                                &iser_task->data[ISER_DIR_OUT],
-                                               &iser_task->data_copy[ISER_DIR_OUT],
                                                ISER_DIR_OUT);
        }
 
-       if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) {
+       if (iser_task->prot[ISER_DIR_IN].orig_sg) {
                is_rdma_prot_aligned = 0;
                iser_finalize_rdma_unaligned_sg(iser_task,
                                                &iser_task->prot[ISER_DIR_IN],
-                                               &iser_task->prot_copy[ISER_DIR_IN],
                                                ISER_DIR_IN);
        }
 
-       if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) {
+       if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
                is_rdma_prot_aligned = 0;
                iser_finalize_rdma_unaligned_sg(iser_task,
                                                &iser_task->prot[ISER_DIR_OUT],
-                                               &iser_task->prot_copy[ISER_DIR_OUT],
                                                ISER_DIR_OUT);
        }
 
index 341040b..f0cdc96 100644 (file)
 
 #include "iscsi_iser.h"
 
-#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
+static void
+iser_free_bounce_sg(struct iser_data_buf *data)
+{
+       struct scatterlist *sg;
+       int count;
 
-/**
- * iser_start_rdma_unaligned_sg
- */
-static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
-                                       struct iser_data_buf *data,
-                                       struct iser_data_buf *data_copy,
-                                       enum iser_data_dir cmd_dir)
+       for_each_sg(data->sg, sg, data->size, count)
+               __free_page(sg_page(sg));
+
+       kfree(data->sg);
+
+       data->sg = data->orig_sg;
+       data->size = data->orig_size;
+       data->orig_sg = NULL;
+       data->orig_size = 0;
+}
+
+static int
+iser_alloc_bounce_sg(struct iser_data_buf *data)
 {
-       struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
-       struct scatterlist *sgl = (struct scatterlist *)data->buf;
        struct scatterlist *sg;
-       char *mem = NULL;
-       unsigned long  cmd_data_len = 0;
-       int dma_nents, i;
+       struct page *page;
+       unsigned long length = data->data_len;
+       int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
 
-       for_each_sg(sgl, sg, data->size, i)
-               cmd_data_len += ib_sg_dma_len(dev, sg);
+       sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
+       if (!sg)
+               goto err;
 
-       if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
-               mem = (void *)__get_free_pages(GFP_ATOMIC,
-                     ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
-       else
-               mem = kmalloc(cmd_data_len, GFP_ATOMIC);
+       sg_init_table(sg, nents);
+       while (length) {
+               u32 page_len = min_t(u32, length, PAGE_SIZE);
 
-       if (mem == NULL) {
-               iser_err("Failed to allocate mem size %d %d for copying sglist\n",
-                        data->size, (int)cmd_data_len);
-               return -ENOMEM;
+               page = alloc_page(GFP_ATOMIC);
+               if (!page)
+                       goto err;
+
+               sg_set_page(&sg[i], page, page_len, 0);
+               length -= page_len;
+               i++;
        }
 
-       if (cmd_dir == ISER_DIR_OUT) {
-               /* copy the unaligned sg the buffer which is used for RDMA */
-               char *p, *from;
-
-               sgl = (struct scatterlist *)data->buf;
-               p = mem;
-               for_each_sg(sgl, sg, data->size, i) {
-                       from = kmap_atomic(sg_page(sg));
-                       memcpy(p,
-                              from + sg->offset,
-                              sg->length);
-                       kunmap_atomic(from);
-                       p += sg->length;
+       data->orig_sg = data->sg;
+       data->orig_size = data->size;
+       data->sg = sg;
+       data->size = nents;
+
+       return 0;
+
+err:
+       for (; i > 0; i--)
+               __free_page(sg_page(&sg[i - 1]));
+       kfree(sg);
+
+       return -ENOMEM;
+}
+
+static void
+iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
+{
+       struct scatterlist *osg, *bsg = data->sg;
+       void *oaddr, *baddr;
+       unsigned int left = data->data_len;
+       unsigned int bsg_off = 0;
+       int i;
+
+       for_each_sg(data->orig_sg, osg, data->orig_size, i) {
+               unsigned int copy_len, osg_off = 0;
+
+               oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
+               copy_len = min(left, osg->length);
+               while (copy_len) {
+                       unsigned int len = min(copy_len, bsg->length - bsg_off);
+
+                       baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
+                       if (to_buffer)
+                               memcpy(baddr + bsg_off, oaddr + osg_off, len);
+                       else
+                               memcpy(oaddr + osg_off, baddr + bsg_off, len);
+
+                       kunmap_atomic(baddr - bsg->offset);
+                       osg_off += len;
+                       bsg_off += len;
+                       copy_len -= len;
+
+                       if (bsg_off >= bsg->length) {
+                               bsg = sg_next(bsg);
+                               bsg_off = 0;
+                       }
                }
+               kunmap_atomic(oaddr - osg->offset);
+               left -= osg_off;
        }
+}
+
+static inline void
+iser_copy_from_bounce(struct iser_data_buf *data)
+{
+       iser_copy_bounce(data, false);
+}
+
+static inline void
+iser_copy_to_bounce(struct iser_data_buf *data)
+{
+       iser_copy_bounce(data, true);
+}
+
+struct fast_reg_descriptor *
+iser_reg_desc_get(struct ib_conn *ib_conn)
+{
+       struct fast_reg_descriptor *desc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ib_conn->lock, flags);
+       desc = list_first_entry(&ib_conn->fastreg.pool,
+                               struct fast_reg_descriptor, list);
+       list_del(&desc->list);
+       spin_unlock_irqrestore(&ib_conn->lock, flags);
+
+       return desc;
+}
+
+void
+iser_reg_desc_put(struct ib_conn *ib_conn,
+                 struct fast_reg_descriptor *desc)
+{
+       unsigned long flags;
 
-       sg_init_one(&data_copy->sg_single, mem, cmd_data_len);
-       data_copy->buf = &data_copy->sg_single;
-       data_copy->size = 1;
-       data_copy->copy_buf = mem;
+       spin_lock_irqsave(&ib_conn->lock, flags);
+       list_add(&desc->list, &ib_conn->fastreg.pool);
+       spin_unlock_irqrestore(&ib_conn->lock, flags);
+}
 
-       dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,
-                                 (cmd_dir == ISER_DIR_OUT) ?
-                                 DMA_TO_DEVICE : DMA_FROM_DEVICE);
-       BUG_ON(dma_nents == 0);
+/**
+ * iser_start_rdma_unaligned_sg
+ */
+static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
+                                       struct iser_data_buf *data,
+                                       enum iser_data_dir cmd_dir)
+{
+       struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
+       int rc;
+
+       rc = iser_alloc_bounce_sg(data);
+       if (rc) {
+               iser_err("Failed to allocate bounce for data len %lu\n",
+                        data->data_len);
+               return rc;
+       }
+
+       if (cmd_dir == ISER_DIR_OUT)
+               iser_copy_to_bounce(data);
 
-       data_copy->dma_nents = dma_nents;
-       data_copy->data_len = cmd_data_len;
+       data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
+                                       (cmd_dir == ISER_DIR_OUT) ?
+                                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
+       if (!data->dma_nents) {
+               iser_err("Got dma_nents %d, something went wrong...\n",
+                        data->dma_nents);
+               rc = -ENOMEM;
+               goto err;
+       }
 
        return 0;
+err:
+       iser_free_bounce_sg(data);
+       return rc;
 }
 
 /**
@@ -109,51 +214,18 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
 
 void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                     struct iser_data_buf *data,
-                                    struct iser_data_buf *data_copy,
                                     enum iser_data_dir cmd_dir)
 {
-       struct ib_device *dev;
-       unsigned long  cmd_data_len;
-
-       dev = iser_task->iser_conn->ib_conn.device->ib_device;
+       struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
 
-       ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
+       ib_dma_unmap_sg(dev, data->sg, data->size,
                        (cmd_dir == ISER_DIR_OUT) ?
                        DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
-       if (cmd_dir == ISER_DIR_IN) {
-               char *mem;
-               struct scatterlist *sgl, *sg;
-               unsigned char *p, *to;
-               unsigned int sg_size;
-               int i;
-
-               /* copy back read RDMA to unaligned sg */
-               mem = data_copy->copy_buf;
-
-               sgl = (struct scatterlist *)data->buf;
-               sg_size = data->size;
-
-               p = mem;
-               for_each_sg(sgl, sg, sg_size, i) {
-                       to = kmap_atomic(sg_page(sg));
-                       memcpy(to + sg->offset,
-                              p,
-                              sg->length);
-                       kunmap_atomic(to);
-                       p += sg->length;
-               }
-       }
+       if (cmd_dir == ISER_DIR_IN)
+               iser_copy_from_bounce(data);
 
-       cmd_data_len = data->data_len;
-
-       if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
-               free_pages((unsigned long)data_copy->copy_buf,
-                          ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
-       else
-               kfree(data_copy->copy_buf);
-
-       data_copy->copy_buf = NULL;
+       iser_free_bounce_sg(data);
 }
 
 #define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & ~MASK_4K) == 0)
@@ -175,7 +247,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
                               struct ib_device *ibdev, u64 *pages,
                               int *offset, int *data_size)
 {
-       struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
+       struct scatterlist *sg, *sgl = data->sg;
        u64 start_addr, end_addr, page, chunk_start = 0;
        unsigned long total_sz = 0;
        unsigned int dma_len;
@@ -227,14 +299,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
 static int iser_data_buf_aligned_len(struct iser_data_buf *data,
                                      struct ib_device *ibdev)
 {
-       struct scatterlist *sgl, *sg, *next_sg = NULL;
+       struct scatterlist *sg, *sgl, *next_sg = NULL;
        u64 start_addr, end_addr;
        int i, ret_len, start_check = 0;
 
        if (data->dma_nents == 1)
                return 1;
 
-       sgl = (struct scatterlist *)data->buf;
+       sgl = data->sg;
        start_addr  = ib_sg_dma_address(ibdev, sgl);
 
        for_each_sg(sgl, sg, data->dma_nents, i) {
@@ -266,11 +338,10 @@ static int iser_data_buf_aligned_len(struct iser_data_buf *data,
 static void iser_data_buf_dump(struct iser_data_buf *data,
                               struct ib_device *ibdev)
 {
-       struct scatterlist *sgl = (struct scatterlist *)data->buf;
        struct scatterlist *sg;
        int i;
 
-       for_each_sg(sgl, sg, data->dma_nents, i)
+       for_each_sg(data->sg, sg, data->dma_nents, i)
                iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
                         "off:0x%x sz:0x%x dma_len:0x%x\n",
                         i, (unsigned long)ib_sg_dma_address(ibdev, sg),
@@ -288,31 +359,6 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
                iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
 }
 
-static void iser_page_vec_build(struct iser_data_buf *data,
-                               struct iser_page_vec *page_vec,
-                               struct ib_device *ibdev)
-{
-       int page_vec_len = 0;
-
-       page_vec->length = 0;
-       page_vec->offset = 0;
-
-       iser_dbg("Translating sg sz: %d\n", data->dma_nents);
-       page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
-                                          &page_vec->offset,
-                                          &page_vec->data_size);
-       iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
-
-       page_vec->length = page_vec_len;
-
-       if (page_vec_len * SIZE_4K < page_vec->data_size) {
-               iser_err("page_vec too short to hold this SG\n");
-               iser_data_buf_dump(data, ibdev);
-               iser_dump_page_vec(page_vec);
-               BUG();
-       }
-}
-
 int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
                            struct iser_data_buf *data,
                            enum iser_data_dir iser_dir,
@@ -323,7 +369,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
        iser_task->dir[iser_dir] = 1;
        dev = iser_task->iser_conn->ib_conn.device->ib_device;
 
-       data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
+       data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
        if (data->dma_nents == 0) {
                iser_err("dma_map_sg failed!!!\n");
                return -EINVAL;
@@ -338,24 +384,41 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
        struct ib_device *dev;
 
        dev = iser_task->iser_conn->ib_conn.device->ib_device;
-       ib_dma_unmap_sg(dev, data->buf, data->size, dir);
+       ib_dma_unmap_sg(dev, data->sg, data->size, dir);
+}
+
+static int
+iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
+            struct iser_mem_reg *reg)
+{
+       struct scatterlist *sg = mem->sg;
+
+       reg->sge.lkey = device->mr->lkey;
+       reg->rkey = device->mr->rkey;
+       reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
+       reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
+
+       iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
+                " length=0x%x\n", reg->sge.lkey, reg->rkey,
+                reg->sge.addr, reg->sge.length);
+
+       return 0;
 }
 
 static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
-                             struct ib_device *ibdev,
                              struct iser_data_buf *mem,
-                             struct iser_data_buf *mem_copy,
                              enum iser_data_dir cmd_dir,
                              int aligned_len)
 {
-       struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+       struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+       struct iser_device *device = iser_task->iser_conn->ib_conn.device;
 
        iscsi_conn->fmr_unalign_cnt++;
        iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
                  aligned_len, mem->size);
 
        if (iser_debug_level > 0)
-               iser_data_buf_dump(mem, ibdev);
+               iser_data_buf_dump(mem, device->ib_device);
 
        /* unmap the command data before accessing it */
        iser_dma_unmap_task_data(iser_task, mem,
@@ -364,12 +427,94 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
 
        /* allocate copy buf, if we are writing, copy the */
        /* unaligned scatterlist, dma map the copy        */
-       if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0)
+       if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
                return -ENOMEM;
 
        return 0;
 }
 
+/**
+ * iser_reg_page_vec - Register physical memory
+ *
+ * returns: 0 on success, errno code on failure
+ */
+static
+int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
+                     struct iser_data_buf *mem,
+                     struct iser_page_vec *page_vec,
+                     struct iser_mem_reg *mem_reg)
+{
+       struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
+       struct iser_device *device = ib_conn->device;
+       struct ib_pool_fmr *fmr;
+       int ret, plen;
+
+       plen = iser_sg_to_page_vec(mem, device->ib_device,
+                                  page_vec->pages,
+                                  &page_vec->offset,
+                                  &page_vec->data_size);
+       page_vec->length = plen;
+       if (plen * SIZE_4K < page_vec->data_size) {
+               iser_err("page vec too short to hold this SG\n");
+               iser_data_buf_dump(mem, device->ib_device);
+               iser_dump_page_vec(page_vec);
+               return -EINVAL;
+       }
+
+       fmr  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
+                                   page_vec->pages,
+                                   page_vec->length,
+                                   page_vec->pages[0]);
+       if (IS_ERR(fmr)) {
+               ret = PTR_ERR(fmr);
+               iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
+               return ret;
+       }
+
+       mem_reg->sge.lkey = fmr->fmr->lkey;
+       mem_reg->rkey = fmr->fmr->rkey;
+       mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset;
+       mem_reg->sge.length = page_vec->data_size;
+       mem_reg->mem_h = fmr;
+
+       return 0;
+}
+
+/**
+ * Unregister (previosuly registered using FMR) memory.
+ * If memory is non-FMR does nothing.
+ */
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+                       enum iser_data_dir cmd_dir)
+{
+       struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
+       int ret;
+
+       if (!reg->mem_h)
+               return;
+
+       iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
+
+       ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
+       if (ret)
+               iser_err("ib_fmr_pool_unmap failed %d\n", ret);
+
+       reg->mem_h = NULL;
+}
+
+void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
+                           enum iser_data_dir cmd_dir)
+{
+       struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
+
+       if (!reg->mem_h)
+               return;
+
+       iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
+                         reg->mem_h);
+       reg->mem_h = NULL;
+}
+
 /**
  * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
  * using FMR (if possible) obtaining rkey and va
@@ -383,45 +528,29 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
        struct iser_device   *device = ib_conn->device;
        struct ib_device     *ibdev = device->ib_device;
        struct iser_data_buf *mem = &iser_task->data[cmd_dir];
-       struct iser_regd_buf *regd_buf;
+       struct iser_mem_reg *mem_reg;
        int aligned_len;
        int err;
        int i;
-       struct scatterlist *sg;
 
-       regd_buf = &iser_task->rdma_regd[cmd_dir];
+       mem_reg = &iser_task->rdma_reg[cmd_dir];
 
        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
        if (aligned_len != mem->dma_nents) {
-               err = fall_to_bounce_buf(iser_task, ibdev, mem,
-                                        &iser_task->data_copy[cmd_dir],
+               err = fall_to_bounce_buf(iser_task, mem,
                                         cmd_dir, aligned_len);
                if (err) {
                        iser_err("failed to allocate bounce buffer\n");
                        return err;
                }
-               mem = &iser_task->data_copy[cmd_dir];
        }
 
        /* if there a single dma entry, FMR is not needed */
        if (mem->dma_nents == 1) {
-               sg = (struct scatterlist *)mem->buf;
-
-               regd_buf->reg.lkey = device->mr->lkey;
-               regd_buf->reg.rkey = device->mr->rkey;
-               regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
-               regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
-
-               iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
-                        "va: 0x%08lX sz: %ld]\n",
-                        (unsigned int)regd_buf->reg.lkey,
-                        (unsigned int)regd_buf->reg.rkey,
-                        (unsigned long)regd_buf->reg.va,
-                        (unsigned long)regd_buf->reg.len);
+               return iser_reg_dma(device, mem, mem_reg);
        } else { /* use FMR for multiple dma entries */
-               iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev);
-               err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,
-                                       &regd_buf->reg);
+               err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
+                                       mem_reg);
                if (err && err != -EAGAIN) {
                        iser_data_buf_dump(mem, ibdev);
                        iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
@@ -519,8 +648,10 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
 
 static int
 iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
-               struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
-               struct ib_sge *prot_sge, struct ib_sge *sig_sge)
+               struct fast_reg_descriptor *desc,
+               struct iser_mem_reg *data_reg,
+               struct iser_mem_reg *prot_reg,
+               struct iser_mem_reg *sig_reg)
 {
        struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_pi_context *pi_ctx = desc->pi_ctx;
@@ -544,12 +675,12 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
        memset(&sig_wr, 0, sizeof(sig_wr));
        sig_wr.opcode = IB_WR_REG_SIG_MR;
        sig_wr.wr_id = ISER_FASTREG_LI_WRID;
-       sig_wr.sg_list = data_sge;
+       sig_wr.sg_list = &data_reg->sge;
        sig_wr.num_sge = 1;
        sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
        sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
        if (scsi_prot_sg_count(iser_task->sc))
-               sig_wr.wr.sig_handover.prot = prot_sge;
+               sig_wr.wr.sig_handover.prot = &prot_reg->sge;
        sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
                                              IB_ACCESS_REMOTE_READ |
                                              IB_ACCESS_REMOTE_WRITE;
@@ -566,27 +697,26 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
        }
        desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
 
-       sig_sge->lkey = pi_ctx->sig_mr->lkey;
-       sig_sge->addr = 0;
-       sig_sge->length = scsi_transfer_length(iser_task->sc);
+       sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
+       sig_reg->rkey = pi_ctx->sig_mr->rkey;
+       sig_reg->sge.addr = 0;
+       sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
 
-       iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
-                sig_sge->addr, sig_sge->length,
-                sig_sge->lkey);
+       iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
+                sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
+                sig_reg->sge.length);
 err:
        return ret;
 }
 
 static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
-                           struct iser_regd_buf *regd_buf,
                            struct iser_data_buf *mem,
+                           struct fast_reg_descriptor *desc,
                            enum iser_reg_indicator ind,
-                           struct ib_sge *sge)
+                           struct iser_mem_reg *reg)
 {
-       struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
        struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
        struct iser_device *device = ib_conn->device;
-       struct ib_device *ibdev = device->ib_device;
        struct ib_mr *mr;
        struct ib_fast_reg_page_list *frpl;
        struct ib_send_wr fastreg_wr, inv_wr;
@@ -594,17 +724,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
        int ret, offset, size, plen;
 
        /* if there a single dma entry, dma mr suffices */
-       if (mem->dma_nents == 1) {
-               struct scatterlist *sg = (struct scatterlist *)mem->buf;
-
-               sge->lkey = device->mr->lkey;
-               sge->addr   = ib_sg_dma_address(ibdev, &sg[0]);
-               sge->length  = ib_sg_dma_len(ibdev, &sg[0]);
-
-               iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
-                        sge->lkey, sge->addr, sge->length);
-               return 0;
-       }
+       if (mem->dma_nents == 1)
+               return iser_reg_dma(device, mem, reg);
 
        if (ind == ISER_DATA_KEY_VALID) {
                mr = desc->data_mr;
@@ -652,9 +773,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
        }
        desc->reg_indicators &= ~ind;
 
-       sge->lkey = mr->lkey;
-       sge->addr = frpl->page_list[0] + offset;
-       sge->length = size;
+       reg->sge.lkey = mr->lkey;
+       reg->rkey = mr->rkey;
+       reg->sge.addr = frpl->page_list[0] + offset;
+       reg->sge.length = size;
 
        return ret;
 }
@@ -672,93 +794,66 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
        struct iser_device *device = ib_conn->device;
        struct ib_device *ibdev = device->ib_device;
        struct iser_data_buf *mem = &iser_task->data[cmd_dir];
-       struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
+       struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
        struct fast_reg_descriptor *desc = NULL;
-       struct ib_sge data_sge;
        int err, aligned_len;
-       unsigned long flags;
 
        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
        if (aligned_len != mem->dma_nents) {
-               err = fall_to_bounce_buf(iser_task, ibdev, mem,
-                                        &iser_task->data_copy[cmd_dir],
+               err = fall_to_bounce_buf(iser_task, mem,
                                         cmd_dir, aligned_len);
                if (err) {
                        iser_err("failed to allocate bounce buffer\n");
                        return err;
                }
-               mem = &iser_task->data_copy[cmd_dir];
        }
 
        if (mem->dma_nents != 1 ||
            scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
-               spin_lock_irqsave(&ib_conn->lock, flags);
-               desc = list_first_entry(&ib_conn->fastreg.pool,
-                                       struct fast_reg_descriptor, list);
-               list_del(&desc->list);
-               spin_unlock_irqrestore(&ib_conn->lock, flags);
-               regd_buf->reg.mem_h = desc;
+               desc = iser_reg_desc_get(ib_conn);
+               mem_reg->mem_h = desc;
        }
 
-       err = iser_fast_reg_mr(iser_task, regd_buf, mem,
-                              ISER_DATA_KEY_VALID, &data_sge);
+       err = iser_fast_reg_mr(iser_task, mem, desc,
+                              ISER_DATA_KEY_VALID, mem_reg);
        if (err)
                goto err_reg;
 
        if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
-               struct ib_sge prot_sge, sig_sge;
+               struct iser_mem_reg prot_reg;
 
-               memset(&prot_sge, 0, sizeof(prot_sge));
+               memset(&prot_reg, 0, sizeof(prot_reg));
                if (scsi_prot_sg_count(iser_task->sc)) {
                        mem = &iser_task->prot[cmd_dir];
                        aligned_len = iser_data_buf_aligned_len(mem, ibdev);
                        if (aligned_len != mem->dma_nents) {
-                               err = fall_to_bounce_buf(iser_task, ibdev, mem,
-                                                        &iser_task->prot_copy[cmd_dir],
+                               err = fall_to_bounce_buf(iser_task, mem,
                                                         cmd_dir, aligned_len);
                                if (err) {
                                        iser_err("failed to allocate bounce buffer\n");
                                        return err;
                                }
-                               mem = &iser_task->prot_copy[cmd_dir];
                        }
 
-                       err = iser_fast_reg_mr(iser_task, regd_buf, mem,
-                                              ISER_PROT_KEY_VALID, &prot_sge);
+                       err = iser_fast_reg_mr(iser_task, mem, desc,
+                                              ISER_PROT_KEY_VALID, &prot_reg);
                        if (err)
                                goto err_reg;
                }
 
-               err = iser_reg_sig_mr(iser_task, desc, &data_sge,
-                                     &prot_sge, &sig_sge);
+               err = iser_reg_sig_mr(iser_task, desc, mem_reg,
+                                     &prot_reg, mem_reg);
                if (err) {
                        iser_err("Failed to register signature mr\n");
                        return err;
                }
                desc->reg_indicators |= ISER_FASTREG_PROTECTED;
-
-               regd_buf->reg.lkey = sig_sge.lkey;
-               regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
-               regd_buf->reg.va = sig_sge.addr;
-               regd_buf->reg.len = sig_sge.length;
-       } else {
-               if (desc)
-                       regd_buf->reg.rkey = desc->data_mr->rkey;
-               else
-                       regd_buf->reg.rkey = device->mr->rkey;
-
-               regd_buf->reg.lkey = data_sge.lkey;
-               regd_buf->reg.va = data_sge.addr;
-               regd_buf->reg.len = data_sge.length;
        }
 
        return 0;
 err_reg:
-       if (desc) {
-               spin_lock_irqsave(&ib_conn->lock, flags);
-               list_add_tail(&desc->list, &ib_conn->fastreg.pool);
-               spin_unlock_irqrestore(&ib_conn->lock, flags);
-       }
+       if (desc)
+               iser_reg_desc_put(ib_conn, desc);
 
        return err;
 }
index 4065abe..cc2dd35 100644 (file)
@@ -273,6 +273,65 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
        ib_conn->fmr.page_vec = NULL;
 }
 
+static int
+iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
+                 struct fast_reg_descriptor *desc)
+{
+       struct iser_pi_context *pi_ctx = NULL;
+       struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2,
+                                              .flags = IB_MR_SIGNATURE_EN};
+       int ret = 0;
+
+       desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
+       if (!desc->pi_ctx)
+               return -ENOMEM;
+
+       pi_ctx = desc->pi_ctx;
+
+       pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
+                                           ISCSI_ISER_SG_TABLESIZE);
+       if (IS_ERR(pi_ctx->prot_frpl)) {
+               ret = PTR_ERR(pi_ctx->prot_frpl);
+               goto prot_frpl_failure;
+       }
+
+       pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
+                                       ISCSI_ISER_SG_TABLESIZE + 1);
+       if (IS_ERR(pi_ctx->prot_mr)) {
+               ret = PTR_ERR(pi_ctx->prot_mr);
+               goto prot_mr_failure;
+       }
+       desc->reg_indicators |= ISER_PROT_KEY_VALID;
+
+       pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+       if (IS_ERR(pi_ctx->sig_mr)) {
+               ret = PTR_ERR(pi_ctx->sig_mr);
+               goto sig_mr_failure;
+       }
+       desc->reg_indicators |= ISER_SIG_KEY_VALID;
+       desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
+
+       return 0;
+
+sig_mr_failure:
+       ib_dereg_mr(desc->pi_ctx->prot_mr);
+prot_mr_failure:
+       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+prot_frpl_failure:
+       kfree(desc->pi_ctx);
+
+       return ret;
+}
+
+static void
+iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
+{
+       ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
+       ib_dereg_mr(pi_ctx->prot_mr);
+       ib_destroy_mr(pi_ctx->sig_mr);
+       kfree(pi_ctx);
+}
+
 static int
 iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
                         bool pi_enable, struct fast_reg_descriptor *desc)
@@ -297,59 +356,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
        desc->reg_indicators |= ISER_DATA_KEY_VALID;
 
        if (pi_enable) {
-               struct ib_mr_init_attr mr_init_attr = {0};
-               struct iser_pi_context *pi_ctx = NULL;
-
-               desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
-               if (!desc->pi_ctx) {
-                       iser_err("Failed to allocate pi context\n");
-                       ret = -ENOMEM;
+               ret = iser_alloc_pi_ctx(ib_device, pd, desc);
+               if (ret)
                        goto pi_ctx_alloc_failure;
-               }
-               pi_ctx = desc->pi_ctx;
-
-               pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
-                                                   ISCSI_ISER_SG_TABLESIZE);
-               if (IS_ERR(pi_ctx->prot_frpl)) {
-                       ret = PTR_ERR(pi_ctx->prot_frpl);
-                       iser_err("Failed to allocate prot frpl ret=%d\n",
-                                ret);
-                       goto prot_frpl_failure;
-               }
-
-               pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
-                                               ISCSI_ISER_SG_TABLESIZE + 1);
-               if (IS_ERR(pi_ctx->prot_mr)) {
-                       ret = PTR_ERR(pi_ctx->prot_mr);
-                       iser_err("Failed to allocate prot frmr ret=%d\n",
-                                ret);
-                       goto prot_mr_failure;
-               }
-               desc->reg_indicators |= ISER_PROT_KEY_VALID;
-
-               mr_init_attr.max_reg_descriptors = 2;
-               mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
-               pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
-               if (IS_ERR(pi_ctx->sig_mr)) {
-                       ret = PTR_ERR(pi_ctx->sig_mr);
-                       iser_err("Failed to allocate signature enabled mr err=%d\n",
-                                ret);
-                       goto sig_mr_failure;
-               }
-               desc->reg_indicators |= ISER_SIG_KEY_VALID;
        }
-       desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
-
-       iser_dbg("Create fr_desc %p page_list %p\n",
-                desc, desc->data_frpl->page_list);
 
        return 0;
-sig_mr_failure:
-       ib_dereg_mr(desc->pi_ctx->prot_mr);
-prot_mr_failure:
-       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
-prot_frpl_failure:
-       kfree(desc->pi_ctx);
 pi_ctx_alloc_failure:
        ib_dereg_mr(desc->data_mr);
 fast_reg_mr_failure:
@@ -416,12 +428,8 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
                list_del(&desc->list);
                ib_free_fast_reg_page_list(desc->data_frpl);
                ib_dereg_mr(desc->data_mr);
-               if (desc->pi_ctx) {
-                       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
-                       ib_dereg_mr(desc->pi_ctx->prot_mr);
-                       ib_destroy_mr(desc->pi_ctx->sig_mr);
-                       kfree(desc->pi_ctx);
-               }
+               if (desc->pi_ctx)
+                       iser_free_pi_ctx(desc->pi_ctx);
                kfree(desc);
                ++i;
        }
@@ -721,7 +729,7 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
        struct iser_conn *iser_conn;
 
        iser_conn = (struct iser_conn *)cma_id->context;
-       iser_conn->state = ISER_CONN_DOWN;
+       iser_conn->state = ISER_CONN_TERMINATING;
 }
 
 /**
@@ -992,93 +1000,6 @@ connect_failure:
        return err;
 }
 
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
-int iser_reg_page_vec(struct ib_conn *ib_conn,
-                     struct iser_page_vec *page_vec,
-                     struct iser_mem_reg  *mem_reg)
-{
-       struct ib_pool_fmr *mem;
-       u64                io_addr;
-       u64                *page_list;
-       int                status;
-
-       page_list = page_vec->pages;
-       io_addr   = page_list[0];
-
-       mem  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
-                                   page_list,
-                                   page_vec->length,
-                                   io_addr);
-
-       if (IS_ERR(mem)) {
-               status = (int)PTR_ERR(mem);
-               iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
-               return status;
-       }
-
-       mem_reg->lkey  = mem->fmr->lkey;
-       mem_reg->rkey  = mem->fmr->rkey;
-       mem_reg->len   = page_vec->length * SIZE_4K;
-       mem_reg->va    = io_addr;
-       mem_reg->mem_h = (void *)mem;
-
-       mem_reg->va   += page_vec->offset;
-       mem_reg->len   = page_vec->data_size;
-
-       iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
-                "entry[0]: (0x%08lx,%ld)] -> "
-                "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
-                page_vec, page_vec->length,
-                (unsigned long)page_vec->pages[0],
-                (unsigned long)page_vec->data_size,
-                (unsigned int)mem_reg->lkey, mem_reg->mem_h,
-                (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
-       return 0;
-}
-
-/**
- * Unregister (previosuly registered using FMR) memory.
- * If memory is non-FMR does nothing.
- */
-void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
-                       enum iser_data_dir cmd_dir)
-{
-       struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
-       int ret;
-
-       if (!reg->mem_h)
-               return;
-
-       iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
-
-       ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
-       if (ret)
-               iser_err("ib_fmr_pool_unmap failed %d\n", ret);
-
-       reg->mem_h = NULL;
-}
-
-void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
-                           enum iser_data_dir cmd_dir)
-{
-       struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
-       struct iser_conn *iser_conn = iser_task->iser_conn;
-       struct ib_conn *ib_conn = &iser_conn->ib_conn;
-       struct fast_reg_descriptor *desc = reg->mem_h;
-
-       if (!desc)
-               return;
-
-       reg->mem_h = NULL;
-       spin_lock_bh(&ib_conn->lock);
-       list_add_tail(&desc->list, &ib_conn->fastreg.pool);
-       spin_unlock_bh(&ib_conn->lock);
-}
-
 int iser_post_recvl(struct iser_conn *iser_conn)
 {
        struct ib_recv_wr rx_wr, *rx_wr_failed;
@@ -1210,6 +1131,9 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
                        iscsi_conn_failure(iser_conn->iscsi_conn,
                                           ISCSI_ERR_CONN_FAILED);
 
+       if (wc->wr_id == ISER_FASTREG_LI_WRID)
+               return;
+
        if (is_iser_tx_desc(iser_conn, wr_id)) {
                struct iser_tx_desc *desc = wr_id;
 
@@ -1254,13 +1178,11 @@ static void iser_handle_wc(struct ib_wc *wc)
                else
                        iser_dbg("flush error: wr id %llx\n", wc->wr_id);
 
-               if (wc->wr_id != ISER_FASTREG_LI_WRID &&
-                   wc->wr_id != ISER_BEACON_WRID)
-                       iser_handle_comp_error(ib_conn, wc);
-
-               /* complete in case all flush errors were consumed */
                if (wc->wr_id == ISER_BEACON_WRID)
+                       /* all flush errors were consumed */
                        complete(&ib_conn->flush_comp);
+               else
+                       iser_handle_comp_error(ib_conn, wc);
        }
 }
 
@@ -1306,7 +1228,7 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
 u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
                             enum iser_data_dir cmd_dir, sector_t *sector)
 {
-       struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+       struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
        struct fast_reg_descriptor *desc = reg->mem_h;
        unsigned long sector_size = iser_task->sc->device->sector_size;
        struct ib_mr_status mr_status;
index 075b19c..327529e 100644 (file)
@@ -76,12 +76,12 @@ isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
 static void
 isert_qp_event_callback(struct ib_event *e, void *context)
 {
-       struct isert_conn *isert_conn = (struct isert_conn *)context;
+       struct isert_conn *isert_conn = context;
 
        isert_err("conn %p event: %d\n", isert_conn, e->event);
        switch (e->event) {
        case IB_EVENT_COMM_EST:
-               rdma_notify(isert_conn->conn_cm_id, IB_EVENT_COMM_EST);
+               rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST);
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                isert_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED\n");
@@ -107,13 +107,12 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
        return 0;
 }
 
-static int
-isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
+static struct isert_comp *
+isert_comp_get(struct isert_conn *isert_conn)
 {
-       struct isert_device *device = isert_conn->conn_device;
-       struct ib_qp_init_attr attr;
+       struct isert_device *device = isert_conn->device;
        struct isert_comp *comp;
-       int ret, i, min = 0;
+       int i, min = 0;
 
        mutex_lock(&device_list_mutex);
        for (i = 0; i < device->comps_used; i++)
@@ -122,9 +121,30 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
                        min = i;
        comp = &device->comps[min];
        comp->active_qps++;
+       mutex_unlock(&device_list_mutex);
+
        isert_info("conn %p, using comp %p min_index: %d\n",
                   isert_conn, comp, min);
+
+       return comp;
+}
+
+static void
+isert_comp_put(struct isert_comp *comp)
+{
+       mutex_lock(&device_list_mutex);
+       comp->active_qps--;
        mutex_unlock(&device_list_mutex);
+}
+
+static struct ib_qp *
+isert_create_qp(struct isert_conn *isert_conn,
+               struct isert_comp *comp,
+               struct rdma_cm_id *cma_id)
+{
+       struct isert_device *device = isert_conn->device;
+       struct ib_qp_init_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(struct ib_qp_init_attr));
        attr.event_handler = isert_qp_event_callback;
@@ -149,19 +169,31 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
        if (device->pi_capable)
                attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
 
-       ret = rdma_create_qp(cma_id, isert_conn->conn_pd, &attr);
+       ret = rdma_create_qp(cma_id, device->pd, &attr);
        if (ret) {
                isert_err("rdma_create_qp failed for cma_id %d\n", ret);
+               return ERR_PTR(ret);
+       }
+
+       return cma_id->qp;
+}
+
+static int
+isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
+{
+       struct isert_comp *comp;
+       int ret;
+
+       comp = isert_comp_get(isert_conn);
+       isert_conn->qp = isert_create_qp(isert_conn, comp, cma_id);
+       if (IS_ERR(isert_conn->qp)) {
+               ret = PTR_ERR(isert_conn->qp);
                goto err;
        }
-       isert_conn->conn_qp = cma_id->qp;
 
        return 0;
 err:
-       mutex_lock(&device_list_mutex);
-       comp->active_qps--;
-       mutex_unlock(&device_list_mutex);
-
+       isert_comp_put(comp);
        return ret;
 }
 
@@ -174,18 +206,19 @@ isert_cq_event_callback(struct ib_event *e, void *context)
 static int
 isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
        struct iser_rx_desc *rx_desc;
        struct ib_sge *rx_sg;
        u64 dma_addr;
        int i, j;
 
-       isert_conn->conn_rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS *
+       isert_conn->rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS *
                                sizeof(struct iser_rx_desc), GFP_KERNEL);
-       if (!isert_conn->conn_rx_descs)
+       if (!isert_conn->rx_descs)
                goto fail;
 
-       rx_desc = isert_conn->conn_rx_descs;
+       rx_desc = isert_conn->rx_descs;
 
        for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++)  {
                dma_addr = ib_dma_map_single(ib_dev, (void *)rx_desc,
@@ -198,21 +231,21 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
                rx_sg = &rx_desc->rx_sg;
                rx_sg->addr = rx_desc->dma_addr;
                rx_sg->length = ISER_RX_PAYLOAD_SIZE;
-               rx_sg->lkey = isert_conn->conn_mr->lkey;
+               rx_sg->lkey = device->mr->lkey;
        }
 
-       isert_conn->conn_rx_desc_head = 0;
+       isert_conn->rx_desc_head = 0;
 
        return 0;
 
 dma_map_fail:
-       rx_desc = isert_conn->conn_rx_descs;
+       rx_desc = isert_conn->rx_descs;
        for (j = 0; j < i; j++, rx_desc++) {
                ib_dma_unmap_single(ib_dev, rx_desc->dma_addr,
                                    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        }
-       kfree(isert_conn->conn_rx_descs);
-       isert_conn->conn_rx_descs = NULL;
+       kfree(isert_conn->rx_descs);
+       isert_conn->rx_descs = NULL;
 fail:
        isert_err("conn %p failed to allocate rx descriptors\n", isert_conn);
 
@@ -222,59 +255,51 @@ fail:
 static void
 isert_free_rx_descriptors(struct isert_conn *isert_conn)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->device->ib_device;
        struct iser_rx_desc *rx_desc;
        int i;
 
-       if (!isert_conn->conn_rx_descs)
+       if (!isert_conn->rx_descs)
                return;
 
-       rx_desc = isert_conn->conn_rx_descs;
+       rx_desc = isert_conn->rx_descs;
        for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++)  {
                ib_dma_unmap_single(ib_dev, rx_desc->dma_addr,
                                    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
        }
 
-       kfree(isert_conn->conn_rx_descs);
-       isert_conn->conn_rx_descs = NULL;
+       kfree(isert_conn->rx_descs);
+       isert_conn->rx_descs = NULL;
 }
 
 static void isert_cq_work(struct work_struct *);
 static void isert_cq_callback(struct ib_cq *, void *);
 
-static int
-isert_create_device_ib_res(struct isert_device *device)
+static void
+isert_free_comps(struct isert_device *device)
 {
-       struct ib_device *ib_dev = device->ib_device;
-       struct ib_device_attr *dev_attr;
-       int ret = 0, i;
-       int max_cqe;
-
-       dev_attr = &device->dev_attr;
-       ret = isert_query_device(ib_dev, dev_attr);
-       if (ret)
-               return ret;
+       int i;
 
-       max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+       for (i = 0; i < device->comps_used; i++) {
+               struct isert_comp *comp = &device->comps[i];
 
-       /* asign function handlers */
-       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
-           dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
-               device->use_fastreg = 1;
-               device->reg_rdma_mem = isert_reg_rdma;
-               device->unreg_rdma_mem = isert_unreg_rdma;
-       } else {
-               device->use_fastreg = 0;
-               device->reg_rdma_mem = isert_map_rdma;
-               device->unreg_rdma_mem = isert_unmap_cmd;
+               if (comp->cq) {
+                       cancel_work_sync(&comp->work);
+                       ib_destroy_cq(comp->cq);
+               }
        }
+       kfree(device->comps);
+}
 
-       /* Check signature cap */
-       device->pi_capable = dev_attr->device_cap_flags &
-                            IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
+static int
+isert_alloc_comps(struct isert_device *device,
+                 struct ib_device_attr *attr)
+{
+       int i, max_cqe, ret = 0;
 
        device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(),
-                                       device->ib_device->num_comp_vectors));
+                                device->ib_device->num_comp_vectors));
+
        isert_info("Using %d CQs, %s supports %d vectors support "
                   "Fast registration %d pi_capable %d\n",
                   device->comps_used, device->ib_device->name,
@@ -288,6 +313,8 @@ isert_create_device_ib_res(struct isert_device *device)
                return -ENOMEM;
        }
 
+       max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+
        for (i = 0; i < device->comps_used; i++) {
                struct isert_comp *comp = &device->comps[i];
 
@@ -299,6 +326,7 @@ isert_create_device_ib_res(struct isert_device *device)
                                        (void *)comp,
                                        max_cqe, i);
                if (IS_ERR(comp->cq)) {
+                       isert_err("Unable to allocate cq\n");
                        ret = PTR_ERR(comp->cq);
                        comp->cq = NULL;
                        goto out_cq;
@@ -310,40 +338,79 @@ isert_create_device_ib_res(struct isert_device *device)
        }
 
        return 0;
-
 out_cq:
-       for (i = 0; i < device->comps_used; i++) {
-               struct isert_comp *comp = &device->comps[i];
+       isert_free_comps(device);
+       return ret;
+}
 
-               if (comp->cq) {
-                       cancel_work_sync(&comp->work);
-                       ib_destroy_cq(comp->cq);
-               }
+static int
+isert_create_device_ib_res(struct isert_device *device)
+{
+       struct ib_device_attr *dev_attr;
+       int ret;
+
+       dev_attr = &device->dev_attr;
+       ret = isert_query_device(device->ib_device, dev_attr);
+       if (ret)
+               return ret;
+
+       /* asign function handlers */
+       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+           dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+               device->use_fastreg = 1;
+               device->reg_rdma_mem = isert_reg_rdma;
+               device->unreg_rdma_mem = isert_unreg_rdma;
+       } else {
+               device->use_fastreg = 0;
+               device->reg_rdma_mem = isert_map_rdma;
+               device->unreg_rdma_mem = isert_unmap_cmd;
        }
-       kfree(device->comps);
 
+       ret = isert_alloc_comps(device, dev_attr);
+       if (ret)
+               return ret;
+
+       device->pd = ib_alloc_pd(device->ib_device);
+       if (IS_ERR(device->pd)) {
+               ret = PTR_ERR(device->pd);
+               isert_err("failed to allocate pd, device %p, ret=%d\n",
+                         device, ret);
+               goto out_cq;
+       }
+
+       device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
+       if (IS_ERR(device->mr)) {
+               ret = PTR_ERR(device->mr);
+               isert_err("failed to create dma mr, device %p, ret=%d\n",
+                         device, ret);
+               goto out_mr;
+       }
+
+       /* Check signature cap */
+       device->pi_capable = dev_attr->device_cap_flags &
+                            IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
+
+       return 0;
+
+out_mr:
+       ib_dealloc_pd(device->pd);
+out_cq:
+       isert_free_comps(device);
        return ret;
 }
 
 static void
 isert_free_device_ib_res(struct isert_device *device)
 {
-       int i;
-
        isert_info("device %p\n", device);
 
-       for (i = 0; i < device->comps_used; i++) {
-               struct isert_comp *comp = &device->comps[i];
-
-               cancel_work_sync(&comp->work);
-               ib_destroy_cq(comp->cq);
-               comp->cq = NULL;
-       }
-       kfree(device->comps);
+       ib_dereg_mr(device->mr);
+       ib_dealloc_pd(device->pd);
+       isert_free_comps(device);
 }
 
 static void
-isert_device_try_release(struct isert_device *device)
+isert_device_put(struct isert_device *device)
 {
        mutex_lock(&device_list_mutex);
        device->refcount--;
@@ -357,7 +424,7 @@ isert_device_try_release(struct isert_device *device)
 }
 
 static struct isert_device *
-isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)
+isert_device_get(struct rdma_cm_id *cma_id)
 {
        struct isert_device *device;
        int ret;
@@ -404,13 +471,13 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
        struct fast_reg_descriptor *fr_desc, *tmp;
        int i = 0;
 
-       if (list_empty(&isert_conn->conn_fr_pool))
+       if (list_empty(&isert_conn->fr_pool))
                return;
 
        isert_info("Freeing conn %p fastreg pool", isert_conn);
 
        list_for_each_entry_safe(fr_desc, tmp,
-                                &isert_conn->conn_fr_pool, list) {
+                                &isert_conn->fr_pool, list) {
                list_del(&fr_desc->list);
                ib_free_fast_reg_page_list(fr_desc->data_frpl);
                ib_dereg_mr(fr_desc->data_mr);
@@ -424,9 +491,9 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
                ++i;
        }
 
-       if (i < isert_conn->conn_fr_pool_size)
+       if (i < isert_conn->fr_pool_size)
                isert_warn("Pool still has %d regions registered\n",
-                       isert_conn->conn_fr_pool_size - i);
+                       isert_conn->fr_pool_size - i);
 }
 
 static int
@@ -526,7 +593,7 @@ static int
 isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
 {
        struct fast_reg_descriptor *fr_desc;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
        struct se_session *se_sess = isert_conn->conn->sess->se_sess;
        struct se_node_acl *se_nacl = se_sess->se_node_acl;
        int i, ret, tag_num;
@@ -537,7 +604,7 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
        tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth);
        tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS;
 
-       isert_conn->conn_fr_pool_size = 0;
+       isert_conn->fr_pool_size = 0;
        for (i = 0; i < tag_num; i++) {
                fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
                if (!fr_desc) {
@@ -547,7 +614,7 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
                }
 
                ret = isert_create_fr_desc(device->ib_device,
-                                          isert_conn->conn_pd, fr_desc);
+                                          device->pd, fr_desc);
                if (ret) {
                        isert_err("Failed to create fastreg descriptor err=%d\n",
                               ret);
@@ -555,12 +622,12 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
                        goto err;
                }
 
-               list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool);
-               isert_conn->conn_fr_pool_size++;
+               list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
+               isert_conn->fr_pool_size++;
        }
 
        isert_dbg("Creating conn %p fastreg pool size=%d",
-                isert_conn, isert_conn->conn_fr_pool_size);
+                isert_conn, isert_conn->fr_pool_size);
 
        return 0;
 
@@ -569,55 +636,50 @@ err:
        return ret;
 }
 
-static int
-isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
+static void
+isert_init_conn(struct isert_conn *isert_conn)
 {
-       struct isert_np *isert_np = cma_id->context;
-       struct iscsi_np *np = isert_np->np;
-       struct isert_conn *isert_conn;
-       struct isert_device *device;
-       struct ib_device *ib_dev = cma_id->device;
-       int ret = 0;
-
-       spin_lock_bh(&np->np_thread_lock);
-       if (!np->enabled) {
-               spin_unlock_bh(&np->np_thread_lock);
-               isert_dbg("iscsi_np is not enabled, reject connect request\n");
-               return rdma_reject(cma_id, NULL, 0);
-       }
-       spin_unlock_bh(&np->np_thread_lock);
-
-       isert_dbg("cma_id: %p, portal: %p\n",
-                cma_id, cma_id->context);
-
-       isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL);
-       if (!isert_conn) {
-               isert_err("Unable to allocate isert_conn\n");
-               return -ENOMEM;
-       }
        isert_conn->state = ISER_CONN_INIT;
-       INIT_LIST_HEAD(&isert_conn->conn_accept_node);
-       init_completion(&isert_conn->conn_login_comp);
+       INIT_LIST_HEAD(&isert_conn->accept_node);
+       init_completion(&isert_conn->login_comp);
        init_completion(&isert_conn->login_req_comp);
-       init_completion(&isert_conn->conn_wait);
-       kref_init(&isert_conn->conn_kref);
-       mutex_init(&isert_conn->conn_mutex);
-       spin_lock_init(&isert_conn->conn_lock);
-       INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
+       init_completion(&isert_conn->wait);
+       kref_init(&isert_conn->kref);
+       mutex_init(&isert_conn->mutex);
+       spin_lock_init(&isert_conn->pool_lock);
+       INIT_LIST_HEAD(&isert_conn->fr_pool);
+}
+
+static void
+isert_free_login_buf(struct isert_conn *isert_conn)
+{
+       struct ib_device *ib_dev = isert_conn->device->ib_device;
 
-       isert_conn->conn_cm_id = cma_id;
+       ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
+                           ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
+       ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN,
+                           DMA_FROM_DEVICE);
+       kfree(isert_conn->login_buf);
+}
+
+static int
+isert_alloc_login_buf(struct isert_conn *isert_conn,
+                     struct ib_device *ib_dev)
+{
+       int ret;
 
        isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
                                        ISER_RX_LOGIN_SIZE, GFP_KERNEL);
        if (!isert_conn->login_buf) {
                isert_err("Unable to allocate isert_conn->login_buf\n");
-               ret = -ENOMEM;
-               goto out;
+               return -ENOMEM;
        }
 
        isert_conn->login_req_buf = isert_conn->login_buf;
        isert_conn->login_rsp_buf = isert_conn->login_buf +
                                    ISCSI_DEF_MAX_RECV_SEG_LEN;
+
        isert_dbg("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
                 isert_conn->login_buf, isert_conn->login_req_buf,
                 isert_conn->login_rsp_buf);
@@ -628,8 +690,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 
        ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma);
        if (ret) {
-               isert_err("ib_dma_mapping_error failed for login_req_dma: %d\n",
-                      ret);
+               isert_err("login_req_dma mapping error: %d\n", ret);
                isert_conn->login_req_dma = 0;
                goto out_login_buf;
        }
@@ -640,17 +701,58 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 
        ret = ib_dma_mapping_error(ib_dev, isert_conn->login_rsp_dma);
        if (ret) {
-               isert_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n",
-                      ret);
+               isert_err("login_rsp_dma mapping error: %d\n", ret);
                isert_conn->login_rsp_dma = 0;
                goto out_req_dma_map;
        }
 
-       device = isert_device_find_by_ib_dev(cma_id);
+       return 0;
+
+out_req_dma_map:
+       ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE);
+out_login_buf:
+       kfree(isert_conn->login_buf);
+       return ret;
+}
+
+static int
+isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
+{
+       struct isert_np *isert_np = cma_id->context;
+       struct iscsi_np *np = isert_np->np;
+       struct isert_conn *isert_conn;
+       struct isert_device *device;
+       int ret = 0;
+
+       spin_lock_bh(&np->np_thread_lock);
+       if (!np->enabled) {
+               spin_unlock_bh(&np->np_thread_lock);
+               isert_dbg("iscsi_np is not enabled, reject connect request\n");
+               return rdma_reject(cma_id, NULL, 0);
+       }
+       spin_unlock_bh(&np->np_thread_lock);
+
+       isert_dbg("cma_id: %p, portal: %p\n",
+                cma_id, cma_id->context);
+
+       isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL);
+       if (!isert_conn)
+               return -ENOMEM;
+
+       isert_init_conn(isert_conn);
+       isert_conn->cm_id = cma_id;
+
+       ret = isert_alloc_login_buf(isert_conn, cma_id->device);
+       if (ret)
+               goto out;
+
+       device = isert_device_get(cma_id);
        if (IS_ERR(device)) {
                ret = PTR_ERR(device);
                goto out_rsp_dma_map;
        }
+       isert_conn->device = device;
 
        /* Set max inflight RDMA READ requests */
        isert_conn->initiator_depth = min_t(u8,
@@ -658,24 +760,6 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
                                device->dev_attr.max_qp_init_rd_atom);
        isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
 
-       isert_conn->conn_device = device;
-       isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device);
-       if (IS_ERR(isert_conn->conn_pd)) {
-               ret = PTR_ERR(isert_conn->conn_pd);
-               isert_err("ib_alloc_pd failed for conn %p: ret=%d\n",
-                      isert_conn, ret);
-               goto out_pd;
-       }
-
-       isert_conn->conn_mr = ib_get_dma_mr(isert_conn->conn_pd,
-                                          IB_ACCESS_LOCAL_WRITE);
-       if (IS_ERR(isert_conn->conn_mr)) {
-               ret = PTR_ERR(isert_conn->conn_mr);
-               isert_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
-                      isert_conn, ret);
-               goto out_mr;
-       }
-
        ret = isert_conn_setup_qp(isert_conn, cma_id);
        if (ret)
                goto out_conn_dev;
@@ -689,7 +773,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
                goto out_conn_dev;
 
        mutex_lock(&isert_np->np_accept_mutex);
-       list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list);
+       list_add_tail(&isert_conn->accept_node, &isert_np->np_accept_list);
        mutex_unlock(&isert_np->np_accept_mutex);
 
        isert_info("np %p: Allow accept_np to continue\n", np);
@@ -697,19 +781,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
        return 0;
 
 out_conn_dev:
-       ib_dereg_mr(isert_conn->conn_mr);
-out_mr:
-       ib_dealloc_pd(isert_conn->conn_pd);
-out_pd:
-       isert_device_try_release(device);
+       isert_device_put(device);
 out_rsp_dma_map:
-       ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
-                           ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
-out_req_dma_map:
-       ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
-                           ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE);
-out_login_buf:
-       kfree(isert_conn->login_buf);
+       isert_free_login_buf(isert_conn);
 out:
        kfree(isert_conn);
        rdma_reject(cma_id, NULL, 0);
@@ -719,43 +793,32 @@ out:
 static void
 isert_connect_release(struct isert_conn *isert_conn)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
 
        isert_dbg("conn %p\n", isert_conn);
 
-       if (device && device->use_fastreg)
+       BUG_ON(!device);
+
+       if (device->use_fastreg)
                isert_conn_free_fastreg_pool(isert_conn);
 
        isert_free_rx_descriptors(isert_conn);
-       rdma_destroy_id(isert_conn->conn_cm_id);
+       if (isert_conn->cm_id)
+               rdma_destroy_id(isert_conn->cm_id);
 
-       if (isert_conn->conn_qp) {
-               struct isert_comp *comp = isert_conn->conn_qp->recv_cq->cq_context;
+       if (isert_conn->qp) {
+               struct isert_comp *comp = isert_conn->qp->recv_cq->cq_context;
 
-               isert_dbg("dec completion context %p active_qps\n", comp);
-               mutex_lock(&device_list_mutex);
-               comp->active_qps--;
-               mutex_unlock(&device_list_mutex);
-
-               ib_destroy_qp(isert_conn->conn_qp);
+               isert_comp_put(comp);
+               ib_destroy_qp(isert_conn->qp);
        }
 
-       ib_dereg_mr(isert_conn->conn_mr);
-       ib_dealloc_pd(isert_conn->conn_pd);
+       if (isert_conn->login_buf)
+               isert_free_login_buf(isert_conn);
 
-       if (isert_conn->login_buf) {
-               ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
-                                   ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
-               ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
-                                   ISCSI_DEF_MAX_RECV_SEG_LEN,
-                                   DMA_FROM_DEVICE);
-               kfree(isert_conn->login_buf);
-       }
-       kfree(isert_conn);
+       isert_device_put(device);
 
-       if (device)
-               isert_device_try_release(device);
+       kfree(isert_conn);
 }
 
 static void
@@ -765,22 +828,22 @@ isert_connected_handler(struct rdma_cm_id *cma_id)
 
        isert_info("conn %p\n", isert_conn);
 
-       if (!kref_get_unless_zero(&isert_conn->conn_kref)) {
+       if (!kref_get_unless_zero(&isert_conn->kref)) {
                isert_warn("conn %p connect_release is running\n", isert_conn);
                return;
        }
 
-       mutex_lock(&isert_conn->conn_mutex);
+       mutex_lock(&isert_conn->mutex);
        if (isert_conn->state != ISER_CONN_FULL_FEATURE)
                isert_conn->state = ISER_CONN_UP;
-       mutex_unlock(&isert_conn->conn_mutex);
+       mutex_unlock(&isert_conn->mutex);
 }
 
 static void
-isert_release_conn_kref(struct kref *kref)
+isert_release_kref(struct kref *kref)
 {
        struct isert_conn *isert_conn = container_of(kref,
-                               struct isert_conn, conn_kref);
+                               struct isert_conn, kref);
 
        isert_info("conn %p final kref %s/%d\n", isert_conn, current->comm,
                   current->pid);
@@ -791,7 +854,7 @@ isert_release_conn_kref(struct kref *kref)
 static void
 isert_put_conn(struct isert_conn *isert_conn)
 {
-       kref_put(&isert_conn->conn_kref, isert_release_conn_kref);
+       kref_put(&isert_conn->kref, isert_release_kref);
 }
 
 /**
@@ -803,7 +866,7 @@ isert_put_conn(struct isert_conn *isert_conn)
  * to TEMINATING and start teardown sequence (rdma_disconnect).
  * In case the connection state is UP, complete flush as well.
  *
- * This routine must be called with conn_mutex held. Thus it is
+ * This routine must be called with mutex held. Thus it is
  * safe to call multiple times.
  */
 static void
@@ -819,7 +882,7 @@ isert_conn_terminate(struct isert_conn *isert_conn)
                isert_info("Terminating conn %p state %d\n",
                           isert_conn, isert_conn->state);
                isert_conn->state = ISER_CONN_TERMINATING;
-               err = rdma_disconnect(isert_conn->conn_cm_id);
+               err = rdma_disconnect(isert_conn->cm_id);
                if (err)
                        isert_warn("Failed rdma_disconnect isert_conn %p\n",
                                   isert_conn);
@@ -868,22 +931,25 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
 
        isert_conn = cma_id->qp->qp_context;
 
-       mutex_lock(&isert_conn->conn_mutex);
+       mutex_lock(&isert_conn->mutex);
        isert_conn_terminate(isert_conn);
-       mutex_unlock(&isert_conn->conn_mutex);
+       mutex_unlock(&isert_conn->mutex);
 
-       isert_info("conn %p completing conn_wait\n", isert_conn);
-       complete(&isert_conn->conn_wait);
+       isert_info("conn %p completing wait\n", isert_conn);
+       complete(&isert_conn->wait);
 
        return 0;
 }
 
-static void
+static int
 isert_connect_error(struct rdma_cm_id *cma_id)
 {
        struct isert_conn *isert_conn = cma_id->qp->qp_context;
 
+       isert_conn->cm_id = NULL;
        isert_put_conn(isert_conn);
+
+       return -1;
 }
 
 static int
@@ -912,7 +978,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
        case RDMA_CM_EVENT_REJECTED:       /* FALLTHRU */
        case RDMA_CM_EVENT_UNREACHABLE:    /* FALLTHRU */
        case RDMA_CM_EVENT_CONNECT_ERROR:
-               isert_connect_error(cma_id);
+               ret = isert_connect_error(cma_id);
                break;
        default:
                isert_err("Unhandled RDMA CMA event: %d\n", event->event);
@@ -927,11 +993,11 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
 {
        struct ib_recv_wr *rx_wr, *rx_wr_failed;
        int i, ret;
-       unsigned int rx_head = isert_conn->conn_rx_desc_head;
+       unsigned int rx_head = isert_conn->rx_desc_head;
        struct iser_rx_desc *rx_desc;
 
-       for (rx_wr = isert_conn->conn_rx_wr, i = 0; i < count; i++, rx_wr++) {
-               rx_desc         = &isert_conn->conn_rx_descs[rx_head];
+       for (rx_wr = isert_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
+               rx_desc         = &isert_conn->rx_descs[rx_head];
                rx_wr->wr_id    = (uintptr_t)rx_desc;
                rx_wr->sg_list  = &rx_desc->rx_sg;
                rx_wr->num_sge  = 1;
@@ -943,14 +1009,14 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
        rx_wr->next = NULL; /* mark end of work requests list */
 
        isert_conn->post_recv_buf_count += count;
-       ret = ib_post_recv(isert_conn->conn_qp, isert_conn->conn_rx_wr,
+       ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr,
                                &rx_wr_failed);
        if (ret) {
                isert_err("ib_post_recv() failed with ret: %d\n", ret);
                isert_conn->post_recv_buf_count -= count;
        } else {
                isert_dbg("Posted %d RX buffers\n", count);
-               isert_conn->conn_rx_desc_head = rx_head;
+               isert_conn->rx_desc_head = rx_head;
        }
        return ret;
 }
@@ -958,7 +1024,7 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
 static int
 isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
        struct ib_send_wr send_wr, *send_wr_failed;
        int ret;
 
@@ -972,7 +1038,7 @@ isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
        send_wr.opcode  = IB_WR_SEND;
        send_wr.send_flags = IB_SEND_SIGNALED;
 
-       ret = ib_post_send(isert_conn->conn_qp, &send_wr, &send_wr_failed);
+       ret = ib_post_send(isert_conn->qp, &send_wr, &send_wr_failed);
        if (ret)
                isert_err("ib_post_send() failed, ret: %d\n", ret);
 
@@ -984,7 +1050,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
                       struct isert_cmd *isert_cmd,
                       struct iser_tx_desc *tx_desc)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
 
        ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
                                   ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -995,8 +1062,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
        tx_desc->num_sge = 1;
        tx_desc->isert_cmd = isert_cmd;
 
-       if (tx_desc->tx_sg[0].lkey != isert_conn->conn_mr->lkey) {
-               tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
+       if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
+               tx_desc->tx_sg[0].lkey = device->mr->lkey;
                isert_dbg("tx_desc %p lkey mismatch, fixing\n", tx_desc);
        }
 }
@@ -1005,7 +1072,8 @@ static int
 isert_init_tx_hdrs(struct isert_conn *isert_conn,
                   struct iser_tx_desc *tx_desc)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
        u64 dma_addr;
 
        dma_addr = ib_dma_map_single(ib_dev, (void *)tx_desc,
@@ -1018,7 +1086,7 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn,
        tx_desc->dma_addr = dma_addr;
        tx_desc->tx_sg[0].addr  = tx_desc->dma_addr;
        tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
-       tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
+       tx_desc->tx_sg[0].lkey = device->mr->lkey;
 
        isert_dbg("Setup tx_sg[0].addr: 0x%llx length: %u lkey: 0x%x\n",
                  tx_desc->tx_sg[0].addr, tx_desc->tx_sg[0].length,
@@ -1051,7 +1119,7 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
        memset(&sge, 0, sizeof(struct ib_sge));
        sge.addr = isert_conn->login_req_dma;
        sge.length = ISER_RX_LOGIN_SIZE;
-       sge.lkey = isert_conn->conn_mr->lkey;
+       sge.lkey = isert_conn->device->mr->lkey;
 
        isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n",
                sge.addr, sge.length, sge.lkey);
@@ -1062,7 +1130,7 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
        rx_wr.num_sge = 1;
 
        isert_conn->post_recv_buf_count++;
-       ret = ib_post_recv(isert_conn->conn_qp, &rx_wr, &rx_wr_fail);
+       ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail);
        if (ret) {
                isert_err("ib_post_recv() failed: %d\n", ret);
                isert_conn->post_recv_buf_count--;
@@ -1076,8 +1144,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
                   u32 length)
 {
        struct isert_conn *isert_conn = conn->context;
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-       struct iser_tx_desc *tx_desc = &isert_conn->conn_login_tx_desc;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
+       struct iser_tx_desc *tx_desc = &isert_conn->login_tx_desc;
        int ret;
 
        isert_create_send_desc(isert_conn, NULL, tx_desc);
@@ -1100,13 +1169,13 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
 
                tx_dsg->addr    = isert_conn->login_rsp_dma;
                tx_dsg->length  = length;
-               tx_dsg->lkey    = isert_conn->conn_mr->lkey;
+               tx_dsg->lkey    = isert_conn->device->mr->lkey;
                tx_desc->num_sge = 2;
        }
        if (!login->login_failed) {
                if (login->login_complete) {
                        if (!conn->sess->sess_ops->SessionType &&
-                           isert_conn->conn_device->use_fastreg) {
+                           isert_conn->device->use_fastreg) {
                                ret = isert_conn_create_fastreg_pool(isert_conn);
                                if (ret) {
                                        isert_err("Conn: %p failed to create"
@@ -1124,9 +1193,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
                                return ret;
 
                        /* Now we are in FULL_FEATURE phase */
-                       mutex_lock(&isert_conn->conn_mutex);
+                       mutex_lock(&isert_conn->mutex);
                        isert_conn->state = ISER_CONN_FULL_FEATURE;
-                       mutex_unlock(&isert_conn->conn_mutex);
+                       mutex_unlock(&isert_conn->mutex);
                        goto post_send;
                }
 
@@ -1185,7 +1254,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
        memcpy(login->req_buf, &rx_desc->data[0], size);
 
        if (login->first_request) {
-               complete(&isert_conn->conn_login_comp);
+               complete(&isert_conn->login_comp);
                return;
        }
        schedule_delayed_work(&conn->login_work, 0);
@@ -1194,7 +1263,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
 static struct iscsi_cmd
 *isert_allocate_cmd(struct iscsi_conn *conn)
 {
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
        struct isert_cmd *isert_cmd;
        struct iscsi_cmd *cmd;
 
@@ -1379,13 +1448,12 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
 {
        struct iscsi_hdr *hdr = &rx_desc->iscsi_header;
        struct iscsi_conn *conn = isert_conn->conn;
-       struct iscsi_session *sess = conn->sess;
        struct iscsi_cmd *cmd;
        struct isert_cmd *isert_cmd;
        int ret = -EINVAL;
        u8 opcode = (hdr->opcode & ISCSI_OPCODE_MASK);
 
-       if (sess->sess_ops->SessionType &&
+       if (conn->sess->sess_ops->SessionType &&
           (!(opcode & ISCSI_OP_TEXT) || !(opcode & ISCSI_OP_LOGOUT))) {
                isert_err("Got illegal opcode: 0x%02x in SessionType=Discovery,"
                          " ignoring\n", opcode);
@@ -1497,10 +1565,11 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
 }
 
 static void
-isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
-                   u32 xfer_len)
+isert_rcv_completion(struct iser_rx_desc *desc,
+                    struct isert_conn *isert_conn,
+                    u32 xfer_len)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
        struct iscsi_hdr *hdr;
        u64 rx_dma;
        int rx_buflen, outstanding;
@@ -1532,9 +1601,9 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
                        if (login && !login->first_request)
                                isert_rx_login_req(isert_conn);
                }
-               mutex_lock(&isert_conn->conn_mutex);
+               mutex_lock(&isert_conn->mutex);
                complete(&isert_conn->login_req_comp);
-               mutex_unlock(&isert_conn->conn_mutex);
+               mutex_unlock(&isert_conn->mutex);
        } else {
                isert_rx_do_work(desc, isert_conn);
        }
@@ -1566,7 +1635,7 @@ isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
                   struct scatterlist *sg, u32 nents, u32 length, u32 offset,
                   enum iser_ib_op_code op, struct isert_data_buf *data)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
 
        data->dma_dir = op == ISER_IB_RDMA_WRITE ?
                              DMA_TO_DEVICE : DMA_FROM_DEVICE;
@@ -1597,7 +1666,7 @@ isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 static void
 isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
 
        ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir);
        memset(data, 0, sizeof(*data));
@@ -1634,7 +1703,6 @@ static void
 isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
 {
        struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-       LIST_HEAD(unmap_list);
 
        isert_dbg("Cmd %p\n", isert_cmd);
 
@@ -1644,9 +1712,9 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
                        isert_unmap_data_buf(isert_conn, &wr->prot);
                        wr->fr_desc->ind &= ~ISERT_PROTECTED;
                }
-               spin_lock_bh(&isert_conn->conn_lock);
-               list_add_tail(&wr->fr_desc->list, &isert_conn->conn_fr_pool);
-               spin_unlock_bh(&isert_conn->conn_lock);
+               spin_lock_bh(&isert_conn->pool_lock);
+               list_add_tail(&wr->fr_desc->list, &isert_conn->fr_pool);
+               spin_unlock_bh(&isert_conn->pool_lock);
                wr->fr_desc = NULL;
        }
 
@@ -1665,7 +1733,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
        struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
        struct isert_conn *isert_conn = isert_cmd->conn;
        struct iscsi_conn *conn = isert_conn->conn;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
        struct iscsi_text_rsp *hdr;
 
        isert_dbg("Cmd %p\n", isert_cmd);
@@ -1815,7 +1883,7 @@ isert_completion_rdma_write(struct iser_tx_desc *tx_desc,
        struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
        struct se_cmd *se_cmd = &cmd->se_cmd;
        struct isert_conn *isert_conn = isert_cmd->conn;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
        int ret = 0;
 
        if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
@@ -1841,7 +1909,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
        struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
        struct se_cmd *se_cmd = &cmd->se_cmd;
        struct isert_conn *isert_conn = isert_cmd->conn;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
        int ret = 0;
 
        if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
@@ -1861,11 +1929,13 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
        cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
        spin_unlock_bh(&cmd->istate_lock);
 
-       if (ret)
+       if (ret) {
+               target_put_sess_cmd(se_cmd->se_sess, se_cmd);
                transport_send_check_condition_and_sense(se_cmd,
                                                         se_cmd->pi_err, 0);
-       else
+       } else {
                target_execute_cmd(se_cmd);
+       }
 }
 
 static void
@@ -1874,7 +1944,7 @@ isert_do_control_comp(struct work_struct *work)
        struct isert_cmd *isert_cmd = container_of(work,
                        struct isert_cmd, comp_work);
        struct isert_conn *isert_conn = isert_cmd->conn;
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
        struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 
        isert_dbg("Cmd %p i_state %d\n", isert_cmd, cmd->i_state);
@@ -1922,10 +1992,10 @@ isert_response_completion(struct iser_tx_desc *tx_desc,
 }
 
 static void
-isert_send_completion(struct iser_tx_desc *tx_desc,
+isert_snd_completion(struct iser_tx_desc *tx_desc,
                      struct isert_conn *isert_conn)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
        struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
        struct isert_rdma_wr *wr;
 
@@ -1938,10 +2008,6 @@ isert_send_completion(struct iser_tx_desc *tx_desc,
        isert_dbg("Cmd %p iser_ib_op %d\n", isert_cmd, wr->iser_ib_op);
 
        switch (wr->iser_ib_op) {
-       case ISER_IB_RECV:
-               isert_err("Got ISER_IB_RECV\n");
-               dump_stack();
-               break;
        case ISER_IB_SEND:
                isert_response_completion(tx_desc, isert_cmd,
                                          isert_conn, ib_dev);
@@ -1973,8 +2039,8 @@ isert_send_completion(struct iser_tx_desc *tx_desc,
 static inline bool
 is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id)
 {
-       void *start = isert_conn->conn_rx_descs;
-       int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->conn_rx_descs);
+       void *start = isert_conn->rx_descs;
+       int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->rx_descs);
 
        if (wr_id >= start && wr_id < start + len)
                return false;
@@ -1986,11 +2052,11 @@ static void
 isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc)
 {
        if (wc->wr_id == ISER_BEACON_WRID) {
-               isert_info("conn %p completing conn_wait_comp_err\n",
+               isert_info("conn %p completing wait_comp_err\n",
                           isert_conn);
-               complete(&isert_conn->conn_wait_comp_err);
+               complete(&isert_conn->wait_comp_err);
        } else if (is_isert_tx_desc(isert_conn, (void *)(uintptr_t)wc->wr_id)) {
-               struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+               struct ib_device *ib_dev = isert_conn->cm_id->device;
                struct isert_cmd *isert_cmd;
                struct iser_tx_desc *desc;
 
@@ -2018,10 +2084,10 @@ isert_handle_wc(struct ib_wc *wc)
        if (likely(wc->status == IB_WC_SUCCESS)) {
                if (wc->opcode == IB_WC_RECV) {
                        rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
-                       isert_rx_completion(rx_desc, isert_conn, wc->byte_len);
+                       isert_rcv_completion(rx_desc, isert_conn, wc->byte_len);
                } else {
                        tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
-                       isert_send_completion(tx_desc, isert_conn);
+                       isert_snd_completion(tx_desc, isert_conn);
                }
        } else {
                if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -2070,7 +2136,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
        struct ib_send_wr *wr_failed;
        int ret;
 
-       ret = ib_post_send(isert_conn->conn_qp, &isert_cmd->tx_desc.send_wr,
+       ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr,
                           &wr_failed);
        if (ret) {
                isert_err("ib_post_send failed with %d\n", ret);
@@ -2083,7 +2149,7 @@ static int
 isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
        struct iscsi_scsi_rsp *hdr = (struct iscsi_scsi_rsp *)
                                &isert_cmd->tx_desc.iscsi_header;
@@ -2097,7 +2163,8 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
        if (cmd->se_cmd.sense_buffer &&
            ((cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
            (cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) {
-               struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+               struct isert_device *device = isert_conn->device;
+               struct ib_device *ib_dev = device->ib_device;
                struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1];
                u32 padding, pdu_len;
 
@@ -2116,7 +2183,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
                isert_cmd->pdu_buf_len = pdu_len;
                tx_dsg->addr    = isert_cmd->pdu_buf_dma;
                tx_dsg->length  = pdu_len;
-               tx_dsg->lkey    = isert_conn->conn_mr->lkey;
+               tx_dsg->lkey    = device->mr->lkey;
                isert_cmd->tx_desc.num_sge = 2;
        }
 
@@ -2131,8 +2198,8 @@ static void
 isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_conn *isert_conn = conn->context;
+       struct isert_device *device = isert_conn->device;
 
        spin_lock_bh(&conn->cmd_lock);
        if (!list_empty(&cmd->i_conn_node))
@@ -2148,8 +2215,8 @@ isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 static enum target_prot_op
 isert_get_sup_prot_ops(struct iscsi_conn *conn)
 {
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_conn *isert_conn = conn->context;
+       struct isert_device *device = isert_conn->device;
 
        if (conn->tpg->tpg_attrib.t10_pi) {
                if (device->pi_capable) {
@@ -2170,7 +2237,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
                bool nopout_response)
 {
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
 
        isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
@@ -2189,7 +2256,7 @@ static int
 isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 {
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
 
        isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
@@ -2207,7 +2274,7 @@ static int
 isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 {
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
 
        isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
@@ -2225,9 +2292,10 @@ static int
 isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 {
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
        struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1];
        struct iscsi_reject *hdr =
                (struct iscsi_reject *)&isert_cmd->tx_desc.iscsi_header;
@@ -2243,7 +2311,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
        isert_cmd->pdu_buf_len = ISCSI_HDR_LEN;
        tx_dsg->addr    = isert_cmd->pdu_buf_dma;
        tx_dsg->length  = ISCSI_HDR_LEN;
-       tx_dsg->lkey    = isert_conn->conn_mr->lkey;
+       tx_dsg->lkey    = device->mr->lkey;
        isert_cmd->tx_desc.num_sge = 2;
 
        isert_init_send_wr(isert_conn, isert_cmd, send_wr);
@@ -2257,7 +2325,7 @@ static int
 isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 {
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
        struct iscsi_text_rsp *hdr =
                (struct iscsi_text_rsp *)&isert_cmd->tx_desc.iscsi_header;
@@ -2273,7 +2341,8 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
        isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
 
        if (txt_rsp_len) {
-               struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+               struct isert_device *device = isert_conn->device;
+               struct ib_device *ib_dev = device->ib_device;
                struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1];
                void *txt_rsp_buf = cmd->buf_ptr;
 
@@ -2283,7 +2352,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
                isert_cmd->pdu_buf_len = txt_rsp_len;
                tx_dsg->addr    = isert_cmd->pdu_buf_dma;
                tx_dsg->length  = txt_rsp_len;
-               tx_dsg->lkey    = isert_conn->conn_mr->lkey;
+               tx_dsg->lkey    = device->mr->lkey;
                isert_cmd->tx_desc.num_sge = 2;
        }
        isert_init_send_wr(isert_conn, isert_cmd, send_wr);
@@ -2300,7 +2369,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 {
        struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
        struct scatterlist *sg_start, *tmp_sg;
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
        u32 sg_off, page_off;
        int i = 0, sg_nents;
 
@@ -2324,7 +2394,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
                ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
                ib_sge->length = min_t(u32, data_left,
                                ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
-               ib_sge->lkey = isert_conn->conn_mr->lkey;
+               ib_sge->lkey = device->mr->lkey;
 
                isert_dbg("RDMA ib_sge: addr: 0x%llx  length: %u lkey: %x\n",
                          ib_sge->addr, ib_sge->length, ib_sge->lkey);
@@ -2346,7 +2416,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 {
        struct se_cmd *se_cmd = &cmd->se_cmd;
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
        struct isert_data_buf *data = &wr->data;
        struct ib_send_wr *send_wr;
        struct ib_sge *ib_sge;
@@ -2485,7 +2555,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
                  enum isert_indicator ind,
                  struct ib_sge *sge)
 {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
        struct ib_mr *mr;
        struct ib_fast_reg_page_list *frpl;
        struct ib_send_wr fr_wr, inv_wr;
@@ -2494,7 +2565,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
        u32 page_off;
 
        if (mem->dma_nents == 1) {
-               sge->lkey = isert_conn->conn_mr->lkey;
+               sge->lkey = device->mr->lkey;
                sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
                sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
                isert_dbg("sge: addr: 0x%llx  length: %u lkey: %x\n",
@@ -2542,7 +2613,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
        else
                wr->next = &fr_wr;
 
-       ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+       ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
        if (ret) {
                isert_err("fast registration failed, ret:%d\n", ret);
                return ret;
@@ -2655,7 +2726,7 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
        else
                wr->next = &sig_wr;
 
-       ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+       ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
        if (ret) {
                isert_err("fast registration failed, ret:%d\n", ret);
                goto err;
@@ -2685,14 +2756,14 @@ isert_handle_prot_cmd(struct isert_conn *isert_conn,
                      struct isert_cmd *isert_cmd,
                      struct isert_rdma_wr *wr)
 {
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
        struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
        int ret;
 
        if (!wr->fr_desc->pi_ctx) {
                ret = isert_create_pi_ctx(wr->fr_desc,
                                          device->ib_device,
-                                         isert_conn->conn_pd);
+                                         device->pd);
                if (ret) {
                        isert_err("conn %p failed to allocate pi_ctx\n",
                                  isert_conn);
@@ -2763,11 +2834,11 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
                return ret;
 
        if (wr->data.dma_nents != 1 || isert_prot_cmd(isert_conn, se_cmd)) {
-               spin_lock_irqsave(&isert_conn->conn_lock, flags);
-               fr_desc = list_first_entry(&isert_conn->conn_fr_pool,
+               spin_lock_irqsave(&isert_conn->pool_lock, flags);
+               fr_desc = list_first_entry(&isert_conn->fr_pool,
                                           struct fast_reg_descriptor, list);
                list_del(&fr_desc->list);
-               spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+               spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
                wr->fr_desc = fr_desc;
        }
 
@@ -2814,9 +2885,9 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 
 unmap_cmd:
        if (fr_desc) {
-               spin_lock_irqsave(&isert_conn->conn_lock, flags);
-               list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool);
-               spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+               spin_lock_irqsave(&isert_conn->pool_lock, flags);
+               list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
+               spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
        }
        isert_unmap_data_buf(isert_conn, &wr->data);
 
@@ -2829,8 +2900,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
        struct se_cmd *se_cmd = &cmd->se_cmd;
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_conn *isert_conn = conn->context;
+       struct isert_device *device = isert_conn->device;
        struct ib_send_wr *wr_failed;
        int rc;
 
@@ -2859,7 +2930,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
                wr->send_wr_num += 1;
        }
 
-       rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
+       rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
        if (rc)
                isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
 
@@ -2879,8 +2950,8 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
        struct se_cmd *se_cmd = &cmd->se_cmd;
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_conn *isert_conn = conn->context;
+       struct isert_device *device = isert_conn->device;
        struct ib_send_wr *wr_failed;
        int rc;
 
@@ -2893,7 +2964,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
                return rc;
        }
 
-       rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
+       rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
        if (rc)
                isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
 
@@ -2987,7 +3058,7 @@ isert_setup_id(struct isert_np *isert_np)
                goto out_id;
        }
 
-       ret = rdma_listen(id, ISERT_RDMA_LISTEN_BACKLOG);
+       ret = rdma_listen(id, 0);
        if (ret) {
                isert_err("rdma_listen() failed: %d\n", ret);
                goto out_id;
@@ -3046,7 +3117,7 @@ out:
 static int
 isert_rdma_accept(struct isert_conn *isert_conn)
 {
-       struct rdma_cm_id *cm_id = isert_conn->conn_cm_id;
+       struct rdma_cm_id *cm_id = isert_conn->cm_id;
        struct rdma_conn_param cp;
        int ret;
 
@@ -3067,7 +3138,7 @@ isert_rdma_accept(struct isert_conn *isert_conn)
 static int
 isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
 {
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
        int ret;
 
        isert_info("before login_req comp conn: %p\n", isert_conn);
@@ -3090,8 +3161,8 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
 
        isert_rx_login_req(isert_conn);
 
-       isert_info("before conn_login_comp conn: %p\n", conn);
-       ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp);
+       isert_info("before login_comp conn: %p\n", conn);
+       ret = wait_for_completion_interruptible(&isert_conn->login_comp);
        if (ret)
                return ret;
 
@@ -3104,7 +3175,7 @@ static void
 isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn,
                    struct isert_conn *isert_conn)
 {
-       struct rdma_cm_id *cm_id = isert_conn->conn_cm_id;
+       struct rdma_cm_id *cm_id = isert_conn->cm_id;
        struct rdma_route *cm_route = &cm_id->route;
        struct sockaddr_in *sock_in;
        struct sockaddr_in6 *sock_in6;
@@ -3137,13 +3208,13 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn,
 static int
 isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
 {
-       struct isert_np *isert_np = (struct isert_np *)np->np_context;
+       struct isert_np *isert_np = np->np_context;
        struct isert_conn *isert_conn;
-       int max_accept = 0, ret;
+       int ret;
 
 accept_wait:
        ret = down_interruptible(&isert_np->np_sem);
-       if (ret || max_accept > 5)
+       if (ret)
                return -ENODEV;
 
        spin_lock_bh(&np->np_thread_lock);
@@ -3162,17 +3233,15 @@ accept_wait:
        mutex_lock(&isert_np->np_accept_mutex);
        if (list_empty(&isert_np->np_accept_list)) {
                mutex_unlock(&isert_np->np_accept_mutex);
-               max_accept++;
                goto accept_wait;
        }
        isert_conn = list_first_entry(&isert_np->np_accept_list,
-                       struct isert_conn, conn_accept_node);
-       list_del_init(&isert_conn->conn_accept_node);
+                       struct isert_conn, accept_node);
+       list_del_init(&isert_conn->accept_node);
        mutex_unlock(&isert_np->np_accept_mutex);
 
        conn->context = isert_conn;
        isert_conn->conn = conn;
-       max_accept = 0;
 
        isert_set_conn_info(np, conn, isert_conn);
 
@@ -3184,7 +3253,7 @@ accept_wait:
 static void
 isert_free_np(struct iscsi_np *np)
 {
-       struct isert_np *isert_np = (struct isert_np *)np->np_context;
+       struct isert_np *isert_np = np->np_context;
        struct isert_conn *isert_conn, *n;
 
        if (isert_np->np_cm_id)
@@ -3202,7 +3271,7 @@ isert_free_np(struct iscsi_np *np)
                isert_info("Still have isert connections, cleaning up...\n");
                list_for_each_entry_safe(isert_conn, n,
                                         &isert_np->np_accept_list,
-                                        conn_accept_node) {
+                                        accept_node) {
                        isert_info("cleaning isert_conn %p state (%d)\n",
                                   isert_conn, isert_conn->state);
                        isert_connect_release(isert_conn);
@@ -3222,11 +3291,11 @@ static void isert_release_work(struct work_struct *work)
 
        isert_info("Starting release conn %p\n", isert_conn);
 
-       wait_for_completion(&isert_conn->conn_wait);
+       wait_for_completion(&isert_conn->wait);
 
-       mutex_lock(&isert_conn->conn_mutex);
+       mutex_lock(&isert_conn->mutex);
        isert_conn->state = ISER_CONN_DOWN;
-       mutex_unlock(&isert_conn->conn_mutex);
+       mutex_unlock(&isert_conn->mutex);
 
        isert_info("Destroying conn %p\n", isert_conn);
        isert_put_conn(isert_conn);
@@ -3264,15 +3333,15 @@ isert_wait4flush(struct isert_conn *isert_conn)
 
        isert_info("conn %p\n", isert_conn);
 
-       init_completion(&isert_conn->conn_wait_comp_err);
+       init_completion(&isert_conn->wait_comp_err);
        isert_conn->beacon.wr_id = ISER_BEACON_WRID;
        /* post an indication that all flush errors were consumed */
-       if (ib_post_recv(isert_conn->conn_qp, &isert_conn->beacon, &bad_wr)) {
+       if (ib_post_recv(isert_conn->qp, &isert_conn->beacon, &bad_wr)) {
                isert_err("conn %p failed to post beacon", isert_conn);
                return;
        }
 
-       wait_for_completion(&isert_conn->conn_wait_comp_err);
+       wait_for_completion(&isert_conn->wait_comp_err);
 }
 
 static void isert_wait_conn(struct iscsi_conn *conn)
@@ -3281,17 +3350,17 @@ static void isert_wait_conn(struct iscsi_conn *conn)
 
        isert_info("Starting conn %p\n", isert_conn);
 
-       mutex_lock(&isert_conn->conn_mutex);
+       mutex_lock(&isert_conn->mutex);
        /*
-        * Only wait for conn_wait_comp_err if the isert_conn made it
+        * Only wait for wait_comp_err if the isert_conn made it
         * into full feature phase..
         */
        if (isert_conn->state == ISER_CONN_INIT) {
-               mutex_unlock(&isert_conn->conn_mutex);
+               mutex_unlock(&isert_conn->mutex);
                return;
        }
        isert_conn_terminate(isert_conn);
-       mutex_unlock(&isert_conn->conn_mutex);
+       mutex_unlock(&isert_conn->mutex);
 
        isert_wait4cmds(conn);
        isert_wait4flush(isert_conn);
@@ -3370,7 +3439,7 @@ static void __exit isert_exit(void)
 }
 
 MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");
-MODULE_VERSION("0.1");
+MODULE_VERSION("1.0");
 MODULE_AUTHOR("nab@Linux-iSCSI.org");
 MODULE_LICENSE("GPL");
 
index 8dc8415..9ec23a7 100644 (file)
@@ -31,7 +31,6 @@
 #define isert_err(fmt, arg...) \
        pr_err(PFX "%s: " fmt, __func__ , ## arg)
 
-#define ISERT_RDMA_LISTEN_BACKLOG      10
 #define ISCSI_ISER_SG_TABLESIZE                256
 #define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
 #define ISER_BEACON_WRID               0xfffffffffffffffeULL
@@ -160,27 +159,25 @@ struct isert_conn {
        u64                     login_req_dma;
        int                     login_req_len;
        u64                     login_rsp_dma;
-       unsigned int            conn_rx_desc_head;
-       struct iser_rx_desc     *conn_rx_descs;
-       struct ib_recv_wr       conn_rx_wr[ISERT_MIN_POSTED_RX];
+       unsigned int            rx_desc_head;
+       struct iser_rx_desc     *rx_descs;
+       struct ib_recv_wr       rx_wr[ISERT_MIN_POSTED_RX];
        struct iscsi_conn       *conn;
-       struct list_head        conn_accept_node;
-       struct completion       conn_login_comp;
+       struct list_head        accept_node;
+       struct completion       login_comp;
        struct completion       login_req_comp;
-       struct iser_tx_desc     conn_login_tx_desc;
-       struct rdma_cm_id       *conn_cm_id;
-       struct ib_pd            *conn_pd;
-       struct ib_mr            *conn_mr;
-       struct ib_qp            *conn_qp;
-       struct isert_device     *conn_device;
-       struct mutex            conn_mutex;
-       struct completion       conn_wait;
-       struct completion       conn_wait_comp_err;
-       struct kref             conn_kref;
-       struct list_head        conn_fr_pool;
-       int                     conn_fr_pool_size;
+       struct iser_tx_desc     login_tx_desc;
+       struct rdma_cm_id       *cm_id;
+       struct ib_qp            *qp;
+       struct isert_device     *device;
+       struct mutex            mutex;
+       struct completion       wait;
+       struct completion       wait_comp_err;
+       struct kref             kref;
+       struct list_head        fr_pool;
+       int                     fr_pool_size;
        /* lock to protect fastreg pool */
-       spinlock_t              conn_lock;
+       spinlock_t              pool_lock;
        struct work_struct      release_work;
        struct ib_recv_wr       beacon;
        bool                    logout_posted;
@@ -211,6 +208,8 @@ struct isert_device {
        bool                    pi_capable;
        int                     refcount;
        struct ib_device        *ib_device;
+       struct ib_pd            *pd;
+       struct ib_mr            *mr;
        struct isert_comp       *comps;
        int                     comps_used;
        struct list_head        dev_node;
index 0747c05..918814c 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/parser.h>
 #include <linux/random.h>
 #include <linux/jiffies.h>
+#include <rdma/ib_cache.h>
 
 #include <linux/atomic.h>
 
@@ -265,10 +266,10 @@ static int srp_init_qp(struct srp_target_port *target,
        if (!attr)
                return -ENOMEM;
 
-       ret = ib_find_pkey(target->srp_host->srp_dev->dev,
-                          target->srp_host->port,
-                          be16_to_cpu(target->pkey),
-                          &attr->pkey_index);
+       ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
+                                 target->srp_host->port,
+                                 be16_to_cpu(target->pkey),
+                                 &attr->pkey_index);
        if (ret)
                goto out;
 
index 6e0a477..9b84b4c 100644 (file)
@@ -93,7 +93,7 @@ MODULE_PARM_DESC(srpt_service_guid,
                 " instead of using the node_guid of the first HCA.");
 
 static struct ib_client srpt_client;
-static struct target_fabric_configfs *srpt_target;
+static const struct target_core_fabric_ops srpt_template;
 static void srpt_release_channel(struct srpt_rdma_ch *ch);
 static int srpt_queue_status(struct se_cmd *cmd);
 
@@ -207,7 +207,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
                }
                break;
        default:
-               printk(KERN_ERR "received unrecognized IB event %d\n",
+               pr_err("received unrecognized IB event %d\n",
                       event->event);
                break;
        }
@@ -218,7 +218,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
  */
 static void srpt_srq_event(struct ib_event *event, void *ctx)
 {
-       printk(KERN_INFO "SRQ event %d\n", event->event);
+       pr_info("SRQ event %d\n", event->event);
 }
 
 /**
@@ -242,8 +242,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
                                 ch->sess_name, srpt_get_ch_state(ch));
                break;
        default:
-               printk(KERN_ERR "received unrecognized IB QP event %d\n",
-                      event->event);
+               pr_err("received unrecognized IB QP event %d\n", event->event);
                break;
        }
 }
@@ -602,7 +601,7 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
-                       printk(KERN_ERR "disabling MAD processing failed.\n");
+                       pr_err("disabling MAD processing failed.\n");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
@@ -810,7 +809,7 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
 
        ret = -ENOMEM;
        if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
-               printk(KERN_WARNING "IB send queue full (needed 1)\n");
+               pr_warn("IB send queue full (needed 1)\n");
                goto out;
        }
 
@@ -912,7 +911,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
 
                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
-                       printk(KERN_ERR "received unsupported SRP_CMD request"
+                       pr_err("received unsupported SRP_CMD request"
                               " type (%u out + %u in != %u / %zu)\n",
                               srp_cmd->data_out_desc_cnt,
                               srp_cmd->data_in_desc_cnt,
@@ -1432,7 +1431,7 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                srpt_unmap_sg_to_ib_sge(ch, ioctx);
                transport_generic_free_cmd(&ioctx->cmd, 0);
        } else {
-               printk(KERN_ERR "IB completion has been received too late for"
+               pr_err("IB completion has been received too late for"
                       " wr_id = %u.\n", ioctx->ioctx.index);
        }
 }
@@ -1457,7 +1456,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                                SRPT_STATE_DATA_IN))
                        target_execute_cmd(&ioctx->cmd);
                else
-                       printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__,
+                       pr_err("%s[%d]: wrong state = %d\n", __func__,
                               __LINE__, srpt_get_cmd_state(ioctx));
        } else if (opcode == SRPT_RDMA_ABORT) {
                ioctx->rdma_aborted = true;
@@ -1481,7 +1480,7 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
        switch (opcode) {
        case SRPT_RDMA_READ_LAST:
                if (ioctx->n_rdma <= 0) {
-                       printk(KERN_ERR "Received invalid RDMA read"
+                       pr_err("Received invalid RDMA read"
                               " error completion with idx %d\n",
                               ioctx->ioctx.index);
                        break;
@@ -1490,14 +1489,13 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
                if (state == SRPT_STATE_NEED_DATA)
                        srpt_abort_cmd(ioctx);
                else
-                       printk(KERN_ERR "%s[%d]: wrong state = %d\n",
+                       pr_err("%s[%d]: wrong state = %d\n",
                               __func__, __LINE__, state);
                break;
        case SRPT_RDMA_WRITE_LAST:
                break;
        default:
-               printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
-                      __LINE__, opcode);
+               pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
                break;
        }
 }
@@ -1549,8 +1547,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
                max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
                if (sense_data_len > max_sense_len) {
-                       printk(KERN_WARNING "truncated sense data from %d to %d"
-                              " bytes\n", sense_data_len, max_sense_len);
+                       pr_warn("truncated sense data from %d to %d"
+                               " bytes\n", sense_data_len, max_sense_len);
                        sense_data_len = max_sense_len;
                }
 
@@ -1628,8 +1626,8 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
        int addressing_method;
 
        if (unlikely(len < 2)) {
-               printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or "
-                      "more", len);
+               pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
+                      len);
                goto out;
        }
 
@@ -1663,7 +1661,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
 
        case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
        default:
-               printk(KERN_ERR "Unimplemented LUN addressing method %u",
+               pr_err("Unimplemented LUN addressing method %u\n",
                       addressing_method);
                break;
        }
@@ -1672,8 +1670,7 @@ out:
        return res;
 
 out_err:
-       printk(KERN_ERR "Support for multi-level LUNs has not yet been"
-              " implemented");
+       pr_err("Support for multi-level LUNs has not yet been implemented\n");
        goto out;
 }
 
@@ -1723,7 +1720,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
        }
 
        if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
-               printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n",
+               pr_err("0x%llx: parsing SRP descriptor table failed.\n",
                       srp_cmd->tag);
                ret = TCM_INVALID_CDB_FIELD;
                goto send_sense;
@@ -1912,7 +1909,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx);
                break;
        case SRP_I_LOGOUT:
-               printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n");
+               pr_err("Not yet implemented: SRP_I_LOGOUT\n");
                break;
        case SRP_CRED_RSP:
                pr_debug("received SRP_CRED_RSP\n");
@@ -1921,10 +1918,10 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                pr_debug("received SRP_AER_RSP\n");
                break;
        case SRP_RSP:
-               printk(KERN_ERR "Received SRP_RSP\n");
+               pr_err("Received SRP_RSP\n");
                break;
        default:
-               printk(KERN_ERR "received IU with unknown opcode 0x%x\n",
+               pr_err("received IU with unknown opcode 0x%x\n",
                       srp_cmd->opcode);
                break;
        }
@@ -1948,12 +1945,12 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
 
                req_lim = atomic_dec_return(&ch->req_lim);
                if (unlikely(req_lim < 0))
-                       printk(KERN_ERR "req_lim = %d < 0\n", req_lim);
+                       pr_err("req_lim = %d < 0\n", req_lim);
                ioctx = sdev->ioctx_ring[index];
                srpt_handle_new_iu(ch, ioctx, NULL);
        } else {
-               printk(KERN_INFO "receiving failed for idx %u with status %d\n",
-                      index, wc->status);
+               pr_info("receiving failed for idx %u with status %d\n",
+                       index, wc->status);
        }
 }
 
@@ -1993,12 +1990,12 @@ static void srpt_process_send_completion(struct ib_cq *cq,
                }
        } else {
                if (opcode == SRPT_SEND) {
-                       printk(KERN_INFO "sending response for idx %u failed"
-                              " with status %d\n", index, wc->status);
+                       pr_info("sending response for idx %u failed"
+                               " with status %d\n", index, wc->status);
                        srpt_handle_send_err_comp(ch, wc->wr_id);
                } else if (opcode != SRPT_RDMA_MID) {
-                       printk(KERN_INFO "RDMA t %d for idx %u failed with"
-                               " status %d", opcode, index, wc->status);
+                       pr_info("RDMA t %d for idx %u failed with"
+                               " status %d\n", opcode, index, wc->status);
                        srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
                }
        }
@@ -2062,15 +2059,15 @@ static int srpt_compl_thread(void *arg)
 
        ch = arg;
        BUG_ON(!ch);
-       printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n",
-              ch->sess_name, ch->thread->comm, current->pid);
+       pr_info("Session %s: kernel thread %s (PID %d) started\n",
+               ch->sess_name, ch->thread->comm, current->pid);
        while (!kthread_should_stop()) {
                wait_event_interruptible(ch->wait_queue,
                        (srpt_process_completion(ch->cq, ch),
                         kthread_should_stop()));
        }
-       printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n",
-              ch->sess_name, ch->thread->comm, current->pid);
+       pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
+               ch->sess_name, ch->thread->comm, current->pid);
        return 0;
 }
 
@@ -2097,7 +2094,7 @@ retry:
                              ch->rq_size + srp_sq_size, 0);
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
-               printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n",
+               pr_err("failed to create CQ cqe= %d ret= %d\n",
                       ch->rq_size + srp_sq_size, ret);
                goto out;
        }
@@ -2123,7 +2120,7 @@ retry:
                                goto retry;
                        }
                }
-               printk(KERN_ERR "failed to create_qp ret= %d\n", ret);
+               pr_err("failed to create_qp ret= %d\n", ret);
                goto err_destroy_cq;
        }
 
@@ -2143,7 +2140,7 @@ retry:
 
        ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
        if (IS_ERR(ch->thread)) {
-               printk(KERN_ERR "failed to create kernel thread %ld\n",
+               pr_err("failed to create kernel thread %ld\n",
                       PTR_ERR(ch->thread));
                ch->thread = NULL;
                goto err_destroy_qp;
@@ -2204,7 +2201,7 @@ static void __srpt_close_ch(struct srpt_rdma_ch *ch)
                /* fall through */
        case CH_LIVE:
                if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
-                       printk(KERN_ERR "sending CM DREQ failed.\n");
+                       pr_err("sending CM DREQ failed.\n");
                break;
        case CH_DISCONNECTING:
                break;
@@ -2291,7 +2288,7 @@ static void srpt_drain_channel(struct ib_cm_id *cm_id)
 
                ret = srpt_ch_qp_err(ch);
                if (ret < 0)
-                       printk(KERN_ERR "Setting queue pair in error state"
+                       pr_err("Setting queue pair in error state"
                               " failed: %d\n", ret);
        }
 }
@@ -2435,17 +2432,17 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 
        it_iu_len = be32_to_cpu(req->req_it_iu_len);
 
-       printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
-              " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
-              " (guid=0x%llx:0x%llx)\n",
-              be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
-              be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
-              be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
-              be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
-              it_iu_len,
-              param->port,
-              be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
-              be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
+       pr_info("Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
+               " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
+               " (guid=0x%llx:0x%llx)\n",
+               be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
+               be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
+               be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
+               be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
+               it_iu_len,
+               param->port,
+               be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
+               be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
 
        rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
        rej = kzalloc(sizeof *rej, GFP_KERNEL);
@@ -2460,7 +2457,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
                ret = -EINVAL;
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because its"
+               pr_err("rejected SRP_LOGIN_REQ because its"
                       " length (%d bytes) is out of range (%d .. %d)\n",
                       it_iu_len, 64, srp_max_req_size);
                goto reject;
@@ -2470,7 +2467,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                rej->reason = __constant_cpu_to_be32(
                             SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
                ret = -EINVAL;
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port"
+               pr_err("rejected SRP_LOGIN_REQ because the target port"
                       " has not yet been enabled\n");
                goto reject;
        }
@@ -2516,7 +2513,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
                ret = -ENOMEM;
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because it"
+               pr_err("rejected SRP_LOGIN_REQ because it"
                       " has an invalid target port identifier.\n");
                goto reject;
        }
@@ -2525,7 +2522,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
        if (!ch) {
                rej->reason = __constant_cpu_to_be32(
                                        SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n");
+               pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
                ret = -ENOMEM;
                goto reject;
        }
@@ -2562,7 +2559,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
        if (ret) {
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating"
+               pr_err("rejected SRP_LOGIN_REQ because creating"
                       " a new RDMA channel failed.\n");
                goto free_ring;
        }
@@ -2571,7 +2568,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
        if (ret) {
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling"
+               pr_err("rejected SRP_LOGIN_REQ because enabling"
                       " RTR failed (error code = %d)\n", ret);
                goto destroy_ib;
        }
@@ -2586,8 +2583,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 
        nacl = srpt_lookup_acl(sport, ch->i_port_id);
        if (!nacl) {
-               printk(KERN_INFO "Rejected login because no ACL has been"
-                      " configured yet for initiator %s.\n", ch->sess_name);
+               pr_info("Rejected login because no ACL has been"
+                       " configured yet for initiator %s.\n", ch->sess_name);
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
                goto destroy_ib;
@@ -2631,7 +2628,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 
        ret = ib_send_cm_rep(cm_id, rep_param);
        if (ret) {
-               printk(KERN_ERR "sending SRP_LOGIN_REQ response failed"
+               pr_err("sending SRP_LOGIN_REQ response failed"
                       " (error code = %d)\n", ret);
                goto release_channel;
        }
@@ -2679,7 +2676,7 @@ out:
 
 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
 {
-       printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id);
+       pr_info("Received IB REJ for cm_id %p.\n", cm_id);
        srpt_drain_channel(cm_id);
 }
 
@@ -2714,13 +2711,13 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
 
 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
 {
-       printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id);
+       pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
        srpt_drain_channel(cm_id);
 }
 
 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
 {
-       printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id);
+       pr_info("Received IB REP error for cm_id %p.\n", cm_id);
        srpt_drain_channel(cm_id);
 }
 
@@ -2755,9 +2752,9 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
 
        if (send_drep) {
                if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
-                       printk(KERN_ERR "Sending IB DREP failed.\n");
-               printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n",
-                      ch->sess_name);
+                       pr_err("Sending IB DREP failed.\n");
+               pr_info("Received DREQ and sent DREP for session %s.\n",
+                       ch->sess_name);
        }
 }
 
@@ -2766,8 +2763,7 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
  */
 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
 {
-       printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n",
-              cm_id);
+       pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
        srpt_drain_channel(cm_id);
 }
 
@@ -2811,14 +2807,13 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
                srpt_cm_rep_error(cm_id);
                break;
        case IB_CM_DREQ_ERROR:
-               printk(KERN_INFO "Received IB DREQ ERROR event.\n");
+               pr_info("Received IB DREQ ERROR event.\n");
                break;
        case IB_CM_MRA_RECEIVED:
-               printk(KERN_INFO "Received IB MRA event\n");
+               pr_info("Received IB MRA event\n");
                break;
        default:
-               printk(KERN_ERR "received unrecognized IB CM event %d\n",
-                      event->event);
+               pr_err("received unrecognized IB CM event %d\n", event->event);
                break;
        }
 
@@ -2848,8 +2843,8 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                ret = -ENOMEM;
                sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
                if (sq_wr_avail < 0) {
-                       printk(KERN_WARNING "IB send queue full (needed %d)\n",
-                              n_rdma);
+                       pr_warn("IB send queue full (needed %d)\n",
+                               n_rdma);
                        goto out;
                }
        }
@@ -2889,7 +2884,7 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
        }
 
        if (ret)
-               printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d",
+               pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
                                 __func__, __LINE__, ret, i, n_rdma);
        if (ret && i > 0) {
                wr.num_sge = 0;
@@ -2897,12 +2892,12 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                wr.send_flags = IB_SEND_SIGNALED;
                while (ch->state == CH_LIVE &&
                        ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
-                       printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]",
+                       pr_info("Trying to abort failed RDMA transfer [%d]\n",
                                ioctx->ioctx.index);
                        msleep(1000);
                }
                while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
-                       printk(KERN_INFO "Waiting until RDMA abort finished [%d]",
+                       pr_info("Waiting until RDMA abort finished [%d]\n",
                                ioctx->ioctx.index);
                        msleep(1000);
                }
@@ -2923,17 +2918,17 @@ static int srpt_xfer_data(struct srpt_rdma_ch *ch,
 
        ret = srpt_map_sg_to_ib_sge(ch, ioctx);
        if (ret) {
-               printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret);
+               pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret);
                goto out;
        }
 
        ret = srpt_perform_rdmas(ch, ioctx);
        if (ret) {
                if (ret == -EAGAIN || ret == -ENOMEM)
-                       printk(KERN_INFO "%s[%d] queue full -- ret=%d\n",
-                                  __func__, __LINE__, ret);
+                       pr_info("%s[%d] queue full -- ret=%d\n",
+                               __func__, __LINE__, ret);
                else
-                       printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n",
+                       pr_err("%s[%d] fatal error -- ret=%d\n",
                               __func__, __LINE__, ret);
                goto out_unmap;
        }
@@ -3058,7 +3053,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
            !ioctx->queue_status_only) {
                ret = srpt_xfer_data(ch, ioctx);
                if (ret) {
-                       printk(KERN_ERR "xfer_data failed for tag %llu\n",
+                       pr_err("xfer_data failed for tag %llu\n",
                               ioctx->tag);
                        return;
                }
@@ -3075,7 +3070,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
        }
        ret = srpt_post_send(ch, ioctx, resp_len);
        if (ret) {
-               printk(KERN_ERR "sending cmd response failed for tag %llu\n",
+               pr_err("sending cmd response failed for tag %llu\n",
                       ioctx->tag);
                srpt_unmap_sg_to_ib_sge(ch, ioctx);
                srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
@@ -3154,7 +3149,7 @@ static int srpt_release_sdev(struct srpt_device *sdev)
        res = wait_event_interruptible(sdev->ch_releaseQ,
                                       srpt_ch_list_empty(sdev));
        if (res)
-               printk(KERN_ERR "%s: interrupted.\n", __func__);
+               pr_err("%s: interrupted.\n", __func__);
 
        return 0;
 }
@@ -3293,7 +3288,7 @@ static void srpt_add_one(struct ib_device *device)
                spin_lock_init(&sport->port_acl_lock);
 
                if (srpt_refresh_port(sport)) {
-                       printk(KERN_ERR "MAD registration failed for %s-%d.\n",
+                       pr_err("MAD registration failed for %s-%d.\n",
                               srpt_sdev_name(sdev), i);
                        goto err_ring;
                }
@@ -3330,7 +3325,7 @@ free_dev:
        kfree(sdev);
 err:
        sdev = NULL;
-       printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name);
+       pr_info("%s(%s) failed.\n", __func__, device->name);
        goto out;
 }
 
@@ -3344,8 +3339,7 @@ static void srpt_remove_one(struct ib_device *device)
 
        sdev = ib_get_client_data(device, &srpt_client);
        if (!sdev) {
-               printk(KERN_INFO "%s(%s): nothing to do.\n", __func__,
-                      device->name);
+               pr_info("%s(%s): nothing to do.\n", __func__, device->name);
                return;
        }
 
@@ -3464,7 +3458,7 @@ static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg)
 
        nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL);
        if (!nacl) {
-               printk(KERN_ERR "Unable to allocate struct srpt_node_acl\n");
+               pr_err("Unable to allocate struct srpt_node_acl\n");
                return NULL;
        }
 
@@ -3615,7 +3609,7 @@ static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg,
        u8 i_port_id[16];
 
        if (srpt_parse_i_port_id(i_port_id, name) < 0) {
-               printk(KERN_ERR "invalid initiator port ID %s\n", name);
+               pr_err("invalid initiator port ID %s\n", name);
                ret = -EINVAL;
                goto err;
        }
@@ -3816,12 +3810,12 @@ static ssize_t srpt_tpg_store_enable(
 
        ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
-               printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n");
+               pr_err("Unable to extract srpt_tpg_store_enable\n");
                return -EINVAL;
        }
 
        if ((tmp != 0) && (tmp != 1)) {
-               printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
+               pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
                return -EINVAL;
        }
        if (tmp == 1)
@@ -3851,7 +3845,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
        int res;
 
        /* Initialize sport->port_wwn and sport->port_tpg_1 */
-       res = core_tpg_register(&srpt_target->tf_ops, &sport->port_wwn,
+       res = core_tpg_register(&srpt_template, &sport->port_wwn,
                        &sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL);
        if (res)
                return ERR_PTR(res);
@@ -3919,7 +3913,9 @@ static struct configfs_attribute *srpt_wwn_attrs[] = {
        NULL,
 };
 
-static struct target_core_fabric_ops srpt_template = {
+static const struct target_core_fabric_ops srpt_template = {
+       .module                         = THIS_MODULE,
+       .name                           = "srpt",
        .get_fabric_name                = srpt_get_fabric_name,
        .get_fabric_proto_ident         = srpt_get_fabric_proto_ident,
        .tpg_get_wwn                    = srpt_get_fabric_wwn,
@@ -3964,6 +3960,10 @@ static struct target_core_fabric_ops srpt_template = {
        .fabric_drop_np                 = NULL,
        .fabric_make_nodeacl            = srpt_make_nodeacl,
        .fabric_drop_nodeacl            = srpt_drop_nodeacl,
+
+       .tfc_wwn_attrs                  = srpt_wwn_attrs,
+       .tfc_tpg_base_attrs             = srpt_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = srpt_tpg_attrib_attrs,
 };
 
 /**
@@ -3980,7 +3980,7 @@ static int __init srpt_init_module(void)
 
        ret = -EINVAL;
        if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
-               printk(KERN_ERR "invalid value %d for kernel module parameter"
+               pr_err("invalid value %d for kernel module parameter"
                       " srp_max_req_size -- must be at least %d.\n",
                       srp_max_req_size, MIN_MAX_REQ_SIZE);
                goto out;
@@ -3988,54 +3988,26 @@ static int __init srpt_init_module(void)
 
        if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
            || srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
-               printk(KERN_ERR "invalid value %d for kernel module parameter"
+               pr_err("invalid value %d for kernel module parameter"
                       " srpt_srq_size -- must be in the range [%d..%d].\n",
                       srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
                goto out;
        }
 
-       srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt");
-       if (IS_ERR(srpt_target)) {
-               printk(KERN_ERR "couldn't register\n");
-               ret = PTR_ERR(srpt_target);
+       ret = target_register_template(&srpt_template);
+       if (ret)
                goto out;
-       }
-
-       srpt_target->tf_ops = srpt_template;
-
-       /*
-        * Set up default attribute lists.
-        */
-       srpt_target->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = srpt_wwn_attrs;
-       srpt_target->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = srpt_tpg_attrs;
-       srpt_target->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = srpt_tpg_attrib_attrs;
-       srpt_target->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-
-       ret = target_fabric_configfs_register(srpt_target);
-       if (ret < 0) {
-               printk(KERN_ERR "couldn't register\n");
-               goto out_free_target;
-       }
 
        ret = ib_register_client(&srpt_client);
        if (ret) {
-               printk(KERN_ERR "couldn't register IB client\n");
+               pr_err("couldn't register IB client\n");
                goto out_unregister_target;
        }
 
        return 0;
 
 out_unregister_target:
-       target_fabric_configfs_deregister(srpt_target);
-       srpt_target = NULL;
-out_free_target:
-       if (srpt_target)
-               target_fabric_configfs_free(srpt_target);
+       target_unregister_template(&srpt_template);
 out:
        return ret;
 }
@@ -4043,8 +4015,7 @@ out:
 static void __exit srpt_cleanup_module(void)
 {
        ib_unregister_client(&srpt_client);
-       target_fabric_configfs_deregister(srpt_target);
-       srpt_target = NULL;
+       target_unregister_template(&srpt_template);
 }
 
 module_init(srpt_init_module);
index 64b9b59..b50c5b8 100644 (file)
@@ -148,16 +148,19 @@ static void cros_ec_keyb_process(struct cros_ec_keyb *ckdev,
 
 static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state)
 {
+       int ret;
        struct cros_ec_command msg = {
-               .version = 0,
                .command = EC_CMD_MKBP_STATE,
-               .outdata = NULL,
-               .outsize = 0,
-               .indata = kb_state,
                .insize = ckdev->cols,
        };
 
-       return cros_ec_cmd_xfer(ckdev->ec, &msg);
+       ret = cros_ec_cmd_xfer(ckdev->ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       memcpy(kb_state, msg.indata, ckdev->cols);
+
+       return 0;
 }
 
 static irqreturn_t cros_ec_keyb_irq(int irq, void *data)
index a35927c..68d43be 100644 (file)
@@ -50,6 +50,7 @@
 #define CONTEXT_SIZE           VTD_PAGE_SIZE
 
 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
+#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
 
@@ -184,32 +185,11 @@ static int force_on = 0;
  * 64-127: Reserved
  */
 struct root_entry {
-       u64     val;
-       u64     rsvd1;
+       u64     lo;
+       u64     hi;
 };
 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-       return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-       root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-       root->val &= ~VTD_PAGE_MASK;
-       root->val |= value & VTD_PAGE_MASK;
-}
 
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-       return (struct context_entry *)
-               (root_present(root)?phys_to_virt(
-               root->val & VTD_PAGE_MASK) :
-               NULL);
-}
 
 /*
  * low 64 bits:
@@ -682,6 +662,40 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
        domain->iommu_superpage = domain_update_iommu_superpage(NULL);
 }
 
+static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
+                                                      u8 bus, u8 devfn, int alloc)
+{
+       struct root_entry *root = &iommu->root_entry[bus];
+       struct context_entry *context;
+       u64 *entry;
+
+       if (ecap_ecs(iommu->ecap)) {
+               if (devfn >= 0x80) {
+                       devfn -= 0x80;
+                       entry = &root->hi;
+               }
+               devfn *= 2;
+       }
+       entry = &root->lo;
+       if (*entry & 1)
+               context = phys_to_virt(*entry & VTD_PAGE_MASK);
+       else {
+               unsigned long phy_addr;
+               if (!alloc)
+                       return NULL;
+
+               context = alloc_pgtable_page(iommu->node);
+               if (!context)
+                       return NULL;
+
+               __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
+               phy_addr = virt_to_phys((void *)context);
+               *entry = phy_addr | 1;
+               __iommu_flush_cache(iommu, entry, sizeof(*entry));
+       }
+       return &context[devfn];
+}
+
 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 {
        struct dmar_drhd_unit *drhd = NULL;
@@ -741,75 +755,36 @@ static void domain_flush_cache(struct dmar_domain *domain,
                clflush_cache_range(addr, size);
 }
 
-/* Gets context entry for a given bus and devfn */
-static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
-               u8 bus, u8 devfn)
-{
-       struct root_entry *root;
-       struct context_entry *context;
-       unsigned long phy_addr;
-       unsigned long flags;
-
-       spin_lock_irqsave(&iommu->lock, flags);
-       root = &iommu->root_entry[bus];
-       context = get_context_addr_from_root(root);
-       if (!context) {
-               context = (struct context_entry *)
-                               alloc_pgtable_page(iommu->node);
-               if (!context) {
-                       spin_unlock_irqrestore(&iommu->lock, flags);
-                       return NULL;
-               }
-               __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
-               phy_addr = virt_to_phys((void *)context);
-               set_root_value(root, phy_addr);
-               set_root_present(root);
-               __iommu_flush_cache(iommu, root, sizeof(*root));
-       }
-       spin_unlock_irqrestore(&iommu->lock, flags);
-       return &context[devfn];
-}
-
 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
-       struct root_entry *root;
        struct context_entry *context;
-       int ret;
+       int ret = 0;
        unsigned long flags;
 
        spin_lock_irqsave(&iommu->lock, flags);
-       root = &iommu->root_entry[bus];
-       context = get_context_addr_from_root(root);
-       if (!context) {
-               ret = 0;
-               goto out;
-       }
-       ret = context_present(&context[devfn]);
-out:
+       context = iommu_context_addr(iommu, bus, devfn, 0);
+       if (context)
+               ret = context_present(context);
        spin_unlock_irqrestore(&iommu->lock, flags);
        return ret;
 }
 
 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
-       struct root_entry *root;
        struct context_entry *context;
        unsigned long flags;
 
        spin_lock_irqsave(&iommu->lock, flags);
-       root = &iommu->root_entry[bus];
-       context = get_context_addr_from_root(root);
+       context = iommu_context_addr(iommu, bus, devfn, 0);
        if (context) {
-               context_clear_entry(&context[devfn]);
-               __iommu_flush_cache(iommu, &context[devfn], \
-                       sizeof(*context));
+               context_clear_entry(context);
+               __iommu_flush_cache(iommu, context, sizeof(*context));
        }
        spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
 static void free_context_table(struct intel_iommu *iommu)
 {
-       struct root_entry *root;
        int i;
        unsigned long flags;
        struct context_entry *context;
@@ -819,10 +794,17 @@ static void free_context_table(struct intel_iommu *iommu)
                goto out;
        }
        for (i = 0; i < ROOT_ENTRY_NR; i++) {
-               root = &iommu->root_entry[i];
-               context = get_context_addr_from_root(root);
+               context = iommu_context_addr(iommu, i, 0, 0);
+               if (context)
+                       free_pgtable_page(context);
+
+               if (!ecap_ecs(iommu->ecap))
+                       continue;
+
+               context = iommu_context_addr(iommu, i, 0x80, 0);
                if (context)
                        free_pgtable_page(context);
+
        }
        free_pgtable_page(iommu->root_entry);
        iommu->root_entry = NULL;
@@ -1146,14 +1128,16 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 
 static void iommu_set_root_entry(struct intel_iommu *iommu)
 {
-       void *addr;
+       u64 addr;
        u32 sts;
        unsigned long flag;
 
-       addr = iommu->root_entry;
+       addr = virt_to_phys(iommu->root_entry);
+       if (ecap_ecs(iommu->ecap))
+               addr |= DMA_RTADDR_RTT;
 
        raw_spin_lock_irqsave(&iommu->register_lock, flag);
-       dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
+       dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
 
        writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
 
@@ -1800,7 +1784,9 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
        BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
               translation != CONTEXT_TT_MULTI_LEVEL);
 
-       context = device_to_context_entry(iommu, bus, devfn);
+       spin_lock_irqsave(&iommu->lock, flags);
+       context = iommu_context_addr(iommu, bus, devfn, 1);
+       spin_unlock_irqrestore(&iommu->lock, flags);
        if (!context)
                return -ENOMEM;
        spin_lock_irqsave(&iommu->lock, flags);
@@ -2564,6 +2550,10 @@ static bool device_has_rmrr(struct device *dev)
  * In both cases we assume that PCI USB devices with RMRRs have them largely
  * for historical reasons and that the RMRR space is not actively used post
  * boot.  This exclusion may change if vendors begin to abuse it.
+ *
+ * The same exception is made for graphics devices, with the requirement that
+ * any use of the RMRR regions will be torn down before assigning the device
+ * to a guest.
  */
 static bool device_is_rmrr_locked(struct device *dev)
 {
@@ -2573,7 +2563,7 @@ static bool device_is_rmrr_locked(struct device *dev)
        if (dev_is_pci(dev)) {
                struct pci_dev *pdev = to_pci_dev(dev);
 
-               if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
+               if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
                        return false;
        }
 
index 6c25b3c..5709ae9 100644 (file)
@@ -637,10 +637,7 @@ static int __init intel_enable_irq_remapping(void)
        if (x2apic_supported()) {
                eim = !dmar_x2apic_optout();
                if (!eim)
-                       printk(KERN_WARNING
-                               "Your BIOS is broken and requested that x2apic be disabled.\n"
-                               "This will slightly decrease performance.\n"
-                               "Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
+                       pr_info("x2apic is disabled because BIOS sets x2apic opt out bit. You can use 'intremap=no_x2apic_optout' to override the BIOS setting.\n");
        }
 
        for_each_iommu(iommu, drhd) {
index a6ce347..7b315e3 100644 (file)
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
+#include <linux/acpi.h>
 #include <linux/irqdomain.h>
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqchip/arm-gic.h>
+#include <linux/irqchip/arm-gic-acpi.h>
 
 #include <asm/cputype.h>
 #include <asm/irq.h>
@@ -1107,3 +1109,105 @@ IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init);
 IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init);
 
 #endif
+
+#ifdef CONFIG_ACPI
+static phys_addr_t dist_phy_base, cpu_phy_base __initdata;
+
+static int __init
+gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
+                       const unsigned long end)
+{
+       struct acpi_madt_generic_interrupt *processor;
+       phys_addr_t gic_cpu_base;
+       static int cpu_base_assigned;
+
+       processor = (struct acpi_madt_generic_interrupt *)header;
+
+       if (BAD_MADT_ENTRY(processor, end))
+               return -EINVAL;
+
+       /*
+        * There is no support for non-banked GICv1/2 register in ACPI spec.
+        * All CPU interface addresses have to be the same.
+        */
+       gic_cpu_base = processor->base_address;
+       if (cpu_base_assigned && gic_cpu_base != cpu_phy_base)
+               return -EINVAL;
+
+       cpu_phy_base = gic_cpu_base;
+       cpu_base_assigned = 1;
+       return 0;
+}
+
+static int __init
+gic_acpi_parse_madt_distributor(struct acpi_subtable_header *header,
+                               const unsigned long end)
+{
+       struct acpi_madt_generic_distributor *dist;
+
+       dist = (struct acpi_madt_generic_distributor *)header;
+
+       if (BAD_MADT_ENTRY(dist, end))
+               return -EINVAL;
+
+       dist_phy_base = dist->base_address;
+       return 0;
+}
+
+int __init
+gic_v2_acpi_init(struct acpi_table_header *table)
+{
+       void __iomem *cpu_base, *dist_base;
+       int count;
+
+       /* Collect CPU base addresses */
+       count = acpi_parse_entries(ACPI_SIG_MADT,
+                                  sizeof(struct acpi_table_madt),
+                                  gic_acpi_parse_madt_cpu, table,
+                                  ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
+       if (count <= 0) {
+               pr_err("No valid GICC entries exist\n");
+               return -EINVAL;
+       }
+
+       /*
+        * Find distributor base address. We expect one distributor entry since
+        * ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
+        */
+       count = acpi_parse_entries(ACPI_SIG_MADT,
+                                  sizeof(struct acpi_table_madt),
+                                  gic_acpi_parse_madt_distributor, table,
+                                  ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
+       if (count <= 0) {
+               pr_err("No valid GICD entries exist\n");
+               return -EINVAL;
+       } else if (count > 1) {
+               pr_err("More than one GICD entry detected\n");
+               return -EINVAL;
+       }
+
+       cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE);
+       if (!cpu_base) {
+               pr_err("Unable to map GICC registers\n");
+               return -ENOMEM;
+       }
+
+       dist_base = ioremap(dist_phy_base, ACPI_GICV2_DIST_MEM_SIZE);
+       if (!dist_base) {
+               pr_err("Unable to map GICD registers\n");
+               iounmap(cpu_base);
+               return -ENOMEM;
+       }
+
+       /*
+        * Initialize zero GIC instance (no multi-GIC support). Also, set GIC
+        * as default IRQ domain to allow for GSI registration and GSI to IRQ
+        * number translation (see acpi_register_gsi() and acpi_gsi_to_irq()).
+        */
+       gic_init_bases(0, -1, dist_base, cpu_base, 0, NULL);
+       irq_set_default_host(gic_data[0].domain);
+
+       acpi_irq_model = ACPI_IRQ_MODEL_GIC;
+       return 0;
+}
+#endif
index 0fe2f71..afd1af3 100644 (file)
@@ -8,6 +8,7 @@
  * warranty of any kind, whether express or implied.
  */
 
+#include <linux/acpi_irq.h>
 #include <linux/init.h>
 #include <linux/of_irq.h>
 #include <linux/irqchip.h>
@@ -26,4 +27,6 @@ extern struct of_device_id __irqchip_of_table[];
 void __init irqchip_init(void)
 {
        of_irq_init(__irqchip_of_table);
+
+       acpi_irq_init();
 }
index 1219af4..19a3228 100644 (file)
@@ -211,10 +211,9 @@ static void initialize(struct lg_cpu *cpu)
 
        /*
         * The Guest tells us where we're not to deliver interrupts by putting
-        * the range of addresses into "struct lguest_data".
+        * the instruction address into "struct lguest_data".
         */
-       if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
-           || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
+       if (get_user(cpu->lg->noirq_iret, &cpu->lg->lguest_data->noirq_iret))
                kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
 
        /*
index 70dfcdc..5e7559b 100644 (file)
@@ -56,21 +56,16 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
 }
 
 /*H:210
- * The set_guest_interrupt() routine actually delivers the interrupt or
- * trap.  The mechanics of delivering traps and interrupts to the Guest are the
- * same, except some traps have an "error code" which gets pushed onto the
- * stack as well: the caller tells us if this is one.
- *
- * "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
- * interrupt or trap.  It's split into two parts for traditional reasons: gcc
- * on i386 used to be frightened by 64 bit numbers.
+ * The push_guest_interrupt_stack() routine saves Guest state on the stack for
+ * an interrupt or trap.  The mechanics of delivering traps and interrupts to
+ * the Guest are the same, except some traps have an "error code" which gets
+ * pushed onto the stack as well: the caller tells us if this is one.
  *
  * We set up the stack just like the CPU does for a real interrupt, so it's
  * identical for the Guest (and the standard "iret" instruction will undo
  * it).
  */
-static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
-                               bool has_err)
+static void push_guest_interrupt_stack(struct lg_cpu *cpu, bool has_err)
 {
        unsigned long gstack, origstack;
        u32 eflags, ss, irq_enable;
@@ -130,12 +125,28 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
        if (has_err)
                push_guest_stack(cpu, &gstack, cpu->regs->errcode);
 
-       /*
-        * Now we've pushed all the old state, we change the stack, the code
-        * segment and the address to execute.
-        */
+       /* Adjust the stack pointer and stack segment. */
        cpu->regs->ss = ss;
        cpu->regs->esp = virtstack + (gstack - origstack);
+}
+
+/*
+ * This actually makes the Guest start executing the given interrupt/trap
+ * handler.
+ *
+ * "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
+ * interrupt or trap.  It's split into two parts for traditional reasons: gcc
+ * on i386 used to be frightened by 64 bit numbers.
+ */
+static void guest_run_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi)
+{
+       /* If we're already in the kernel, we don't change stacks. */
+       if ((cpu->regs->ss&0x3) != GUEST_PL)
+               cpu->regs->ss = cpu->esp1;
+
+       /*
+        * Set the code segment and the address to execute.
+        */
        cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
        cpu->regs->eip = idt_address(lo, hi);
 
@@ -158,6 +169,24 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
                        kill_guest(cpu, "Disabling interrupts");
 }
 
+/* This restores the eflags word which was pushed on the stack by a trap */
+static void restore_eflags(struct lg_cpu *cpu)
+{
+       /* This is the physical address of the stack. */
+       unsigned long stack_pa = guest_pa(cpu, cpu->regs->esp);
+
+       /*
+        * Stack looks like this:
+        * Address      Contents
+        * esp          EIP
+        * esp + 4      CS
+        * esp + 8      EFLAGS
+        */
+       cpu->regs->eflags = lgread(cpu, stack_pa + 8, u32);
+       cpu->regs->eflags &=
+               ~(X86_EFLAGS_TF|X86_EFLAGS_VM|X86_EFLAGS_RF|X86_EFLAGS_NT);
+}
+
 /*H:205
  * Virtual Interrupts.
  *
@@ -200,14 +229,6 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
 
        BUG_ON(irq >= LGUEST_IRQS);
 
-       /*
-        * They may be in the middle of an iret, where they asked us never to
-        * deliver interrupts.
-        */
-       if (cpu->regs->eip >= cpu->lg->noirq_start &&
-          (cpu->regs->eip < cpu->lg->noirq_end))
-               return;
-
        /* If they're halted, interrupts restart them. */
        if (cpu->halted) {
                /* Re-enable interrupts. */
@@ -237,12 +258,34 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
        if (idt_present(idt->a, idt->b)) {
                /* OK, mark it no longer pending and deliver it. */
                clear_bit(irq, cpu->irqs_pending);
+
                /*
-                * set_guest_interrupt() takes the interrupt descriptor and a
-                * flag to say whether this interrupt pushes an error code onto
-                * the stack as well: virtual interrupts never do.
+                * They may be about to iret, where they asked us never to
+                * deliver interrupts.  In this case, we can emulate that iret
+                * then immediately deliver the interrupt.  This is basically
+                * a noop: the iret would pop the interrupt frame and restore
+                * eflags, and then we'd set it up again.  So just restore the
+                * eflags word and jump straight to the handler in this case.
+                *
+                * Denys Vlasenko points out that this isn't quite right: if
+                * the iret was returning to userspace, then that interrupt
+                * would reset the stack pointer (which the Guest told us
+                * about via LHCALL_SET_STACK).  But unless the Guest is being
+                * *really* weird, that will be the same as the current stack
+                * anyway.
                 */
-               set_guest_interrupt(cpu, idt->a, idt->b, false);
+               if (cpu->regs->eip == cpu->lg->noirq_iret) {
+                       restore_eflags(cpu);
+               } else {
+                       /*
+                        * set_guest_interrupt() takes a flag to say whether
+                        * this interrupt pushes an error code onto the stack
+                        * as well: virtual interrupts never do.
+                        */
+                       push_guest_interrupt_stack(cpu, false);
+               }
+               /* Actually make Guest cpu jump to handler. */
+               guest_run_interrupt(cpu, idt->a, idt->b);
        }
 
        /*
@@ -353,8 +396,9 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
         */
        if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b))
                return false;
-       set_guest_interrupt(cpu, cpu->arch.idt[num].a,
-                           cpu->arch.idt[num].b, has_err(num));
+       push_guest_interrupt_stack(cpu, has_err(num));
+       guest_run_interrupt(cpu, cpu->arch.idt[num].a,
+                           cpu->arch.idt[num].b);
        return true;
 }
 
@@ -395,8 +439,9 @@ static bool direct_trap(unsigned int num)
  * The Guest has the ability to turn its interrupt gates into trap gates,
  * if it is careful.  The Host will let trap gates can go directly to the
  * Guest, but the Guest needs the interrupts atomically disabled for an
- * interrupt gate.  It can do this by pointing the trap gate at instructions
- * within noirq_start and noirq_end, where it can safely disable interrupts.
+ * interrupt gate.  The Host could provide a mechanism to register more
+ * "no-interrupt" regions, and the Guest could point the trap gate at
+ * instructions within that region, where it can safely disable interrupts.
  */
 
 /*M:006
index 307e8b3..ac8ad04 100644 (file)
@@ -102,7 +102,7 @@ struct lguest {
 
        struct pgdir pgdirs[4];
 
-       unsigned long noirq_start, noirq_end;
+       unsigned long noirq_iret;
 
        unsigned int stack_pages;
        u32 tsc_khz;
index c4c6113..30c6068 100644 (file)
@@ -339,6 +339,13 @@ static ssize_t write(struct file *file, const char __user *in,
        }
 }
 
+static int open(struct inode *inode, struct file *file)
+{
+       file->private_data = NULL;
+
+       return 0;
+}
+
 /*L:060
  * The final piece of interface code is the close() routine.  It reverses
  * everything done in initialize().  This is usually called because the
@@ -409,6 +416,7 @@ static int close(struct inode *inode, struct file *file)
  */
 static const struct file_operations lguest_fops = {
        .owner   = THIS_MODULE,
+       .open    = open,
        .release = close,
        .write   = write,
        .read    = read,
index 6ddc983..edcf4ab 100644 (file)
@@ -175,6 +175,22 @@ config MD_FAULTY
 
          In unsure, say N.
 
+
+config MD_CLUSTER
+       tristate "Cluster Support for MD (EXPERIMENTAL)"
+       depends on BLK_DEV_MD
+       depends on DLM
+       default n
+       ---help---
+       Clustering support for MD devices. This enables locking and
+       synchronization across multiple systems on the cluster, so all
+       nodes in the cluster can access the MD devices simultaneously.
+
+       This brings the redundancy (and uptime) of RAID levels across the
+       nodes of the cluster.
+
+       If unsure, say N.
+
 source "drivers/md/bcache/Kconfig"
 
 config BLK_DEV_DM_BUILTIN
index 1863fea..dba4db5 100644 (file)
@@ -30,6 +30,7 @@ obj-$(CONFIG_MD_RAID10)               += raid10.o
 obj-$(CONFIG_MD_RAID456)       += raid456.o
 obj-$(CONFIG_MD_MULTIPATH)     += multipath.o
 obj-$(CONFIG_MD_FAULTY)                += faulty.o
+obj-$(CONFIG_MD_CLUSTER)       += md-cluster.o
 obj-$(CONFIG_BCACHE)           += bcache/
 obj-$(CONFIG_BLK_DEV_MD)       += md-mod.o
 obj-$(CONFIG_BLK_DEV_DM)       += dm-mod.o
index 3a57679..2bc56e2 100644 (file)
@@ -205,6 +205,10 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
        struct block_device *bdev;
        struct mddev *mddev = bitmap->mddev;
        struct bitmap_storage *store = &bitmap->storage;
+       int node_offset = 0;
+
+       if (mddev_is_clustered(bitmap->mddev))
+               node_offset = bitmap->cluster_slot * store->file_pages;
 
        while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
                int size = PAGE_SIZE;
@@ -433,6 +437,7 @@ void bitmap_update_sb(struct bitmap *bitmap)
        /* This might have been changed by a reshape */
        sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
        sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
+       sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
        sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
                                           bitmap_info.space);
        kunmap_atomic(sb);
@@ -544,6 +549,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
        bitmap_super_t *sb;
        unsigned long chunksize, daemon_sleep, write_behind;
        unsigned long long events;
+       int nodes = 0;
        unsigned long sectors_reserved = 0;
        int err = -EINVAL;
        struct page *sb_page;
@@ -562,6 +568,22 @@ static int bitmap_read_sb(struct bitmap *bitmap)
                return -ENOMEM;
        bitmap->storage.sb_page = sb_page;
 
+re_read:
+       /* If cluster_slot is set, the cluster is setup */
+       if (bitmap->cluster_slot >= 0) {
+               sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
+
+               sector_div(bm_blocks,
+                          bitmap->mddev->bitmap_info.chunksize >> 9);
+               /* bits to bytes */
+               bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
+               /* to 4k blocks */
+               bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
+               bitmap->mddev->bitmap_info.offset += bitmap->cluster_slot * (bm_blocks << 3);
+               pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
+                       bitmap->cluster_slot, (unsigned long long)bitmap->mddev->bitmap_info.offset);
+       }
+
        if (bitmap->storage.file) {
                loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
                int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
@@ -577,12 +599,15 @@ static int bitmap_read_sb(struct bitmap *bitmap)
        if (err)
                return err;
 
+       err = -EINVAL;
        sb = kmap_atomic(sb_page);
 
        chunksize = le32_to_cpu(sb->chunksize);
        daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
        write_behind = le32_to_cpu(sb->write_behind);
        sectors_reserved = le32_to_cpu(sb->sectors_reserved);
+       nodes = le32_to_cpu(sb->nodes);
+       strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
 
        /* verify that the bitmap-specific fields are valid */
        if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -619,7 +644,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
                        goto out;
                }
                events = le64_to_cpu(sb->events);
-               if (events < bitmap->mddev->events) {
+               if (!nodes && (events < bitmap->mddev->events)) {
                        printk(KERN_INFO
                               "%s: bitmap file is out of date (%llu < %llu) "
                               "-- forcing full recovery\n",
@@ -634,20 +659,40 @@ static int bitmap_read_sb(struct bitmap *bitmap)
        if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
                set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
        bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
+       strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
        err = 0;
+
 out:
        kunmap_atomic(sb);
+       /* Assiging chunksize is required for "re_read" */
+       bitmap->mddev->bitmap_info.chunksize = chunksize;
+       if (nodes && (bitmap->cluster_slot < 0)) {
+               err = md_setup_cluster(bitmap->mddev, nodes);
+               if (err) {
+                       pr_err("%s: Could not setup cluster service (%d)\n",
+                                       bmname(bitmap), err);
+                       goto out_no_sb;
+               }
+               bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
+               goto re_read;
+       }
+
+
 out_no_sb:
        if (test_bit(BITMAP_STALE, &bitmap->flags))
                bitmap->events_cleared = bitmap->mddev->events;
        bitmap->mddev->bitmap_info.chunksize = chunksize;
        bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
        bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+       bitmap->mddev->bitmap_info.nodes = nodes;
        if (bitmap->mddev->bitmap_info.space == 0 ||
            bitmap->mddev->bitmap_info.space > sectors_reserved)
                bitmap->mddev->bitmap_info.space = sectors_reserved;
-       if (err)
+       if (err) {
                bitmap_print_sb(bitmap);
+               if (bitmap->cluster_slot < 0)
+                       md_cluster_stop(bitmap->mddev);
+       }
        return err;
 }
 
@@ -692,9 +737,10 @@ static inline struct page *filemap_get_page(struct bitmap_storage *store,
 }
 
 static int bitmap_storage_alloc(struct bitmap_storage *store,
-                               unsigned long chunks, int with_super)
+                               unsigned long chunks, int with_super,
+                               int slot_number)
 {
-       int pnum;
+       int pnum, offset = 0;
        unsigned long num_pages;
        unsigned long bytes;
 
@@ -703,6 +749,7 @@ static int bitmap_storage_alloc(struct bitmap_storage *store,
                bytes += sizeof(bitmap_super_t);
 
        num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
+       offset = slot_number * (num_pages - 1);
 
        store->filemap = kmalloc(sizeof(struct page *)
                                 * num_pages, GFP_KERNEL);
@@ -713,20 +760,22 @@ static int bitmap_storage_alloc(struct bitmap_storage *store,
                store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
                if (store->sb_page == NULL)
                        return -ENOMEM;
-               store->sb_page->index = 0;
        }
+
        pnum = 0;
        if (store->sb_page) {
                store->filemap[0] = store->sb_page;
                pnum = 1;
+               store->sb_page->index = offset;
        }
+
        for ( ; pnum < num_pages; pnum++) {
                store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
                if (!store->filemap[pnum]) {
                        store->file_pages = pnum;
                        return -ENOMEM;
                }
-               store->filemap[pnum]->index = pnum;
+               store->filemap[pnum]->index = pnum + offset;
        }
        store->file_pages = pnum;
 
@@ -885,6 +934,28 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
        }
 }
 
+static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
+{
+       unsigned long bit;
+       struct page *page;
+       void *paddr;
+       unsigned long chunk = block >> bitmap->counts.chunkshift;
+       int set = 0;
+
+       page = filemap_get_page(&bitmap->storage, chunk);
+       if (!page)
+               return -EINVAL;
+       bit = file_page_offset(&bitmap->storage, chunk);
+       paddr = kmap_atomic(page);
+       if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
+               set = test_bit(bit, paddr);
+       else
+               set = test_bit_le(bit, paddr);
+       kunmap_atomic(paddr);
+       return set;
+}
+
+
 /* this gets called when the md device is ready to unplug its underlying
  * (slave) device queues -- before we let any writes go down, we need to
  * sync the dirty pages of the bitmap file to disk */
@@ -935,7 +1006,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
  */
 static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
 {
-       unsigned long i, chunks, index, oldindex, bit;
+       unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
        struct page *page = NULL;
        unsigned long bit_cnt = 0;
        struct file *file;
@@ -981,6 +1052,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
        if (!bitmap->mddev->bitmap_info.external)
                offset = sizeof(bitmap_super_t);
 
+       if (mddev_is_clustered(bitmap->mddev))
+               node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
+
        for (i = 0; i < chunks; i++) {
                int b;
                index = file_page_index(&bitmap->storage, i);
@@ -1001,7 +1075,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
                                        bitmap->mddev,
                                        bitmap->mddev->bitmap_info.offset,
                                        page,
-                                       index, count);
+                                       index + node_offset, count);
 
                        if (ret)
                                goto err;
@@ -1207,7 +1281,6 @@ void bitmap_daemon_work(struct mddev *mddev)
             j < bitmap->storage.file_pages
                     && !test_bit(BITMAP_STALE, &bitmap->flags);
             j++) {
-
                if (test_page_attr(bitmap, j,
                                   BITMAP_PAGE_DIRTY))
                        /* bitmap_unplug will handle the rest */
@@ -1530,11 +1603,13 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
                return;
        }
        if (!*bmc) {
-               *bmc = 2 | (needed ? NEEDED_MASK : 0);
+               *bmc = 2;
                bitmap_count_page(&bitmap->counts, offset, 1);
                bitmap_set_pending(&bitmap->counts, offset);
                bitmap->allclean = 0;
        }
+       if (needed)
+               *bmc |= NEEDED_MASK;
        spin_unlock_irq(&bitmap->counts.lock);
 }
 
@@ -1591,6 +1666,10 @@ static void bitmap_free(struct bitmap *bitmap)
        if (!bitmap) /* there was no bitmap */
                return;
 
+       if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
+               bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
+               md_cluster_stop(bitmap->mddev);
+
        /* Shouldn't be needed - but just in case.... */
        wait_event(bitmap->write_wait,
                   atomic_read(&bitmap->pending_writes) == 0);
@@ -1636,7 +1715,7 @@ void bitmap_destroy(struct mddev *mddev)
  * initialize the bitmap structure
  * if this returns an error, bitmap_destroy must be called to do clean up
  */
-int bitmap_create(struct mddev *mddev)
+struct bitmap *bitmap_create(struct mddev *mddev, int slot)
 {
        struct bitmap *bitmap;
        sector_t blocks = mddev->resync_max_sectors;
@@ -1650,7 +1729,7 @@ int bitmap_create(struct mddev *mddev)
 
        bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
        if (!bitmap)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        spin_lock_init(&bitmap->counts.lock);
        atomic_set(&bitmap->pending_writes, 0);
@@ -1659,6 +1738,7 @@ int bitmap_create(struct mddev *mddev)
        init_waitqueue_head(&bitmap->behind_wait);
 
        bitmap->mddev = mddev;
+       bitmap->cluster_slot = slot;
 
        if (mddev->kobj.sd)
                bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
@@ -1706,12 +1786,14 @@ int bitmap_create(struct mddev *mddev)
        printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
               bitmap->counts.pages, bmname(bitmap));
 
-       mddev->bitmap = bitmap;
-       return test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
+       err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
+       if (err)
+               goto error;
 
+       return bitmap;
  error:
        bitmap_free(bitmap);
-       return err;
+       return ERR_PTR(err);
 }
 
 int bitmap_load(struct mddev *mddev)
@@ -1765,6 +1847,60 @@ out:
 }
 EXPORT_SYMBOL_GPL(bitmap_load);
 
+/* Loads the bitmap associated with slot and copies the resync information
+ * to our bitmap
+ */
+int bitmap_copy_from_slot(struct mddev *mddev, int slot,
+               sector_t *low, sector_t *high, bool clear_bits)
+{
+       int rv = 0, i, j;
+       sector_t block, lo = 0, hi = 0;
+       struct bitmap_counts *counts;
+       struct bitmap *bitmap = bitmap_create(mddev, slot);
+
+       if (IS_ERR(bitmap))
+               return PTR_ERR(bitmap);
+
+       rv = bitmap_read_sb(bitmap);
+       if (rv)
+               goto err;
+
+       rv = bitmap_init_from_disk(bitmap, 0);
+       if (rv)
+               goto err;
+
+       counts = &bitmap->counts;
+       for (j = 0; j < counts->chunks; j++) {
+               block = (sector_t)j << counts->chunkshift;
+               if (bitmap_file_test_bit(bitmap, block)) {
+                       if (!lo)
+                               lo = block;
+                       hi = block;
+                       bitmap_file_clear_bit(bitmap, block);
+                       bitmap_set_memory_bits(mddev->bitmap, block, 1);
+                       bitmap_file_set_bit(mddev->bitmap, block);
+               }
+       }
+
+       if (clear_bits) {
+               bitmap_update_sb(bitmap);
+               /* Setting this for the ev_page should be enough.
+                * And we do not require both write_all and PAGE_DIRT either
+                */
+               for (i = 0; i < bitmap->storage.file_pages; i++)
+                       set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
+               bitmap_write_all(bitmap);
+               bitmap_unplug(bitmap);
+       }
+       *low = lo;
+       *high = hi;
+err:
+       bitmap_free(bitmap);
+       return rv;
+}
+EXPORT_SYMBOL_GPL(bitmap_copy_from_slot);
+
+
 void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
 {
        unsigned long chunk_kb;
@@ -1849,7 +1985,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
        memset(&store, 0, sizeof(store));
        if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
                ret = bitmap_storage_alloc(&store, chunks,
-                                          !bitmap->mddev->bitmap_info.external);
+                                          !bitmap->mddev->bitmap_info.external,
+                                          bitmap->cluster_slot);
        if (ret)
                goto err;
 
@@ -2021,13 +2158,18 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                                return -EINVAL;
                        mddev->bitmap_info.offset = offset;
                        if (mddev->pers) {
+                               struct bitmap *bitmap;
                                mddev->pers->quiesce(mddev, 1);
-                               rv = bitmap_create(mddev);
-                               if (!rv)
+                               bitmap = bitmap_create(mddev, -1);
+                               if (IS_ERR(bitmap))
+                                       rv = PTR_ERR(bitmap);
+                               else {
+                                       mddev->bitmap = bitmap;
                                        rv = bitmap_load(mddev);
-                               if (rv) {
-                                       bitmap_destroy(mddev);
-                                       mddev->bitmap_info.offset = 0;
+                                       if (rv) {
+                                               bitmap_destroy(mddev);
+                                               mddev->bitmap_info.offset = 0;
+                                       }
                                }
                                mddev->pers->quiesce(mddev, 0);
                                if (rv)
@@ -2186,6 +2328,8 @@ __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
 
 static ssize_t metadata_show(struct mddev *mddev, char *page)
 {
+       if (mddev_is_clustered(mddev))
+               return sprintf(page, "clustered\n");
        return sprintf(page, "%s\n", (mddev->bitmap_info.external
                                      ? "external" : "internal"));
 }
@@ -2198,7 +2342,8 @@ static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
                return -EBUSY;
        if (strncmp(buf, "external", 8) == 0)
                mddev->bitmap_info.external = 1;
-       else if (strncmp(buf, "internal", 8) == 0)
+       else if ((strncmp(buf, "internal", 8) == 0) ||
+                       (strncmp(buf, "clustered", 9) == 0))
                mddev->bitmap_info.external = 0;
        else
                return -EINVAL;
index 30210b9..f1f4dd0 100644 (file)
@@ -130,8 +130,9 @@ typedef struct bitmap_super_s {
        __le32 write_behind; /* 60  number of outstanding write-behind writes */
        __le32 sectors_reserved; /* 64 number of 512-byte sectors that are
                                  * reserved for the bitmap. */
-
-       __u8  pad[256 - 68]; /* set to zero */
+       __le32 nodes;        /* 68 the maximum number of nodes in cluster. */
+       __u8 cluster_name[64]; /* 72 cluster name to which this md belongs */
+       __u8  pad[256 - 136]; /* set to zero */
 } bitmap_super_t;
 
 /* notes:
@@ -226,12 +227,13 @@ struct bitmap {
        wait_queue_head_t behind_wait;
 
        struct kernfs_node *sysfs_can_clear;
+       int cluster_slot;               /* Slot offset for clustered env */
 };
 
 /* the bitmap API */
 
 /* these are used only by md/bitmap */
-int  bitmap_create(struct mddev *mddev);
+struct bitmap *bitmap_create(struct mddev *mddev, int slot);
 int bitmap_load(struct mddev *mddev);
 void bitmap_flush(struct mddev *mddev);
 void bitmap_destroy(struct mddev *mddev);
@@ -260,6 +262,8 @@ void bitmap_daemon_work(struct mddev *mddev);
 
 int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
                  int chunksize, int init);
+int bitmap_copy_from_slot(struct mddev *mddev, int slot,
+                               sector_t *lo, sector_t *hi, bool clear_bits);
 #endif
 
 #endif
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
new file mode 100644 (file)
index 0000000..fcfc4b9
--- /dev/null
@@ -0,0 +1,965 @@
+/*
+ * Copyright (C) 2015, SUSE
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/dlm.h>
+#include <linux/sched.h>
+#include <linux/raid/md_p.h>
+#include "md.h"
+#include "bitmap.h"
+#include "md-cluster.h"
+
+#define LVB_SIZE       64
+#define NEW_DEV_TIMEOUT 5000
+
+struct dlm_lock_resource {
+       dlm_lockspace_t *ls;
+       struct dlm_lksb lksb;
+       char *name; /* lock name. */
+       uint32_t flags; /* flags to pass to dlm_lock() */
+       struct completion completion; /* completion for synchronized locking */
+       void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
+       struct mddev *mddev; /* pointing back to mddev. */
+};
+
+struct suspend_info {
+       int slot;
+       sector_t lo;
+       sector_t hi;
+       struct list_head list;
+};
+
+struct resync_info {
+       __le64 lo;
+       __le64 hi;
+};
+
+/* md_cluster_info flags */
+#define                MD_CLUSTER_WAITING_FOR_NEWDISK          1
+
+
+struct md_cluster_info {
+       /* dlm lock space and resources for clustered raid. */
+       dlm_lockspace_t *lockspace;
+       int slot_number;
+       struct completion completion;
+       struct dlm_lock_resource *sb_lock;
+       struct mutex sb_mutex;
+       struct dlm_lock_resource *bitmap_lockres;
+       struct list_head suspend_list;
+       spinlock_t suspend_lock;
+       struct md_thread *recovery_thread;
+       unsigned long recovery_map;
+       /* communication loc resources */
+       struct dlm_lock_resource *ack_lockres;
+       struct dlm_lock_resource *message_lockres;
+       struct dlm_lock_resource *token_lockres;
+       struct dlm_lock_resource *no_new_dev_lockres;
+       struct md_thread *recv_thread;
+       struct completion newdisk_completion;
+       unsigned long state;
+};
+
+enum msg_type {
+       METADATA_UPDATED = 0,
+       RESYNCING,
+       NEWDISK,
+       REMOVE,
+       RE_ADD,
+};
+
+struct cluster_msg {
+       int type;
+       int slot;
+       /* TODO: Unionize this for smaller footprint */
+       sector_t low;
+       sector_t high;
+       char uuid[16];
+       int raid_slot;
+};
+
+static void sync_ast(void *arg)
+{
+       struct dlm_lock_resource *res;
+
+       res = (struct dlm_lock_resource *) arg;
+       complete(&res->completion);
+}
+
+static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
+{
+       int ret = 0;
+
+       init_completion(&res->completion);
+       ret = dlm_lock(res->ls, mode, &res->lksb,
+                       res->flags, res->name, strlen(res->name),
+                       0, sync_ast, res, res->bast);
+       if (ret)
+               return ret;
+       wait_for_completion(&res->completion);
+       return res->lksb.sb_status;
+}
+
+static int dlm_unlock_sync(struct dlm_lock_resource *res)
+{
+       return dlm_lock_sync(res, DLM_LOCK_NL);
+}
+
+static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
+               char *name, void (*bastfn)(void *arg, int mode), int with_lvb)
+{
+       struct dlm_lock_resource *res = NULL;
+       int ret, namelen;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
+       if (!res)
+               return NULL;
+       res->ls = cinfo->lockspace;
+       res->mddev = mddev;
+       namelen = strlen(name);
+       res->name = kzalloc(namelen + 1, GFP_KERNEL);
+       if (!res->name) {
+               pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
+               goto out_err;
+       }
+       strlcpy(res->name, name, namelen + 1);
+       if (with_lvb) {
+               res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
+               if (!res->lksb.sb_lvbptr) {
+                       pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
+                       goto out_err;
+               }
+               res->flags = DLM_LKF_VALBLK;
+       }
+
+       if (bastfn)
+               res->bast = bastfn;
+
+       res->flags |= DLM_LKF_EXPEDITE;
+
+       ret = dlm_lock_sync(res, DLM_LOCK_NL);
+       if (ret) {
+               pr_err("md-cluster: Unable to lock NL on new lock resource %s\n", name);
+               goto out_err;
+       }
+       res->flags &= ~DLM_LKF_EXPEDITE;
+       res->flags |= DLM_LKF_CONVERT;
+
+       return res;
+out_err:
+       kfree(res->lksb.sb_lvbptr);
+       kfree(res->name);
+       kfree(res);
+       return NULL;
+}
+
+static void lockres_free(struct dlm_lock_resource *res)
+{
+       if (!res)
+               return;
+
+       init_completion(&res->completion);
+       dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
+       wait_for_completion(&res->completion);
+
+       kfree(res->name);
+       kfree(res->lksb.sb_lvbptr);
+       kfree(res);
+}
+
+static char *pretty_uuid(char *dest, char *src)
+{
+       int i, len = 0;
+
+       for (i = 0; i < 16; i++) {
+               if (i == 4 || i == 6 || i == 8 || i == 10)
+                       len += sprintf(dest + len, "-");
+               len += sprintf(dest + len, "%02x", (__u8)src[i]);
+       }
+       return dest;
+}
+
+static void add_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres,
+               sector_t lo, sector_t hi)
+{
+       struct resync_info *ri;
+
+       ri = (struct resync_info *)lockres->lksb.sb_lvbptr;
+       ri->lo = cpu_to_le64(lo);
+       ri->hi = cpu_to_le64(hi);
+}
+
+static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
+{
+       struct resync_info ri;
+       struct suspend_info *s = NULL;
+       sector_t hi = 0;
+
+       dlm_lock_sync(lockres, DLM_LOCK_CR);
+       memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
+       hi = le64_to_cpu(ri.hi);
+       if (ri.hi > 0) {
+               s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
+               if (!s)
+                       goto out;
+               s->hi = hi;
+               s->lo = le64_to_cpu(ri.lo);
+       }
+       dlm_unlock_sync(lockres);
+out:
+       return s;
+}
+
+static void recover_bitmaps(struct md_thread *thread)
+{
+       struct mddev *mddev = thread->mddev;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       struct dlm_lock_resource *bm_lockres;
+       char str[64];
+       int slot, ret;
+       struct suspend_info *s, *tmp;
+       sector_t lo, hi;
+
+       while (cinfo->recovery_map) {
+               slot = fls64((u64)cinfo->recovery_map) - 1;
+
+               /* Clear suspend_area associated with the bitmap */
+               spin_lock_irq(&cinfo->suspend_lock);
+               list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
+                       if (slot == s->slot) {
+                               list_del(&s->list);
+                               kfree(s);
+                       }
+               spin_unlock_irq(&cinfo->suspend_lock);
+
+               snprintf(str, 64, "bitmap%04d", slot);
+               bm_lockres = lockres_init(mddev, str, NULL, 1);
+               if (!bm_lockres) {
+                       pr_err("md-cluster: Cannot initialize bitmaps\n");
+                       goto clear_bit;
+               }
+
+               ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
+               if (ret) {
+                       pr_err("md-cluster: Could not DLM lock %s: %d\n",
+                                       str, ret);
+                       goto clear_bit;
+               }
+               ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
+               if (ret) {
+                       pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
+                       goto dlm_unlock;
+               }
+               if (hi > 0) {
+                       /* TODO:Wait for current resync to get over */
+                       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                       if (lo < mddev->recovery_cp)
+                               mddev->recovery_cp = lo;
+                       md_check_recovery(mddev);
+               }
+dlm_unlock:
+               dlm_unlock_sync(bm_lockres);
+clear_bit:
+               clear_bit(slot, &cinfo->recovery_map);
+       }
+}
+
+static void recover_prep(void *arg)
+{
+}
+
+static void recover_slot(void *arg, struct dlm_slot *slot)
+{
+       struct mddev *mddev = arg;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n",
+                       mddev->bitmap_info.cluster_name,
+                       slot->nodeid, slot->slot,
+                       cinfo->slot_number);
+       set_bit(slot->slot - 1, &cinfo->recovery_map);
+       if (!cinfo->recovery_thread) {
+               cinfo->recovery_thread = md_register_thread(recover_bitmaps,
+                               mddev, "recover");
+               if (!cinfo->recovery_thread) {
+                       pr_warn("md-cluster: Could not create recovery thread\n");
+                       return;
+               }
+       }
+       md_wakeup_thread(cinfo->recovery_thread);
+}
+
+static void recover_done(void *arg, struct dlm_slot *slots,
+               int num_slots, int our_slot,
+               uint32_t generation)
+{
+       struct mddev *mddev = arg;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       cinfo->slot_number = our_slot;
+       complete(&cinfo->completion);
+}
+
+static const struct dlm_lockspace_ops md_ls_ops = {
+       .recover_prep = recover_prep,
+       .recover_slot = recover_slot,
+       .recover_done = recover_done,
+};
+
+/*
+ * The BAST function for the ack lock resource
+ * This function wakes up the receive thread in
+ * order to receive and process the message.
+ */
+static void ack_bast(void *arg, int mode)
+{
+       struct dlm_lock_resource *res = (struct dlm_lock_resource *)arg;
+       struct md_cluster_info *cinfo = res->mddev->cluster_info;
+
+       if (mode == DLM_LOCK_EX)
+               md_wakeup_thread(cinfo->recv_thread);
+}
+
+static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
+{
+       struct suspend_info *s, *tmp;
+
+       list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
+               if (slot == s->slot) {
+                       pr_info("%s:%d Deleting suspend_info: %d\n",
+                                       __func__, __LINE__, slot);
+                       list_del(&s->list);
+                       kfree(s);
+                       break;
+               }
+}
+
+static void remove_suspend_info(struct md_cluster_info *cinfo, int slot)
+{
+       spin_lock_irq(&cinfo->suspend_lock);
+       __remove_suspend_info(cinfo, slot);
+       spin_unlock_irq(&cinfo->suspend_lock);
+}
+
+
+static void process_suspend_info(struct md_cluster_info *cinfo,
+               int slot, sector_t lo, sector_t hi)
+{
+       struct suspend_info *s;
+
+       if (!hi) {
+               remove_suspend_info(cinfo, slot);
+               return;
+       }
+       s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
+       if (!s)
+               return;
+       s->slot = slot;
+       s->lo = lo;
+       s->hi = hi;
+       spin_lock_irq(&cinfo->suspend_lock);
+       /* Remove existing entry (if exists) before adding */
+       __remove_suspend_info(cinfo, slot);
+       list_add(&s->list, &cinfo->suspend_list);
+       spin_unlock_irq(&cinfo->suspend_lock);
+}
+
+static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
+{
+       char disk_uuid[64];
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       char event_name[] = "EVENT=ADD_DEVICE";
+       char raid_slot[16];
+       char *envp[] = {event_name, disk_uuid, raid_slot, NULL};
+       int len;
+
+       len = snprintf(disk_uuid, 64, "DEVICE_UUID=");
+       pretty_uuid(disk_uuid + len, cmsg->uuid);
+       snprintf(raid_slot, 16, "RAID_DISK=%d", cmsg->raid_slot);
+       pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot);
+       init_completion(&cinfo->newdisk_completion);
+       set_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
+       kobject_uevent_env(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE, envp);
+       wait_for_completion_timeout(&cinfo->newdisk_completion,
+                       NEW_DEV_TIMEOUT);
+       clear_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
+}
+
+
+static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       md_reload_sb(mddev);
+       dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
+}
+
+static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
+{
+       struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
+
+       if (rdev)
+               md_kick_rdev_from_array(rdev);
+       else
+               pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot);
+}
+
+static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
+{
+       struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
+
+       if (rdev && test_bit(Faulty, &rdev->flags))
+               clear_bit(Faulty, &rdev->flags);
+       else
+               pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, msg->raid_slot);
+}
+
+static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
+{
+       switch (msg->type) {
+       case METADATA_UPDATED:
+               pr_info("%s: %d Received message: METADATA_UPDATE from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_metadata_update(mddev, msg);
+               break;
+       case RESYNCING:
+               pr_info("%s: %d Received message: RESYNCING from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_suspend_info(mddev->cluster_info, msg->slot,
+                               msg->low, msg->high);
+               break;
+       case NEWDISK:
+               pr_info("%s: %d Received message: NEWDISK from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_add_new_disk(mddev, msg);
+               break;
+       case REMOVE:
+               pr_info("%s: %d Received REMOVE from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_remove_disk(mddev, msg);
+               break;
+       case RE_ADD:
+               pr_info("%s: %d Received RE_ADD from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_readd_disk(mddev, msg);
+               break;
+       default:
+               pr_warn("%s:%d Received unknown message from %d\n",
+                       __func__, __LINE__, msg->slot);
+       }
+}
+
+/*
+ * thread for receiving message
+ */
+static void recv_daemon(struct md_thread *thread)
+{
+       struct md_cluster_info *cinfo = thread->mddev->cluster_info;
+       struct dlm_lock_resource *ack_lockres = cinfo->ack_lockres;
+       struct dlm_lock_resource *message_lockres = cinfo->message_lockres;
+       struct cluster_msg msg;
+
+       /*get CR on Message*/
+       if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
+               pr_err("md/raid1:failed to get CR on MESSAGE\n");
+               return;
+       }
+
+       /* read lvb and wake up thread to process this message_lockres */
+       memcpy(&msg, message_lockres->lksb.sb_lvbptr, sizeof(struct cluster_msg));
+       process_recvd_msg(thread->mddev, &msg);
+
+       /*release CR on ack_lockres*/
+       dlm_unlock_sync(ack_lockres);
+       /*up-convert to EX on message_lockres*/
+       dlm_lock_sync(message_lockres, DLM_LOCK_EX);
+       /*get CR on ack_lockres again*/
+       dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
+       /*release CR on message_lockres*/
+       dlm_unlock_sync(message_lockres);
+}
+
+/* lock_comm()
+ * Takes the lock on the TOKEN lock resource so no other
+ * node can communicate while the operation is underway.
+ */
+static int lock_comm(struct md_cluster_info *cinfo)
+{
+       int error;
+
+       error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
+       if (error)
+               pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
+                               __func__, __LINE__, error);
+       return error;
+}
+
+static void unlock_comm(struct md_cluster_info *cinfo)
+{
+       dlm_unlock_sync(cinfo->token_lockres);
+}
+
+/* __sendmsg()
+ * This function performs the actual sending of the message. This function is
+ * usually called after performing the encompassing operation
+ * The function:
+ * 1. Grabs the message lockresource in EX mode
+ * 2. Copies the message to the message LVB
+ * 3. Downconverts message lockresource to CR
+ * 4. Upconverts ack lock resource from CR to EX. This forces the BAST on other nodes
+ *    and the other nodes read the message. The thread will wait here until all other
+ *    nodes have released ack lock resource.
+ * 5. Downconvert ack lockresource to CR
+ */
+static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
+{
+       int error;
+       int slot = cinfo->slot_number - 1;
+
+       cmsg->slot = cpu_to_le32(slot);
+       /*get EX on Message*/
+       error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
+       if (error) {
+               pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
+               goto failed_message;
+       }
+
+       memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
+                       sizeof(struct cluster_msg));
+       /*down-convert EX to CR on Message*/
+       error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CR);
+       if (error) {
+               pr_err("md-cluster: failed to convert EX to CR on MESSAGE(%d)\n",
+                               error);
+               goto failed_message;
+       }
+
+       /*up-convert CR to EX on Ack*/
+       error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_EX);
+       if (error) {
+               pr_err("md-cluster: failed to convert CR to EX on ACK(%d)\n",
+                               error);
+               goto failed_ack;
+       }
+
+       /*down-convert EX to CR on Ack*/
+       error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR);
+       if (error) {
+               pr_err("md-cluster: failed to convert EX to CR on ACK(%d)\n",
+                               error);
+               goto failed_ack;
+       }
+
+failed_ack:
+       dlm_unlock_sync(cinfo->message_lockres);
+failed_message:
+       return error;
+}
+
+static int sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
+{
+       int ret;
+
+       lock_comm(cinfo);
+       ret = __sendmsg(cinfo, cmsg);
+       unlock_comm(cinfo);
+       return ret;
+}
+
+static int gather_all_resync_info(struct mddev *mddev, int total_slots)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       int i, ret = 0;
+       struct dlm_lock_resource *bm_lockres;
+       struct suspend_info *s;
+       char str[64];
+
+
+       for (i = 0; i < total_slots; i++) {
+               memset(str, '\0', 64);
+               snprintf(str, 64, "bitmap%04d", i);
+               bm_lockres = lockres_init(mddev, str, NULL, 1);
+               if (!bm_lockres)
+                       return -ENOMEM;
+               if (i == (cinfo->slot_number - 1))
+                       continue;
+
+               bm_lockres->flags |= DLM_LKF_NOQUEUE;
+               ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
+               if (ret == -EAGAIN) {
+                       memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
+                       s = read_resync_info(mddev, bm_lockres);
+                       if (s) {
+                               pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
+                                               __func__, __LINE__,
+                                               (unsigned long long) s->lo,
+                                               (unsigned long long) s->hi, i);
+                               spin_lock_irq(&cinfo->suspend_lock);
+                               s->slot = i;
+                               list_add(&s->list, &cinfo->suspend_list);
+                               spin_unlock_irq(&cinfo->suspend_lock);
+                       }
+                       ret = 0;
+                       lockres_free(bm_lockres);
+                       continue;
+               }
+               if (ret)
+                       goto out;
+               /* TODO: Read the disk bitmap sb and check if it needs recovery */
+               dlm_unlock_sync(bm_lockres);
+               lockres_free(bm_lockres);
+       }
+out:
+       return ret;
+}
+
+static int join(struct mddev *mddev, int nodes)
+{
+       struct md_cluster_info *cinfo;
+       int ret, ops_rv;
+       char str[64];
+
+       if (!try_module_get(THIS_MODULE))
+               return -ENOENT;
+
+       cinfo = kzalloc(sizeof(struct md_cluster_info), GFP_KERNEL);
+       if (!cinfo)
+               return -ENOMEM;
+
+       init_completion(&cinfo->completion);
+
+       mutex_init(&cinfo->sb_mutex);
+       mddev->cluster_info = cinfo;
+
+       memset(str, 0, 64);
+       pretty_uuid(str, mddev->uuid);
+       ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name,
+                               DLM_LSFL_FS, LVB_SIZE,
+                               &md_ls_ops, mddev, &ops_rv, &cinfo->lockspace);
+       if (ret)
+               goto err;
+       wait_for_completion(&cinfo->completion);
+       if (nodes < cinfo->slot_number) {
+               pr_err("md-cluster: Slot allotted(%d) is greater than available slots(%d).",
+                       cinfo->slot_number, nodes);
+               ret = -ERANGE;
+               goto err;
+       }
+       cinfo->sb_lock = lockres_init(mddev, "cmd-super",
+                                       NULL, 0);
+       if (!cinfo->sb_lock) {
+               ret = -ENOMEM;
+               goto err;
+       }
+       /* Initiate the communication resources */
+       ret = -ENOMEM;
+       cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
+       if (!cinfo->recv_thread) {
+               pr_err("md-cluster: cannot allocate memory for recv_thread!\n");
+               goto err;
+       }
+       cinfo->message_lockres = lockres_init(mddev, "message", NULL, 1);
+       if (!cinfo->message_lockres)
+               goto err;
+       cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0);
+       if (!cinfo->token_lockres)
+               goto err;
+       cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
+       if (!cinfo->ack_lockres)
+               goto err;
+       cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
+       if (!cinfo->no_new_dev_lockres)
+               goto err;
+
+       /* get sync CR lock on ACK. */
+       if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
+               pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
+                               ret);
+       /* get sync CR lock on no-new-dev. */
+       if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
+               pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);
+
+
+       pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
+       snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
+       cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
+       if (!cinfo->bitmap_lockres)
+               goto err;
+       if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
+               pr_err("Failed to get bitmap lock\n");
+               ret = -EINVAL;
+               goto err;
+       }
+
+       INIT_LIST_HEAD(&cinfo->suspend_list);
+       spin_lock_init(&cinfo->suspend_lock);
+
+       ret = gather_all_resync_info(mddev, nodes);
+       if (ret)
+               goto err;
+
+       return 0;
+err:
+       lockres_free(cinfo->message_lockres);
+       lockres_free(cinfo->token_lockres);
+       lockres_free(cinfo->ack_lockres);
+       lockres_free(cinfo->no_new_dev_lockres);
+       lockres_free(cinfo->bitmap_lockres);
+       lockres_free(cinfo->sb_lock);
+       if (cinfo->lockspace)
+               dlm_release_lockspace(cinfo->lockspace, 2);
+       mddev->cluster_info = NULL;
+       kfree(cinfo);
+       module_put(THIS_MODULE);
+       return ret;
+}
+
+static int leave(struct mddev *mddev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       if (!cinfo)
+               return 0;
+       md_unregister_thread(&cinfo->recovery_thread);
+       md_unregister_thread(&cinfo->recv_thread);
+       lockres_free(cinfo->message_lockres);
+       lockres_free(cinfo->token_lockres);
+       lockres_free(cinfo->ack_lockres);
+       lockres_free(cinfo->no_new_dev_lockres);
+       lockres_free(cinfo->sb_lock);
+       lockres_free(cinfo->bitmap_lockres);
+       dlm_release_lockspace(cinfo->lockspace, 2);
+       return 0;
+}
+
+/* slot_number(): Returns the MD slot number to use
+ * DLM starts the slot numbers from 1, wheras cluster-md
+ * wants the number to be from zero, so we deduct one
+ */
+static int slot_number(struct mddev *mddev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       return cinfo->slot_number - 1;
+}
+
+static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
+       /* Re-acquire the lock to refresh LVB */
+       dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
+}
+
+static int metadata_update_start(struct mddev *mddev)
+{
+       return lock_comm(mddev->cluster_info);
+}
+
+static int metadata_update_finish(struct mddev *mddev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       struct cluster_msg cmsg;
+       int ret;
+
+       memset(&cmsg, 0, sizeof(cmsg));
+       cmsg.type = cpu_to_le32(METADATA_UPDATED);
+       ret = __sendmsg(cinfo, &cmsg);
+       unlock_comm(cinfo);
+       return ret;
+}
+
+static int metadata_update_cancel(struct mddev *mddev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       return dlm_unlock_sync(cinfo->token_lockres);
+}
+
+static int resync_send(struct mddev *mddev, enum msg_type type,
+               sector_t lo, sector_t hi)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       struct cluster_msg cmsg;
+       int slot = cinfo->slot_number - 1;
+
+       pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
+                       (unsigned long long)lo,
+                       (unsigned long long)hi);
+       resync_info_update(mddev, lo, hi);
+       cmsg.type = cpu_to_le32(type);
+       cmsg.slot = cpu_to_le32(slot);
+       cmsg.low = cpu_to_le64(lo);
+       cmsg.high = cpu_to_le64(hi);
+       return sendmsg(cinfo, &cmsg);
+}
+
+static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
+{
+       pr_info("%s:%d\n", __func__, __LINE__);
+       return resync_send(mddev, RESYNCING, lo, hi);
+}
+
+static void resync_finish(struct mddev *mddev)
+{
+       pr_info("%s:%d\n", __func__, __LINE__);
+       resync_send(mddev, RESYNCING, 0, 0);
+}
+
+static int area_resyncing(struct mddev *mddev, sector_t lo, sector_t hi)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       int ret = 0;
+       struct suspend_info *s;
+
+       spin_lock_irq(&cinfo->suspend_lock);
+       if (list_empty(&cinfo->suspend_list))
+               goto out;
+       list_for_each_entry(s, &cinfo->suspend_list, list)
+               if (hi > s->lo && lo < s->hi) {
+                       ret = 1;
+                       break;
+               }
+out:
+       spin_unlock_irq(&cinfo->suspend_lock);
+       return ret;
+}
+
+static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       struct cluster_msg cmsg;
+       int ret = 0;
+       struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
+       char *uuid = sb->device_uuid;
+
+       memset(&cmsg, 0, sizeof(cmsg));
+       cmsg.type = cpu_to_le32(NEWDISK);
+       memcpy(cmsg.uuid, uuid, 16);
+       cmsg.raid_slot = rdev->desc_nr;
+       lock_comm(cinfo);
+       ret = __sendmsg(cinfo, &cmsg);
+       if (ret)
+               return ret;
+       cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE;
+       ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX);
+       cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE;
+       /* Some node does not "see" the device */
+       if (ret == -EAGAIN)
+               ret = -ENOENT;
+       else
+               dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
+       return ret;
+}
+
+static int add_new_disk_finish(struct mddev *mddev)
+{
+       struct cluster_msg cmsg;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       int ret;
+       /* Write sb and inform others */
+       md_update_sb(mddev, 1);
+       cmsg.type = METADATA_UPDATED;
+       ret = __sendmsg(cinfo, &cmsg);
+       unlock_comm(cinfo);
+       return ret;
+}
+
+static int new_disk_ack(struct mddev *mddev, bool ack)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       if (!test_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state)) {
+               pr_warn("md-cluster(%s): Spurious cluster confirmation\n", mdname(mddev));
+               return -EINVAL;
+       }
+
+       if (ack)
+               dlm_unlock_sync(cinfo->no_new_dev_lockres);
+       complete(&cinfo->newdisk_completion);
+       return 0;
+}
+
+static int remove_disk(struct mddev *mddev, struct md_rdev *rdev)
+{
+       struct cluster_msg cmsg;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       cmsg.type = REMOVE;
+       cmsg.raid_slot = rdev->desc_nr;
+       return __sendmsg(cinfo, &cmsg);
+}
+
+static int gather_bitmaps(struct md_rdev *rdev)
+{
+       int sn, err;
+       sector_t lo, hi;
+       struct cluster_msg cmsg;
+       struct mddev *mddev = rdev->mddev;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       cmsg.type = RE_ADD;
+       cmsg.raid_slot = rdev->desc_nr;
+       err = sendmsg(cinfo, &cmsg);
+       if (err)
+               goto out;
+
+       for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) {
+               if (sn == (cinfo->slot_number - 1))
+                       continue;
+               err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
+               if (err) {
+                       pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
+                       goto out;
+               }
+               if ((hi > 0) && (lo < mddev->recovery_cp))
+                       mddev->recovery_cp = lo;
+       }
+out:
+       return err;
+}
+
+static struct md_cluster_operations cluster_ops = {
+       .join   = join,
+       .leave  = leave,
+       .slot_number = slot_number,
+       .resync_info_update = resync_info_update,
+       .resync_start = resync_start,
+       .resync_finish = resync_finish,
+       .metadata_update_start = metadata_update_start,
+       .metadata_update_finish = metadata_update_finish,
+       .metadata_update_cancel = metadata_update_cancel,
+       .area_resyncing = area_resyncing,
+       .add_new_disk_start = add_new_disk_start,
+       .add_new_disk_finish = add_new_disk_finish,
+       .new_disk_ack = new_disk_ack,
+       .remove_disk = remove_disk,
+       .gather_bitmaps = gather_bitmaps,
+};
+
+static int __init cluster_init(void)
+{
+       pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n");
+       pr_info("Registering Cluster MD functions\n");
+       register_md_cluster_operations(&cluster_ops, THIS_MODULE);
+       return 0;
+}
+
+static void cluster_exit(void)
+{
+       unregister_md_cluster_operations();
+}
+
+module_init(cluster_init);
+module_exit(cluster_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Clustering support for MD");
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
new file mode 100644 (file)
index 0000000..6817ee0
--- /dev/null
@@ -0,0 +1,29 @@
+
+
+#ifndef _MD_CLUSTER_H
+#define _MD_CLUSTER_H
+
+#include "md.h"
+
+struct mddev;
+struct md_rdev;
+
+struct md_cluster_operations {
+       int (*join)(struct mddev *mddev, int nodes);
+       int (*leave)(struct mddev *mddev);
+       int (*slot_number)(struct mddev *mddev);
+       void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
+       int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi);
+       void (*resync_finish)(struct mddev *mddev);
+       int (*metadata_update_start)(struct mddev *mddev);
+       int (*metadata_update_finish)(struct mddev *mddev);
+       int (*metadata_update_cancel)(struct mddev *mddev);
+       int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi);
+       int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev);
+       int (*add_new_disk_finish)(struct mddev *mddev);
+       int (*new_disk_ack)(struct mddev *mddev, bool ack);
+       int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
+       int (*gather_bitmaps)(struct md_rdev *rdev);
+};
+
+#endif /* _MD_CLUSTER_H */
index e617878..d4f31e1 100644 (file)
@@ -53,6 +53,7 @@
 #include <linux/slab.h>
 #include "md.h"
 #include "bitmap.h"
+#include "md-cluster.h"
 
 #ifndef MODULE
 static void autostart_arrays(int part);
@@ -66,6 +67,11 @@ static void autostart_arrays(int part);
 static LIST_HEAD(pers_list);
 static DEFINE_SPINLOCK(pers_lock);
 
+struct md_cluster_operations *md_cluster_ops;
+EXPORT_SYMBOL(md_cluster_ops);
+struct module *md_cluster_mod;
+EXPORT_SYMBOL(md_cluster_mod);
+
 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
 static struct workqueue_struct *md_wq;
 static struct workqueue_struct *md_misc_wq;
@@ -640,7 +646,7 @@ void mddev_unlock(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(mddev_unlock);
 
-static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
+struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
 {
        struct md_rdev *rdev;
 
@@ -650,6 +656,7 @@ static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
 
        return NULL;
 }
+EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
 
 static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
 {
@@ -2047,11 +2054,11 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
                int choice = 0;
                if (mddev->pers)
                        choice = mddev->raid_disks;
-               while (find_rdev_nr_rcu(mddev, choice))
+               while (md_find_rdev_nr_rcu(mddev, choice))
                        choice++;
                rdev->desc_nr = choice;
        } else {
-               if (find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
+               if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
                        rcu_read_unlock();
                        return -EBUSY;
                }
@@ -2166,11 +2173,12 @@ static void export_rdev(struct md_rdev *rdev)
        kobject_put(&rdev->kobj);
 }
 
-static void kick_rdev_from_array(struct md_rdev *rdev)
+void md_kick_rdev_from_array(struct md_rdev *rdev)
 {
        unbind_rdev_from_array(rdev);
        export_rdev(rdev);
 }
+EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
 
 static void export_array(struct mddev *mddev)
 {
@@ -2179,7 +2187,7 @@ static void export_array(struct mddev *mddev)
        while (!list_empty(&mddev->disks)) {
                rdev = list_first_entry(&mddev->disks, struct md_rdev,
                                        same_set);
-               kick_rdev_from_array(rdev);
+               md_kick_rdev_from_array(rdev);
        }
        mddev->raid_disks = 0;
        mddev->major_version = 0;
@@ -2208,7 +2216,7 @@ static void sync_sbs(struct mddev *mddev, int nospares)
        }
 }
 
-static void md_update_sb(struct mddev *mddev, int force_change)
+void md_update_sb(struct mddev *mddev, int force_change)
 {
        struct md_rdev *rdev;
        int sync_req;
@@ -2369,6 +2377,37 @@ repeat:
                wake_up(&rdev->blocked_wait);
        }
 }
+EXPORT_SYMBOL(md_update_sb);
+
+static int add_bound_rdev(struct md_rdev *rdev)
+{
+       struct mddev *mddev = rdev->mddev;
+       int err = 0;
+
+       if (!mddev->pers->hot_remove_disk) {
+               /* If there is hot_add_disk but no hot_remove_disk
+                * then added disks for geometry changes,
+                * and should be added immediately.
+                */
+               super_types[mddev->major_version].
+                       validate_super(mddev, rdev);
+               err = mddev->pers->hot_add_disk(mddev, rdev);
+               if (err) {
+                       unbind_rdev_from_array(rdev);
+                       export_rdev(rdev);
+                       return err;
+               }
+       }
+       sysfs_notify_dirent_safe(rdev->sysfs_state);
+
+       set_bit(MD_CHANGE_DEVS, &mddev->flags);
+       if (mddev->degraded)
+               set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+       md_new_event(mddev);
+       md_wakeup_thread(mddev->thread);
+       return 0;
+}
 
 /* words written to sysfs files may, or may not, be \n terminated.
  * We want to accept with case. For this we use cmd_match.
@@ -2471,10 +2510,16 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
                        err = -EBUSY;
                else {
                        struct mddev *mddev = rdev->mddev;
-                       kick_rdev_from_array(rdev);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->remove_disk(mddev, rdev);
+                       md_kick_rdev_from_array(rdev);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_start(mddev);
                        if (mddev->pers)
                                md_update_sb(mddev, 1);
                        md_new_event(mddev);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_finish(mddev);
                        err = 0;
                }
        } else if (cmd_match(buf, "writemostly")) {
@@ -2553,6 +2598,21 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
                        clear_bit(Replacement, &rdev->flags);
                        err = 0;
                }
+       } else if (cmd_match(buf, "re-add")) {
+               if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
+                       /* clear_bit is performed _after_ all the devices
+                        * have their local Faulty bit cleared. If any writes
+                        * happen in the meantime in the local node, they
+                        * will land in the local bitmap, which will be synced
+                        * by this node eventually
+                        */
+                       if (!mddev_is_clustered(rdev->mddev) ||
+                           (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
+                               clear_bit(Faulty, &rdev->flags);
+                               err = add_bound_rdev(rdev);
+                       }
+               } else
+                       err = -EBUSY;
        }
        if (!err)
                sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -3127,7 +3187,7 @@ static void analyze_sbs(struct mddev *mddev)
                                "md: fatal superblock inconsistency in %s"
                                " -- removing from array\n",
                                bdevname(rdev->bdev,b));
-                       kick_rdev_from_array(rdev);
+                       md_kick_rdev_from_array(rdev);
                }
 
        super_types[mddev->major_version].
@@ -3142,18 +3202,27 @@ static void analyze_sbs(struct mddev *mddev)
                               "md: %s: %s: only %d devices permitted\n",
                               mdname(mddev), bdevname(rdev->bdev, b),
                               mddev->max_disks);
-                       kick_rdev_from_array(rdev);
+                       md_kick_rdev_from_array(rdev);
                        continue;
                }
-               if (rdev != freshest)
+               if (rdev != freshest) {
                        if (super_types[mddev->major_version].
                            validate_super(mddev, rdev)) {
                                printk(KERN_WARNING "md: kicking non-fresh %s"
                                        " from array!\n",
                                        bdevname(rdev->bdev,b));
-                               kick_rdev_from_array(rdev);
+                               md_kick_rdev_from_array(rdev);
                                continue;
                        }
+                       /* No device should have a Candidate flag
+                        * when reading devices
+                        */
+                       if (test_bit(Candidate, &rdev->flags)) {
+                               pr_info("md: kicking Cluster Candidate %s from array!\n",
+                                       bdevname(rdev->bdev, b));
+                               md_kick_rdev_from_array(rdev);
+                       }
+               }
                if (mddev->level == LEVEL_MULTIPATH) {
                        rdev->desc_nr = i++;
                        rdev->raid_disk = rdev->desc_nr;
@@ -4008,8 +4077,12 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
        if (err)
                return err;
        if (mddev->pers) {
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_start(mddev);
                err = update_size(mddev, sectors);
                md_update_sb(mddev, 1);
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_finish(mddev);
        } else {
                if (mddev->dev_sectors == 0 ||
                    mddev->dev_sectors > sectors)
@@ -4354,7 +4427,6 @@ min_sync_store(struct mddev *mddev, const char *buf, size_t len)
 {
        unsigned long long min;
        int err;
-       int chunk;
 
        if (kstrtoull(buf, 10, &min))
                return -EINVAL;
@@ -4368,16 +4440,8 @@ min_sync_store(struct mddev *mddev, const char *buf, size_t len)
        if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                goto out_unlock;
 
-       /* Must be a multiple of chunk_size */
-       chunk = mddev->chunk_sectors;
-       if (chunk) {
-               sector_t temp = min;
-
-               err = -EINVAL;
-               if (sector_div(temp, chunk))
-                       goto out_unlock;
-       }
-       mddev->resync_min = min;
+       /* Round down to multiple of 4K for safety */
+       mddev->resync_min = round_down(min, 8);
        err = 0;
 
 out_unlock:
@@ -5077,10 +5141,16 @@ int md_run(struct mddev *mddev)
        }
        if (err == 0 && pers->sync_request &&
            (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
-               err = bitmap_create(mddev);
-               if (err)
+               struct bitmap *bitmap;
+
+               bitmap = bitmap_create(mddev, -1);
+               if (IS_ERR(bitmap)) {
+                       err = PTR_ERR(bitmap);
                        printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
                               mdname(mddev), err);
+               } else
+                       mddev->bitmap = bitmap;
+
        }
        if (err) {
                mddev_detach(mddev);
@@ -5232,6 +5302,8 @@ static void md_clean(struct mddev *mddev)
 
 static void __md_stop_writes(struct mddev *mddev)
 {
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
        set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        flush_workqueue(md_misc_wq);
        if (mddev->sync_thread) {
@@ -5250,6 +5322,8 @@ static void __md_stop_writes(struct mddev *mddev)
                mddev->in_sync = 1;
                md_update_sb(mddev, 1);
        }
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
 }
 
 void md_stop_writes(struct mddev *mddev)
@@ -5636,6 +5710,8 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
                info.state = (1<<MD_SB_CLEAN);
        if (mddev->bitmap && mddev->bitmap_info.offset)
                info.state |= (1<<MD_SB_BITMAP_PRESENT);
+       if (mddev_is_clustered(mddev))
+               info.state |= (1<<MD_SB_CLUSTERED);
        info.active_disks  = insync;
        info.working_disks = working;
        info.failed_disks  = failed;
@@ -5691,7 +5767,7 @@ static int get_disk_info(struct mddev *mddev, void __user * arg)
                return -EFAULT;
 
        rcu_read_lock();
-       rdev = find_rdev_nr_rcu(mddev, info.number);
+       rdev = md_find_rdev_nr_rcu(mddev, info.number);
        if (rdev) {
                info.major = MAJOR(rdev->bdev->bd_dev);
                info.minor = MINOR(rdev->bdev->bd_dev);
@@ -5724,6 +5800,13 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
        struct md_rdev *rdev;
        dev_t dev = MKDEV(info->major,info->minor);
 
+       if (mddev_is_clustered(mddev) &&
+               !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
+               pr_err("%s: Cannot add to clustered mddev.\n",
+                              mdname(mddev));
+               return -EINVAL;
+       }
+
        if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
                return -EOVERFLOW;
 
@@ -5810,31 +5893,38 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
                else
                        clear_bit(WriteMostly, &rdev->flags);
 
+               /*
+                * check whether the device shows up in other nodes
+                */
+               if (mddev_is_clustered(mddev)) {
+                       if (info->state & (1 << MD_DISK_CANDIDATE)) {
+                               /* Through --cluster-confirm */
+                               set_bit(Candidate, &rdev->flags);
+                               err = md_cluster_ops->new_disk_ack(mddev, true);
+                               if (err) {
+                                       export_rdev(rdev);
+                                       return err;
+                               }
+                       } else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
+                               /* --add initiated by this node */
+                               err = md_cluster_ops->add_new_disk_start(mddev, rdev);
+                               if (err) {
+                                       md_cluster_ops->add_new_disk_finish(mddev);
+                                       export_rdev(rdev);
+                                       return err;
+                               }
+                       }
+               }
+
                rdev->raid_disk = -1;
                err = bind_rdev_to_array(rdev, mddev);
-               if (!err && !mddev->pers->hot_remove_disk) {
-                       /* If there is hot_add_disk but no hot_remove_disk
-                        * then added disks for geometry changes,
-                        * and should be added immediately.
-                        */
-                       super_types[mddev->major_version].
-                               validate_super(mddev, rdev);
-                       err = mddev->pers->hot_add_disk(mddev, rdev);
-                       if (err)
-                               unbind_rdev_from_array(rdev);
-               }
                if (err)
                        export_rdev(rdev);
                else
-                       sysfs_notify_dirent_safe(rdev->sysfs_state);
-
-               set_bit(MD_CHANGE_DEVS, &mddev->flags);
-               if (mddev->degraded)
-                       set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-               if (!err)
-                       md_new_event(mddev);
-               md_wakeup_thread(mddev->thread);
+                       err = add_bound_rdev(rdev);
+               if (mddev_is_clustered(mddev) &&
+                               (info->state & (1 << MD_DISK_CLUSTER_ADD)))
+                       md_cluster_ops->add_new_disk_finish(mddev);
                return err;
        }
 
@@ -5895,18 +5985,29 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
        if (!rdev)
                return -ENXIO;
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
+
        clear_bit(Blocked, &rdev->flags);
        remove_and_add_spares(mddev, rdev);
 
        if (rdev->raid_disk >= 0)
                goto busy;
 
-       kick_rdev_from_array(rdev);
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->remove_disk(mddev, rdev);
+
+       md_kick_rdev_from_array(rdev);
        md_update_sb(mddev, 1);
        md_new_event(mddev);
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
+
        return 0;
 busy:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_cancel(mddev);
        printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
                bdevname(rdev->bdev,b), mdname(mddev));
        return -EBUSY;
@@ -5956,12 +6057,15 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
                err = -EINVAL;
                goto abort_export;
        }
+
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
        clear_bit(In_sync, &rdev->flags);
        rdev->desc_nr = -1;
        rdev->saved_raid_disk = -1;
        err = bind_rdev_to_array(rdev, mddev);
        if (err)
-               goto abort_export;
+               goto abort_clustered;
 
        /*
         * The rest should better be atomic, we can have disk failures
@@ -5972,6 +6076,8 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
 
        md_update_sb(mddev, 1);
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
        /*
         * Kick recovery, maybe this spare has to be added to the
         * array immediately.
@@ -5981,6 +6087,9 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
        md_new_event(mddev);
        return 0;
 
+abort_clustered:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_cancel(mddev);
 abort_export:
        export_rdev(rdev);
        return err;
@@ -6038,9 +6147,14 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
        if (mddev->pers) {
                mddev->pers->quiesce(mddev, 1);
                if (fd >= 0) {
-                       err = bitmap_create(mddev);
-                       if (!err)
+                       struct bitmap *bitmap;
+
+                       bitmap = bitmap_create(mddev, -1);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
                                err = bitmap_load(mddev);
+                       } else
+                               err = PTR_ERR(bitmap);
                }
                if (fd < 0 || err) {
                        bitmap_destroy(mddev);
@@ -6293,6 +6407,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                        return rv;
                }
        }
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
        if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
                rv = update_size(mddev, (sector_t)info->size * 2);
 
@@ -6300,33 +6416,49 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                rv = update_raid_disks(mddev, info->raid_disks);
 
        if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
-               if (mddev->pers->quiesce == NULL || mddev->thread == NULL)
-                       return -EINVAL;
-               if (mddev->recovery || mddev->sync_thread)
-                       return -EBUSY;
+               if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
+                       rv = -EINVAL;
+                       goto err;
+               }
+               if (mddev->recovery || mddev->sync_thread) {
+                       rv = -EBUSY;
+                       goto err;
+               }
                if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
+                       struct bitmap *bitmap;
                        /* add the bitmap */
-                       if (mddev->bitmap)
-                               return -EEXIST;
-                       if (mddev->bitmap_info.default_offset == 0)
-                               return -EINVAL;
+                       if (mddev->bitmap) {
+                               rv = -EEXIST;
+                               goto err;
+                       }
+                       if (mddev->bitmap_info.default_offset == 0) {
+                               rv = -EINVAL;
+                               goto err;
+                       }
                        mddev->bitmap_info.offset =
                                mddev->bitmap_info.default_offset;
                        mddev->bitmap_info.space =
                                mddev->bitmap_info.default_space;
                        mddev->pers->quiesce(mddev, 1);
-                       rv = bitmap_create(mddev);
-                       if (!rv)
+                       bitmap = bitmap_create(mddev, -1);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
                                rv = bitmap_load(mddev);
+                       } else
+                               rv = PTR_ERR(bitmap);
                        if (rv)
                                bitmap_destroy(mddev);
                        mddev->pers->quiesce(mddev, 0);
                } else {
                        /* remove the bitmap */
-                       if (!mddev->bitmap)
-                               return -ENOENT;
-                       if (mddev->bitmap->storage.file)
-                               return -EINVAL;
+                       if (!mddev->bitmap) {
+                               rv = -ENOENT;
+                               goto err;
+                       }
+                       if (mddev->bitmap->storage.file) {
+                               rv = -EINVAL;
+                               goto err;
+                       }
                        mddev->pers->quiesce(mddev, 1);
                        bitmap_destroy(mddev);
                        mddev->pers->quiesce(mddev, 0);
@@ -6334,6 +6466,12 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                }
        }
        md_update_sb(mddev, 1);
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
+       return rv;
+err:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_cancel(mddev);
        return rv;
 }
 
@@ -6393,6 +6531,7 @@ static inline bool md_ioctl_valid(unsigned int cmd)
        case SET_DISK_FAULTY:
        case STOP_ARRAY:
        case STOP_ARRAY_RO:
+       case CLUSTERED_DISK_NACK:
                return true;
        default:
                return false;
@@ -6665,6 +6804,13 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
                goto unlock;
        }
 
+       case CLUSTERED_DISK_NACK:
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->new_disk_ack(mddev, false);
+               else
+                       err = -EINVAL;
+               goto unlock;
+
        case HOT_ADD_DISK:
                err = hot_add_disk(mddev, new_decode_dev(arg));
                goto unlock;
@@ -7238,6 +7384,55 @@ int unregister_md_personality(struct md_personality *p)
 }
 EXPORT_SYMBOL(unregister_md_personality);
 
+int register_md_cluster_operations(struct md_cluster_operations *ops, struct module *module)
+{
+       if (md_cluster_ops != NULL)
+               return -EALREADY;
+       spin_lock(&pers_lock);
+       md_cluster_ops = ops;
+       md_cluster_mod = module;
+       spin_unlock(&pers_lock);
+       return 0;
+}
+EXPORT_SYMBOL(register_md_cluster_operations);
+
+int unregister_md_cluster_operations(void)
+{
+       spin_lock(&pers_lock);
+       md_cluster_ops = NULL;
+       spin_unlock(&pers_lock);
+       return 0;
+}
+EXPORT_SYMBOL(unregister_md_cluster_operations);
+
+int md_setup_cluster(struct mddev *mddev, int nodes)
+{
+       int err;
+
+       err = request_module("md-cluster");
+       if (err) {
+               pr_err("md-cluster module not found.\n");
+               return err;
+       }
+
+       spin_lock(&pers_lock);
+       if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
+               spin_unlock(&pers_lock);
+               return -ENOENT;
+       }
+       spin_unlock(&pers_lock);
+
+       return md_cluster_ops->join(mddev, nodes);
+}
+
+void md_cluster_stop(struct mddev *mddev)
+{
+       if (!md_cluster_ops)
+               return;
+       md_cluster_ops->leave(mddev);
+       module_put(md_cluster_mod);
+}
+
 static int is_mddev_idle(struct mddev *mddev, int init)
 {
        struct md_rdev *rdev;
@@ -7375,7 +7570,11 @@ int md_allow_write(struct mddev *mddev)
                    mddev->safemode == 0)
                        mddev->safemode = 1;
                spin_unlock(&mddev->lock);
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_start(mddev);
                md_update_sb(mddev, 0);
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_finish(mddev);
                sysfs_notify_dirent_safe(mddev->sysfs_state);
        } else
                spin_unlock(&mddev->lock);
@@ -7576,6 +7775,9 @@ void md_do_sync(struct md_thread *thread)
        md_new_event(mddev);
        update_time = jiffies;
 
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->resync_start(mddev, j, max_sectors);
+
        blk_start_plug(&plug);
        while (j < max_sectors) {
                sector_t sectors;
@@ -7618,8 +7820,7 @@ void md_do_sync(struct md_thread *thread)
                if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
                        break;
 
-               sectors = mddev->pers->sync_request(mddev, j, &skipped,
-                                                 currspeed < speed_min(mddev));
+               sectors = mddev->pers->sync_request(mddev, j, &skipped);
                if (sectors == 0) {
                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                        break;
@@ -7636,6 +7837,8 @@ void md_do_sync(struct md_thread *thread)
                j += sectors;
                if (j > 2)
                        mddev->curr_resync = j;
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->resync_info_update(mddev, j, max_sectors);
                mddev->curr_mark_cnt = io_sectors;
                if (last_check == 0)
                        /* this is the earliest that rebuild will be
@@ -7677,11 +7880,18 @@ void md_do_sync(struct md_thread *thread)
                        /((jiffies-mddev->resync_mark)/HZ +1) +1;
 
                if (currspeed > speed_min(mddev)) {
-                       if ((currspeed > speed_max(mddev)) ||
-                                       !is_mddev_idle(mddev, 0)) {
+                       if (currspeed > speed_max(mddev)) {
                                msleep(500);
                                goto repeat;
                        }
+                       if (!is_mddev_idle(mddev, 0)) {
+                               /*
+                                * Give other IO more of a chance.
+                                * The faster the devices, the less we wait.
+                                */
+                               wait_event(mddev->recovery_wait,
+                                          !atomic_read(&mddev->recovery_active));
+                       }
                }
        }
        printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
@@ -7694,7 +7904,10 @@ void md_do_sync(struct md_thread *thread)
        wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
        /* tell personality that we are finished */
-       mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
+       mddev->pers->sync_request(mddev, max_sectors, &skipped);
+
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->resync_finish(mddev);
 
        if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
            mddev->curr_resync > 2) {
@@ -7925,8 +8138,13 @@ void md_check_recovery(struct mddev *mddev)
                                sysfs_notify_dirent_safe(mddev->sysfs_state);
                }
 
-               if (mddev->flags & MD_UPDATE_SB_FLAGS)
+               if (mddev->flags & MD_UPDATE_SB_FLAGS) {
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_start(mddev);
                        md_update_sb(mddev, 0);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_finish(mddev);
+               }
 
                if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
                    !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
@@ -8024,6 +8242,8 @@ void md_reap_sync_thread(struct mddev *mddev)
                        set_bit(MD_CHANGE_DEVS, &mddev->flags);
                }
        }
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
        if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
            mddev->pers->finish_reshape)
                mddev->pers->finish_reshape(mddev);
@@ -8036,6 +8256,8 @@ void md_reap_sync_thread(struct mddev *mddev)
                        rdev->saved_raid_disk = -1;
 
        md_update_sb(mddev, 1);
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
        clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
        clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
        clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
@@ -8656,6 +8878,28 @@ err_wq:
        return ret;
 }
 
+void md_reload_sb(struct mddev *mddev)
+{
+       struct md_rdev *rdev, *tmp;
+
+       rdev_for_each_safe(rdev, tmp, mddev) {
+               rdev->sb_loaded = 0;
+               ClearPageUptodate(rdev->sb_page);
+       }
+       mddev->raid_disks = 0;
+       analyze_sbs(mddev);
+       rdev_for_each_safe(rdev, tmp, mddev) {
+               struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
+               /* since we don't write to faulty devices, we figure out if the
+                *  disk is faulty by comparing events
+                */
+               if (mddev->events > sb->events)
+                       set_bit(Faulty, &rdev->flags);
+       }
+
+}
+EXPORT_SYMBOL(md_reload_sb);
+
 #ifndef MODULE
 
 /*
index 318ca8f..4046a6c 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/timer.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
+#include "md-cluster.h"
 
 #define MaxSector (~(sector_t)0)
 
@@ -170,6 +171,10 @@ enum flag_bits {
                                 * a want_replacement device with same
                                 * raid_disk number.
                                 */
+       Candidate,              /* For clustered environments only:
+                                * This device is seen locally but not
+                                * by the whole cluster
+                                */
 };
 
 #define BB_LEN_MASK    (0x00000000000001FFULL)
@@ -202,6 +207,8 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                                int is_new);
 extern void md_ack_all_badblocks(struct badblocks *bb);
 
+struct md_cluster_info;
+
 struct mddev {
        void                            *private;
        struct md_personality           *pers;
@@ -430,6 +437,8 @@ struct mddev {
                unsigned long           daemon_sleep; /* how many jiffies between updates? */
                unsigned long           max_write_behind; /* write-behind mode */
                int                     external;
+               int                     nodes; /* Maximum number of nodes in the cluster */
+               char                    cluster_name[64]; /* Name of the cluster */
        } bitmap_info;
 
        atomic_t                        max_corr_read_errors; /* max read retries */
@@ -448,6 +457,7 @@ struct mddev {
        struct work_struct flush_work;
        struct work_struct event_work;  /* used by dm to report failure event */
        void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
+       struct md_cluster_info          *cluster_info;
 };
 
 static inline int __must_check mddev_lock(struct mddev *mddev)
@@ -496,7 +506,7 @@ struct md_personality
        int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
        int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
        int (*spare_active) (struct mddev *mddev);
-       sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster);
+       sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
        int (*resize) (struct mddev *mddev, sector_t sectors);
        sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
        int (*check_reshape) (struct mddev *mddev);
@@ -608,6 +618,11 @@ static inline void safe_put_page(struct page *p)
 
 extern int register_md_personality(struct md_personality *p);
 extern int unregister_md_personality(struct md_personality *p);
+extern int register_md_cluster_operations(struct md_cluster_operations *ops,
+               struct module *module);
+extern int unregister_md_cluster_operations(void);
+extern int md_setup_cluster(struct mddev *mddev, int nodes);
+extern void md_cluster_stop(struct mddev *mddev);
 extern struct md_thread *md_register_thread(
        void (*run)(struct md_thread *thread),
        struct mddev *mddev,
@@ -654,6 +669,10 @@ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                                   struct mddev *mddev);
 
 extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
+extern void md_reload_sb(struct mddev *mddev);
+extern void md_update_sb(struct mddev *mddev, int force);
+extern void md_kick_rdev_from_array(struct md_rdev * rdev);
+struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
 static inline int mddev_check_plugged(struct mddev *mddev)
 {
        return !!blk_check_plugged(md_unplug, mddev,
@@ -669,4 +688,9 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
        }
 }
 
+extern struct md_cluster_operations *md_cluster_ops;
+static inline int mddev_is_clustered(struct mddev *mddev)
+{
+       return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
+}
 #endif /* _MD_MD_H */
index 3b5d7f7..2cb59a6 100644 (file)
@@ -271,14 +271,16 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
                goto abort;
        }
 
-       blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
-       blk_queue_io_opt(mddev->queue,
-                        (mddev->chunk_sectors << 9) * mddev->raid_disks);
-
-       if (!discard_supported)
-               queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
-       else
-               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+       if (mddev->queue) {
+               blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
+               blk_queue_io_opt(mddev->queue,
+                                (mddev->chunk_sectors << 9) * mddev->raid_disks);
+
+               if (!discard_supported)
+                       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+               else
+                       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+       }
 
        pr_debug("md/raid0:%s: done.\n", mdname(mddev));
        *private_conf = conf;
@@ -429,9 +431,12 @@ static int raid0_run(struct mddev *mddev)
        }
        if (md_check_no_bitmap(mddev))
                return -EINVAL;
-       blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
-       blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
-       blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+
+       if (mddev->queue) {
+               blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
+               blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
+               blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+       }
 
        /* if private is not null, we are here after takeover */
        if (mddev->private == NULL) {
@@ -448,16 +453,17 @@ static int raid0_run(struct mddev *mddev)
        printk(KERN_INFO "md/raid0:%s: md_size is %llu sectors.\n",
               mdname(mddev),
               (unsigned long long)mddev->array_sectors);
-       /* calculate the max read-ahead size.
-        * For read-ahead of large files to be effective, we need to
-        * readahead at least twice a whole stripe. i.e. number of devices
-        * multiplied by chunk size times 2.
-        * If an individual device has an ra_pages greater than the
-        * chunk size, then we will not drive that device as hard as it
-        * wants.  We consider this a configuration error: a larger
-        * chunksize should be used in that case.
-        */
-       {
+
+       if (mddev->queue) {
+               /* calculate the max read-ahead size.
+                * For read-ahead of large files to be effective, we need to
+                * readahead at least twice a whole stripe. i.e. number of devices
+                * multiplied by chunk size times 2.
+                * If an individual device has an ra_pages greater than the
+                * chunk size, then we will not drive that device as hard as it
+                * wants.  We consider this a configuration error: a larger
+                * chunksize should be used in that case.
+                */
                int stripe = mddev->raid_disks *
                        (mddev->chunk_sectors << 9) / PAGE_SIZE;
                if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
index d34e238..9157a29 100644 (file)
@@ -539,7 +539,13 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
        has_nonrot_disk = 0;
        choose_next_idle = 0;
 
-       choose_first = (conf->mddev->recovery_cp < this_sector + sectors);
+       if ((conf->mddev->recovery_cp < this_sector + sectors) ||
+           (mddev_is_clustered(conf->mddev) &&
+           md_cluster_ops->area_resyncing(conf->mddev, this_sector,
+                   this_sector + sectors)))
+               choose_first = 1;
+       else
+               choose_first = 0;
 
        for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
                sector_t dist;
@@ -1102,8 +1108,10 @@ static void make_request(struct mddev *mddev, struct bio * bio)
        md_write_start(mddev, bio); /* wait on superblock update early */
 
        if (bio_data_dir(bio) == WRITE &&
-           bio_end_sector(bio) > mddev->suspend_lo &&
-           bio->bi_iter.bi_sector < mddev->suspend_hi) {
+           ((bio_end_sector(bio) > mddev->suspend_lo &&
+           bio->bi_iter.bi_sector < mddev->suspend_hi) ||
+           (mddev_is_clustered(mddev) &&
+            md_cluster_ops->area_resyncing(mddev, bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
                /* As the suspend_* range is controlled by
                 * userspace, we want an interruptible
                 * wait.
@@ -1114,7 +1122,10 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                        prepare_to_wait(&conf->wait_barrier,
                                        &w, TASK_INTERRUPTIBLE);
                        if (bio_end_sector(bio) <= mddev->suspend_lo ||
-                           bio->bi_iter.bi_sector >= mddev->suspend_hi)
+                           bio->bi_iter.bi_sector >= mddev->suspend_hi ||
+                           (mddev_is_clustered(mddev) &&
+                            !md_cluster_ops->area_resyncing(mddev,
+                                    bio->bi_iter.bi_sector, bio_end_sector(bio))))
                                break;
                        schedule();
                }
@@ -1561,6 +1572,7 @@ static int raid1_spare_active(struct mddev *mddev)
                struct md_rdev *rdev = conf->mirrors[i].rdev;
                struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
                if (repl
+                   && !test_bit(Candidate, &repl->flags)
                    && repl->recovery_offset == MaxSector
                    && !test_bit(Faulty, &repl->flags)
                    && !test_and_set_bit(In_sync, &repl->flags)) {
@@ -2468,7 +2480,7 @@ static int init_resync(struct r1conf *conf)
  * that can be installed to exclude normal IO requests.
  */
 
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster)
+static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
 {
        struct r1conf *conf = mddev->private;
        struct r1bio *r1_bio;
@@ -2521,13 +2533,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
                *skipped = 1;
                return sync_blocks;
        }
-       /*
-        * If there is non-resync activity waiting for a turn,
-        * and resync is going fast enough,
-        * then let it though before starting on this new sync request.
-        */
-       if (!go_faster && conf->nr_waiting)
-               msleep_interruptible(1000);
 
        bitmap_cond_end_sync(mddev->bitmap, sector_nr);
        r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
index a7196c4..e793ab6 100644 (file)
@@ -2889,7 +2889,7 @@ static int init_resync(struct r10conf *conf)
  */
 
 static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
-                            int *skipped, int go_faster)
+                            int *skipped)
 {
        struct r10conf *conf = mddev->private;
        struct r10bio *r10_bio;
@@ -2994,12 +2994,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
        if (conf->geo.near_copies < conf->geo.raid_disks &&
            max_sector > (sector_nr | chunk_mask))
                max_sector = (sector_nr | chunk_mask) + 1;
-       /*
-        * If there is non-resync activity waiting for us then
-        * put in a delay to throttle resync.
-        */
-       if (!go_faster && conf->nr_waiting)
-               msleep_interruptible(1000);
 
        /* Again, very different code for resync and recovery.
         * Both must result in an r10bio with a list of bios that
index cd2f96b..77dfd72 100644 (file)
@@ -54,6 +54,7 @@
 #include <linux/slab.h>
 #include <linux/ratelimit.h>
 #include <linux/nodemask.h>
+#include <linux/flex_array.h>
 #include <trace/events/block.h>
 
 #include "md.h"
@@ -496,7 +497,7 @@ static void shrink_buffers(struct stripe_head *sh)
        }
 }
 
-static int grow_buffers(struct stripe_head *sh)
+static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
 {
        int i;
        int num = sh->raid_conf->pool_size;
@@ -504,7 +505,7 @@ static int grow_buffers(struct stripe_head *sh)
        for (i = 0; i < num; i++) {
                struct page *page;
 
-               if (!(page = alloc_page(GFP_KERNEL))) {
+               if (!(page = alloc_page(gfp))) {
                        return 1;
                }
                sh->dev[i].page = page;
@@ -525,6 +526,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
        BUG_ON(atomic_read(&sh->count) != 0);
        BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
        BUG_ON(stripe_operations_active(sh));
+       BUG_ON(sh->batch_head);
 
        pr_debug("init_stripe called, stripe %llu\n",
                (unsigned long long)sector);
@@ -552,8 +554,10 @@ retry:
        }
        if (read_seqcount_retry(&conf->gen_lock, seq))
                goto retry;
+       sh->overwrite_disks = 0;
        insert_hash(conf, sh);
        sh->cpu = smp_processor_id();
+       set_bit(STRIPE_BATCH_READY, &sh->state);
 }
 
 static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
@@ -668,20 +672,28 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                    *(conf->hash_locks + hash));
                sh = __find_stripe(conf, sector, conf->generation - previous);
                if (!sh) {
-                       if (!conf->inactive_blocked)
+                       if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
                                sh = get_free_stripe(conf, hash);
+                               if (!sh && llist_empty(&conf->released_stripes) &&
+                                   !test_bit(R5_DID_ALLOC, &conf->cache_state))
+                                       set_bit(R5_ALLOC_MORE,
+                                               &conf->cache_state);
+                       }
                        if (noblock && sh == NULL)
                                break;
                        if (!sh) {
-                               conf->inactive_blocked = 1;
+                               set_bit(R5_INACTIVE_BLOCKED,
+                                       &conf->cache_state);
                                wait_event_lock_irq(
                                        conf->wait_for_stripe,
                                        !list_empty(conf->inactive_list + hash) &&
                                        (atomic_read(&conf->active_stripes)
                                         < (conf->max_nr_stripes * 3 / 4)
-                                        || !conf->inactive_blocked),
+                                        || !test_bit(R5_INACTIVE_BLOCKED,
+                                                     &conf->cache_state)),
                                        *(conf->hash_locks + hash));
-                               conf->inactive_blocked = 0;
+                               clear_bit(R5_INACTIVE_BLOCKED,
+                                         &conf->cache_state);
                        } else {
                                init_stripe(sh, sector, previous);
                                atomic_inc(&sh->count);
@@ -708,6 +720,130 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
        return sh;
 }
 
+static bool is_full_stripe_write(struct stripe_head *sh)
+{
+       BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded));
+       return sh->overwrite_disks == (sh->disks - sh->raid_conf->max_degraded);
+}
+
+static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
+{
+       local_irq_disable();
+       if (sh1 > sh2) {
+               spin_lock(&sh2->stripe_lock);
+               spin_lock_nested(&sh1->stripe_lock, 1);
+       } else {
+               spin_lock(&sh1->stripe_lock);
+               spin_lock_nested(&sh2->stripe_lock, 1);
+       }
+}
+
+static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
+{
+       spin_unlock(&sh1->stripe_lock);
+       spin_unlock(&sh2->stripe_lock);
+       local_irq_enable();
+}
+
+/* Only freshly new full stripe normal write stripe can be added to a batch list */
+static bool stripe_can_batch(struct stripe_head *sh)
+{
+       return test_bit(STRIPE_BATCH_READY, &sh->state) &&
+               is_full_stripe_write(sh);
+}
+
+/* we only do back search */
+static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
+{
+       struct stripe_head *head;
+       sector_t head_sector, tmp_sec;
+       int hash;
+       int dd_idx;
+
+       if (!stripe_can_batch(sh))
+               return;
+       /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
+       tmp_sec = sh->sector;
+       if (!sector_div(tmp_sec, conf->chunk_sectors))
+               return;
+       head_sector = sh->sector - STRIPE_SECTORS;
+
+       hash = stripe_hash_locks_hash(head_sector);
+       spin_lock_irq(conf->hash_locks + hash);
+       head = __find_stripe(conf, head_sector, conf->generation);
+       if (head && !atomic_inc_not_zero(&head->count)) {
+               spin_lock(&conf->device_lock);
+               if (!atomic_read(&head->count)) {
+                       if (!test_bit(STRIPE_HANDLE, &head->state))
+                               atomic_inc(&conf->active_stripes);
+                       BUG_ON(list_empty(&head->lru) &&
+                              !test_bit(STRIPE_EXPANDING, &head->state));
+                       list_del_init(&head->lru);
+                       if (head->group) {
+                               head->group->stripes_cnt--;
+                               head->group = NULL;
+                       }
+               }
+               atomic_inc(&head->count);
+               spin_unlock(&conf->device_lock);
+       }
+       spin_unlock_irq(conf->hash_locks + hash);
+
+       if (!head)
+               return;
+       if (!stripe_can_batch(head))
+               goto out;
+
+       lock_two_stripes(head, sh);
+       /* clear_batch_ready clear the flag */
+       if (!stripe_can_batch(head) || !stripe_can_batch(sh))
+               goto unlock_out;
+
+       if (sh->batch_head)
+               goto unlock_out;
+
+       dd_idx = 0;
+       while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
+               dd_idx++;
+       if (head->dev[dd_idx].towrite->bi_rw != sh->dev[dd_idx].towrite->bi_rw)
+               goto unlock_out;
+
+       if (head->batch_head) {
+               spin_lock(&head->batch_head->batch_lock);
+               /* This batch list is already running */
+               if (!stripe_can_batch(head)) {
+                       spin_unlock(&head->batch_head->batch_lock);
+                       goto unlock_out;
+               }
+
+               /*
+                * at this point, head's BATCH_READY could be cleared, but we
+                * can still add the stripe to batch list
+                */
+               list_add(&sh->batch_list, &head->batch_list);
+               spin_unlock(&head->batch_head->batch_lock);
+
+               sh->batch_head = head->batch_head;
+       } else {
+               head->batch_head = head;
+               sh->batch_head = head->batch_head;
+               spin_lock(&head->batch_lock);
+               list_add_tail(&sh->batch_list, &head->batch_list);
+               spin_unlock(&head->batch_lock);
+       }
+
+       if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+               if (atomic_dec_return(&conf->preread_active_stripes)
+                   < IO_THRESHOLD)
+                       md_wakeup_thread(conf->mddev->thread);
+
+       atomic_inc(&sh->count);
+unlock_out:
+       unlock_two_stripes(head, sh);
+out:
+       release_stripe(head);
+}
+
 /* Determine if 'data_offset' or 'new_data_offset' should be used
  * in this stripe_head.
  */
@@ -738,6 +874,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 {
        struct r5conf *conf = sh->raid_conf;
        int i, disks = sh->disks;
+       struct stripe_head *head_sh = sh;
 
        might_sleep();
 
@@ -746,6 +883,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                int replace_only = 0;
                struct bio *bi, *rbi;
                struct md_rdev *rdev, *rrdev = NULL;
+
+               sh = head_sh;
                if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
                        if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
                                rw = WRITE_FUA;
@@ -764,6 +903,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags))
                        rw |= REQ_SYNC;
 
+again:
                bi = &sh->dev[i].req;
                rbi = &sh->dev[i].rreq; /* For writing to replacement */
 
@@ -782,7 +922,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                /* We raced and saw duplicates */
                                rrdev = NULL;
                } else {
-                       if (test_bit(R5_ReadRepl, &sh->dev[i].flags) && rrdev)
+                       if (test_bit(R5_ReadRepl, &head_sh->dev[i].flags) && rrdev)
                                rdev = rrdev;
                        rrdev = NULL;
                }
@@ -853,13 +993,15 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                __func__, (unsigned long long)sh->sector,
                                bi->bi_rw, i);
                        atomic_inc(&sh->count);
+                       if (sh != head_sh)
+                               atomic_inc(&head_sh->count);
                        if (use_new_offset(conf, sh))
                                bi->bi_iter.bi_sector = (sh->sector
                                                 + rdev->new_data_offset);
                        else
                                bi->bi_iter.bi_sector = (sh->sector
                                                 + rdev->data_offset);
-                       if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+                       if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags))
                                bi->bi_rw |= REQ_NOMERGE;
 
                        if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
@@ -903,6 +1045,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                __func__, (unsigned long long)sh->sector,
                                rbi->bi_rw, i);
                        atomic_inc(&sh->count);
+                       if (sh != head_sh)
+                               atomic_inc(&head_sh->count);
                        if (use_new_offset(conf, sh))
                                rbi->bi_iter.bi_sector = (sh->sector
                                                  + rrdev->new_data_offset);
@@ -934,8 +1078,18 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                        pr_debug("skip op %ld on disc %d for sector %llu\n",
                                bi->bi_rw, i, (unsigned long long)sh->sector);
                        clear_bit(R5_LOCKED, &sh->dev[i].flags);
+                       if (sh->batch_head)
+                               set_bit(STRIPE_BATCH_ERR,
+                                       &sh->batch_head->state);
                        set_bit(STRIPE_HANDLE, &sh->state);
                }
+
+               if (!head_sh->batch_head)
+                       continue;
+               sh = list_first_entry(&sh->batch_list, struct stripe_head,
+                                     batch_list);
+               if (sh != head_sh)
+                       goto again;
        }
 }
 
@@ -1051,6 +1205,7 @@ static void ops_run_biofill(struct stripe_head *sh)
        struct async_submit_ctl submit;
        int i;
 
+       BUG_ON(sh->batch_head);
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
@@ -1109,16 +1264,28 @@ static void ops_complete_compute(void *stripe_head_ref)
 
 /* return a pointer to the address conversion region of the scribble buffer */
 static addr_conv_t *to_addr_conv(struct stripe_head *sh,
-                                struct raid5_percpu *percpu)
+                                struct raid5_percpu *percpu, int i)
 {
-       return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
+       void *addr;
+
+       addr = flex_array_get(percpu->scribble, i);
+       return addr + sizeof(struct page *) * (sh->disks + 2);
+}
+
+/* return a pointer to the address conversion region of the scribble buffer */
+static struct page **to_addr_page(struct raid5_percpu *percpu, int i)
+{
+       void *addr;
+
+       addr = flex_array_get(percpu->scribble, i);
+       return addr;
 }
 
 static struct dma_async_tx_descriptor *
 ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
        int disks = sh->disks;
-       struct page **xor_srcs = percpu->scribble;
+       struct page **xor_srcs = to_addr_page(percpu, 0);
        int target = sh->ops.target;
        struct r5dev *tgt = &sh->dev[target];
        struct page *xor_dest = tgt->page;
@@ -1127,6 +1294,8 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
        struct async_submit_ctl submit;
        int i;
 
+       BUG_ON(sh->batch_head);
+
        pr_debug("%s: stripe %llu block: %d\n",
                __func__, (unsigned long long)sh->sector, target);
        BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
@@ -1138,7 +1307,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
        atomic_inc(&sh->count);
 
        init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
-                         ops_complete_compute, sh, to_addr_conv(sh, percpu));
+                         ops_complete_compute, sh, to_addr_conv(sh, percpu, 0));
        if (unlikely(count == 1))
                tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
        else
@@ -1156,7 +1325,9 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
  * destination buffer is recorded in srcs[count] and the Q destination
  * is recorded in srcs[count+1]].
  */
-static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+static int set_syndrome_sources(struct page **srcs,
+                               struct stripe_head *sh,
+                               int srctype)
 {
        int disks = sh->disks;
        int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
@@ -1171,8 +1342,15 @@ static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
        i = d0_idx;
        do {
                int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+               struct r5dev *dev = &sh->dev[i];
 
-               srcs[slot] = sh->dev[i].page;
+               if (i == sh->qd_idx || i == sh->pd_idx ||
+                   (srctype == SYNDROME_SRC_ALL) ||
+                   (srctype == SYNDROME_SRC_WANT_DRAIN &&
+                    test_bit(R5_Wantdrain, &dev->flags)) ||
+                   (srctype == SYNDROME_SRC_WRITTEN &&
+                    dev->written))
+                       srcs[slot] = sh->dev[i].page;
                i = raid6_next_disk(i, disks);
        } while (i != d0_idx);
 
@@ -1183,7 +1361,7 @@ static struct dma_async_tx_descriptor *
 ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
 {
        int disks = sh->disks;
-       struct page **blocks = percpu->scribble;
+       struct page **blocks = to_addr_page(percpu, 0);
        int target;
        int qd_idx = sh->qd_idx;
        struct dma_async_tx_descriptor *tx;
@@ -1193,6 +1371,7 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
        int i;
        int count;
 
+       BUG_ON(sh->batch_head);
        if (sh->ops.target < 0)
                target = sh->ops.target2;
        else if (sh->ops.target2 < 0)
@@ -1211,12 +1390,12 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
        atomic_inc(&sh->count);
 
        if (target == qd_idx) {
-               count = set_syndrome_sources(blocks, sh);
+               count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
                blocks[count] = NULL; /* regenerating p is not necessary */
                BUG_ON(blocks[count+1] != dest); /* q should already be set */
                init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
                                  ops_complete_compute, sh,
-                                 to_addr_conv(sh, percpu));
+                                 to_addr_conv(sh, percpu, 0));
                tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
        } else {
                /* Compute any data- or p-drive using XOR */
@@ -1229,7 +1408,7 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
 
                init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
                                  NULL, ops_complete_compute, sh,
-                                 to_addr_conv(sh, percpu));
+                                 to_addr_conv(sh, percpu, 0));
                tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
        }
 
@@ -1248,9 +1427,10 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
        struct r5dev *tgt = &sh->dev[target];
        struct r5dev *tgt2 = &sh->dev[target2];
        struct dma_async_tx_descriptor *tx;
-       struct page **blocks = percpu->scribble;
+       struct page **blocks = to_addr_page(percpu, 0);
        struct async_submit_ctl submit;
 
+       BUG_ON(sh->batch_head);
        pr_debug("%s: stripe %llu block1: %d block2: %d\n",
                 __func__, (unsigned long long)sh->sector, target, target2);
        BUG_ON(target < 0 || target2 < 0);
@@ -1290,7 +1470,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
                        /* Missing P+Q, just recompute */
                        init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
                                          ops_complete_compute, sh,
-                                         to_addr_conv(sh, percpu));
+                                         to_addr_conv(sh, percpu, 0));
                        return async_gen_syndrome(blocks, 0, syndrome_disks+2,
                                                  STRIPE_SIZE, &submit);
                } else {
@@ -1314,21 +1494,21 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
                        init_async_submit(&submit,
                                          ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
                                          NULL, NULL, NULL,
-                                         to_addr_conv(sh, percpu));
+                                         to_addr_conv(sh, percpu, 0));
                        tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
                                       &submit);
 
-                       count = set_syndrome_sources(blocks, sh);
+                       count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
                        init_async_submit(&submit, ASYNC_TX_FENCE, tx,
                                          ops_complete_compute, sh,
-                                         to_addr_conv(sh, percpu));
+                                         to_addr_conv(sh, percpu, 0));
                        return async_gen_syndrome(blocks, 0, count+2,
                                                  STRIPE_SIZE, &submit);
                }
        } else {
                init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
                                  ops_complete_compute, sh,
-                                 to_addr_conv(sh, percpu));
+                                 to_addr_conv(sh, percpu, 0));
                if (failb == syndrome_disks) {
                        /* We're missing D+P. */
                        return async_raid6_datap_recov(syndrome_disks+2,
@@ -1352,17 +1532,18 @@ static void ops_complete_prexor(void *stripe_head_ref)
 }
 
 static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
-              struct dma_async_tx_descriptor *tx)
+ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu,
+               struct dma_async_tx_descriptor *tx)
 {
        int disks = sh->disks;
-       struct page **xor_srcs = percpu->scribble;
+       struct page **xor_srcs = to_addr_page(percpu, 0);
        int count = 0, pd_idx = sh->pd_idx, i;
        struct async_submit_ctl submit;
 
        /* existing parity data subtracted */
        struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
 
+       BUG_ON(sh->batch_head);
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
@@ -1374,31 +1555,56 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
        }
 
        init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
-                         ops_complete_prexor, sh, to_addr_conv(sh, percpu));
+                         ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
        tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
 
        return tx;
 }
 
+static struct dma_async_tx_descriptor *
+ops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu,
+               struct dma_async_tx_descriptor *tx)
+{
+       struct page **blocks = to_addr_page(percpu, 0);
+       int count;
+       struct async_submit_ctl submit;
+
+       pr_debug("%s: stripe %llu\n", __func__,
+               (unsigned long long)sh->sector);
+
+       count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_WANT_DRAIN);
+
+       init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx,
+                         ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
+       tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
+
+       return tx;
+}
+
 static struct dma_async_tx_descriptor *
 ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
 {
        int disks = sh->disks;
        int i;
+       struct stripe_head *head_sh = sh;
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
        for (i = disks; i--; ) {
-               struct r5dev *dev = &sh->dev[i];
+               struct r5dev *dev;
                struct bio *chosen;
 
-               if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
+               sh = head_sh;
+               if (test_and_clear_bit(R5_Wantdrain, &head_sh->dev[i].flags)) {
                        struct bio *wbi;
 
+again:
+                       dev = &sh->dev[i];
                        spin_lock_irq(&sh->stripe_lock);
                        chosen = dev->towrite;
                        dev->towrite = NULL;
+                       sh->overwrite_disks = 0;
                        BUG_ON(dev->written);
                        wbi = dev->written = chosen;
                        spin_unlock_irq(&sh->stripe_lock);
@@ -1423,6 +1629,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                                }
                                wbi = r5_next_bio(wbi, dev->sector);
                        }
+
+                       if (head_sh->batch_head) {
+                               sh = list_first_entry(&sh->batch_list,
+                                                     struct stripe_head,
+                                                     batch_list);
+                               if (sh == head_sh)
+                                       continue;
+                               goto again;
+                       }
                }
        }
 
@@ -1478,12 +1693,15 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
                     struct dma_async_tx_descriptor *tx)
 {
        int disks = sh->disks;
-       struct page **xor_srcs = percpu->scribble;
+       struct page **xor_srcs;
        struct async_submit_ctl submit;
-       int count = 0, pd_idx = sh->pd_idx, i;
+       int count, pd_idx = sh->pd_idx, i;
        struct page *xor_dest;
        int prexor = 0;
        unsigned long flags;
+       int j = 0;
+       struct stripe_head *head_sh = sh;
+       int last_stripe;
 
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
@@ -1500,15 +1718,18 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
                ops_complete_reconstruct(sh);
                return;
        }
+again:
+       count = 0;
+       xor_srcs = to_addr_page(percpu, j);
        /* check if prexor is active which means only process blocks
         * that are part of a read-modify-write (written)
         */
-       if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
+       if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
                prexor = 1;
                xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
-                       if (dev->written)
+                       if (head_sh->dev[i].written)
                                xor_srcs[count++] = dev->page;
                }
        } else {
@@ -1525,17 +1746,32 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
         * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
         * for the synchronous xor case
         */
-       flags = ASYNC_TX_ACK |
-               (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
-
-       atomic_inc(&sh->count);
+       last_stripe = !head_sh->batch_head ||
+               list_first_entry(&sh->batch_list,
+                                struct stripe_head, batch_list) == head_sh;
+       if (last_stripe) {
+               flags = ASYNC_TX_ACK |
+                       (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
+
+               atomic_inc(&head_sh->count);
+               init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh,
+                                 to_addr_conv(sh, percpu, j));
+       } else {
+               flags = prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST;
+               init_async_submit(&submit, flags, tx, NULL, NULL,
+                                 to_addr_conv(sh, percpu, j));
+       }
 
-       init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
-                         to_addr_conv(sh, percpu));
        if (unlikely(count == 1))
                tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
        else
                tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+       if (!last_stripe) {
+               j++;
+               sh = list_first_entry(&sh->batch_list, struct stripe_head,
+                                     batch_list);
+               goto again;
+       }
 }
 
 static void
@@ -1543,8 +1779,12 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
                     struct dma_async_tx_descriptor *tx)
 {
        struct async_submit_ctl submit;
-       struct page **blocks = percpu->scribble;
-       int count, i;
+       struct page **blocks;
+       int count, i, j = 0;
+       struct stripe_head *head_sh = sh;
+       int last_stripe;
+       int synflags;
+       unsigned long txflags;
 
        pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
 
@@ -1562,13 +1802,36 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
                return;
        }
 
-       count = set_syndrome_sources(blocks, sh);
+again:
+       blocks = to_addr_page(percpu, j);
 
-       atomic_inc(&sh->count);
+       if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
+               synflags = SYNDROME_SRC_WRITTEN;
+               txflags = ASYNC_TX_ACK | ASYNC_TX_PQ_XOR_DST;
+       } else {
+               synflags = SYNDROME_SRC_ALL;
+               txflags = ASYNC_TX_ACK;
+       }
+
+       count = set_syndrome_sources(blocks, sh, synflags);
+       last_stripe = !head_sh->batch_head ||
+               list_first_entry(&sh->batch_list,
+                                struct stripe_head, batch_list) == head_sh;
 
-       init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
-                         sh, to_addr_conv(sh, percpu));
+       if (last_stripe) {
+               atomic_inc(&head_sh->count);
+               init_async_submit(&submit, txflags, tx, ops_complete_reconstruct,
+                                 head_sh, to_addr_conv(sh, percpu, j));
+       } else
+               init_async_submit(&submit, 0, tx, NULL, NULL,
+                                 to_addr_conv(sh, percpu, j));
        async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
+       if (!last_stripe) {
+               j++;
+               sh = list_first_entry(&sh->batch_list, struct stripe_head,
+                                     batch_list);
+               goto again;
+       }
 }
 
 static void ops_complete_check(void *stripe_head_ref)
@@ -1589,7 +1852,7 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
        int pd_idx = sh->pd_idx;
        int qd_idx = sh->qd_idx;
        struct page *xor_dest;
-       struct page **xor_srcs = percpu->scribble;
+       struct page **xor_srcs = to_addr_page(percpu, 0);
        struct dma_async_tx_descriptor *tx;
        struct async_submit_ctl submit;
        int count;
@@ -1598,6 +1861,7 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
        pr_debug("%s: stripe %llu\n", __func__,
                (unsigned long long)sh->sector);
 
+       BUG_ON(sh->batch_head);
        count = 0;
        xor_dest = sh->dev[pd_idx].page;
        xor_srcs[count++] = xor_dest;
@@ -1608,7 +1872,7 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
        }
 
        init_async_submit(&submit, 0, NULL, NULL, NULL,
-                         to_addr_conv(sh, percpu));
+                         to_addr_conv(sh, percpu, 0));
        tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
                           &sh->ops.zero_sum_result, &submit);
 
@@ -1619,20 +1883,21 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
 
 static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
 {
-       struct page **srcs = percpu->scribble;
+       struct page **srcs = to_addr_page(percpu, 0);
        struct async_submit_ctl submit;
        int count;
 
        pr_debug("%s: stripe %llu checkp: %d\n", __func__,
                (unsigned long long)sh->sector, checkp);
 
-       count = set_syndrome_sources(srcs, sh);
+       BUG_ON(sh->batch_head);
+       count = set_syndrome_sources(srcs, sh, SYNDROME_SRC_ALL);
        if (!checkp)
                srcs[count] = NULL;
 
        atomic_inc(&sh->count);
        init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
-                         sh, to_addr_conv(sh, percpu));
+                         sh, to_addr_conv(sh, percpu, 0));
        async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
                           &sh->ops.zero_sum_result, percpu->spare_page, &submit);
 }
@@ -1667,8 +1932,12 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
                        async_tx_ack(tx);
        }
 
-       if (test_bit(STRIPE_OP_PREXOR, &ops_request))
-               tx = ops_run_prexor(sh, percpu, tx);
+       if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
+               if (level < 6)
+                       tx = ops_run_prexor5(sh, percpu, tx);
+               else
+                       tx = ops_run_prexor6(sh, percpu, tx);
+       }
 
        if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
                tx = ops_run_biodrain(sh, tx);
@@ -1693,7 +1962,7 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
                        BUG();
        }
 
-       if (overlap_clear)
+       if (overlap_clear && !sh->batch_head)
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
                        if (test_and_clear_bit(R5_Overlap, &dev->flags))
@@ -1702,10 +1971,10 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
        put_cpu();
 }
 
-static int grow_one_stripe(struct r5conf *conf, int hash)
+static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 {
        struct stripe_head *sh;
-       sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
+       sh = kmem_cache_zalloc(conf->slab_cache, gfp);
        if (!sh)
                return 0;
 
@@ -1713,17 +1982,23 @@ static int grow_one_stripe(struct r5conf *conf, int hash)
 
        spin_lock_init(&sh->stripe_lock);
 
-       if (grow_buffers(sh)) {
+       if (grow_buffers(sh, gfp)) {
                shrink_buffers(sh);
                kmem_cache_free(conf->slab_cache, sh);
                return 0;
        }
-       sh->hash_lock_index = hash;
+       sh->hash_lock_index =
+               conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
        /* we just created an active stripe so... */
        atomic_set(&sh->count, 1);
        atomic_inc(&conf->active_stripes);
        INIT_LIST_HEAD(&sh->lru);
+
+       spin_lock_init(&sh->batch_lock);
+       INIT_LIST_HEAD(&sh->batch_list);
+       sh->batch_head = NULL;
        release_stripe(sh);
+       conf->max_nr_stripes++;
        return 1;
 }
 
@@ -1731,7 +2006,6 @@ static int grow_stripes(struct r5conf *conf, int num)
 {
        struct kmem_cache *sc;
        int devs = max(conf->raid_disks, conf->previous_raid_disks);
-       int hash;
 
        if (conf->mddev->gendisk)
                sprintf(conf->cache_name[0],
@@ -1749,13 +2023,10 @@ static int grow_stripes(struct r5conf *conf, int num)
                return 1;
        conf->slab_cache = sc;
        conf->pool_size = devs;
-       hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
-       while (num--) {
-               if (!grow_one_stripe(conf, hash))
+       while (num--)
+               if (!grow_one_stripe(conf, GFP_KERNEL))
                        return 1;
-               conf->max_nr_stripes++;
-               hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
-       }
+
        return 0;
 }
 
@@ -1772,13 +2043,21 @@ static int grow_stripes(struct r5conf *conf, int num)
  * calculate over all devices (not just the data blocks), using zeros in place
  * of the P and Q blocks.
  */
-static size_t scribble_len(int num)
+static struct flex_array *scribble_alloc(int num, int cnt, gfp_t flags)
 {
+       struct flex_array *ret;
        size_t len;
 
        len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
-
-       return len;
+       ret = flex_array_alloc(len, cnt, flags);
+       if (!ret)
+               return NULL;
+       /* always prealloc all elements, so no locking is required */
+       if (flex_array_prealloc(ret, 0, cnt, flags)) {
+               flex_array_free(ret);
+               return NULL;
+       }
+       return ret;
 }
 
 static int resize_stripes(struct r5conf *conf, int newsize)
@@ -1896,16 +2175,16 @@ static int resize_stripes(struct r5conf *conf, int newsize)
                err = -ENOMEM;
 
        get_online_cpus();
-       conf->scribble_len = scribble_len(newsize);
        for_each_present_cpu(cpu) {
                struct raid5_percpu *percpu;
-               void *scribble;
+               struct flex_array *scribble;
 
                percpu = per_cpu_ptr(conf->percpu, cpu);
-               scribble = kmalloc(conf->scribble_len, GFP_NOIO);
+               scribble = scribble_alloc(newsize, conf->chunk_sectors /
+                       STRIPE_SECTORS, GFP_NOIO);
 
                if (scribble) {
-                       kfree(percpu->scribble);
+                       flex_array_free(percpu->scribble);
                        percpu->scribble = scribble;
                } else {
                        err = -ENOMEM;
@@ -1937,9 +2216,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
        return err;
 }
 
-static int drop_one_stripe(struct r5conf *conf, int hash)
+static int drop_one_stripe(struct r5conf *conf)
 {
        struct stripe_head *sh;
+       int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
 
        spin_lock_irq(conf->hash_locks + hash);
        sh = get_free_stripe(conf, hash);
@@ -1950,15 +2230,15 @@ static int drop_one_stripe(struct r5conf *conf, int hash)
        shrink_buffers(sh);
        kmem_cache_free(conf->slab_cache, sh);
        atomic_dec(&conf->active_stripes);
+       conf->max_nr_stripes--;
        return 1;
 }
 
 static void shrink_stripes(struct r5conf *conf)
 {
-       int hash;
-       for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++)
-               while (drop_one_stripe(conf, hash))
-                       ;
+       while (conf->max_nr_stripes &&
+              drop_one_stripe(conf))
+               ;
 
        if (conf->slab_cache)
                kmem_cache_destroy(conf->slab_cache);
@@ -2154,10 +2434,16 @@ static void raid5_end_write_request(struct bio *bi, int error)
        }
        rdev_dec_pending(rdev, conf->mddev);
 
+       if (sh->batch_head && !uptodate)
+               set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
+
        if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
                clear_bit(R5_LOCKED, &sh->dev[i].flags);
        set_bit(STRIPE_HANDLE, &sh->state);
        release_stripe(sh);
+
+       if (sh->batch_head && sh != sh->batch_head)
+               release_stripe(sh->batch_head);
 }
 
 static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
@@ -2535,7 +2821,7 @@ static void
 schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                         int rcw, int expand)
 {
-       int i, pd_idx = sh->pd_idx, disks = sh->disks;
+       int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks;
        struct r5conf *conf = sh->raid_conf;
        int level = conf->level;
 
@@ -2571,13 +2857,15 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                        if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
                                atomic_inc(&conf->pending_full_writes);
        } else {
-               BUG_ON(level == 6);
                BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                        test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
+               BUG_ON(level == 6 &&
+                       (!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) ||
+                          test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags))));
 
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
-                       if (i == pd_idx)
+                       if (i == pd_idx || i == qd_idx)
                                continue;
 
                        if (dev->towrite &&
@@ -2624,7 +2912,8 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
  * toread/towrite point to the first in a chain.
  * The bi_next chain must be in order.
  */
-static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
+static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
+                         int forwrite, int previous)
 {
        struct bio **bip;
        struct r5conf *conf = sh->raid_conf;
@@ -2643,6 +2932,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
         * protect it.
         */
        spin_lock_irq(&sh->stripe_lock);
+       /* Don't allow new IO added to stripes in batch list */
+       if (sh->batch_head)
+               goto overlap;
        if (forwrite) {
                bip = &sh->dev[dd_idx].towrite;
                if (*bip == NULL)
@@ -2657,6 +2949,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
        if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
                goto overlap;
 
+       if (!forwrite || previous)
+               clear_bit(STRIPE_BATCH_READY, &sh->state);
+
        BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
        if (*bip)
                bi->bi_next = *bip;
@@ -2674,7 +2969,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                                sector = bio_end_sector(bi);
                }
                if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
-                       set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
+                       if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags))
+                               sh->overwrite_disks++;
        }
 
        pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
@@ -2688,6 +2984,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                sh->bm_seq = conf->seq_flush+1;
                set_bit(STRIPE_BIT_DELAY, &sh->state);
        }
+
+       if (stripe_can_batch(sh))
+               stripe_add_to_batch_list(conf, sh);
        return 1;
 
  overlap:
@@ -2720,6 +3019,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                struct bio **return_bi)
 {
        int i;
+       BUG_ON(sh->batch_head);
        for (i = disks; i--; ) {
                struct bio *bi;
                int bitmap_end = 0;
@@ -2746,6 +3046,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                /* fail all writes first */
                bi = sh->dev[i].towrite;
                sh->dev[i].towrite = NULL;
+               sh->overwrite_disks = 0;
                spin_unlock_irq(&sh->stripe_lock);
                if (bi)
                        bitmap_end = 1;
@@ -2834,6 +3135,7 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
        int abort = 0;
        int i;
 
+       BUG_ON(sh->batch_head);
        clear_bit(STRIPE_SYNCING, &sh->state);
        if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
                wake_up(&conf->wait_for_overlap);
@@ -3064,6 +3366,7 @@ static void handle_stripe_fill(struct stripe_head *sh,
 {
        int i;
 
+       BUG_ON(sh->batch_head);
        /* look for blocks to read/compute, skip this if a compute
         * is already in flight, or if the stripe contents are in the
         * midst of changing due to a write
@@ -3087,6 +3390,9 @@ static void handle_stripe_clean_event(struct r5conf *conf,
        int i;
        struct r5dev *dev;
        int discard_pending = 0;
+       struct stripe_head *head_sh = sh;
+       bool do_endio = false;
+       int wakeup_nr = 0;
 
        for (i = disks; i--; )
                if (sh->dev[i].written) {
@@ -3102,8 +3408,11 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                        clear_bit(R5_UPTODATE, &dev->flags);
                                if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
                                        WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
-                                       dev->page = dev->orig_page;
                                }
+                               do_endio = true;
+
+returnbi:
+                               dev->page = dev->orig_page;
                                wbi = dev->written;
                                dev->written = NULL;
                                while (wbi && wbi->bi_iter.bi_sector <
@@ -3120,6 +3429,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                                STRIPE_SECTORS,
                                         !test_bit(STRIPE_DEGRADED, &sh->state),
                                                0);
+                               if (head_sh->batch_head) {
+                                       sh = list_first_entry(&sh->batch_list,
+                                                             struct stripe_head,
+                                                             batch_list);
+                                       if (sh != head_sh) {
+                                               dev = &sh->dev[i];
+                                               goto returnbi;
+                                       }
+                               }
+                               sh = head_sh;
+                               dev = &sh->dev[i];
                        } else if (test_bit(R5_Discard, &dev->flags))
                                discard_pending = 1;
                        WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
@@ -3141,8 +3461,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                 * will be reinitialized
                 */
                spin_lock_irq(&conf->device_lock);
+unhash:
                remove_hash(sh);
+               if (head_sh->batch_head) {
+                       sh = list_first_entry(&sh->batch_list,
+                                             struct stripe_head, batch_list);
+                       if (sh != head_sh)
+                                       goto unhash;
+               }
                spin_unlock_irq(&conf->device_lock);
+               sh = head_sh;
+
                if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
                        set_bit(STRIPE_HANDLE, &sh->state);
 
@@ -3151,6 +3480,45 @@ static void handle_stripe_clean_event(struct r5conf *conf,
        if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
                if (atomic_dec_and_test(&conf->pending_full_writes))
                        md_wakeup_thread(conf->mddev->thread);
+
+       if (!head_sh->batch_head || !do_endio)
+               return;
+       for (i = 0; i < head_sh->disks; i++) {
+               if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
+                       wakeup_nr++;
+       }
+       while (!list_empty(&head_sh->batch_list)) {
+               int i;
+               sh = list_first_entry(&head_sh->batch_list,
+                                     struct stripe_head, batch_list);
+               list_del_init(&sh->batch_list);
+
+               set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
+                             head_sh->state & ~((1 << STRIPE_ACTIVE) |
+                                                (1 << STRIPE_PREREAD_ACTIVE) |
+                                                STRIPE_EXPAND_SYNC_FLAG));
+               sh->check_state = head_sh->check_state;
+               sh->reconstruct_state = head_sh->reconstruct_state;
+               for (i = 0; i < sh->disks; i++) {
+                       if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+                               wakeup_nr++;
+                       sh->dev[i].flags = head_sh->dev[i].flags;
+               }
+
+               spin_lock_irq(&sh->stripe_lock);
+               sh->batch_head = NULL;
+               spin_unlock_irq(&sh->stripe_lock);
+               if (sh->state & STRIPE_EXPAND_SYNC_FLAG)
+                       set_bit(STRIPE_HANDLE, &sh->state);
+               release_stripe(sh);
+       }
+
+       spin_lock_irq(&head_sh->stripe_lock);
+       head_sh->batch_head = NULL;
+       spin_unlock_irq(&head_sh->stripe_lock);
+       wake_up_nr(&conf->wait_for_overlap, wakeup_nr);
+       if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG)
+               set_bit(STRIPE_HANDLE, &head_sh->state);
 }
 
 static void handle_stripe_dirtying(struct r5conf *conf,
@@ -3161,28 +3529,27 @@ static void handle_stripe_dirtying(struct r5conf *conf,
        int rmw = 0, rcw = 0, i;
        sector_t recovery_cp = conf->mddev->recovery_cp;
 
-       /* RAID6 requires 'rcw' in current implementation.
-        * Otherwise, check whether resync is now happening or should start.
+       /* Check whether resync is now happening or should start.
         * If yes, then the array is dirty (after unclean shutdown or
         * initial creation), so parity in some stripes might be inconsistent.
         * In this case, we need to always do reconstruct-write, to ensure
         * that in case of drive failure or read-error correction, we
         * generate correct data from the parity.
         */
-       if (conf->max_degraded == 2 ||
+       if (conf->rmw_level == PARITY_DISABLE_RMW ||
            (recovery_cp < MaxSector && sh->sector >= recovery_cp &&
             s->failed == 0)) {
                /* Calculate the real rcw later - for now make it
                 * look like rcw is cheaper
                 */
                rcw = 1; rmw = 2;
-               pr_debug("force RCW max_degraded=%u, recovery_cp=%llu sh->sector=%llu\n",
-                        conf->max_degraded, (unsigned long long)recovery_cp,
+               pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n",
+                        conf->rmw_level, (unsigned long long)recovery_cp,
                         (unsigned long long)sh->sector);
        } else for (i = disks; i--; ) {
                /* would I have to read this buffer for read_modify_write */
                struct r5dev *dev = &sh->dev[i];
-               if ((dev->towrite || i == sh->pd_idx) &&
+               if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
                    !test_bit(R5_LOCKED, &dev->flags) &&
                    !(test_bit(R5_UPTODATE, &dev->flags) ||
                      test_bit(R5_Wantcompute, &dev->flags))) {
@@ -3192,7 +3559,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                                rmw += 2*disks;  /* cannot read it */
                }
                /* Would I have to read this buffer for reconstruct_write */
-               if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
+               if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+                   i != sh->pd_idx && i != sh->qd_idx &&
                    !test_bit(R5_LOCKED, &dev->flags) &&
                    !(test_bit(R5_UPTODATE, &dev->flags) ||
                    test_bit(R5_Wantcompute, &dev->flags))) {
@@ -3205,7 +3573,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
        pr_debug("for sector %llu, rmw=%d rcw=%d\n",
                (unsigned long long)sh->sector, rmw, rcw);
        set_bit(STRIPE_HANDLE, &sh->state);
-       if (rmw < rcw && rmw > 0) {
+       if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_ENABLE_RMW)) && rmw > 0) {
                /* prefer read-modify-write, but need to get some data */
                if (conf->mddev->queue)
                        blk_add_trace_msg(conf->mddev->queue,
@@ -3213,7 +3581,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                                          (unsigned long long)sh->sector, rmw);
                for (i = disks; i--; ) {
                        struct r5dev *dev = &sh->dev[i];
-                       if ((dev->towrite || i == sh->pd_idx) &&
+                       if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
                            !test_bit(R5_LOCKED, &dev->flags) &&
                            !(test_bit(R5_UPTODATE, &dev->flags) ||
                            test_bit(R5_Wantcompute, &dev->flags)) &&
@@ -3232,7 +3600,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                        }
                }
        }
-       if (rcw <= rmw && rcw > 0) {
+       if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_ENABLE_RMW)) && rcw > 0) {
                /* want reconstruct write, but need to get some data */
                int qread =0;
                rcw = 0;
@@ -3290,6 +3658,7 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
 {
        struct r5dev *dev = NULL;
 
+       BUG_ON(sh->batch_head);
        set_bit(STRIPE_HANDLE, &sh->state);
 
        switch (sh->check_state) {
@@ -3380,6 +3749,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
        int qd_idx = sh->qd_idx;
        struct r5dev *dev;
 
+       BUG_ON(sh->batch_head);
        set_bit(STRIPE_HANDLE, &sh->state);
 
        BUG_ON(s->failed > 2);
@@ -3543,6 +3913,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
         * copy some of them into a target stripe for expand.
         */
        struct dma_async_tx_descriptor *tx = NULL;
+       BUG_ON(sh->batch_head);
        clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
        for (i = 0; i < sh->disks; i++)
                if (i != sh->pd_idx && i != sh->qd_idx) {
@@ -3615,8 +3986,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 
        memset(s, 0, sizeof(*s));
 
-       s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-       s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+       s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state) && !sh->batch_head;
+       s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head;
        s->failed_num[0] = -1;
        s->failed_num[1] = -1;
 
@@ -3786,6 +4157,80 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
        rcu_read_unlock();
 }
 
+static int clear_batch_ready(struct stripe_head *sh)
+{
+       struct stripe_head *tmp;
+       if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
+               return 0;
+       spin_lock(&sh->stripe_lock);
+       if (!sh->batch_head) {
+               spin_unlock(&sh->stripe_lock);
+               return 0;
+       }
+
+       /*
+        * this stripe could be added to a batch list before we check
+        * BATCH_READY, skips it
+        */
+       if (sh->batch_head != sh) {
+               spin_unlock(&sh->stripe_lock);
+               return 1;
+       }
+       spin_lock(&sh->batch_lock);
+       list_for_each_entry(tmp, &sh->batch_list, batch_list)
+               clear_bit(STRIPE_BATCH_READY, &tmp->state);
+       spin_unlock(&sh->batch_lock);
+       spin_unlock(&sh->stripe_lock);
+
+       /*
+        * BATCH_READY is cleared, no new stripes can be added.
+        * batch_list can be accessed without lock
+        */
+       return 0;
+}
+
+static void check_break_stripe_batch_list(struct stripe_head *sh)
+{
+       struct stripe_head *head_sh, *next;
+       int i;
+
+       if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
+               return;
+
+       head_sh = sh;
+       do {
+               sh = list_first_entry(&sh->batch_list,
+                                     struct stripe_head, batch_list);
+               BUG_ON(sh == head_sh);
+       } while (!test_bit(STRIPE_DEGRADED, &sh->state));
+
+       while (sh != head_sh) {
+               next = list_first_entry(&sh->batch_list,
+                                       struct stripe_head, batch_list);
+               list_del_init(&sh->batch_list);
+
+               set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
+                             head_sh->state & ~((1 << STRIPE_ACTIVE) |
+                                                (1 << STRIPE_PREREAD_ACTIVE) |
+                                                (1 << STRIPE_DEGRADED) |
+                                                STRIPE_EXPAND_SYNC_FLAG));
+               sh->check_state = head_sh->check_state;
+               sh->reconstruct_state = head_sh->reconstruct_state;
+               for (i = 0; i < sh->disks; i++)
+                       sh->dev[i].flags = head_sh->dev[i].flags &
+                               (~((1 << R5_WriteError) | (1 << R5_Overlap)));
+
+               spin_lock_irq(&sh->stripe_lock);
+               sh->batch_head = NULL;
+               spin_unlock_irq(&sh->stripe_lock);
+
+               set_bit(STRIPE_HANDLE, &sh->state);
+               release_stripe(sh);
+
+               sh = next;
+       }
+}
+
 static void handle_stripe(struct stripe_head *sh)
 {
        struct stripe_head_state s;
@@ -3803,7 +4248,14 @@ static void handle_stripe(struct stripe_head *sh)
                return;
        }
 
-       if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+       if (clear_batch_ready(sh) ) {
+               clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
+               return;
+       }
+
+       check_break_stripe_batch_list(sh);
+
+       if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
                spin_lock(&sh->stripe_lock);
                /* Cannot process 'sync' concurrently with 'discard' */
                if (!test_bit(STRIPE_DISCARD, &sh->state) &&
@@ -4158,7 +4610,7 @@ static int raid5_congested(struct mddev *mddev, int bits)
         * how busy the stripe_cache is
         */
 
-       if (conf->inactive_blocked)
+       if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
                return 1;
        if (conf->quiesce)
                return 1;
@@ -4180,8 +4632,12 @@ static int raid5_mergeable_bvec(struct mddev *mddev,
        unsigned int chunk_sectors = mddev->chunk_sectors;
        unsigned int bio_sectors = bvm->bi_size >> 9;
 
-       if ((bvm->bi_rw & 1) == WRITE)
-               return biovec->bv_len; /* always allow writes to be mergeable */
+       /*
+        * always allow writes to be mergeable, read as well if array
+        * is degraded as we'll go through stripe cache anyway.
+        */
+       if ((bvm->bi_rw & 1) == WRITE || mddev->degraded)
+               return biovec->bv_len;
 
        if (mddev->new_chunk_sectors < mddev->chunk_sectors)
                chunk_sectors = mddev->new_chunk_sectors;
@@ -4603,12 +5059,14 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                }
                set_bit(STRIPE_DISCARD, &sh->state);
                finish_wait(&conf->wait_for_overlap, &w);
+               sh->overwrite_disks = 0;
                for (d = 0; d < conf->raid_disks; d++) {
                        if (d == sh->pd_idx || d == sh->qd_idx)
                                continue;
                        sh->dev[d].towrite = bi;
                        set_bit(R5_OVERWRITE, &sh->dev[d].flags);
                        raid5_inc_bi_active_stripes(bi);
+                       sh->overwrite_disks++;
                }
                spin_unlock_irq(&sh->stripe_lock);
                if (conf->mddev->bitmap) {
@@ -4656,7 +5114,12 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 
        md_write_start(mddev, bi);
 
-       if (rw == READ &&
+       /*
+        * If array is degraded, better not do chunk aligned read because
+        * later we might have to read it again in order to reconstruct
+        * data on failed drives.
+        */
+       if (rw == READ && mddev->degraded == 0 &&
             mddev->reshape_position == MaxSector &&
             chunk_aligned_read(mddev,bi))
                return;
@@ -4772,7 +5235,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                        }
 
                        if (test_bit(STRIPE_EXPANDING, &sh->state) ||
-                           !add_stripe_bio(sh, bi, dd_idx, rw)) {
+                           !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
                                /* Stripe is busy expanding or
                                 * add failed due to overlap.  Flush everything
                                 * and wait a while
@@ -4785,7 +5248,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                        }
                        set_bit(STRIPE_HANDLE, &sh->state);
                        clear_bit(STRIPE_DELAYED, &sh->state);
-                       if ((bi->bi_rw & REQ_SYNC) &&
+                       if ((!sh->batch_head || sh == sh->batch_head) &&
+                           (bi->bi_rw & REQ_SYNC) &&
                            !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                atomic_inc(&conf->preread_active_stripes);
                        release_stripe_plug(mddev, sh);
@@ -5050,8 +5514,7 @@ ret:
        return reshape_sectors;
 }
 
-/* FIXME go_faster isn't used */
-static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster)
+static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
 {
        struct r5conf *conf = mddev->private;
        struct stripe_head *sh;
@@ -5186,7 +5649,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
                        return handled;
                }
 
-               if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
+               if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) {
                        release_stripe(sh);
                        raid5_set_bi_processed_stripes(raid_bio, scnt);
                        conf->retry_read_aligned = raid_bio;
@@ -5312,6 +5775,8 @@ static void raid5d(struct md_thread *thread)
                int batch_size, released;
 
                released = release_stripe_list(conf, conf->temp_inactive_list);
+               if (released)
+                       clear_bit(R5_DID_ALLOC, &conf->cache_state);
 
                if (
                    !list_empty(&conf->bitmap_list)) {
@@ -5350,6 +5815,13 @@ static void raid5d(struct md_thread *thread)
        pr_debug("%d stripes handled\n", handled);
 
        spin_unlock_irq(&conf->device_lock);
+       if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
+               grow_one_stripe(conf, __GFP_NOWARN);
+               /* Set flag even if allocation failed.  This helps
+                * slow down allocation requests when mem is short
+                */
+               set_bit(R5_DID_ALLOC, &conf->cache_state);
+       }
 
        async_tx_issue_pending_all();
        blk_finish_plug(&plug);
@@ -5365,7 +5837,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
        spin_lock(&mddev->lock);
        conf = mddev->private;
        if (conf)
-               ret = sprintf(page, "%d\n", conf->max_nr_stripes);
+               ret = sprintf(page, "%d\n", conf->min_nr_stripes);
        spin_unlock(&mddev->lock);
        return ret;
 }
@@ -5375,30 +5847,24 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 {
        struct r5conf *conf = mddev->private;
        int err;
-       int hash;
 
        if (size <= 16 || size > 32768)
                return -EINVAL;
-       hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
-       while (size < conf->max_nr_stripes) {
-               if (drop_one_stripe(conf, hash))
-                       conf->max_nr_stripes--;
-               else
-                       break;
-               hash--;
-               if (hash < 0)
-                       hash = NR_STRIPE_HASH_LOCKS - 1;
-       }
+
+       conf->min_nr_stripes = size;
+       while (size < conf->max_nr_stripes &&
+              drop_one_stripe(conf))
+               ;
+
+
        err = md_allow_write(mddev);
        if (err)
                return err;
-       hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
-       while (size > conf->max_nr_stripes) {
-               if (grow_one_stripe(conf, hash))
-                       conf->max_nr_stripes++;
-               else break;
-               hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
-       }
+
+       while (size > conf->max_nr_stripes)
+               if (!grow_one_stripe(conf, GFP_KERNEL))
+                       break;
+
        return 0;
 }
 EXPORT_SYMBOL(raid5_set_cache_size);
@@ -5432,6 +5898,49 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
                                raid5_show_stripe_cache_size,
                                raid5_store_stripe_cache_size);
 
+static ssize_t
+raid5_show_rmw_level(struct mddev  *mddev, char *page)
+{
+       struct r5conf *conf = mddev->private;
+       if (conf)
+               return sprintf(page, "%d\n", conf->rmw_level);
+       else
+               return 0;
+}
+
+static ssize_t
+raid5_store_rmw_level(struct mddev  *mddev, const char *page, size_t len)
+{
+       struct r5conf *conf = mddev->private;
+       unsigned long new;
+
+       if (!conf)
+               return -ENODEV;
+
+       if (len >= PAGE_SIZE)
+               return -EINVAL;
+
+       if (kstrtoul(page, 10, &new))
+               return -EINVAL;
+
+       if (new != PARITY_DISABLE_RMW && !raid6_call.xor_syndrome)
+               return -EINVAL;
+
+       if (new != PARITY_DISABLE_RMW &&
+           new != PARITY_ENABLE_RMW &&
+           new != PARITY_PREFER_RMW)
+               return -EINVAL;
+
+       conf->rmw_level = new;
+       return len;
+}
+
+static struct md_sysfs_entry
+raid5_rmw_level = __ATTR(rmw_level, S_IRUGO | S_IWUSR,
+                        raid5_show_rmw_level,
+                        raid5_store_rmw_level);
+
+
 static ssize_t
 raid5_show_preread_threshold(struct mddev *mddev, char *page)
 {
@@ -5463,7 +5972,7 @@ raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
        conf = mddev->private;
        if (!conf)
                err = -ENODEV;
-       else if (new > conf->max_nr_stripes)
+       else if (new > conf->min_nr_stripes)
                err = -EINVAL;
        else
                conf->bypass_threshold = new;
@@ -5618,6 +6127,7 @@ static struct attribute *raid5_attrs[] =  {
        &raid5_preread_bypass_threshold.attr,
        &raid5_group_thread_cnt.attr,
        &raid5_skip_copy.attr,
+       &raid5_rmw_level.attr,
        NULL,
 };
 static struct attribute_group raid5_attrs_group = {
@@ -5699,7 +6209,8 @@ raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
 static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
 {
        safe_put_page(percpu->spare_page);
-       kfree(percpu->scribble);
+       if (percpu->scribble)
+               flex_array_free(percpu->scribble);
        percpu->spare_page = NULL;
        percpu->scribble = NULL;
 }
@@ -5709,7 +6220,9 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
        if (conf->level == 6 && !percpu->spare_page)
                percpu->spare_page = alloc_page(GFP_KERNEL);
        if (!percpu->scribble)
-               percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+               percpu->scribble = scribble_alloc(max(conf->raid_disks,
+                       conf->previous_raid_disks), conf->chunk_sectors /
+                       STRIPE_SECTORS, GFP_KERNEL);
 
        if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) {
                free_scratch_buffer(conf, percpu);
@@ -5740,6 +6253,8 @@ static void raid5_free_percpu(struct r5conf *conf)
 
 static void free_conf(struct r5conf *conf)
 {
+       if (conf->shrinker.seeks)
+               unregister_shrinker(&conf->shrinker);
        free_thread_groups(conf);
        shrink_stripes(conf);
        raid5_free_percpu(conf);
@@ -5807,6 +6322,30 @@ static int raid5_alloc_percpu(struct r5conf *conf)
        return err;
 }
 
+static unsigned long raid5_cache_scan(struct shrinker *shrink,
+                                     struct shrink_control *sc)
+{
+       struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+       int ret = 0;
+       while (ret < sc->nr_to_scan) {
+               if (drop_one_stripe(conf) == 0)
+                       return SHRINK_STOP;
+               ret++;
+       }
+       return ret;
+}
+
+static unsigned long raid5_cache_count(struct shrinker *shrink,
+                                      struct shrink_control *sc)
+{
+       struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+
+       if (conf->max_nr_stripes < conf->min_nr_stripes)
+               /* unlikely, but not impossible */
+               return 0;
+       return conf->max_nr_stripes - conf->min_nr_stripes;
+}
+
 static struct r5conf *setup_conf(struct mddev *mddev)
 {
        struct r5conf *conf;
@@ -5879,7 +6418,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        else
                conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
        max_disks = max(conf->raid_disks, conf->previous_raid_disks);
-       conf->scribble_len = scribble_len(max_disks);
 
        conf->disks = kzalloc(max_disks * sizeof(struct disk_info),
                              GFP_KERNEL);
@@ -5907,6 +6445,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                INIT_LIST_HEAD(conf->temp_inactive_list + i);
 
        conf->level = mddev->new_level;
+       conf->chunk_sectors = mddev->new_chunk_sectors;
        if (raid5_alloc_percpu(conf) != 0)
                goto abort;
 
@@ -5939,12 +6478,17 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                        conf->fullsync = 1;
        }
 
-       conf->chunk_sectors = mddev->new_chunk_sectors;
        conf->level = mddev->new_level;
-       if (conf->level == 6)
+       if (conf->level == 6) {
                conf->max_degraded = 2;
-       else
+               if (raid6_call.xor_syndrome)
+                       conf->rmw_level = PARITY_ENABLE_RMW;
+               else
+                       conf->rmw_level = PARITY_DISABLE_RMW;
+       } else {
                conf->max_degraded = 1;
+               conf->rmw_level = PARITY_ENABLE_RMW;
+       }
        conf->algorithm = mddev->new_layout;
        conf->reshape_progress = mddev->reshape_position;
        if (conf->reshape_progress != MaxSector) {
@@ -5952,10 +6496,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                conf->prev_algo = mddev->layout;
        }
 
-       memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
+       conf->min_nr_stripes = NR_STRIPES;
+       memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
                 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
        atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
-       if (grow_stripes(conf, NR_STRIPES)) {
+       if (grow_stripes(conf, conf->min_nr_stripes)) {
                printk(KERN_ERR
                       "md/raid:%s: couldn't allocate %dkB for buffers\n",
                       mdname(mddev), memory);
@@ -5963,6 +6508,17 @@ static struct r5conf *setup_conf(struct mddev *mddev)
        } else
                printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
                       mdname(mddev), memory);
+       /*
+        * Losing a stripe head costs more than the time to refill it,
+        * it reduces the queue depth and so can hurt throughput.
+        * So set it rather large, scaled by number of devices.
+        */
+       conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4;
+       conf->shrinker.scan_objects = raid5_cache_scan;
+       conf->shrinker.count_objects = raid5_cache_count;
+       conf->shrinker.batch = 128;
+       conf->shrinker.flags = 0;
+       register_shrinker(&conf->shrinker);
 
        sprintf(pers_name, "raid%d", mddev->new_level);
        conf->thread = md_register_thread(raid5d, mddev, pers_name);
@@ -6604,9 +7160,9 @@ static int check_stripe_cache(struct mddev *mddev)
         */
        struct r5conf *conf = mddev->private;
        if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
-           > conf->max_nr_stripes ||
+           > conf->min_nr_stripes ||
            ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
-           > conf->max_nr_stripes) {
+           > conf->min_nr_stripes) {
                printk(KERN_WARNING "md/raid:%s: reshape: not enough stripes.  Needed %lu\n",
                       mdname(mddev),
                       ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
index 983e18a..7dc0dd8 100644 (file)
@@ -210,11 +210,19 @@ struct stripe_head {
        atomic_t                count;        /* nr of active thread/requests */
        int                     bm_seq; /* sequence number for bitmap flushes */
        int                     disks;          /* disks in stripe */
+       int                     overwrite_disks; /* total overwrite disks in stripe,
+                                                 * this is only checked when stripe
+                                                 * has STRIPE_BATCH_READY
+                                                 */
        enum check_states       check_state;
        enum reconstruct_states reconstruct_state;
        spinlock_t              stripe_lock;
        int                     cpu;
        struct r5worker_group   *group;
+
+       struct stripe_head      *batch_head; /* protected by stripe lock */
+       spinlock_t              batch_lock; /* only header's lock is useful */
+       struct list_head        batch_list; /* protected by head's batch lock*/
        /**
         * struct stripe_operations
         * @target - STRIPE_OP_COMPUTE_BLK target
@@ -327,8 +335,15 @@ enum {
        STRIPE_ON_UNPLUG_LIST,
        STRIPE_DISCARD,
        STRIPE_ON_RELEASE_LIST,
+       STRIPE_BATCH_READY,
+       STRIPE_BATCH_ERR,
 };
 
+#define STRIPE_EXPAND_SYNC_FLAG \
+       ((1 << STRIPE_EXPAND_SOURCE) |\
+       (1 << STRIPE_EXPAND_READY) |\
+       (1 << STRIPE_EXPANDING) |\
+       (1 << STRIPE_SYNC_REQUESTED))
 /*
  * Operation request flags
  */
@@ -340,6 +355,24 @@ enum {
        STRIPE_OP_RECONSTRUCT,
        STRIPE_OP_CHECK,
 };
+
+/*
+ * RAID parity calculation preferences
+ */
+enum {
+       PARITY_DISABLE_RMW = 0,
+       PARITY_ENABLE_RMW,
+       PARITY_PREFER_RMW,
+};
+
+/*
+ * Pages requested from set_syndrome_sources()
+ */
+enum {
+       SYNDROME_SRC_ALL,
+       SYNDROME_SRC_WANT_DRAIN,
+       SYNDROME_SRC_WRITTEN,
+};
 /*
  * Plugging:
  *
@@ -396,10 +429,11 @@ struct r5conf {
        spinlock_t              hash_locks[NR_STRIPE_HASH_LOCKS];
        struct mddev            *mddev;
        int                     chunk_sectors;
-       int                     level, algorithm;
+       int                     level, algorithm, rmw_level;
        int                     max_degraded;
        int                     raid_disks;
        int                     max_nr_stripes;
+       int                     min_nr_stripes;
 
        /* reshape_progress is the leading edge of a 'reshape'
         * It has value MaxSector when no reshape is happening
@@ -458,15 +492,11 @@ struct r5conf {
        /* per cpu variables */
        struct raid5_percpu {
                struct page     *spare_page; /* Used when checking P/Q in raid6 */
-               void            *scribble;   /* space for constructing buffer
+               struct flex_array *scribble;   /* space for constructing buffer
                                              * lists and performing address
                                              * conversions
                                              */
        } __percpu *percpu;
-       size_t                  scribble_len; /* size of scribble region must be
-                                              * associated with conf to handle
-                                              * cpu hotplug while reshaping
-                                              */
 #ifdef CONFIG_HOTPLUG_CPU
        struct notifier_block   cpu_notify;
 #endif
@@ -480,9 +510,19 @@ struct r5conf {
        struct llist_head       released_stripes;
        wait_queue_head_t       wait_for_stripe;
        wait_queue_head_t       wait_for_overlap;
-       int                     inactive_blocked;       /* release of inactive stripes blocked,
-                                                        * waiting for 25% to be free
-                                                        */
+       unsigned long           cache_state;
+#define R5_INACTIVE_BLOCKED    1       /* release of inactive stripes blocked,
+                                        * waiting for 25% to be free
+                                        */
+#define R5_ALLOC_MORE          2       /* It might help to allocate another
+                                        * stripe.
+                                        */
+#define R5_DID_ALLOC           4       /* A stripe was allocated, don't allocate
+                                        * more until at least one has been
+                                        * released.  This avoids flooding
+                                        * the cache.
+                                        */
+       struct shrinker         shrinker;
        int                     pool_size; /* number of disks in stripeheads in pool */
        spinlock_t              device_lock;
        struct disk_info        *disks;
@@ -497,6 +537,7 @@ struct r5conf {
        int                     worker_cnt_per_group;
 };
 
+
 /*
  * Our supported algorithms
  */
index 10209c2..efde88a 100644 (file)
@@ -12,7 +12,7 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/amba/xilinx_dma.h>
+#include <linux/dma/xilinx_dma.h>
 #include <linux/lcm.h>
 #include <linux/list.h>
 #include <linux/module.h>
index 69e0483..644dec7 100644 (file)
@@ -402,6 +402,12 @@ static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv, unsigned long flags)
 {
        struct vb2_dc_buf *buf = buf_priv;
        struct dma_buf *dbuf;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &vb2_dc_dmabuf_ops;
+       exp_info.size = buf->size;
+       exp_info.flags = flags;
+       exp_info.priv = buf;
 
        if (!buf->sgt_base)
                buf->sgt_base = vb2_dc_get_base_sgt(buf);
@@ -409,7 +415,7 @@ static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv, unsigned long flags)
        if (WARN_ON(!buf->sgt_base))
                return NULL;
 
-       dbuf = dma_buf_export(buf, &vb2_dc_dmabuf_ops, buf->size, flags, NULL);
+       dbuf = dma_buf_export(&exp_info);
        if (IS_ERR(dbuf))
                return NULL;
 
index b1838ab..45c708e 100644 (file)
@@ -583,11 +583,17 @@ static struct dma_buf *vb2_dma_sg_get_dmabuf(void *buf_priv, unsigned long flags
 {
        struct vb2_dma_sg_buf *buf = buf_priv;
        struct dma_buf *dbuf;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &vb2_dma_sg_dmabuf_ops;
+       exp_info.size = buf->size;
+       exp_info.flags = flags;
+       exp_info.priv = buf;
 
        if (WARN_ON(!buf->dma_sgt))
                return NULL;
 
-       dbuf = dma_buf_export(buf, &vb2_dma_sg_dmabuf_ops, buf->size, flags, NULL);
+       dbuf = dma_buf_export(&exp_info);
        if (IS_ERR(dbuf))
                return NULL;
 
index bcde885..657ab30 100644 (file)
@@ -368,11 +368,17 @@ static struct dma_buf *vb2_vmalloc_get_dmabuf(void *buf_priv, unsigned long flag
 {
        struct vb2_vmalloc_buf *buf = buf_priv;
        struct dma_buf *dbuf;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &vb2_vmalloc_dmabuf_ops;
+       exp_info.size = buf->size;
+       exp_info.flags = flags;
+       exp_info.priv = buf;
 
        if (WARN_ON(!buf->vaddr))
                return NULL;
 
-       dbuf = dma_buf_export(buf, &vb2_vmalloc_dmabuf_ops, buf->size, flags, NULL);
+       dbuf = dma_buf_export(&exp_info);
        if (IS_ERR(dbuf))
                return NULL;
 
index fc0c81e..c4aecc6 100644 (file)
@@ -74,15 +74,11 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
        ret = ec_dev->cmd_xfer(ec_dev, msg);
        if (msg->result == EC_RES_IN_PROGRESS) {
                int i;
-               struct cros_ec_command status_msg;
-               struct ec_response_get_comms_status status;
+               struct cros_ec_command status_msg = { };
+               struct ec_response_get_comms_status *status;
 
-               status_msg.version = 0;
                status_msg.command = EC_CMD_GET_COMMS_STATUS;
-               status_msg.outdata = NULL;
-               status_msg.outsize = 0;
-               status_msg.indata = (uint8_t *)&status;
-               status_msg.insize = sizeof(status);
+               status_msg.insize = sizeof(*status);
 
                /*
                 * Query the EC's status until it's no longer busy or
@@ -98,7 +94,10 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
                        msg->result = status_msg.result;
                        if (status_msg.result != EC_RES_SUCCESS)
                                break;
-                       if (!(status.flags & EC_COMMS_STATUS_PROCESSING))
+
+                       status = (struct ec_response_get_comms_status *)
+                                status_msg.indata;
+                       if (!(status->flags & EC_COMMS_STATUS_PROCESSING))
                                break;
                }
        }
@@ -119,6 +118,10 @@ static const struct mfd_cell cros_devs[] = {
                .id = 2,
                .of_compatible = "google,cros-ec-i2c-tunnel",
        },
+       {
+               .name = "cros-ec-ctl",
+               .id = 3,
+       },
 };
 
 int cros_ec_register(struct cros_ec_device *ec_dev)
index 072f670..2b6ef6b 100644 (file)
@@ -388,7 +388,7 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
 {
        struct dma_slave_config cfg = { 0, };
        struct dma_chan *chan;
-       unsigned int slave_id;
+       void *slave_data = NULL;
        struct resource *res;
        dma_cap_mask_t mask;
        int ret;
@@ -397,13 +397,12 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
        dma_cap_set(DMA_SLAVE, mask);
 
        if (pdata)
-               slave_id = direction == DMA_MEM_TO_DEV
-                        ? pdata->slave_id_tx : pdata->slave_id_rx;
-       else
-               slave_id = 0;
+               slave_data = direction == DMA_MEM_TO_DEV ?
+                       (void *)pdata->slave_id_tx :
+                       (void *)pdata->slave_id_rx;
 
        chan = dma_request_slave_channel_compat(mask, shdma_chan_filter,
-                               (void *)(unsigned long)slave_id, &host->pd->dev,
+                               slave_data, &host->pd->dev,
                                direction == DMA_MEM_TO_DEV ? "tx" : "rx");
 
        dev_dbg(&host->pd->dev, "%s: %s: got channel %p\n", __func__,
@@ -414,8 +413,6 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
 
        res = platform_get_resource(host->pd, IORESOURCE_MEM, 0);
 
-       /* In the OF case the driver will get the slave ID from the DT */
-       cfg.slave_id = slave_id;
        cfg.direction = direction;
 
        if (direction == DMA_DEV_TO_MEM) {
index 6906a90..354f4f3 100644 (file)
@@ -201,7 +201,7 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
                of_match_device(sh_mobile_sdhi_of_match, &pdev->dev);
        struct sh_mobile_sdhi *priv;
        struct tmio_mmc_data *mmc_data;
-       struct sh_mobile_sdhi_info *p = pdev->dev.platform_data;
+       struct tmio_mmc_data *mmd = pdev->dev.platform_data;
        struct tmio_mmc_host *host;
        struct resource *res;
        int irq, ret, i = 0;
@@ -245,30 +245,14 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
        else
                host->bus_shift = 0;
 
-       mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED;
-       if (p) {
-               mmc_data->flags = p->tmio_flags;
-               mmc_data->ocr_mask = p->tmio_ocr_mask;
-               mmc_data->capabilities |= p->tmio_caps;
-               mmc_data->capabilities2 |= p->tmio_caps2;
-               mmc_data->cd_gpio = p->cd_gpio;
-
-               if (p->dma_slave_tx > 0 && p->dma_slave_rx > 0) {
-                       /*
-                        * Yes, we have to provide slave IDs twice to TMIO:
-                        * once as a filter parameter and once for channel
-                        * configuration as an explicit slave ID
-                        */
-                       dma_priv->chan_priv_tx = (void *)p->dma_slave_tx;
-                       dma_priv->chan_priv_rx = (void *)p->dma_slave_rx;
-                       dma_priv->slave_id_tx = p->dma_slave_tx;
-                       dma_priv->slave_id_rx = p->dma_slave_rx;
-               }
-       }
+       if (mmd)
+               *mmc_data = *mmd;
+
        dma_priv->filter = shdma_chan_filter;
        dma_priv->enable = sh_mobile_sdhi_enable_dma;
 
        mmc_data->alignment_shift = 1; /* 2-byte alignment */
+       mmc_data->capabilities |= MMC_CAP_MMC_HIGHSPEED;
 
        /*
         * All SDHI blocks support 2-byte and larger block sizes in 4-bit
index fc3805e..4a597f5 100644 (file)
@@ -43,10 +43,6 @@ struct tmio_mmc_data;
 struct tmio_mmc_host;
 
 struct tmio_mmc_dma {
-       void *chan_priv_tx;
-       void *chan_priv_rx;
-       int slave_id_tx;
-       int slave_id_rx;
        enum dma_slave_buswidth dma_buswidth;
        bool (*filter)(struct dma_chan *chan, void *arg);
        void (*enable)(struct tmio_mmc_host *host, bool enable);
index 331bb61..e4b05db 100644 (file)
@@ -261,7 +261,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
 {
        /* We can only either use DMA for both Tx and Rx or not use it at all */
        if (!host->dma || (!host->pdev->dev.of_node &&
-               (!host->dma->chan_priv_tx || !host->dma->chan_priv_rx)))
+               (!pdata->chan_priv_tx || !pdata->chan_priv_rx)))
                return;
 
        if (!host->chan_tx && !host->chan_rx) {
@@ -278,7 +278,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
                dma_cap_set(DMA_SLAVE, mask);
 
                host->chan_tx = dma_request_slave_channel_compat(mask,
-                                       host->dma->filter, host->dma->chan_priv_tx,
+                                       host->dma->filter, pdata->chan_priv_tx,
                                        &host->pdev->dev, "tx");
                dev_dbg(&host->pdev->dev, "%s: TX: got channel %p\n", __func__,
                        host->chan_tx);
@@ -286,8 +286,6 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
                if (!host->chan_tx)
                        return;
 
-               if (host->dma->chan_priv_tx)
-                       cfg.slave_id = host->dma->slave_id_tx;
                cfg.direction = DMA_MEM_TO_DEV;
                cfg.dst_addr = res->start + (CTL_SD_DATA_PORT << host->bus_shift);
                cfg.dst_addr_width = host->dma->dma_buswidth;
@@ -299,7 +297,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
                        goto ecfgtx;
 
                host->chan_rx = dma_request_slave_channel_compat(mask,
-                                       host->dma->filter, host->dma->chan_priv_rx,
+                                       host->dma->filter, pdata->chan_priv_rx,
                                        &host->pdev->dev, "rx");
                dev_dbg(&host->pdev->dev, "%s: RX: got channel %p\n", __func__,
                        host->chan_rx);
@@ -307,8 +305,6 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
                if (!host->chan_rx)
                        goto ereqrx;
 
-               if (host->dma->chan_priv_rx)
-                       cfg.slave_id = host->dma->slave_id_rx;
                cfg.direction = DMA_DEV_TO_MEM;
                cfg.src_addr = cfg.dst_addr + host->pdata->dma_rx_offset;
                cfg.src_addr_width = host->dma->dma_buswidth;
index 71fea89..a03ad29 100644 (file)
@@ -309,6 +309,19 @@ config MTD_SWAP
          The driver provides wear leveling by storing erase counter into the
          OOB.
 
+config MTD_PARTITIONED_MASTER
+       bool "Retain master device when partitioned"
+       default n
+       depends on MTD
+       help
+         For historical reasons, by default, either a master is present or
+         several partitions are present, but not both. The concern was that
+         data listed in multiple partitions was dangerous; however, SCSI does
+         this and it is frequently useful for applications. This config option
+         leaves the master in even if the device is partitioned. It also makes
+         the parent of the partition device be the master device, rather than
+         what lies behind the master.
+
 source "drivers/mtd/chips/Kconfig"
 
 source "drivers/mtd/maps/Kconfig"
index 423666b..9a1a6ff 100644 (file)
@@ -206,23 +206,23 @@ static struct mtd_info *cfi_staa_setup(struct map_info *map)
                        mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].numblocks = ernum;
                }
                offset += (ersize * ernum);
-               }
+       }
 
-               if (offset != devsize) {
-                       /* Argh */
-                       printk(KERN_WARNING "Sum of regions (%lx) != total size of set of interleaved chips (%lx)\n", offset, devsize);
-                       kfree(mtd->eraseregions);
-                       kfree(cfi->cmdset_priv);
-                       kfree(mtd);
-                       return NULL;
-               }
+       if (offset != devsize) {
+               /* Argh */
+               printk(KERN_WARNING "Sum of regions (%lx) != total size of set of interleaved chips (%lx)\n", offset, devsize);
+               kfree(mtd->eraseregions);
+               kfree(cfi->cmdset_priv);
+               kfree(mtd);
+               return NULL;
+       }
 
-               for (i=0; i<mtd->numeraseregions;i++){
-                       printk(KERN_DEBUG "%d: offset=0x%llx,size=0x%x,blocks=%d\n",
-                              i, (unsigned long long)mtd->eraseregions[i].offset,
-                              mtd->eraseregions[i].erasesize,
-                              mtd->eraseregions[i].numblocks);
-               }
+       for (i=0; i<mtd->numeraseregions;i++){
+               printk(KERN_DEBUG "%d: offset=0x%llx,size=0x%x,blocks=%d\n",
+                      i, (unsigned long long)mtd->eraseregions[i].offset,
+                      mtd->eraseregions[i].erasesize,
+                      mtd->eraseregions[i].numblocks);
+       }
 
        /* Also select the correct geometry setup too */
        mtd->_erase = cfi_staa_erase_varsize;
index 66f0405..b16f3cd 100644 (file)
@@ -9,7 +9,15 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+/*
+ * When the first attempt at device initialization fails, we may need to
+ * wait a little bit and retry. This timeout, by default 3 seconds, gives
+ * device time to start up. Required on BCM2708 and a few other chipsets.
+ */
+#define MTD_DEFAULT_TIMEOUT    3
+
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/bio.h>
@@ -209,10 +217,14 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
 }
 
 
-static struct block2mtd_dev *add_device(char *devname, int erase_size)
+static struct block2mtd_dev *add_device(char *devname, int erase_size,
+               int timeout)
 {
+#ifndef MODULE
+       int i;
+#endif
        const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
-       struct block_device *bdev;
+       struct block_device *bdev = ERR_PTR(-ENODEV);
        struct block2mtd_dev *dev;
        char *name;
 
@@ -225,15 +237,28 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
 
        /* Get a handle on the device */
        bdev = blkdev_get_by_path(devname, mode, dev);
-#ifndef MODULE
-       if (IS_ERR(bdev)) {
-
-               /* We might not have rootfs mounted at this point. Try
-                  to resolve the device name by other means. */
 
-               dev_t devt = name_to_dev_t(devname);
-               if (devt)
-                       bdev = blkdev_get_by_dev(devt, mode, dev);
+#ifndef MODULE
+       /*
+        * We might not have the root device mounted at this point.
+        * Try to resolve the device name by other means.
+        */
+       for (i = 0; IS_ERR(bdev) && i <= timeout; i++) {
+               dev_t devt;
+
+               if (i)
+                       /*
+                        * Calling wait_for_device_probe in the first loop
+                        * was not enough, sleep for a bit in subsequent
+                        * go-arounds.
+                        */
+                       msleep(1000);
+               wait_for_device_probe();
+
+               devt = name_to_dev_t(devname);
+               if (!devt)
+                       continue;
+               bdev = blkdev_get_by_dev(devt, mode, dev);
        }
 #endif
 
@@ -280,6 +305,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
                /* Device didn't get added, so free the entry */
                goto err_destroy_mutex;
        }
+
        list_add(&dev->list, &blkmtd_device_list);
        pr_info("mtd%d: [%s] erase_size = %dKiB [%d]\n",
                dev->mtd.index,
@@ -348,16 +374,19 @@ static inline void kill_final_newline(char *str)
 
 #ifndef MODULE
 static int block2mtd_init_called = 0;
-static char block2mtd_paramline[80 + 12]; /* 80 for device, 12 for erase size */
+/* 80 for device, 12 for erase size */
+static char block2mtd_paramline[80 + 12];
 #endif
 
 static int block2mtd_setup2(const char *val)
 {
-       char buf[80 + 12]; /* 80 for device, 12 for erase size */
+       /* 80 for device, 12 for erase size, 80 for name, 8 for timeout */
+       char buf[80 + 12 + 80 + 8];
        char *str = buf;
        char *token[2];
        char *name;
        size_t erase_size = PAGE_SIZE;
+       unsigned long timeout = MTD_DEFAULT_TIMEOUT;
        int i, ret;
 
        if (strnlen(val, sizeof(buf)) >= sizeof(buf)) {
@@ -395,7 +424,7 @@ static int block2mtd_setup2(const char *val)
                }
        }
 
-       add_device(name, erase_size);
+       add_device(name, erase_size, timeout);
 
        return 0;
 }
@@ -463,8 +492,7 @@ static void block2mtd_exit(void)
        }
 }
 
-
-module_init(block2mtd_init);
+late_initcall(block2mtd_init);
 module_exit(block2mtd_exit);
 
 MODULE_LICENSE("GPL");
index 448ce42..866d319 100644 (file)
@@ -1805,7 +1805,7 @@ static int __init doc_dbg_register(struct docg3 *docg3)
        }
 }
 
-static void __exit doc_dbg_unregister(struct docg3 *docg3)
+static void doc_dbg_unregister(struct docg3 *docg3)
 {
        debugfs_remove_recursive(docg3->debugfs_root);
 }
@@ -2033,7 +2033,7 @@ static int __init docg3_probe(struct platform_device *pdev)
        struct mtd_info *mtd;
        struct resource *ress;
        void __iomem *base;
-       int ret, floor, found = 0;
+       int ret, floor;
        struct docg3_cascade *cascade;
 
        ret = -ENXIO;
@@ -2073,14 +2073,11 @@ static int __init docg3_probe(struct platform_device *pdev)
                                                0);
                if (ret)
                        goto err_probe;
-               found++;
        }
 
        ret = doc_register_sysfs(pdev, cascade);
        if (ret)
                goto err_probe;
-       if (!found)
-               goto notfound;
 
        platform_set_drvdata(pdev, cascade);
        doc_dbg_register(cascade->floors[0]->priv);
@@ -2103,7 +2100,7 @@ err_probe:
  *
  * Returns 0
  */
-static int __exit docg3_release(struct platform_device *pdev)
+static int docg3_release(struct platform_device *pdev)
 {
        struct docg3_cascade *cascade = platform_get_drvdata(pdev);
        struct docg3 *docg3 = cascade->floors[0]->priv;
@@ -2134,7 +2131,7 @@ static struct platform_driver g3_driver = {
        },
        .suspend        = docg3_suspend,
        .resume         = docg3_resume,
-       .remove         = __exit_p(docg3_release),
+       .remove         = docg3_release,
 };
 
 module_platform_driver_probe(g3_driver, docg3_probe);
index 85e3546..7c8b169 100644 (file)
@@ -223,6 +223,8 @@ static int m25p_probe(struct spi_device *spi)
         */
        if (data && data->type)
                flash_name = data->type;
+       else if (!strcmp(spi->modalias, "nor-jedec"))
+               flash_name = NULL; /* auto-detect */
        else
                flash_name = spi->modalias;
 
@@ -247,9 +249,16 @@ static int m25p_remove(struct spi_device *spi)
 }
 
 /*
- * XXX This needs to be kept in sync with spi_nor_ids.  We can't share
- * it with spi-nor, because if this is built as a module then modpost
- * won't be able to read it and add appropriate aliases.
+ * Do NOT add to this array without reading the following:
+ *
+ * Historically, many flash devices are bound to this driver by their name. But
+ * since most of these flash are compatible to some extent, and their
+ * differences can often be differentiated by the JEDEC read-ID command, we
+ * encourage new users to add support to the spi-nor library, and simply bind
+ * against a generic string here (e.g., "nor-jedec").
+ *
+ * Many flash names are kept here in this list (as well as in spi-nor.c) to
+ * keep them available as module aliases for existing platforms.
  */
 static const struct spi_device_id m25p_ids[] = {
        {"at25fs010"},  {"at25fs040"},  {"at25df041a"}, {"at25df321a"},
@@ -291,6 +300,12 @@ static const struct spi_device_id m25p_ids[] = {
        {"w25x64"},     {"w25q64"},     {"w25q80"},     {"w25q80bl"},
        {"w25q128"},    {"w25q256"},    {"cat25c11"},
        {"cat25c03"},   {"cat25c09"},   {"cat25c17"},   {"cat25128"},
+
+       /*
+        * Generic support for SPI NOR that can be identified by the JEDEC READ
+        * ID opcode (0x9F). Use this, if possible.
+        */
+       {"nor-jedec"},
        { },
 };
 MODULE_DEVICE_TABLE(spi, m25p_ids);
index ba801d2..e715ae9 100644 (file)
@@ -242,7 +242,7 @@ config MTD_L440GX
 
 config MTD_CFI_FLAGADM
        tristate "CFI Flash device mapping on FlagaDM"
-       depends on 8xx && MTD_CFI
+       depends on PPC_8xx && MTD_CFI
        help
          Mapping for the Flaga digital module. If you don't have one, ignore
          this setting.
index ea69720..892ad6a 100644 (file)
@@ -274,7 +274,7 @@ static int sa1100_mtd_probe(struct platform_device *pdev)
        return err;
 }
 
-static int __exit sa1100_mtd_remove(struct platform_device *pdev)
+static int sa1100_mtd_remove(struct platform_device *pdev)
 {
        struct sa_info *info = platform_get_drvdata(pdev);
        struct flash_platform_data *plat = dev_get_platdata(&pdev->dev);
@@ -286,7 +286,7 @@ static int __exit sa1100_mtd_remove(struct platform_device *pdev)
 
 static struct platform_driver sa1100_mtd_driver = {
        .probe          = sa1100_mtd_probe,
-       .remove         = __exit_p(sa1100_mtd_remove),
+       .remove         = sa1100_mtd_remove,
        .driver         = {
                .name   = "sa1100-mtd",
        },
index d1d671d..9969fed 100644 (file)
@@ -117,5 +117,5 @@ module_exit(cleanup_ts5500_map);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sean Young <sean@mess.org>");
-MODULE_DESCRIPTION("MTD map driver for Techology Systems TS-5500 board");
+MODULE_DESCRIPTION("MTD map driver for Technology Systems TS-5500 board");
 
index d08229e..2b0c528 100644 (file)
@@ -171,9 +171,6 @@ static void mtd_blktrans_work(struct work_struct *work)
                background_done = 0;
        }
 
-       if (req)
-               __blk_end_request_all(req, -EIO);
-
        spin_unlock_irq(rq->queue_lock);
 }
 
index 11883bd..d172195 100644 (file)
@@ -38,6 +38,7 @@
 #include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/reboot.h>
+#include <linux/kconfig.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
@@ -501,6 +502,29 @@ out_error:
        return ret;
 }
 
+static int mtd_add_device_partitions(struct mtd_info *mtd,
+                                    struct mtd_partition *real_parts,
+                                    int nbparts)
+{
+       int ret;
+
+       if (nbparts == 0 || IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
+               ret = add_mtd_device(mtd);
+               if (ret == 1)
+                       return -ENODEV;
+       }
+
+       if (nbparts > 0) {
+               ret = add_mtd_partitions(mtd, real_parts, nbparts);
+               if (ret && IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+                       del_mtd_device(mtd);
+               return ret;
+       }
+
+       return 0;
+}
+
+
 /**
  * mtd_device_parse_register - parse partitions and register an MTD device.
  *
@@ -523,7 +547,8 @@ out_error:
  *   found this functions tries to fallback to information specified in
  *   @parts/@nr_parts.
  * * If any partitioning info was found, this function registers the found
- *   partitions.
+ *   partitions. If the MTD_PARTITIONED_MASTER option is set, then the device
+ *   as a whole is registered first.
  * * If no partitions were found this function just registers the MTD device
  *   @mtd and exits.
  *
@@ -534,27 +559,21 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
                              const struct mtd_partition *parts,
                              int nr_parts)
 {
-       int err;
-       struct mtd_partition *real_parts;
+       int ret;
+       struct mtd_partition *real_parts = NULL;
 
-       err = parse_mtd_partitions(mtd, types, &real_parts, parser_data);
-       if (err <= 0 && nr_parts && parts) {
+       ret = parse_mtd_partitions(mtd, types, &real_parts, parser_data);
+       if (ret <= 0 && nr_parts && parts) {
                real_parts = kmemdup(parts, sizeof(*parts) * nr_parts,
                                     GFP_KERNEL);
                if (!real_parts)
-                       err = -ENOMEM;
+                       ret = -ENOMEM;
                else
-                       err = nr_parts;
+                       ret = nr_parts;
        }
 
-       if (err > 0) {
-               err = add_mtd_partitions(mtd, real_parts, err);
-               kfree(real_parts);
-       } else if (err == 0) {
-               err = add_mtd_device(mtd);
-               if (err == 1)
-                       err = -ENODEV;
-       }
+       if (ret >= 0)
+               ret = mtd_add_device_partitions(mtd, real_parts, ret);
 
        /*
         * FIXME: some drivers unfortunately call this function more than once.
@@ -569,7 +588,8 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
                register_reboot_notifier(&mtd->reboot_notifier);
        }
 
-       return err;
+       kfree(real_parts);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(mtd_device_parse_register);
 
index e779de3..cafdb88 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
 #include <linux/err.h>
+#include <linux/kconfig.h>
 
 #include "mtdcore.h"
 
@@ -379,10 +380,17 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
        slave->mtd.name = name;
        slave->mtd.owner = master->owner;
 
-       /* NOTE:  we don't arrange MTDs as a tree; it'd be error-prone
-        * to have the same data be in two different partitions.
+       /* NOTE: Historically, we didn't arrange MTDs as a tree out of
+        * concern for showing the same data in multiple partitions.
+        * However, it is very useful to have the master node present,
+        * so the MTD_PARTITIONED_MASTER option allows that. The master
+        * will have device nodes etc only if this is set, so make the
+        * parent conditional on that option. Note, this is a way to
+        * distinguish between the master and the partition in sysfs.
         */
-       slave->mtd.dev.parent = master->dev.parent;
+       slave->mtd.dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) ?
+                               &master->dev :
+                               master->dev.parent;
 
        slave->mtd._read = part_read;
        slave->mtd._write = part_write;
@@ -546,12 +554,35 @@ out_register:
        return slave;
 }
 
+static ssize_t mtd_partition_offset_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct mtd_info *mtd = dev_get_drvdata(dev);
+       struct mtd_part *part = PART(mtd);
+       return snprintf(buf, PAGE_SIZE, "%lld\n", part->offset);
+}
+
+static DEVICE_ATTR(offset, S_IRUGO, mtd_partition_offset_show, NULL);
+
+static const struct attribute *mtd_partition_attrs[] = {
+       &dev_attr_offset.attr,
+       NULL
+};
+
+static int mtd_add_partition_attrs(struct mtd_part *new)
+{
+       int ret = sysfs_create_files(&new->mtd.dev.kobj, mtd_partition_attrs);
+       if (ret)
+               printk(KERN_WARNING
+                      "mtd: failed to create partition attrs, err=%d\n", ret);
+       return ret;
+}
+
 int mtd_add_partition(struct mtd_info *master, const char *name,
                      long long offset, long long length)
 {
        struct mtd_partition part;
-       struct mtd_part *p, *new;
-       uint64_t start, end;
+       struct mtd_part *new;
        int ret = 0;
 
        /* the direct offset is expected */
@@ -575,31 +606,15 @@ int mtd_add_partition(struct mtd_info *master, const char *name,
        if (IS_ERR(new))
                return PTR_ERR(new);
 
-       start = offset;
-       end = offset + length;
-
        mutex_lock(&mtd_partitions_mutex);
-       list_for_each_entry(p, &mtd_partitions, list)
-               if (p->master == master) {
-                       if ((start >= p->offset) &&
-                           (start < (p->offset + p->mtd.size)))
-                               goto err_inv;
-
-                       if ((end >= p->offset) &&
-                           (end < (p->offset + p->mtd.size)))
-                               goto err_inv;
-               }
-
        list_add(&new->list, &mtd_partitions);
        mutex_unlock(&mtd_partitions_mutex);
 
        add_mtd_device(&new->mtd);
 
+       mtd_add_partition_attrs(new);
+
        return ret;
-err_inv:
-       mutex_unlock(&mtd_partitions_mutex);
-       free_partition(new);
-       return -EINVAL;
 }
 EXPORT_SYMBOL_GPL(mtd_add_partition);
 
@@ -612,6 +627,8 @@ int mtd_del_partition(struct mtd_info *master, int partno)
        list_for_each_entry_safe(slave, next, &mtd_partitions, list)
                if ((slave->master == master) &&
                    (slave->mtd.index == partno)) {
+                       sysfs_remove_files(&slave->mtd.dev.kobj,
+                                          mtd_partition_attrs);
                        ret = del_mtd_device(&slave->mtd);
                        if (ret < 0)
                                break;
@@ -631,8 +648,8 @@ EXPORT_SYMBOL_GPL(mtd_del_partition);
  * and registers slave MTD objects which are bound to the master according to
  * the partition definitions.
  *
- * We don't register the master, or expect the caller to have done so,
- * for reasons of data integrity.
+ * For historical reasons, this function's caller only registers the master
+ * if the MTD_PARTITIONED_MASTER config option is set.
  */
 
 int add_mtd_partitions(struct mtd_info *master,
@@ -655,6 +672,7 @@ int add_mtd_partitions(struct mtd_info *master,
                mutex_unlock(&mtd_partitions_mutex);
 
                add_mtd_device(&slave->mtd);
+               mtd_add_partition_attrs(slave);
 
                cur_offset = slave->offset + slave->mtd.size;
        }
index d93c849..46010bd 100644 (file)
@@ -485,7 +485,7 @@ static void pmecc_config_ecc_layout(struct nand_ecclayout *layout,
        for (i = 0; i < ecc_len; i++)
                layout->eccpos[i] = oobsize - ecc_len + i;
 
-       layout->oobfree[0].offset = 2;
+       layout->oobfree[0].offset = PMECC_OOB_RESERVED_BYTES;
        layout->oobfree[0].length =
                oobsize - ecc_len - layout->oobfree[0].offset;
 }
@@ -1204,14 +1204,14 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev,
                goto err;
        }
 
-       regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
-       host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev, regs_rom);
-       if (IS_ERR(host->pmecc_rom_base)) {
-               if (!host->has_no_lookup_table)
-                       /* Don't display the information again */
+       if (!host->has_no_lookup_table) {
+               regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+               host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev,
+                                                               regs_rom);
+               if (IS_ERR(host->pmecc_rom_base)) {
                        dev_err(host->dev, "Can not get I/O resource for ROM, will build a lookup table in runtime!\n");
-
-               host->has_no_lookup_table = true;
+                       host->has_no_lookup_table = true;
+               }
        }
 
        if (host->has_no_lookup_table) {
@@ -1254,7 +1254,8 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev,
                nand_chip->ecc.steps = mtd->writesize / sector_size;
                nand_chip->ecc.total = nand_chip->ecc.bytes *
                        nand_chip->ecc.steps;
-               if (nand_chip->ecc.total > mtd->oobsize - 2) {
+               if (nand_chip->ecc.total >
+                               mtd->oobsize - PMECC_OOB_RESERVED_BYTES) {
                        dev_err(host->dev, "No room for ECC bytes\n");
                        err_no = -EINVAL;
                        goto err;
@@ -1719,7 +1720,7 @@ static int nfc_wait_interrupt(struct atmel_nand_host *host, u32 flag)
                comp[index++] = &host->nfc->comp_cmd_done;
 
        if (index == 0) {
-               dev_err(host->dev, "Unkown interrupt flag: 0x%08x\n", flag);
+               dev_err(host->dev, "Unknown interrupt flag: 0x%08x\n", flag);
                return -EINVAL;
        }
 
@@ -1752,11 +1753,10 @@ static int nfc_send_command(struct atmel_nand_host *host,
                cmd, addr, cycle0);
 
        timeout = jiffies + msecs_to_jiffies(NFC_TIME_OUT_MS);
-       while (nfc_cmd_readl(NFCADDR_CMD_NFCBUSY, host->nfc->base_cmd_regs)
-                       & NFCADDR_CMD_NFCBUSY) {
+       while (nfc_readl(host->nfc->hsmc_regs, SR) & NFC_SR_BUSY) {
                if (time_after(jiffies, timeout)) {
                        dev_err(host->dev,
-                               "Time out to wait CMD_NFCBUSY ready!\n");
+                               "Time out to wait for NFC ready!\n");
                        return -ETIMEDOUT;
                }
        }
index d4035e3..668e735 100644 (file)
 /* Time out value for reading PMECC status register */
 #define PMECC_MAX_TIMEOUT_MS                   100
 
+/* Reserved bytes in oob area */
+#define PMECC_OOB_RESERVED_BYTES               2
+
 #endif
index 85b8ca6..4d5d262 100644 (file)
@@ -35,6 +35,7 @@
 #define                NFC_CTRL_DISABLE        (1 << 1)
 
 #define ATMEL_HSMC_NFC_SR      0x08            /* NFC Status Register */
+#define                NFC_SR_BUSY             (1 << 8)
 #define                NFC_SR_XFR_DONE         (1 << 16)
 #define                NFC_SR_CMD_DONE         (1 << 17)
 #define                NFC_SR_DTOE             (1 << 20)
index f44c606..870c7fc 100644 (file)
@@ -225,7 +225,6 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali,
        uint16_t Twhr[6] = {120, 80, 80, 60, 60, 60};
        uint16_t Tcs[6] = {70, 35, 25, 25, 20, 15};
 
-       uint16_t TclsRising = 1;
        uint16_t data_invalid_rhoh, data_invalid_rloh, data_invalid;
        uint16_t dv_window = 0;
        uint16_t en_lo, en_hi;
@@ -276,8 +275,6 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali,
        re_2_re = CEIL_DIV(Trhz[mode], CLK_X);
        we_2_re = CEIL_DIV(Twhr[mode], CLK_X);
        cs_cnt = CEIL_DIV((Tcs[mode] - Trp[mode]), CLK_X);
-       if (!TclsRising)
-               cs_cnt = CEIL_DIV(Tcs[mode], CLK_X);
        if (cs_cnt == 0)
                cs_cnt = 1;
 
@@ -1536,6 +1533,9 @@ int denali_init(struct denali_nand_info *denali)
        denali->nand.options |= NAND_SKIP_BBTSCAN;
        denali->nand.ecc.mode = NAND_ECC_HW_SYNDROME;
 
+       /* no subpage writes on denali */
+       denali->nand.options |= NAND_NO_SUBPAGE_WRITE;
+
        /*
         * Denali Controller only support 15bit and 8bit ECC in MRST,
         * so just let controller do 15bit ECC for MLC and 8bit ECC for
index 4c05f4f..51394e5 100644 (file)
@@ -317,7 +317,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd)
 
        /* wait for command complete flag or timeout */
        wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat,
-                          IFC_TIMEOUT_MSECS * HZ/1000);
+                          msecs_to_jiffies(IFC_TIMEOUT_MSECS));
 
        /* ctrl->nand_stat will be updated from IRQ context */
        if (!ctrl->nand_stat)
@@ -860,7 +860,7 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv)
 
        /* wait for command complete flag or timeout */
        wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat,
-                          IFC_TIMEOUT_MSECS * HZ/1000);
+                          msecs_to_jiffies(IFC_TIMEOUT_MSECS));
 
        if (ctrl->nand_stat != IFC_NAND_EVTER_STAT_OPC)
                printk(KERN_ERR "fsl-ifc: Failed to Initialise SRAM\n");
index edfaa21..e58af4b 100644 (file)
@@ -873,6 +873,7 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
 {
        struct fsmc_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
        u32 val;
+       int ret;
 
        /* Set default NAND width to 8 bits */
        pdata->width = 8;
@@ -891,8 +892,12 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
                                sizeof(*pdata->nand_timings), GFP_KERNEL);
        if (!pdata->nand_timings)
                return -ENOMEM;
-       of_property_read_u8_array(np, "timings", (u8 *)pdata->nand_timings,
+       ret = of_property_read_u8_array(np, "timings", (u8 *)pdata->nand_timings,
                                                sizeof(*pdata->nand_timings));
+       if (ret) {
+               dev_info(&pdev->dev, "No timings in dts specified, using default timings!\n");
+               pdata->nand_timings = NULL;
+       }
 
        /* Set default NAND bank to 0 */
        pdata->bank = 0;
index 33f3c3c..1b8f350 100644 (file)
@@ -446,7 +446,7 @@ int start_dma_without_bch_irq(struct gpmi_nand_data *this,
                                struct dma_async_tx_descriptor *desc)
 {
        struct completion *dma_c = &this->dma_done;
-       int err;
+       unsigned long timeout;
 
        init_completion(dma_c);
 
@@ -456,8 +456,8 @@ int start_dma_without_bch_irq(struct gpmi_nand_data *this,
        dma_async_issue_pending(get_dma_chan(this));
 
        /* Wait for the interrupt from the DMA block. */
-       err = wait_for_completion_timeout(dma_c, msecs_to_jiffies(1000));
-       if (!err) {
+       timeout = wait_for_completion_timeout(dma_c, msecs_to_jiffies(1000));
+       if (!timeout) {
                dev_err(this->dev, "DMA timeout, last DMA :%d\n",
                        this->last_dma_type);
                gpmi_dump_info(this);
@@ -477,7 +477,7 @@ int start_dma_with_bch_irq(struct gpmi_nand_data *this,
                        struct dma_async_tx_descriptor *desc)
 {
        struct completion *bch_c = &this->bch_done;
-       int err;
+       unsigned long timeout;
 
        /* Prepare to receive an interrupt from the BCH block. */
        init_completion(bch_c);
@@ -486,8 +486,8 @@ int start_dma_with_bch_irq(struct gpmi_nand_data *this,
        start_dma_without_bch_irq(this, desc);
 
        /* Wait for the interrupt from the BCH block. */
-       err = wait_for_completion_timeout(bch_c, msecs_to_jiffies(1000));
-       if (!err) {
+       timeout = wait_for_completion_timeout(bch_c, msecs_to_jiffies(1000));
+       if (!timeout) {
                dev_err(this->dev, "BCH timeout, last DMA :%d\n",
                        this->last_dma_type);
                gpmi_dump_info(this);
@@ -1950,7 +1950,9 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
        ret = nand_boot_init(this);
        if (ret)
                goto err_out;
-       chip->scan_bbt(mtd);
+       ret = chip->scan_bbt(mtd);
+       if (ret)
+               goto err_out;
 
        ppdata.of_node = this->pdev->dev.of_node;
        ret = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0);
index a8f550f..372e0e3 100644 (file)
@@ -386,26 +386,51 @@ static irqreturn_t mxc_nfc_irq(int irq, void *dev_id)
 /* This function polls the NANDFC to wait for the basic operation to
  * complete by checking the INT bit of config2 register.
  */
-static void wait_op_done(struct mxc_nand_host *host, int useirq)
+static int wait_op_done(struct mxc_nand_host *host, int useirq)
 {
-       int max_retries = 8000;
+       int ret = 0;
+
+       /*
+        * If operation is already complete, don't bother to setup an irq or a
+        * loop.
+        */
+       if (host->devtype_data->check_int(host))
+               return 0;
 
        if (useirq) {
-               if (!host->devtype_data->check_int(host)) {
-                       reinit_completion(&host->op_completion);
-                       irq_control(host, 1);
-                       wait_for_completion(&host->op_completion);
+               unsigned long timeout;
+
+               reinit_completion(&host->op_completion);
+
+               irq_control(host, 1);
+
+               timeout = wait_for_completion_timeout(&host->op_completion, HZ);
+               if (!timeout && !host->devtype_data->check_int(host)) {
+                       dev_dbg(host->dev, "timeout waiting for irq\n");
+                       ret = -ETIMEDOUT;
                }
        } else {
-               while (max_retries-- > 0) {
-                       if (host->devtype_data->check_int(host))
-                               break;
+               int max_retries = 8000;
+               int done;
 
+               do {
                        udelay(1);
+
+                       done = host->devtype_data->check_int(host);
+                       if (done)
+                               break;
+
+               } while (--max_retries);
+
+               if (!done) {
+                       dev_dbg(host->dev, "timeout polling for completion\n");
+                       ret = -ETIMEDOUT;
                }
-               if (max_retries < 0)
-                       pr_debug("%s: INT not set\n", __func__);
        }
+
+       WARN_ONCE(ret < 0, "timeout! useirq=%d\n", useirq);
+
+       return ret;
 }
 
 static void send_cmd_v3(struct mxc_nand_host *host, uint16_t cmd, int useirq)
@@ -527,30 +552,17 @@ static void send_page_v1(struct mtd_info *mtd, unsigned int ops)
 
 static void send_read_id_v3(struct mxc_nand_host *host)
 {
-       struct nand_chip *this = &host->nand;
-
        /* Read ID into main buffer */
        writel(NFC_ID, NFC_V3_LAUNCH);
 
        wait_op_done(host, true);
 
        memcpy32_fromio(host->data_buf, host->main_area0, 16);
-
-       if (this->options & NAND_BUSWIDTH_16) {
-               /* compress the ID info */
-               host->data_buf[1] = host->data_buf[2];
-               host->data_buf[2] = host->data_buf[4];
-               host->data_buf[3] = host->data_buf[6];
-               host->data_buf[4] = host->data_buf[8];
-               host->data_buf[5] = host->data_buf[10];
-       }
 }
 
 /* Request the NANDFC to perform a read of the NAND device ID. */
 static void send_read_id_v1_v2(struct mxc_nand_host *host)
 {
-       struct nand_chip *this = &host->nand;
-
        /* NANDFC buffer 0 is used for device ID output */
        writew(host->active_cs << 4, NFC_V1_V2_BUF_ADDR);
 
@@ -560,15 +572,6 @@ static void send_read_id_v1_v2(struct mxc_nand_host *host)
        wait_op_done(host, true);
 
        memcpy32_fromio(host->data_buf, host->main_area0, 16);
-
-       if (this->options & NAND_BUSWIDTH_16) {
-               /* compress the ID info */
-               host->data_buf[1] = host->data_buf[2];
-               host->data_buf[2] = host->data_buf[4];
-               host->data_buf[3] = host->data_buf[6];
-               host->data_buf[4] = host->data_buf[8];
-               host->data_buf[5] = host->data_buf[10];
-       }
 }
 
 static uint16_t get_dev_status_v3(struct mxc_nand_host *host)
@@ -694,9 +697,17 @@ static u_char mxc_nand_read_byte(struct mtd_info *mtd)
        if (host->status_request)
                return host->devtype_data->get_dev_status(host) & 0xFF;
 
-       ret = *(uint8_t *)(host->data_buf + host->buf_start);
-       host->buf_start++;
+       if (nand_chip->options & NAND_BUSWIDTH_16) {
+               /* only take the lower byte of each word */
+               ret = *(uint16_t *)(host->data_buf + host->buf_start);
+
+               host->buf_start += 2;
+       } else {
+               ret = *(uint8_t *)(host->data_buf + host->buf_start);
+               host->buf_start++;
+       }
 
+       pr_debug("%s: ret=0x%hhx (start=%u)\n", __func__, ret, host->buf_start);
        return ret;
 }
 
@@ -825,6 +836,12 @@ static void copy_spare(struct mtd_info *mtd, bool bfrom)
        }
 }
 
+/*
+ * MXC NANDFC can only perform full page+spare or spare-only read/write.  When
+ * the upper layers perform a read/write buf operation, the saved column address
+ * is used to index into the full page. So usually this function is called with
+ * column == 0 (unless no column cycle is needed indicated by column == -1)
+ */
 static void mxc_do_addr_cycle(struct mtd_info *mtd, int column, int page_addr)
 {
        struct nand_chip *nand_chip = mtd->priv;
@@ -832,16 +849,13 @@ static void mxc_do_addr_cycle(struct mtd_info *mtd, int column, int page_addr)
 
        /* Write out column address, if necessary */
        if (column != -1) {
-               /*
-                * MXC NANDFC can only perform full page+spare or
-                * spare-only read/write.  When the upper layers
-                * perform a read/write buf operation, the saved column
-                 * address is used to index into the full page.
-                */
-               host->devtype_data->send_addr(host, 0, page_addr == -1);
+               host->devtype_data->send_addr(host, column & 0xff,
+                                             page_addr == -1);
                if (mtd->writesize > 512)
                        /* another col addr cycle for 2k page */
-                       host->devtype_data->send_addr(host, 0, false);
+                       host->devtype_data->send_addr(host,
+                                                     (column >> 8) & 0xff,
+                                                     false);
        }
 
        /* Write out page address, if necessary */
@@ -903,7 +917,7 @@ static void preset_v1(struct mtd_info *mtd)
        struct mxc_nand_host *host = nand_chip->priv;
        uint16_t config1 = 0;
 
-       if (nand_chip->ecc.mode == NAND_ECC_HW)
+       if (nand_chip->ecc.mode == NAND_ECC_HW && mtd->writesize)
                config1 |= NFC_V1_V2_CONFIG1_ECC_EN;
 
        if (!host->devtype_data->irqpending_quirk)
@@ -931,9 +945,6 @@ static void preset_v2(struct mtd_info *mtd)
        struct mxc_nand_host *host = nand_chip->priv;
        uint16_t config1 = 0;
 
-       if (nand_chip->ecc.mode == NAND_ECC_HW)
-               config1 |= NFC_V1_V2_CONFIG1_ECC_EN;
-
        config1 |= NFC_V2_CONFIG1_FP_INT;
 
        if (!host->devtype_data->irqpending_quirk)
@@ -942,6 +953,9 @@ static void preset_v2(struct mtd_info *mtd)
        if (mtd->writesize) {
                uint16_t pages_per_block = mtd->erasesize / mtd->writesize;
 
+               if (nand_chip->ecc.mode == NAND_ECC_HW)
+                       config1 |= NFC_V1_V2_CONFIG1_ECC_EN;
+
                host->eccsize = get_eccsize(mtd);
                if (host->eccsize == 4)
                        config1 |= NFC_V2_CONFIG1_ECC_MODE_4;
@@ -999,9 +1013,6 @@ static void preset_v3(struct mtd_info *mtd)
                NFC_V3_CONFIG2_INT_MSK |
                NFC_V3_CONFIG2_NUM_ADDR_PHASE0;
 
-       if (chip->ecc.mode == NAND_ECC_HW)
-               config2 |= NFC_V3_CONFIG2_ECC_EN;
-
        addr_phases = fls(chip->pagemask) >> 3;
 
        if (mtd->writesize == 2048) {
@@ -1016,6 +1027,9 @@ static void preset_v3(struct mtd_info *mtd)
        }
 
        if (mtd->writesize) {
+               if (chip->ecc.mode == NAND_ECC_HW)
+                       config2 |= NFC_V3_CONFIG2_ECC_EN;
+
                config2 |= NFC_V3_CONFIG2_PPB(
                                ffs(mtd->erasesize / mtd->writesize) - 6,
                                host->devtype_data->ppb_shift);
@@ -1066,6 +1080,9 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                host->status_request = true;
 
                host->devtype_data->send_cmd(host, command, true);
+               WARN_ONCE(column != -1 || page_addr != -1,
+                         "Unexpected column/row value (cmd=%u, col=%d, row=%d)\n",
+                         command, column, page_addr);
                mxc_do_addr_cycle(mtd, column, page_addr);
                break;
 
@@ -1079,7 +1096,10 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                command = NAND_CMD_READ0; /* only READ0 is valid */
 
                host->devtype_data->send_cmd(host, command, false);
-               mxc_do_addr_cycle(mtd, column, page_addr);
+               WARN_ONCE(column < 0,
+                         "Unexpected column/row value (cmd=%u, col=%d, row=%d)\n",
+                         command, column, page_addr);
+               mxc_do_addr_cycle(mtd, 0, page_addr);
 
                if (mtd->writesize > 512)
                        host->devtype_data->send_cmd(host,
@@ -1100,7 +1120,10 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                host->buf_start = column;
 
                host->devtype_data->send_cmd(host, command, false);
-               mxc_do_addr_cycle(mtd, column, page_addr);
+               WARN_ONCE(column < -1,
+                         "Unexpected column/row value (cmd=%u, col=%d, row=%d)\n",
+                         command, column, page_addr);
+               mxc_do_addr_cycle(mtd, 0, page_addr);
                break;
 
        case NAND_CMD_PAGEPROG:
@@ -1108,6 +1131,9 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                copy_spare(mtd, false);
                host->devtype_data->send_page(mtd, NFC_INPUT);
                host->devtype_data->send_cmd(host, command, true);
+               WARN_ONCE(column != -1 || page_addr != -1,
+                         "Unexpected column/row value (cmd=%u, col=%d, row=%d)\n",
+                         command, column, page_addr);
                mxc_do_addr_cycle(mtd, column, page_addr);
                break;
 
@@ -1115,15 +1141,29 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                host->devtype_data->send_cmd(host, command, true);
                mxc_do_addr_cycle(mtd, column, page_addr);
                host->devtype_data->send_read_id(host);
-               host->buf_start = column;
+               host->buf_start = 0;
                break;
 
        case NAND_CMD_ERASE1:
        case NAND_CMD_ERASE2:
                host->devtype_data->send_cmd(host, command, false);
+               WARN_ONCE(column != -1,
+                         "Unexpected column value (cmd=%u, col=%d)\n",
+                         command, column);
                mxc_do_addr_cycle(mtd, column, page_addr);
 
                break;
+       case NAND_CMD_PARAM:
+               host->devtype_data->send_cmd(host, command, false);
+               mxc_do_addr_cycle(mtd, column, page_addr);
+               host->devtype_data->send_page(mtd, NFC_OUTPUT);
+               memcpy32_fromio(host->data_buf, host->main_area0, 512);
+               host->buf_start = 0;
+               break;
+       default:
+               WARN_ONCE(1, "Unimplemented command (cmd=%u)\n",
+                         command);
+               break;
        }
 }
 
index df7eb4f..c2e1232 100644 (file)
@@ -386,7 +386,7 @@ static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
        uint8_t buf[2] = { 0, 0 };
        int ret = 0, res, i = 0;
 
-       ops.datbuf = NULL;
+       memset(&ops, 0, sizeof(ops));
        ops.oobbuf = buf;
        ops.ooboffs = chip->badblockpos;
        if (chip->options & NAND_BUSWIDTH_16) {
@@ -565,6 +565,25 @@ void nand_wait_ready(struct mtd_info *mtd)
 }
 EXPORT_SYMBOL_GPL(nand_wait_ready);
 
+/**
+ * nand_wait_status_ready - [GENERIC] Wait for the ready status after commands.
+ * @mtd: MTD device structure
+ * @timeo: Timeout in ms
+ *
+ * Wait for status ready (i.e. command done) or timeout.
+ */
+static void nand_wait_status_ready(struct mtd_info *mtd, unsigned long timeo)
+{
+       register struct nand_chip *chip = mtd->priv;
+
+       timeo = jiffies + msecs_to_jiffies(timeo);
+       do {
+               if ((chip->read_byte(mtd) & NAND_STATUS_READY))
+                       break;
+               touch_softlockup_watchdog();
+       } while (time_before(jiffies, timeo));
+};
+
 /**
  * nand_command - [DEFAULT] Send command to NAND device
  * @mtd: MTD device structure
@@ -643,8 +662,8 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
                               NAND_CTRL_CLE | NAND_CTRL_CHANGE);
                chip->cmd_ctrl(mtd,
                               NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
-               while (!(chip->read_byte(mtd) & NAND_STATUS_READY))
-                               ;
+               /* EZ-NAND can take upto 250ms as per ONFi v4.0 */
+               nand_wait_status_ready(mtd, 250);
                return;
 
                /* This applies to read commands */
@@ -740,8 +759,8 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
                               NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
                chip->cmd_ctrl(mtd, NAND_CMD_NONE,
                               NAND_NCE | NAND_CTRL_CHANGE);
-               while (!(chip->read_byte(mtd) & NAND_STATUS_READY))
-                               ;
+               /* EZ-NAND can take upto 250ms as per ONFi v4.0 */
+               nand_wait_status_ready(mtd, 250);
                return;
 
        case NAND_CMD_RNDOUT:
@@ -968,7 +987,7 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
                        __func__, (unsigned long long)ofs, len);
 
        if (check_offs_len(mtd, ofs, len))
-               ret = -EINVAL;
+               return -EINVAL;
 
        /* Align to last block address if size addresses end of the device */
        if (ofs + len == mtd->size)
@@ -1031,7 +1050,7 @@ int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
                        __func__, (unsigned long long)ofs, len);
 
        if (check_offs_len(mtd, ofs, len))
-               ret = -EINVAL;
+               return -EINVAL;
 
        nand_get_device(mtd, FL_LOCKING);
 
@@ -1716,9 +1735,9 @@ static int nand_read(struct mtd_info *mtd, loff_t from, size_t len,
        int ret;
 
        nand_get_device(mtd, FL_READING);
+       memset(&ops, 0, sizeof(ops));
        ops.len = len;
        ops.datbuf = buf;
-       ops.oobbuf = NULL;
        ops.mode = MTD_OPS_PLACE_OOB;
        ret = nand_do_read_ops(mtd, from, &ops);
        *retlen = ops.retlen;
@@ -2124,7 +2143,7 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 
 
 /**
- * nand_write_subpage_hwecc - [REPLACABLE] hardware ECC based subpage write
+ * nand_write_subpage_hwecc - [REPLACEABLE] hardware ECC based subpage write
  * @mtd:       mtd info structure
  * @chip:      nand chip info structure
  * @offset:    column address of subpage within the page
@@ -2508,9 +2527,9 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len,
        /* Grab the device */
        panic_nand_get_device(chip, mtd, FL_WRITING);
 
+       memset(&ops, 0, sizeof(ops));
        ops.len = len;
        ops.datbuf = (uint8_t *)buf;
-       ops.oobbuf = NULL;
        ops.mode = MTD_OPS_PLACE_OOB;
 
        ret = nand_do_write_ops(mtd, to, &ops);
@@ -2536,9 +2555,9 @@ static int nand_write(struct mtd_info *mtd, loff_t to, size_t len,
        int ret;
 
        nand_get_device(mtd, FL_WRITING);
+       memset(&ops, 0, sizeof(ops));
        ops.len = len;
        ops.datbuf = (uint8_t *)buf;
-       ops.oobbuf = NULL;
        ops.mode = MTD_OPS_PLACE_OOB;
        ret = nand_do_write_ops(mtd, to, &ops);
        *retlen = ops.retlen;
index 10b1f7a..a4615fc 100644 (file)
@@ -38,8 +38,8 @@
 
 #include <linux/platform_data/mtd-nand-pxa3xx.h>
 
-#define        CHIP_DELAY_TIMEOUT      (2 * HZ/10)
-#define NAND_STOP_DELAY                (2 * HZ/50)
+#define        CHIP_DELAY_TIMEOUT      msecs_to_jiffies(200)
+#define NAND_STOP_DELAY                msecs_to_jiffies(40)
 #define PAGE_CHUNK_SIZE                (2048)
 
 /*
@@ -605,11 +605,24 @@ static void start_data_dma(struct pxa3xx_nand_info *info)
 {}
 #endif
 
+static irqreturn_t pxa3xx_nand_irq_thread(int irq, void *data)
+{
+       struct pxa3xx_nand_info *info = data;
+
+       handle_data_pio(info);
+
+       info->state = STATE_CMD_DONE;
+       nand_writel(info, NDSR, NDSR_WRDREQ | NDSR_RDDREQ);
+
+       return IRQ_HANDLED;
+}
+
 static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
 {
        struct pxa3xx_nand_info *info = devid;
        unsigned int status, is_completed = 0, is_ready = 0;
        unsigned int ready, cmd_done;
+       irqreturn_t ret = IRQ_HANDLED;
 
        if (info->cs == 0) {
                ready           = NDSR_FLASH_RDY;
@@ -651,7 +664,8 @@ static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
                } else {
                        info->state = (status & NDSR_RDDREQ) ?
                                      STATE_PIO_READING : STATE_PIO_WRITING;
-                       handle_data_pio(info);
+                       ret = IRQ_WAKE_THREAD;
+                       goto NORMAL_IRQ_EXIT;
                }
        }
        if (status & cmd_done) {
@@ -692,7 +706,7 @@ static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
        if (is_ready)
                complete(&info->dev_ready);
 NORMAL_IRQ_EXIT:
-       return IRQ_HANDLED;
+       return ret;
 }
 
 static inline int is_buf_blank(uint8_t *buf, size_t len)
@@ -951,7 +965,7 @@ static void nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 {
        struct pxa3xx_nand_host *host = mtd->priv;
        struct pxa3xx_nand_info *info = host->info_data;
-       int ret, exec_cmd;
+       int exec_cmd;
 
        /*
         * if this is a x16 device ,then convert the input
@@ -983,9 +997,8 @@ static void nand_cmdfunc(struct mtd_info *mtd, unsigned command,
                info->need_wait = 1;
                pxa3xx_nand_start(info);
 
-               ret = wait_for_completion_timeout(&info->cmd_complete,
-                               CHIP_DELAY_TIMEOUT);
-               if (!ret) {
+               if (!wait_for_completion_timeout(&info->cmd_complete,
+                   CHIP_DELAY_TIMEOUT)) {
                        dev_err(&info->pdev->dev, "Wait time out!!!\n");
                        /* Stop State Machine for next command cycle */
                        pxa3xx_nand_stop(info);
@@ -1000,7 +1013,7 @@ static void nand_cmdfunc_extended(struct mtd_info *mtd,
 {
        struct pxa3xx_nand_host *host = mtd->priv;
        struct pxa3xx_nand_info *info = host->info_data;
-       int ret, exec_cmd, ext_cmd_type;
+       int exec_cmd, ext_cmd_type;
 
        /*
         * if this is a x16 device then convert the input
@@ -1063,9 +1076,8 @@ static void nand_cmdfunc_extended(struct mtd_info *mtd,
                init_completion(&info->cmd_complete);
                pxa3xx_nand_start(info);
 
-               ret = wait_for_completion_timeout(&info->cmd_complete,
-                               CHIP_DELAY_TIMEOUT);
-               if (!ret) {
+               if (!wait_for_completion_timeout(&info->cmd_complete,
+                   CHIP_DELAY_TIMEOUT)) {
                        dev_err(&info->pdev->dev, "Wait time out!!!\n");
                        /* Stop State Machine for next command cycle */
                        pxa3xx_nand_stop(info);
@@ -1198,13 +1210,11 @@ static int pxa3xx_nand_waitfunc(struct mtd_info *mtd, struct nand_chip *this)
 {
        struct pxa3xx_nand_host *host = mtd->priv;
        struct pxa3xx_nand_info *info = host->info_data;
-       int ret;
 
        if (info->need_wait) {
-               ret = wait_for_completion_timeout(&info->dev_ready,
-                               CHIP_DELAY_TIMEOUT);
                info->need_wait = 0;
-               if (!ret) {
+               if (!wait_for_completion_timeout(&info->dev_ready,
+                   CHIP_DELAY_TIMEOUT)) {
                        dev_err(&info->pdev->dev, "Ready time out!!!\n");
                        return NAND_STATUS_FAIL;
                }
@@ -1508,6 +1518,8 @@ static int pxa3xx_nand_scan(struct mtd_info *mtd)
                return ret;
        }
 
+       memset(pxa3xx_flash_ids, 0, sizeof(pxa3xx_flash_ids));
+
        pxa3xx_flash_ids[0].name = f->name;
        pxa3xx_flash_ids[0].dev_id = (f->chip_id >> 8) & 0xffff;
        pxa3xx_flash_ids[0].pagesize = f->page_size;
@@ -1710,7 +1722,9 @@ static int alloc_nand_resource(struct platform_device *pdev)
        /* initialize all interrupts to be disabled */
        disable_int(info, NDSR_MASK);
 
-       ret = request_irq(irq, pxa3xx_nand_irq, 0, pdev->name, info);
+       ret = request_threaded_irq(irq, pxa3xx_nand_irq,
+                                  pxa3xx_nand_irq_thread, IRQF_ONESHOT,
+                                  pdev->name, info);
        if (ret < 0) {
                dev_err(&pdev->dev, "failed to request IRQ\n");
                goto fail_free_buf;
index 35aef5e..0e02be4 100644 (file)
@@ -948,8 +948,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 
        cpu_type = platform_get_device_id(pdev)->driver_data;
 
-       pr_debug("s3c2410_nand_probe(%p)\n", pdev);
-
        info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
        if (info == NULL) {
                err = -ENOMEM;
@@ -1045,7 +1043,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
                s3c2410_nand_clk_set_state(info, CLOCK_SUSPEND);
        }
 
-       pr_debug("initialised ok\n");
        return 0;
 
  exit_error:
index a21c378..c3ce81c 100644 (file)
@@ -159,7 +159,6 @@ static void flctl_setup_dma(struct sh_flctl *flctl)
                return;
 
        memset(&cfg, 0, sizeof(cfg));
-       cfg.slave_id = pdata->slave_id_fifo0_tx;
        cfg.direction = DMA_MEM_TO_DEV;
        cfg.dst_addr = (dma_addr_t)FLDTFIFO(flctl);
        cfg.src_addr = 0;
@@ -175,7 +174,6 @@ static void flctl_setup_dma(struct sh_flctl *flctl)
        if (!flctl->chan_fifo0_rx)
                goto err;
 
-       cfg.slave_id = pdata->slave_id_fifo0_rx;
        cfg.direction = DMA_DEV_TO_MEM;
        cfg.dst_addr = 0;
        cfg.src_addr = (dma_addr_t)FLDTFIFO(flctl);
index 635ee00..43b3392 100644 (file)
@@ -1743,7 +1743,6 @@ static int onenand_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
        struct onenand_chip *this = mtd->priv;
        int column, subpage;
        int written = 0;
-       int ret = 0;
 
        if (this->state == FL_PM_SUSPENDED)
                return -EBUSY;
@@ -1786,15 +1785,10 @@ static int onenand_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
                onenand_panic_wait(mtd);
 
                /* In partial page write we don't update bufferram */
-               onenand_update_bufferram(mtd, to, !ret && !subpage);
+               onenand_update_bufferram(mtd, to, !subpage);
                if (ONENAND_IS_2PLANE(this)) {
                        ONENAND_SET_BUFFERRAM1(this);
-                       onenand_update_bufferram(mtd, to + this->writesize, !ret && !subpage);
-               }
-
-               if (ret) {
-                       printk(KERN_ERR "%s: write failed %d\n", __func__, ret);
-                       break;
+                       onenand_update_bufferram(mtd, to + this->writesize, !subpage);
                }
 
                written += thislen;
@@ -1808,7 +1802,7 @@ static int onenand_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
        }
 
        *retlen = written;
-       return ret;
+       return 0;
 }
 
 /**
index 1c7308c..5d5d362 100644 (file)
@@ -460,8 +460,7 @@ fsl_qspi_runcmd(struct fsl_qspi *q, u8 cmd, unsigned int addr, int len)
        writel((seqid << QUADSPI_IPCR_SEQID_SHIFT) | len, base + QUADSPI_IPCR);
 
        /* Wait for the interrupt. */
-       err = wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000));
-       if (!err) {
+       if (!wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000))) {
                dev_err(q->dev,
                        "cmd 0x%.2x timeout, addr@%.8x, FR:0x%.8x, SR:0x%.8x\n",
                        cmd, addr, readl(base + QUADSPI_FR),
@@ -830,27 +829,27 @@ static int fsl_qspi_probe(struct platform_device *pdev)
 
        ret = clk_prepare_enable(q->clk_en);
        if (ret) {
-               dev_err(dev, "can not enable the qspi_en clock\n");
+               dev_err(dev, "cannot enable the qspi_en clock: %d\n", ret);
                return ret;
        }
 
        ret = clk_prepare_enable(q->clk);
        if (ret) {
-               dev_err(dev, "can not enable the qspi clock\n");
+               dev_err(dev, "cannot enable the qspi clock: %d\n", ret);
                goto clk_failed;
        }
 
        /* find the irq */
        ret = platform_get_irq(pdev, 0);
        if (ret < 0) {
-               dev_err(dev, "failed to get the irq\n");
+               dev_err(dev, "failed to get the irq: %d\n", ret);
                goto irq_failed;
        }
 
        ret = devm_request_irq(dev, ret,
                        fsl_qspi_irq_handler, 0, pdev->name, q);
        if (ret) {
-               dev_err(dev, "failed to request irq.\n");
+               dev_err(dev, "failed to request irq: %d\n", ret);
                goto irq_failed;
        }
 
index b6a5a0c..14a5d23 100644 (file)
@@ -369,17 +369,13 @@ erase_err:
        return ret;
 }
 
-static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len)
 {
-       struct spi_nor *nor = mtd_to_spi_nor(mtd);
+       struct mtd_info *mtd = nor->mtd;
        uint32_t offset = ofs;
        uint8_t status_old, status_new;
        int ret = 0;
 
-       ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_LOCK);
-       if (ret)
-               return ret;
-
        status_old = read_sr(nor);
 
        if (offset < mtd->size - (mtd->size / 2))
@@ -402,26 +398,18 @@ static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
                                (status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
                write_enable(nor);
                ret = write_sr(nor, status_new);
-               if (ret)
-                       goto err;
        }
 
-err:
-       spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
        return ret;
 }
 
-static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
 {
-       struct spi_nor *nor = mtd_to_spi_nor(mtd);
+       struct mtd_info *mtd = nor->mtd;
        uint32_t offset = ofs;
        uint8_t status_old, status_new;
        int ret = 0;
 
-       ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_UNLOCK);
-       if (ret)
-               return ret;
-
        status_old = read_sr(nor);
 
        if (offset+len > mtd->size - (mtd->size / 64))
@@ -444,15 +432,41 @@ static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
                                (status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
                write_enable(nor);
                ret = write_sr(nor, status_new);
-               if (ret)
-                       goto err;
        }
 
-err:
+       return ret;
+}
+
+static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+       struct spi_nor *nor = mtd_to_spi_nor(mtd);
+       int ret;
+
+       ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_LOCK);
+       if (ret)
+               return ret;
+
+       ret = nor->flash_lock(nor, ofs, len);
+
        spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_UNLOCK);
        return ret;
 }
 
+static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+       struct spi_nor *nor = mtd_to_spi_nor(mtd);
+       int ret;
+
+       ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_UNLOCK);
+       if (ret)
+               return ret;
+
+       ret = nor->flash_unlock(nor, ofs, len);
+
+       spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
+       return ret;
+}
+
 /* Used when the "_ext_id" is two bytes at most */
 #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)     \
        ((kernel_ulong_t)&(struct flash_info) {                         \
@@ -524,6 +538,7 @@ static const struct spi_device_id spi_nor_ids[] = {
        { "en25q64",    INFO(0x1c3017, 0, 64 * 1024,  128, SECT_4K) },
        { "en25qh128",  INFO(0x1c7018, 0, 64 * 1024,  256, 0) },
        { "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512, 0) },
+       { "en25s64",    INFO(0x1c3817, 0, 64 * 1024,  128, 0) },
 
        /* ESMT */
        { "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, SECT_4K) },
@@ -553,6 +568,7 @@ static const struct spi_device_id spi_nor_ids[] = {
        { "mx25l3205d",  INFO(0xc22016, 0, 64 * 1024,  64, 0) },
        { "mx25l3255e",  INFO(0xc29e16, 0, 64 * 1024,  64, SECT_4K) },
        { "mx25l6405d",  INFO(0xc22017, 0, 64 * 1024, 128, 0) },
+       { "mx25u6435f",  INFO(0xc22537, 0, 64 * 1024, 128, SECT_4K) },
        { "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
        { "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
        { "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
@@ -648,6 +664,7 @@ static const struct spi_device_id spi_nor_ids[] = {
        { "m25px80",    INFO(0x207114,  0, 64 * 1024, 16, 0) },
 
        /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
+       { "w25x05", INFO(0xef3010, 0, 64 * 1024,  1,  SECT_4K) },
        { "w25x10", INFO(0xef3011, 0, 64 * 1024,  2,  SECT_4K) },
        { "w25x20", INFO(0xef3012, 0, 64 * 1024,  4,  SECT_4K) },
        { "w25x40", INFO(0xef3013, 0, 64 * 1024,  8,  SECT_4K) },
@@ -658,6 +675,7 @@ static const struct spi_device_id spi_nor_ids[] = {
        { "w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64, SECT_4K) },
        { "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
        { "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
+       { "w25q64dw", INFO(0xef6017, 0, 64 * 1024, 128, SECT_4K) },
        { "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
        { "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
        { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
@@ -1045,6 +1063,11 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
 
        /* nor protection support for STmicro chips */
        if (JEDEC_MFR(info) == CFI_MFR_ST) {
+               nor->flash_lock = stm_lock;
+               nor->flash_unlock = stm_unlock;
+       }
+
+       if (nor->flash_lock && nor->flash_unlock) {
                mtd->_lock = spi_nor_lock;
                mtd->_unlock = spi_nor_unlock;
        }
index e579f90..7931615 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/slab.h>
 #include <linux/mtd/nand_ecc.h>
 
+#include "mtd_test.h"
+
 /*
  * Test the implementation for software ECC
  *
@@ -274,6 +276,10 @@ static int nand_ecc_test_run(const size_t size)
                }
                pr_info("ok - %s-%zd\n",
                        nand_ecc_test[i].name, size);
+
+               err = mtdtest_relax();
+               if (err)
+                       break;
        }
 error:
        kfree(error_data);
index f437c77..4b7bee1 100644 (file)
@@ -1,4 +1,16 @@
 #include <linux/mtd/mtd.h>
+#include <linux/sched.h>
+
+static inline int mtdtest_relax(void)
+{
+       cond_resched();
+       if (signal_pending(current)) {
+               pr_info("aborting test due to pending signal!\n");
+               return -EINTR;
+       }
+
+       return 0;
+}
 
 int mtdtest_erase_eraseblock(struct mtd_info *mtd, unsigned int ebnum);
 int mtdtest_scan_for_bad_eraseblocks(struct mtd_info *mtd, unsigned char *bbt,
index 273f7e5..09a4cca 100644 (file)
@@ -320,6 +320,10 @@ static int overwrite_test(void)
                        break;
                }
 
+               err = mtdtest_relax();
+               if (err)
+                       break;
+
                opno++;
        }
 
index 5e06118..8e8525f 100644 (file)
@@ -70,7 +70,7 @@ static int write_eraseblock(int ebnum)
        int i;
        struct mtd_oob_ops ops;
        int err = 0;
-       loff_t addr = ebnum * mtd->erasesize;
+       loff_t addr = (loff_t)ebnum * mtd->erasesize;
 
        prandom_bytes_state(&rnd_state, writebuf, use_len_max * pgcnt);
        for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
@@ -112,7 +112,10 @@ static int write_whole_device(void)
                        return err;
                if (i % 256 == 0)
                        pr_info("written up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       return err;
        }
        pr_info("written %u eraseblocks\n", i);
        return 0;
@@ -141,6 +144,31 @@ static size_t memcmpshow(loff_t addr, const void *cs, const void *ct, size_t cou
        return bitflips;
 }
 
+/*
+ * Compare with 0xff and show the address, offset and data bytes at
+ * comparison failure. Return number of bitflips encountered.
+ */
+static size_t memffshow(loff_t addr, loff_t offset, const void *cs,
+                       size_t count)
+{
+       const unsigned char *su1;
+       int res;
+       size_t i = 0;
+       size_t bitflips = 0;
+
+       for (su1 = cs; 0 < count; ++su1, count--, i++) {
+               res = *su1 ^ 0xff;
+               if (res) {
+                       pr_info("error @addr[0x%lx:0x%lx] 0x%x -> 0xff diff 0x%x\n",
+                               (unsigned long)addr, (unsigned long)offset + i,
+                               *su1, res);
+                       bitflips += hweight8(res);
+               }
+       }
+
+       return bitflips;
+}
+
 static int verify_eraseblock(int ebnum)
 {
        int i;
@@ -203,6 +231,15 @@ static int verify_eraseblock(int ebnum)
                        bitflips = memcmpshow(addr, readbuf + use_offset,
                                              writebuf + (use_len_max * i) + use_offset,
                                              use_len);
+
+                       /* verify pre-offset area for 0xff */
+                       bitflips += memffshow(addr, 0, readbuf, use_offset);
+
+                       /* verify post-(use_offset + use_len) area for 0xff */
+                       k = use_offset + use_len;
+                       bitflips += memffshow(addr, k, readbuf + k,
+                                             mtd->ecclayout->oobavail - k);
+
                        if (bitflips > bitflip_limit) {
                                pr_err("error: verify failed at %#llx\n",
                                                (long long)addr);
@@ -212,34 +249,8 @@ static int verify_eraseblock(int ebnum)
                                        return -1;
                                }
                        } else if (bitflips) {
-                               pr_info("ignoring error as within bitflip_limit\n");
+                               pr_info("ignoring errors as within bitflip limit\n");
                        }
-
-                       for (k = 0; k < use_offset; ++k)
-                               if (readbuf[k] != 0xff) {
-                                       pr_err("error: verify 0xff "
-                                              "failed at %#llx\n",
-                                              (long long)addr);
-                                       errcnt += 1;
-                                       if (errcnt > 1000) {
-                                               pr_err("error: too "
-                                                      "many errors\n");
-                                               return -1;
-                                       }
-                               }
-                       for (k = use_offset + use_len;
-                            k < mtd->ecclayout->oobavail; ++k)
-                               if (readbuf[k] != 0xff) {
-                                       pr_err("error: verify 0xff "
-                                              "failed at %#llx\n",
-                                              (long long)addr);
-                                       errcnt += 1;
-                                       if (errcnt > 1000) {
-                                               pr_err("error: too "
-                                                      "many errors\n");
-                                               return -1;
-                                       }
-                               }
                }
                if (vary_offset)
                        do_vary_offset();
@@ -310,7 +321,10 @@ static int verify_all_eraseblocks(void)
                        return err;
                if (i % 256 == 0)
                        pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       return err;
        }
        pr_info("verified %u eraseblocks\n", i);
        return 0;
@@ -421,7 +435,10 @@ static int __init mtd_oobtest_init(void)
                        goto out;
                if (i % 256 == 0)
                        pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        pr_info("verified %u eraseblocks\n", i);
 
@@ -634,7 +651,11 @@ static int __init mtd_oobtest_init(void)
                                goto out;
                        if (i % 256 == 0)
                                pr_info("written up to eraseblock %u\n", i);
-                       cond_resched();
+
+                       err = mtdtest_relax();
+                       if (err)
+                               goto out;
+
                        addr += mtd->writesize;
                }
        }
@@ -672,7 +693,10 @@ static int __init mtd_oobtest_init(void)
                }
                if (i % 256 == 0)
                        pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        pr_info("verified %u eraseblocks\n", i);
 
index 88296e8..ba1890d 100644 (file)
@@ -407,7 +407,10 @@ static int __init mtd_pagetest_init(void)
                        goto out;
                if (i % 256 == 0)
                        pr_info("written up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        pr_info("written %u eraseblocks\n", i);
 
@@ -422,7 +425,10 @@ static int __init mtd_pagetest_init(void)
                        goto out;
                if (i % 256 == 0)
                        pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        pr_info("verified %u eraseblocks\n", i);
 
index a54cf15..a3196b7 100644 (file)
@@ -190,7 +190,10 @@ static int __init mtd_readtest_init(void)
                        if (!err)
                                err = ret;
                }
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
 
        if (err)
index 5ee9f70..5a6f31a 100644 (file)
@@ -185,7 +185,7 @@ static long calc_speed(void)
             (finish.tv_usec - start.tv_usec) / 1000;
        if (ms == 0)
                return 0;
-       k = goodebcnt * (mtd->erasesize / 1024) * 1000;
+       k = (uint64_t)goodebcnt * (mtd->erasesize / 1024) * 1000;
        do_div(k, ms);
        return k;
 }
@@ -269,7 +269,10 @@ static int __init mtd_speedtest_init(void)
                err = write_eraseblock(i);
                if (err)
                        goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        stop_timing();
        speed = calc_speed();
@@ -284,7 +287,10 @@ static int __init mtd_speedtest_init(void)
                err = read_eraseblock(i);
                if (err)
                        goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        stop_timing();
        speed = calc_speed();
@@ -303,7 +309,10 @@ static int __init mtd_speedtest_init(void)
                err = write_eraseblock_by_page(i);
                if (err)
                        goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        stop_timing();
        speed = calc_speed();
@@ -318,7 +327,10 @@ static int __init mtd_speedtest_init(void)
                err = read_eraseblock_by_page(i);
                if (err)
                        goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        stop_timing();
        speed = calc_speed();
@@ -337,7 +349,10 @@ static int __init mtd_speedtest_init(void)
                err = write_eraseblock_by_2pages(i);
                if (err)
                        goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        stop_timing();
        speed = calc_speed();
@@ -352,7 +367,10 @@ static int __init mtd_speedtest_init(void)
                err = read_eraseblock_by_2pages(i);
                if (err)
                        goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        stop_timing();
        speed = calc_speed();
@@ -385,7 +403,11 @@ static int __init mtd_speedtest_init(void)
                        err = multiblock_erase(i, j);
                        if (err)
                                goto out;
-                       cond_resched();
+
+                       err = mtdtest_relax();
+                       if (err)
+                               goto out;
+
                        i += j;
                }
                stop_timing();
index c9d42cc..e509f8a 100644 (file)
@@ -96,7 +96,7 @@ static int do_read(void)
                if (offs + len > mtd->erasesize)
                        len = mtd->erasesize - offs;
        }
-       addr = eb * mtd->erasesize + offs;
+       addr = (loff_t)eb * mtd->erasesize + offs;
        return mtdtest_read(mtd, addr, len, readbuf);
 }
 
@@ -124,7 +124,7 @@ static int do_write(void)
                        offsets[eb + 1] = 0;
                }
        }
-       addr = eb * mtd->erasesize + offs;
+       addr = (loff_t)eb * mtd->erasesize + offs;
        err = mtdtest_write(mtd, addr, len, writebuf);
        if (unlikely(err))
                return err;
@@ -221,7 +221,10 @@ static int __init mtd_stresstest_init(void)
                err = do_operation();
                if (err)
                        goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        pr_info("finished, %d operations done\n", op);
 
index 7b59ef5..aecc6ce 100644 (file)
@@ -95,7 +95,7 @@ static int write_eraseblock2(int ebnum)
        loff_t addr = (loff_t)ebnum * mtd->erasesize;
 
        for (k = 1; k < 33; ++k) {
-               if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
+               if (addr + (subpgsize * k) > (loff_t)(ebnum + 1) * mtd->erasesize)
                        break;
                prandom_bytes_state(&rnd_state, writebuf, subpgsize * k);
                err = mtd_write(mtd, addr, subpgsize * k, &written, writebuf);
@@ -195,7 +195,7 @@ static int verify_eraseblock2(int ebnum)
        loff_t addr = (loff_t)ebnum * mtd->erasesize;
 
        for (k = 1; k < 33; ++k) {
-               if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
+               if (addr + (subpgsize * k) > (loff_t)(ebnum + 1) * mtd->erasesize)
                        break;
                prandom_bytes_state(&rnd_state, writebuf, subpgsize * k);
                clear_data(readbuf, subpgsize * k);
@@ -269,7 +269,10 @@ static int verify_all_eraseblocks_ff(void)
                        return err;
                if (i % 256 == 0)
                        pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       return err;
        }
        pr_info("verified %u eraseblocks\n", i);
        return 0;
@@ -346,7 +349,10 @@ static int __init mtd_subpagetest_init(void)
                        goto out;
                if (i % 256 == 0)
                        pr_info("written up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        pr_info("written %u eraseblocks\n", i);
 
@@ -360,7 +366,10 @@ static int __init mtd_subpagetest_init(void)
                        goto out;
                if (i % 256 == 0)
                        pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        pr_info("verified %u eraseblocks\n", i);
 
@@ -383,7 +392,10 @@ static int __init mtd_subpagetest_init(void)
                        goto out;
                if (i % 256 == 0)
                        pr_info("written up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        pr_info("written %u eraseblocks\n", i);
 
@@ -398,7 +410,10 @@ static int __init mtd_subpagetest_init(void)
                        goto out;
                if (i % 256 == 0)
                        pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
        }
        pr_info("verified %u eraseblocks\n", i);
 
index b55bc52..e5d6e6d 100644 (file)
@@ -101,11 +101,11 @@ static inline int check_eraseblock(int ebnum, unsigned char *buf)
 {
        int err, retries = 0;
        size_t read;
-       loff_t addr = ebnum * mtd->erasesize;
+       loff_t addr = (loff_t)ebnum * mtd->erasesize;
        size_t len = mtd->erasesize;
 
        if (pgcnt) {
-               addr = (ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
+               addr = (loff_t)(ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
                len = pgcnt * pgsize;
        }
 
@@ -155,11 +155,11 @@ static inline int write_pattern(int ebnum, void *buf)
 {
        int err;
        size_t written;
-       loff_t addr = ebnum * mtd->erasesize;
+       loff_t addr = (loff_t)ebnum * mtd->erasesize;
        size_t len = mtd->erasesize;
 
        if (pgcnt) {
-               addr = (ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
+               addr = (loff_t)(ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
                len = pgcnt * pgsize;
        }
        err = mtd_write(mtd, addr, len, &written, buf);
@@ -279,7 +279,10 @@ static int __init tort_init(void)
                                               " for 0xFF... pattern\n");
                                        goto out;
                                }
-                               cond_resched();
+
+                               err = mtdtest_relax();
+                               if (err)
+                                       goto out;
                        }
                }
 
@@ -294,7 +297,10 @@ static int __init tort_init(void)
                        err = write_pattern(i, patt);
                        if (err)
                                goto out;
-                       cond_resched();
+
+                       err = mtdtest_relax();
+                       if (err)
+                               goto out;
                }
 
                /* Verify what we wrote */
@@ -314,7 +320,10 @@ static int __init tort_init(void)
                                               "0x55AA55..." : "0xAA55AA...");
                                        goto out;
                                }
-                               cond_resched();
+
+                               err = mtdtest_relax();
+                               if (err)
+                                       goto out;
                        }
                }
 
index 9690cf9..b7f824d 100644 (file)
@@ -1169,9 +1169,9 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
                return ERR_PTR(err);
 
        /* MTD device number is defined by the major / minor numbers */
-       major = imajor(path.dentry->d_inode);
-       minor = iminor(path.dentry->d_inode);
-       mode = path.dentry->d_inode->i_mode;
+       major = imajor(d_backing_inode(path.dentry));
+       minor = iminor(d_backing_inode(path.dentry));
+       mode = d_backing_inode(path.dentry)->i_mode;
        path_put(&path);
        if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode))
                return ERR_PTR(-EINVAL);
index 478e00c..e844887 100644 (file)
@@ -314,7 +314,7 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
        if (error)
                return ERR_PTR(error);
 
-       inode = path.dentry->d_inode;
+       inode = d_backing_inode(path.dentry);
        mod = inode->i_mode;
        ubi_num = ubi_major2num(imajor(inode));
        vol_id = iminor(inode) - 1;
index f0285bc..371f75e 100644 (file)
@@ -538,7 +538,7 @@ static ssize_t tp_la_write(struct file *file, const char __user *buf,
        char s[32];
        unsigned long val;
        size_t size = min(sizeof(s) - 1, count);
-       struct adapter *adap = FILE_DATA(file)->i_private;
+       struct adapter *adap = file_inode(file)->i_private;
 
        if (copy_from_user(s, buf, size))
                return -EFAULT;
@@ -647,7 +647,7 @@ static int pm_stats_open(struct inode *inode, struct file *file)
 static ssize_t pm_stats_clear(struct file *file, const char __user *buf,
                              size_t count, loff_t *pos)
 {
-       struct adapter *adap = FILE_DATA(file)->i_private;
+       struct adapter *adap = file_inode(file)->i_private;
 
        t4_write_reg(adap, PM_RX_STAT_CONFIG_A, 0);
        t4_write_reg(adap, PM_TX_STAT_CONFIG_A, 0);
@@ -1005,7 +1005,7 @@ static ssize_t mbox_write(struct file *file, const char __user *buf,
                   &data[7], &c) < 8 || c != '\n')
                return -EINVAL;
 
-       ino = FILE_DATA(file);
+       ino = file_inode(file);
        mbox = (uintptr_t)ino->i_private & 7;
        adap = ino->i_private - mbox;
        addr = adap->regs + PF_REG(mbox, CIM_PF_MAILBOX_DATA_A);
@@ -1034,7 +1034,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, size_t count,
                          loff_t *ppos)
 {
        loff_t pos = *ppos;
-       loff_t avail = FILE_DATA(file)->i_size;
+       loff_t avail = file_inode(file)->i_size;
        struct adapter *adap = file->private_data;
 
        if (pos < 0)
@@ -1479,7 +1479,7 @@ static ssize_t rss_key_write(struct file *file, const char __user *buf,
        int i, j;
        u32 key[10];
        char s[100], *p;
-       struct adapter *adap = FILE_DATA(file)->i_private;
+       struct adapter *adap = file_inode(file)->i_private;
 
        if (count > sizeof(s) - 1)
                return -EINVAL;
@@ -1951,12 +1951,6 @@ static const struct file_operations mem_debugfs_fops = {
        .llseek  = default_llseek,
 };
 
-static void set_debugfs_file_size(struct dentry *de, loff_t size)
-{
-       if (!IS_ERR(de) && de->d_inode)
-               de->d_inode->i_size = size;
-}
-
 static void add_debugfs_mem(struct adapter *adap, const char *name,
                            unsigned int idx, unsigned int size_mb)
 {
@@ -2072,9 +2066,8 @@ int t4_setup_debugfs(struct adapter *adap)
                }
        }
 
-       de = debugfs_create_file("flash", S_IRUSR, adap->debugfs_root, adap,
-                                &flash_debugfs_fops);
-       set_debugfs_file_size(de, adap->params.sf_size);
+       de = debugfs_create_file_size("flash", S_IRUSR, adap->debugfs_root, adap,
+                                     &flash_debugfs_fops, adap->params.sf_size);
 
        return 0;
 }
index 8f418ba..23f43a0 100644 (file)
@@ -37,8 +37,6 @@
 
 #include <linux/export.h>
 
-#define FILE_DATA(_file) ((_file)->f_path.dentry->d_inode)
-
 #define DEFINE_SIMPLE_DEBUGFS_FILE(name) \
 static int name##_open(struct inode *inode, struct file *file) \
 { \
index f0fbb4a..4f7dc04 100644 (file)
@@ -939,21 +939,34 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
                                return err;
                        }
                        if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
-                               /* compute slave's gid block */
-                               smp->attr_mod = cpu_to_be32(slave / 8);
-                               /* execute cmd */
-                               err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-                                            vhcr->in_modifier, opcode_modifier,
-                                            vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
-                               if (!err) {
-                                       /* if needed, move slave gid to index 0 */
-                                       if (slave % 8)
-                                               memcpy(outsmp->data,
-                                                      outsmp->data + (slave % 8) * 8, 8);
-                                       /* delete all other gids */
-                                       memset(outsmp->data + 8, 0, 56);
+                               __be64 guid = mlx4_get_admin_guid(dev, slave,
+                                                                 port);
+
+                               /* set the PF admin guid to the FW/HW burned
+                                * GUID, if it wasn't yet set
+                                */
+                               if (slave == 0 && guid == 0) {
+                                       smp->attr_mod = 0;
+                                       err = mlx4_cmd_box(dev,
+                                                          inbox->dma,
+                                                          outbox->dma,
+                                                          vhcr->in_modifier,
+                                                          opcode_modifier,
+                                                          vhcr->op,
+                                                          MLX4_CMD_TIME_CLASS_C,
+                                                          MLX4_CMD_NATIVE);
+                                       if (err)
+                                               return err;
+                                       mlx4_set_admin_guid(dev,
+                                                           *(__be64 *)outsmp->
+                                                           data, slave, port);
+                               } else {
+                                       memcpy(outsmp->data, &guid, 8);
                                }
-                               return err;
+
+                               /* clean all other gids */
+                               memset(outsmp->data + 8, 0, 56);
+                               return 0;
                        }
                        if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
                                err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
@@ -2350,6 +2363,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
                                oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
                                vf_oper->vport[port].vlan_idx = NO_INDX;
                                vf_oper->vport[port].mac_idx = NO_INDX;
+                               mlx4_set_random_admin_guid(dev, i, port);
                        }
                        spin_lock_init(&s_state->lock);
                }
index 190fd62..2619c9f 100644 (file)
@@ -702,6 +702,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
                                priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
                        }
                        spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+                       mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
+                                           flr_slave);
                        queue_work(priv->mfunc.master.comm_wq,
                                   &priv->mfunc.master.slave_flr_event_work);
                        break;
index acceb75..ced5eca 100644 (file)
@@ -2260,6 +2260,37 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
 }
 EXPORT_SYMBOL_GPL(mlx4_counter_free);
 
+void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
+
+__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       return priv->mfunc.master.vf_admin[entry].vport[port].guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
+
+void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       __be64 guid;
+
+       /* hw GUID */
+       if (entry == 0)
+               return;
+
+       get_random_bytes((char *)&guid, sizeof(guid));
+       guid &= ~(cpu_to_be64(1ULL << 56));
+       guid |= cpu_to_be64(1ULL << 57);
+       priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+
 static int mlx4_setup_hca(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
index f30eeb7..502d3dd 100644 (file)
@@ -499,6 +499,7 @@ struct mlx4_vport_state {
        bool spoofchk;
        u32 link_state;
        u8 qos_vport;
+       __be64 guid;
 };
 
 struct mlx4_vf_admin_state {
index df22383..8a64542 100644 (file)
@@ -211,26 +211,28 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
        return 0;
 }
 
+#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
+
 static void free_4k(struct mlx5_core_dev *dev, u64 addr)
 {
        struct fw_page *fwp;
        int n;
 
-       fwp = find_fw_page(dev, addr & PAGE_MASK);
+       fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
        if (!fwp) {
                mlx5_core_warn(dev, "page not found\n");
                return;
        }
 
-       n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+       n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
        fwp->free_count++;
        set_bit(n, &fwp->bitmask);
        if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
                rb_erase(&fwp->rb_node, &dev->priv.page_root);
                if (fwp->free_count != 1)
                        list_del(&fwp->list);
-               dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
-                              DMA_BIDIRECTIONAL);
+               dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
+                              PAGE_SIZE, DMA_BIDIRECTIONAL);
                __free_page(fwp->page);
                kfree(fwp);
        } else if (fwp->free_count == 1) {
index 1470b52..07bb3c8 100644 (file)
@@ -50,7 +50,7 @@ config OF_ADDRESS_PCI
 
 config OF_IRQ
        def_bool y
-       depends on !SPARC
+       depends on !SPARC && IRQ_DOMAIN
 
 config OF_NET
        depends on NETDEVICES
index a1aa0c7..99764db 100644 (file)
@@ -567,6 +567,29 @@ bool of_device_is_available(const struct device_node *device)
 }
 EXPORT_SYMBOL(of_device_is_available);
 
+/**
+ *  of_device_is_big_endian - check if a device has BE registers
+ *
+ *  @device: Node to check for endianness
+ *
+ *  Returns true if the device has a "big-endian" property, or if the kernel
+ *  was compiled for BE *and* the device has a "native-endian" property.
+ *  Returns false otherwise.
+ *
+ *  Callers would nominally use ioread32be/iowrite32be if
+ *  of_device_is_big_endian() == true, or readl/writel otherwise.
+ */
+bool of_device_is_big_endian(const struct device_node *device)
+{
+       if (of_property_read_bool(device, "big-endian"))
+               return true;
+       if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) &&
+           of_property_read_bool(device, "native-endian"))
+               return true;
+       return false;
+}
+EXPORT_SYMBOL(of_device_is_big_endian);
+
 /**
  *     of_get_parent - Get a node's parent if any
  *     @node:  Node to get parent
@@ -640,8 +663,9 @@ static struct device_node *__of_get_next_child(const struct device_node *node,
  *     @node:  parent node
  *     @prev:  previous child of the parent node, or NULL to get first
  *
- *     Returns a node pointer with refcount incremented, use
- *     of_node_put() on it when done.
+ *     Returns a node pointer with refcount incremented, use of_node_put() on
+ *     it when done. Returns NULL when prev is the last child. Decrements the
+ *     refcount of prev.
  */
 struct device_node *of_get_next_child(const struct device_node *node,
        struct device_node *prev)
index 3a896c9..cde35c5 100644 (file)
@@ -108,6 +108,25 @@ int of_fdt_is_compatible(const void *blob,
        return 0;
 }
 
+/**
+ * of_fdt_is_big_endian - Return true if given node needs BE MMIO accesses
+ * @blob: A device tree blob
+ * @node: node to test
+ *
+ * Returns true if the node has a "big-endian" property, or if the kernel
+ * was compiled for BE *and* the node has a "native-endian" property.
+ * Returns false otherwise.
+ */
+bool of_fdt_is_big_endian(const void *blob, unsigned long node)
+{
+       if (fdt_getprop(blob, node, "big-endian", NULL))
+               return true;
+       if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) &&
+           fdt_getprop(blob, node, "native-endian", NULL))
+               return true;
+       return false;
+}
+
 /**
  * of_fdt_match - Return true if node matches a list of compatible values
  */
@@ -172,7 +191,7 @@ static void * unflatten_dt_node(void *blob,
        if (!pathp)
                return mem;
 
-       allocl = l++;
+       allocl = ++l;
 
        /* version 0x10 has a more compact unit name here instead of the full
         * path. we accumulate the full path size using "fpsize", we'll rebuild
@@ -879,8 +898,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
 
        endp = reg + (l / sizeof(__be32));
 
-       pr_debug("memory scan node %s, reg size %d, data: %x %x %x %x,\n",
-           uname, l, reg[0], reg[1], reg[2], reg[3]);
+       pr_debug("memory scan node %s, reg size %d,\n", uname, l);
 
        while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
                u64 base, size;
index e844907..1801634 100644 (file)
@@ -23,6 +23,8 @@
 #include <linux/i2c.h>
 #include <linux/i2c-mux.h>
 
+#include <linux/bitops.h>
+
 #include "of_private.h"
 
 static struct unittest_results {
@@ -1109,6 +1111,59 @@ static const char *overlay_path(int nr)
 
 static const char *bus_path = "/testcase-data/overlay-node/test-bus";
 
+/* it is guaranteed that overlay ids are assigned in sequence */
+#define MAX_UNITTEST_OVERLAYS  256
+static unsigned long overlay_id_bits[BITS_TO_LONGS(MAX_UNITTEST_OVERLAYS)];
+static int overlay_first_id = -1;
+
+static void of_unittest_track_overlay(int id)
+{
+       if (overlay_first_id < 0)
+               overlay_first_id = id;
+       id -= overlay_first_id;
+
+       /* we shouldn't need that many */
+       BUG_ON(id >= MAX_UNITTEST_OVERLAYS);
+       overlay_id_bits[BIT_WORD(id)] |= BIT_MASK(id);
+}
+
+static void of_unittest_untrack_overlay(int id)
+{
+       if (overlay_first_id < 0)
+               return;
+       id -= overlay_first_id;
+       BUG_ON(id >= MAX_UNITTEST_OVERLAYS);
+       overlay_id_bits[BIT_WORD(id)] &= ~BIT_MASK(id);
+}
+
+static void of_unittest_destroy_tracked_overlays(void)
+{
+       int id, ret, defers;
+
+       if (overlay_first_id < 0)
+               return;
+
+       /* try until no defers */
+       do {
+               defers = 0;
+               /* remove in reverse order */
+               for (id = MAX_UNITTEST_OVERLAYS - 1; id >= 0; id--) {
+                       if (!(overlay_id_bits[BIT_WORD(id)] & BIT_MASK(id)))
+                               continue;
+
+                       ret = of_overlay_destroy(id + overlay_first_id);
+                       if (ret != 0) {
+                               defers++;
+                               pr_warn("%s: overlay destroy failed for #%d\n",
+                                       __func__, id + overlay_first_id);
+                               continue;
+                       }
+
+                       overlay_id_bits[BIT_WORD(id)] &= ~BIT_MASK(id);
+               }
+       } while (defers > 0);
+}
+
 static int of_unittest_apply_overlay(int unittest_nr, int overlay_nr,
                int *overlay_id)
 {
@@ -1130,6 +1185,7 @@ static int of_unittest_apply_overlay(int unittest_nr, int overlay_nr,
                goto out;
        }
        id = ret;
+       of_unittest_track_overlay(id);
 
        ret = 0;
 
@@ -1343,6 +1399,7 @@ static void of_unittest_overlay_6(void)
                        return;
                }
                ov_id[i] = ret;
+               of_unittest_track_overlay(ov_id[i]);
        }
 
        for (i = 0; i < 2; i++) {
@@ -1367,6 +1424,7 @@ static void of_unittest_overlay_6(void)
                                                PDEV_OVERLAY));
                        return;
                }
+               of_unittest_untrack_overlay(ov_id[i]);
        }
 
        for (i = 0; i < 2; i++) {
@@ -1411,6 +1469,7 @@ static void of_unittest_overlay_8(void)
                        return;
                }
                ov_id[i] = ret;
+               of_unittest_track_overlay(ov_id[i]);
        }
 
        /* now try to remove first overlay (it should fail) */
@@ -1433,6 +1492,7 @@ static void of_unittest_overlay_8(void)
                                                PDEV_OVERLAY));
                        return;
                }
+               of_unittest_untrack_overlay(ov_id[i]);
        }
 
        unittest(1, "overlay test %d passed\n", 8);
@@ -1855,6 +1915,8 @@ static void __init of_unittest_overlay(void)
        of_unittest_overlay_i2c_cleanup();
 #endif
 
+       of_unittest_destroy_tracked_overlays();
+
 out:
        of_node_put(bus_np);
 }
index 3f49345..dd92c5e 100644 (file)
@@ -138,22 +138,22 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name,
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
        dentry = d_alloc_name(root, name);
        if (!dentry) {
-               mutex_unlock(&root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(root)->i_mutex);
                return -ENOMEM;
        }
        inode = oprofilefs_get_inode(root->d_sb, S_IFREG | perm);
        if (!inode) {
                dput(dentry);
-               mutex_unlock(&root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(root)->i_mutex);
                return -ENOMEM;
        }
        inode->i_fop = fops;
        inode->i_private = priv;
        d_add(dentry, inode);
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
        return 0;
 }
 
@@ -215,22 +215,22 @@ struct dentry *oprofilefs_mkdir(struct dentry *parent, char const *name)
        struct dentry *dentry;
        struct inode *inode;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        dentry = d_alloc_name(parent, name);
        if (!dentry) {
-               mutex_unlock(&parent->d_inode->i_mutex);
+               mutex_unlock(&d_inode(parent)->i_mutex);
                return NULL;
        }
        inode = oprofilefs_get_inode(parent->d_sb, S_IFDIR | 0755);
        if (!inode) {
                dput(dentry);
-               mutex_unlock(&parent->d_inode->i_mutex);
+               mutex_unlock(&d_inode(parent)->i_mutex);
                return NULL;
        }
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
        d_add(dentry, inode);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
        return dentry;
 }
 
index 440ed77..2a6531a 100644 (file)
@@ -4,7 +4,7 @@
 
 menuconfig CHROME_PLATFORMS
        bool "Platform support for Chrome hardware"
-       depends on X86
+       depends on X86 || ARM
        ---help---
          Say Y here to get to see options for platform support for
          various Chromebooks and Chromeboxes. This option alone does
@@ -16,8 +16,7 @@ if CHROME_PLATFORMS
 
 config CHROMEOS_LAPTOP
        tristate "Chrome OS Laptop"
-       depends on I2C
-       depends on DMI
+       depends on I2C && DMI && X86
        ---help---
          This driver instantiates i2c and smbus devices such as
          light sensors and touchpads.
@@ -27,6 +26,7 @@ config CHROMEOS_LAPTOP
 
 config CHROMEOS_PSTORE
        tristate "Chrome OS pstore support"
+       depends on X86
        ---help---
          This module instantiates the persistent storage on x86 ChromeOS
          devices. It can be used to store away console logs and crash
@@ -38,5 +38,25 @@ config CHROMEOS_PSTORE
          If you have a supported Chromebook, choose Y or M here.
          The module will be called chromeos_pstore.
 
+config CROS_EC_CHARDEV
+        tristate "Chrome OS Embedded Controller userspace device interface"
+        depends on MFD_CROS_EC
+        ---help---
+          This driver adds support to talk with the ChromeOS EC from userspace.
+
+          If you have a supported Chromebook, choose Y or M here.
+          The module will be called cros_ec_dev.
+
+config CROS_EC_LPC
+        tristate "ChromeOS Embedded Controller (LPC)"
+        depends on MFD_CROS_EC && (X86 || COMPILE_TEST)
+        help
+          If you say Y here, you get support for talking to the ChromeOS EC
+          over an LPC bus. This uses a simple byte-level protocol with a
+          checksum. This is used for userspace access only. The kernel
+          typically has its own communication methods.
+
+          To compile this driver as a module, choose M here: the
+          module will be called cros_ec_lpc.
 
 endif # CHROMEOS_PLATFORMS
index 2b860ca..bd8d860 100644 (file)
@@ -1,3 +1,6 @@
 
 obj-$(CONFIG_CHROMEOS_LAPTOP)  += chromeos_laptop.o
 obj-$(CONFIG_CHROMEOS_PSTORE)  += chromeos_pstore.o
+cros_ec_devs-objs               := cros_ec_dev.o cros_ec_sysfs.o cros_ec_lightbar.o
+obj-$(CONFIG_CROS_EC_CHARDEV)   += cros_ec_devs.o
+obj-$(CONFIG_CROS_EC_LPC)       += cros_ec_lpc.o
index b84fdd6..a04019a 100644 (file)
@@ -133,12 +133,13 @@ static struct i2c_client *__add_probed_i2c_device(
                const char *name,
                int bus,
                struct i2c_board_info *info,
-               const unsigned short *addrs)
+               const unsigned short *alt_addr_list)
 {
        const struct dmi_device *dmi_dev;
        const struct dmi_dev_onboard *dev_data;
        struct i2c_adapter *adapter;
-       struct i2c_client *client;
+       struct i2c_client *client = NULL;
+       const unsigned short addr_list[] = { info->addr, I2C_CLIENT_END };
 
        if (bus < 0)
                return NULL;
@@ -169,8 +170,28 @@ static struct i2c_client *__add_probed_i2c_device(
                return NULL;
        }
 
-       /* add the i2c device */
-       client = i2c_new_probed_device(adapter, info, addrs, NULL);
+       /*
+        * Add the i2c device. If we can't detect it at the primary
+        * address we scan secondary addresses. In any case the client
+        * structure gets assigned primary address.
+        */
+       client = i2c_new_probed_device(adapter, info, addr_list, NULL);
+       if (!client && alt_addr_list) {
+               struct i2c_board_info dummy_info = {
+                       I2C_BOARD_INFO("dummy", info->addr),
+               };
+               struct i2c_client *dummy;
+
+               dummy = i2c_new_probed_device(adapter, &dummy_info,
+                                             alt_addr_list, NULL);
+               if (dummy) {
+                       pr_debug("%s %d-%02x is probed at %02x\n",
+                                 __func__, bus, info->addr, dummy->addr);
+                       i2c_unregister_device(dummy);
+                       client = i2c_new_device(adapter, info);
+               }
+       }
+
        if (!client)
                pr_notice("%s failed to register device %d-%02x\n",
                          __func__, bus, info->addr);
@@ -254,12 +275,10 @@ static struct i2c_client *add_i2c_device(const char *name,
                                                enum i2c_adapter_type type,
                                                struct i2c_board_info *info)
 {
-       const unsigned short addr_list[] = { info->addr, I2C_CLIENT_END };
-
        return __add_probed_i2c_device(name,
                                       find_i2c_adapter_num(type),
                                       info,
-                                      addr_list);
+                                      NULL);
 }
 
 static int setup_cyapa_tp(enum i2c_adapter_type type)
@@ -275,7 +294,6 @@ static int setup_cyapa_tp(enum i2c_adapter_type type)
 static int setup_atmel_224s_tp(enum i2c_adapter_type type)
 {
        const unsigned short addr_list[] = { ATMEL_TP_I2C_BL_ADDR,
-                                            ATMEL_TP_I2C_ADDR,
                                             I2C_CLIENT_END };
        if (tp)
                return 0;
@@ -289,7 +307,6 @@ static int setup_atmel_224s_tp(enum i2c_adapter_type type)
 static int setup_atmel_1664s_ts(enum i2c_adapter_type type)
 {
        const unsigned short addr_list[] = { ATMEL_TS_I2C_BL_ADDR,
-                                            ATMEL_TS_I2C_ADDR,
                                             I2C_CLIENT_END };
        if (ts)
                return 0;
diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c
new file mode 100644 (file)
index 0000000..6090d0b
--- /dev/null
@@ -0,0 +1,274 @@
+/*
+ * cros_ec_dev - expose the Chrome OS Embedded Controller to user-space
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/uaccess.h>
+
+#include "cros_ec_dev.h"
+
+/* Device variables */
+#define CROS_MAX_DEV 128
+static struct class *cros_class;
+static int ec_major;
+
+/* Basic communication */
+static int ec_get_version(struct cros_ec_device *ec, char *str, int maxlen)
+{
+       struct ec_response_get_version *resp;
+       static const char * const current_image_name[] = {
+               "unknown", "read-only", "read-write", "invalid",
+       };
+       struct cros_ec_command msg = {
+               .version = 0,
+               .command = EC_CMD_GET_VERSION,
+               .outdata = { 0 },
+               .outsize = 0,
+               .indata = { 0 },
+               .insize = sizeof(*resp),
+       };
+       int ret;
+
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       if (msg.result != EC_RES_SUCCESS) {
+               snprintf(str, maxlen,
+                        "%s\nUnknown EC version: EC returned %d\n",
+                        CROS_EC_DEV_VERSION, msg.result);
+               return 0;
+       }
+
+       resp = (struct ec_response_get_version *)msg.indata;
+       if (resp->current_image >= ARRAY_SIZE(current_image_name))
+               resp->current_image = 3; /* invalid */
+
+       snprintf(str, maxlen, "%s\n%s\n%s\n%s\n", CROS_EC_DEV_VERSION,
+                resp->version_string_ro, resp->version_string_rw,
+                current_image_name[resp->current_image]);
+
+       return 0;
+}
+
+/* Device file ops */
+static int ec_device_open(struct inode *inode, struct file *filp)
+{
+       filp->private_data = container_of(inode->i_cdev,
+                                         struct cros_ec_device, cdev);
+       return 0;
+}
+
+static int ec_device_release(struct inode *inode, struct file *filp)
+{
+       return 0;
+}
+
+static ssize_t ec_device_read(struct file *filp, char __user *buffer,
+                             size_t length, loff_t *offset)
+{
+       struct cros_ec_device *ec = filp->private_data;
+       char msg[sizeof(struct ec_response_get_version) +
+                sizeof(CROS_EC_DEV_VERSION)];
+       size_t count;
+       int ret;
+
+       if (*offset != 0)
+               return 0;
+
+       ret = ec_get_version(ec, msg, sizeof(msg));
+       if (ret)
+               return ret;
+
+       count = min(length, strlen(msg));
+
+       if (copy_to_user(buffer, msg, count))
+               return -EFAULT;
+
+       *offset = count;
+       return count;
+}
+
+/* Ioctls */
+static long ec_device_ioctl_xcmd(struct cros_ec_device *ec, void __user *arg)
+{
+       long ret;
+       struct cros_ec_command s_cmd = { };
+
+       if (copy_from_user(&s_cmd, arg, sizeof(s_cmd)))
+               return -EFAULT;
+
+       ret = cros_ec_cmd_xfer(ec, &s_cmd);
+       /* Only copy data to userland if data was received. */
+       if (ret < 0)
+               return ret;
+
+       if (copy_to_user(arg, &s_cmd, sizeof(s_cmd)))
+               return -EFAULT;
+
+       return 0;
+}
+
+static long ec_device_ioctl_readmem(struct cros_ec_device *ec, void __user *arg)
+{
+       struct cros_ec_readmem s_mem = { };
+       long num;
+
+       /* Not every platform supports direct reads */
+       if (!ec->cmd_readmem)
+               return -ENOTTY;
+
+       if (copy_from_user(&s_mem, arg, sizeof(s_mem)))
+               return -EFAULT;
+
+       num = ec->cmd_readmem(ec, s_mem.offset, s_mem.bytes, s_mem.buffer);
+       if (num <= 0)
+               return num;
+
+       if (copy_to_user((void __user *)arg, &s_mem, sizeof(s_mem)))
+               return -EFAULT;
+
+       return 0;
+}
+
+static long ec_device_ioctl(struct file *filp, unsigned int cmd,
+                           unsigned long arg)
+{
+       struct cros_ec_device *ec = filp->private_data;
+
+       if (_IOC_TYPE(cmd) != CROS_EC_DEV_IOC)
+               return -ENOTTY;
+
+       switch (cmd) {
+       case CROS_EC_DEV_IOCXCMD:
+               return ec_device_ioctl_xcmd(ec, (void __user *)arg);
+       case CROS_EC_DEV_IOCRDMEM:
+               return ec_device_ioctl_readmem(ec, (void __user *)arg);
+       }
+
+       return -ENOTTY;
+}
+
+/* Module initialization */
+static const struct file_operations fops = {
+       .open = ec_device_open,
+       .release = ec_device_release,
+       .read = ec_device_read,
+       .unlocked_ioctl = ec_device_ioctl,
+};
+
+static int ec_device_probe(struct platform_device *pdev)
+{
+       struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent);
+       int retval = -ENOTTY;
+       dev_t devno = MKDEV(ec_major, 0);
+
+       /* Instantiate it (and remember the EC) */
+       cdev_init(&ec->cdev, &fops);
+
+       retval = cdev_add(&ec->cdev, devno, 1);
+       if (retval) {
+               dev_err(&pdev->dev, ": failed to add character device\n");
+               return retval;
+       }
+
+       ec->vdev = device_create(cros_class, NULL, devno, ec,
+                                CROS_EC_DEV_NAME);
+       if (IS_ERR(ec->vdev)) {
+               retval = PTR_ERR(ec->vdev);
+               dev_err(&pdev->dev, ": failed to create device\n");
+               cdev_del(&ec->cdev);
+               return retval;
+       }
+
+       /* Initialize extra interfaces */
+       ec_dev_sysfs_init(ec);
+       ec_dev_lightbar_init(ec);
+
+       return 0;
+}
+
+static int ec_device_remove(struct platform_device *pdev)
+{
+       struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent);
+
+       ec_dev_lightbar_remove(ec);
+       ec_dev_sysfs_remove(ec);
+       device_destroy(cros_class, MKDEV(ec_major, 0));
+       cdev_del(&ec->cdev);
+       return 0;
+}
+
+static struct platform_driver cros_ec_dev_driver = {
+       .driver = {
+               .name = "cros-ec-ctl",
+       },
+       .probe = ec_device_probe,
+       .remove = ec_device_remove,
+};
+
+static int __init cros_ec_dev_init(void)
+{
+       int ret;
+       dev_t dev = 0;
+
+       cros_class = class_create(THIS_MODULE, "chromeos");
+       if (IS_ERR(cros_class)) {
+               pr_err(CROS_EC_DEV_NAME ": failed to register device class\n");
+               return PTR_ERR(cros_class);
+       }
+
+       /* Get a range of minor numbers (starting with 0) to work with */
+       ret = alloc_chrdev_region(&dev, 0, CROS_MAX_DEV, CROS_EC_DEV_NAME);
+       if (ret < 0) {
+               pr_err(CROS_EC_DEV_NAME ": alloc_chrdev_region() failed\n");
+               goto failed_chrdevreg;
+       }
+       ec_major = MAJOR(dev);
+
+       /* Register the driver */
+       ret = platform_driver_register(&cros_ec_dev_driver);
+       if (ret < 0) {
+               pr_warn(CROS_EC_DEV_NAME ": can't register driver: %d\n", ret);
+               goto failed_devreg;
+       }
+       return 0;
+
+failed_devreg:
+       unregister_chrdev_region(MKDEV(ec_major, 0), CROS_MAX_DEV);
+failed_chrdevreg:
+       class_destroy(cros_class);
+       return ret;
+}
+
+static void __exit cros_ec_dev_exit(void)
+{
+       platform_driver_unregister(&cros_ec_dev_driver);
+       unregister_chrdev(ec_major, CROS_EC_DEV_NAME);
+       class_destroy(cros_class);
+}
+
+module_init(cros_ec_dev_init);
+module_exit(cros_ec_dev_exit);
+
+MODULE_AUTHOR("Bill Richardson <wfrichar@chromium.org>");
+MODULE_DESCRIPTION("Userspace interface to the Chrome OS Embedded Controller");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL");
diff --git a/drivers/platform/chrome/cros_ec_dev.h b/drivers/platform/chrome/cros_ec_dev.h
new file mode 100644 (file)
index 0000000..45d67f7
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * cros_ec_dev - expose the Chrome OS Embedded Controller to userspace
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _CROS_EC_DEV_H_
+#define _CROS_EC_DEV_H_
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include <linux/mfd/cros_ec.h>
+
+#define CROS_EC_DEV_NAME "cros_ec"
+#define CROS_EC_DEV_VERSION "1.0.0"
+
+/*
+ * @offset: within EC_LPC_ADDR_MEMMAP region
+ * @bytes: number of bytes to read. zero means "read a string" (including '\0')
+ *         (at most only EC_MEMMAP_SIZE bytes can be read)
+ * @buffer: where to store the result
+ * ioctl returns the number of bytes read, negative on error
+ */
+struct cros_ec_readmem {
+       uint32_t offset;
+       uint32_t bytes;
+       uint8_t buffer[EC_MEMMAP_SIZE];
+};
+
+#define CROS_EC_DEV_IOC       0xEC
+#define CROS_EC_DEV_IOCXCMD   _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command)
+#define CROS_EC_DEV_IOCRDMEM  _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem)
+
+void ec_dev_sysfs_init(struct cros_ec_device *);
+void ec_dev_sysfs_remove(struct cros_ec_device *);
+
+void ec_dev_lightbar_init(struct cros_ec_device *);
+void ec_dev_lightbar_remove(struct cros_ec_device *);
+
+#endif /* _CROS_EC_DEV_H_ */
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c
new file mode 100644 (file)
index 0000000..b4ff47a
--- /dev/null
@@ -0,0 +1,367 @@
+/*
+ * cros_ec_lightbar - expose the Chromebook Pixel lightbar to userspace
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "cros_ec_lightbar: " fmt
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "cros_ec_dev.h"
+
+/* Rate-limit the lightbar interface to prevent DoS. */
+static unsigned long lb_interval_jiffies = 50 * HZ / 1000;
+
+static ssize_t interval_msec_show(struct device *dev,
+                                 struct device_attribute *attr, char *buf)
+{
+       unsigned long msec = lb_interval_jiffies * 1000 / HZ;
+
+       return scnprintf(buf, PAGE_SIZE, "%lu\n", msec);
+}
+
+static ssize_t interval_msec_store(struct device *dev,
+                                  struct device_attribute *attr,
+                                  const char *buf, size_t count)
+{
+       unsigned long msec;
+
+       if (kstrtoul(buf, 0, &msec))
+               return -EINVAL;
+
+       lb_interval_jiffies = msec * HZ / 1000;
+
+       return count;
+}
+
+static DEFINE_MUTEX(lb_mutex);
+/* Return 0 if able to throttle correctly, error otherwise */
+static int lb_throttle(void)
+{
+       static unsigned long last_access;
+       unsigned long now, next_timeslot;
+       long delay;
+       int ret = 0;
+
+       mutex_lock(&lb_mutex);
+
+       now = jiffies;
+       next_timeslot = last_access + lb_interval_jiffies;
+
+       if (time_before(now, next_timeslot)) {
+               delay = (long)(next_timeslot) - (long)now;
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (schedule_timeout(delay) > 0) {
+                       /* interrupted - just abort */
+                       ret = -EINTR;
+                       goto out;
+               }
+               now = jiffies;
+       }
+
+       last_access = now;
+out:
+       mutex_unlock(&lb_mutex);
+
+       return ret;
+}
+
+#define INIT_MSG(P, R) { \
+               .command = EC_CMD_LIGHTBAR_CMD, \
+               .outsize = sizeof(*P), \
+               .insize = sizeof(*R), \
+       }
+
+static int get_lightbar_version(struct cros_ec_device *ec,
+                               uint32_t *ver_ptr, uint32_t *flg_ptr)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       int ret;
+
+       param = (struct ec_params_lightbar *)msg.outdata;
+       param->cmd = LIGHTBAR_CMD_VERSION;
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return 0;
+
+       switch (msg.result) {
+       case EC_RES_INVALID_PARAM:
+               /* Pixel had no version command. */
+               if (ver_ptr)
+                       *ver_ptr = 0;
+               if (flg_ptr)
+                       *flg_ptr = 0;
+               return 1;
+
+       case EC_RES_SUCCESS:
+               resp = (struct ec_response_lightbar *)msg.indata;
+
+               /* Future devices w/lightbars should implement this command */
+               if (ver_ptr)
+                       *ver_ptr = resp->version.num;
+               if (flg_ptr)
+                       *flg_ptr = resp->version.flags;
+               return 1;
+       }
+
+       /* Anything else (ie, EC_RES_INVALID_COMMAND) - no lightbar */
+       return 0;
+}
+
+static ssize_t version_show(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       uint32_t version, flags;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+       int ret;
+
+       ret = lb_throttle();
+       if (ret)
+               return ret;
+
+       /* This should always succeed, because we check during init. */
+       if (!get_lightbar_version(ec, &version, &flags))
+               return -EIO;
+
+       return scnprintf(buf, PAGE_SIZE, "%d %d\n", version, flags);
+}
+
+static ssize_t brightness_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       int ret;
+       unsigned int val;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       if (kstrtouint(buf, 0, &val))
+               return -EINVAL;
+
+       param = (struct ec_params_lightbar *)msg.outdata;
+       param->cmd = LIGHTBAR_CMD_BRIGHTNESS;
+       param->brightness.num = val;
+       ret = lb_throttle();
+       if (ret)
+               return ret;
+
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       if (msg.result != EC_RES_SUCCESS)
+               return -EINVAL;
+
+       return count;
+}
+
+
+/*
+ * We expect numbers, and we'll keep reading until we find them, skipping over
+ * any whitespace (sysfs guarantees that the input is null-terminated). Every
+ * four numbers are sent to the lightbar as <LED,R,G,B>. We fail at the first
+ * parsing error, if we don't parse any numbers, or if we have numbers left
+ * over.
+ */
+static ssize_t led_rgb_store(struct device *dev, struct device_attribute *attr,
+                            const char *buf, size_t count)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+       unsigned int val[4];
+       int ret, i = 0, j = 0, ok = 0;
+
+       do {
+               /* Skip any whitespace */
+               while (*buf && isspace(*buf))
+                       buf++;
+
+               if (!*buf)
+                       break;
+
+               ret = sscanf(buf, "%i", &val[i++]);
+               if (ret == 0)
+                       return -EINVAL;
+
+               if (i == 4) {
+                       param = (struct ec_params_lightbar *)msg.outdata;
+                       param->cmd = LIGHTBAR_CMD_RGB;
+                       param->rgb.led = val[0];
+                       param->rgb.red = val[1];
+                       param->rgb.green = val[2];
+                       param->rgb.blue = val[3];
+                       /*
+                        * Throttle only the first of every four transactions,
+                        * so that the user can update all four LEDs at once.
+                        */
+                       if ((j++ % 4) == 0) {
+                               ret = lb_throttle();
+                               if (ret)
+                                       return ret;
+                       }
+
+                       ret = cros_ec_cmd_xfer(ec, &msg);
+                       if (ret < 0)
+                               return ret;
+
+                       if (msg.result != EC_RES_SUCCESS)
+                               return -EINVAL;
+
+                       i = 0;
+                       ok = 1;
+               }
+
+               /* Skip over the number we just read */
+               while (*buf && !isspace(*buf))
+                       buf++;
+
+       } while (*buf);
+
+       return (ok && i == 0) ? count : -EINVAL;
+}
+
+static char const *seqname[] = {
+       "ERROR", "S5", "S3", "S0", "S5S3", "S3S0",
+       "S0S3", "S3S5", "STOP", "RUN", "PULSE", "TEST", "KONAMI",
+};
+
+static ssize_t sequence_show(struct device *dev,
+                            struct device_attribute *attr, char *buf)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       int ret;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       param = (struct ec_params_lightbar *)msg.outdata;
+       param->cmd = LIGHTBAR_CMD_GET_SEQ;
+       ret = lb_throttle();
+       if (ret)
+               return ret;
+
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       if (msg.result != EC_RES_SUCCESS)
+               return scnprintf(buf, PAGE_SIZE,
+                                "ERROR: EC returned %d\n", msg.result);
+
+       resp = (struct ec_response_lightbar *)msg.indata;
+       if (resp->get_seq.num >= ARRAY_SIZE(seqname))
+               return scnprintf(buf, PAGE_SIZE, "%d\n", resp->get_seq.num);
+       else
+               return scnprintf(buf, PAGE_SIZE, "%s\n",
+                                seqname[resp->get_seq.num]);
+}
+
+static ssize_t sequence_store(struct device *dev, struct device_attribute *attr,
+                             const char *buf, size_t count)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       unsigned int num;
+       int ret, len;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       for (len = 0; len < count; len++)
+               if (!isalnum(buf[len]))
+                       break;
+
+       for (num = 0; num < ARRAY_SIZE(seqname); num++)
+               if (!strncasecmp(seqname[num], buf, len))
+                       break;
+
+       if (num >= ARRAY_SIZE(seqname)) {
+               ret = kstrtouint(buf, 0, &num);
+               if (ret)
+                       return ret;
+       }
+
+       param = (struct ec_params_lightbar *)msg.outdata;
+       param->cmd = LIGHTBAR_CMD_SEQ;
+       param->seq.num = num;
+       ret = lb_throttle();
+       if (ret)
+               return ret;
+
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       if (msg.result != EC_RES_SUCCESS)
+               return -EINVAL;
+
+       return count;
+}
+
+/* Module initialization */
+
+static DEVICE_ATTR_RW(interval_msec);
+static DEVICE_ATTR_RO(version);
+static DEVICE_ATTR_WO(brightness);
+static DEVICE_ATTR_WO(led_rgb);
+static DEVICE_ATTR_RW(sequence);
+static struct attribute *__lb_cmds_attrs[] = {
+       &dev_attr_interval_msec.attr,
+       &dev_attr_version.attr,
+       &dev_attr_brightness.attr,
+       &dev_attr_led_rgb.attr,
+       &dev_attr_sequence.attr,
+       NULL,
+};
+static struct attribute_group lb_cmds_attr_group = {
+       .name = "lightbar",
+       .attrs = __lb_cmds_attrs,
+};
+
+void ec_dev_lightbar_init(struct cros_ec_device *ec)
+{
+       int ret = 0;
+
+       /* Only instantiate this stuff if the EC has a lightbar */
+       if (!get_lightbar_version(ec, NULL, NULL))
+               return;
+
+       ret = sysfs_create_group(&ec->vdev->kobj, &lb_cmds_attr_group);
+       if (ret)
+               pr_warn("sysfs_create_group() failed: %d\n", ret);
+}
+
+void ec_dev_lightbar_remove(struct cros_ec_device *ec)
+{
+       sysfs_remove_group(&ec->vdev->kobj, &lb_cmds_attr_group);
+}
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
new file mode 100644 (file)
index 0000000..8f9ac4d
--- /dev/null
@@ -0,0 +1,319 @@
+/*
+ * cros_ec_lpc - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2012-2015 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#include <linux/dmi.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+
+#define DRV_NAME "cros_ec_lpc"
+
+static int ec_response_timed_out(void)
+{
+       unsigned long one_second = jiffies + HZ;
+
+       usleep_range(200, 300);
+       do {
+               if (!(inb(EC_LPC_ADDR_HOST_CMD) & EC_LPC_STATUS_BUSY_MASK))
+                       return 0;
+               usleep_range(100, 200);
+       } while (time_before(jiffies, one_second));
+
+       return 1;
+}
+
+static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
+                               struct cros_ec_command *msg)
+{
+       struct ec_lpc_host_args args;
+       int csum;
+       int i;
+       int ret = 0;
+
+       if (msg->outsize > EC_PROTO2_MAX_PARAM_SIZE ||
+           msg->insize > EC_PROTO2_MAX_PARAM_SIZE) {
+               dev_err(ec->dev,
+                       "invalid buffer sizes (out %d, in %d)\n",
+                       msg->outsize, msg->insize);
+               return -EINVAL;
+       }
+
+       /* Now actually send the command to the EC and get the result */
+       args.flags = EC_HOST_ARGS_FLAG_FROM_HOST;
+       args.command_version = msg->version;
+       args.data_size = msg->outsize;
+
+       /* Initialize checksum */
+       csum = msg->command + args.flags +
+               args.command_version + args.data_size;
+
+       /* Copy data and update checksum */
+       for (i = 0; i < msg->outsize; i++) {
+               outb(msg->outdata[i], EC_LPC_ADDR_HOST_PARAM + i);
+               csum += msg->outdata[i];
+       }
+
+       /* Finalize checksum and write args */
+       args.checksum = csum & 0xFF;
+       outb(args.flags, EC_LPC_ADDR_HOST_ARGS);
+       outb(args.command_version, EC_LPC_ADDR_HOST_ARGS + 1);
+       outb(args.data_size, EC_LPC_ADDR_HOST_ARGS + 2);
+       outb(args.checksum, EC_LPC_ADDR_HOST_ARGS + 3);
+
+       /* Here we go */
+       outb(msg->command, EC_LPC_ADDR_HOST_CMD);
+
+       if (ec_response_timed_out()) {
+               dev_warn(ec->dev, "EC responsed timed out\n");
+               ret = -EIO;
+               goto done;
+       }
+
+       /* Check result */
+       msg->result = inb(EC_LPC_ADDR_HOST_DATA);
+
+       switch (msg->result) {
+       case EC_RES_SUCCESS:
+               break;
+       case EC_RES_IN_PROGRESS:
+               ret = -EAGAIN;
+               dev_dbg(ec->dev, "command 0x%02x in progress\n",
+                       msg->command);
+               goto done;
+       default:
+               dev_dbg(ec->dev, "command 0x%02x returned %d\n",
+                       msg->command, msg->result);
+       }
+
+       /* Read back args */
+       args.flags = inb(EC_LPC_ADDR_HOST_ARGS);
+       args.command_version = inb(EC_LPC_ADDR_HOST_ARGS + 1);
+       args.data_size = inb(EC_LPC_ADDR_HOST_ARGS + 2);
+       args.checksum = inb(EC_LPC_ADDR_HOST_ARGS + 3);
+
+       if (args.data_size > msg->insize) {
+               dev_err(ec->dev,
+                       "packet too long (%d bytes, expected %d)",
+                       args.data_size, msg->insize);
+               ret = -ENOSPC;
+               goto done;
+       }
+
+       /* Start calculating response checksum */
+       csum = msg->command + args.flags +
+               args.command_version + args.data_size;
+
+       /* Read response and update checksum */
+       for (i = 0; i < args.data_size; i++) {
+               msg->indata[i] = inb(EC_LPC_ADDR_HOST_PARAM + i);
+               csum += msg->indata[i];
+       }
+
+       /* Verify checksum */
+       if (args.checksum != (csum & 0xFF)) {
+               dev_err(ec->dev,
+                       "bad packet checksum, expected %02x, got %02x\n",
+                       args.checksum, csum & 0xFF);
+               ret = -EBADMSG;
+               goto done;
+       }
+
+       /* Return actual amount of data received */
+       ret = args.data_size;
+done:
+       return ret;
+}
+
+/* Returns num bytes read, or negative on error. Doesn't need locking. */
+static int cros_ec_lpc_readmem(struct cros_ec_device *ec, unsigned int offset,
+                              unsigned int bytes, void *dest)
+{
+       int i = offset;
+       char *s = dest;
+       int cnt = 0;
+
+       if (offset >= EC_MEMMAP_SIZE - bytes)
+               return -EINVAL;
+
+       /* fixed length */
+       if (bytes) {
+               for (; cnt < bytes; i++, s++, cnt++)
+                       *s = inb(EC_LPC_ADDR_MEMMAP + i);
+               return cnt;
+       }
+
+       /* string */
+       for (; i < EC_MEMMAP_SIZE; i++, s++) {
+               *s = inb(EC_LPC_ADDR_MEMMAP + i);
+               cnt++;
+               if (!*s)
+                       break;
+       }
+
+       return cnt;
+}
+
+static int cros_ec_lpc_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct cros_ec_device *ec_dev;
+       int ret;
+
+       if (!devm_request_region(dev, EC_LPC_ADDR_MEMMAP, EC_MEMMAP_SIZE,
+                                dev_name(dev))) {
+               dev_err(dev, "couldn't reserve memmap region\n");
+               return -EBUSY;
+       }
+
+       if ((inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID) != 'E') ||
+           (inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID + 1) != 'C')) {
+               dev_err(dev, "EC ID not detected\n");
+               return -ENODEV;
+       }
+
+       if (!devm_request_region(dev, EC_HOST_CMD_REGION0,
+                                EC_HOST_CMD_REGION_SIZE, dev_name(dev))) {
+               dev_err(dev, "couldn't reserve region0\n");
+               return -EBUSY;
+       }
+       if (!devm_request_region(dev, EC_HOST_CMD_REGION1,
+                                EC_HOST_CMD_REGION_SIZE, dev_name(dev))) {
+               dev_err(dev, "couldn't reserve region1\n");
+               return -EBUSY;
+       }
+
+       ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL);
+       if (!ec_dev)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, ec_dev);
+       ec_dev->dev = dev;
+       ec_dev->ec_name = pdev->name;
+       ec_dev->phys_name = dev_name(dev);
+       ec_dev->parent = dev;
+       ec_dev->cmd_xfer = cros_ec_cmd_xfer_lpc;
+       ec_dev->cmd_readmem = cros_ec_lpc_readmem;
+
+       ret = cros_ec_register(ec_dev);
+       if (ret) {
+               dev_err(dev, "couldn't register ec_dev (%d)\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int cros_ec_lpc_remove(struct platform_device *pdev)
+{
+       struct cros_ec_device *ec_dev;
+
+       ec_dev = platform_get_drvdata(pdev);
+       cros_ec_remove(ec_dev);
+
+       return 0;
+}
+
+static struct dmi_system_id cros_ec_lpc_dmi_table[] __initdata = {
+       {
+               /*
+                * Today all Chromebooks/boxes ship with Google_* as version and
+                * coreboot as bios vendor. No other systems with this
+                * combination are known to date.
+                */
+               .matches = {
+                       DMI_MATCH(DMI_BIOS_VENDOR, "coreboot"),
+                       DMI_MATCH(DMI_BIOS_VERSION, "Google_"),
+               },
+       },
+       {
+               /* x86-link, the Chromebook Pixel. */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Link"),
+               },
+       },
+       {
+               /* x86-peppy, the Acer C720 Chromebook. */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Peppy"),
+               },
+       },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(dmi, cros_ec_lpc_dmi_table);
+
+static struct platform_driver cros_ec_lpc_driver = {
+       .driver = {
+               .name = DRV_NAME,
+       },
+       .probe = cros_ec_lpc_probe,
+       .remove = cros_ec_lpc_remove,
+};
+
+static struct platform_device cros_ec_lpc_device = {
+       .name = DRV_NAME
+};
+
+static int __init cros_ec_lpc_init(void)
+{
+       int ret;
+
+       if (!dmi_check_system(cros_ec_lpc_dmi_table)) {
+               pr_err(DRV_NAME ": unsupported system.\n");
+               return -ENODEV;
+       }
+
+       /* Register the driver */
+       ret = platform_driver_register(&cros_ec_lpc_driver);
+       if (ret) {
+               pr_err(DRV_NAME ": can't register driver: %d\n", ret);
+               return ret;
+       }
+
+       /* Register the device, and it'll get hooked up automatically */
+       ret = platform_device_register(&cros_ec_lpc_device);
+       if (ret) {
+               pr_err(DRV_NAME ": can't register device: %d\n", ret);
+               platform_driver_unregister(&cros_ec_lpc_driver);
+               return ret;
+       }
+
+       return 0;
+}
+
+static void __exit cros_ec_lpc_exit(void)
+{
+       platform_device_unregister(&cros_ec_lpc_device);
+       platform_driver_unregister(&cros_ec_lpc_driver);
+}
+
+module_init(cros_ec_lpc_init);
+module_exit(cros_ec_lpc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ChromeOS EC LPC driver");
diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c
new file mode 100644 (file)
index 0000000..fb62ab6
--- /dev/null
@@ -0,0 +1,271 @@
+/*
+ * cros_ec_sysfs - expose the Chrome OS EC through sysfs
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "cros_ec_sysfs: " fmt
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/stat.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "cros_ec_dev.h"
+
+/* Accessor functions */
+
+static ssize_t show_ec_reboot(struct device *dev,
+                             struct device_attribute *attr, char *buf)
+{
+       int count = 0;
+
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          "ro|rw|cancel|cold|disable-jump|hibernate");
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          " [at-shutdown]\n");
+       return count;
+}
+
+static ssize_t store_ec_reboot(struct device *dev,
+                              struct device_attribute *attr,
+                              const char *buf, size_t count)
+{
+       static const struct {
+               const char * const str;
+               uint8_t cmd;
+               uint8_t flags;
+       } words[] = {
+               {"cancel",       EC_REBOOT_CANCEL, 0},
+               {"ro",           EC_REBOOT_JUMP_RO, 0},
+               {"rw",           EC_REBOOT_JUMP_RW, 0},
+               {"cold",         EC_REBOOT_COLD, 0},
+               {"disable-jump", EC_REBOOT_DISABLE_JUMP, 0},
+               {"hibernate",    EC_REBOOT_HIBERNATE, 0},
+               {"at-shutdown",  -1, EC_REBOOT_FLAG_ON_AP_SHUTDOWN},
+       };
+       struct cros_ec_command msg = { 0 };
+       struct ec_params_reboot_ec *param =
+               (struct ec_params_reboot_ec *)msg.outdata;
+       int got_cmd = 0, offset = 0;
+       int i;
+       int ret;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       param->flags = 0;
+       while (1) {
+               /* Find word to start scanning */
+               while (buf[offset] && isspace(buf[offset]))
+                       offset++;
+               if (!buf[offset])
+                       break;
+
+               for (i = 0; i < ARRAY_SIZE(words); i++) {
+                       if (!strncasecmp(words[i].str, buf+offset,
+                                        strlen(words[i].str))) {
+                               if (words[i].flags) {
+                                       param->flags |= words[i].flags;
+                               } else {
+                                       param->cmd = words[i].cmd;
+                                       got_cmd = 1;
+                               }
+                               break;
+                       }
+               }
+
+               /* On to the next word, if any */
+               while (buf[offset] && !isspace(buf[offset]))
+                       offset++;
+       }
+
+       if (!got_cmd)
+               return -EINVAL;
+
+       msg.command = EC_CMD_REBOOT_EC;
+       msg.outsize = sizeof(param);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+       if (msg.result != EC_RES_SUCCESS) {
+               dev_dbg(ec->dev, "EC result %d\n", msg.result);
+               return -EINVAL;
+       }
+
+       return count;
+}
+
+static ssize_t show_ec_version(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       static const char * const image_names[] = {"unknown", "RO", "RW"};
+       struct ec_response_get_version *r_ver;
+       struct ec_response_get_chip_info *r_chip;
+       struct ec_response_board_version *r_board;
+       struct cros_ec_command msg = { 0 };
+       int ret;
+       int count = 0;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       /* Get versions. RW may change. */
+       msg.command = EC_CMD_GET_VERSION;
+       msg.insize = sizeof(*r_ver);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+       if (msg.result != EC_RES_SUCCESS)
+               return scnprintf(buf, PAGE_SIZE,
+                                "ERROR: EC returned %d\n", msg.result);
+
+       r_ver = (struct ec_response_get_version *)msg.indata;
+       /* Strings should be null-terminated, but let's be sure. */
+       r_ver->version_string_ro[sizeof(r_ver->version_string_ro) - 1] = '\0';
+       r_ver->version_string_rw[sizeof(r_ver->version_string_rw) - 1] = '\0';
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          "RO version:    %s\n", r_ver->version_string_ro);
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          "RW version:    %s\n", r_ver->version_string_rw);
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          "Firmware copy: %s\n",
+                          (r_ver->current_image < ARRAY_SIZE(image_names) ?
+                           image_names[r_ver->current_image] : "?"));
+
+       /* Get build info. */
+       msg.command = EC_CMD_GET_BUILD_INFO;
+       msg.insize = sizeof(msg.indata);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Build info:    XFER ERROR %d\n", ret);
+       else if (msg.result != EC_RES_SUCCESS)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Build info:    EC error %d\n", msg.result);
+       else {
+               msg.indata[sizeof(msg.indata) - 1] = '\0';
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Build info:    %s\n", msg.indata);
+       }
+
+       /* Get chip info. */
+       msg.command = EC_CMD_GET_CHIP_INFO;
+       msg.insize = sizeof(*r_chip);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip info:     XFER ERROR %d\n", ret);
+       else if (msg.result != EC_RES_SUCCESS)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip info:     EC error %d\n", msg.result);
+       else {
+               r_chip = (struct ec_response_get_chip_info *)msg.indata;
+
+               r_chip->vendor[sizeof(r_chip->vendor) - 1] = '\0';
+               r_chip->name[sizeof(r_chip->name) - 1] = '\0';
+               r_chip->revision[sizeof(r_chip->revision) - 1] = '\0';
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip vendor:   %s\n", r_chip->vendor);
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip name:     %s\n", r_chip->name);
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip revision: %s\n", r_chip->revision);
+       }
+
+       /* Get board version */
+       msg.command = EC_CMD_GET_BOARD_VERSION;
+       msg.insize = sizeof(*r_board);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Board version: XFER ERROR %d\n", ret);
+       else if (msg.result != EC_RES_SUCCESS)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Board version: EC error %d\n", msg.result);
+       else {
+               r_board = (struct ec_response_board_version *)msg.indata;
+
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Board version: %d\n",
+                                  r_board->board_version);
+       }
+
+       return count;
+}
+
+static ssize_t show_ec_flashinfo(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+{
+       struct ec_response_flash_info *resp;
+       struct cros_ec_command msg = { 0 };
+       int ret;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       /* The flash info shouldn't ever change, but ask each time anyway. */
+       msg.command = EC_CMD_FLASH_INFO;
+       msg.insize = sizeof(*resp);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+       if (msg.result != EC_RES_SUCCESS)
+               return scnprintf(buf, PAGE_SIZE,
+                                "ERROR: EC returned %d\n", msg.result);
+
+       resp = (struct ec_response_flash_info *)msg.indata;
+
+       return scnprintf(buf, PAGE_SIZE,
+                        "FlashSize %d\nWriteSize %d\n"
+                        "EraseSize %d\nProtectSize %d\n",
+                        resp->flash_size, resp->write_block_size,
+                        resp->erase_block_size, resp->protect_block_size);
+}
+
+/* Module initialization */
+
+static DEVICE_ATTR(reboot, S_IWUSR | S_IRUGO, show_ec_reboot, store_ec_reboot);
+static DEVICE_ATTR(version, S_IRUGO, show_ec_version, NULL);
+static DEVICE_ATTR(flashinfo, S_IRUGO, show_ec_flashinfo, NULL);
+
+static struct attribute *__ec_attrs[] = {
+       &dev_attr_reboot.attr,
+       &dev_attr_version.attr,
+       &dev_attr_flashinfo.attr,
+       NULL,
+};
+
+static struct attribute_group ec_attr_group = {
+       .attrs = __ec_attrs,
+};
+
+void ec_dev_sysfs_init(struct cros_ec_device *ec)
+{
+       int error;
+
+       error = sysfs_create_group(&ec->vdev->kobj, &ec_attr_group);
+       if (error)
+               pr_warn("failed to create group: %d\n", error);
+}
+
+void ec_dev_sysfs_remove(struct cros_ec_device *ec)
+{
+       sysfs_remove_group(&ec->vdev->kobj, &ec_attr_group);
+}
index 9752761..f9f205c 100644 (file)
@@ -614,6 +614,7 @@ config ACPI_TOSHIBA
        depends on INPUT
        depends on RFKILL || RFKILL = n
        depends on SERIO_I8042 || SERIO_I8042 = n
+       depends on ACPI_VIDEO || ACPI_VIDEO = n
        select INPUT_POLLDEV
        select INPUT_SPARSEKMAP
        ---help---
index 66d6d22..6808715 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/delay.h>
 #include <linux/pci.h>
 #include <linux/vga_switcheroo.h>
+#include <linux/vgaarb.h>
 #include <acpi/video.h>
 #include <asm/io.h>
 
@@ -31,6 +32,7 @@ struct apple_gmux_data {
        bool indexed;
        struct mutex index_lock;
 
+       struct pci_dev *pdev;
        struct backlight_device *bdev;
 
        /* switcheroo data */
@@ -415,6 +417,23 @@ static int gmux_resume(struct device *dev)
        return 0;
 }
 
+static struct pci_dev *gmux_get_io_pdev(void)
+{
+       struct pci_dev *pdev = NULL;
+
+       while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev))) {
+               u16 cmd;
+
+               pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+               if (!(cmd & PCI_COMMAND_IO))
+                       continue;
+
+               return pdev;
+       }
+
+       return NULL;
+}
+
 static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 {
        struct apple_gmux_data *gmux_data;
@@ -425,6 +444,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
        int ret = -ENXIO;
        acpi_status status;
        unsigned long long gpe;
+       struct pci_dev *pdev = NULL;
 
        if (apple_gmux_data)
                return -EBUSY;
@@ -475,7 +495,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
                        ver_minor = (version >> 16) & 0xff;
                        ver_release = (version >> 8) & 0xff;
                } else {
-                       pr_info("gmux device not present\n");
+                       pr_info("gmux device not present or IO disabled\n");
                        ret = -ENODEV;
                        goto err_release;
                }
@@ -483,6 +503,23 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
        pr_info("Found gmux version %d.%d.%d [%s]\n", ver_major, ver_minor,
                ver_release, (gmux_data->indexed ? "indexed" : "classic"));
 
+       /*
+        * Apple systems with gmux are EFI based and normally don't use
+        * VGA. In addition changing IO+MEM ownership between IGP and dGPU
+        * disables IO/MEM used for backlight control on some systems.
+        * Lock IO+MEM to GPU with active IO to prevent switch.
+        */
+       pdev = gmux_get_io_pdev();
+       if (pdev && vga_tryget(pdev,
+                              VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM)) {
+               pr_err("IO+MEM vgaarb-locking for PCI:%s failed\n",
+                       pci_name(pdev));
+               ret = -EBUSY;
+               goto err_release;
+       } else if (pdev)
+               pr_info("locked IO for PCI:%s\n", pci_name(pdev));
+       gmux_data->pdev = pdev;
+
        memset(&props, 0, sizeof(props));
        props.type = BACKLIGHT_PLATFORM;
        props.max_brightness = gmux_read32(gmux_data, GMUX_PORT_MAX_BRIGHTNESS);
@@ -574,6 +611,10 @@ err_enable_gpe:
 err_notify:
        backlight_device_unregister(bdev);
 err_release:
+       if (gmux_data->pdev)
+               vga_put(gmux_data->pdev,
+                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM);
+       pci_dev_put(pdev);
        release_region(gmux_data->iostart, gmux_data->iolen);
 err_free:
        kfree(gmux_data);
@@ -593,6 +634,11 @@ static void gmux_remove(struct pnp_dev *pnp)
                                           &gmux_notify_handler);
        }
 
+       if (gmux_data->pdev) {
+               vga_put(gmux_data->pdev,
+                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM);
+               pci_dev_put(gmux_data->pdev);
+       }
        backlight_device_unregister(gmux_data->bdev);
 
        release_region(gmux_data->iostart, gmux_data->iolen);
index 3d21efe..d688d80 100644 (file)
@@ -2,9 +2,11 @@
  *  Driver for Dell laptop extras
  *
  *  Copyright (c) Red Hat <mjg@redhat.com>
+ *  Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com>
+ *  Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com>
  *
- *  Based on documentation in the libsmbios package, Copyright (C) 2005 Dell
- *  Inc.
+ *  Based on documentation in the libsmbios package:
+ *  Copyright (C) 2005-2014 Dell Inc.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
 #include "../../firmware/dcdbas.h"
 
 #define BRIGHTNESS_TOKEN 0x7d
+#define KBD_LED_OFF_TOKEN 0x01E1
+#define KBD_LED_ON_TOKEN 0x01E2
+#define KBD_LED_AUTO_TOKEN 0x01E3
+#define KBD_LED_AUTO_25_TOKEN 0x02EA
+#define KBD_LED_AUTO_50_TOKEN 0x02EB
+#define KBD_LED_AUTO_75_TOKEN 0x02EC
+#define KBD_LED_AUTO_100_TOKEN 0x02F6
 
 /* This structure will be modified by the firmware when we enter
  * system management mode, hence the volatiles */
@@ -62,6 +71,13 @@ struct calling_interface_structure {
 
 struct quirk_entry {
        u8 touchpad_led;
+
+       int needs_kbd_timeouts;
+       /*
+        * Ordered list of timeouts expressed in seconds.
+        * The list must end with -1
+        */
+       int kbd_timeouts[];
 };
 
 static struct quirk_entry *quirks;
@@ -76,6 +92,15 @@ static int __init dmi_matched(const struct dmi_system_id *dmi)
        return 1;
 }
 
+/*
+ * These values come from Windows utility provided by Dell. If any other value
+ * is used then BIOS silently set timeout to 0 without any error message.
+ */
+static struct quirk_entry quirk_dell_xps13_9333 = {
+       .needs_kbd_timeouts = 1,
+       .kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 },
+};
+
 static int da_command_address;
 static int da_command_code;
 static int da_num_tokens;
@@ -267,6 +292,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = {
                },
                .driver_data = &quirk_dell_vostro_v130,
        },
+       {
+               .callback = dmi_matched,
+               .ident = "Dell XPS13 9333",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "XPS13 9333"),
+               },
+               .driver_data = &quirk_dell_xps13_9333,
+       },
        { }
 };
 
@@ -331,17 +365,29 @@ static void __init find_tokens(const struct dmi_header *dm, void *dummy)
        }
 }
 
-static int find_token_location(int tokenid)
+static int find_token_id(int tokenid)
 {
        int i;
+
        for (i = 0; i < da_num_tokens; i++) {
                if (da_tokens[i].tokenID == tokenid)
-                       return da_tokens[i].location;
+                       return i;
        }
 
        return -1;
 }
 
+static int find_token_location(int tokenid)
+{
+       int id;
+
+       id = find_token_id(tokenid);
+       if (id == -1)
+               return -1;
+
+       return da_tokens[id].location;
+}
+
 static struct calling_interface_buffer *
 dell_send_request(struct calling_interface_buffer *buffer, int class,
                  int select)
@@ -362,6 +408,20 @@ dell_send_request(struct calling_interface_buffer *buffer, int class,
        return buffer;
 }
 
+static inline int dell_smi_error(int value)
+{
+       switch (value) {
+       case 0: /* Completed successfully */
+               return 0;
+       case -1: /* Completed with error */
+               return -EIO;
+       case -2: /* Function not supported */
+               return -ENXIO;
+       default: /* Unknown error */
+               return -EINVAL;
+       }
+}
+
 /* Derived from information in DellWirelessCtl.cpp:
    Class 17, select 11 is radio control. It returns an array of 32-bit values.
 
@@ -716,7 +776,7 @@ static int dell_send_intensity(struct backlight_device *bd)
        else
                dell_send_request(buffer, 1, 1);
 
-out:
+ out:
        release_buffer();
        return ret;
 }
@@ -740,7 +800,7 @@ static int dell_get_intensity(struct backlight_device *bd)
 
        ret = buffer->output[1];
 
-out:
+ out:
        release_buffer();
        return ret;
 }
@@ -789,6 +849,1018 @@ static void touchpad_led_exit(void)
        led_classdev_unregister(&touchpad_led);
 }
 
+/*
+ * Derived from information in smbios-keyboard-ctl:
+ *
+ * cbClass 4
+ * cbSelect 11
+ * Keyboard illumination
+ * cbArg1 determines the function to be performed
+ *
+ * cbArg1 0x0 = Get Feature Information
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbRES2, word0  Bitmap of user-selectable modes
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *  cbRES2, byte2  Reserved for future use
+ *  cbRES2, byte3  Keyboard illumination type
+ *     0         Reserved
+ *     1         Tasklight
+ *     2         Backlight
+ *     3-255     Reserved for future use
+ *  cbRES3, byte0  Supported auto keyboard illumination trigger bitmap.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbRES3, byte1  Supported timeout unit bitmap
+ *     bit 0     Seconds
+ *     bit 1     Minutes
+ *     bit 2     Hours
+ *     bit 3     Days
+ *     bits 4-7  Reserved for future use
+ *  cbRES3, byte2  Number of keyboard light brightness levels
+ *  cbRES4, byte0  Maximum acceptable seconds value (0 if seconds not supported).
+ *  cbRES4, byte1  Maximum acceptable minutes value (0 if minutes not supported).
+ *  cbRES4, byte2  Maximum acceptable hours value (0 if hours not supported).
+ *  cbRES4, byte3  Maximum acceptable days value (0 if days not supported)
+ *
+ * cbArg1 0x1 = Get Current State
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbRES2, word0  Bitmap of current mode state
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *     Note: Only One bit can be set
+ *  cbRES2, byte2  Currently active auto keyboard illumination triggers.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbRES2, byte3  Current Timeout
+ *     bits 7:6  Timeout units indicator:
+ *     00b       Seconds
+ *     01b       Minutes
+ *     10b       Hours
+ *     11b       Days
+ *     bits 5:0  Timeout value (0-63) in sec/min/hr/day
+ *     NOTE: A value of 0 means always on (no timeout) if any bits of RES3 byte
+ *     are set upon return from the [Get feature information] call.
+ *  cbRES3, byte0  Current setting of ALS value that turns the light on or off.
+ *  cbRES3, byte1  Current ALS reading
+ *  cbRES3, byte2  Current keyboard light level.
+ *
+ * cbArg1 0x2 = Set New State
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbArg2, word0  Bitmap of current mode state
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *     Note: Only One bit can be set
+ *  cbArg2, byte2  Desired auto keyboard illumination triggers. Must remain inactive to allow
+ *                 keyboard to turn off automatically.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbArg2, byte3  Desired Timeout
+ *     bits 7:6  Timeout units indicator:
+ *     00b       Seconds
+ *     01b       Minutes
+ *     10b       Hours
+ *     11b       Days
+ *     bits 5:0  Timeout value (0-63) in sec/min/hr/day
+ *  cbArg3, byte0  Desired setting of ALS value that turns the light on or off.
+ *  cbArg3, byte2  Desired keyboard light level.
+ */
+
+
+enum kbd_timeout_unit {
+       KBD_TIMEOUT_SECONDS = 0,
+       KBD_TIMEOUT_MINUTES,
+       KBD_TIMEOUT_HOURS,
+       KBD_TIMEOUT_DAYS,
+};
+
+enum kbd_mode_bit {
+       KBD_MODE_BIT_OFF = 0,
+       KBD_MODE_BIT_ON,
+       KBD_MODE_BIT_ALS,
+       KBD_MODE_BIT_TRIGGER_ALS,
+       KBD_MODE_BIT_TRIGGER,
+       KBD_MODE_BIT_TRIGGER_25,
+       KBD_MODE_BIT_TRIGGER_50,
+       KBD_MODE_BIT_TRIGGER_75,
+       KBD_MODE_BIT_TRIGGER_100,
+};
+
+#define kbd_is_als_mode_bit(bit) \
+       ((bit) == KBD_MODE_BIT_ALS || (bit) == KBD_MODE_BIT_TRIGGER_ALS)
+#define kbd_is_trigger_mode_bit(bit) \
+       ((bit) >= KBD_MODE_BIT_TRIGGER_ALS && (bit) <= KBD_MODE_BIT_TRIGGER_100)
+#define kbd_is_level_mode_bit(bit) \
+       ((bit) >= KBD_MODE_BIT_TRIGGER_25 && (bit) <= KBD_MODE_BIT_TRIGGER_100)
+
+struct kbd_info {
+       u16 modes;
+       u8 type;
+       u8 triggers;
+       u8 levels;
+       u8 seconds;
+       u8 minutes;
+       u8 hours;
+       u8 days;
+};
+
+struct kbd_state {
+       u8 mode_bit;
+       u8 triggers;
+       u8 timeout_value;
+       u8 timeout_unit;
+       u8 als_setting;
+       u8 als_value;
+       u8 level;
+};
+
+static const int kbd_tokens[] = {
+       KBD_LED_OFF_TOKEN,
+       KBD_LED_AUTO_25_TOKEN,
+       KBD_LED_AUTO_50_TOKEN,
+       KBD_LED_AUTO_75_TOKEN,
+       KBD_LED_AUTO_100_TOKEN,
+       KBD_LED_ON_TOKEN,
+};
+
+static u16 kbd_token_bits;
+
+static struct kbd_info kbd_info;
+static bool kbd_als_supported;
+static bool kbd_triggers_supported;
+
+static u8 kbd_mode_levels[16];
+static int kbd_mode_levels_count;
+
+static u8 kbd_previous_level;
+static u8 kbd_previous_mode_bit;
+
+static bool kbd_led_present;
+
+/*
+ * NOTE: there are three ways to set the keyboard backlight level.
+ * First, via kbd_state.mode_bit (assigning KBD_MODE_BIT_TRIGGER_* value).
+ * Second, via kbd_state.level (assigning numerical value <= kbd_info.levels).
+ * Third, via SMBIOS tokens (KBD_LED_* in kbd_tokens)
+ *
+ * There are laptops which support only one of these methods. If we want to
+ * support as many machines as possible we need to implement all three methods.
+ * The first two methods use the kbd_state structure. The third uses SMBIOS
+ * tokens. If kbd_info.levels == 0, the machine does not support setting the
+ * keyboard backlight level via kbd_state.level.
+ */
+
+static int kbd_get_info(struct kbd_info *info)
+{
+       u8 units;
+       int ret;
+
+       get_buffer();
+
+       buffer->input[0] = 0x0;
+       dell_send_request(buffer, 4, 11);
+       ret = buffer->output[0];
+
+       if (ret) {
+               ret = dell_smi_error(ret);
+               goto out;
+       }
+
+       info->modes = buffer->output[1] & 0xFFFF;
+       info->type = (buffer->output[1] >> 24) & 0xFF;
+       info->triggers = buffer->output[2] & 0xFF;
+       units = (buffer->output[2] >> 8) & 0xFF;
+       info->levels = (buffer->output[2] >> 16) & 0xFF;
+
+       if (units & BIT(0))
+               info->seconds = (buffer->output[3] >> 0) & 0xFF;
+       if (units & BIT(1))
+               info->minutes = (buffer->output[3] >> 8) & 0xFF;
+       if (units & BIT(2))
+               info->hours = (buffer->output[3] >> 16) & 0xFF;
+       if (units & BIT(3))
+               info->days = (buffer->output[3] >> 24) & 0xFF;
+
+ out:
+       release_buffer();
+       return ret;
+}
+
+static unsigned int kbd_get_max_level(void)
+{
+       if (kbd_info.levels != 0)
+               return kbd_info.levels;
+       if (kbd_mode_levels_count > 0)
+               return kbd_mode_levels_count - 1;
+       return 0;
+}
+
+static int kbd_get_level(struct kbd_state *state)
+{
+       int i;
+
+       if (kbd_info.levels != 0)
+               return state->level;
+
+       if (kbd_mode_levels_count > 0) {
+               for (i = 0; i < kbd_mode_levels_count; ++i)
+                       if (kbd_mode_levels[i] == state->mode_bit)
+                               return i;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int kbd_set_level(struct kbd_state *state, u8 level)
+{
+       if (kbd_info.levels != 0) {
+               if (level != 0)
+                       kbd_previous_level = level;
+               if (state->level == level)
+                       return 0;
+               state->level = level;
+               if (level != 0 && state->mode_bit == KBD_MODE_BIT_OFF)
+                       state->mode_bit = kbd_previous_mode_bit;
+               else if (level == 0 && state->mode_bit != KBD_MODE_BIT_OFF) {
+                       kbd_previous_mode_bit = state->mode_bit;
+                       state->mode_bit = KBD_MODE_BIT_OFF;
+               }
+               return 0;
+       }
+
+       if (kbd_mode_levels_count > 0 && level < kbd_mode_levels_count) {
+               if (level != 0)
+                       kbd_previous_level = level;
+               state->mode_bit = kbd_mode_levels[level];
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int kbd_get_state(struct kbd_state *state)
+{
+       int ret;
+
+       get_buffer();
+
+       buffer->input[0] = 0x1;
+       dell_send_request(buffer, 4, 11);
+       ret = buffer->output[0];
+
+       if (ret) {
+               ret = dell_smi_error(ret);
+               goto out;
+       }
+
+       state->mode_bit = ffs(buffer->output[1] & 0xFFFF);
+       if (state->mode_bit != 0)
+               state->mode_bit--;
+
+       state->triggers = (buffer->output[1] >> 16) & 0xFF;
+       state->timeout_value = (buffer->output[1] >> 24) & 0x3F;
+       state->timeout_unit = (buffer->output[1] >> 30) & 0x3;
+       state->als_setting = buffer->output[2] & 0xFF;
+       state->als_value = (buffer->output[2] >> 8) & 0xFF;
+       state->level = (buffer->output[2] >> 16) & 0xFF;
+
+ out:
+       release_buffer();
+       return ret;
+}
+
+static int kbd_set_state(struct kbd_state *state)
+{
+       int ret;
+
+       get_buffer();
+       buffer->input[0] = 0x2;
+       buffer->input[1] = BIT(state->mode_bit) & 0xFFFF;
+       buffer->input[1] |= (state->triggers & 0xFF) << 16;
+       buffer->input[1] |= (state->timeout_value & 0x3F) << 24;
+       buffer->input[1] |= (state->timeout_unit & 0x3) << 30;
+       buffer->input[2] = state->als_setting & 0xFF;
+       buffer->input[2] |= (state->level & 0xFF) << 16;
+       dell_send_request(buffer, 4, 11);
+       ret = buffer->output[0];
+       release_buffer();
+
+       return dell_smi_error(ret);
+}
+
+static int kbd_set_state_safe(struct kbd_state *state, struct kbd_state *old)
+{
+       int ret;
+
+       ret = kbd_set_state(state);
+       if (ret == 0)
+               return 0;
+
+       /*
+        * When setting the new state fails,try to restore the previous one.
+        * This is needed on some machines where BIOS sets a default state when
+        * setting a new state fails. This default state could be all off.
+        */
+
+       if (kbd_set_state(old))
+               pr_err("Setting old previous keyboard state failed\n");
+
+       return ret;
+}
+
+static int kbd_set_token_bit(u8 bit)
+{
+       int id;
+       int ret;
+
+       if (bit >= ARRAY_SIZE(kbd_tokens))
+               return -EINVAL;
+
+       id = find_token_id(kbd_tokens[bit]);
+       if (id == -1)
+               return -EINVAL;
+
+       get_buffer();
+       buffer->input[0] = da_tokens[id].location;
+       buffer->input[1] = da_tokens[id].value;
+       dell_send_request(buffer, 1, 0);
+       ret = buffer->output[0];
+       release_buffer();
+
+       return dell_smi_error(ret);
+}
+
+static int kbd_get_token_bit(u8 bit)
+{
+       int id;
+       int ret;
+       int val;
+
+       if (bit >= ARRAY_SIZE(kbd_tokens))
+               return -EINVAL;
+
+       id = find_token_id(kbd_tokens[bit]);
+       if (id == -1)
+               return -EINVAL;
+
+       get_buffer();
+       buffer->input[0] = da_tokens[id].location;
+       dell_send_request(buffer, 0, 0);
+       ret = buffer->output[0];
+       val = buffer->output[1];
+       release_buffer();
+
+       if (ret)
+               return dell_smi_error(ret);
+
+       return (val == da_tokens[id].value);
+}
+
+static int kbd_get_first_active_token_bit(void)
+{
+       int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(kbd_tokens); ++i) {
+               ret = kbd_get_token_bit(i);
+               if (ret == 1)
+                       return i;
+       }
+
+       return ret;
+}
+
+static int kbd_get_valid_token_counts(void)
+{
+       return hweight16(kbd_token_bits);
+}
+
+static inline int kbd_init_info(void)
+{
+       struct kbd_state state;
+       int ret;
+       int i;
+
+       ret = kbd_get_info(&kbd_info);
+       if (ret)
+               return ret;
+
+       kbd_get_state(&state);
+
+       /* NOTE: timeout value is stored in 6 bits so max value is 63 */
+       if (kbd_info.seconds > 63)
+               kbd_info.seconds = 63;
+       if (kbd_info.minutes > 63)
+               kbd_info.minutes = 63;
+       if (kbd_info.hours > 63)
+               kbd_info.hours = 63;
+       if (kbd_info.days > 63)
+               kbd_info.days = 63;
+
+       /* NOTE: On tested machines ON mode did not work and caused
+        *       problems (turned backlight off) so do not use it
+        */
+       kbd_info.modes &= ~BIT(KBD_MODE_BIT_ON);
+
+       kbd_previous_level = kbd_get_level(&state);
+       kbd_previous_mode_bit = state.mode_bit;
+
+       if (kbd_previous_level == 0 && kbd_get_max_level() != 0)
+               kbd_previous_level = 1;
+
+       if (kbd_previous_mode_bit == KBD_MODE_BIT_OFF) {
+               kbd_previous_mode_bit =
+                       ffs(kbd_info.modes & ~BIT(KBD_MODE_BIT_OFF));
+               if (kbd_previous_mode_bit != 0)
+                       kbd_previous_mode_bit--;
+       }
+
+       if (kbd_info.modes & (BIT(KBD_MODE_BIT_ALS) |
+                             BIT(KBD_MODE_BIT_TRIGGER_ALS)))
+               kbd_als_supported = true;
+
+       if (kbd_info.modes & (
+           BIT(KBD_MODE_BIT_TRIGGER_ALS) | BIT(KBD_MODE_BIT_TRIGGER) |
+           BIT(KBD_MODE_BIT_TRIGGER_25) | BIT(KBD_MODE_BIT_TRIGGER_50) |
+           BIT(KBD_MODE_BIT_TRIGGER_75) | BIT(KBD_MODE_BIT_TRIGGER_100)
+          ))
+               kbd_triggers_supported = true;
+
+       /* kbd_mode_levels[0] is reserved, see below */
+       for (i = 0; i < 16; ++i)
+               if (kbd_is_level_mode_bit(i) && (BIT(i) & kbd_info.modes))
+                       kbd_mode_levels[1 + kbd_mode_levels_count++] = i;
+
+       /*
+        * Find the first supported mode and assign to kbd_mode_levels[0].
+        * This should be 0 (off), but we cannot depend on the BIOS to
+        * support 0.
+        */
+       if (kbd_mode_levels_count > 0) {
+               for (i = 0; i < 16; ++i) {
+                       if (BIT(i) & kbd_info.modes) {
+                               kbd_mode_levels[0] = i;
+                               break;
+                       }
+               }
+               kbd_mode_levels_count++;
+       }
+
+       return 0;
+
+}
+
+static inline void kbd_init_tokens(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(kbd_tokens); ++i)
+               if (find_token_id(kbd_tokens[i]) != -1)
+                       kbd_token_bits |= BIT(i);
+}
+
+static void kbd_init(void)
+{
+       int ret;
+
+       ret = kbd_init_info();
+       kbd_init_tokens();
+
+       if (kbd_token_bits != 0 || ret == 0)
+               kbd_led_present = true;
+}
+
+static ssize_t kbd_led_timeout_store(struct device *dev,
+                                    struct device_attribute *attr,
+                                    const char *buf, size_t count)
+{
+       struct kbd_state new_state;
+       struct kbd_state state;
+       bool convert;
+       int value;
+       int ret;
+       char ch;
+       u8 unit;
+       int i;
+
+       ret = sscanf(buf, "%d %c", &value, &ch);
+       if (ret < 1)
+               return -EINVAL;
+       else if (ret == 1)
+               ch = 's';
+
+       if (value < 0)
+               return -EINVAL;
+
+       convert = false;
+
+       switch (ch) {
+       case 's':
+               if (value > kbd_info.seconds)
+                       convert = true;
+               unit = KBD_TIMEOUT_SECONDS;
+               break;
+       case 'm':
+               if (value > kbd_info.minutes)
+                       convert = true;
+               unit = KBD_TIMEOUT_MINUTES;
+               break;
+       case 'h':
+               if (value > kbd_info.hours)
+                       convert = true;
+               unit = KBD_TIMEOUT_HOURS;
+               break;
+       case 'd':
+               if (value > kbd_info.days)
+                       convert = true;
+               unit = KBD_TIMEOUT_DAYS;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (quirks && quirks->needs_kbd_timeouts)
+               convert = true;
+
+       if (convert) {
+               /* Convert value from current units to seconds */
+               switch (unit) {
+               case KBD_TIMEOUT_DAYS:
+                       value *= 24;
+               case KBD_TIMEOUT_HOURS:
+                       value *= 60;
+               case KBD_TIMEOUT_MINUTES:
+                       value *= 60;
+                       unit = KBD_TIMEOUT_SECONDS;
+               }
+
+               if (quirks && quirks->needs_kbd_timeouts) {
+                       for (i = 0; quirks->kbd_timeouts[i] != -1; i++) {
+                               if (value <= quirks->kbd_timeouts[i]) {
+                                       value = quirks->kbd_timeouts[i];
+                                       break;
+                               }
+                       }
+               }
+
+               if (value <= kbd_info.seconds && kbd_info.seconds) {
+                       unit = KBD_TIMEOUT_SECONDS;
+               } else if (value / 60 <= kbd_info.minutes && kbd_info.minutes) {
+                       value /= 60;
+                       unit = KBD_TIMEOUT_MINUTES;
+               } else if (value / (60 * 60) <= kbd_info.hours && kbd_info.hours) {
+                       value /= (60 * 60);
+                       unit = KBD_TIMEOUT_HOURS;
+               } else if (value / (60 * 60 * 24) <= kbd_info.days && kbd_info.days) {
+                       value /= (60 * 60 * 24);
+                       unit = KBD_TIMEOUT_DAYS;
+               } else {
+                       return -EINVAL;
+               }
+       }
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       new_state = state;
+       new_state.timeout_value = value;
+       new_state.timeout_unit = unit;
+
+       ret = kbd_set_state_safe(&new_state, &state);
+       if (ret)
+               return ret;
+
+       return count;
+}
+
+static ssize_t kbd_led_timeout_show(struct device *dev,
+                                   struct device_attribute *attr, char *buf)
+{
+       struct kbd_state state;
+       int ret;
+       int len;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       len = sprintf(buf, "%d", state.timeout_value);
+
+       switch (state.timeout_unit) {
+       case KBD_TIMEOUT_SECONDS:
+               return len + sprintf(buf+len, "s\n");
+       case KBD_TIMEOUT_MINUTES:
+               return len + sprintf(buf+len, "m\n");
+       case KBD_TIMEOUT_HOURS:
+               return len + sprintf(buf+len, "h\n");
+       case KBD_TIMEOUT_DAYS:
+               return len + sprintf(buf+len, "d\n");
+       default:
+               return -EINVAL;
+       }
+
+       return len;
+}
+
+static DEVICE_ATTR(stop_timeout, S_IRUGO | S_IWUSR,
+                  kbd_led_timeout_show, kbd_led_timeout_store);
+
+static const char * const kbd_led_triggers[] = {
+       "keyboard",
+       "touchpad",
+       /*"trackstick"*/ NULL, /* NOTE: trackstick is just alias for touchpad */
+       "mouse",
+};
+
+static ssize_t kbd_led_triggers_store(struct device *dev,
+                                     struct device_attribute *attr,
+                                     const char *buf, size_t count)
+{
+       struct kbd_state new_state;
+       struct kbd_state state;
+       bool triggers_enabled = false;
+       int trigger_bit = -1;
+       char trigger[21];
+       int i, ret;
+
+       ret = sscanf(buf, "%20s", trigger);
+       if (ret != 1)
+               return -EINVAL;
+
+       if (trigger[0] != '+' && trigger[0] != '-')
+               return -EINVAL;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       if (kbd_triggers_supported)
+               triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+
+       if (kbd_triggers_supported) {
+               for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); ++i) {
+                       if (!(kbd_info.triggers & BIT(i)))
+                               continue;
+                       if (!kbd_led_triggers[i])
+                               continue;
+                       if (strcmp(trigger+1, kbd_led_triggers[i]) != 0)
+                               continue;
+                       if (trigger[0] == '+' &&
+                           triggers_enabled && (state.triggers & BIT(i)))
+                               return count;
+                       if (trigger[0] == '-' &&
+                           (!triggers_enabled || !(state.triggers & BIT(i))))
+                               return count;
+                       trigger_bit = i;
+                       break;
+               }
+       }
+
+       if (trigger_bit != -1) {
+               new_state = state;
+               if (trigger[0] == '+')
+                       new_state.triggers |= BIT(trigger_bit);
+               else {
+                       new_state.triggers &= ~BIT(trigger_bit);
+                       /* NOTE: trackstick bit (2) must be disabled when
+                        *       disabling touchpad bit (1), otherwise touchpad
+                        *       bit (1) will not be disabled */
+                       if (trigger_bit == 1)
+                               new_state.triggers &= ~BIT(2);
+               }
+               if ((kbd_info.triggers & new_state.triggers) !=
+                   new_state.triggers)
+                       return -EINVAL;
+               if (new_state.triggers && !triggers_enabled) {
+                       new_state.mode_bit = KBD_MODE_BIT_TRIGGER;
+                       kbd_set_level(&new_state, kbd_previous_level);
+               } else if (new_state.triggers == 0) {
+                       kbd_set_level(&new_state, 0);
+               }
+               if (!(kbd_info.modes & BIT(new_state.mode_bit)))
+                       return -EINVAL;
+               ret = kbd_set_state_safe(&new_state, &state);
+               if (ret)
+                       return ret;
+               if (new_state.mode_bit != KBD_MODE_BIT_OFF)
+                       kbd_previous_mode_bit = new_state.mode_bit;
+               return count;
+       }
+
+       return -EINVAL;
+}
+
+static ssize_t kbd_led_triggers_show(struct device *dev,
+                                    struct device_attribute *attr, char *buf)
+{
+       struct kbd_state state;
+       bool triggers_enabled;
+       int level, i, ret;
+       int len = 0;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       len = 0;
+
+       if (kbd_triggers_supported) {
+               triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+               level = kbd_get_level(&state);
+               for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); ++i) {
+                       if (!(kbd_info.triggers & BIT(i)))
+                               continue;
+                       if (!kbd_led_triggers[i])
+                               continue;
+                       if ((triggers_enabled || level <= 0) &&
+                           (state.triggers & BIT(i)))
+                               buf[len++] = '+';
+                       else
+                               buf[len++] = '-';
+                       len += sprintf(buf+len, "%s ", kbd_led_triggers[i]);
+               }
+       }
+
+       if (len)
+               buf[len - 1] = '\n';
+
+       return len;
+}
+
+static DEVICE_ATTR(start_triggers, S_IRUGO | S_IWUSR,
+                  kbd_led_triggers_show, kbd_led_triggers_store);
+
+static ssize_t kbd_led_als_enabled_store(struct device *dev,
+                                        struct device_attribute *attr,
+                                        const char *buf, size_t count)
+{
+       struct kbd_state new_state;
+       struct kbd_state state;
+       bool triggers_enabled = false;
+       int enable;
+       int ret;
+
+       ret = kstrtoint(buf, 0, &enable);
+       if (ret)
+               return ret;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       if (enable == kbd_is_als_mode_bit(state.mode_bit))
+               return count;
+
+       new_state = state;
+
+       if (kbd_triggers_supported)
+               triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+
+       if (enable) {
+               if (triggers_enabled)
+                       new_state.mode_bit = KBD_MODE_BIT_TRIGGER_ALS;
+               else
+                       new_state.mode_bit = KBD_MODE_BIT_ALS;
+       } else {
+               if (triggers_enabled) {
+                       new_state.mode_bit = KBD_MODE_BIT_TRIGGER;
+                       kbd_set_level(&new_state, kbd_previous_level);
+               } else {
+                       new_state.mode_bit = KBD_MODE_BIT_ON;
+               }
+       }
+       if (!(kbd_info.modes & BIT(new_state.mode_bit)))
+               return -EINVAL;
+
+       ret = kbd_set_state_safe(&new_state, &state);
+       if (ret)
+               return ret;
+       kbd_previous_mode_bit = new_state.mode_bit;
+
+       return count;
+}
+
+static ssize_t kbd_led_als_enabled_show(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct kbd_state state;
+       bool enabled = false;
+       int ret;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+       enabled = kbd_is_als_mode_bit(state.mode_bit);
+
+       return sprintf(buf, "%d\n", enabled ? 1 : 0);
+}
+
+static DEVICE_ATTR(als_enabled, S_IRUGO | S_IWUSR,
+                  kbd_led_als_enabled_show, kbd_led_als_enabled_store);
+
+static ssize_t kbd_led_als_setting_store(struct device *dev,
+                                        struct device_attribute *attr,
+                                        const char *buf, size_t count)
+{
+       struct kbd_state state;
+       struct kbd_state new_state;
+       u8 setting;
+       int ret;
+
+       ret = kstrtou8(buf, 10, &setting);
+       if (ret)
+               return ret;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       new_state = state;
+       new_state.als_setting = setting;
+
+       ret = kbd_set_state_safe(&new_state, &state);
+       if (ret)
+               return ret;
+
+       return count;
+}
+
+static ssize_t kbd_led_als_setting_show(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct kbd_state state;
+       int ret;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       return sprintf(buf, "%d\n", state.als_setting);
+}
+
+static DEVICE_ATTR(als_setting, S_IRUGO | S_IWUSR,
+                  kbd_led_als_setting_show, kbd_led_als_setting_store);
+
+static struct attribute *kbd_led_attrs[] = {
+       &dev_attr_stop_timeout.attr,
+       &dev_attr_start_triggers.attr,
+       NULL,
+};
+
+static const struct attribute_group kbd_led_group = {
+       .attrs = kbd_led_attrs,
+};
+
+static struct attribute *kbd_led_als_attrs[] = {
+       &dev_attr_als_enabled.attr,
+       &dev_attr_als_setting.attr,
+       NULL,
+};
+
+static const struct attribute_group kbd_led_als_group = {
+       .attrs = kbd_led_als_attrs,
+};
+
+static const struct attribute_group *kbd_led_groups[] = {
+       &kbd_led_group,
+       &kbd_led_als_group,
+       NULL,
+};
+
+static enum led_brightness kbd_led_level_get(struct led_classdev *led_cdev)
+{
+       int ret;
+       u16 num;
+       struct kbd_state state;
+
+       if (kbd_get_max_level()) {
+               ret = kbd_get_state(&state);
+               if (ret)
+                       return 0;
+               ret = kbd_get_level(&state);
+               if (ret < 0)
+                       return 0;
+               return ret;
+       }
+
+       if (kbd_get_valid_token_counts()) {
+               ret = kbd_get_first_active_token_bit();
+               if (ret < 0)
+                       return 0;
+               for (num = kbd_token_bits; num != 0 && ret > 0; --ret)
+                       num &= num - 1; /* clear the first bit set */
+               if (num == 0)
+                       return 0;
+               return ffs(num) - 1;
+       }
+
+       pr_warn("Keyboard brightness level control not supported\n");
+       return 0;
+}
+
+static void kbd_led_level_set(struct led_classdev *led_cdev,
+                             enum led_brightness value)
+{
+       struct kbd_state state;
+       struct kbd_state new_state;
+       u16 num;
+
+       if (kbd_get_max_level()) {
+               if (kbd_get_state(&state))
+                       return;
+               new_state = state;
+               if (kbd_set_level(&new_state, value))
+                       return;
+               kbd_set_state_safe(&new_state, &state);
+               return;
+       }
+
+       if (kbd_get_valid_token_counts()) {
+               for (num = kbd_token_bits; num != 0 && value > 0; --value)
+                       num &= num - 1; /* clear the first bit set */
+               if (num == 0)
+                       return;
+               kbd_set_token_bit(ffs(num) - 1);
+               return;
+       }
+
+       pr_warn("Keyboard brightness level control not supported\n");
+}
+
+static struct led_classdev kbd_led = {
+       .name           = "dell::kbd_backlight",
+       .brightness_set = kbd_led_level_set,
+       .brightness_get = kbd_led_level_get,
+       .groups         = kbd_led_groups,
+};
+
+static int __init kbd_led_init(struct device *dev)
+{
+       kbd_init();
+       if (!kbd_led_present)
+               return -ENODEV;
+       if (!kbd_als_supported)
+               kbd_led_groups[1] = NULL;
+       kbd_led.max_brightness = kbd_get_max_level();
+       if (!kbd_led.max_brightness) {
+               kbd_led.max_brightness = kbd_get_valid_token_counts();
+               if (kbd_led.max_brightness)
+                       kbd_led.max_brightness--;
+       }
+       return led_classdev_register(dev, &kbd_led);
+}
+
+static void brightness_set_exit(struct led_classdev *led_cdev,
+                               enum led_brightness value)
+{
+       /* Don't change backlight level on exit */
+};
+
+static void kbd_led_exit(void)
+{
+       if (!kbd_led_present)
+               return;
+       kbd_led.brightness_set = brightness_set_exit;
+       led_classdev_unregister(&kbd_led);
+}
+
 static int __init dell_init(void)
 {
        int max_intensity = 0;
@@ -841,6 +1913,8 @@ static int __init dell_init(void)
        if (quirks && quirks->touchpad_led)
                touchpad_led_init(&platform_device->dev);
 
+       kbd_led_init(&platform_device->dev);
+
        dell_laptop_dir = debugfs_create_dir("dell_laptop", NULL);
        if (dell_laptop_dir != NULL)
                debugfs_create_file("rfkill", 0444, dell_laptop_dir, NULL,
@@ -908,6 +1982,7 @@ static void __exit dell_exit(void)
        debugfs_remove_recursive(dell_laptop_dir);
        if (quirks && quirks->touchpad_led)
                touchpad_led_exit();
+       kbd_led_exit();
        i8042_remove_filter(dell_laptop_i8042_filter);
        cancel_delayed_work_sync(&dell_rfkill_work);
        backlight_device_unregister(dell_backlight_device);
@@ -924,5 +1999,7 @@ module_init(dell_init);
 module_exit(dell_exit);
 
 MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
+MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
+MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
 MODULE_DESCRIPTION("Dell laptop driver");
 MODULE_LICENSE("GPL");
index a4a4258..8037c8b 100644 (file)
@@ -62,7 +62,7 @@
  * (1 << 1):   Bluetooth enable/disable, RW.
  * (1 << 2):   GPS enable/disable, RW.
  * (1 << 3):   WiFi enable/disable, RW.
- * (1 << 4):   WWAN (3G) enable/disalbe, RW.
+ * (1 << 4):   WWAN (3G) enable/disable, RW.
  * (1 << 5):   Touchscreen enable/disable, Read Only.
  */
 #define OT_EC_DEVICE_STATE_ADDRESS     0xD6
index 3b8ceee..7769575 100644 (file)
@@ -319,6 +319,7 @@ static struct {
        u32 sensors_pdrv_attrs_registered:1;
        u32 sensors_pdev_attrs_registered:1;
        u32 hotkey_poll_active:1;
+       u32 has_adaptive_kbd:1;
 } tp_features;
 
 static struct {
@@ -1911,6 +1912,27 @@ enum {   /* hot key scan codes (derived from ACPI DSDT) */
        TP_ACPI_HOTKEYSCAN_UNK7,
        TP_ACPI_HOTKEYSCAN_UNK8,
 
+       TP_ACPI_HOTKEYSCAN_MUTE2,
+       TP_ACPI_HOTKEYSCAN_BRIGHTNESS_ZERO,
+       TP_ACPI_HOTKEYSCAN_CLIPPING_TOOL,
+       TP_ACPI_HOTKEYSCAN_CLOUD,
+       TP_ACPI_HOTKEYSCAN_UNK9,
+       TP_ACPI_HOTKEYSCAN_VOICE,
+       TP_ACPI_HOTKEYSCAN_UNK10,
+       TP_ACPI_HOTKEYSCAN_GESTURES,
+       TP_ACPI_HOTKEYSCAN_UNK11,
+       TP_ACPI_HOTKEYSCAN_UNK12,
+       TP_ACPI_HOTKEYSCAN_UNK13,
+       TP_ACPI_HOTKEYSCAN_CONFIG,
+       TP_ACPI_HOTKEYSCAN_NEW_TAB,
+       TP_ACPI_HOTKEYSCAN_RELOAD,
+       TP_ACPI_HOTKEYSCAN_BACK,
+       TP_ACPI_HOTKEYSCAN_MIC_DOWN,
+       TP_ACPI_HOTKEYSCAN_MIC_UP,
+       TP_ACPI_HOTKEYSCAN_MIC_CANCELLATION,
+       TP_ACPI_HOTKEYSCAN_CAMERA_MODE,
+       TP_ACPI_HOTKEYSCAN_ROTATE_DISPLAY,
+
        /* Hotkey keymap size */
        TPACPI_HOTKEY_MAP_LEN
 };
@@ -2647,9 +2669,7 @@ static ssize_t hotkey_enable_store(struct device *dev,
        return count;
 }
 
-static struct device_attribute dev_attr_hotkey_enable =
-       __ATTR(hotkey_enable, S_IWUSR | S_IRUGO,
-               hotkey_enable_show, hotkey_enable_store);
+static DEVICE_ATTR_RW(hotkey_enable);
 
 /* sysfs hotkey mask --------------------------------------------------- */
 static ssize_t hotkey_mask_show(struct device *dev,
@@ -2685,9 +2705,7 @@ static ssize_t hotkey_mask_store(struct device *dev,
        return (res) ? res : count;
 }
 
-static struct device_attribute dev_attr_hotkey_mask =
-       __ATTR(hotkey_mask, S_IWUSR | S_IRUGO,
-               hotkey_mask_show, hotkey_mask_store);
+static DEVICE_ATTR_RW(hotkey_mask);
 
 /* sysfs hotkey bios_enabled ------------------------------------------- */
 static ssize_t hotkey_bios_enabled_show(struct device *dev,
@@ -2697,8 +2715,7 @@ static ssize_t hotkey_bios_enabled_show(struct device *dev,
        return sprintf(buf, "0\n");
 }
 
-static struct device_attribute dev_attr_hotkey_bios_enabled =
-       __ATTR(hotkey_bios_enabled, S_IRUGO, hotkey_bios_enabled_show, NULL);
+static DEVICE_ATTR_RO(hotkey_bios_enabled);
 
 /* sysfs hotkey bios_mask ---------------------------------------------- */
 static ssize_t hotkey_bios_mask_show(struct device *dev,
@@ -2710,8 +2727,7 @@ static ssize_t hotkey_bios_mask_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "0x%08x\n", hotkey_orig_mask);
 }
 
-static struct device_attribute dev_attr_hotkey_bios_mask =
-       __ATTR(hotkey_bios_mask, S_IRUGO, hotkey_bios_mask_show, NULL);
+static DEVICE_ATTR_RO(hotkey_bios_mask);
 
 /* sysfs hotkey all_mask ----------------------------------------------- */
 static ssize_t hotkey_all_mask_show(struct device *dev,
@@ -2722,8 +2738,7 @@ static ssize_t hotkey_all_mask_show(struct device *dev,
                                hotkey_all_mask | hotkey_source_mask);
 }
 
-static struct device_attribute dev_attr_hotkey_all_mask =
-       __ATTR(hotkey_all_mask, S_IRUGO, hotkey_all_mask_show, NULL);
+static DEVICE_ATTR_RO(hotkey_all_mask);
 
 /* sysfs hotkey recommended_mask --------------------------------------- */
 static ssize_t hotkey_recommended_mask_show(struct device *dev,
@@ -2735,9 +2750,7 @@ static ssize_t hotkey_recommended_mask_show(struct device *dev,
                        & ~hotkey_reserved_mask);
 }
 
-static struct device_attribute dev_attr_hotkey_recommended_mask =
-       __ATTR(hotkey_recommended_mask, S_IRUGO,
-               hotkey_recommended_mask_show, NULL);
+static DEVICE_ATTR_RO(hotkey_recommended_mask);
 
 #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
 
@@ -2792,9 +2805,7 @@ static ssize_t hotkey_source_mask_store(struct device *dev,
        return (rc < 0) ? rc : count;
 }
 
-static struct device_attribute dev_attr_hotkey_source_mask =
-       __ATTR(hotkey_source_mask, S_IWUSR | S_IRUGO,
-               hotkey_source_mask_show, hotkey_source_mask_store);
+static DEVICE_ATTR_RW(hotkey_source_mask);
 
 /* sysfs hotkey hotkey_poll_freq --------------------------------------- */
 static ssize_t hotkey_poll_freq_show(struct device *dev,
@@ -2826,9 +2837,7 @@ static ssize_t hotkey_poll_freq_store(struct device *dev,
        return count;
 }
 
-static struct device_attribute dev_attr_hotkey_poll_freq =
-       __ATTR(hotkey_poll_freq, S_IWUSR | S_IRUGO,
-               hotkey_poll_freq_show, hotkey_poll_freq_store);
+static DEVICE_ATTR_RW(hotkey_poll_freq);
 
 #endif /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
 
@@ -2849,8 +2858,7 @@ static ssize_t hotkey_radio_sw_show(struct device *dev,
                        (res == TPACPI_RFK_RADIO_OFF) ? 0 : 1);
 }
 
-static struct device_attribute dev_attr_hotkey_radio_sw =
-       __ATTR(hotkey_radio_sw, S_IRUGO, hotkey_radio_sw_show, NULL);
+static DEVICE_ATTR_RO(hotkey_radio_sw);
 
 static void hotkey_radio_sw_notify_change(void)
 {
@@ -2872,8 +2880,7 @@ static ssize_t hotkey_tablet_mode_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%d\n", !!s);
 }
 
-static struct device_attribute dev_attr_hotkey_tablet_mode =
-       __ATTR(hotkey_tablet_mode, S_IRUGO, hotkey_tablet_mode_show, NULL);
+static DEVICE_ATTR_RO(hotkey_tablet_mode);
 
 static void hotkey_tablet_mode_notify_change(void)
 {
@@ -2890,8 +2897,7 @@ static ssize_t hotkey_wakeup_reason_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_wakeup_reason);
 }
 
-static struct device_attribute dev_attr_hotkey_wakeup_reason =
-       __ATTR(wakeup_reason, S_IRUGO, hotkey_wakeup_reason_show, NULL);
+static DEVICE_ATTR_RO(hotkey_wakeup_reason);
 
 static void hotkey_wakeup_reason_notify_change(void)
 {
@@ -2907,9 +2913,7 @@ static ssize_t hotkey_wakeup_hotunplug_complete_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_autosleep_ack);
 }
 
-static struct device_attribute dev_attr_hotkey_wakeup_hotunplug_complete =
-       __ATTR(wakeup_hotunplug_complete, S_IRUGO,
-              hotkey_wakeup_hotunplug_complete_show, NULL);
+static DEVICE_ATTR_RO(hotkey_wakeup_hotunplug_complete);
 
 static void hotkey_wakeup_hotunplug_complete_notify_change(void)
 {
@@ -2917,6 +2921,57 @@ static void hotkey_wakeup_hotunplug_complete_notify_change(void)
                     "wakeup_hotunplug_complete");
 }
 
+/* sysfs adaptive kbd mode --------------------------------------------- */
+
+static int adaptive_keyboard_get_mode(void);
+static int adaptive_keyboard_set_mode(int new_mode);
+
+enum ADAPTIVE_KEY_MODE {
+       HOME_MODE,
+       WEB_BROWSER_MODE,
+       WEB_CONFERENCE_MODE,
+       FUNCTION_MODE,
+       LAYFLAT_MODE
+};
+
+static ssize_t adaptive_kbd_mode_show(struct device *dev,
+                          struct device_attribute *attr,
+                          char *buf)
+{
+       int current_mode;
+
+       current_mode = adaptive_keyboard_get_mode();
+       if (current_mode < 0)
+               return current_mode;
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", current_mode);
+}
+
+static ssize_t adaptive_kbd_mode_store(struct device *dev,
+                           struct device_attribute *attr,
+                           const char *buf, size_t count)
+{
+       unsigned long t;
+       int res;
+
+       if (parse_strtoul(buf, LAYFLAT_MODE, &t))
+               return -EINVAL;
+
+       res = adaptive_keyboard_set_mode(t);
+       return (res < 0) ? res : count;
+}
+
+static DEVICE_ATTR_RW(adaptive_kbd_mode);
+
+static struct attribute *adaptive_kbd_attributes[] = {
+       &dev_attr_adaptive_kbd_mode.attr,
+       NULL
+};
+
+static const struct attribute_group adaptive_kbd_attr_group = {
+       .attrs = adaptive_kbd_attributes,
+};
+
 /* --------------------------------------------------------------------- */
 
 static struct attribute *hotkey_attributes[] __initdata = {
@@ -3118,6 +3173,13 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
                /* (assignments unknown, please report if found) */
                KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
                KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+
+               /* No assignments, only used for Adaptive keyboards. */
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
                },
 
        /* Generic keymap for Lenovo ThinkPads */
@@ -3174,6 +3236,35 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 
                /* Extra keys in use since the X240 / T440 / T540 */
                KEY_CONFIG, KEY_SEARCH, KEY_SCALE, KEY_FILE,
+
+               /*
+                * These are the adaptive keyboard keycodes for Carbon X1 2014.
+                * The first item in this list is the Mute button which is
+                * emitted with 0x103 through
+                * adaptive_keyboard_hotkey_notify_hotkey() when the sound
+                * symbol is held.
+                * We'll need to offset those by 0x20.
+                */
+               KEY_RESERVED,        /* Mute held, 0x103 */
+               KEY_BRIGHTNESS_MIN,  /* Backlight off */
+               KEY_RESERVED,        /* Clipping tool */
+               KEY_RESERVED,        /* Cloud */
+               KEY_RESERVED,
+               KEY_VOICECOMMAND,    /* Voice */
+               KEY_RESERVED,
+               KEY_RESERVED,        /* Gestures */
+               KEY_RESERVED,
+               KEY_RESERVED,
+               KEY_RESERVED,
+               KEY_CONFIG,          /* Settings */
+               KEY_RESERVED,        /* New tab */
+               KEY_REFRESH,         /* Reload */
+               KEY_BACK,            /* Back */
+               KEY_RESERVED,        /* Microphone down */
+               KEY_RESERVED,        /* Microphone up */
+               KEY_RESERVED,        /* Microphone cancellation */
+               KEY_RESERVED,        /* Camera mode */
+               KEY_RESERVED,        /* Rotate display, 0x116 */
                },
        };
 
@@ -3227,6 +3318,20 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
        if (!tp_features.hotkey)
                return 1;
 
+       /*
+        * Check if we have an adaptive keyboard, like on the
+        * Lenovo Carbon X1 2014 (2nd Gen).
+        */
+       if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
+               if ((hkeyv >> 8) == 2) {
+                       tp_features.has_adaptive_kbd = true;
+                       res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+                                       &adaptive_kbd_attr_group);
+                       if (res)
+                               goto err_exit;
+               }
+       }
+
        quirks = tpacpi_check_quirks(tpacpi_hotkey_qtable,
                                     ARRAY_SIZE(tpacpi_hotkey_qtable));
 
@@ -3437,6 +3542,9 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 
 err_exit:
        delete_attr_set(hotkey_dev_attributes, &tpacpi_pdev->dev.kobj);
+       sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+                       &adaptive_kbd_attr_group);
+
        hotkey_dev_attributes = NULL;
 
        return (res < 0) ? res : 1;
@@ -3449,14 +3557,6 @@ err_exit:
  * Will consider support rest of modes in future.
  *
  */
-enum ADAPTIVE_KEY_MODE {
-       HOME_MODE,
-       WEB_BROWSER_MODE,
-       WEB_CONFERENCE_MODE,
-       FUNCTION_MODE,
-       LAYFLAT_MODE
-};
-
 static const int adaptive_keyboard_modes[] = {
        HOME_MODE,
 /*     WEB_BROWSER_MODE = 2,
@@ -3466,6 +3566,8 @@ static const int adaptive_keyboard_modes[] = {
 
 #define DFR_CHANGE_ROW                 0x101
 #define DFR_SHOW_QUICKVIEW_ROW         0x102
+#define FIRST_ADAPTIVE_KEY             0x103
+#define ADAPTIVE_KEY_OFFSET            0x020
 
 /* press Fn key a while second, it will switch to Function Mode. Then
  * release Fn key, previous mode be restored.
@@ -3473,6 +3575,32 @@ static const int adaptive_keyboard_modes[] = {
 static bool adaptive_keyboard_mode_is_saved;
 static int adaptive_keyboard_prev_mode;
 
+static int adaptive_keyboard_get_mode(void)
+{
+       int mode = 0;
+
+       if (!acpi_evalf(hkey_handle, &mode, "GTRW", "dd", 0)) {
+               pr_err("Cannot read adaptive keyboard mode\n");
+               return -EIO;
+       }
+
+       return mode;
+}
+
+static int adaptive_keyboard_set_mode(int new_mode)
+{
+       if (new_mode < 0 ||
+               new_mode > LAYFLAT_MODE)
+               return -EINVAL;
+
+       if (!acpi_evalf(hkey_handle, NULL, "STRW", "vd", new_mode)) {
+               pr_err("Cannot set adaptive keyboard mode\n");
+               return -EIO;
+       }
+
+       return 0;
+}
+
 static int adaptive_keyboard_get_next_mode(int mode)
 {
        size_t i;
@@ -3493,8 +3621,9 @@ static int adaptive_keyboard_get_next_mode(int mode)
 
 static bool adaptive_keyboard_hotkey_notify_hotkey(unsigned int scancode)
 {
-       u32 current_mode = 0;
+       int current_mode = 0;
        int new_mode = 0;
+       int keycode;
 
        switch (scancode) {
        case DFR_CHANGE_ROW:
@@ -3502,43 +3631,51 @@ static bool adaptive_keyboard_hotkey_notify_hotkey(unsigned int scancode)
                        new_mode = adaptive_keyboard_prev_mode;
                        adaptive_keyboard_mode_is_saved = false;
                } else {
-                       if (!acpi_evalf(
-                                       hkey_handle, &current_mode,
-                                       "GTRW", "dd", 0)) {
-                               pr_err("Cannot read adaptive keyboard mode\n");
+                       current_mode = adaptive_keyboard_get_mode();
+                       if (current_mode < 0)
                                return false;
-                       } else {
-                               new_mode = adaptive_keyboard_get_next_mode(
-                                               current_mode);
-                       }
+                       new_mode = adaptive_keyboard_get_next_mode(
+                                       current_mode);
                }
 
-               if (!acpi_evalf(hkey_handle, NULL, "STRW", "vd", new_mode)) {
-                       pr_err("Cannot set adaptive keyboard mode\n");
+               if (adaptive_keyboard_set_mode(new_mode) < 0)
                        return false;
-               }
 
                return true;
 
        case DFR_SHOW_QUICKVIEW_ROW:
-               if (!acpi_evalf(hkey_handle,
-                               &adaptive_keyboard_prev_mode,
-                               "GTRW", "dd", 0)) {
-                       pr_err("Cannot read adaptive keyboard mode\n");
+               current_mode = adaptive_keyboard_get_mode();
+               if (current_mode < 0)
                        return false;
-               } else {
-                       adaptive_keyboard_mode_is_saved = true;
 
-                       if (!acpi_evalf(hkey_handle,
-                                       NULL, "STRW", "vd", FUNCTION_MODE)) {
-                               pr_err("Cannot set adaptive keyboard mode\n");
-                               return false;
-                       }
-               }
+               adaptive_keyboard_prev_mode = current_mode;
+               adaptive_keyboard_mode_is_saved = true;
+
+               if (adaptive_keyboard_set_mode (FUNCTION_MODE) < 0)
+                       return false;
                return true;
 
        default:
-               return false;
+               if (scancode < FIRST_ADAPTIVE_KEY ||
+                   scancode >= FIRST_ADAPTIVE_KEY + TPACPI_HOTKEY_MAP_LEN -
+                               ADAPTIVE_KEY_OFFSET) {
+                       pr_info("Unhandled adaptive keyboard key: 0x%x\n",
+                                       scancode);
+                       return false;
+               }
+               keycode = hotkey_keycode_map[scancode - FIRST_ADAPTIVE_KEY + ADAPTIVE_KEY_OFFSET];
+               if (keycode != KEY_RESERVED) {
+                       mutex_lock(&tpacpi_inputdev_send_mutex);
+
+                       input_report_key(tpacpi_inputdev, keycode, 1);
+                       input_sync(tpacpi_inputdev);
+
+                       input_report_key(tpacpi_inputdev, keycode, 0);
+                       input_sync(tpacpi_inputdev);
+
+                       mutex_unlock(&tpacpi_inputdev_send_mutex);
+               }
+               return true;
        }
 }
 
@@ -3836,28 +3973,21 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event)
 
 static void hotkey_suspend(void)
 {
-       int hkeyv;
-
        /* Do these on suspend, we get the events on early resume! */
        hotkey_wakeup_reason = TP_ACPI_WAKEUP_NONE;
        hotkey_autosleep_ack = 0;
 
        /* save previous mode of adaptive keyboard of X1 Carbon */
-       if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-               if ((hkeyv >> 8) == 2) {
-                       if (!acpi_evalf(hkey_handle,
-                                               &adaptive_keyboard_prev_mode,
-                                               "GTRW", "dd", 0)) {
-                               pr_err("Cannot read adaptive keyboard mode.\n");
-                       }
+       if (tp_features.has_adaptive_kbd) {
+               if (!acpi_evalf(hkey_handle, &adaptive_keyboard_prev_mode,
+                                       "GTRW", "dd", 0)) {
+                       pr_err("Cannot read adaptive keyboard mode.\n");
                }
        }
 }
 
 static void hotkey_resume(void)
 {
-       int hkeyv;
-
        tpacpi_disable_brightness_delay();
 
        if (hotkey_status_set(true) < 0 ||
@@ -3872,14 +4002,10 @@ static void hotkey_resume(void)
        hotkey_poll_setup_safe(false);
 
        /* restore previous mode of adapive keyboard of X1 Carbon */
-       if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-               if ((hkeyv >> 8) == 2) {
-                       if (!acpi_evalf(hkey_handle,
-                                               NULL,
-                                               "STRW", "vd",
-                                               adaptive_keyboard_prev_mode)) {
-                               pr_err("Cannot set adaptive keyboard mode.\n");
-                       }
+       if (tp_features.has_adaptive_kbd) {
+               if (!acpi_evalf(hkey_handle, NULL, "STRW", "vd",
+                                       adaptive_keyboard_prev_mode)) {
+                       pr_err("Cannot set adaptive keyboard mode.\n");
                }
        }
 }
@@ -4079,9 +4205,7 @@ static ssize_t bluetooth_enable_store(struct device *dev,
                                attr, buf, count);
 }
 
-static struct device_attribute dev_attr_bluetooth_enable =
-       __ATTR(bluetooth_enable, S_IWUSR | S_IRUGO,
-               bluetooth_enable_show, bluetooth_enable_store);
+static DEVICE_ATTR_RW(bluetooth_enable);
 
 /* --------------------------------------------------------------------- */
 
@@ -4269,9 +4393,7 @@ static ssize_t wan_enable_store(struct device *dev,
                        attr, buf, count);
 }
 
-static struct device_attribute dev_attr_wan_enable =
-       __ATTR(wwan_enable, S_IWUSR | S_IRUGO,
-               wan_enable_show, wan_enable_store);
+static DEVICE_ATTR_RW(wan_enable);
 
 /* --------------------------------------------------------------------- */
 
@@ -5048,8 +5170,7 @@ static ssize_t cmos_command_store(struct device *dev,
        return (res) ? res : count;
 }
 
-static struct device_attribute dev_attr_cmos_command =
-       __ATTR(cmos_command, S_IWUSR, NULL, cmos_command_store);
+static DEVICE_ATTR_WO(cmos_command);
 
 /* --------------------------------------------------------------------- */
 
@@ -8017,9 +8138,7 @@ static ssize_t fan_pwm1_enable_store(struct device *dev,
        return count;
 }
 
-static struct device_attribute dev_attr_fan_pwm1_enable =
-       __ATTR(pwm1_enable, S_IWUSR | S_IRUGO,
-               fan_pwm1_enable_show, fan_pwm1_enable_store);
+static DEVICE_ATTR_RW(fan_pwm1_enable);
 
 /* sysfs fan pwm1 ------------------------------------------------------ */
 static ssize_t fan_pwm1_show(struct device *dev,
@@ -8079,9 +8198,7 @@ static ssize_t fan_pwm1_store(struct device *dev,
        return (rc) ? rc : count;
 }
 
-static struct device_attribute dev_attr_fan_pwm1 =
-       __ATTR(pwm1, S_IWUSR | S_IRUGO,
-               fan_pwm1_show, fan_pwm1_store);
+static DEVICE_ATTR_RW(fan_pwm1);
 
 /* sysfs fan fan1_input ------------------------------------------------ */
 static ssize_t fan_fan1_input_show(struct device *dev,
@@ -8098,9 +8215,7 @@ static ssize_t fan_fan1_input_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%u\n", speed);
 }
 
-static struct device_attribute dev_attr_fan_fan1_input =
-       __ATTR(fan1_input, S_IRUGO,
-               fan_fan1_input_show, NULL);
+static DEVICE_ATTR_RO(fan_fan1_input);
 
 /* sysfs fan fan2_input ------------------------------------------------ */
 static ssize_t fan_fan2_input_show(struct device *dev,
@@ -8117,9 +8232,7 @@ static ssize_t fan_fan2_input_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%u\n", speed);
 }
 
-static struct device_attribute dev_attr_fan_fan2_input =
-       __ATTR(fan2_input, S_IRUGO,
-               fan_fan2_input_show, NULL);
+static DEVICE_ATTR_RO(fan_fan2_input);
 
 /* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */
 static ssize_t fan_fan_watchdog_show(struct device_driver *drv,
@@ -8735,8 +8848,7 @@ static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%s\n", TPACPI_NAME);
 }
 
-static struct device_attribute dev_attr_thinkpad_acpi_pdev_name =
-       __ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL);
+static DEVICE_ATTR_RO(thinkpad_acpi_pdev_name);
 
 /* --------------------------------------------------------------------- */
 
index dbcb7a8..9956b99 100644 (file)
@@ -51,6 +51,7 @@
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/uaccess.h>
+#include <acpi/video.h>
 
 MODULE_AUTHOR("John Belmonte");
 MODULE_DESCRIPTION("Toshiba Laptop ACPI Extras Driver");
@@ -116,6 +117,7 @@ MODULE_LICENSE("GPL");
 #define HCI_KBD_ILLUMINATION           0x0095
 #define HCI_ECO_MODE                   0x0097
 #define HCI_ACCELEROMETER2             0x00a6
+#define HCI_SYSTEM_INFO                        0xc000
 #define SCI_PANEL_POWER_ON             0x010d
 #define SCI_ILLUMINATION               0x014e
 #define SCI_USB_SLEEP_CHARGE           0x0150
@@ -129,10 +131,13 @@ MODULE_LICENSE("GPL");
 #define HCI_ACCEL_MASK                 0x7fff
 #define HCI_HOTKEY_DISABLE             0x0b
 #define HCI_HOTKEY_ENABLE              0x09
+#define HCI_HOTKEY_SPECIAL_FUNCTIONS   0x10
 #define HCI_LCD_BRIGHTNESS_BITS                3
 #define HCI_LCD_BRIGHTNESS_SHIFT       (16-HCI_LCD_BRIGHTNESS_BITS)
 #define HCI_LCD_BRIGHTNESS_LEVELS      (1 << HCI_LCD_BRIGHTNESS_BITS)
 #define HCI_MISC_SHIFT                 0x10
+#define HCI_SYSTEM_TYPE1               0x10
+#define HCI_SYSTEM_TYPE2               0x11
 #define HCI_VIDEO_OUT_LCD              0x1
 #define HCI_VIDEO_OUT_CRT              0x2
 #define HCI_VIDEO_OUT_TV               0x4
@@ -147,9 +152,10 @@ MODULE_LICENSE("GPL");
 #define SCI_KBD_MODE_OFF               0x10
 #define SCI_KBD_TIME_MAX               0x3c001a
 #define SCI_USB_CHARGE_MODE_MASK       0xff
-#define SCI_USB_CHARGE_DISABLED                0x30000
-#define SCI_USB_CHARGE_ALTERNATE       0x30009
-#define SCI_USB_CHARGE_AUTO            0x30021
+#define SCI_USB_CHARGE_DISABLED                0x00
+#define SCI_USB_CHARGE_ALTERNATE       0x09
+#define SCI_USB_CHARGE_TYPICAL         0x11
+#define SCI_USB_CHARGE_AUTO            0x21
 #define SCI_USB_CHARGE_BAT_MASK                0x7
 #define SCI_USB_CHARGE_BAT_LVL_OFF     0x1
 #define SCI_USB_CHARGE_BAT_LVL_ON      0x4
@@ -174,6 +180,8 @@ struct toshiba_acpi_dev {
        int kbd_mode;
        int kbd_time;
        int usbsc_bat_level;
+       int usbsc_mode_base;
+       int hotkey_event_type;
 
        unsigned int illumination_supported:1;
        unsigned int video_supported:1;
@@ -243,29 +251,6 @@ static const struct key_entry toshiba_acpi_keymap[] = {
        { KE_END, 0 },
 };
 
-/* alternative keymap */
-static const struct dmi_system_id toshiba_alt_keymap_dmi[] = {
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Satellite M840"),
-               },
-       },
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Qosmio X75-A"),
-               },
-       },
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "TECRA A50-A"),
-               },
-       },
-       {}
-};
-
 static const struct key_entry toshiba_acpi_alt_keymap[] = {
        { KE_KEY, 0x157, { KEY_MUTE } },
        { KE_KEY, 0x102, { KEY_ZOOMOUT } },
@@ -280,6 +265,14 @@ static const struct key_entry toshiba_acpi_alt_keymap[] = {
        { KE_END, 0 },
 };
 
+/*
+ * List of models which have a broken acpi-video backlight interface and thus
+ * need to use the toshiba (vendor) interface instead.
+ */
+static const struct dmi_system_id toshiba_vendor_backlight_dmi[] = {
+       {}
+};
+
 /*
  * Utility
  */
@@ -819,6 +812,54 @@ static int toshiba_accelerometer_get(struct toshiba_acpi_dev *dev,
 }
 
 /* Sleep (Charge and Music) utilities support */
+static void toshiba_usb_sleep_charge_available(struct toshiba_acpi_dev *dev)
+{
+       u32 in[TCI_WORDS] = { SCI_GET, SCI_USB_SLEEP_CHARGE, 0, 0, 0, 0 };
+       u32 out[TCI_WORDS];
+       acpi_status status;
+
+       /* Set the feature to "not supported" in case of error */
+       dev->usb_sleep_charge_supported = 0;
+
+       if (!sci_open(dev))
+               return;
+
+       status = tci_raw(dev, in, out);
+       if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
+               pr_err("ACPI call to get USB Sleep and Charge mode failed\n");
+               sci_close(dev);
+               return;
+       } else if (out[0] == TOS_NOT_SUPPORTED) {
+               pr_info("USB Sleep and Charge not supported\n");
+               sci_close(dev);
+               return;
+       } else if (out[0] == TOS_SUCCESS) {
+               dev->usbsc_mode_base = out[4];
+       }
+
+       in[5] = SCI_USB_CHARGE_BAT_LVL;
+       status = tci_raw(dev, in, out);
+       if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
+               pr_err("ACPI call to get USB Sleep and Charge mode failed\n");
+               sci_close(dev);
+               return;
+       } else if (out[0] == TOS_NOT_SUPPORTED) {
+               pr_info("USB Sleep and Charge not supported\n");
+               sci_close(dev);
+               return;
+       } else if (out[0] == TOS_SUCCESS) {
+               dev->usbsc_bat_level = out[2];
+               /*
+                * If we reach this point, it means that the laptop has support
+                * for this feature and all values are initialized.
+                * Set it as supported.
+                */
+               dev->usb_sleep_charge_supported = 1;
+       }
+
+       sci_close(dev);
+}
+
 static int toshiba_usb_sleep_charge_get(struct toshiba_acpi_dev *dev,
                                        u32 *mode)
 {
@@ -934,11 +975,11 @@ static int toshiba_usb_rapid_charge_get(struct toshiba_acpi_dev *dev,
        status = tci_raw(dev, in, out);
        sci_close(dev);
        if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
-               pr_err("ACPI call to get USB S&C battery level failed\n");
+               pr_err("ACPI call to get USB Rapid Charge failed\n");
                return -EIO;
        } else if (out[0] == TOS_NOT_SUPPORTED ||
                   out[0] == TOS_INPUT_DATA_ERROR) {
-               pr_info("USB Sleep and Charge not supported\n");
+               pr_info("USB Rapid Charge not supported\n");
                return -ENODEV;
        }
 
@@ -962,10 +1003,10 @@ static int toshiba_usb_rapid_charge_set(struct toshiba_acpi_dev *dev,
        status = tci_raw(dev, in, out);
        sci_close(dev);
        if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
-               pr_err("ACPI call to set USB S&C battery level failed\n");
+               pr_err("ACPI call to set USB Rapid Charge failed\n");
                return -EIO;
        } else if (out[0] == TOS_NOT_SUPPORTED) {
-               pr_info("USB Sleep and Charge not supported\n");
+               pr_info("USB Rapid Charge not supported\n");
                return -ENODEV;
        } else if (out[0] == TOS_INPUT_DATA_ERROR) {
                return -EIO;
@@ -984,10 +1025,10 @@ static int toshiba_usb_sleep_music_get(struct toshiba_acpi_dev *dev, u32 *state)
        result = sci_read(dev, SCI_USB_SLEEP_MUSIC, state);
        sci_close(dev);
        if (result == TOS_FAILURE) {
-               pr_err("ACPI call to set USB S&C mode failed\n");
+               pr_err("ACPI call to get Sleep and Music failed\n");
                return -EIO;
        } else if (result == TOS_NOT_SUPPORTED) {
-               pr_info("USB Sleep and Charge not supported\n");
+               pr_info("Sleep and Music not supported\n");
                return -ENODEV;
        } else if (result == TOS_INPUT_DATA_ERROR) {
                return -EIO;
@@ -1006,10 +1047,10 @@ static int toshiba_usb_sleep_music_set(struct toshiba_acpi_dev *dev, u32 state)
        result = sci_write(dev, SCI_USB_SLEEP_MUSIC, state);
        sci_close(dev);
        if (result == TOS_FAILURE) {
-               pr_err("ACPI call to set USB S&C mode failed\n");
+               pr_err("ACPI call to set Sleep and Music failed\n");
                return -EIO;
        } else if (result == TOS_NOT_SUPPORTED) {
-               pr_info("USB Sleep and Charge not supported\n");
+               pr_info("Sleep and Music not supported\n");
                return -ENODEV;
        } else if (result == TOS_INPUT_DATA_ERROR) {
                return -EIO;
@@ -1149,6 +1190,28 @@ static int toshiba_usb_three_set(struct toshiba_acpi_dev *dev, u32 state)
        return 0;
 }
 
+/* Hotkey Event type */
+static int toshiba_hotkey_event_type_get(struct toshiba_acpi_dev *dev,
+                                        u32 *type)
+{
+       u32 val1 = 0x03;
+       u32 val2 = 0;
+       u32 result;
+
+       result = hci_read2(dev, HCI_SYSTEM_INFO, &val1, &val2);
+       if (result == TOS_FAILURE) {
+               pr_err("ACPI call to get System type failed\n");
+               return -EIO;
+       } else if (result == TOS_NOT_SUPPORTED) {
+               pr_info("System type not supported\n");
+               return -ENODEV;
+       }
+
+       *type = val2;
+
+       return 0;
+}
+
 /* Bluetooth rfkill handlers */
 
 static u32 hci_get_bt_present(struct toshiba_acpi_dev *dev, bool *present)
@@ -1973,17 +2036,21 @@ static ssize_t usb_sleep_charge_store(struct device *dev,
         * 0 - Disabled
         * 1 - Alternate (Non USB conformant devices that require more power)
         * 2 - Auto (USB conformant devices)
+        * 3 - Typical
         */
-       if (state != 0 && state != 1 && state != 2)
+       if (state != 0 && state != 1 && state != 2 && state != 3)
                return -EINVAL;
 
        /* Set the USB charging mode to internal value */
+       mode = toshiba->usbsc_mode_base;
        if (state == 0)
-               mode = SCI_USB_CHARGE_DISABLED;
+               mode |= SCI_USB_CHARGE_DISABLED;
        else if (state == 1)
-               mode = SCI_USB_CHARGE_ALTERNATE;
+               mode |= SCI_USB_CHARGE_ALTERNATE;
        else if (state == 2)
-               mode = SCI_USB_CHARGE_AUTO;
+               mode |= SCI_USB_CHARGE_AUTO;
+       else if (state == 3)
+               mode |= SCI_USB_CHARGE_TYPICAL;
 
        ret = toshiba_usb_sleep_charge_set(toshiba, mode);
        if (ret)
@@ -2333,6 +2400,20 @@ static int toshiba_acpi_enable_hotkeys(struct toshiba_acpi_dev *dev)
        return 0;
 }
 
+static void toshiba_acpi_enable_special_functions(struct toshiba_acpi_dev *dev)
+{
+       u32 result;
+
+       /*
+        * Re-activate the hotkeys, but this time, we are using the
+        * "Special Functions" mode.
+        */
+       result = hci_write1(dev, HCI_HOTKEY_EVENT,
+                           HCI_HOTKEY_SPECIAL_FUNCTIONS);
+       if (result != TOS_SUCCESS)
+               pr_err("Could not enable the Special Function mode\n");
+}
+
 static bool toshiba_acpi_i8042_filter(unsigned char data, unsigned char str,
                                      struct serio *port)
 {
@@ -2434,10 +2515,22 @@ static void toshiba_acpi_process_hotkeys(struct toshiba_acpi_dev *dev)
 
 static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
 {
+       const struct key_entry *keymap = toshiba_acpi_keymap;
        acpi_handle ec_handle;
-       int error;
+       u32 events_type;
        u32 hci_result;
-       const struct key_entry *keymap = toshiba_acpi_keymap;
+       int error;
+
+       error = toshiba_acpi_enable_hotkeys(dev);
+       if (error)
+               return error;
+
+       error = toshiba_hotkey_event_type_get(dev, &events_type);
+       if (error) {
+               pr_err("Unable to query Hotkey Event Type\n");
+               return error;
+       }
+       dev->hotkey_event_type = events_type;
 
        dev->hotkey_dev = input_allocate_device();
        if (!dev->hotkey_dev)
@@ -2447,8 +2540,14 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
        dev->hotkey_dev->phys = "toshiba_acpi/input0";
        dev->hotkey_dev->id.bustype = BUS_HOST;
 
-       if (dmi_check_system(toshiba_alt_keymap_dmi))
+       if (events_type == HCI_SYSTEM_TYPE1 ||
+           !dev->kbd_function_keys_supported)
+               keymap = toshiba_acpi_keymap;
+       else if (events_type == HCI_SYSTEM_TYPE2 ||
+                dev->kbd_function_keys_supported)
                keymap = toshiba_acpi_alt_keymap;
+       else
+               pr_info("Unknown event type received %x\n", events_type);
        error = sparse_keymap_setup(dev->hotkey_dev, keymap, NULL);
        if (error)
                goto err_free_dev;
@@ -2490,12 +2589,6 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
                goto err_remove_filter;
        }
 
-       error = toshiba_acpi_enable_hotkeys(dev);
-       if (error) {
-               pr_info("Unable to enable hotkeys\n");
-               goto err_remove_filter;
-       }
-
        error = input_register_device(dev->hotkey_dev);
        if (error) {
                pr_info("Unable to register input device\n");
@@ -2541,6 +2634,20 @@ static int toshiba_acpi_setup_backlight(struct toshiba_acpi_dev *dev)
        ret = get_tr_backlight_status(dev, &enabled);
        dev->tr_backlight_supported = !ret;
 
+       /*
+        * Tell acpi-video-detect code to prefer vendor backlight on all
+        * systems with transflective backlight and on dmi matched systems.
+        */
+       if (dev->tr_backlight_supported ||
+           dmi_check_system(toshiba_vendor_backlight_dmi))
+               acpi_video_dmi_promote_vendor();
+
+       if (acpi_video_backlight_support())
+               return 0;
+
+       /* acpi-video may have loaded before we called dmi_promote_vendor() */
+       acpi_video_unregister_backlight();
+
        memset(&props, 0, sizeof(props));
        props.type = BACKLIGHT_PLATFORM;
        props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1;
@@ -2624,6 +2731,7 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
 {
        struct toshiba_acpi_dev *dev;
        const char *hci_method;
+       u32 special_functions;
        u32 dummy;
        bool bt_present;
        int ret = 0;
@@ -2648,6 +2756,16 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
        acpi_dev->driver_data = dev;
        dev_set_drvdata(&acpi_dev->dev, dev);
 
+       /* Query the BIOS for supported features */
+
+       /*
+        * The "Special Functions" are always supported by the laptops
+        * with the new keyboard layout, query for its presence to help
+        * determine the keymap layout to use.
+        */
+       ret = toshiba_function_keys_get(dev, &special_functions);
+       dev->kbd_function_keys_supported = !ret;
+
        if (toshiba_acpi_setup_keyboard(dev))
                pr_info("Unable to activate hotkeys\n");
 
@@ -2716,8 +2834,7 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
        ret = toshiba_accelerometer_supported(dev);
        dev->accelerometer_supported = !ret;
 
-       ret = toshiba_usb_sleep_charge_get(dev, &dummy);
-       dev->usb_sleep_charge_supported = !ret;
+       toshiba_usb_sleep_charge_available(dev);
 
        ret = toshiba_usb_rapid_charge_get(dev, &dummy);
        dev->usb_rapid_charge_supported = !ret;
@@ -2725,23 +2842,25 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
        ret = toshiba_usb_sleep_music_get(dev, &dummy);
        dev->usb_sleep_music_supported = !ret;
 
-       ret = toshiba_function_keys_get(dev, &dummy);
-       dev->kbd_function_keys_supported = !ret;
-
        ret = toshiba_panel_power_on_get(dev, &dummy);
        dev->panel_power_on_supported = !ret;
 
        ret = toshiba_usb_three_get(dev, &dummy);
        dev->usb_three_supported = !ret;
 
-       /* Determine whether or not BIOS supports fan and video interfaces */
-
        ret = get_video_status(dev, &dummy);
        dev->video_supported = !ret;
 
        ret = get_fan_status(dev, &dummy);
        dev->fan_supported = !ret;
 
+       /*
+        * Enable the "Special Functions" mode only if they are
+        * supported and if they are activated.
+        */
+       if (dev->kbd_function_keys_supported && special_functions)
+               toshiba_acpi_enable_special_functions(dev);
+
        ret = sysfs_create_group(&dev->acpi_dev->dev.kobj,
                                 &toshiba_attr_group);
        if (ret) {
@@ -2770,6 +2889,21 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
        case 0x80: /* Hotkeys and some system events */
                toshiba_acpi_process_hotkeys(dev);
                break;
+       case 0x81: /* Dock events */
+       case 0x82:
+       case 0x83:
+               pr_info("Dock event received %x\n", event);
+               break;
+       case 0x88: /* Thermal events */
+               pr_info("Thermal event received\n");
+               break;
+       case 0x8f: /* LID closed */
+       case 0x90: /* LID is closed and Dock has been ejected */
+               break;
+       case 0x8c: /* SATA power events */
+       case 0x8b:
+               pr_info("SATA power event received %x\n", event);
+               break;
        case 0x92: /* Keyboard backlight mode changed */
                /* Update sysfs entries */
                ret = sysfs_update_group(&acpi_dev->dev.kobj,
@@ -2777,17 +2911,19 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
                if (ret)
                        pr_err("Unable to update sysfs entries\n");
                break;
-       case 0x81: /* Unknown */
-       case 0x82: /* Unknown */
-       case 0x83: /* Unknown */
-       case 0x8c: /* Unknown */
+       case 0x85: /* Unknown */
+       case 0x8d: /* Unknown */
        case 0x8e: /* Unknown */
-       case 0x8f: /* Unknown */
-       case 0x90: /* Unknown */
+       case 0x94: /* Unknown */
+       case 0x95: /* Unknown */
        default:
                pr_info("Unknown event received %x\n", event);
                break;
        }
+
+       acpi_bus_generate_netlink_event(acpi_dev->pnp.device_class,
+                                       dev_name(&acpi_dev->dev),
+                                       event, 0);
 }
 
 #ifdef CONFIG_PM_SLEEP
index 2cb1ea6..2498007 100644 (file)
@@ -2,6 +2,7 @@
  * Toshiba Bluetooth Enable Driver
  *
  * Copyright (C) 2009 Jes Sorensen <Jes.Sorensen@gmail.com>
+ * Copyright (C) 2015 Azael Avalos <coproscefalo@gmail.com>
  *
  * Thanks to Matthew Garrett for background info on ACPI innards which
  * normal people aren't meant to understand :-)
 #include <linux/types.h>
 #include <linux/acpi.h>
 
+#define BT_KILLSWITCH_MASK     0x01
+#define BT_PLUGGED_MASK                0x40
+#define BT_POWER_MASK          0x80
+
 MODULE_AUTHOR("Jes Sorensen <Jes.Sorensen@gmail.com>");
 MODULE_DESCRIPTION("Toshiba Laptop ACPI Bluetooth Enable Driver");
 MODULE_LICENSE("GPL");
@@ -57,32 +62,107 @@ static struct acpi_driver toshiba_bt_rfkill_driver = {
        .drv.pm =       &toshiba_bt_pm,
 };
 
+static int toshiba_bluetooth_present(acpi_handle handle)
+{
+       acpi_status result;
+       u64 bt_present;
+
+       /*
+        * Some Toshiba laptops may have a fake TOS6205 device in
+        * their ACPI BIOS, so query the _STA method to see if there
+        * is really anything there.
+        */
+       result = acpi_evaluate_integer(handle, "_STA", NULL, &bt_present);
+       if (ACPI_FAILURE(result)) {
+               pr_err("ACPI call to query Bluetooth presence failed");
+               return -ENXIO;
+       } else if (!bt_present) {
+               pr_info("Bluetooth device not present\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static int toshiba_bluetooth_status(acpi_handle handle)
+{
+       acpi_status result;
+       u64 status;
+
+       result = acpi_evaluate_integer(handle, "BTST", NULL, &status);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not get Bluetooth device status\n");
+               return -ENXIO;
+       }
+
+       pr_info("Bluetooth status %llu\n", status);
+
+       return status;
+}
 
 static int toshiba_bluetooth_enable(acpi_handle handle)
 {
-       acpi_status res1, res2;
-       u64 result;
+       acpi_status result;
+       bool killswitch;
+       bool powered;
+       bool plugged;
+       int status;
 
        /*
         * Query ACPI to verify RFKill switch is set to 'on'.
         * If not, we return silently, no need to report it as
         * an error.
         */
-       res1 = acpi_evaluate_integer(handle, "BTST", NULL, &result);
-       if (ACPI_FAILURE(res1))
-               return res1;
-       if (!(result & 0x01))
-               return 0;
+       status = toshiba_bluetooth_status(handle);
+       if (status < 0)
+               return status;
+
+       killswitch = (status & BT_KILLSWITCH_MASK) ? true : false;
+       powered = (status & BT_POWER_MASK) ? true : false;
+       plugged = (status & BT_PLUGGED_MASK) ? true : false;
 
-       pr_info("Re-enabling Toshiba Bluetooth\n");
-       res1 = acpi_evaluate_object(handle, "AUSB", NULL, NULL);
-       res2 = acpi_evaluate_object(handle, "BTPO", NULL, NULL);
-       if (!ACPI_FAILURE(res1) || !ACPI_FAILURE(res2))
+       if (!killswitch)
                return 0;
+       /*
+        * This check ensures to only enable the device if it is powered
+        * off or detached, as some recent devices somehow pass the killswitch
+        * test, causing a loop enabling/disabling the device, see bug 93911.
+        */
+       if (powered || plugged)
+               return 0;
+
+       result = acpi_evaluate_object(handle, "AUSB", NULL, NULL);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not attach USB Bluetooth device\n");
+               return -ENXIO;
+       }
+
+       result = acpi_evaluate_object(handle, "BTPO", NULL, NULL);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not power ON Bluetooth device\n");
+               return -ENXIO;
+       }
+
+       return 0;
+}
+
+static int toshiba_bluetooth_disable(acpi_handle handle)
+{
+       acpi_status result;
+
+       result = acpi_evaluate_object(handle, "BTPF", NULL, NULL);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not power OFF Bluetooth device\n");
+               return -ENXIO;
+       }
 
-       pr_warn("Failed to re-enable Toshiba Bluetooth\n");
+       result = acpi_evaluate_object(handle, "DUSB", NULL, NULL);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not detach USB Bluetooth device\n");
+               return -ENXIO;
+       }
 
-       return -ENODEV;
+       return 0;
 }
 
 static void toshiba_bt_rfkill_notify(struct acpi_device *device, u32 event)
@@ -99,23 +179,18 @@ static int toshiba_bt_resume(struct device *dev)
 
 static int toshiba_bt_rfkill_add(struct acpi_device *device)
 {
-       acpi_status status;
-       u64 bt_present;
-       int result = -ENODEV;
+       int result;
 
-       /*
-        * Some Toshiba laptops may have a fake TOS6205 device in
-        * their ACPI BIOS, so query the _STA method to see if there
-        * is really anything there, before trying to enable it.
-        */
-       status = acpi_evaluate_integer(device->handle, "_STA", NULL,
-                                      &bt_present);
+       result = toshiba_bluetooth_present(device->handle);
+       if (result)
+               return result;
 
-       if (!ACPI_FAILURE(status) && bt_present) {
-               pr_info("Detected Toshiba ACPI Bluetooth device - "
-                       "installing RFKill handler\n");
-               result = toshiba_bluetooth_enable(device->handle);
-       }
+       pr_info("Toshiba ACPI Bluetooth device driver\n");
+
+       /* Enable the BT device */
+       result = toshiba_bluetooth_enable(device->handle);
+       if (result)
+               return result;
 
        return result;
 }
@@ -123,7 +198,7 @@ static int toshiba_bt_rfkill_add(struct acpi_device *device)
 static int toshiba_bt_rfkill_remove(struct acpi_device *device)
 {
        /* clean up */
-       return 0;
+       return toshiba_bluetooth_disable(device->handle);
 }
 
 module_acpi_driver(toshiba_bt_rfkill_driver);
index 737e56d..aac4757 100644 (file)
@@ -45,7 +45,6 @@ MODULE_LICENSE("GPL");
 
 #define ACPI_WMI_CLASS "wmi"
 
-static DEFINE_MUTEX(wmi_data_lock);
 static LIST_HEAD(wmi_block_list);
 
 struct guid_block {
@@ -240,10 +239,10 @@ static bool find_guid(const char *guid_string, struct wmi_block **out)
                if (memcmp(block->guid, guid_input, 16) == 0) {
                        if (out)
                                *out = wblock;
-                       return 1;
+                       return true;
                }
        }
-       return 0;
+       return false;
 }
 
 static acpi_status wmi_method_enable(struct wmi_block *wblock, int enable)
index e03877c..fd24323 100644 (file)
@@ -1064,6 +1064,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
        RAPL_CPU(0x3f, rapl_defaults_hsw_server),/* Haswell servers */
        RAPL_CPU(0x4f, rapl_defaults_hsw_server),/* Broadwell servers */
        RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */
+       RAPL_CPU(0x4E, rapl_defaults_core),/* Skylake */
        RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */
        RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */
        RAPL_CPU(0x56, rapl_defaults_core),/* Future Xeon */
index 810aef3..ba34c7d 100644 (file)
@@ -573,7 +573,7 @@ EXPORT_SYMBOL_GPL(of_pwm_get);
  * @table: array of consumers to register
  * @num: number of consumers in table
  */
-void __init pwm_add_table(struct pwm_lookup *table, size_t num)
+void pwm_add_table(struct pwm_lookup *table, size_t num)
 {
        mutex_lock(&pwm_lookup_lock);
 
index 522f707..fa5feab 100644 (file)
@@ -225,6 +225,10 @@ static const struct of_device_id atmel_hlcdc_dt_ids[] = {
                .compatible = "atmel,sama5d3-hlcdc",
                .data = &atmel_hlcdc_pwm_sama5d3_errata,
        },
+       {
+               .compatible = "atmel,sama5d4-hlcdc",
+               .data = &atmel_hlcdc_pwm_sama5d3_errata,
+       },
        { /* sentinel */ },
 };
 
index f75ecb0..b430811 100644 (file)
 #define  PERIOD_CDIV(div)      (((div) & 0x7) << 20)
 #define  PERIOD_CDIV_MAX       8
 
+static const unsigned int cdiv[PERIOD_CDIV_MAX] = {
+       1, 2, 4, 8, 16, 64, 256, 1024
+};
+
 struct mxs_pwm_chip {
        struct pwm_chip chip;
        struct clk *clk;
@@ -54,13 +58,13 @@ static int mxs_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
 
        rate = clk_get_rate(mxs->clk);
        while (1) {
-               c = rate / (1 << div);
+               c = rate / cdiv[div];
                c = c * period_ns;
                do_div(c, 1000000000);
                if (c < PERIOD_PERIOD_MAX)
                        break;
                div++;
-               if (div > PERIOD_CDIV_MAX)
+               if (div >= PERIOD_CDIV_MAX)
                        return -EINVAL;
        }
 
index 3fb775d..34b5c27 100644 (file)
@@ -202,7 +202,7 @@ static const struct pwm_ops pca9685_pwm_ops = {
        .owner = THIS_MODULE,
 };
 
-static struct regmap_config pca9685_regmap_i2c_config = {
+static const struct regmap_config pca9685_regmap_i2c_config = {
        .reg_bits = 8,
        .val_bits = 8,
        .max_register = PCA9685_NUMREGS,
index 3e9b583..ff201e1 100644 (file)
@@ -269,12 +269,31 @@ static void pwm_samsung_disable(struct pwm_chip *chip, struct pwm_device *pwm)
        spin_unlock_irqrestore(&samsung_pwm_lock, flags);
 }
 
+static void pwm_samsung_manual_update(struct samsung_pwm_chip *chip,
+                                     struct pwm_device *pwm)
+{
+       unsigned int tcon_chan = to_tcon_channel(pwm->hwpwm);
+       u32 tcon;
+       unsigned long flags;
+
+       spin_lock_irqsave(&samsung_pwm_lock, flags);
+
+       tcon = readl(chip->base + REG_TCON);
+       tcon |= TCON_MANUALUPDATE(tcon_chan);
+       writel(tcon, chip->base + REG_TCON);
+
+       tcon &= ~TCON_MANUALUPDATE(tcon_chan);
+       writel(tcon, chip->base + REG_TCON);
+
+       spin_unlock_irqrestore(&samsung_pwm_lock, flags);
+}
+
 static int pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm,
                              int duty_ns, int period_ns)
 {
        struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip);
        struct samsung_pwm_channel *chan = pwm_get_chip_data(pwm);
-       u32 tin_ns = chan->tin_ns, tcnt, tcmp;
+       u32 tin_ns = chan->tin_ns, tcnt, tcmp, oldtcmp;
 
        /*
         * We currently avoid using 64bit arithmetic by using the
@@ -288,6 +307,7 @@ static int pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm,
                return 0;
 
        tcnt = readl(our_chip->base + REG_TCNTB(pwm->hwpwm));
+       oldtcmp = readl(our_chip->base + REG_TCMPB(pwm->hwpwm));
 
        /* We need tick count for calculation, not last tick. */
        ++tcnt;
@@ -335,6 +355,16 @@ static int pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm,
        writel(tcnt, our_chip->base + REG_TCNTB(pwm->hwpwm));
        writel(tcmp, our_chip->base + REG_TCMPB(pwm->hwpwm));
 
+       /*
+        * In case the PWM is currently at 100% duty cycle, force a manual
+        * update to prevent the signal staying high if the PWM is disabled
+        * shortly afer this update (before it autoreloaded the new values).
+        */
+       if (oldtcmp == (u32) -1) {
+               dev_dbg(our_chip->chip.dev, "Forcing manual update");
+               pwm_samsung_manual_update(our_chip, pwm);
+       }
+
        chan->period_ns = period_ns;
        chan->tin_ns = tin_ns;
        chan->duty_ns = duty_ns;
index 71d7802..6f1fa17 100644 (file)
@@ -1201,13 +1201,9 @@ static int virtio_ccw_online(struct ccw_device *cdev)
        vcdev->vdev.id.vendor = cdev->id.cu_type;
        vcdev->vdev.id.device = cdev->id.cu_model;
 
-       if (virtio_device_is_legacy_only(vcdev->vdev.id)) {
-               vcdev->revision = 0;
-       } else {
-               ret = virtio_ccw_set_transport_rev(vcdev);
-               if (ret)
-                       goto out_free;
-       }
+       ret = virtio_ccw_set_transport_rev(vcdev);
+       if (ret)
+               goto out_free;
 
        ret = register_virtio_device(&vcdev->vdev);
        if (ret) {
index 5741825..fe8a8d1 100644 (file)
@@ -3065,7 +3065,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
 {
        struct qla_hw_data *ha = vha->hw;
        struct se_cmd *se_cmd;
-       struct target_core_fabric_ops *tfo;
+       const struct target_core_fabric_ops *tfo;
        struct qla_tgt_cmd *cmd;
 
        if (handle & CTIO_INTERMEDIATE_HANDLE_MARK) {
index ab4879e..68c2002 100644 (file)
@@ -53,9 +53,8 @@
 static struct workqueue_struct *tcm_qla2xxx_free_wq;
 static struct workqueue_struct *tcm_qla2xxx_cmd_wq;
 
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *tcm_qla2xxx_fabric_configfs;
-static struct target_fabric_configfs *tcm_qla2xxx_npiv_fabric_configfs;
+static const struct target_core_fabric_ops tcm_qla2xxx_ops;
+static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops;
 
 /*
  * Parse WWN.
@@ -336,6 +335,14 @@ static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg
        return tpg->tpg_attrib.demo_mode_login_only;
 }
 
+static int tcm_qla2xxx_check_prot_fabric_only(struct se_portal_group *se_tpg)
+{
+       struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
+                               struct tcm_qla2xxx_tpg, se_tpg);
+
+       return tpg->tpg_attrib.fabric_prot_type;
+}
+
 static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl(
        struct se_portal_group *se_tpg)
 {
@@ -1082,8 +1089,53 @@ static ssize_t tcm_qla2xxx_tpg_store_enable(
 
 TF_TPG_BASE_ATTR(tcm_qla2xxx, enable, S_IRUGO | S_IWUSR);
 
+static ssize_t tcm_qla2xxx_tpg_show_dynamic_sessions(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       return target_show_dynamic_sessions(se_tpg, page);
+}
+
+TF_TPG_BASE_ATTR_RO(tcm_qla2xxx, dynamic_sessions);
+
+static ssize_t tcm_qla2xxx_tpg_store_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       const char *page,
+       size_t count)
+{
+       struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
+                               struct tcm_qla2xxx_tpg, se_tpg);
+       unsigned long val;
+       int ret = kstrtoul(page, 0, &val);
+
+       if (ret) {
+               pr_err("kstrtoul() returned %d for fabric_prot_type\n", ret);
+               return ret;
+       }
+       if (val != 0 && val != 1 && val != 3) {
+               pr_err("Invalid qla2xxx fabric_prot_type: %lu\n", val);
+               return -EINVAL;
+       }
+       tpg->tpg_attrib.fabric_prot_type = val;
+
+       return count;
+}
+
+static ssize_t tcm_qla2xxx_tpg_show_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
+                               struct tcm_qla2xxx_tpg, se_tpg);
+
+       return sprintf(page, "%d\n", tpg->tpg_attrib.fabric_prot_type);
+}
+TF_TPG_BASE_ATTR(tcm_qla2xxx, fabric_prot_type, S_IRUGO | S_IWUSR);
+
 static struct configfs_attribute *tcm_qla2xxx_tpg_attrs[] = {
        &tcm_qla2xxx_tpg_enable.attr,
+       &tcm_qla2xxx_tpg_dynamic_sessions.attr,
+       &tcm_qla2xxx_tpg_fabric_prot_type.attr,
        NULL,
 };
 
@@ -1124,7 +1176,7 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg(
        tpg->tpg_attrib.cache_dynamic_acls = 1;
        tpg->tpg_attrib.demo_mode_login_only = 1;
 
-       ret = core_tpg_register(&tcm_qla2xxx_fabric_configfs->tf_ops, wwn,
+       ret = core_tpg_register(&tcm_qla2xxx_ops, wwn,
                                &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
        if (ret < 0) {
                kfree(tpg);
@@ -1244,7 +1296,7 @@ static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg(
        tpg->tpg_attrib.cache_dynamic_acls = 1;
        tpg->tpg_attrib.demo_mode_login_only = 1;
 
-       ret = core_tpg_register(&tcm_qla2xxx_npiv_fabric_configfs->tf_ops, wwn,
+       ret = core_tpg_register(&tcm_qla2xxx_npiv_ops, wwn,
                                &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
        if (ret < 0) {
                kfree(tpg);
@@ -1560,7 +1612,7 @@ static int tcm_qla2xxx_check_initiator_node_acl(
 
        se_sess = transport_init_session_tags(num_tags,
                                              sizeof(struct qla_tgt_cmd),
-                                             TARGET_PROT_NORMAL);
+                                             TARGET_PROT_ALL);
        if (IS_ERR(se_sess)) {
                pr_err("Unable to initialize struct se_session\n");
                return PTR_ERR(se_sess);
@@ -1934,7 +1986,9 @@ static struct configfs_attribute *tcm_qla2xxx_wwn_attrs[] = {
        NULL,
 };
 
-static struct target_core_fabric_ops tcm_qla2xxx_ops = {
+static const struct target_core_fabric_ops tcm_qla2xxx_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "qla2xxx",
        .get_fabric_name                = tcm_qla2xxx_get_fabric_name,
        .get_fabric_proto_ident         = tcm_qla2xxx_get_fabric_proto_ident,
        .tpg_get_wwn                    = tcm_qla2xxx_get_fabric_wwn,
@@ -1949,6 +2003,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = {
                                        tcm_qla2xxx_check_demo_write_protect,
        .tpg_check_prod_mode_write_protect =
                                        tcm_qla2xxx_check_prod_write_protect,
+       .tpg_check_prot_fabric_only     = tcm_qla2xxx_check_prot_fabric_only,
        .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only,
        .tpg_alloc_fabric_acl           = tcm_qla2xxx_alloc_fabric_acl,
        .tpg_release_fabric_acl         = tcm_qla2xxx_release_fabric_acl,
@@ -1983,9 +2038,15 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = {
        .fabric_drop_np                 = NULL,
        .fabric_make_nodeacl            = tcm_qla2xxx_make_nodeacl,
        .fabric_drop_nodeacl            = tcm_qla2xxx_drop_nodeacl,
+
+       .tfc_wwn_attrs                  = tcm_qla2xxx_wwn_attrs,
+       .tfc_tpg_base_attrs             = tcm_qla2xxx_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = tcm_qla2xxx_tpg_attrib_attrs,
 };
 
-static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
+static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "qla2xxx_npiv",
        .get_fabric_name                = tcm_qla2xxx_npiv_get_fabric_name,
        .get_fabric_proto_ident         = tcm_qla2xxx_get_fabric_proto_ident,
        .tpg_get_wwn                    = tcm_qla2xxx_get_fabric_wwn,
@@ -2033,94 +2094,26 @@ static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
        .fabric_drop_np                 = NULL,
        .fabric_make_nodeacl            = tcm_qla2xxx_make_nodeacl,
        .fabric_drop_nodeacl            = tcm_qla2xxx_drop_nodeacl,
+
+       .tfc_wwn_attrs                  = tcm_qla2xxx_wwn_attrs,
+       .tfc_tpg_base_attrs             = tcm_qla2xxx_npiv_tpg_attrs,
 };
 
 static int tcm_qla2xxx_register_configfs(void)
 {
-       struct target_fabric_configfs *fabric, *npiv_fabric;
        int ret;
 
        pr_debug("TCM QLOGIC QLA2XXX fabric module %s on %s/%s on "
            UTS_RELEASE"\n", TCM_QLA2XXX_VERSION, utsname()->sysname,
            utsname()->machine);
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "qla2xxx");
-       if (IS_ERR(fabric)) {
-               pr_err("target_fabric_configfs_init() failed\n");
-               return PTR_ERR(fabric);
-       }
-       /*
-        * Setup fabric->tf_ops from our local tcm_qla2xxx_ops
-        */
-       fabric->tf_ops = tcm_qla2xxx_ops;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_qla2xxx_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs =
-                                               tcm_qla2xxx_tpg_attrib_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * Register the fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() failed for TCM_QLA2XXX\n");
+
+       ret = target_register_template(&tcm_qla2xxx_ops);
+       if (ret)
                return ret;
-       }
-       /*
-        * Setup our local pointer to *fabric
-        */
-       tcm_qla2xxx_fabric_configfs = fabric;
-       pr_debug("TCM_QLA2XXX[0] - Set fabric -> tcm_qla2xxx_fabric_configfs\n");
 
-       /*
-        * Register the top level struct config_item_type for NPIV with TCM core
-        */
-       npiv_fabric = target_fabric_configfs_init(THIS_MODULE, "qla2xxx_npiv");
-       if (IS_ERR(npiv_fabric)) {
-               pr_err("target_fabric_configfs_init() failed\n");
-               ret = PTR_ERR(npiv_fabric);
-               goto out_fabric;
-       }
-       /*
-        * Setup fabric->tf_ops from our local tcm_qla2xxx_npiv_ops
-        */
-       npiv_fabric->tf_ops = tcm_qla2xxx_npiv_ops;
-       /*
-        * Setup default attribute lists for various npiv_fabric->tf_cit_tmpl
-        */
-       npiv_fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs =
-           tcm_qla2xxx_npiv_tpg_attrs;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * Register the npiv_fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(npiv_fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() failed for TCM_QLA2XXX\n");
+       ret = target_register_template(&tcm_qla2xxx_npiv_ops);
+       if (ret)
                goto out_fabric;
-       }
-       /*
-        * Setup our local pointer to *npiv_fabric
-        */
-       tcm_qla2xxx_npiv_fabric_configfs = npiv_fabric;
-       pr_debug("TCM_QLA2XXX[0] - Set fabric -> tcm_qla2xxx_npiv_fabric_configfs\n");
 
        tcm_qla2xxx_free_wq = alloc_workqueue("tcm_qla2xxx_free",
                                                WQ_MEM_RECLAIM, 0);
@@ -2140,9 +2133,9 @@ static int tcm_qla2xxx_register_configfs(void)
 out_free_wq:
        destroy_workqueue(tcm_qla2xxx_free_wq);
 out_fabric_npiv:
-       target_fabric_configfs_deregister(tcm_qla2xxx_npiv_fabric_configfs);
+       target_unregister_template(&tcm_qla2xxx_npiv_ops);
 out_fabric:
-       target_fabric_configfs_deregister(tcm_qla2xxx_fabric_configfs);
+       target_unregister_template(&tcm_qla2xxx_ops);
        return ret;
 }
 
@@ -2151,13 +2144,8 @@ static void tcm_qla2xxx_deregister_configfs(void)
        destroy_workqueue(tcm_qla2xxx_cmd_wq);
        destroy_workqueue(tcm_qla2xxx_free_wq);
 
-       target_fabric_configfs_deregister(tcm_qla2xxx_fabric_configfs);
-       tcm_qla2xxx_fabric_configfs = NULL;
-       pr_debug("TCM_QLA2XXX[0] - Cleared tcm_qla2xxx_fabric_configfs\n");
-
-       target_fabric_configfs_deregister(tcm_qla2xxx_npiv_fabric_configfs);
-       tcm_qla2xxx_npiv_fabric_configfs = NULL;
-       pr_debug("TCM_QLA2XXX[0] - Cleared tcm_qla2xxx_npiv_fabric_configfs\n");
+       target_unregister_template(&tcm_qla2xxx_ops);
+       target_unregister_template(&tcm_qla2xxx_npiv_ops);
 }
 
 static int __init tcm_qla2xxx_init(void)
index 10c0021..2329511 100644 (file)
@@ -33,6 +33,7 @@ struct tcm_qla2xxx_tpg_attrib {
        int demo_mode_write_protect;
        int prod_mode_write_protect;
        int demo_mode_login_only;
+       int fabric_prot_type;
 };
 
 struct tcm_qla2xxx_tpg {
index 186924a..f6bac9e 100644 (file)
@@ -1023,7 +1023,6 @@ static struct dma_chan *rspi_request_dma_chan(struct device *dev,
        }
 
        memset(&cfg, 0, sizeof(cfg));
-       cfg.slave_id = id;
        cfg.direction = dir;
        if (dir == DMA_MEM_TO_DEV) {
                cfg.dst_addr = port_addr;
index e57eec0..bcc7c63 100644 (file)
@@ -1030,7 +1030,6 @@ static struct dma_chan *sh_msiof_request_dma_chan(struct device *dev,
        }
 
        memset(&cfg, 0, sizeof(cfg));
-       cfg.slave_id = id;
        cfg.direction = dir;
        if (dir == DMA_MEM_TO_DEV) {
                cfg.dst_addr = port_addr;
index 0e3d8c7..b0b96ab 100644 (file)
@@ -1106,6 +1106,7 @@ struct dma_buf *ion_share_dma_buf(struct ion_client *client,
        struct ion_buffer *buffer;
        struct dma_buf *dmabuf;
        bool valid_handle;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 
        mutex_lock(&client->lock);
        valid_handle = ion_handle_validate(client, handle);
@@ -1118,8 +1119,12 @@ struct dma_buf *ion_share_dma_buf(struct ion_client *client,
        ion_buffer_get(buffer);
        mutex_unlock(&client->lock);
 
-       dmabuf = dma_buf_export(buffer, &dma_buf_ops, buffer->size, O_RDWR,
-                               NULL);
+       exp_info.ops = &dma_buf_ops;
+       exp_info.size = buffer->size;
+       exp_info.flags = O_RDWR;
+       exp_info.priv = buffer;
+
+       dmabuf = dma_buf_export(&exp_info);
        if (IS_ERR(dmabuf)) {
                ion_buffer_put(buffer);
                return dmabuf;
index fe1fd05..5af0135 100644 (file)
@@ -153,7 +153,7 @@ static int ll_ddelete(const struct dentry *de)
 
        CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n",
               d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping",
-              de, de, de->d_parent, de->d_inode,
+              de, de, de->d_parent, d_inode(de),
               d_unhashed(de) ? "" : "hashed,",
               list_empty(&de->d_subdirs) ? "" : "subdirs");
 
@@ -167,8 +167,8 @@ static int ll_ddelete(const struct dentry *de)
 #if 0
        /* if not ldlm lock for this inode, set i_nlink to 0 so that
         * this inode can be recycled later b=20433 */
-       if (de->d_inode && !find_cbdata(de->d_inode))
-               clear_nlink(de->d_inode);
+       if (d_really_is_positive(de) && !find_cbdata(d_inode(de)))
+               clear_nlink(d_inode(de));
 #endif
 
        if (d_lustre_invalid((struct dentry *)de))
@@ -181,7 +181,7 @@ int ll_d_init(struct dentry *de)
        LASSERT(de != NULL);
 
        CDEBUG(D_DENTRY, "ldd on dentry %pd (%p) parent %p inode %p refc %d\n",
-               de, de, de->d_parent, de->d_inode,
+               de, de, de->d_parent, d_inode(de),
                d_count(de));
 
        if (de->d_fsdata == NULL) {
@@ -261,7 +261,7 @@ void ll_invalidate_aliases(struct inode *inode)
        ll_d_hlist_for_each_entry(dentry, p, &inode->i_dentry, d_u.d_alias) {
                CDEBUG(D_DENTRY, "dentry in drop %pd (%p) parent %p inode %p flags %d\n",
                       dentry, dentry, dentry->d_parent,
-                      dentry->d_inode, dentry->d_flags);
+                      d_inode(dentry), dentry->d_flags);
 
                d_lustre_invalidate(dentry, 0);
        }
@@ -309,7 +309,7 @@ void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
 static int ll_revalidate_dentry(struct dentry *dentry,
                                unsigned int lookup_flags)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
 
        /*
         * if open&create is set, talk to MDS to make sure file is created if
@@ -329,7 +329,7 @@ static int ll_revalidate_dentry(struct dentry *dentry,
        if (lookup_flags & LOOKUP_RCU)
                return -ECHILD;
 
-       do_statahead_enter(dir, &dentry, dentry->d_inode == NULL);
+       do_statahead_enter(dir, &dentry, d_inode(dentry) == NULL);
        ll_statahead_mark(dir, dentry);
        return 1;
 }
index 529062e..4b44c63 100644 (file)
@@ -388,7 +388,7 @@ int ll_file_release(struct inode *inode, struct file *file)
 static int ll_intent_file_open(struct dentry *dentry, void *lmm,
                               int lmmsize, struct lookup_intent *itp)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct dentry *parent = dentry->d_parent;
        const char *name = dentry->d_name.name;
@@ -413,7 +413,7 @@ static int ll_intent_file_open(struct dentry *dentry, void *lmm,
                        opc = LUSTRE_OPC_CREATE;
        }
 
-       op_data  = ll_prep_md_op_data(NULL, parent->d_inode,
+       op_data  = ll_prep_md_op_data(NULL, d_inode(parent),
                                      inode, name, len,
                                      O_RDWR, opc, NULL);
        if (IS_ERR(op_data))
@@ -2896,7 +2896,7 @@ static int ll_inode_revalidate_fini(struct inode *inode, int rc)
 
 static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ptlrpc_request *req = NULL;
        struct obd_export *exp;
        int rc = 0;
@@ -2948,12 +2948,12 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
                   do_lookup() -> ll_revalidate_it(). We cannot use d_drop
                   here to preserve get_cwd functionality on 2.6.
                   Bug 10503 */
-               if (!dentry->d_inode->i_nlink)
+               if (!d_inode(dentry)->i_nlink)
                        d_lustre_invalidate(dentry, 0);
 
                ll_lookup_finish_locks(&oit, inode);
-       } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
-               struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
+       } else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
+               struct ll_sb_info *sbi = ll_i2sbi(d_inode(dentry));
                u64 valid = OBD_MD_FLGETATTR;
                struct md_op_data *op_data;
                int ealen = 0;
@@ -2991,7 +2991,7 @@ out:
 
 static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int rc;
 
        rc = __ll_inode_revalidate(dentry, ibits);
@@ -3019,7 +3019,7 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
 
 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
 {
-       struct inode *inode = de->d_inode;
+       struct inode *inode = d_inode(de);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ll_inode_info *lli = ll_i2info(inode);
        int res = 0;
index e7422f5..5f918e3 100644 (file)
@@ -1488,7 +1488,7 @@ static inline void d_lustre_invalidate(struct dentry *dentry, int nested)
 {
        CDEBUG(D_DENTRY, "invalidate dentry %pd (%p) parent %p inode %p refc %d\n",
               dentry, dentry,
-              dentry->d_parent, dentry->d_inode, d_count(dentry));
+              dentry->d_parent, d_inode(dentry), d_count(dentry));
 
        spin_lock_nested(&dentry->d_lock,
                         nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL);
index bf1ec27..a27af78 100644 (file)
@@ -1166,7 +1166,7 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
                  struct md_open_data **mod)
 {
        struct lustre_md md;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ptlrpc_request *request = NULL;
        int rc, ia_valid;
@@ -1290,7 +1290,7 @@ static int ll_setattr_ost(struct inode *inode, struct iattr *attr)
  */
 int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ll_inode_info *lli = ll_i2info(inode);
        struct md_op_data *op_data = NULL;
        struct md_open_data *mod = NULL;
@@ -1465,7 +1465,7 @@ out:
 
 int ll_setattr(struct dentry *de, struct iattr *attr)
 {
-       int mode = de->d_inode->i_mode;
+       int mode = d_inode(de)->i_mode;
 
        if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
                              (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
index 243a784..db43b81 100644 (file)
@@ -230,11 +230,11 @@ static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name,
 static int ll_get_name(struct dentry *dentry, char *name,
                       struct dentry *child)
 {
-       struct inode *dir = dentry->d_inode;
+       struct inode *dir = d_inode(dentry);
        int rc;
        struct ll_getname_data lgd = {
                .lgd_name = name,
-               .lgd_fid = ll_i2info(child->d_inode)->lli_fid,
+               .lgd_fid = ll_i2info(d_inode(child))->lli_fid,
                .ctx.actor = ll_nfs_get_name_filldir,
        };
 
@@ -282,7 +282,7 @@ static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
 static struct dentry *ll_get_parent(struct dentry *dchild)
 {
        struct ptlrpc_request *req = NULL;
-       struct inode      *dir = dchild->d_inode;
+       struct inode      *dir = d_inode(dchild);
        struct ll_sb_info     *sbi;
        struct dentry    *result = NULL;
        struct mdt_body       *body;
index 49f1cb0..5a25dcd 100644 (file)
@@ -155,7 +155,7 @@ static void ll_invalidate_negative_children(struct inode *dir)
                        list_for_each_entry_safe(child, tmp_subdir,
                                                 &dentry->d_subdirs,
                                                 d_child) {
-                               if (child->d_inode == NULL)
+                               if (d_really_is_negative(child))
                                        d_lustre_invalidate(child, 1);
                        }
                }
@@ -392,7 +392,7 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
                        iput(inode);
                        CDEBUG(D_DENTRY,
                               "Reuse dentry %p inode %p refc %d flags %#x\n",
-                             new, new->d_inode, d_count(new), new->d_flags);
+                             new, d_inode(new), d_count(new), new->d_flags);
                        return new;
                }
        }
@@ -401,7 +401,7 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
                return ERR_PTR(rc);
        d_add(de, inode);
        CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
-              de, de->d_inode, d_count(de), de->d_flags);
+              de, d_inode(de), d_count(de), de->d_flags);
        return de;
 }
 
@@ -448,7 +448,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
                   !it_disposition(it, DISP_OPEN_CREATE)) {
                /* With DISP_OPEN_CREATE dentry will
                   instantiated in ll_create_it. */
-               LASSERT((*de)->d_inode == NULL);
+               LASSERT(d_inode(*de) == NULL);
                d_instantiate(*de, inode);
        }
 
@@ -541,7 +541,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
                goto out;
        }
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        if ((it->it_op & IT_OPEN) && inode &&
            !S_ISREG(inode->i_mode) &&
            !S_ISDIR(inode->i_mode)) {
@@ -638,9 +638,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
 
                        *opened |= FILE_CREATED;
                }
-               if (dentry->d_inode && it_disposition(it, DISP_OPEN_OPEN)) {
+               if (d_really_is_positive(dentry) && it_disposition(it, DISP_OPEN_OPEN)) {
                        /* Open dentry. */
-                       if (S_ISFIFO(dentry->d_inode->i_mode)) {
+                       if (S_ISFIFO(d_inode(dentry)->i_mode)) {
                                /* We cannot call open here as it would
                                 * deadlock.
                                 */
@@ -862,8 +862,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
 
 static inline void ll_get_child_fid(struct dentry *child, struct lu_fid *fid)
 {
-       if (child->d_inode)
-               *fid = *ll_inode2fid(child->d_inode);
+       if (d_really_is_positive(child))
+               *fid = *ll_inode2fid(d_inode(child));
 }
 
 /**
@@ -1076,7 +1076,7 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry,
 static int ll_link(struct dentry *old_dentry, struct inode *dir,
                   struct dentry *new_dentry)
 {
-       struct inode *src = old_dentry->d_inode;
+       struct inode *src = d_inode(old_dentry);
        struct ll_sb_info *sbi = ll_i2sbi(dir);
        struct ptlrpc_request *request = NULL;
        struct md_op_data *op_data;
index b75562c..7f80712 100644 (file)
@@ -880,7 +880,7 @@ static int do_sa_lookup(struct inode *dir, struct ll_sa_entry *entry)
 static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
                            struct dentry *dentry)
 {
-       struct inode         *inode = dentry->d_inode;
+       struct inode         *inode = d_inode(dentry);
        struct lookup_intent      it = { .it_op = IT_GETATTR,
                                         .d.lustre.it_lock_handle = 0 };
        struct md_enqueue_info   *minfo;
@@ -926,7 +926,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
 static void ll_statahead_one(struct dentry *parent, const char *entry_name,
                             int entry_name_len)
 {
-       struct inode         *dir    = parent->d_inode;
+       struct inode         *dir    = d_inode(parent);
        struct ll_inode_info     *lli    = ll_i2info(dir);
        struct ll_statahead_info *sai    = lli->lli_sai;
        struct dentry       *dentry = NULL;
@@ -944,8 +944,8 @@ static void ll_statahead_one(struct dentry *parent, const char *entry_name,
                rc = do_sa_lookup(dir, entry);
        } else {
                rc = do_sa_revalidate(dir, entry, dentry);
-               if (rc == 1 && agl_should_run(sai, dentry->d_inode))
-                       ll_agl_add(sai, dentry->d_inode, entry->se_index);
+               if (rc == 1 && agl_should_run(sai, d_inode(dentry)))
+                       ll_agl_add(sai, d_inode(dentry), entry->se_index);
        }
 
        if (dentry != NULL)
@@ -968,7 +968,7 @@ static void ll_statahead_one(struct dentry *parent, const char *entry_name,
 static int ll_agl_thread(void *arg)
 {
        struct dentry       *parent = (struct dentry *)arg;
-       struct inode         *dir    = parent->d_inode;
+       struct inode         *dir    = d_inode(parent);
        struct ll_inode_info     *plli   = ll_i2info(dir);
        struct ll_inode_info     *clli;
        struct ll_sb_info       *sbi    = ll_i2sbi(dir);
@@ -1042,7 +1042,7 @@ static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
        CDEBUG(D_READA, "start agl thread: sai %p, parent %pd\n",
               sai, parent);
 
-       plli = ll_i2info(parent->d_inode);
+       plli = ll_i2info(d_inode(parent));
        task = kthread_run(ll_agl_thread, parent,
                               "ll_agl_%u", plli->lli_opendir_pid);
        if (IS_ERR(task)) {
@@ -1059,7 +1059,7 @@ static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
 static int ll_statahead_thread(void *arg)
 {
        struct dentry       *parent = (struct dentry *)arg;
-       struct inode         *dir    = parent->d_inode;
+       struct inode         *dir    = d_inode(parent);
        struct ll_inode_info     *plli   = ll_i2info(dir);
        struct ll_inode_info     *clli;
        struct ll_sb_info       *sbi    = ll_i2sbi(dir);
@@ -1604,7 +1604,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
                        rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
                                                ll_inode2fid(inode), &bits);
                        if (rc == 1) {
-                               if ((*dentryp)->d_inode == NULL) {
+                               if (d_inode(*dentryp) == NULL) {
                                        struct dentry *alias;
 
                                        alias = ll_splice_alias(inode,
@@ -1614,13 +1614,13 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
                                                return PTR_ERR(alias);
                                        }
                                        *dentryp = alias;
-                               } else if ((*dentryp)->d_inode != inode) {
+                               } else if (d_inode(*dentryp) != inode) {
                                        /* revalidate, but inode is recreated */
                                        CDEBUG(D_READA,
                                              "stale dentry %pd inode %lu/%u, statahead inode %lu/%u\n",
                                              *dentryp,
-                                             (*dentryp)->d_inode->i_ino,
-                                             (*dentryp)->d_inode->i_generation,
+                                             d_inode(*dentryp)->i_ino,
+                                             d_inode(*dentryp)->i_generation,
                                              inode->i_ino,
                                              inode->i_generation);
                                        ll_sai_unplug(sai, entry);
@@ -1666,8 +1666,8 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
 
        /* get parent reference count here, and put it in ll_statahead_thread */
        parent = dget((*dentryp)->d_parent);
-       if (unlikely(sai->sai_inode != parent->d_inode)) {
-               struct ll_inode_info *nlli = ll_i2info(parent->d_inode);
+       if (unlikely(sai->sai_inode != d_inode(parent))) {
+               struct ll_inode_info *nlli = ll_i2info(d_inode(parent));
 
                CWARN("Race condition, someone changed %pd just now: old parent "DFID", new parent "DFID"\n",
                      *dentryp,
@@ -1689,7 +1689,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
        ll_sai_get(sai);
        lli->lli_sai = sai;
 
-       plli = ll_i2info(parent->d_inode);
+       plli = ll_i2info(d_inode(parent));
        rc = PTR_ERR(kthread_run(ll_statahead_thread, parent,
                                 "ll_sa_%u", plli->lli_opendir_pid));
        thread = &sai->sai_thread;
index 686b6a5..3711e67 100644 (file)
@@ -120,7 +120,7 @@ failed:
 
 static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ptlrpc_request *request = NULL;
        int rc;
        char *symname = NULL;
index b439936..e0fcbe1 100644 (file)
@@ -214,7 +214,7 @@ int ll_setxattr_common(struct inode *inode, const char *name,
 int ll_setxattr(struct dentry *dentry, const char *name,
                const void *value, size_t size, int flags)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        LASSERT(inode);
        LASSERT(name);
@@ -267,7 +267,7 @@ int ll_setxattr(struct dentry *dentry, const char *name,
 
 int ll_removexattr(struct dentry *dentry, const char *name)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        LASSERT(inode);
        LASSERT(name);
@@ -457,7 +457,7 @@ out:
 ssize_t ll_getxattr(struct dentry *dentry, const char *name,
                    void *buffer, size_t size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        LASSERT(inode);
        LASSERT(name);
@@ -545,7 +545,7 @@ out:
 
 ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int rc = 0, rc2 = 0;
        struct lov_mds_md *lmm = NULL;
        struct ptlrpc_request *request = NULL;
index 81d44c4..2573612 100644 (file)
@@ -31,12 +31,13 @@ config TCM_PSCSI
        Say Y here to enable the TCM/pSCSI subsystem plugin for non-buffered
        passthrough access to Linux/SCSI device
 
-config TCM_USER
+config TCM_USER2
        tristate "TCM/USER Subsystem Plugin for Linux"
        depends on UIO && NET
        help
        Say Y here to enable the TCM/USER subsystem plugin for a userspace
-       process to handle requests
+       process to handle requests. This is version 2 of the ABI; version 1
+       is obsolete.
 
 source "drivers/target/loopback/Kconfig"
 source "drivers/target/tcm_fc/Kconfig"
index bbb4a7d..e619c02 100644 (file)
@@ -22,7 +22,7 @@ obj-$(CONFIG_TARGET_CORE)     += target_core_mod.o
 obj-$(CONFIG_TCM_IBLOCK)       += target_core_iblock.o
 obj-$(CONFIG_TCM_FILEIO)       += target_core_file.o
 obj-$(CONFIG_TCM_PSCSI)                += target_core_pscsi.o
-obj-$(CONFIG_TCM_USER)         += target_core_user.o
+obj-$(CONFIG_TCM_USER2)                += target_core_user.o
 
 # Fabric modules
 obj-$(CONFIG_LOOPBACK_TARGET)  += loopback/
index 13a9240..0f43be9 100644 (file)
@@ -1,6 +1,5 @@
 iscsi_target_mod-y +=          iscsi_target_parameters.o \
                                iscsi_target_seq_pdu_list.o \
-                               iscsi_target_tq.o \
                                iscsi_target_auth.o \
                                iscsi_target_datain_values.o \
                                iscsi_target_device.o \
index 77d6425..34871a6 100644 (file)
@@ -33,8 +33,6 @@
 #include <target/iscsi/iscsi_target_core.h>
 #include "iscsi_target_parameters.h"
 #include "iscsi_target_seq_pdu_list.h"
-#include "iscsi_target_tq.h"
-#include "iscsi_target_configfs.h"
 #include "iscsi_target_datain_values.h"
 #include "iscsi_target_erl0.h"
 #include "iscsi_target_erl1.h"
@@ -537,7 +535,7 @@ static struct iscsit_transport iscsi_target_transport = {
 
 static int __init iscsi_target_init_module(void)
 {
-       int ret = 0;
+       int ret = 0, size;
 
        pr_debug("iSCSI-Target "ISCSIT_VERSION"\n");
 
@@ -546,24 +544,21 @@ static int __init iscsi_target_init_module(void)
                pr_err("Unable to allocate memory for iscsit_global\n");
                return -1;
        }
+       spin_lock_init(&iscsit_global->ts_bitmap_lock);
        mutex_init(&auth_id_lock);
        spin_lock_init(&sess_idr_lock);
        idr_init(&tiqn_idr);
        idr_init(&sess_idr);
 
-       ret = iscsi_target_register_configfs();
-       if (ret < 0)
+       ret = target_register_template(&iscsi_ops);
+       if (ret)
                goto out;
 
-       ret = iscsi_thread_set_init();
-       if (ret < 0)
+       size = BITS_TO_LONGS(ISCSIT_BITMAP_BITS) * sizeof(long);
+       iscsit_global->ts_bitmap = vzalloc(size);
+       if (!iscsit_global->ts_bitmap) {
+               pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
                goto configfs_out;
-
-       if (iscsi_allocate_thread_sets(TARGET_THREAD_SET_COUNT) !=
-                       TARGET_THREAD_SET_COUNT) {
-               pr_err("iscsi_allocate_thread_sets() returned"
-                       " unexpected value!\n");
-               goto ts_out1;
        }
 
        lio_qr_cache = kmem_cache_create("lio_qr_cache",
@@ -572,7 +567,7 @@ static int __init iscsi_target_init_module(void)
        if (!lio_qr_cache) {
                pr_err("nable to kmem_cache_create() for"
                                " lio_qr_cache\n");
-               goto ts_out2;
+               goto bitmap_out;
        }
 
        lio_dr_cache = kmem_cache_create("lio_dr_cache",
@@ -617,12 +612,13 @@ dr_out:
        kmem_cache_destroy(lio_dr_cache);
 qr_out:
        kmem_cache_destroy(lio_qr_cache);
-ts_out2:
-       iscsi_deallocate_thread_sets();
-ts_out1:
-       iscsi_thread_set_free();
+bitmap_out:
+       vfree(iscsit_global->ts_bitmap);
 configfs_out:
-       iscsi_target_deregister_configfs();
+       /* XXX: this probably wants it to be it's own unwind step.. */
+       if (iscsit_global->discovery_tpg)
+               iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1);
+       target_unregister_template(&iscsi_ops);
 out:
        kfree(iscsit_global);
        return -ENOMEM;
@@ -630,8 +626,6 @@ out:
 
 static void __exit iscsi_target_cleanup_module(void)
 {
-       iscsi_deallocate_thread_sets();
-       iscsi_thread_set_free();
        iscsit_release_discovery_tpg();
        iscsit_unregister_transport(&iscsi_target_transport);
        kmem_cache_destroy(lio_qr_cache);
@@ -639,8 +633,15 @@ static void __exit iscsi_target_cleanup_module(void)
        kmem_cache_destroy(lio_ooo_cache);
        kmem_cache_destroy(lio_r2t_cache);
 
-       iscsi_target_deregister_configfs();
+       /*
+        * Shutdown discovery sessions and disable discovery TPG
+        */
+       if (iscsit_global->discovery_tpg)
+               iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1);
 
+       target_unregister_template(&iscsi_ops);
+
+       vfree(iscsit_global->ts_bitmap);
        kfree(iscsit_global);
 }
 
@@ -990,7 +991,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
        /*
         * Initialize struct se_cmd descriptor from target_core_mod infrastructure
         */
-       transport_init_se_cmd(&cmd->se_cmd, &lio_target_fabric_configfs->tf_ops,
+       transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
                        conn->sess->se_sess, be32_to_cpu(hdr->data_length),
                        cmd->data_direction, sam_task_attr,
                        cmd->sense_buffer + 2);
@@ -1805,8 +1806,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
                u8 tcm_function;
                int ret;
 
-               transport_init_se_cmd(&cmd->se_cmd,
-                                     &lio_target_fabric_configfs->tf_ops,
+               transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
                                      conn->sess->se_sess, 0, DMA_NONE,
                                      TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
 
@@ -2155,7 +2155,6 @@ reject:
        cmd->text_in_ptr = NULL;
        return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, buf);
 }
-EXPORT_SYMBOL(iscsit_handle_text_cmd);
 
 int iscsit_logout_closesession(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 {
@@ -3715,17 +3714,16 @@ static int iscsit_send_reject(
 
 void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
 {
-       struct iscsi_thread_set *ts = conn->thread_set;
        int ord, cpu;
        /*
-        * thread_id is assigned from iscsit_global->ts_bitmap from
-        * within iscsi_thread_set.c:iscsi_allocate_thread_sets()
+        * bitmap_id is assigned from iscsit_global->ts_bitmap from
+        * within iscsit_start_kthreads()
         *
-        * Here we use thread_id to determine which CPU that this
-        * iSCSI connection's iscsi_thread_set will be scheduled to
+        * Here we use bitmap_id to determine which CPU that this
+        * iSCSI connection's RX/TX threads will be scheduled to
         * execute upon.
         */
-       ord = ts->thread_id % cpumask_weight(cpu_online_mask);
+       ord = conn->bitmap_id % cpumask_weight(cpu_online_mask);
        for_each_online_cpu(cpu) {
                if (ord-- == 0) {
                        cpumask_set_cpu(cpu, conn->conn_cpumask);
@@ -3914,7 +3912,7 @@ check_rsp_state:
        switch (state) {
        case ISTATE_SEND_LOGOUTRSP:
                if (!iscsit_logout_post_handler(cmd, conn))
-                       goto restart;
+                       return -ECONNRESET;
                /* fall through */
        case ISTATE_SEND_STATUS:
        case ISTATE_SEND_ASYNCMSG:
@@ -3942,8 +3940,6 @@ check_rsp_state:
 
 err:
        return -1;
-restart:
-       return -EAGAIN;
 }
 
 static int iscsit_handle_response_queue(struct iscsi_conn *conn)
@@ -3970,21 +3966,13 @@ static int iscsit_handle_response_queue(struct iscsi_conn *conn)
 int iscsi_target_tx_thread(void *arg)
 {
        int ret = 0;
-       struct iscsi_conn *conn;
-       struct iscsi_thread_set *ts = arg;
+       struct iscsi_conn *conn = arg;
        /*
         * Allow ourselves to be interrupted by SIGINT so that a
         * connection recovery / failure event can be triggered externally.
         */
        allow_signal(SIGINT);
 
-restart:
-       conn = iscsi_tx_thread_pre_handler(ts);
-       if (!conn)
-               goto out;
-
-       ret = 0;
-
        while (!kthread_should_stop()) {
                /*
                 * Ensure that both TX and RX per connection kthreads
@@ -3993,11 +3981,9 @@ restart:
                iscsit_thread_check_cpumask(conn, current, 1);
 
                wait_event_interruptible(conn->queues_wq,
-                                        !iscsit_conn_all_queues_empty(conn) ||
-                                        ts->status == ISCSI_THREAD_SET_RESET);
+                                        !iscsit_conn_all_queues_empty(conn));
 
-               if ((ts->status == ISCSI_THREAD_SET_RESET) ||
-                    signal_pending(current))
+               if (signal_pending(current))
                        goto transport_err;
 
 get_immediate:
@@ -4008,15 +3994,14 @@ get_immediate:
                ret = iscsit_handle_response_queue(conn);
                if (ret == 1)
                        goto get_immediate;
-               else if (ret == -EAGAIN)
-                       goto restart;
+               else if (ret == -ECONNRESET)
+                       goto out;
                else if (ret < 0)
                        goto transport_err;
        }
 
 transport_err:
        iscsit_take_action_for_connection_exit(conn);
-       goto restart;
 out:
        return 0;
 }
@@ -4111,8 +4096,7 @@ int iscsi_target_rx_thread(void *arg)
        int ret;
        u8 buffer[ISCSI_HDR_LEN], opcode;
        u32 checksum = 0, digest = 0;
-       struct iscsi_conn *conn = NULL;
-       struct iscsi_thread_set *ts = arg;
+       struct iscsi_conn *conn = arg;
        struct kvec iov;
        /*
         * Allow ourselves to be interrupted by SIGINT so that a
@@ -4120,11 +4104,6 @@ int iscsi_target_rx_thread(void *arg)
         */
        allow_signal(SIGINT);
 
-restart:
-       conn = iscsi_rx_thread_pre_handler(ts);
-       if (!conn)
-               goto out;
-
        if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) {
                struct completion comp;
                int rc;
@@ -4134,7 +4113,7 @@ restart:
                if (rc < 0)
                        goto transport_err;
 
-               goto out;
+               goto transport_err;
        }
 
        while (!kthread_should_stop()) {
@@ -4210,8 +4189,6 @@ transport_err:
        if (!signal_pending(current))
                atomic_set(&conn->transport_failed, 1);
        iscsit_take_action_for_connection_exit(conn);
-       goto restart;
-out:
        return 0;
 }
 
@@ -4273,7 +4250,24 @@ int iscsit_close_connection(
        if (conn->conn_transport->transport_type == ISCSI_TCP)
                complete(&conn->conn_logout_comp);
 
-       iscsi_release_thread_set(conn);
+       if (!strcmp(current->comm, ISCSI_RX_THREAD_NAME)) {
+               if (conn->tx_thread &&
+                   cmpxchg(&conn->tx_thread_active, true, false)) {
+                       send_sig(SIGINT, conn->tx_thread, 1);
+                       kthread_stop(conn->tx_thread);
+               }
+       } else if (!strcmp(current->comm, ISCSI_TX_THREAD_NAME)) {
+               if (conn->rx_thread &&
+                   cmpxchg(&conn->rx_thread_active, true, false)) {
+                       send_sig(SIGINT, conn->rx_thread, 1);
+                       kthread_stop(conn->rx_thread);
+               }
+       }
+
+       spin_lock(&iscsit_global->ts_bitmap_lock);
+       bitmap_release_region(iscsit_global->ts_bitmap, conn->bitmap_id,
+                             get_order(1));
+       spin_unlock(&iscsit_global->ts_bitmap_lock);
 
        iscsit_stop_timers_for_cmds(conn);
        iscsit_stop_nopin_response_timer(conn);
@@ -4383,8 +4377,6 @@ int iscsit_close_connection(
 
        iscsit_put_transport(conn->conn_transport);
 
-       conn->thread_set = NULL;
-
        pr_debug("Moving to TARG_CONN_STATE_FREE.\n");
        conn->conn_state = TARG_CONN_STATE_FREE;
        kfree(conn);
@@ -4551,15 +4543,13 @@ static void iscsit_logout_post_handler_closesession(
        struct iscsi_conn *conn)
 {
        struct iscsi_session *sess = conn->sess;
-
-       iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
-       iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+       int sleep = cmpxchg(&conn->tx_thread_active, true, false);
 
        atomic_set(&conn->conn_logout_remove, 0);
        complete(&conn->conn_logout_comp);
 
        iscsit_dec_conn_usage_count(conn);
-       iscsit_stop_session(sess, 1, 1);
+       iscsit_stop_session(sess, sleep, sleep);
        iscsit_dec_session_usage_count(sess);
        target_put_session(sess->se_sess);
 }
@@ -4567,13 +4557,12 @@ static void iscsit_logout_post_handler_closesession(
 static void iscsit_logout_post_handler_samecid(
        struct iscsi_conn *conn)
 {
-       iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
-       iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+       int sleep = cmpxchg(&conn->tx_thread_active, true, false);
 
        atomic_set(&conn->conn_logout_remove, 0);
        complete(&conn->conn_logout_comp);
 
-       iscsit_cause_connection_reinstatement(conn, 1);
+       iscsit_cause_connection_reinstatement(conn, sleep);
        iscsit_dec_conn_usage_count(conn);
 }
 
index e936d56..7d0f9c0 100644 (file)
@@ -35,7 +35,7 @@ extern void iscsit_stop_session(struct iscsi_session *, int, int);
 extern int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *, int);
 
 extern struct iscsit_global *iscsit_global;
-extern struct target_fabric_configfs *lio_target_fabric_configfs;
+extern const struct target_core_fabric_ops iscsi_ops;
 
 extern struct kmem_cache *lio_dr_cache;
 extern struct kmem_cache *lio_ooo_cache;
index 48384b6..469fce4 100644 (file)
@@ -37,9 +37,6 @@
 #include "iscsi_target_util.h"
 #include "iscsi_target.h"
 #include <target/iscsi/iscsi_target_stat.h>
-#include "iscsi_target_configfs.h"
-
-struct target_fabric_configfs *lio_target_fabric_configfs;
 
 struct lio_target_configfs_attribute {
        struct configfs_attribute attr;
@@ -1052,6 +1049,11 @@ TPG_ATTR(default_erl, S_IRUGO | S_IWUSR);
  */
 DEF_TPG_ATTRIB(t10_pi);
 TPG_ATTR(t10_pi, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_fabric_prot_type
+ */
+DEF_TPG_ATTRIB(fabric_prot_type);
+TPG_ATTR(fabric_prot_type, S_IRUGO | S_IWUSR);
 
 static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
        &iscsi_tpg_attrib_authentication.attr,
@@ -1065,6 +1067,7 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
        &iscsi_tpg_attrib_demo_mode_discovery.attr,
        &iscsi_tpg_attrib_default_erl.attr,
        &iscsi_tpg_attrib_t10_pi.attr,
+       &iscsi_tpg_attrib_fabric_prot_type.attr,
        NULL,
 };
 
@@ -1410,8 +1413,18 @@ out:
 
 TF_TPG_BASE_ATTR(lio_target, enable, S_IRUGO | S_IWUSR);
 
+static ssize_t lio_target_tpg_show_dynamic_sessions(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       return target_show_dynamic_sessions(se_tpg, page);
+}
+
+TF_TPG_BASE_ATTR_RO(lio_target, dynamic_sessions);
+
 static struct configfs_attribute *lio_target_tpg_attrs[] = {
        &lio_target_tpg_enable.attr,
+       &lio_target_tpg_dynamic_sessions.attr,
        NULL,
 };
 
@@ -1450,10 +1463,8 @@ static struct se_portal_group *lio_target_tiqn_addtpg(
        if (!tpg)
                return NULL;
 
-       ret = core_tpg_register(
-                       &lio_target_fabric_configfs->tf_ops,
-                       wwn, &tpg->tpg_se_tpg, tpg,
-                       TRANSPORT_TPG_TYPE_NORMAL);
+       ret = core_tpg_register(&iscsi_ops, wwn, &tpg->tpg_se_tpg,
+                               tpg, TRANSPORT_TPG_TYPE_NORMAL);
        if (ret < 0)
                return NULL;
 
@@ -1872,6 +1883,20 @@ static int lio_tpg_check_prod_mode_write_protect(
        return tpg->tpg_attrib.prod_mode_write_protect;
 }
 
+static int lio_tpg_check_prot_fabric_only(
+       struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+       /*
+        * Only report fabric_prot_type if t10_pi has also been enabled
+        * for incoming ib_isert sessions.
+        */
+       if (!tpg->tpg_attrib.t10_pi)
+               return 0;
+
+       return tpg->tpg_attrib.fabric_prot_type;
+}
+
 static void lio_tpg_release_fabric_acl(
        struct se_portal_group *se_tpg,
        struct se_node_acl *se_acl)
@@ -1953,115 +1978,60 @@ static void lio_release_cmd(struct se_cmd *se_cmd)
        iscsit_release_cmd(cmd);
 }
 
-/* End functions for target_core_fabric_ops */
-
-int iscsi_target_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       lio_target_fabric_configfs = NULL;
-       fabric = target_fabric_configfs_init(THIS_MODULE, "iscsi");
-       if (IS_ERR(fabric)) {
-               pr_err("target_fabric_configfs_init() for"
-                               " LIO-Target failed!\n");
-               return PTR_ERR(fabric);
-       }
-       /*
-        * Setup the fabric API of function pointers used by target_core_mod..
-        */
-       fabric->tf_ops.get_fabric_name = &iscsi_get_fabric_name;
-       fabric->tf_ops.get_fabric_proto_ident = &iscsi_get_fabric_proto_ident;
-       fabric->tf_ops.tpg_get_wwn = &lio_tpg_get_endpoint_wwn;
-       fabric->tf_ops.tpg_get_tag = &lio_tpg_get_tag;
-       fabric->tf_ops.tpg_get_default_depth = &lio_tpg_get_default_depth;
-       fabric->tf_ops.tpg_get_pr_transport_id = &iscsi_get_pr_transport_id;
-       fabric->tf_ops.tpg_get_pr_transport_id_len =
-                               &iscsi_get_pr_transport_id_len;
-       fabric->tf_ops.tpg_parse_pr_out_transport_id =
-                               &iscsi_parse_pr_out_transport_id;
-       fabric->tf_ops.tpg_check_demo_mode = &lio_tpg_check_demo_mode;
-       fabric->tf_ops.tpg_check_demo_mode_cache =
-                               &lio_tpg_check_demo_mode_cache;
-       fabric->tf_ops.tpg_check_demo_mode_write_protect =
-                               &lio_tpg_check_demo_mode_write_protect;
-       fabric->tf_ops.tpg_check_prod_mode_write_protect =
-                               &lio_tpg_check_prod_mode_write_protect;
-       fabric->tf_ops.tpg_alloc_fabric_acl = &lio_tpg_alloc_fabric_acl;
-       fabric->tf_ops.tpg_release_fabric_acl = &lio_tpg_release_fabric_acl;
-       fabric->tf_ops.tpg_get_inst_index = &lio_tpg_get_inst_index;
-       fabric->tf_ops.check_stop_free = &lio_check_stop_free,
-       fabric->tf_ops.release_cmd = &lio_release_cmd;
-       fabric->tf_ops.shutdown_session = &lio_tpg_shutdown_session;
-       fabric->tf_ops.close_session = &lio_tpg_close_session;
-       fabric->tf_ops.sess_get_index = &lio_sess_get_index;
-       fabric->tf_ops.sess_get_initiator_sid = &lio_sess_get_initiator_sid;
-       fabric->tf_ops.write_pending = &lio_write_pending;
-       fabric->tf_ops.write_pending_status = &lio_write_pending_status;
-       fabric->tf_ops.set_default_node_attributes =
-                               &lio_set_default_node_attributes;
-       fabric->tf_ops.get_task_tag = &iscsi_get_task_tag;
-       fabric->tf_ops.get_cmd_state = &iscsi_get_cmd_state;
-       fabric->tf_ops.queue_data_in = &lio_queue_data_in;
-       fabric->tf_ops.queue_status = &lio_queue_status;
-       fabric->tf_ops.queue_tm_rsp = &lio_queue_tm_rsp;
-       fabric->tf_ops.aborted_task = &lio_aborted_task;
-       /*
-        * Setup function pointers for generic logic in target_core_fabric_configfs.c
-        */
-       fabric->tf_ops.fabric_make_wwn = &lio_target_call_coreaddtiqn;
-       fabric->tf_ops.fabric_drop_wwn = &lio_target_call_coredeltiqn;
-       fabric->tf_ops.fabric_make_tpg = &lio_target_tiqn_addtpg;
-       fabric->tf_ops.fabric_drop_tpg = &lio_target_tiqn_deltpg;
-       fabric->tf_ops.fabric_post_link = NULL;
-       fabric->tf_ops.fabric_pre_unlink = NULL;
-       fabric->tf_ops.fabric_make_np = &lio_target_call_addnptotpg;
-       fabric->tf_ops.fabric_drop_np = &lio_target_call_delnpfromtpg;
-       fabric->tf_ops.fabric_make_nodeacl = &lio_target_make_nodeacl;
-       fabric->tf_ops.fabric_drop_nodeacl = &lio_target_drop_nodeacl;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        * sturct config_item_type's
-        */
-       fabric->tf_cit_tmpl.tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs;
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_auth_cit.ct_attrs = lio_target_tpg_auth_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs;
-
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() for"
-                               " LIO-Target failed!\n");
-               target_fabric_configfs_free(fabric);
-               return ret;
-       }
-
-       lio_target_fabric_configfs = fabric;
-       pr_debug("LIO_TARGET[0] - Set fabric ->"
-                       " lio_target_fabric_configfs\n");
-       return 0;
-}
-
-
-void iscsi_target_deregister_configfs(void)
-{
-       if (!lio_target_fabric_configfs)
-               return;
-       /*
-        * Shutdown discovery sessions and disable discovery TPG
-        */
-       if (iscsit_global->discovery_tpg)
-               iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1);
-
-       target_fabric_configfs_deregister(lio_target_fabric_configfs);
-       lio_target_fabric_configfs = NULL;
-       pr_debug("LIO_TARGET[0] - Cleared"
-                               " lio_target_fabric_configfs\n");
-}
+const struct target_core_fabric_ops iscsi_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "iscsi",
+       .get_fabric_name                = iscsi_get_fabric_name,
+       .get_fabric_proto_ident         = iscsi_get_fabric_proto_ident,
+       .tpg_get_wwn                    = lio_tpg_get_endpoint_wwn,
+       .tpg_get_tag                    = lio_tpg_get_tag,
+       .tpg_get_default_depth          = lio_tpg_get_default_depth,
+       .tpg_get_pr_transport_id        = iscsi_get_pr_transport_id,
+       .tpg_get_pr_transport_id_len    = iscsi_get_pr_transport_id_len,
+       .tpg_parse_pr_out_transport_id  = iscsi_parse_pr_out_transport_id,
+       .tpg_check_demo_mode            = lio_tpg_check_demo_mode,
+       .tpg_check_demo_mode_cache      = lio_tpg_check_demo_mode_cache,
+       .tpg_check_demo_mode_write_protect =
+                       lio_tpg_check_demo_mode_write_protect,
+       .tpg_check_prod_mode_write_protect =
+                       lio_tpg_check_prod_mode_write_protect,
+       .tpg_check_prot_fabric_only     = &lio_tpg_check_prot_fabric_only,
+       .tpg_alloc_fabric_acl           = lio_tpg_alloc_fabric_acl,
+       .tpg_release_fabric_acl         = lio_tpg_release_fabric_acl,
+       .tpg_get_inst_index             = lio_tpg_get_inst_index,
+       .check_stop_free                = lio_check_stop_free,
+       .release_cmd                    = lio_release_cmd,
+       .shutdown_session               = lio_tpg_shutdown_session,
+       .close_session                  = lio_tpg_close_session,
+       .sess_get_index                 = lio_sess_get_index,
+       .sess_get_initiator_sid         = lio_sess_get_initiator_sid,
+       .write_pending                  = lio_write_pending,
+       .write_pending_status           = lio_write_pending_status,
+       .set_default_node_attributes    = lio_set_default_node_attributes,
+       .get_task_tag                   = iscsi_get_task_tag,
+       .get_cmd_state                  = iscsi_get_cmd_state,
+       .queue_data_in                  = lio_queue_data_in,
+       .queue_status                   = lio_queue_status,
+       .queue_tm_rsp                   = lio_queue_tm_rsp,
+       .aborted_task                   = lio_aborted_task,
+       .fabric_make_wwn                = lio_target_call_coreaddtiqn,
+       .fabric_drop_wwn                = lio_target_call_coredeltiqn,
+       .fabric_make_tpg                = lio_target_tiqn_addtpg,
+       .fabric_drop_tpg                = lio_target_tiqn_deltpg,
+       .fabric_make_np                 = lio_target_call_addnptotpg,
+       .fabric_drop_np                 = lio_target_call_delnpfromtpg,
+       .fabric_make_nodeacl            = lio_target_make_nodeacl,
+       .fabric_drop_nodeacl            = lio_target_drop_nodeacl,
+
+       .tfc_discovery_attrs            = lio_target_discovery_auth_attrs,
+       .tfc_wwn_attrs                  = lio_target_wwn_attrs,
+       .tfc_tpg_base_attrs             = lio_target_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = lio_target_tpg_attrib_attrs,
+       .tfc_tpg_auth_attrs             = lio_target_tpg_auth_attrs,
+       .tfc_tpg_param_attrs            = lio_target_tpg_param_attrs,
+       .tfc_tpg_np_base_attrs          = lio_target_portal_attrs,
+       .tfc_tpg_nacl_base_attrs        = lio_target_initiator_attrs,
+       .tfc_tpg_nacl_attrib_attrs      = lio_target_nacl_attrib_attrs,
+       .tfc_tpg_nacl_auth_attrs        = lio_target_nacl_auth_attrs,
+       .tfc_tpg_nacl_param_attrs       = lio_target_nacl_param_attrs,
+};
diff --git a/drivers/target/iscsi/iscsi_target_configfs.h b/drivers/target/iscsi/iscsi_target_configfs.h
deleted file mode 100644 (file)
index 8cd5a63..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef ISCSI_TARGET_CONFIGFS_H
-#define ISCSI_TARGET_CONFIGFS_H
-
-extern int iscsi_target_register_configfs(void);
-extern void iscsi_target_deregister_configfs(void);
-
-#endif /* ISCSI_TARGET_CONFIGFS_H */
index bdd8731..959a14c 100644 (file)
@@ -23,7 +23,6 @@
 
 #include <target/iscsi/iscsi_target_core.h>
 #include "iscsi_target_seq_pdu_list.h"
-#include "iscsi_target_tq.h"
 #include "iscsi_target_erl0.h"
 #include "iscsi_target_erl1.h"
 #include "iscsi_target_erl2.h"
@@ -860,7 +859,10 @@ void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *conn)
        }
        spin_unlock_bh(&conn->state_lock);
 
-       iscsi_thread_set_force_reinstatement(conn);
+       if (conn->tx_thread && conn->tx_thread_active)
+               send_sig(SIGINT, conn->tx_thread, 1);
+       if (conn->rx_thread && conn->rx_thread_active)
+               send_sig(SIGINT, conn->rx_thread, 1);
 
 sleep:
        wait_for_completion(&conn->conn_wait_rcfr_comp);
@@ -885,10 +887,10 @@ void iscsit_cause_connection_reinstatement(struct iscsi_conn *conn, int sleep)
                return;
        }
 
-       if (iscsi_thread_set_force_reinstatement(conn) < 0) {
-               spin_unlock_bh(&conn->state_lock);
-               return;
-       }
+       if (conn->tx_thread && conn->tx_thread_active)
+               send_sig(SIGINT, conn->tx_thread, 1);
+       if (conn->rx_thread && conn->rx_thread_active)
+               send_sig(SIGINT, conn->rx_thread, 1);
 
        atomic_set(&conn->connection_reinstatement, 1);
        if (!sleep) {
index 153fb66..8ce94ff 100644 (file)
@@ -26,7 +26,6 @@
 
 #include <target/iscsi/iscsi_target_core.h>
 #include <target/iscsi/iscsi_target_stat.h>
-#include "iscsi_target_tq.h"
 #include "iscsi_target_device.h"
 #include "iscsi_target_nego.h"
 #include "iscsi_target_erl0.h"
@@ -699,6 +698,51 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn)
                iscsit_start_nopin_timer(conn);
 }
 
+static int iscsit_start_kthreads(struct iscsi_conn *conn)
+{
+       int ret = 0;
+
+       spin_lock(&iscsit_global->ts_bitmap_lock);
+       conn->bitmap_id = bitmap_find_free_region(iscsit_global->ts_bitmap,
+                                       ISCSIT_BITMAP_BITS, get_order(1));
+       spin_unlock(&iscsit_global->ts_bitmap_lock);
+
+       if (conn->bitmap_id < 0) {
+               pr_err("bitmap_find_free_region() failed for"
+                      " iscsit_start_kthreads()\n");
+               return -ENOMEM;
+       }
+
+       conn->tx_thread = kthread_run(iscsi_target_tx_thread, conn,
+                                     "%s", ISCSI_TX_THREAD_NAME);
+       if (IS_ERR(conn->tx_thread)) {
+               pr_err("Unable to start iscsi_target_tx_thread\n");
+               ret = PTR_ERR(conn->tx_thread);
+               goto out_bitmap;
+       }
+       conn->tx_thread_active = true;
+
+       conn->rx_thread = kthread_run(iscsi_target_rx_thread, conn,
+                                     "%s", ISCSI_RX_THREAD_NAME);
+       if (IS_ERR(conn->rx_thread)) {
+               pr_err("Unable to start iscsi_target_rx_thread\n");
+               ret = PTR_ERR(conn->rx_thread);
+               goto out_tx;
+       }
+       conn->rx_thread_active = true;
+
+       return 0;
+out_tx:
+       kthread_stop(conn->tx_thread);
+       conn->tx_thread_active = false;
+out_bitmap:
+       spin_lock(&iscsit_global->ts_bitmap_lock);
+       bitmap_release_region(iscsit_global->ts_bitmap, conn->bitmap_id,
+                             get_order(1));
+       spin_unlock(&iscsit_global->ts_bitmap_lock);
+       return ret;
+}
+
 int iscsi_post_login_handler(
        struct iscsi_np *np,
        struct iscsi_conn *conn,
@@ -709,7 +753,7 @@ int iscsi_post_login_handler(
        struct se_session *se_sess = sess->se_sess;
        struct iscsi_portal_group *tpg = sess->tpg;
        struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
-       struct iscsi_thread_set *ts;
+       int rc;
 
        iscsit_inc_conn_usage_count(conn);
 
@@ -724,7 +768,6 @@ int iscsi_post_login_handler(
        /*
         * SCSI Initiator -> SCSI Target Port Mapping
         */
-       ts = iscsi_get_thread_set();
        if (!zero_tsih) {
                iscsi_set_session_parameters(sess->sess_ops,
                                conn->param_list, 0);
@@ -751,9 +794,11 @@ int iscsi_post_login_handler(
                        sess->sess_ops->InitiatorName);
                spin_unlock_bh(&sess->conn_lock);
 
-               iscsi_post_login_start_timers(conn);
+               rc = iscsit_start_kthreads(conn);
+               if (rc)
+                       return rc;
 
-               iscsi_activate_thread_set(conn, ts);
+               iscsi_post_login_start_timers(conn);
                /*
                 * Determine CPU mask to ensure connection's RX and TX kthreads
                 * are scheduled on the same CPU.
@@ -810,8 +855,11 @@ int iscsi_post_login_handler(
                " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt);
        spin_unlock_bh(&se_tpg->session_lock);
 
+       rc = iscsit_start_kthreads(conn);
+       if (rc)
+               return rc;
+
        iscsi_post_login_start_timers(conn);
-       iscsi_activate_thread_set(conn, ts);
        /*
         * Determine CPU mask to ensure connection's RX and TX kthreads
         * are scheduled on the same CPU.
index bdd127c..e8a2408 100644 (file)
@@ -68,10 +68,8 @@ int iscsit_load_discovery_tpg(void)
                return -1;
        }
 
-       ret = core_tpg_register(
-                       &lio_target_fabric_configfs->tf_ops,
-                       NULL, &tpg->tpg_se_tpg, tpg,
-                       TRANSPORT_TPG_TYPE_DISCOVERY);
+       ret = core_tpg_register(&iscsi_ops, NULL, &tpg->tpg_se_tpg,
+                               tpg, TRANSPORT_TPG_TYPE_DISCOVERY);
        if (ret < 0) {
                kfree(tpg);
                return -1;
@@ -228,6 +226,7 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg)
        a->demo_mode_discovery = TA_DEMO_MODE_DISCOVERY;
        a->default_erl = TA_DEFAULT_ERL;
        a->t10_pi = TA_DEFAULT_T10_PI;
+       a->fabric_prot_type = TA_DEFAULT_FABRIC_PROT_TYPE;
 }
 
 int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg)
@@ -878,3 +877,21 @@ int iscsit_ta_t10_pi(
 
        return 0;
 }
+
+int iscsit_ta_fabric_prot_type(
+       struct iscsi_portal_group *tpg,
+       u32 prot_type)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if ((prot_type != 0) && (prot_type != 1) && (prot_type != 3)) {
+               pr_err("Illegal value for fabric_prot_type: %u\n", prot_type);
+               return -EINVAL;
+       }
+
+       a->fabric_prot_type = prot_type;
+       pr_debug("iSCSI_TPG[%hu] - T10 Fabric Protection Type: %u\n",
+                tpg->tpgt, prot_type);
+
+       return 0;
+}
index e726533..95ff5bd 100644 (file)
@@ -39,5 +39,6 @@ extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32);
 extern int iscsit_ta_demo_mode_discovery(struct iscsi_portal_group *, u32);
 extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32);
 extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_fabric_prot_type(struct iscsi_portal_group *, u32);
 
 #endif /* ISCSI_TARGET_TPG_H */
diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c
deleted file mode 100644 (file)
index 26aa509..0000000
+++ /dev/null
@@ -1,495 +0,0 @@
-/*******************************************************************************
- * This file contains the iSCSI Login Thread and Thread Queue functions.
- *
- * (c) Copyright 2007-2013 Datera, Inc.
- *
- * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- ******************************************************************************/
-
-#include <linux/kthread.h>
-#include <linux/list.h>
-#include <linux/bitmap.h>
-
-#include <target/iscsi/iscsi_target_core.h>
-#include "iscsi_target_tq.h"
-#include "iscsi_target.h"
-
-static LIST_HEAD(inactive_ts_list);
-static DEFINE_SPINLOCK(inactive_ts_lock);
-static DEFINE_SPINLOCK(ts_bitmap_lock);
-
-static void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *ts)
-{
-       if (!list_empty(&ts->ts_list)) {
-               WARN_ON(1);
-               return;
-       }
-       spin_lock(&inactive_ts_lock);
-       list_add_tail(&ts->ts_list, &inactive_ts_list);
-       iscsit_global->inactive_ts++;
-       spin_unlock(&inactive_ts_lock);
-}
-
-static struct iscsi_thread_set *iscsi_get_ts_from_inactive_list(void)
-{
-       struct iscsi_thread_set *ts;
-
-       spin_lock(&inactive_ts_lock);
-       if (list_empty(&inactive_ts_list)) {
-               spin_unlock(&inactive_ts_lock);
-               return NULL;
-       }
-
-       ts = list_first_entry(&inactive_ts_list, struct iscsi_thread_set, ts_list);
-
-       list_del_init(&ts->ts_list);
-       iscsit_global->inactive_ts--;
-       spin_unlock(&inactive_ts_lock);
-
-       return ts;
-}
-
-int iscsi_allocate_thread_sets(u32 thread_pair_count)
-{
-       int allocated_thread_pair_count = 0, i, thread_id;
-       struct iscsi_thread_set *ts = NULL;
-
-       for (i = 0; i < thread_pair_count; i++) {
-               ts = kzalloc(sizeof(struct iscsi_thread_set), GFP_KERNEL);
-               if (!ts) {
-                       pr_err("Unable to allocate memory for"
-                                       " thread set.\n");
-                       return allocated_thread_pair_count;
-               }
-               /*
-                * Locate the next available regision in the thread_set_bitmap
-                */
-               spin_lock(&ts_bitmap_lock);
-               thread_id = bitmap_find_free_region(iscsit_global->ts_bitmap,
-                               iscsit_global->ts_bitmap_count, get_order(1));
-               spin_unlock(&ts_bitmap_lock);
-               if (thread_id < 0) {
-                       pr_err("bitmap_find_free_region() failed for"
-                               " thread_set_bitmap\n");
-                       kfree(ts);
-                       return allocated_thread_pair_count;
-               }
-
-               ts->thread_id = thread_id;
-               ts->status = ISCSI_THREAD_SET_FREE;
-               INIT_LIST_HEAD(&ts->ts_list);
-               spin_lock_init(&ts->ts_state_lock);
-               init_completion(&ts->rx_restart_comp);
-               init_completion(&ts->tx_restart_comp);
-               init_completion(&ts->rx_start_comp);
-               init_completion(&ts->tx_start_comp);
-               sema_init(&ts->ts_activate_sem, 0);
-
-               ts->create_threads = 1;
-               ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s",
-                                       ISCSI_TX_THREAD_NAME);
-               if (IS_ERR(ts->tx_thread)) {
-                       dump_stack();
-                       pr_err("Unable to start iscsi_target_tx_thread\n");
-                       break;
-               }
-
-               ts->rx_thread = kthread_run(iscsi_target_rx_thread, ts, "%s",
-                                       ISCSI_RX_THREAD_NAME);
-               if (IS_ERR(ts->rx_thread)) {
-                       kthread_stop(ts->tx_thread);
-                       pr_err("Unable to start iscsi_target_rx_thread\n");
-                       break;
-               }
-               ts->create_threads = 0;
-
-               iscsi_add_ts_to_inactive_list(ts);
-               allocated_thread_pair_count++;
-       }
-
-       pr_debug("Spawned %d thread set(s) (%d total threads).\n",
-               allocated_thread_pair_count, allocated_thread_pair_count * 2);
-       return allocated_thread_pair_count;
-}
-
-static void iscsi_deallocate_thread_one(struct iscsi_thread_set *ts)
-{
-       spin_lock_bh(&ts->ts_state_lock);
-       ts->status = ISCSI_THREAD_SET_DIE;
-
-       if (ts->rx_thread) {
-               complete(&ts->rx_start_comp);
-               spin_unlock_bh(&ts->ts_state_lock);
-               kthread_stop(ts->rx_thread);
-               spin_lock_bh(&ts->ts_state_lock);
-       }
-       if (ts->tx_thread) {
-               complete(&ts->tx_start_comp);
-               spin_unlock_bh(&ts->ts_state_lock);
-               kthread_stop(ts->tx_thread);
-               spin_lock_bh(&ts->ts_state_lock);
-       }
-       spin_unlock_bh(&ts->ts_state_lock);
-       /*
-        * Release this thread_id in the thread_set_bitmap
-        */
-       spin_lock(&ts_bitmap_lock);
-       bitmap_release_region(iscsit_global->ts_bitmap,
-                       ts->thread_id, get_order(1));
-       spin_unlock(&ts_bitmap_lock);
-
-       kfree(ts);
-}
-
-void iscsi_deallocate_thread_sets(void)
-{
-       struct iscsi_thread_set *ts = NULL;
-       u32 released_count = 0;
-
-       while ((ts = iscsi_get_ts_from_inactive_list())) {
-
-               iscsi_deallocate_thread_one(ts);
-               released_count++;
-       }
-
-       if (released_count)
-               pr_debug("Stopped %d thread set(s) (%d total threads)."
-                       "\n", released_count, released_count * 2);
-}
-
-static void iscsi_deallocate_extra_thread_sets(void)
-{
-       u32 orig_count, released_count = 0;
-       struct iscsi_thread_set *ts = NULL;
-
-       orig_count = TARGET_THREAD_SET_COUNT;
-
-       while ((iscsit_global->inactive_ts + 1) > orig_count) {
-               ts = iscsi_get_ts_from_inactive_list();
-               if (!ts)
-                       break;
-
-               iscsi_deallocate_thread_one(ts);
-               released_count++;
-       }
-
-       if (released_count)
-               pr_debug("Stopped %d thread set(s) (%d total threads)."
-                       "\n", released_count, released_count * 2);
-}
-
-void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts)
-{
-       spin_lock_bh(&ts->ts_state_lock);
-       conn->thread_set = ts;
-       ts->conn = conn;
-       ts->status = ISCSI_THREAD_SET_ACTIVE;
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       complete(&ts->rx_start_comp);
-       complete(&ts->tx_start_comp);
-
-       down(&ts->ts_activate_sem);
-}
-
-struct iscsi_thread_set *iscsi_get_thread_set(void)
-{
-       struct iscsi_thread_set *ts;
-
-get_set:
-       ts = iscsi_get_ts_from_inactive_list();
-       if (!ts) {
-               iscsi_allocate_thread_sets(1);
-               goto get_set;
-       }
-
-       ts->delay_inactive = 1;
-       ts->signal_sent = 0;
-       ts->thread_count = 2;
-       init_completion(&ts->rx_restart_comp);
-       init_completion(&ts->tx_restart_comp);
-       sema_init(&ts->ts_activate_sem, 0);
-
-       return ts;
-}
-
-void iscsi_set_thread_clear(struct iscsi_conn *conn, u8 thread_clear)
-{
-       struct iscsi_thread_set *ts = NULL;
-
-       if (!conn->thread_set) {
-               pr_err("struct iscsi_conn->thread_set is NULL\n");
-               return;
-       }
-       ts = conn->thread_set;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       ts->thread_clear &= ~thread_clear;
-
-       if ((thread_clear & ISCSI_CLEAR_RX_THREAD) &&
-           (ts->blocked_threads & ISCSI_BLOCK_RX_THREAD))
-               complete(&ts->rx_restart_comp);
-       else if ((thread_clear & ISCSI_CLEAR_TX_THREAD) &&
-                (ts->blocked_threads & ISCSI_BLOCK_TX_THREAD))
-               complete(&ts->tx_restart_comp);
-       spin_unlock_bh(&ts->ts_state_lock);
-}
-
-void iscsi_set_thread_set_signal(struct iscsi_conn *conn, u8 signal_sent)
-{
-       struct iscsi_thread_set *ts = NULL;
-
-       if (!conn->thread_set) {
-               pr_err("struct iscsi_conn->thread_set is NULL\n");
-               return;
-       }
-       ts = conn->thread_set;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       ts->signal_sent |= signal_sent;
-       spin_unlock_bh(&ts->ts_state_lock);
-}
-
-int iscsi_release_thread_set(struct iscsi_conn *conn)
-{
-       int thread_called = 0;
-       struct iscsi_thread_set *ts = NULL;
-
-       if (!conn || !conn->thread_set) {
-               pr_err("connection or thread set pointer is NULL\n");
-               BUG();
-       }
-       ts = conn->thread_set;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       ts->status = ISCSI_THREAD_SET_RESET;
-
-       if (!strncmp(current->comm, ISCSI_RX_THREAD_NAME,
-                       strlen(ISCSI_RX_THREAD_NAME)))
-               thread_called = ISCSI_RX_THREAD;
-       else if (!strncmp(current->comm, ISCSI_TX_THREAD_NAME,
-                       strlen(ISCSI_TX_THREAD_NAME)))
-               thread_called = ISCSI_TX_THREAD;
-
-       if (ts->rx_thread && (thread_called == ISCSI_TX_THREAD) &&
-          (ts->thread_clear & ISCSI_CLEAR_RX_THREAD)) {
-
-               if (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD)) {
-                       send_sig(SIGINT, ts->rx_thread, 1);
-                       ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
-               }
-               ts->blocked_threads |= ISCSI_BLOCK_RX_THREAD;
-               spin_unlock_bh(&ts->ts_state_lock);
-               wait_for_completion(&ts->rx_restart_comp);
-               spin_lock_bh(&ts->ts_state_lock);
-               ts->blocked_threads &= ~ISCSI_BLOCK_RX_THREAD;
-       }
-       if (ts->tx_thread && (thread_called == ISCSI_RX_THREAD) &&
-          (ts->thread_clear & ISCSI_CLEAR_TX_THREAD)) {
-
-               if (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD)) {
-                       send_sig(SIGINT, ts->tx_thread, 1);
-                       ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
-               }
-               ts->blocked_threads |= ISCSI_BLOCK_TX_THREAD;
-               spin_unlock_bh(&ts->ts_state_lock);
-               wait_for_completion(&ts->tx_restart_comp);
-               spin_lock_bh(&ts->ts_state_lock);
-               ts->blocked_threads &= ~ISCSI_BLOCK_TX_THREAD;
-       }
-
-       ts->conn = NULL;
-       ts->status = ISCSI_THREAD_SET_FREE;
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       return 0;
-}
-
-int iscsi_thread_set_force_reinstatement(struct iscsi_conn *conn)
-{
-       struct iscsi_thread_set *ts;
-
-       if (!conn->thread_set)
-               return -1;
-       ts = conn->thread_set;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (ts->status != ISCSI_THREAD_SET_ACTIVE) {
-               spin_unlock_bh(&ts->ts_state_lock);
-               return -1;
-       }
-
-       if (ts->tx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD))) {
-               send_sig(SIGINT, ts->tx_thread, 1);
-               ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
-       }
-       if (ts->rx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD))) {
-               send_sig(SIGINT, ts->rx_thread, 1);
-               ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
-       }
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       return 0;
-}
-
-static void iscsi_check_to_add_additional_sets(void)
-{
-       int thread_sets_add;
-
-       spin_lock(&inactive_ts_lock);
-       thread_sets_add = iscsit_global->inactive_ts;
-       spin_unlock(&inactive_ts_lock);
-       if (thread_sets_add == 1)
-               iscsi_allocate_thread_sets(1);
-}
-
-static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts)
-{
-       spin_lock_bh(&ts->ts_state_lock);
-       if (ts->status == ISCSI_THREAD_SET_DIE || kthread_should_stop() ||
-           signal_pending(current)) {
-               spin_unlock_bh(&ts->ts_state_lock);
-               return -1;
-       }
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       return 0;
-}
-
-struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts)
-{
-       int ret;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (ts->create_threads) {
-               spin_unlock_bh(&ts->ts_state_lock);
-               goto sleep;
-       }
-
-       if (ts->status != ISCSI_THREAD_SET_DIE)
-               flush_signals(current);
-
-       if (ts->delay_inactive && (--ts->thread_count == 0)) {
-               spin_unlock_bh(&ts->ts_state_lock);
-
-               if (!iscsit_global->in_shutdown)
-                       iscsi_deallocate_extra_thread_sets();
-
-               iscsi_add_ts_to_inactive_list(ts);
-               spin_lock_bh(&ts->ts_state_lock);
-       }
-
-       if ((ts->status == ISCSI_THREAD_SET_RESET) &&
-           (ts->thread_clear & ISCSI_CLEAR_RX_THREAD))
-               complete(&ts->rx_restart_comp);
-
-       ts->thread_clear &= ~ISCSI_CLEAR_RX_THREAD;
-       spin_unlock_bh(&ts->ts_state_lock);
-sleep:
-       ret = wait_for_completion_interruptible(&ts->rx_start_comp);
-       if (ret != 0)
-               return NULL;
-
-       if (iscsi_signal_thread_pre_handler(ts) < 0)
-               return NULL;
-
-       iscsi_check_to_add_additional_sets();
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (!ts->conn) {
-               pr_err("struct iscsi_thread_set->conn is NULL for"
-                       " RX thread_id: %s/%d\n", current->comm, current->pid);
-               spin_unlock_bh(&ts->ts_state_lock);
-               return NULL;
-       }
-       ts->thread_clear |= ISCSI_CLEAR_RX_THREAD;
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       up(&ts->ts_activate_sem);
-
-       return ts->conn;
-}
-
-struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts)
-{
-       int ret;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (ts->create_threads) {
-               spin_unlock_bh(&ts->ts_state_lock);
-               goto sleep;
-       }
-
-       if (ts->status != ISCSI_THREAD_SET_DIE)
-               flush_signals(current);
-
-       if (ts->delay_inactive && (--ts->thread_count == 0)) {
-               spin_unlock_bh(&ts->ts_state_lock);
-
-               if (!iscsit_global->in_shutdown)
-                       iscsi_deallocate_extra_thread_sets();
-
-               iscsi_add_ts_to_inactive_list(ts);
-               spin_lock_bh(&ts->ts_state_lock);
-       }
-       if ((ts->status == ISCSI_THREAD_SET_RESET) &&
-           (ts->thread_clear & ISCSI_CLEAR_TX_THREAD))
-               complete(&ts->tx_restart_comp);
-
-       ts->thread_clear &= ~ISCSI_CLEAR_TX_THREAD;
-       spin_unlock_bh(&ts->ts_state_lock);
-sleep:
-       ret = wait_for_completion_interruptible(&ts->tx_start_comp);
-       if (ret != 0)
-               return NULL;
-
-       if (iscsi_signal_thread_pre_handler(ts) < 0)
-               return NULL;
-
-       iscsi_check_to_add_additional_sets();
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (!ts->conn) {
-               pr_err("struct iscsi_thread_set->conn is NULL for"
-                       " TX thread_id: %s/%d\n", current->comm, current->pid);
-               spin_unlock_bh(&ts->ts_state_lock);
-               return NULL;
-       }
-       ts->thread_clear |= ISCSI_CLEAR_TX_THREAD;
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       up(&ts->ts_activate_sem);
-
-       return ts->conn;
-}
-
-int iscsi_thread_set_init(void)
-{
-       int size;
-
-       iscsit_global->ts_bitmap_count = ISCSI_TS_BITMAP_BITS;
-
-       size = BITS_TO_LONGS(iscsit_global->ts_bitmap_count) * sizeof(long);
-       iscsit_global->ts_bitmap = kzalloc(size, GFP_KERNEL);
-       if (!iscsit_global->ts_bitmap) {
-               pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-
-void iscsi_thread_set_free(void)
-{
-       kfree(iscsit_global->ts_bitmap);
-}
diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h
deleted file mode 100644 (file)
index cc1eede..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef ISCSI_THREAD_QUEUE_H
-#define ISCSI_THREAD_QUEUE_H
-
-/*
- * Defines for thread sets.
- */
-extern int iscsi_thread_set_force_reinstatement(struct iscsi_conn *);
-extern int iscsi_allocate_thread_sets(u32);
-extern void iscsi_deallocate_thread_sets(void);
-extern void iscsi_activate_thread_set(struct iscsi_conn *, struct iscsi_thread_set *);
-extern struct iscsi_thread_set *iscsi_get_thread_set(void);
-extern void iscsi_set_thread_clear(struct iscsi_conn *, u8);
-extern void iscsi_set_thread_set_signal(struct iscsi_conn *, u8);
-extern int iscsi_release_thread_set(struct iscsi_conn *);
-extern struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *);
-extern struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *);
-extern int iscsi_thread_set_init(void);
-extern void iscsi_thread_set_free(void);
-
-extern int iscsi_target_tx_thread(void *);
-extern int iscsi_target_rx_thread(void *);
-
-#define TARGET_THREAD_SET_COUNT                        4
-
-#define ISCSI_RX_THREAD                         1
-#define ISCSI_TX_THREAD                         2
-#define ISCSI_RX_THREAD_NAME                   "iscsi_trx"
-#define ISCSI_TX_THREAD_NAME                   "iscsi_ttx"
-#define ISCSI_BLOCK_RX_THREAD                  0x1
-#define ISCSI_BLOCK_TX_THREAD                  0x2
-#define ISCSI_CLEAR_RX_THREAD                  0x1
-#define ISCSI_CLEAR_TX_THREAD                  0x2
-#define ISCSI_SIGNAL_RX_THREAD                 0x1
-#define ISCSI_SIGNAL_TX_THREAD                 0x2
-
-/* struct iscsi_thread_set->status */
-#define ISCSI_THREAD_SET_FREE                  1
-#define ISCSI_THREAD_SET_ACTIVE                        2
-#define ISCSI_THREAD_SET_DIE                   3
-#define ISCSI_THREAD_SET_RESET                 4
-#define ISCSI_THREAD_SET_DEALLOCATE_THREADS    5
-
-/* By default allow a maximum of 32K iSCSI connections */
-#define ISCSI_TS_BITMAP_BITS                   32768
-
-struct iscsi_thread_set {
-       /* flags used for blocking and restarting sets */
-       int     blocked_threads;
-       /* flag for creating threads */
-       int     create_threads;
-       /* flag for delaying readding to inactive list */
-       int     delay_inactive;
-       /* status for thread set */
-       int     status;
-       /* which threads have had signals sent */
-       int     signal_sent;
-       /* flag for which threads exited first */
-       int     thread_clear;
-       /* Active threads in the thread set */
-       int     thread_count;
-       /* Unique thread ID */
-       u32     thread_id;
-       /* pointer to connection if set is active */
-       struct iscsi_conn       *conn;
-       /* used for controlling ts state accesses */
-       spinlock_t      ts_state_lock;
-       /* used for restarting thread queue */
-       struct completion       rx_restart_comp;
-       /* used for restarting thread queue */
-       struct completion       tx_restart_comp;
-       /* used for normal unused blocking */
-       struct completion       rx_start_comp;
-       /* used for normal unused blocking */
-       struct completion       tx_start_comp;
-       /* OS descriptor for rx thread */
-       struct task_struct      *rx_thread;
-       /* OS descriptor for tx thread */
-       struct task_struct      *tx_thread;
-       /* struct iscsi_thread_set in list list head*/
-       struct list_head        ts_list;
-       struct semaphore        ts_activate_sem;
-};
-
-#endif   /*** ISCSI_THREAD_QUEUE_H ***/
index 390df8e..b18edda 100644 (file)
@@ -33,7 +33,6 @@
 #include "iscsi_target_erl1.h"
 #include "iscsi_target_erl2.h"
 #include "iscsi_target_tpg.h"
-#include "iscsi_target_tq.h"
 #include "iscsi_target_util.h"
 #include "iscsi_target.h"
 
index c36bd7c..51f0c89 100644 (file)
@@ -41,8 +41,7 @@
 
 #define to_tcm_loop_hba(hba)   container_of(hba, struct tcm_loop_hba, dev)
 
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *tcm_loop_fabric_configfs;
+static const struct target_core_fabric_ops loop_ops;
 
 static struct workqueue_struct *tcm_loop_workqueue;
 static struct kmem_cache *tcm_loop_cmd_cache;
@@ -108,7 +107,7 @@ static struct device_driver tcm_loop_driverfs = {
 /*
  * Used with root_device_register() in tcm_loop_alloc_core_bus() below
  */
-struct device *tcm_loop_primary;
+static struct device *tcm_loop_primary;
 
 static void tcm_loop_submission_work(struct work_struct *work)
 {
@@ -697,6 +696,13 @@ static int tcm_loop_check_prod_mode_write_protect(struct se_portal_group *se_tpg
        return 0;
 }
 
+static int tcm_loop_check_prot_fabric_only(struct se_portal_group *se_tpg)
+{
+       struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, struct tcm_loop_tpg,
+                                                  tl_se_tpg);
+       return tl_tpg->tl_fabric_prot_type;
+}
+
 static struct se_node_acl *tcm_loop_tpg_alloc_fabric_acl(
        struct se_portal_group *se_tpg)
 {
@@ -912,6 +918,46 @@ static void tcm_loop_port_unlink(
 
 /* End items for tcm_loop_port_cit */
 
+static ssize_t tcm_loop_tpg_attrib_show_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, struct tcm_loop_tpg,
+                                                  tl_se_tpg);
+
+       return sprintf(page, "%d\n", tl_tpg->tl_fabric_prot_type);
+}
+
+static ssize_t tcm_loop_tpg_attrib_store_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       const char *page,
+       size_t count)
+{
+       struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, struct tcm_loop_tpg,
+                                                  tl_se_tpg);
+       unsigned long val;
+       int ret = kstrtoul(page, 0, &val);
+
+       if (ret) {
+               pr_err("kstrtoul() returned %d for fabric_prot_type\n", ret);
+               return ret;
+       }
+       if (val != 0 && val != 1 && val != 3) {
+               pr_err("Invalid qla2xxx fabric_prot_type: %lu\n", val);
+               return -EINVAL;
+       }
+       tl_tpg->tl_fabric_prot_type = val;
+
+       return count;
+}
+
+TF_TPG_ATTRIB_ATTR(tcm_loop, fabric_prot_type, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *tcm_loop_tpg_attrib_attrs[] = {
+       &tcm_loop_tpg_attrib_fabric_prot_type.attr,
+       NULL,
+};
+
 /* Start items for tcm_loop_nexus_cit */
 
 static int tcm_loop_make_nexus(
@@ -937,7 +983,8 @@ static int tcm_loop_make_nexus(
        /*
         * Initialize the struct se_session pointer
         */
-       tl_nexus->se_sess = transport_init_session(TARGET_PROT_ALL);
+       tl_nexus->se_sess = transport_init_session(
+                               TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS);
        if (IS_ERR(tl_nexus->se_sess)) {
                ret = PTR_ERR(tl_nexus->se_sess);
                goto out;
@@ -1165,21 +1212,19 @@ static struct se_portal_group *tcm_loop_make_naa_tpg(
        struct tcm_loop_hba *tl_hba = container_of(wwn,
                        struct tcm_loop_hba, tl_hba_wwn);
        struct tcm_loop_tpg *tl_tpg;
-       char *tpgt_str, *end_ptr;
        int ret;
-       unsigned short int tpgt;
+       unsigned long tpgt;
 
-       tpgt_str = strstr(name, "tpgt_");
-       if (!tpgt_str) {
+       if (strstr(name, "tpgt_") != name) {
                pr_err("Unable to locate \"tpgt_#\" directory"
                                " group\n");
                return ERR_PTR(-EINVAL);
        }
-       tpgt_str += 5; /* Skip ahead of "tpgt_" */
-       tpgt = (unsigned short int) simple_strtoul(tpgt_str, &end_ptr, 0);
+       if (kstrtoul(name+5, 10, &tpgt))
+               return ERR_PTR(-EINVAL);
 
        if (tpgt >= TL_TPGS_PER_HBA) {
-               pr_err("Passed tpgt: %hu exceeds TL_TPGS_PER_HBA:"
+               pr_err("Passed tpgt: %lu exceeds TL_TPGS_PER_HBA:"
                                " %u\n", tpgt, TL_TPGS_PER_HBA);
                return ERR_PTR(-EINVAL);
        }
@@ -1189,14 +1234,13 @@ static struct se_portal_group *tcm_loop_make_naa_tpg(
        /*
         * Register the tl_tpg as a emulated SAS TCM Target Endpoint
         */
-       ret = core_tpg_register(&tcm_loop_fabric_configfs->tf_ops,
-                       wwn, &tl_tpg->tl_se_tpg, tl_tpg,
+       ret = core_tpg_register(&loop_ops, wwn, &tl_tpg->tl_se_tpg, tl_tpg,
                        TRANSPORT_TPG_TYPE_NORMAL);
        if (ret < 0)
                return ERR_PTR(-ENOMEM);
 
        pr_debug("TCM_Loop_ConfigFS: Allocated Emulated %s"
-               " Target Port %s,t,0x%04x\n", tcm_loop_dump_proto_id(tl_hba),
+               " Target Port %s,t,0x%04lx\n", tcm_loop_dump_proto_id(tl_hba),
                config_item_name(&wwn->wwn_group.cg_item), tpgt);
 
        return &tl_tpg->tl_se_tpg;
@@ -1338,127 +1382,51 @@ static struct configfs_attribute *tcm_loop_wwn_attrs[] = {
 
 /* End items for tcm_loop_cit */
 
-static int tcm_loop_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-       /*
-        * Set the TCM Loop HBA counter to zero
-        */
-       tcm_loop_hba_no_cnt = 0;
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "loopback");
-       if (IS_ERR(fabric)) {
-               pr_err("tcm_loop_register_configfs() failed!\n");
-               return PTR_ERR(fabric);
-       }
-       /*
-        * Setup the fabric API of function pointers used by target_core_mod
-        */
-       fabric->tf_ops.get_fabric_name = &tcm_loop_get_fabric_name;
-       fabric->tf_ops.get_fabric_proto_ident = &tcm_loop_get_fabric_proto_ident;
-       fabric->tf_ops.tpg_get_wwn = &tcm_loop_get_endpoint_wwn;
-       fabric->tf_ops.tpg_get_tag = &tcm_loop_get_tag;
-       fabric->tf_ops.tpg_get_default_depth = &tcm_loop_get_default_depth;
-       fabric->tf_ops.tpg_get_pr_transport_id = &tcm_loop_get_pr_transport_id;
-       fabric->tf_ops.tpg_get_pr_transport_id_len =
-                                       &tcm_loop_get_pr_transport_id_len;
-       fabric->tf_ops.tpg_parse_pr_out_transport_id =
-                                       &tcm_loop_parse_pr_out_transport_id;
-       fabric->tf_ops.tpg_check_demo_mode = &tcm_loop_check_demo_mode;
-       fabric->tf_ops.tpg_check_demo_mode_cache =
-                                       &tcm_loop_check_demo_mode_cache;
-       fabric->tf_ops.tpg_check_demo_mode_write_protect =
-                                       &tcm_loop_check_demo_mode_write_protect;
-       fabric->tf_ops.tpg_check_prod_mode_write_protect =
-                                       &tcm_loop_check_prod_mode_write_protect;
-       /*
-        * The TCM loopback fabric module runs in demo-mode to a local
-        * virtual SCSI device, so fabric dependent initator ACLs are
-        * not required.
-        */
-       fabric->tf_ops.tpg_alloc_fabric_acl = &tcm_loop_tpg_alloc_fabric_acl;
-       fabric->tf_ops.tpg_release_fabric_acl =
-                                       &tcm_loop_tpg_release_fabric_acl;
-       fabric->tf_ops.tpg_get_inst_index = &tcm_loop_get_inst_index;
-       /*
-        * Used for setting up remaining TCM resources in process context
-        */
-       fabric->tf_ops.check_stop_free = &tcm_loop_check_stop_free;
-       fabric->tf_ops.release_cmd = &tcm_loop_release_cmd;
-       fabric->tf_ops.shutdown_session = &tcm_loop_shutdown_session;
-       fabric->tf_ops.close_session = &tcm_loop_close_session;
-       fabric->tf_ops.sess_get_index = &tcm_loop_sess_get_index;
-       fabric->tf_ops.sess_get_initiator_sid = NULL;
-       fabric->tf_ops.write_pending = &tcm_loop_write_pending;
-       fabric->tf_ops.write_pending_status = &tcm_loop_write_pending_status;
-       /*
-        * Not used for TCM loopback
-        */
-       fabric->tf_ops.set_default_node_attributes =
-                                       &tcm_loop_set_default_node_attributes;
-       fabric->tf_ops.get_task_tag = &tcm_loop_get_task_tag;
-       fabric->tf_ops.get_cmd_state = &tcm_loop_get_cmd_state;
-       fabric->tf_ops.queue_data_in = &tcm_loop_queue_data_in;
-       fabric->tf_ops.queue_status = &tcm_loop_queue_status;
-       fabric->tf_ops.queue_tm_rsp = &tcm_loop_queue_tm_rsp;
-       fabric->tf_ops.aborted_task = &tcm_loop_aborted_task;
-
-       /*
-        * Setup function pointers for generic logic in target_core_fabric_configfs.c
-        */
-       fabric->tf_ops.fabric_make_wwn = &tcm_loop_make_scsi_hba;
-       fabric->tf_ops.fabric_drop_wwn = &tcm_loop_drop_scsi_hba;
-       fabric->tf_ops.fabric_make_tpg = &tcm_loop_make_naa_tpg;
-       fabric->tf_ops.fabric_drop_tpg = &tcm_loop_drop_naa_tpg;
-       /*
-        * fabric_post_link() and fabric_pre_unlink() are used for
-        * registration and release of TCM Loop Virtual SCSI LUNs.
-        */
-       fabric->tf_ops.fabric_post_link = &tcm_loop_port_link;
-       fabric->tf_ops.fabric_pre_unlink = &tcm_loop_port_unlink;
-       fabric->tf_ops.fabric_make_np = NULL;
-       fabric->tf_ops.fabric_drop_np = NULL;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_loop_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_loop_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       /*
-        * Once fabric->tf_ops has been setup, now register the fabric for
-        * use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() for"
-                               " TCM_Loop failed!\n");
-               target_fabric_configfs_free(fabric);
-               return -1;
-       }
-       /*
-        * Setup our local pointer to *fabric.
-        */
-       tcm_loop_fabric_configfs = fabric;
-       pr_debug("TCM_LOOP[0] - Set fabric ->"
-                       " tcm_loop_fabric_configfs\n");
-       return 0;
-}
-
-static void tcm_loop_deregister_configfs(void)
-{
-       if (!tcm_loop_fabric_configfs)
-               return;
-
-       target_fabric_configfs_deregister(tcm_loop_fabric_configfs);
-       tcm_loop_fabric_configfs = NULL;
-       pr_debug("TCM_LOOP[0] - Cleared"
-                               " tcm_loop_fabric_configfs\n");
-}
+static const struct target_core_fabric_ops loop_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "loopback",
+       .get_fabric_name                = tcm_loop_get_fabric_name,
+       .get_fabric_proto_ident         = tcm_loop_get_fabric_proto_ident,
+       .tpg_get_wwn                    = tcm_loop_get_endpoint_wwn,
+       .tpg_get_tag                    = tcm_loop_get_tag,
+       .tpg_get_default_depth          = tcm_loop_get_default_depth,
+       .tpg_get_pr_transport_id        = tcm_loop_get_pr_transport_id,
+       .tpg_get_pr_transport_id_len    = tcm_loop_get_pr_transport_id_len,
+       .tpg_parse_pr_out_transport_id  = tcm_loop_parse_pr_out_transport_id,
+       .tpg_check_demo_mode            = tcm_loop_check_demo_mode,
+       .tpg_check_demo_mode_cache      = tcm_loop_check_demo_mode_cache,
+       .tpg_check_demo_mode_write_protect =
+                               tcm_loop_check_demo_mode_write_protect,
+       .tpg_check_prod_mode_write_protect =
+                               tcm_loop_check_prod_mode_write_protect,
+       .tpg_check_prot_fabric_only     = tcm_loop_check_prot_fabric_only,
+       .tpg_alloc_fabric_acl           = tcm_loop_tpg_alloc_fabric_acl,
+       .tpg_release_fabric_acl         = tcm_loop_tpg_release_fabric_acl,
+       .tpg_get_inst_index             = tcm_loop_get_inst_index,
+       .check_stop_free                = tcm_loop_check_stop_free,
+       .release_cmd                    = tcm_loop_release_cmd,
+       .shutdown_session               = tcm_loop_shutdown_session,
+       .close_session                  = tcm_loop_close_session,
+       .sess_get_index                 = tcm_loop_sess_get_index,
+       .write_pending                  = tcm_loop_write_pending,
+       .write_pending_status           = tcm_loop_write_pending_status,
+       .set_default_node_attributes    = tcm_loop_set_default_node_attributes,
+       .get_task_tag                   = tcm_loop_get_task_tag,
+       .get_cmd_state                  = tcm_loop_get_cmd_state,
+       .queue_data_in                  = tcm_loop_queue_data_in,
+       .queue_status                   = tcm_loop_queue_status,
+       .queue_tm_rsp                   = tcm_loop_queue_tm_rsp,
+       .aborted_task                   = tcm_loop_aborted_task,
+       .fabric_make_wwn                = tcm_loop_make_scsi_hba,
+       .fabric_drop_wwn                = tcm_loop_drop_scsi_hba,
+       .fabric_make_tpg                = tcm_loop_make_naa_tpg,
+       .fabric_drop_tpg                = tcm_loop_drop_naa_tpg,
+       .fabric_post_link               = tcm_loop_port_link,
+       .fabric_pre_unlink              = tcm_loop_port_unlink,
+       .tfc_wwn_attrs                  = tcm_loop_wwn_attrs,
+       .tfc_tpg_base_attrs             = tcm_loop_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = tcm_loop_tpg_attrib_attrs,
+};
 
 static int __init tcm_loop_fabric_init(void)
 {
@@ -1482,7 +1450,7 @@ static int __init tcm_loop_fabric_init(void)
        if (ret)
                goto out_destroy_cache;
 
-       ret = tcm_loop_register_configfs();
+       ret = target_register_template(&loop_ops);
        if (ret)
                goto out_release_core_bus;
 
@@ -1500,7 +1468,7 @@ out:
 
 static void __exit tcm_loop_fabric_exit(void)
 {
-       tcm_loop_deregister_configfs();
+       target_unregister_template(&loop_ops);
        tcm_loop_release_core_bus();
        kmem_cache_destroy(tcm_loop_cmd_cache);
        destroy_workqueue(tcm_loop_workqueue);
index 6ae49f2..1e72ff7 100644 (file)
@@ -43,6 +43,7 @@ struct tcm_loop_nacl {
 struct tcm_loop_tpg {
        unsigned short tl_tpgt;
        unsigned short tl_transport_status;
+       enum target_prot_type tl_fabric_prot_type;
        atomic_t tl_tpg_port_count;
        struct se_portal_group tl_se_tpg;
        struct tcm_loop_hba *tl_hba;
index 9512af6..18b0f97 100644 (file)
@@ -42,8 +42,7 @@
 
 #include "sbp_target.h"
 
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *sbp_fabric_configfs;
+static const struct target_core_fabric_ops sbp_ops;
 
 /* FireWire address region for management and command block address handlers */
 static const struct fw_address_region sbp_register_region = {
@@ -2215,8 +2214,7 @@ static struct se_portal_group *sbp_make_tpg(
                goto out_free_tpg;
        }
 
-       ret = core_tpg_register(&sbp_fabric_configfs->tf_ops, wwn,
-                       &tpg->se_tpg, (void *)tpg,
+       ret = core_tpg_register(&sbp_ops, wwn, &tpg->se_tpg, tpg,
                        TRANSPORT_TPG_TYPE_NORMAL);
        if (ret < 0)
                goto out_unreg_mgt_agt;
@@ -2503,7 +2501,9 @@ static struct configfs_attribute *sbp_tpg_attrib_attrs[] = {
        NULL,
 };
 
-static struct target_core_fabric_ops sbp_ops = {
+static const struct target_core_fabric_ops sbp_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "sbp",
        .get_fabric_name                = sbp_get_fabric_name,
        .get_fabric_proto_ident         = sbp_get_fabric_proto_ident,
        .tpg_get_wwn                    = sbp_get_fabric_wwn,
@@ -2544,68 +2544,20 @@ static struct target_core_fabric_ops sbp_ops = {
        .fabric_drop_np                 = NULL,
        .fabric_make_nodeacl            = sbp_make_nodeacl,
        .fabric_drop_nodeacl            = sbp_drop_nodeacl,
-};
-
-static int sbp_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       fabric = target_fabric_configfs_init(THIS_MODULE, "sbp");
-       if (IS_ERR(fabric)) {
-               pr_err("target_fabric_configfs_init() failed\n");
-               return PTR_ERR(fabric);
-       }
-
-       fabric->tf_ops = sbp_ops;
-
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = sbp_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = sbp_tpg_base_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = sbp_tpg_attrib_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() failed for SBP\n");
-               return ret;
-       }
 
-       sbp_fabric_configfs = fabric;
-
-       return 0;
-};
-
-static void sbp_deregister_configfs(void)
-{
-       if (!sbp_fabric_configfs)
-               return;
-
-       target_fabric_configfs_deregister(sbp_fabric_configfs);
-       sbp_fabric_configfs = NULL;
+       .tfc_wwn_attrs                  = sbp_wwn_attrs,
+       .tfc_tpg_base_attrs             = sbp_tpg_base_attrs,
+       .tfc_tpg_attrib_attrs           = sbp_tpg_attrib_attrs,
 };
 
 static int __init sbp_init(void)
 {
-       int ret;
-
-       ret = sbp_register_configfs();
-       if (ret < 0)
-               return ret;
-
-       return 0;
+       return target_register_template(&sbp_ops);
 };
 
 static void __exit sbp_exit(void)
 {
-       sbp_deregister_configfs();
+       target_unregister_template(&sbp_ops);
 };
 
 MODULE_DESCRIPTION("FireWire SBP fabric driver");
index 75d89ad..ddaf76a 100644 (file)
@@ -142,8 +142,8 @@ static struct config_group *target_core_register_fabric(
 
        tf = target_core_get_fabric(name);
        if (!tf) {
-               pr_err("target_core_register_fabric() trying autoload for %s\n",
-                       name);
+               pr_debug("target_core_register_fabric() trying autoload for %s\n",
+                        name);
 
                /*
                 * Below are some hardcoded request_module() calls to automatically
@@ -165,8 +165,8 @@ static struct config_group *target_core_register_fabric(
                         */
                        ret = request_module("iscsi_target_mod");
                        if (ret < 0) {
-                               pr_err("request_module() failed for"
-                                      " iscsi_target_mod.ko: %d\n", ret);
+                               pr_debug("request_module() failed for"
+                                        " iscsi_target_mod.ko: %d\n", ret);
                                return ERR_PTR(-EINVAL);
                        }
                } else if (!strncmp(name, "loopback", 8)) {
@@ -178,8 +178,8 @@ static struct config_group *target_core_register_fabric(
                         */
                        ret = request_module("tcm_loop");
                        if (ret < 0) {
-                               pr_err("request_module() failed for"
-                                      " tcm_loop.ko: %d\n", ret);
+                               pr_debug("request_module() failed for"
+                                        " tcm_loop.ko: %d\n", ret);
                                return ERR_PTR(-EINVAL);
                        }
                }
@@ -188,8 +188,8 @@ static struct config_group *target_core_register_fabric(
        }
 
        if (!tf) {
-               pr_err("target_core_get_fabric() failed for %s\n",
-                      name);
+               pr_debug("target_core_get_fabric() failed for %s\n",
+                        name);
                return ERR_PTR(-EINVAL);
        }
        pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:"
@@ -300,81 +300,17 @@ struct configfs_subsystem *target_core_subsystem[] = {
 // Start functions called by external Target Fabrics Modules
 //############################################################################*/
 
-/*
- * First function called by fabric modules to:
- *
- * 1) Allocate a struct target_fabric_configfs and save the *fabric_cit pointer.
- * 2) Add struct target_fabric_configfs to g_tf_list
- * 3) Return struct target_fabric_configfs to fabric module to be passed
- *    into target_fabric_configfs_register().
- */
-struct target_fabric_configfs *target_fabric_configfs_init(
-       struct module *fabric_mod,
-       const char *name)
+static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo)
 {
-       struct target_fabric_configfs *tf;
-
-       if (!(name)) {
-               pr_err("Unable to locate passed fabric name\n");
-               return ERR_PTR(-EINVAL);
+       if (!tfo->name) {
+               pr_err("Missing tfo->name\n");
+               return -EINVAL;
        }
-       if (strlen(name) >= TARGET_FABRIC_NAME_SIZE) {
+       if (strlen(tfo->name) >= TARGET_FABRIC_NAME_SIZE) {
                pr_err("Passed name: %s exceeds TARGET_FABRIC"
-                       "_NAME_SIZE\n", name);
-               return ERR_PTR(-EINVAL);
+                       "_NAME_SIZE\n", tfo->name);
+               return -EINVAL;
        }
-
-       tf = kzalloc(sizeof(struct target_fabric_configfs), GFP_KERNEL);
-       if (!tf)
-               return ERR_PTR(-ENOMEM);
-
-       INIT_LIST_HEAD(&tf->tf_list);
-       atomic_set(&tf->tf_access_cnt, 0);
-       /*
-        * Setup the default generic struct config_item_type's (cits) in
-        * struct target_fabric_configfs->tf_cit_tmpl
-        */
-       tf->tf_module = fabric_mod;
-       target_fabric_setup_cits(tf);
-
-       tf->tf_subsys = target_core_subsystem[0];
-       snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", name);
-
-       mutex_lock(&g_tf_lock);
-       list_add_tail(&tf->tf_list, &g_tf_list);
-       mutex_unlock(&g_tf_lock);
-
-       pr_debug("<<<<<<<<<<<<<<<<<<<<<< BEGIN FABRIC API >>>>>>>>"
-                       ">>>>>>>>>>>>>>\n");
-       pr_debug("Initialized struct target_fabric_configfs: %p for"
-                       " %s\n", tf, tf->tf_name);
-       return tf;
-}
-EXPORT_SYMBOL(target_fabric_configfs_init);
-
-/*
- * Called by fabric plugins after FAILED target_fabric_configfs_register() call.
- */
-void target_fabric_configfs_free(
-       struct target_fabric_configfs *tf)
-{
-       mutex_lock(&g_tf_lock);
-       list_del(&tf->tf_list);
-       mutex_unlock(&g_tf_lock);
-
-       kfree(tf);
-}
-EXPORT_SYMBOL(target_fabric_configfs_free);
-
-/*
- * Perform a sanity check of the passed tf->tf_ops before completing
- * TCM fabric module registration.
- */
-static int target_fabric_tf_ops_check(
-       struct target_fabric_configfs *tf)
-{
-       struct target_core_fabric_ops *tfo = &tf->tf_ops;
-
        if (!tfo->get_fabric_name) {
                pr_err("Missing tfo->get_fabric_name()\n");
                return -EINVAL;
@@ -508,77 +444,59 @@ static int target_fabric_tf_ops_check(
        return 0;
 }
 
-/*
- * Called 2nd from fabric module with returned parameter of
- * struct target_fabric_configfs * from target_fabric_configfs_init().
- *
- * Upon a successful registration, the new fabric's struct config_item is
- * return.  Also, a pointer to this struct is set in the passed
- * struct target_fabric_configfs.
- */
-int target_fabric_configfs_register(
-       struct target_fabric_configfs *tf)
+int target_register_template(const struct target_core_fabric_ops *fo)
 {
+       struct target_fabric_configfs *tf;
        int ret;
 
+       ret = target_fabric_tf_ops_check(fo);
+       if (ret)
+               return ret;
+
+       tf = kzalloc(sizeof(struct target_fabric_configfs), GFP_KERNEL);
        if (!tf) {
-               pr_err("Unable to locate target_fabric_configfs"
-                       " pointer\n");
-               return -EINVAL;
-       }
-       if (!tf->tf_subsys) {
-               pr_err("Unable to target struct config_subsystem"
-                       " pointer\n");
-               return -EINVAL;
+               pr_err("%s: could not allocate memory!\n", __func__);
+               return -ENOMEM;
        }
-       ret = target_fabric_tf_ops_check(tf);
-       if (ret < 0)
-               return ret;
 
-       pr_debug("<<<<<<<<<<<<<<<<<<<<<< END FABRIC API >>>>>>>>>>>>"
-               ">>>>>>>>>>\n");
+       INIT_LIST_HEAD(&tf->tf_list);
+       atomic_set(&tf->tf_access_cnt, 0);
+
+       /*
+        * Setup the default generic struct config_item_type's (cits) in
+        * struct target_fabric_configfs->tf_cit_tmpl
+        */
+       tf->tf_module = fo->module;
+       tf->tf_subsys = target_core_subsystem[0];
+       snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name);
+
+       tf->tf_ops = *fo;
+       target_fabric_setup_cits(tf);
+
+       mutex_lock(&g_tf_lock);
+       list_add_tail(&tf->tf_list, &g_tf_list);
+       mutex_unlock(&g_tf_lock);
+
        return 0;
 }
-EXPORT_SYMBOL(target_fabric_configfs_register);
+EXPORT_SYMBOL(target_register_template);
 
-void target_fabric_configfs_deregister(
-       struct target_fabric_configfs *tf)
+void target_unregister_template(const struct target_core_fabric_ops *fo)
 {
-       struct configfs_subsystem *su;
+       struct target_fabric_configfs *t;
 
-       if (!tf) {
-               pr_err("Unable to locate passed target_fabric_"
-                       "configfs\n");
-               return;
-       }
-       su = tf->tf_subsys;
-       if (!su) {
-               pr_err("Unable to locate passed tf->tf_subsys"
-                       " pointer\n");
-               return;
-       }
-       pr_debug("<<<<<<<<<<<<<<<<<<<<<< BEGIN FABRIC API >>>>>>>>>>"
-                       ">>>>>>>>>>>>\n");
        mutex_lock(&g_tf_lock);
-       if (atomic_read(&tf->tf_access_cnt)) {
-               mutex_unlock(&g_tf_lock);
-               pr_err("Non zero tf->tf_access_cnt for fabric %s\n",
-                       tf->tf_name);
-               BUG();
+       list_for_each_entry(t, &g_tf_list, tf_list) {
+               if (!strcmp(t->tf_name, fo->name)) {
+                       BUG_ON(atomic_read(&t->tf_access_cnt));
+                       list_del(&t->tf_list);
+                       kfree(t);
+                       break;
+               }
        }
-       list_del(&tf->tf_list);
        mutex_unlock(&g_tf_lock);
-
-       pr_debug("Target_Core_ConfigFS: DEREGISTER -> Releasing tf:"
-                       " %s\n", tf->tf_name);
-       tf->tf_module = NULL;
-       tf->tf_subsys = NULL;
-       kfree(tf);
-
-       pr_debug("<<<<<<<<<<<<<<<<<<<<<< END FABRIC API >>>>>>>>>>>>>>>>>"
-                       ">>>>>\n");
 }
-EXPORT_SYMBOL(target_fabric_configfs_deregister);
+EXPORT_SYMBOL(target_unregister_template);
 
 /*##############################################################################
 // Stop functions called by external Target Fabrics Modules
@@ -945,7 +863,7 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_holder_tg_port(
        struct se_lun *lun;
        struct se_portal_group *se_tpg;
        struct t10_pr_registration *pr_reg;
-       struct target_core_fabric_ops *tfo;
+       const struct target_core_fabric_ops *tfo;
        ssize_t len = 0;
 
        spin_lock(&dev->dev_reservation_lock);
@@ -979,7 +897,7 @@ SE_DEV_PR_ATTR_RO(res_pr_holder_tg_port);
 static ssize_t target_core_dev_pr_show_attr_res_pr_registered_i_pts(
                struct se_device *dev, char *page)
 {
-       struct target_core_fabric_ops *tfo;
+       const struct target_core_fabric_ops *tfo;
        struct t10_pr_registration *pr_reg;
        unsigned char buf[384];
        char i_buf[PR_REG_ISID_ID_LEN];
index 0c3f901..1f7886b 100644 (file)
@@ -56,6 +56,20 @@ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf)
        pr_debug("Setup generic %s\n", __stringify(_name));             \
 }
 
+#define TF_CIT_SETUP_DRV(_name, _item_ops, _group_ops)         \
+static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) \
+{                                                                      \
+       struct target_fabric_configfs_template *tfc = &tf->tf_cit_tmpl; \
+       struct config_item_type *cit = &tfc->tfc_##_name##_cit;         \
+       struct configfs_attribute **attrs = tf->tf_ops.tfc_##_name##_attrs; \
+                                                                       \
+       cit->ct_item_ops = _item_ops;                                   \
+       cit->ct_group_ops = _group_ops;                                 \
+       cit->ct_attrs = attrs;                                          \
+       cit->ct_owner = tf->tf_module;                                  \
+       pr_debug("Setup generic %s\n", __stringify(_name));             \
+}
+
 /* Start of tfc_tpg_mappedlun_cit */
 
 static int target_fabric_mappedlun_link(
@@ -278,7 +292,7 @@ static struct configfs_item_operations target_fabric_nacl_attrib_item_ops = {
        .store_attribute        = target_fabric_nacl_attrib_attr_store,
 };
 
-TF_CIT_SETUP(tpg_nacl_attrib, &target_fabric_nacl_attrib_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_nacl_attrib, &target_fabric_nacl_attrib_item_ops, NULL);
 
 /* End of tfc_tpg_nacl_attrib_cit */
 
@@ -291,7 +305,7 @@ static struct configfs_item_operations target_fabric_nacl_auth_item_ops = {
        .store_attribute        = target_fabric_nacl_auth_attr_store,
 };
 
-TF_CIT_SETUP(tpg_nacl_auth, &target_fabric_nacl_auth_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_nacl_auth, &target_fabric_nacl_auth_item_ops, NULL);
 
 /* End of tfc_tpg_nacl_auth_cit */
 
@@ -304,7 +318,7 @@ static struct configfs_item_operations target_fabric_nacl_param_item_ops = {
        .store_attribute        = target_fabric_nacl_param_attr_store,
 };
 
-TF_CIT_SETUP(tpg_nacl_param, &target_fabric_nacl_param_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_nacl_param, &target_fabric_nacl_param_item_ops, NULL);
 
 /* End of tfc_tpg_nacl_param_cit */
 
@@ -461,8 +475,8 @@ static struct configfs_group_operations target_fabric_nacl_base_group_ops = {
        .drop_item              = target_fabric_drop_mappedlun,
 };
 
-TF_CIT_SETUP(tpg_nacl_base, &target_fabric_nacl_base_item_ops,
-               &target_fabric_nacl_base_group_ops, NULL);
+TF_CIT_SETUP_DRV(tpg_nacl_base, &target_fabric_nacl_base_item_ops,
+               &target_fabric_nacl_base_group_ops);
 
 /* End of tfc_tpg_nacl_base_cit */
 
@@ -570,7 +584,7 @@ static struct configfs_item_operations target_fabric_np_base_item_ops = {
        .store_attribute        = target_fabric_np_base_attr_store,
 };
 
-TF_CIT_SETUP(tpg_np_base, &target_fabric_np_base_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_np_base, &target_fabric_np_base_item_ops, NULL);
 
 /* End of tfc_tpg_np_base_cit */
 
@@ -966,7 +980,7 @@ static struct configfs_item_operations target_fabric_tpg_attrib_item_ops = {
        .store_attribute        = target_fabric_tpg_attrib_attr_store,
 };
 
-TF_CIT_SETUP(tpg_attrib, &target_fabric_tpg_attrib_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_attrib, &target_fabric_tpg_attrib_item_ops, NULL);
 
 /* End of tfc_tpg_attrib_cit */
 
@@ -979,7 +993,7 @@ static struct configfs_item_operations target_fabric_tpg_auth_item_ops = {
        .store_attribute        = target_fabric_tpg_auth_attr_store,
 };
 
-TF_CIT_SETUP(tpg_auth, &target_fabric_tpg_auth_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_auth, &target_fabric_tpg_auth_item_ops, NULL);
 
 /* End of tfc_tpg_attrib_cit */
 
@@ -992,7 +1006,7 @@ static struct configfs_item_operations target_fabric_tpg_param_item_ops = {
        .store_attribute        = target_fabric_tpg_param_attr_store,
 };
 
-TF_CIT_SETUP(tpg_param, &target_fabric_tpg_param_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_param, &target_fabric_tpg_param_item_ops, NULL);
 
 /* End of tfc_tpg_param_cit */
 
@@ -1018,7 +1032,7 @@ static struct configfs_item_operations target_fabric_tpg_base_item_ops = {
        .store_attribute        = target_fabric_tpg_attr_store,
 };
 
-TF_CIT_SETUP(tpg_base, &target_fabric_tpg_base_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_base, &target_fabric_tpg_base_item_ops, NULL);
 
 /* End of tfc_tpg_base_cit */
 
@@ -1192,7 +1206,7 @@ static struct configfs_item_operations target_fabric_wwn_item_ops = {
        .store_attribute        = target_fabric_wwn_attr_store,
 };
 
-TF_CIT_SETUP(wwn, &target_fabric_wwn_item_ops, &target_fabric_wwn_group_ops, NULL);
+TF_CIT_SETUP_DRV(wwn, &target_fabric_wwn_item_ops, &target_fabric_wwn_group_ops);
 
 /* End of tfc_wwn_cit */
 
@@ -1206,7 +1220,7 @@ static struct configfs_item_operations target_fabric_discovery_item_ops = {
        .store_attribute        = target_fabric_discovery_attr_store,
 };
 
-TF_CIT_SETUP(discovery, &target_fabric_discovery_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(discovery, &target_fabric_discovery_item_ops, NULL);
 
 /* End of tfc_discovery_cit */
 
index 44620fb..f7e6e51 100644 (file)
@@ -264,40 +264,32 @@ static int fd_do_prot_rw(struct se_cmd *cmd, struct fd_prot *fd_prot,
        struct se_device *se_dev = cmd->se_dev;
        struct fd_dev *dev = FD_DEV(se_dev);
        struct file *prot_fd = dev->fd_prot_file;
-       struct scatterlist *sg;
        loff_t pos = (cmd->t_task_lba * se_dev->prot_length);
        unsigned char *buf;
-       u32 prot_size, len, size;
-       int rc, ret = 1, i;
+       u32 prot_size;
+       int rc, ret = 1;
 
        prot_size = (cmd->data_length / se_dev->dev_attrib.block_size) *
                     se_dev->prot_length;
 
        if (!is_write) {
-               fd_prot->prot_buf = vzalloc(prot_size);
+               fd_prot->prot_buf = kzalloc(prot_size, GFP_KERNEL);
                if (!fd_prot->prot_buf) {
                        pr_err("Unable to allocate fd_prot->prot_buf\n");
                        return -ENOMEM;
                }
                buf = fd_prot->prot_buf;
 
-               fd_prot->prot_sg_nents = cmd->t_prot_nents;
-               fd_prot->prot_sg = kzalloc(sizeof(struct scatterlist) *
-                                          fd_prot->prot_sg_nents, GFP_KERNEL);
+               fd_prot->prot_sg_nents = 1;
+               fd_prot->prot_sg = kzalloc(sizeof(struct scatterlist),
+                                          GFP_KERNEL);
                if (!fd_prot->prot_sg) {
                        pr_err("Unable to allocate fd_prot->prot_sg\n");
-                       vfree(fd_prot->prot_buf);
+                       kfree(fd_prot->prot_buf);
                        return -ENOMEM;
                }
-               size = prot_size;
-
-               for_each_sg(fd_prot->prot_sg, sg, fd_prot->prot_sg_nents, i) {
-
-                       len = min_t(u32, PAGE_SIZE, size);
-                       sg_set_buf(sg, buf, len);
-                       size -= len;
-                       buf += len;
-               }
+               sg_init_table(fd_prot->prot_sg, fd_prot->prot_sg_nents);
+               sg_set_buf(fd_prot->prot_sg, buf, prot_size);
        }
 
        if (is_write) {
@@ -318,7 +310,7 @@ static int fd_do_prot_rw(struct se_cmd *cmd, struct fd_prot *fd_prot,
 
        if (is_write || ret < 0) {
                kfree(fd_prot->prot_sg);
-               vfree(fd_prot->prot_buf);
+               kfree(fd_prot->prot_buf);
        }
 
        return ret;
@@ -331,36 +323,33 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl,
        struct fd_dev *dev = FD_DEV(se_dev);
        struct file *fd = dev->fd_file;
        struct scatterlist *sg;
-       struct iovec *iov;
-       mm_segment_t old_fs;
+       struct iov_iter iter;
+       struct bio_vec *bvec;
+       ssize_t len = 0;
        loff_t pos = (cmd->t_task_lba * se_dev->dev_attrib.block_size);
        int ret = 0, i;
 
-       iov = kzalloc(sizeof(struct iovec) * sgl_nents, GFP_KERNEL);
-       if (!iov) {
+       bvec = kcalloc(sgl_nents, sizeof(struct bio_vec), GFP_KERNEL);
+       if (!bvec) {
                pr_err("Unable to allocate fd_do_readv iov[]\n");
                return -ENOMEM;
        }
 
        for_each_sg(sgl, sg, sgl_nents, i) {
-               iov[i].iov_len = sg->length;
-               iov[i].iov_base = kmap(sg_page(sg)) + sg->offset;
-       }
+               bvec[i].bv_page = sg_page(sg);
+               bvec[i].bv_len = sg->length;
+               bvec[i].bv_offset = sg->offset;
 
-       old_fs = get_fs();
-       set_fs(get_ds());
+               len += sg->length;
+       }
 
+       iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len);
        if (is_write)
-               ret = vfs_writev(fd, &iov[0], sgl_nents, &pos);
+               ret = vfs_iter_write(fd, &iter, &pos);
        else
-               ret = vfs_readv(fd, &iov[0], sgl_nents, &pos);
-
-       set_fs(old_fs);
-
-       for_each_sg(sgl, sg, sgl_nents, i)
-               kunmap(sg_page(sg));
+               ret = vfs_iter_read(fd, &iter, &pos);
 
-       kfree(iov);
+       kfree(bvec);
 
        if (is_write) {
                if (ret < 0 || ret != cmd->data_length) {
@@ -436,59 +425,17 @@ fd_execute_sync_cache(struct se_cmd *cmd)
        return 0;
 }
 
-static unsigned char *
-fd_setup_write_same_buf(struct se_cmd *cmd, struct scatterlist *sg,
-                   unsigned int len)
-{
-       struct se_device *se_dev = cmd->se_dev;
-       unsigned int block_size = se_dev->dev_attrib.block_size;
-       unsigned int i = 0, end;
-       unsigned char *buf, *p, *kmap_buf;
-
-       buf = kzalloc(min_t(unsigned int, len, PAGE_SIZE), GFP_KERNEL);
-       if (!buf) {
-               pr_err("Unable to allocate fd_execute_write_same buf\n");
-               return NULL;
-       }
-
-       kmap_buf = kmap(sg_page(sg)) + sg->offset;
-       if (!kmap_buf) {
-               pr_err("kmap() failed in fd_setup_write_same\n");
-               kfree(buf);
-               return NULL;
-       }
-       /*
-        * Fill local *buf to contain multiple WRITE_SAME blocks up to
-        * min(len, PAGE_SIZE)
-        */
-       p = buf;
-       end = min_t(unsigned int, len, PAGE_SIZE);
-
-       while (i < end) {
-               memcpy(p, kmap_buf, block_size);
-
-               i += block_size;
-               p += block_size;
-       }
-       kunmap(sg_page(sg));
-
-       return buf;
-}
-
 static sense_reason_t
 fd_execute_write_same(struct se_cmd *cmd)
 {
        struct se_device *se_dev = cmd->se_dev;
        struct fd_dev *fd_dev = FD_DEV(se_dev);
-       struct file *f = fd_dev->fd_file;
-       struct scatterlist *sg;
-       struct iovec *iov;
-       mm_segment_t old_fs;
-       sector_t nolb = sbc_get_write_same_sectors(cmd);
        loff_t pos = cmd->t_task_lba * se_dev->dev_attrib.block_size;
-       unsigned int len, len_tmp, iov_num;
-       int i, rc;
-       unsigned char *buf;
+       sector_t nolb = sbc_get_write_same_sectors(cmd);
+       struct iov_iter iter;
+       struct bio_vec *bvec;
+       unsigned int len = 0, i;
+       ssize_t ret;
 
        if (!nolb) {
                target_complete_cmd(cmd, SAM_STAT_GOOD);
@@ -499,56 +446,92 @@ fd_execute_write_same(struct se_cmd *cmd)
                       " backends not supported\n");
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
        }
-       sg = &cmd->t_data_sg[0];
 
        if (cmd->t_data_nents > 1 ||
-           sg->length != cmd->se_dev->dev_attrib.block_size) {
+           cmd->t_data_sg[0].length != cmd->se_dev->dev_attrib.block_size) {
                pr_err("WRITE_SAME: Illegal SGL t_data_nents: %u length: %u"
-                       " block_size: %u\n", cmd->t_data_nents, sg->length,
+                       " block_size: %u\n",
+                       cmd->t_data_nents,
+                       cmd->t_data_sg[0].length,
                        cmd->se_dev->dev_attrib.block_size);
                return TCM_INVALID_CDB_FIELD;
        }
 
-       len = len_tmp = nolb * se_dev->dev_attrib.block_size;
-       iov_num = DIV_ROUND_UP(len, PAGE_SIZE);
-
-       buf = fd_setup_write_same_buf(cmd, sg, len);
-       if (!buf)
+       bvec = kcalloc(nolb, sizeof(struct bio_vec), GFP_KERNEL);
+       if (!bvec)
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-       iov = vzalloc(sizeof(struct iovec) * iov_num);
-       if (!iov) {
-               pr_err("Unable to allocate fd_execute_write_same iovecs\n");
-               kfree(buf);
+       for (i = 0; i < nolb; i++) {
+               bvec[i].bv_page = sg_page(&cmd->t_data_sg[0]);
+               bvec[i].bv_len = cmd->t_data_sg[0].length;
+               bvec[i].bv_offset = cmd->t_data_sg[0].offset;
+
+               len += se_dev->dev_attrib.block_size;
+       }
+
+       iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len);
+       ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos);
+
+       kfree(bvec);
+       if (ret < 0 || ret != len) {
+               pr_err("vfs_iter_write() returned %zd for write same\n", ret);
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
        }
-       /*
-        * Map the single fabric received scatterlist block now populated
-        * in *buf into each iovec for I/O submission.
-        */
-       for (i = 0; i < iov_num; i++) {
-               iov[i].iov_base = buf;
-               iov[i].iov_len = min_t(unsigned int, len_tmp, PAGE_SIZE);
-               len_tmp -= iov[i].iov_len;
+
+       target_complete_cmd(cmd, SAM_STAT_GOOD);
+       return 0;
+}
+
+static int
+fd_do_prot_fill(struct se_device *se_dev, sector_t lba, sector_t nolb,
+               void *buf, size_t bufsize)
+{
+       struct fd_dev *fd_dev = FD_DEV(se_dev);
+       struct file *prot_fd = fd_dev->fd_prot_file;
+       sector_t prot_length, prot;
+       loff_t pos = lba * se_dev->prot_length;
+
+       if (!prot_fd) {
+               pr_err("Unable to locate fd_dev->fd_prot_file\n");
+               return -ENODEV;
        }
 
-       old_fs = get_fs();
-       set_fs(get_ds());
-       rc = vfs_writev(f, &iov[0], iov_num, &pos);
-       set_fs(old_fs);
+       prot_length = nolb * se_dev->prot_length;
 
-       vfree(iov);
-       kfree(buf);
+       for (prot = 0; prot < prot_length;) {
+               sector_t len = min_t(sector_t, bufsize, prot_length - prot);
+               ssize_t ret = kernel_write(prot_fd, buf, len, pos + prot);
 
-       if (rc < 0 || rc != len) {
-               pr_err("vfs_writev() returned %d for write same\n", rc);
-               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+               if (ret != len) {
+                       pr_err("vfs_write to prot file failed: %zd\n", ret);
+                       return ret < 0 ? ret : -ENODEV;
+               }
+               prot += ret;
        }
 
-       target_complete_cmd(cmd, SAM_STAT_GOOD);
        return 0;
 }
 
+static int
+fd_do_prot_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
+{
+       void *buf;
+       int rc;
+
+       buf = (void *)__get_free_page(GFP_KERNEL);
+       if (!buf) {
+               pr_err("Unable to allocate FILEIO prot buf\n");
+               return -ENOMEM;
+       }
+       memset(buf, 0xff, PAGE_SIZE);
+
+       rc = fd_do_prot_fill(cmd->se_dev, lba, nolb, buf, PAGE_SIZE);
+
+       free_page((unsigned long)buf);
+
+       return rc;
+}
+
 static sense_reason_t
 fd_do_unmap(struct se_cmd *cmd, void *priv, sector_t lba, sector_t nolb)
 {
@@ -556,6 +539,12 @@ fd_do_unmap(struct se_cmd *cmd, void *priv, sector_t lba, sector_t nolb)
        struct inode *inode = file->f_mapping->host;
        int ret;
 
+       if (cmd->se_dev->dev_attrib.pi_prot_type) {
+               ret = fd_do_prot_unmap(cmd, lba, nolb);
+               if (ret)
+                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+       }
+
        if (S_ISBLK(inode->i_mode)) {
                /* The backend is block device, use discard */
                struct block_device *bdev = inode->i_bdev;
@@ -595,7 +584,7 @@ fd_execute_write_same_unmap(struct se_cmd *cmd)
        struct file *file = fd_dev->fd_file;
        sector_t lba = cmd->t_task_lba;
        sector_t nolb = sbc_get_write_same_sectors(cmd);
-       int ret;
+       sense_reason_t ret;
 
        if (!nolb) {
                target_complete_cmd(cmd, SAM_STAT_GOOD);
@@ -643,7 +632,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
        if (data_direction == DMA_FROM_DEVICE) {
                memset(&fd_prot, 0, sizeof(struct fd_prot));
 
-               if (cmd->prot_type) {
+               if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                        ret = fd_do_prot_rw(cmd, &fd_prot, false);
                        if (ret < 0)
                                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -651,23 +640,23 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 
                ret = fd_do_rw(cmd, sgl, sgl_nents, 0);
 
-               if (ret > 0 && cmd->prot_type) {
+               if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                        u32 sectors = cmd->data_length / dev->dev_attrib.block_size;
 
                        rc = sbc_dif_verify_read(cmd, cmd->t_task_lba, sectors,
                                                 0, fd_prot.prot_sg, 0);
                        if (rc) {
                                kfree(fd_prot.prot_sg);
-                               vfree(fd_prot.prot_buf);
+                               kfree(fd_prot.prot_buf);
                                return rc;
                        }
                        kfree(fd_prot.prot_sg);
-                       vfree(fd_prot.prot_buf);
+                       kfree(fd_prot.prot_buf);
                }
        } else {
                memset(&fd_prot, 0, sizeof(struct fd_prot));
 
-               if (cmd->prot_type) {
+               if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                        u32 sectors = cmd->data_length / dev->dev_attrib.block_size;
 
                        ret = fd_do_prot_rw(cmd, &fd_prot, false);
@@ -678,7 +667,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                                                  0, fd_prot.prot_sg, 0);
                        if (rc) {
                                kfree(fd_prot.prot_sg);
-                               vfree(fd_prot.prot_buf);
+                               kfree(fd_prot.prot_buf);
                                return rc;
                        }
                }
@@ -705,7 +694,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                        vfs_fsync_range(fd_dev->fd_file, start, end, 1);
                }
 
-               if (ret > 0 && cmd->prot_type) {
+               if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                        ret = fd_do_prot_rw(cmd, &fd_prot, true);
                        if (ret < 0)
                                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -714,7 +703,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 
        if (ret < 0) {
                kfree(fd_prot.prot_sg);
-               vfree(fd_prot.prot_buf);
+               kfree(fd_prot.prot_buf);
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
        }
 
@@ -878,48 +867,28 @@ static int fd_init_prot(struct se_device *dev)
 
 static int fd_format_prot(struct se_device *dev)
 {
-       struct fd_dev *fd_dev = FD_DEV(dev);
-       struct file *prot_fd = fd_dev->fd_prot_file;
-       sector_t prot_length, prot;
        unsigned char *buf;
-       loff_t pos = 0;
        int unit_size = FDBD_FORMAT_UNIT_SIZE * dev->dev_attrib.block_size;
-       int rc, ret = 0, size, len;
+       int ret;
 
        if (!dev->dev_attrib.pi_prot_type) {
                pr_err("Unable to format_prot while pi_prot_type == 0\n");
                return -ENODEV;
        }
-       if (!prot_fd) {
-               pr_err("Unable to locate fd_dev->fd_prot_file\n");
-               return -ENODEV;
-       }
 
        buf = vzalloc(unit_size);
        if (!buf) {
                pr_err("Unable to allocate FILEIO prot buf\n");
                return -ENOMEM;
        }
-       prot_length = (dev->transport->get_blocks(dev) + 1) * dev->prot_length;
-       size = prot_length;
 
        pr_debug("Using FILEIO prot_length: %llu\n",
-                (unsigned long long)prot_length);
+                (unsigned long long)(dev->transport->get_blocks(dev) + 1) *
+                                       dev->prot_length);
 
        memset(buf, 0xff, unit_size);
-       for (prot = 0; prot < prot_length; prot += unit_size) {
-               len = min(unit_size, size);
-               rc = kernel_write(prot_fd, buf, len, pos);
-               if (rc != len) {
-                       pr_err("vfs_write to prot file failed: %d\n", rc);
-                       ret = -ENODEV;
-                       goto out;
-               }
-               pos += len;
-               size -= len;
-       }
-
-out:
+       ret = fd_do_prot_fill(dev, 0, dev->transport->get_blocks(dev) + 1,
+                             buf, unit_size);
        vfree(buf);
        return ret;
 }
index d4a4b0f..1b7947c 100644 (file)
@@ -444,7 +444,7 @@ iblock_execute_write_same_unmap(struct se_cmd *cmd)
        struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd;
        sector_t lba = cmd->t_task_lba;
        sector_t nolb = sbc_get_write_same_sectors(cmd);
-       int ret;
+       sense_reason_t ret;
 
        ret = iblock_do_unmap(cmd, bdev, lba, nolb);
        if (ret)
@@ -774,7 +774,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                sg_num--;
        }
 
-       if (cmd->prot_type) {
+       if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                int rc = iblock_alloc_bip(cmd, bio_start);
                if (rc)
                        goto fail_put_bios;
index 60381db..874a9bc 100644 (file)
@@ -4,7 +4,13 @@
 /* target_core_alua.c */
 extern struct t10_alua_lu_gp *default_lu_gp;
 
+/* target_core_configfs.c */
+extern struct configfs_subsystem *target_core_subsystem[];
+
 /* target_core_device.c */
+extern struct mutex g_device_mutex;
+extern struct list_head g_device_list;
+
 struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16);
 int    core_free_device_list_for_node(struct se_node_acl *,
                struct se_portal_group *);
index 2de6fb8..c1aa965 100644 (file)
@@ -78,6 +78,22 @@ enum preempt_type {
 static void __core_scsi3_complete_pro_release(struct se_device *, struct se_node_acl *,
                                              struct t10_pr_registration *, int, int);
 
+static int is_reservation_holder(
+       struct t10_pr_registration *pr_res_holder,
+       struct t10_pr_registration *pr_reg)
+{
+       int pr_res_type;
+
+       if (pr_res_holder) {
+               pr_res_type = pr_res_holder->pr_res_type;
+
+               return pr_res_holder == pr_reg ||
+                      pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG ||
+                      pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG;
+       }
+       return 0;
+}
+
 static sense_reason_t
 target_scsi2_reservation_check(struct se_cmd *cmd)
 {
@@ -664,7 +680,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
        struct se_dev_entry *deve_tmp;
        struct se_node_acl *nacl_tmp;
        struct se_port *port, *port_tmp;
-       struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
+       const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
        struct t10_pr_registration *pr_reg, *pr_reg_atp, *pr_reg_tmp, *pr_reg_tmp_safe;
        int ret;
        /*
@@ -963,7 +979,7 @@ int core_scsi3_check_aptpl_registration(
 }
 
 static void __core_scsi3_dump_registration(
-       struct target_core_fabric_ops *tfo,
+       const struct target_core_fabric_ops *tfo,
        struct se_device *dev,
        struct se_node_acl *nacl,
        struct t10_pr_registration *pr_reg,
@@ -1004,7 +1020,7 @@ static void __core_scsi3_add_registration(
        enum register_type register_type,
        int register_move)
 {
-       struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
+       const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
        struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe;
        struct t10_reservation *pr_tmpl = &dev->t10_pr;
 
@@ -1220,8 +1236,10 @@ static void __core_scsi3_free_registration(
        struct t10_pr_registration *pr_reg,
        struct list_head *preempt_and_abort_list,
        int dec_holders)
+       __releases(&pr_tmpl->registration_lock)
+       __acquires(&pr_tmpl->registration_lock)
 {
-       struct target_core_fabric_ops *tfo =
+       const struct target_core_fabric_ops *tfo =
                        pr_reg->pr_reg_nacl->se_tpg->se_tpg_tfo;
        struct t10_reservation *pr_tmpl = &dev->t10_pr;
        char i_buf[PR_REG_ISID_ID_LEN];
@@ -1445,7 +1463,7 @@ core_scsi3_decode_spec_i_port(
        struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe;
        LIST_HEAD(tid_dest_list);
        struct pr_transport_id_holder *tidh_new, *tidh, *tidh_tmp;
-       struct target_core_fabric_ops *tmp_tf_ops;
+       const struct target_core_fabric_ops *tmp_tf_ops;
        unsigned char *buf;
        unsigned char *ptr, *i_str = NULL, proto_ident, tmp_proto_ident;
        char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN];
@@ -2287,7 +2305,6 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key)
        spin_lock(&dev->dev_reservation_lock);
        pr_res_holder = dev->dev_pr_res_holder;
        if (pr_res_holder) {
-               int pr_res_type = pr_res_holder->pr_res_type;
                /*
                 * From spc4r17 Section 5.7.9: Reserving:
                 *
@@ -2298,9 +2315,7 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key)
                 * the logical unit, then the command shall be completed with
                 * RESERVATION CONFLICT status.
                 */
-               if ((pr_res_holder != pr_reg) &&
-                   (pr_res_type != PR_TYPE_WRITE_EXCLUSIVE_ALLREG) &&
-                   (pr_res_type != PR_TYPE_EXCLUSIVE_ACCESS_ALLREG)) {
+               if (!is_reservation_holder(pr_res_holder, pr_reg)) {
                        struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
                        pr_err("SPC-3 PR: Attempted RESERVE from"
                                " [%s]: %s while reservation already held by"
@@ -2409,7 +2424,7 @@ static void __core_scsi3_complete_pro_release(
        int explicit,
        int unreg)
 {
-       struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo;
+       const struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo;
        char i_buf[PR_REG_ISID_ID_LEN];
        int pr_res_type = 0, pr_res_scope = 0;
 
@@ -2477,7 +2492,6 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope,
        struct se_lun *se_lun = cmd->se_lun;
        struct t10_pr_registration *pr_reg, *pr_reg_p, *pr_res_holder;
        struct t10_reservation *pr_tmpl = &dev->t10_pr;
-       int all_reg = 0;
        sense_reason_t ret = 0;
 
        if (!se_sess || !se_lun) {
@@ -2514,13 +2528,9 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope,
                spin_unlock(&dev->dev_reservation_lock);
                goto out_put_pr_reg;
        }
-       if ((pr_res_holder->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG) ||
-           (pr_res_holder->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG))
-               all_reg = 1;
 
-       if ((all_reg == 0) && (pr_res_holder != pr_reg)) {
+       if (!is_reservation_holder(pr_res_holder, pr_reg)) {
                /*
-                * Non 'All Registrants' PR Type cases..
                 * Release request from a registered I_T nexus that is not a
                 * persistent reservation holder. return GOOD status.
                 */
@@ -2726,7 +2736,7 @@ static void __core_scsi3_complete_pro_preempt(
        enum preempt_type preempt_type)
 {
        struct se_node_acl *nacl = pr_reg->pr_reg_nacl;
-       struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
+       const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
        char i_buf[PR_REG_ISID_ID_LEN];
 
        memset(i_buf, 0, PR_REG_ISID_ID_LEN);
@@ -3111,7 +3121,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
        struct se_node_acl *pr_res_nacl, *pr_reg_nacl, *dest_node_acl = NULL;
        struct se_port *se_port;
        struct se_portal_group *se_tpg, *dest_se_tpg = NULL;
-       struct target_core_fabric_ops *dest_tf_ops = NULL, *tf_ops;
+       const struct target_core_fabric_ops *dest_tf_ops = NULL, *tf_ops;
        struct t10_pr_registration *pr_reg, *pr_res_holder, *dest_pr_reg;
        struct t10_reservation *pr_tmpl = &dev->t10_pr;
        unsigned char *buf;
@@ -3375,7 +3385,7 @@ after_iport_check:
         * From spc4r17 section 5.7.8  Table 50 --
         *      Register behaviors for a REGISTER AND MOVE service action
         */
-       if (pr_res_holder != pr_reg) {
+       if (!is_reservation_holder(pr_res_holder, pr_reg)) {
                pr_warn("SPC-3 PR REGISTER_AND_MOVE: Calling I_T"
                        " Nexus is not reservation holder\n");
                spin_unlock(&dev->dev_reservation_lock);
index 98e83ac..a263bf5 100644 (file)
@@ -139,10 +139,22 @@ static int rd_allocate_sgl_table(struct rd_dev *rd_dev, struct rd_dev_sg_table *
        unsigned char *p;
 
        while (total_sg_needed) {
+               unsigned int chain_entry = 0;
+
                sg_per_table = (total_sg_needed > max_sg_per_table) ?
                        max_sg_per_table : total_sg_needed;
 
-               sg = kzalloc(sg_per_table * sizeof(struct scatterlist),
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
+
+               /*
+                * Reserve extra element for chain entry
+                */
+               if (sg_per_table < total_sg_needed)
+                       chain_entry = 1;
+
+#endif /* CONFIG_ARCH_HAS_SG_CHAIN */
+
+               sg = kcalloc(sg_per_table + chain_entry, sizeof(*sg),
                                GFP_KERNEL);
                if (!sg) {
                        pr_err("Unable to allocate scatterlist array"
@@ -150,7 +162,16 @@ static int rd_allocate_sgl_table(struct rd_dev *rd_dev, struct rd_dev_sg_table *
                        return -ENOMEM;
                }
 
-               sg_init_table(sg, sg_per_table);
+               sg_init_table(sg, sg_per_table + chain_entry);
+
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
+
+               if (i > 0) {
+                       sg_chain(sg_table[i - 1].sg_table,
+                                max_sg_per_table + 1, sg);
+               }
+
+#endif /* CONFIG_ARCH_HAS_SG_CHAIN */
 
                sg_table[i].sg_table = sg;
                sg_table[i].rd_sg_count = sg_per_table;
@@ -382,6 +403,76 @@ static struct rd_dev_sg_table *rd_get_prot_table(struct rd_dev *rd_dev, u32 page
        return NULL;
 }
 
+typedef sense_reason_t (*dif_verify)(struct se_cmd *, sector_t, unsigned int,
+                                    unsigned int, struct scatterlist *, int);
+
+static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, dif_verify dif_verify)
+{
+       struct se_device *se_dev = cmd->se_dev;
+       struct rd_dev *dev = RD_DEV(se_dev);
+       struct rd_dev_sg_table *prot_table;
+       bool need_to_release = false;
+       struct scatterlist *prot_sg;
+       u32 sectors = cmd->data_length / se_dev->dev_attrib.block_size;
+       u32 prot_offset, prot_page;
+       u32 prot_npages __maybe_unused;
+       u64 tmp;
+       sense_reason_t rc = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+       tmp = cmd->t_task_lba * se_dev->prot_length;
+       prot_offset = do_div(tmp, PAGE_SIZE);
+       prot_page = tmp;
+
+       prot_table = rd_get_prot_table(dev, prot_page);
+       if (!prot_table)
+               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+       prot_sg = &prot_table->sg_table[prot_page -
+                                       prot_table->page_start_offset];
+
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
+
+       prot_npages = DIV_ROUND_UP(prot_offset + sectors * se_dev->prot_length,
+                                  PAGE_SIZE);
+
+       /*
+        * Allocate temporaly contiguous scatterlist entries if prot pages
+        * straddles multiple scatterlist tables.
+        */
+       if (prot_table->page_end_offset < prot_page + prot_npages - 1) {
+               int i;
+
+               prot_sg = kcalloc(prot_npages, sizeof(*prot_sg), GFP_KERNEL);
+               if (!prot_sg)
+                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+               need_to_release = true;
+               sg_init_table(prot_sg, prot_npages);
+
+               for (i = 0; i < prot_npages; i++) {
+                       if (prot_page + i > prot_table->page_end_offset) {
+                               prot_table = rd_get_prot_table(dev,
+                                                               prot_page + i);
+                               if (!prot_table) {
+                                       kfree(prot_sg);
+                                       return rc;
+                               }
+                               sg_unmark_end(&prot_sg[i - 1]);
+                       }
+                       prot_sg[i] = prot_table->sg_table[prot_page + i -
+                                               prot_table->page_start_offset];
+               }
+       }
+
+#endif /* !CONFIG_ARCH_HAS_SG_CHAIN */
+
+       rc = dif_verify(cmd, cmd->t_task_lba, sectors, 0, prot_sg, prot_offset);
+       if (need_to_release)
+               kfree(prot_sg);
+
+       return rc;
+}
+
 static sense_reason_t
 rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
              enum dma_data_direction data_direction)
@@ -419,24 +510,9 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                        data_direction == DMA_FROM_DEVICE ? "Read" : "Write",
                        cmd->t_task_lba, rd_size, rd_page, rd_offset);
 
-       if (cmd->prot_type && data_direction == DMA_TO_DEVICE) {
-               struct rd_dev_sg_table *prot_table;
-               struct scatterlist *prot_sg;
-               u32 sectors = cmd->data_length / se_dev->dev_attrib.block_size;
-               u32 prot_offset, prot_page;
-
-               tmp = cmd->t_task_lba * se_dev->prot_length;
-               prot_offset = do_div(tmp, PAGE_SIZE);
-               prot_page = tmp;
-
-               prot_table = rd_get_prot_table(dev, prot_page);
-               if (!prot_table)
-                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-
-               prot_sg = &prot_table->sg_table[prot_page - prot_table->page_start_offset];
-
-               rc = sbc_dif_verify_write(cmd, cmd->t_task_lba, sectors, 0,
-                                         prot_sg, prot_offset);
+       if (cmd->prot_type && se_dev->dev_attrib.pi_prot_type &&
+           data_direction == DMA_TO_DEVICE) {
+               rc = rd_do_prot_rw(cmd, sbc_dif_verify_write);
                if (rc)
                        return rc;
        }
@@ -502,24 +578,9 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
        }
        sg_miter_stop(&m);
 
-       if (cmd->prot_type && data_direction == DMA_FROM_DEVICE) {
-               struct rd_dev_sg_table *prot_table;
-               struct scatterlist *prot_sg;
-               u32 sectors = cmd->data_length / se_dev->dev_attrib.block_size;
-               u32 prot_offset, prot_page;
-
-               tmp = cmd->t_task_lba * se_dev->prot_length;
-               prot_offset = do_div(tmp, PAGE_SIZE);
-               prot_page = tmp;
-
-               prot_table = rd_get_prot_table(dev, prot_page);
-               if (!prot_table)
-                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-
-               prot_sg = &prot_table->sg_table[prot_page - prot_table->page_start_offset];
-
-               rc = sbc_dif_verify_read(cmd, cmd->t_task_lba, sectors, 0,
-                                        prot_sg, prot_offset);
+       if (cmd->prot_type && se_dev->dev_attrib.pi_prot_type &&
+           data_direction == DMA_FROM_DEVICE) {
+               rc = rd_do_prot_rw(cmd, sbc_dif_verify_read);
                if (rc)
                        return rc;
        }
index 3e72974..8855781 100644 (file)
@@ -93,6 +93,8 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
        struct se_session *sess = cmd->se_sess;
+       int pi_prot_type = dev->dev_attrib.pi_prot_type;
+
        unsigned char *rbuf;
        unsigned char buf[32];
        unsigned long long blocks = dev->transport->get_blocks(dev);
@@ -114,8 +116,15 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd)
         * Set P_TYPE and PROT_EN bits for DIF support
         */
        if (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) {
-               if (dev->dev_attrib.pi_prot_type)
-                       buf[12] = (dev->dev_attrib.pi_prot_type - 1) << 1 | 0x1;
+               /*
+                * Only override a device's pi_prot_type if no T10-PI is
+                * available, and sess_prot_type has been explicitly enabled.
+                */
+               if (!pi_prot_type)
+                       pi_prot_type = sess->sess_prot_type;
+
+               if (pi_prot_type)
+                       buf[12] = (pi_prot_type - 1) << 1 | 0x1;
        }
 
        if (dev->transport->get_lbppbe)
@@ -312,7 +321,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
        return 0;
 }
 
-static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd)
+static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success)
 {
        unsigned char *buf, *addr;
        struct scatterlist *sg;
@@ -376,7 +385,7 @@ sbc_execute_rw(struct se_cmd *cmd)
                               cmd->data_direction);
 }
 
-static sense_reason_t compare_and_write_post(struct se_cmd *cmd)
+static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success)
 {
        struct se_device *dev = cmd->se_dev;
 
@@ -399,7 +408,7 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd)
        return TCM_NO_SENSE;
 }
 
-static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
+static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success)
 {
        struct se_device *dev = cmd->se_dev;
        struct scatterlist *write_sg = NULL, *sg;
@@ -414,10 +423,15 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
 
        /*
         * Handle early failure in transport_generic_request_failure(),
-        * which will not have taken ->caw_mutex yet..
+        * which will not have taken ->caw_sem yet..
         */
-       if (!cmd->t_data_sg || !cmd->t_bidi_data_sg)
+       if (!success && (!cmd->t_data_sg || !cmd->t_bidi_data_sg))
                return TCM_NO_SENSE;
+       /*
+        * Handle special case for zero-length COMPARE_AND_WRITE
+        */
+       if (!cmd->data_length)
+               goto out;
        /*
         * Immediately exit + release dev->caw_sem if command has already
         * been failed with a non-zero SCSI status.
@@ -581,12 +595,13 @@ sbc_compare_and_write(struct se_cmd *cmd)
 }
 
 static int
-sbc_set_prot_op_checks(u8 protect, enum target_prot_type prot_type,
+sbc_set_prot_op_checks(u8 protect, bool fabric_prot, enum target_prot_type prot_type,
                       bool is_write, struct se_cmd *cmd)
 {
        if (is_write) {
-               cmd->prot_op = protect ? TARGET_PROT_DOUT_PASS :
-                                        TARGET_PROT_DOUT_INSERT;
+               cmd->prot_op = fabric_prot ? TARGET_PROT_DOUT_STRIP :
+                              protect ? TARGET_PROT_DOUT_PASS :
+                              TARGET_PROT_DOUT_INSERT;
                switch (protect) {
                case 0x0:
                case 0x3:
@@ -610,8 +625,9 @@ sbc_set_prot_op_checks(u8 protect, enum target_prot_type prot_type,
                        return -EINVAL;
                }
        } else {
-               cmd->prot_op = protect ? TARGET_PROT_DIN_PASS :
-                                        TARGET_PROT_DIN_STRIP;
+               cmd->prot_op = fabric_prot ? TARGET_PROT_DIN_INSERT :
+                              protect ? TARGET_PROT_DIN_PASS :
+                              TARGET_PROT_DIN_STRIP;
                switch (protect) {
                case 0x0:
                case 0x1:
@@ -644,11 +660,15 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb,
               u32 sectors, bool is_write)
 {
        u8 protect = cdb[1] >> 5;
+       int sp_ops = cmd->se_sess->sup_prot_ops;
+       int pi_prot_type = dev->dev_attrib.pi_prot_type;
+       bool fabric_prot = false;
 
        if (!cmd->t_prot_sg || !cmd->t_prot_nents) {
-               if (protect && !dev->dev_attrib.pi_prot_type) {
-                       pr_err("CDB contains protect bit, but device does not"
-                              " advertise PROTECT=1 feature bit\n");
+               if (unlikely(protect &&
+                   !dev->dev_attrib.pi_prot_type && !cmd->se_sess->sess_prot_type)) {
+                       pr_err("CDB contains protect bit, but device + fabric does"
+                              " not advertise PROTECT=1 feature bit\n");
                        return TCM_INVALID_CDB_FIELD;
                }
                if (cmd->prot_pto)
@@ -669,15 +689,32 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb,
                cmd->reftag_seed = cmd->t_task_lba;
                break;
        case TARGET_DIF_TYPE0_PROT:
+               /*
+                * See if the fabric supports T10-PI, and the session has been
+                * configured to allow export PROTECT=1 feature bit with backend
+                * devices that don't support T10-PI.
+                */
+               fabric_prot = is_write ?
+                             !!(sp_ops & (TARGET_PROT_DOUT_PASS | TARGET_PROT_DOUT_STRIP)) :
+                             !!(sp_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DIN_INSERT));
+
+               if (fabric_prot && cmd->se_sess->sess_prot_type) {
+                       pi_prot_type = cmd->se_sess->sess_prot_type;
+                       break;
+               }
+               if (!protect)
+                       return TCM_NO_SENSE;
+               /* Fallthrough */
        default:
-               return TCM_NO_SENSE;
+               pr_err("Unable to determine pi_prot_type for CDB: 0x%02x "
+                      "PROTECT: 0x%02x\n", cdb[0], protect);
+               return TCM_INVALID_CDB_FIELD;
        }
 
-       if (sbc_set_prot_op_checks(protect, dev->dev_attrib.pi_prot_type,
-                                  is_write, cmd))
+       if (sbc_set_prot_op_checks(protect, fabric_prot, pi_prot_type, is_write, cmd))
                return TCM_INVALID_CDB_FIELD;
 
-       cmd->prot_type = dev->dev_attrib.pi_prot_type;
+       cmd->prot_type = pi_prot_type;
        cmd->prot_length = dev->prot_length * sectors;
 
        /**
@@ -1166,14 +1203,16 @@ sbc_dif_generate(struct se_cmd *cmd)
                        sdt = paddr + offset;
                        sdt->guard_tag = cpu_to_be16(crc_t10dif(daddr + j,
                                                dev->dev_attrib.block_size));
-                       if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE1_PROT)
+                       if (cmd->prot_type == TARGET_DIF_TYPE1_PROT)
                                sdt->ref_tag = cpu_to_be32(sector & 0xffffffff);
                        sdt->app_tag = 0;
 
-                       pr_debug("DIF WRITE INSERT sector: %llu guard_tag: 0x%04x"
+                       pr_debug("DIF %s INSERT sector: %llu guard_tag: 0x%04x"
                                 " app_tag: 0x%04x ref_tag: %u\n",
-                                (unsigned long long)sector, sdt->guard_tag,
-                                sdt->app_tag, be32_to_cpu(sdt->ref_tag));
+                                (cmd->data_direction == DMA_TO_DEVICE) ?
+                                "WRITE" : "READ", (unsigned long long)sector,
+                                sdt->guard_tag, sdt->app_tag,
+                                be32_to_cpu(sdt->ref_tag));
 
                        sector++;
                        offset += sizeof(struct se_dif_v1_tuple);
@@ -1185,12 +1224,16 @@ sbc_dif_generate(struct se_cmd *cmd)
 }
 
 static sense_reason_t
-sbc_dif_v1_verify(struct se_device *dev, struct se_dif_v1_tuple *sdt,
+sbc_dif_v1_verify(struct se_cmd *cmd, struct se_dif_v1_tuple *sdt,
                  const void *p, sector_t sector, unsigned int ei_lba)
 {
+       struct se_device *dev = cmd->se_dev;
        int block_size = dev->dev_attrib.block_size;
        __be16 csum;
 
+       if (!(cmd->prot_checks & TARGET_DIF_CHECK_GUARD))
+               goto check_ref;
+
        csum = cpu_to_be16(crc_t10dif(p, block_size));
 
        if (sdt->guard_tag != csum) {
@@ -1200,7 +1243,11 @@ sbc_dif_v1_verify(struct se_device *dev, struct se_dif_v1_tuple *sdt,
                return TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
        }
 
-       if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE1_PROT &&
+check_ref:
+       if (!(cmd->prot_checks & TARGET_DIF_CHECK_REFTAG))
+               return 0;
+
+       if (cmd->prot_type == TARGET_DIF_TYPE1_PROT &&
            be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) {
                pr_err("DIFv1 Type 1 reference failed on sector: %llu tag: 0x%08x"
                       " sector MSB: 0x%08x\n", (unsigned long long)sector,
@@ -1208,7 +1255,7 @@ sbc_dif_v1_verify(struct se_device *dev, struct se_dif_v1_tuple *sdt,
                return TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
        }
 
-       if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE2_PROT &&
+       if (cmd->prot_type == TARGET_DIF_TYPE2_PROT &&
            be32_to_cpu(sdt->ref_tag) != ei_lba) {
                pr_err("DIFv1 Type 2 reference failed on sector: %llu tag: 0x%08x"
                       " ei_lba: 0x%08x\n", (unsigned long long)sector,
@@ -1229,6 +1276,9 @@ sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read,
        unsigned int i, len, left;
        unsigned int offset = sg_off;
 
+       if (!sg)
+               return;
+
        left = sectors * dev->prot_length;
 
        for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) {
@@ -1292,7 +1342,7 @@ sbc_dif_verify_write(struct se_cmd *cmd, sector_t start, unsigned int sectors,
                                 (unsigned long long)sector, sdt->guard_tag,
                                 sdt->app_tag, be32_to_cpu(sdt->ref_tag));
 
-                       rc = sbc_dif_v1_verify(dev, sdt, daddr + j, sector,
+                       rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector,
                                               ei_lba);
                        if (rc) {
                                kunmap_atomic(paddr);
@@ -1309,6 +1359,9 @@ sbc_dif_verify_write(struct se_cmd *cmd, sector_t start, unsigned int sectors,
                kunmap_atomic(paddr);
                kunmap_atomic(daddr);
        }
+       if (!sg)
+               return 0;
+
        sbc_dif_copy_prot(cmd, sectors, false, sg, sg_off);
 
        return 0;
@@ -1353,7 +1406,7 @@ __sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors,
                                continue;
                        }
 
-                       rc = sbc_dif_v1_verify(dev, sdt, daddr + j, sector,
+                       rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector,
                                               ei_lba);
                        if (rc) {
                                kunmap_atomic(paddr);
index 6c8bd6b..7912aa1 100644 (file)
@@ -103,10 +103,12 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf)
                buf[5] |= 0x8;
        /*
         * Set Protection (PROTECT) bit when DIF has been enabled on the
-        * device, and the transport supports VERIFY + PASS.
+        * device, and the fabric supports VERIFY + PASS.  Also report
+        * PROTECT=1 if sess_prot_type has been configured to allow T10-PI
+        * to unprotected devices.
         */
        if (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) {
-               if (dev->dev_attrib.pi_prot_type)
+               if (dev->dev_attrib.pi_prot_type || cmd->se_sess->sess_prot_type)
                        buf[5] |= 0x1;
        }
 
@@ -467,9 +469,11 @@ spc_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf)
         * only for TYPE3 protection.
         */
        if (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) {
-               if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE1_PROT)
+               if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE1_PROT ||
+                   cmd->se_sess->sess_prot_type == TARGET_DIF_TYPE1_PROT)
                        buf[4] = 0x5;
-               else if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE3_PROT)
+               else if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE3_PROT ||
+                       cmd->se_sess->sess_prot_type == TARGET_DIF_TYPE3_PROT)
                        buf[4] = 0x4;
        }
 
@@ -861,7 +865,7 @@ static int spc_modesense_control(struct se_cmd *cmd, u8 pc, u8 *p)
         * TAG field.
         */
        if (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) {
-               if (dev->dev_attrib.pi_prot_type)
+               if (dev->dev_attrib.pi_prot_type || sess->sess_prot_type)
                        p[5] |= 0x80;
        }
 
@@ -1099,7 +1103,7 @@ static sense_reason_t spc_emulate_modeselect(struct se_cmd *cmd)
        unsigned char *buf;
        unsigned char tbuf[SE_MODE_PAGE_BUF];
        int length;
-       int ret = 0;
+       sense_reason_t ret = 0;
        int i;
 
        if (!cmd->data_length) {
index fa5e157..315ec34 100644 (file)
@@ -125,8 +125,8 @@ void core_tmr_abort_task(
                if (dev != se_cmd->se_dev)
                        continue;
 
-               /* skip se_cmd associated with tmr */
-               if (tmr->task_cmd == se_cmd)
+               /* skip task management functions, including tmr->task_cmd */
+               if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
                        continue;
 
                ref_tag = se_cmd->se_tfo->get_task_tag(se_cmd);
index 0696de9..47f0644 100644 (file)
@@ -672,7 +672,7 @@ static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg)
 }
 
 int core_tpg_register(
-       struct target_core_fabric_ops *tfo,
+       const struct target_core_fabric_ops *tfo,
        struct se_wwn *se_wwn,
        struct se_portal_group *se_tpg,
        void *tpg_fabric_ptr,
index ac3cbab..3fe5cb2 100644 (file)
@@ -322,6 +322,7 @@ void __transport_register_session(
        struct se_session *se_sess,
        void *fabric_sess_ptr)
 {
+       const struct target_core_fabric_ops *tfo = se_tpg->se_tpg_tfo;
        unsigned char buf[PR_REG_ISID_LEN];
 
        se_sess->se_tpg = se_tpg;
@@ -333,6 +334,21 @@ void __transport_register_session(
         * eg: *NOT* discovery sessions.
         */
        if (se_nacl) {
+               /*
+                *
+                * Determine if fabric allows for T10-PI feature bits exposed to
+                * initiators for device backends with !dev->dev_attrib.pi_prot_type.
+                *
+                * If so, then always save prot_type on a per se_node_acl node
+                * basis and re-instate the previous sess_prot_type to avoid
+                * disabling PI from below any previously initiator side
+                * registered LUNs.
+                */
+               if (se_nacl->saved_prot_type)
+                       se_sess->sess_prot_type = se_nacl->saved_prot_type;
+               else if (tfo->tpg_check_prot_fabric_only)
+                       se_sess->sess_prot_type = se_nacl->saved_prot_type =
+                                       tfo->tpg_check_prot_fabric_only(se_tpg);
                /*
                 * If the fabric module supports an ISID based TransportID,
                 * save this value in binary from the fabric I_T Nexus now.
@@ -404,6 +420,30 @@ void target_put_session(struct se_session *se_sess)
 }
 EXPORT_SYMBOL(target_put_session);
 
+ssize_t target_show_dynamic_sessions(struct se_portal_group *se_tpg, char *page)
+{
+       struct se_session *se_sess;
+       ssize_t len = 0;
+
+       spin_lock_bh(&se_tpg->session_lock);
+       list_for_each_entry(se_sess, &se_tpg->tpg_sess_list, sess_list) {
+               if (!se_sess->se_node_acl)
+                       continue;
+               if (!se_sess->se_node_acl->dynamic_node_acl)
+                       continue;
+               if (strlen(se_sess->se_node_acl->initiatorname) + 1 + len > PAGE_SIZE)
+                       break;
+
+               len += snprintf(page + len, PAGE_SIZE - len, "%s\n",
+                               se_sess->se_node_acl->initiatorname);
+               len += 1; /* Include NULL terminator */
+       }
+       spin_unlock_bh(&se_tpg->session_lock);
+
+       return len;
+}
+EXPORT_SYMBOL(target_show_dynamic_sessions);
+
 static void target_complete_nacl(struct kref *kref)
 {
        struct se_node_acl *nacl = container_of(kref,
@@ -462,7 +502,7 @@ EXPORT_SYMBOL(transport_free_session);
 void transport_deregister_session(struct se_session *se_sess)
 {
        struct se_portal_group *se_tpg = se_sess->se_tpg;
-       struct target_core_fabric_ops *se_tfo;
+       const struct target_core_fabric_ops *se_tfo;
        struct se_node_acl *se_nacl;
        unsigned long flags;
        bool comp_nacl = true;
@@ -1118,7 +1158,7 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size)
  */
 void transport_init_se_cmd(
        struct se_cmd *cmd,
-       struct target_core_fabric_ops *tfo,
+       const struct target_core_fabric_ops *tfo,
        struct se_session *se_sess,
        u32 data_length,
        int data_direction,
@@ -1570,6 +1610,8 @@ EXPORT_SYMBOL(target_submit_tmr);
  * has completed.
  */
 bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags)
+       __releases(&cmd->t_state_lock)
+       __acquires(&cmd->t_state_lock)
 {
        bool was_active = false;
 
@@ -1615,11 +1657,11 @@ void transport_generic_request_failure(struct se_cmd *cmd,
        transport_complete_task_attr(cmd);
        /*
         * Handle special case for COMPARE_AND_WRITE failure, where the
-        * callback is expected to drop the per device ->caw_mutex.
+        * callback is expected to drop the per device ->caw_sem.
         */
        if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
             cmd->transport_complete_callback)
-               cmd->transport_complete_callback(cmd);
+               cmd->transport_complete_callback(cmd, false);
 
        switch (sense_reason) {
        case TCM_NON_EXISTENT_LUN:
@@ -1706,6 +1748,41 @@ void __target_execute_cmd(struct se_cmd *cmd)
        }
 }
 
+static int target_write_prot_action(struct se_cmd *cmd)
+{
+       u32 sectors;
+       /*
+        * Perform WRITE_INSERT of PI using software emulation when backend
+        * device has PI enabled, if the transport has not already generated
+        * PI using hardware WRITE_INSERT offload.
+        */
+       switch (cmd->prot_op) {
+       case TARGET_PROT_DOUT_INSERT:
+               if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DOUT_INSERT))
+                       sbc_dif_generate(cmd);
+               break;
+       case TARGET_PROT_DOUT_STRIP:
+               if (cmd->se_sess->sup_prot_ops & TARGET_PROT_DOUT_STRIP)
+                       break;
+
+               sectors = cmd->data_length >> ilog2(cmd->se_dev->dev_attrib.block_size);
+               cmd->pi_err = sbc_dif_verify_write(cmd, cmd->t_task_lba,
+                                                  sectors, 0, NULL, 0);
+               if (unlikely(cmd->pi_err)) {
+                       spin_lock_irq(&cmd->t_state_lock);
+                       cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT;
+                       spin_unlock_irq(&cmd->t_state_lock);
+                       transport_generic_request_failure(cmd, cmd->pi_err);
+                       return -1;
+               }
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
 static bool target_handle_task_attr(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
@@ -1785,15 +1862,9 @@ void target_execute_cmd(struct se_cmd *cmd)
        cmd->t_state = TRANSPORT_PROCESSING;
        cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
        spin_unlock_irq(&cmd->t_state_lock);
-       /*
-        * Perform WRITE_INSERT of PI using software emulation when backend
-        * device has PI enabled, if the transport has not already generated
-        * PI using hardware WRITE_INSERT offload.
-        */
-       if (cmd->prot_op == TARGET_PROT_DOUT_INSERT) {
-               if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DOUT_INSERT))
-                       sbc_dif_generate(cmd);
-       }
+
+       if (target_write_prot_action(cmd))
+               return;
 
        if (target_handle_task_attr(cmd)) {
                spin_lock_irq(&cmd->t_state_lock);
@@ -1919,16 +1990,28 @@ static void transport_handle_queue_full(
        schedule_work(&cmd->se_dev->qf_work_queue);
 }
 
-static bool target_check_read_strip(struct se_cmd *cmd)
+static bool target_read_prot_action(struct se_cmd *cmd)
 {
        sense_reason_t rc;
 
-       if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DIN_STRIP)) {
-               rc = sbc_dif_read_strip(cmd);
-               if (rc) {
-                       cmd->pi_err = rc;
-                       return true;
+       switch (cmd->prot_op) {
+       case TARGET_PROT_DIN_STRIP:
+               if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DIN_STRIP)) {
+                       rc = sbc_dif_read_strip(cmd);
+                       if (rc) {
+                               cmd->pi_err = rc;
+                               return true;
+                       }
                }
+               break;
+       case TARGET_PROT_DIN_INSERT:
+               if (cmd->se_sess->sup_prot_ops & TARGET_PROT_DIN_INSERT)
+                       break;
+
+               sbc_dif_generate(cmd);
+               break;
+       default:
+               break;
        }
 
        return false;
@@ -1975,8 +2058,12 @@ static void target_complete_ok_work(struct work_struct *work)
        if (cmd->transport_complete_callback) {
                sense_reason_t rc;
 
-               rc = cmd->transport_complete_callback(cmd);
+               rc = cmd->transport_complete_callback(cmd, true);
                if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) {
+                       if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
+                           !cmd->data_length)
+                               goto queue_rsp;
+
                        return;
                } else if (rc) {
                        ret = transport_send_check_condition_and_sense(cmd,
@@ -1990,6 +2077,7 @@ static void target_complete_ok_work(struct work_struct *work)
                }
        }
 
+queue_rsp:
        switch (cmd->data_direction) {
        case DMA_FROM_DEVICE:
                spin_lock(&cmd->se_lun->lun_sep_lock);
@@ -2003,8 +2091,7 @@ static void target_complete_ok_work(struct work_struct *work)
                 * backend had PI enabled, if the transport will not be
                 * performing hardware READ_STRIP offload.
                 */
-               if (cmd->prot_op == TARGET_PROT_DIN_STRIP &&
-                   target_check_read_strip(cmd)) {
+               if (target_read_prot_action(cmd)) {
                        ret = transport_send_check_condition_and_sense(cmd,
                                                cmd->pi_err, 0);
                        if (ret == -EAGAIN || ret == -ENOMEM)
@@ -2094,6 +2181,16 @@ static inline void transport_reset_sgl_orig(struct se_cmd *cmd)
 static inline void transport_free_pages(struct se_cmd *cmd)
 {
        if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) {
+               /*
+                * Release special case READ buffer payload required for
+                * SG_TO_MEM_NOALLOC to function with COMPARE_AND_WRITE
+                */
+               if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) {
+                       transport_free_sgl(cmd->t_bidi_data_sg,
+                                          cmd->t_bidi_data_nents);
+                       cmd->t_bidi_data_sg = NULL;
+                       cmd->t_bidi_data_nents = 0;
+               }
                transport_reset_sgl_orig(cmd);
                return;
        }
@@ -2246,6 +2343,7 @@ sense_reason_t
 transport_generic_new_cmd(struct se_cmd *cmd)
 {
        int ret = 0;
+       bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB);
 
        /*
         * Determine is the TCM fabric module has already allocated physical
@@ -2254,7 +2352,6 @@ transport_generic_new_cmd(struct se_cmd *cmd)
         */
        if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) &&
            cmd->data_length) {
-               bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB);
 
                if ((cmd->se_cmd_flags & SCF_BIDI) ||
                    (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)) {
@@ -2285,6 +2382,20 @@ transport_generic_new_cmd(struct se_cmd *cmd)
                                       cmd->data_length, zero_flag);
                if (ret < 0)
                        return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+       } else if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
+                   cmd->data_length) {
+               /*
+                * Special case for COMPARE_AND_WRITE with fabrics
+                * using SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC.
+                */
+               u32 caw_length = cmd->t_task_nolb *
+                                cmd->se_dev->dev_attrib.block_size;
+
+               ret = target_alloc_sgl(&cmd->t_bidi_data_sg,
+                                      &cmd->t_bidi_data_nents,
+                                      caw_length, zero_flag);
+               if (ret < 0)
+                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
        }
        /*
         * If this command is not a write we can execute it right here,
@@ -2376,10 +2487,8 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd,
         * fabric acknowledgement that requires two target_put_sess_cmd()
         * invocations before se_cmd descriptor release.
         */
-       if (ack_kref) {
+       if (ack_kref)
                kref_get(&se_cmd->cmd_kref);
-               se_cmd->se_cmd_flags |= SCF_ACK_KREF;
-       }
 
        spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
        if (se_sess->sess_tearing_down) {
@@ -2398,6 +2507,7 @@ out:
 EXPORT_SYMBOL(target_get_sess_cmd);
 
 static void target_release_cmd_kref(struct kref *kref)
+               __releases(&se_cmd->se_sess->sess_cmd_lock)
 {
        struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
        struct se_session *se_sess = se_cmd->se_sess;
index 1a1bcf7..dbc872a 100644 (file)
@@ -344,8 +344,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
                entry = (void *) mb + CMDR_OFF + cmd_head;
                tcmu_flush_dcache_range(entry, sizeof(*entry));
-               tcmu_hdr_set_op(&entry->hdr, TCMU_OP_PAD);
-               tcmu_hdr_set_len(&entry->hdr, pad_size);
+               tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_PAD);
+               tcmu_hdr_set_len(&entry->hdr.len_op, pad_size);
+               entry->hdr.cmd_id = 0; /* not used for PAD */
+               entry->hdr.kflags = 0;
+               entry->hdr.uflags = 0;
 
                UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
 
@@ -355,9 +358,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
        entry = (void *) mb + CMDR_OFF + cmd_head;
        tcmu_flush_dcache_range(entry, sizeof(*entry));
-       tcmu_hdr_set_op(&entry->hdr, TCMU_OP_CMD);
-       tcmu_hdr_set_len(&entry->hdr, command_size);
-       entry->cmd_id = tcmu_cmd->cmd_id;
+       tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
+       tcmu_hdr_set_len(&entry->hdr.len_op, command_size);
+       entry->hdr.cmd_id = tcmu_cmd->cmd_id;
+       entry->hdr.kflags = 0;
+       entry->hdr.uflags = 0;
 
        /*
         * Fix up iovecs, and handle if allocation in data ring wrapped.
@@ -376,7 +381,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
                /* Even iov_base is relative to mb_addr */
                iov->iov_len = copy_bytes;
-               iov->iov_base = (void *) udev->data_off + udev->data_head;
+               iov->iov_base = (void __user *) udev->data_off +
+                                               udev->data_head;
                iov_cnt++;
                iov++;
 
@@ -388,7 +394,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
                        copy_bytes = sg->length - copy_bytes;
 
                        iov->iov_len = copy_bytes;
-                       iov->iov_base = (void *) udev->data_off + udev->data_head;
+                       iov->iov_base = (void __user *) udev->data_off +
+                                                       udev->data_head;
 
                        if (se_cmd->data_direction == DMA_TO_DEVICE) {
                                to = (void *) mb + udev->data_off + udev->data_head;
@@ -405,6 +412,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
                kunmap_atomic(from);
        }
        entry->req.iov_cnt = iov_cnt;
+       entry->req.iov_bidi_cnt = 0;
+       entry->req.iov_dif_cnt = 0;
 
        /* All offsets relative to mb_addr, not start of entry! */
        cdb_off = CMDR_OFF + cmd_head + base_command_size;
@@ -462,6 +471,17 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
                return;
        }
 
+       if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) {
+               UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+               pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
+                       cmd->se_cmd);
+               transport_generic_request_failure(cmd->se_cmd,
+                       TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+               cmd->se_cmd = NULL;
+               kmem_cache_free(tcmu_cmd_cache, cmd);
+               return;
+       }
+
        if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
                memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
                               se_cmd->scsi_sense_length);
@@ -540,14 +560,16 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 
                tcmu_flush_dcache_range(entry, sizeof(*entry));
 
-               if (tcmu_hdr_get_op(&entry->hdr) == TCMU_OP_PAD) {
-                       UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+               if (tcmu_hdr_get_op(entry->hdr.len_op) == TCMU_OP_PAD) {
+                       UPDATE_HEAD(udev->cmdr_last_cleaned,
+                                   tcmu_hdr_get_len(entry->hdr.len_op),
+                                   udev->cmdr_size);
                        continue;
                }
-               WARN_ON(tcmu_hdr_get_op(&entry->hdr) != TCMU_OP_CMD);
+               WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD);
 
                spin_lock(&udev->commands_lock);
-               cmd = idr_find(&udev->commands, entry->cmd_id);
+               cmd = idr_find(&udev->commands, entry->hdr.cmd_id);
                if (cmd)
                        idr_remove(&udev->commands, cmd->cmd_id);
                spin_unlock(&udev->commands_lock);
@@ -560,7 +582,9 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 
                tcmu_handle_completion(cmd, entry);
 
-               UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+               UPDATE_HEAD(udev->cmdr_last_cleaned,
+                           tcmu_hdr_get_len(entry->hdr.len_op),
+                           udev->cmdr_size);
 
                handled++;
        }
@@ -838,14 +862,14 @@ static int tcmu_configure_device(struct se_device *dev)
        udev->data_size = TCMU_RING_SIZE - CMDR_SIZE;
 
        mb = udev->mb_addr;
-       mb->version = 1;
+       mb->version = TCMU_MAILBOX_VERSION;
        mb->cmdr_off = CMDR_OFF;
        mb->cmdr_size = udev->cmdr_size;
 
        WARN_ON(!PAGE_ALIGNED(udev->data_off));
        WARN_ON(udev->data_size % PAGE_SIZE);
 
-       info->version = "1";
+       info->version = xstr(TCMU_MAILBOX_VERSION);
 
        info->mem[0].name = "tcm-user command & data buffer";
        info->mem[0].addr = (phys_addr_t) udev->mb_addr;
index 33ac39b..a600ff1 100644 (file)
 #include <target/target_core_fabric.h>
 #include <target/target_core_configfs.h>
 
+#include "target_core_internal.h"
 #include "target_core_pr.h"
 #include "target_core_ua.h"
 #include "target_core_xcopy.h"
 
 static struct workqueue_struct *xcopy_wq = NULL;
-/*
- * From target_core_device.c
- */
-extern struct mutex g_device_mutex;
-extern struct list_head g_device_list;
-/*
- * From target_core_configfs.c
- */
-extern struct configfs_subsystem *target_core_subsystem[];
 
 static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
 {
@@ -433,7 +425,7 @@ static int xcopy_pt_queue_status(struct se_cmd *se_cmd)
        return 0;
 }
 
-static struct target_core_fabric_ops xcopy_pt_tfo = {
+static const struct target_core_fabric_ops xcopy_pt_tfo = {
        .get_fabric_name        = xcopy_pt_get_fabric_name,
        .get_task_tag           = xcopy_pt_get_tag,
        .get_cmd_state          = xcopy_pt_get_cmd_state,
@@ -548,33 +540,22 @@ static void target_xcopy_setup_pt_port(
        }
 }
 
-static int target_xcopy_init_pt_lun(
-       struct xcopy_pt_cmd *xpt_cmd,
-       struct xcopy_op *xop,
-       struct se_device *se_dev,
-       struct se_cmd *pt_cmd,
-       bool remote_port)
+static void target_xcopy_init_pt_lun(struct se_device *se_dev,
+               struct se_cmd *pt_cmd, bool remote_port)
 {
        /*
         * Don't allocate + init an pt_cmd->se_lun if honoring local port for
         * reservations.  The pt_cmd->se_lun pointer will be setup from within
         * target_xcopy_setup_pt_port()
         */
-       if (!remote_port) {
-               pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
-               return 0;
+       if (remote_port) {
+               pr_debug("Setup emulated se_dev: %p from se_dev\n",
+                       pt_cmd->se_dev);
+               pt_cmd->se_lun = &se_dev->xcopy_lun;
+               pt_cmd->se_dev = se_dev;
        }
 
-       pt_cmd->se_lun = &se_dev->xcopy_lun;
-       pt_cmd->se_dev = se_dev;
-
-       pr_debug("Setup emulated se_dev: %p from se_dev\n", pt_cmd->se_dev);
-       pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
-
-       pr_debug("Setup emulated se_dev: %p to pt_cmd->se_lun->lun_se_dev\n",
-               pt_cmd->se_lun->lun_se_dev);
-
-       return 0;
+       pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
 }
 
 static int target_xcopy_setup_pt_cmd(
@@ -592,11 +573,8 @@ static int target_xcopy_setup_pt_cmd(
         * Setup LUN+port to honor reservations based upon xop->op_origin for
         * X-COPY PUSH or X-COPY PULL based upon where the CDB was received.
         */
-       rc = target_xcopy_init_pt_lun(xpt_cmd, xop, se_dev, cmd, remote_port);
-       if (rc < 0) {
-               ret = rc;
-               goto out;
-       }
+       target_xcopy_init_pt_lun(se_dev, cmd, remote_port);
+
        xpt_cmd->xcopy_op = xop;
        target_xcopy_setup_pt_port(xpt_cmd, xop, remote_port);
 
index a0bcfd3..881deb3 100644 (file)
@@ -129,7 +129,6 @@ struct ft_cmd {
 
 extern struct mutex ft_lport_lock;
 extern struct fc4_prov ft_prov;
-extern struct target_fabric_configfs *ft_configfs;
 extern unsigned int ft_debug_logging;
 
 /*
index efdcb96..65dce13 100644 (file)
@@ -48,7 +48,7 @@
 
 #include "tcm_fc.h"
 
-struct target_fabric_configfs *ft_configfs;
+static const struct target_core_fabric_ops ft_fabric_ops;
 
 static LIST_HEAD(ft_wwn_list);
 DEFINE_MUTEX(ft_lport_lock);
@@ -337,7 +337,7 @@ static struct se_portal_group *ft_add_tpg(
                return NULL;
        }
 
-       ret = core_tpg_register(&ft_configfs->tf_ops, wwn, &tpg->se_tpg,
+       ret = core_tpg_register(&ft_fabric_ops, wwn, &tpg->se_tpg,
                                tpg, TRANSPORT_TPG_TYPE_NORMAL);
        if (ret < 0) {
                destroy_workqueue(wq);
@@ -507,7 +507,9 @@ static u32 ft_tpg_get_inst_index(struct se_portal_group *se_tpg)
        return tpg->index;
 }
 
-static struct target_core_fabric_ops ft_fabric_ops = {
+static const struct target_core_fabric_ops ft_fabric_ops = {
+       .module =                       THIS_MODULE,
+       .name =                         "fc",
        .get_fabric_name =              ft_get_fabric_name,
        .get_fabric_proto_ident =       fc_get_fabric_proto_ident,
        .tpg_get_wwn =                  ft_get_fabric_wwn,
@@ -552,62 +554,10 @@ static struct target_core_fabric_ops ft_fabric_ops = {
        .fabric_drop_np =               NULL,
        .fabric_make_nodeacl =          &ft_add_acl,
        .fabric_drop_nodeacl =          &ft_del_acl,
-};
-
-static int ft_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "fc");
-       if (IS_ERR(fabric)) {
-               pr_err("%s: target_fabric_configfs_init() failed!\n",
-                      __func__);
-               return PTR_ERR(fabric);
-       }
-       fabric->tf_ops = ft_fabric_ops;
-
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = ft_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs =
-                                                   ft_nacl_base_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * register the fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_debug("target_fabric_configfs_register() for"
-                           " FC Target failed!\n");
-               target_fabric_configfs_free(fabric);
-               return -1;
-       }
-
-       /*
-        * Setup our local pointer to *fabric.
-        */
-       ft_configfs = fabric;
-       return 0;
-}
 
-static void ft_deregister_configfs(void)
-{
-       if (!ft_configfs)
-               return;
-       target_fabric_configfs_deregister(ft_configfs);
-       ft_configfs = NULL;
-}
+       .tfc_wwn_attrs                  = ft_wwn_attrs,
+       .tfc_tpg_nacl_base_attrs        = ft_nacl_base_attrs,
+};
 
 static struct notifier_block ft_notifier = {
        .notifier_call = ft_lport_notify
@@ -615,15 +565,24 @@ static struct notifier_block ft_notifier = {
 
 static int __init ft_init(void)
 {
-       if (ft_register_configfs())
-               return -1;
-       if (fc_fc4_register_provider(FC_TYPE_FCP, &ft_prov)) {
-               ft_deregister_configfs();
-               return -1;
-       }
+       int ret;
+
+       ret = target_register_template(&ft_fabric_ops);
+       if (ret)
+               goto out;
+
+       ret = fc_fc4_register_provider(FC_TYPE_FCP, &ft_prov);
+       if (ret)
+               goto out_unregister_template;
+
        blocking_notifier_chain_register(&fc_lport_notifier_head, &ft_notifier);
        fc_lport_iterate(ft_lport_add, NULL);
        return 0;
+
+out_unregister_template:
+       target_unregister_template(&ft_fabric_ops);
+out:
+       return ret;
 }
 
 static void __exit ft_exit(void)
@@ -632,7 +591,7 @@ static void __exit ft_exit(void)
                                           &ft_notifier);
        fc_fc4_deregister_provider(FC_TYPE_FCP, &ft_prov);
        fc_lport_iterate(ft_lport_del, NULL);
-       ft_deregister_configfs();
+       target_unregister_template(&ft_fabric_ops);
        synchronize_rcu();
 }
 
index 422ebea..4506e40 100644 (file)
@@ -450,6 +450,18 @@ static unsigned int mem32_serial_in(struct uart_port *p, int offset)
        return readl(p->membase + offset);
 }
 
+static void mem32be_serial_out(struct uart_port *p, int offset, int value)
+{
+       offset = offset << p->regshift;
+       iowrite32be(value, p->membase + offset);
+}
+
+static unsigned int mem32be_serial_in(struct uart_port *p, int offset)
+{
+       offset = offset << p->regshift;
+       return ioread32be(p->membase + offset);
+}
+
 static unsigned int io_serial_in(struct uart_port *p, int offset)
 {
        offset = offset << p->regshift;
@@ -488,6 +500,11 @@ static void set_io_from_upio(struct uart_port *p)
                p->serial_out = mem32_serial_out;
                break;
 
+       case UPIO_MEM32BE:
+               p->serial_in = mem32be_serial_in;
+               p->serial_out = mem32be_serial_out;
+               break;
+
 #if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X)
        case UPIO_AU:
                p->serial_in = au_serial_in;
@@ -513,6 +530,7 @@ serial_port_out_sync(struct uart_port *p, int offset, int value)
        switch (p->iotype) {
        case UPIO_MEM:
        case UPIO_MEM32:
+       case UPIO_MEM32BE:
        case UPIO_AU:
                p->serial_out(p, offset, value);
                p->serial_in(p, UART_LCR);      /* safe, no side-effects */
@@ -2748,6 +2766,7 @@ static int serial8250_request_std_resource(struct uart_8250_port *up)
        case UPIO_AU:
        case UPIO_TSI:
        case UPIO_MEM32:
+       case UPIO_MEM32BE:
        case UPIO_MEM:
                if (!port->mapbase)
                        break;
@@ -2784,6 +2803,7 @@ static void serial8250_release_std_resource(struct uart_8250_port *up)
        case UPIO_AU:
        case UPIO_TSI:
        case UPIO_MEM32:
+       case UPIO_MEM32BE:
        case UPIO_MEM:
                if (!port->mapbase)
                        break;
index 8e11968..6c0fd8b 100644 (file)
@@ -42,6 +42,8 @@ unsigned int __weak __init serial8250_early_in(struct uart_port *port, int offse
                return readb(port->membase + offset);
        case UPIO_MEM32:
                return readl(port->membase + (offset << 2));
+       case UPIO_MEM32BE:
+               return ioread32be(port->membase + (offset << 2));
        case UPIO_PORT:
                return inb(port->iobase + offset);
        default:
@@ -58,6 +60,9 @@ void __weak __init serial8250_early_out(struct uart_port *port, int offset, int
        case UPIO_MEM32:
                writel(value, port->membase + (offset << 2));
                break;
+       case UPIO_MEM32BE:
+               iowrite32be(value, port->membase + (offset << 2));
+               break;
        case UPIO_PORT:
                outb(value, port->iobase + offset);
                break;
index aa00154..5b73afb 100644 (file)
@@ -116,7 +116,8 @@ static int of_platform_serial_setup(struct platform_device *ofdev,
                        port->iotype = UPIO_MEM;
                        break;
                case 4:
-                       port->iotype = UPIO_MEM32;
+                       port->iotype = of_device_is_big_endian(np) ?
+                                      UPIO_MEM32BE : UPIO_MEM32;
                        break;
                default:
                        dev_warn(&ofdev->dev, "unsupported reg-io-width (%d)\n",
index 6af58c6..2030565 100644 (file)
@@ -1505,7 +1505,7 @@ static void destroy_ep_files (struct dev_data *dev)
                list_del_init (&ep->epfiles);
                dentry = ep->dentry;
                ep->dentry = NULL;
-               parent = dentry->d_parent->d_inode;
+               parent = d_inode(dentry->d_parent);
 
                /* break link to controller */
                if (ep->state == STATE_EP_ENABLED)
index 6e0a019..8b80add 100644 (file)
@@ -29,7 +29,7 @@
 
 USB_GADGET_COMPOSITE_OPTIONS();
 
-static struct target_fabric_configfs *usbg_fabric_configfs;
+static const struct target_core_fabric_ops usbg_ops;
 
 static inline struct f_uas *to_f_uas(struct usb_function *f)
 {
@@ -1572,8 +1572,7 @@ static struct se_portal_group *usbg_make_tpg(
        tpg->tport = tport;
        tpg->tport_tpgt = tpgt;
 
-       ret = core_tpg_register(&usbg_fabric_configfs->tf_ops, wwn,
-                               &tpg->se_tpg, tpg,
+       ret = core_tpg_register(&usbg_ops, wwn, &tpg->se_tpg, tpg,
                                TRANSPORT_TPG_TYPE_NORMAL);
        if (ret < 0) {
                destroy_workqueue(tpg->workqueue);
@@ -1864,7 +1863,9 @@ static int usbg_check_stop_free(struct se_cmd *se_cmd)
        return 1;
 }
 
-static struct target_core_fabric_ops usbg_ops = {
+static const struct target_core_fabric_ops usbg_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "usb_gadget",
        .get_fabric_name                = usbg_get_fabric_name,
        .get_fabric_proto_ident         = usbg_get_fabric_proto_ident,
        .tpg_get_wwn                    = usbg_get_fabric_wwn,
@@ -1906,46 +1907,9 @@ static struct target_core_fabric_ops usbg_ops = {
        .fabric_drop_np                 = NULL,
        .fabric_make_nodeacl            = usbg_make_nodeacl,
        .fabric_drop_nodeacl            = usbg_drop_nodeacl,
-};
-
-static int usbg_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       fabric = target_fabric_configfs_init(THIS_MODULE, "usb_gadget");
-       if (IS_ERR(fabric)) {
-               printk(KERN_ERR "target_fabric_configfs_init() failed\n");
-               return PTR_ERR(fabric);
-       }
-
-       fabric->tf_ops = usbg_ops;
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = usbg_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = usbg_base_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               printk(KERN_ERR "target_fabric_configfs_register() failed"
-                               " for usb-gadget\n");
-               return ret;
-       }
-       usbg_fabric_configfs = fabric;
-       return 0;
-};
 
-static void usbg_deregister_configfs(void)
-{
-       if (!(usbg_fabric_configfs))
-               return;
-
-       target_fabric_configfs_deregister(usbg_fabric_configfs);
-       usbg_fabric_configfs = NULL;
+       .tfc_wwn_attrs                  = usbg_wwn_attrs,
+       .tfc_tpg_base_attrs             = usbg_base_attrs,
 };
 
 /* Start gadget.c code */
@@ -2454,16 +2418,13 @@ static void usbg_detach(struct usbg_tpg *tpg)
 
 static int __init usb_target_gadget_init(void)
 {
-       int ret;
-
-       ret = usbg_register_configfs();
-       return ret;
+       return target_register_template(&usbg_ops);
 }
 module_init(usb_target_gadget_init);
 
 static void __exit usb_target_gadget_exit(void)
 {
-       usbg_deregister_configfs();
+       target_unregister_template(&usbg_ops);
 }
 module_exit(usb_target_gadget_exit);
 
index 71df240..5e19bb5 100644 (file)
@@ -131,6 +131,8 @@ struct vhost_scsi_tpg {
        int tv_tpg_port_count;
        /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
        int tv_tpg_vhost_count;
+       /* Used for enabling T10-PI with legacy devices */
+       int tv_fabric_prot_type;
        /* list for vhost_scsi_list */
        struct list_head tv_tpg_list;
        /* Used to protect access for tpg_nexus */
@@ -214,9 +216,7 @@ struct vhost_scsi {
        int vs_events_nr; /* num of pending events, protected by vq->mutex */
 };
 
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *vhost_scsi_fabric_configfs;
-
+static struct target_core_fabric_ops vhost_scsi_ops;
 static struct workqueue_struct *vhost_scsi_workqueue;
 
 /* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */
@@ -431,6 +431,14 @@ vhost_scsi_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
                        port_nexus_ptr);
 }
 
+static int vhost_scsi_check_prot_fabric_only(struct se_portal_group *se_tpg)
+{
+       struct vhost_scsi_tpg *tpg = container_of(se_tpg,
+                               struct vhost_scsi_tpg, se_tpg);
+
+       return tpg->tv_fabric_prot_type;
+}
+
 static struct se_node_acl *
 vhost_scsi_alloc_fabric_acl(struct se_portal_group *se_tpg)
 {
@@ -1878,6 +1886,45 @@ static void vhost_scsi_free_cmd_map_res(struct vhost_scsi_nexus *nexus,
        }
 }
 
+static ssize_t vhost_scsi_tpg_attrib_store_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       const char *page,
+       size_t count)
+{
+       struct vhost_scsi_tpg *tpg = container_of(se_tpg,
+                               struct vhost_scsi_tpg, se_tpg);
+       unsigned long val;
+       int ret = kstrtoul(page, 0, &val);
+
+       if (ret) {
+               pr_err("kstrtoul() returned %d for fabric_prot_type\n", ret);
+               return ret;
+       }
+       if (val != 0 && val != 1 && val != 3) {
+               pr_err("Invalid vhost_scsi fabric_prot_type: %lu\n", val);
+               return -EINVAL;
+       }
+       tpg->tv_fabric_prot_type = val;
+
+       return count;
+}
+
+static ssize_t vhost_scsi_tpg_attrib_show_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       struct vhost_scsi_tpg *tpg = container_of(se_tpg,
+                               struct vhost_scsi_tpg, se_tpg);
+
+       return sprintf(page, "%d\n", tpg->tv_fabric_prot_type);
+}
+TF_TPG_ATTRIB_ATTR(vhost_scsi, fabric_prot_type, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *vhost_scsi_tpg_attrib_attrs[] = {
+       &vhost_scsi_tpg_attrib_fabric_prot_type.attr,
+       NULL,
+};
+
 static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
                                const char *name)
 {
@@ -2155,7 +2202,7 @@ vhost_scsi_make_tpg(struct se_wwn *wwn,
        tpg->tport = tport;
        tpg->tport_tpgt = tpgt;
 
-       ret = core_tpg_register(&vhost_scsi_fabric_configfs->tf_ops, wwn,
+       ret = core_tpg_register(&vhost_scsi_ops, wwn,
                                &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
        if (ret < 0) {
                kfree(tpg);
@@ -2277,6 +2324,8 @@ static struct configfs_attribute *vhost_scsi_wwn_attrs[] = {
 };
 
 static struct target_core_fabric_ops vhost_scsi_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "vhost",
        .get_fabric_name                = vhost_scsi_get_fabric_name,
        .get_fabric_proto_ident         = vhost_scsi_get_fabric_proto_ident,
        .tpg_get_wwn                    = vhost_scsi_get_fabric_wwn,
@@ -2289,6 +2338,7 @@ static struct target_core_fabric_ops vhost_scsi_ops = {
        .tpg_check_demo_mode_cache      = vhost_scsi_check_true,
        .tpg_check_demo_mode_write_protect = vhost_scsi_check_false,
        .tpg_check_prod_mode_write_protect = vhost_scsi_check_false,
+       .tpg_check_prot_fabric_only     = vhost_scsi_check_prot_fabric_only,
        .tpg_alloc_fabric_acl           = vhost_scsi_alloc_fabric_acl,
        .tpg_release_fabric_acl         = vhost_scsi_release_fabric_acl,
        .tpg_get_inst_index             = vhost_scsi_tpg_get_inst_index,
@@ -2320,70 +2370,20 @@ static struct target_core_fabric_ops vhost_scsi_ops = {
        .fabric_drop_np                 = NULL,
        .fabric_make_nodeacl            = vhost_scsi_make_nodeacl,
        .fabric_drop_nodeacl            = vhost_scsi_drop_nodeacl,
+
+       .tfc_wwn_attrs                  = vhost_scsi_wwn_attrs,
+       .tfc_tpg_base_attrs             = vhost_scsi_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = vhost_scsi_tpg_attrib_attrs,
 };
 
-static int vhost_scsi_register_configfs(void)
+static int __init vhost_scsi_init(void)
 {
-       struct target_fabric_configfs *fabric;
-       int ret;
+       int ret = -ENOMEM;
 
-       pr_debug("vhost-scsi fabric module %s on %s/%s"
+       pr_debug("TCM_VHOST fabric module %s on %s/%s"
                " on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
                utsname()->machine);
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "vhost");
-       if (IS_ERR(fabric)) {
-               pr_err("target_fabric_configfs_init() failed\n");
-               return PTR_ERR(fabric);
-       }
-       /*
-        * Setup fabric->tf_ops from our local vhost_scsi_ops
-        */
-       fabric->tf_ops = vhost_scsi_ops;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = vhost_scsi_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = vhost_scsi_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * Register the fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() failed"
-                               " for TCM_VHOST\n");
-               return ret;
-       }
-       /*
-        * Setup our local pointer to *fabric
-        */
-       vhost_scsi_fabric_configfs = fabric;
-       pr_debug("TCM_VHOST[0] - Set fabric -> vhost_scsi_fabric_configfs\n");
-       return 0;
-};
-
-static void vhost_scsi_deregister_configfs(void)
-{
-       if (!vhost_scsi_fabric_configfs)
-               return;
-
-       target_fabric_configfs_deregister(vhost_scsi_fabric_configfs);
-       vhost_scsi_fabric_configfs = NULL;
-       pr_debug("TCM_VHOST[0] - Cleared vhost_scsi_fabric_configfs\n");
-};
 
-static int __init vhost_scsi_init(void)
-{
-       int ret = -ENOMEM;
        /*
         * Use our own dedicated workqueue for submitting I/O into
         * target core to avoid contention within system_wq.
@@ -2396,7 +2396,7 @@ static int __init vhost_scsi_init(void)
        if (ret < 0)
                goto out_destroy_workqueue;
 
-       ret = vhost_scsi_register_configfs();
+       ret = target_register_template(&vhost_scsi_ops);
        if (ret < 0)
                goto out_vhost_scsi_deregister;
 
@@ -2412,7 +2412,7 @@ out:
 
 static void vhost_scsi_exit(void)
 {
-       vhost_scsi_deregister_configfs();
+       target_unregister_template(&vhost_scsi_ops);
        vhost_scsi_deregister();
        destroy_workqueue(vhost_scsi_workqueue);
 };
index b546da5..cab9f3f 100644 (file)
@@ -48,6 +48,16 @@ config VIRTIO_BALLOON
 
         If unsure, say M.
 
+config VIRTIO_INPUT
+       tristate "Virtio input driver"
+       depends on VIRTIO
+       depends on INPUT
+       ---help---
+        This driver supports virtio input devices such as
+        keyboards, mice and tablets.
+
+        If unsure, say M.
+
  config VIRTIO_MMIO
        tristate "Platform bus driver for memory mapped virtio devices"
        depends on HAS_IOMEM
index d85565b..41e30e3 100644 (file)
@@ -4,3 +4,4 @@ obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
 virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
+obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
index 5ce2aa4..b1877d7 100644 (file)
@@ -278,12 +278,6 @@ static struct bus_type virtio_bus = {
        .remove = virtio_dev_remove,
 };
 
-bool virtio_device_is_legacy_only(struct virtio_device_id id)
-{
-       return id.device == VIRTIO_ID_BALLOON;
-}
-EXPORT_SYMBOL_GPL(virtio_device_is_legacy_only);
-
 int register_virtio_driver(struct virtio_driver *driver)
 {
        /* Catch this early. */
index 6a356e3..82e80e0 100644 (file)
@@ -214,8 +214,8 @@ static inline void update_stat(struct virtio_balloon *vb, int idx,
                               u16 tag, u64 val)
 {
        BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
-       vb->stats[idx].tag = tag;
-       vb->stats[idx].val = val;
+       vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag);
+       vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val);
 }
 
 #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
@@ -283,18 +283,27 @@ static void virtballoon_changed(struct virtio_device *vdev)
 
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
-       __le32 v;
        s64 target;
+       u32 num_pages;
 
-       virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, &v);
+       virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages,
+                    &num_pages);
 
-       target = le32_to_cpu(v);
+       /* Legacy balloon config space is LE, unlike all other devices. */
+       if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
+               num_pages = le32_to_cpu((__force __le32)num_pages);
+
+       target = num_pages;
        return target - vb->num_pages;
 }
 
 static void update_balloon_size(struct virtio_balloon *vb)
 {
-       __le32 actual = cpu_to_le32(vb->num_pages);
+       u32 actual = vb->num_pages;
+
+       /* Legacy balloon config space is LE, unlike all other devices. */
+       if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
+               actual = (__force u32)cpu_to_le32(actual);
 
        virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
                      &actual);
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
new file mode 100644 (file)
index 0000000..60e2a16
--- /dev/null
@@ -0,0 +1,384 @@
+#include <linux/module.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/input.h>
+
+#include <uapi/linux/virtio_ids.h>
+#include <uapi/linux/virtio_input.h>
+
+struct virtio_input {
+       struct virtio_device       *vdev;
+       struct input_dev           *idev;
+       char                       name[64];
+       char                       serial[64];
+       char                       phys[64];
+       struct virtqueue           *evt, *sts;
+       struct virtio_input_event  evts[64];
+       spinlock_t                 lock;
+       bool                       ready;
+};
+
+static void virtinput_queue_evtbuf(struct virtio_input *vi,
+                                  struct virtio_input_event *evtbuf)
+{
+       struct scatterlist sg[1];
+
+       sg_init_one(sg, evtbuf, sizeof(*evtbuf));
+       virtqueue_add_inbuf(vi->evt, sg, 1, evtbuf, GFP_ATOMIC);
+}
+
+static void virtinput_recv_events(struct virtqueue *vq)
+{
+       struct virtio_input *vi = vq->vdev->priv;
+       struct virtio_input_event *event;
+       unsigned long flags;
+       unsigned int len;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       if (vi->ready) {
+               while ((event = virtqueue_get_buf(vi->evt, &len)) != NULL) {
+                       spin_unlock_irqrestore(&vi->lock, flags);
+                       input_event(vi->idev,
+                                   le16_to_cpu(event->type),
+                                   le16_to_cpu(event->code),
+                                   le32_to_cpu(event->value));
+                       spin_lock_irqsave(&vi->lock, flags);
+                       virtinput_queue_evtbuf(vi, event);
+               }
+               virtqueue_kick(vq);
+       }
+       spin_unlock_irqrestore(&vi->lock, flags);
+}
+
+/*
+ * On error we are losing the status update, which isn't critical as
+ * this is typically used for stuff like keyboard leds.
+ */
+static int virtinput_send_status(struct virtio_input *vi,
+                                u16 type, u16 code, s32 value)
+{
+       struct virtio_input_event *stsbuf;
+       struct scatterlist sg[1];
+       unsigned long flags;
+       int rc;
+
+       stsbuf = kzalloc(sizeof(*stsbuf), GFP_ATOMIC);
+       if (!stsbuf)
+               return -ENOMEM;
+
+       stsbuf->type  = cpu_to_le16(type);
+       stsbuf->code  = cpu_to_le16(code);
+       stsbuf->value = cpu_to_le32(value);
+       sg_init_one(sg, stsbuf, sizeof(*stsbuf));
+
+       spin_lock_irqsave(&vi->lock, flags);
+       if (vi->ready) {
+               rc = virtqueue_add_outbuf(vi->sts, sg, 1, stsbuf, GFP_ATOMIC);
+               virtqueue_kick(vi->sts);
+       } else {
+               rc = -ENODEV;
+       }
+       spin_unlock_irqrestore(&vi->lock, flags);
+
+       if (rc != 0)
+               kfree(stsbuf);
+       return rc;
+}
+
+static void virtinput_recv_status(struct virtqueue *vq)
+{
+       struct virtio_input *vi = vq->vdev->priv;
+       struct virtio_input_event *stsbuf;
+       unsigned long flags;
+       unsigned int len;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       while ((stsbuf = virtqueue_get_buf(vi->sts, &len)) != NULL)
+               kfree(stsbuf);
+       spin_unlock_irqrestore(&vi->lock, flags);
+}
+
+static int virtinput_status(struct input_dev *idev, unsigned int type,
+                           unsigned int code, int value)
+{
+       struct virtio_input *vi = input_get_drvdata(idev);
+
+       return virtinput_send_status(vi, type, code, value);
+}
+
+static u8 virtinput_cfg_select(struct virtio_input *vi,
+                              u8 select, u8 subsel)
+{
+       u8 size;
+
+       virtio_cwrite(vi->vdev, struct virtio_input_config, select, &select);
+       virtio_cwrite(vi->vdev, struct virtio_input_config, subsel, &subsel);
+       virtio_cread(vi->vdev, struct virtio_input_config, size, &size);
+       return size;
+}
+
+static void virtinput_cfg_bits(struct virtio_input *vi, int select, int subsel,
+                              unsigned long *bits, unsigned int bitcount)
+{
+       unsigned int bit;
+       u8 *virtio_bits;
+       u8 bytes;
+
+       bytes = virtinput_cfg_select(vi, select, subsel);
+       if (!bytes)
+               return;
+       if (bitcount > bytes * 8)
+               bitcount = bytes * 8;
+
+       /*
+        * Bitmap in virtio config space is a simple stream of bytes,
+        * with the first byte carrying bits 0-7, second bits 8-15 and
+        * so on.
+        */
+       virtio_bits = kzalloc(bytes, GFP_KERNEL);
+       if (!virtio_bits)
+               return;
+       virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config,
+                                             u.bitmap),
+                          virtio_bits, bytes);
+       for (bit = 0; bit < bitcount; bit++) {
+               if (virtio_bits[bit / 8] & (1 << (bit % 8)))
+                       __set_bit(bit, bits);
+       }
+       kfree(virtio_bits);
+
+       if (select == VIRTIO_INPUT_CFG_EV_BITS)
+               __set_bit(subsel, vi->idev->evbit);
+}
+
+static void virtinput_cfg_abs(struct virtio_input *vi, int abs)
+{
+       u32 mi, ma, re, fu, fl;
+
+       virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ABS_INFO, abs);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.min, &mi);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.max, &ma);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.res, &re);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl);
+       input_set_abs_params(vi->idev, abs, mi, ma, fu, fl);
+       input_abs_set_res(vi->idev, abs, re);
+}
+
+static int virtinput_init_vqs(struct virtio_input *vi)
+{
+       struct virtqueue *vqs[2];
+       vq_callback_t *cbs[] = { virtinput_recv_events,
+                                virtinput_recv_status };
+       static const char *names[] = { "events", "status" };
+       int err;
+
+       err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names);
+       if (err)
+               return err;
+       vi->evt = vqs[0];
+       vi->sts = vqs[1];
+
+       return 0;
+}
+
+static void virtinput_fill_evt(struct virtio_input *vi)
+{
+       unsigned long flags;
+       int i, size;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       size = virtqueue_get_vring_size(vi->evt);
+       if (size > ARRAY_SIZE(vi->evts))
+               size = ARRAY_SIZE(vi->evts);
+       for (i = 0; i < size; i++)
+               virtinput_queue_evtbuf(vi, &vi->evts[i]);
+       virtqueue_kick(vi->evt);
+       spin_unlock_irqrestore(&vi->lock, flags);
+}
+
+static int virtinput_probe(struct virtio_device *vdev)
+{
+       struct virtio_input *vi;
+       unsigned long flags;
+       size_t size;
+       int abs, err;
+
+       if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
+               return -ENODEV;
+
+       vi = kzalloc(sizeof(*vi), GFP_KERNEL);
+       if (!vi)
+               return -ENOMEM;
+
+       vdev->priv = vi;
+       vi->vdev = vdev;
+       spin_lock_init(&vi->lock);
+
+       err = virtinput_init_vqs(vi);
+       if (err)
+               goto err_init_vq;
+
+       vi->idev = input_allocate_device();
+       if (!vi->idev) {
+               err = -ENOMEM;
+               goto err_input_alloc;
+       }
+       input_set_drvdata(vi->idev, vi);
+
+       size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_NAME, 0);
+       virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config,
+                                             u.string),
+                          vi->name, min(size, sizeof(vi->name)));
+       size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_SERIAL, 0);
+       virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config,
+                                             u.string),
+                          vi->serial, min(size, sizeof(vi->serial)));
+       snprintf(vi->phys, sizeof(vi->phys),
+                "virtio%d/input0", vdev->index);
+       vi->idev->name = vi->name;
+       vi->idev->phys = vi->phys;
+       vi->idev->uniq = vi->serial;
+
+       size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_DEVIDS, 0);
+       if (size >= sizeof(struct virtio_input_devids)) {
+               virtio_cread(vi->vdev, struct virtio_input_config,
+                            u.ids.bustype, &vi->idev->id.bustype);
+               virtio_cread(vi->vdev, struct virtio_input_config,
+                            u.ids.vendor, &vi->idev->id.vendor);
+               virtio_cread(vi->vdev, struct virtio_input_config,
+                            u.ids.product, &vi->idev->id.product);
+               virtio_cread(vi->vdev, struct virtio_input_config,
+                            u.ids.version, &vi->idev->id.version);
+       } else {
+               vi->idev->id.bustype = BUS_VIRTUAL;
+       }
+
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_PROP_BITS, 0,
+                          vi->idev->propbit, INPUT_PROP_CNT);
+       size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_REP);
+       if (size)
+               __set_bit(EV_REP, vi->idev->evbit);
+
+       vi->idev->dev.parent = &vdev->dev;
+       vi->idev->event = virtinput_status;
+
+       /* device -> kernel */
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_KEY,
+                          vi->idev->keybit, KEY_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_REL,
+                          vi->idev->relbit, REL_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_ABS,
+                          vi->idev->absbit, ABS_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_MSC,
+                          vi->idev->mscbit, MSC_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SW,
+                          vi->idev->swbit,  SW_CNT);
+
+       /* kernel -> device */
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_LED,
+                          vi->idev->ledbit, LED_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SND,
+                          vi->idev->sndbit, SND_CNT);
+
+       if (test_bit(EV_ABS, vi->idev->evbit)) {
+               for (abs = 0; abs < ABS_CNT; abs++) {
+                       if (!test_bit(abs, vi->idev->absbit))
+                               continue;
+                       virtinput_cfg_abs(vi, abs);
+               }
+       }
+
+       virtio_device_ready(vdev);
+       vi->ready = true;
+       err = input_register_device(vi->idev);
+       if (err)
+               goto err_input_register;
+
+       virtinput_fill_evt(vi);
+       return 0;
+
+err_input_register:
+       spin_lock_irqsave(&vi->lock, flags);
+       vi->ready = false;
+       spin_unlock_irqrestore(&vi->lock, flags);
+       input_free_device(vi->idev);
+err_input_alloc:
+       vdev->config->del_vqs(vdev);
+err_init_vq:
+       kfree(vi);
+       return err;
+}
+
+static void virtinput_remove(struct virtio_device *vdev)
+{
+       struct virtio_input *vi = vdev->priv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       vi->ready = false;
+       spin_unlock_irqrestore(&vi->lock, flags);
+
+       input_unregister_device(vi->idev);
+       vdev->config->del_vqs(vdev);
+       kfree(vi);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int virtinput_freeze(struct virtio_device *vdev)
+{
+       struct virtio_input *vi = vdev->priv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       vi->ready = false;
+       spin_unlock_irqrestore(&vi->lock, flags);
+
+       vdev->config->del_vqs(vdev);
+       return 0;
+}
+
+static int virtinput_restore(struct virtio_device *vdev)
+{
+       struct virtio_input *vi = vdev->priv;
+       int err;
+
+       err = virtinput_init_vqs(vi);
+       if (err)
+               return err;
+
+       virtio_device_ready(vdev);
+       vi->ready = true;
+       virtinput_fill_evt(vi);
+       return 0;
+}
+#endif
+
+static unsigned int features[] = {
+       /* none */
+};
+static struct virtio_device_id id_table[] = {
+       { VIRTIO_ID_INPUT, VIRTIO_DEV_ANY_ID },
+       { 0 },
+};
+
+static struct virtio_driver virtio_input_driver = {
+       .driver.name         = KBUILD_MODNAME,
+       .driver.owner        = THIS_MODULE,
+       .feature_table       = features,
+       .feature_table_size  = ARRAY_SIZE(features),
+       .id_table            = id_table,
+       .probe               = virtinput_probe,
+       .remove              = virtinput_remove,
+#ifdef CONFIG_PM_SLEEP
+       .freeze              = virtinput_freeze,
+       .restore             = virtinput_restore,
+#endif
+};
+
+module_virtio_driver(virtio_input_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Virtio input device driver");
+MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
index 6010d7e..7a5e60d 100644 (file)
@@ -581,14 +581,6 @@ static int virtio_mmio_probe(struct platform_device *pdev)
        }
        vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID);
 
-       /* Reject legacy-only IDs for version 2 devices */
-       if (vm_dev->version == 2 &&
-                       virtio_device_is_legacy_only(vm_dev->vdev.id)) {
-               dev_err(&pdev->dev, "Version 2 not supported for devices %u!\n",
-                               vm_dev->vdev.id.device);
-               return -ENODEV;
-       }
-
        if (vm_dev->version == 1)
                writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE);
 
index 2aa38e5..e88e099 100644 (file)
 #define VIRTIO_PCI_NO_LEGACY
 #include "virtio_pci_common.h"
 
+/*
+ * Type-safe wrappers for io accesses.
+ * Use these to enforce at compile time the following spec requirement:
+ *
+ * The driver MUST access each field using the “natural” access
+ * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
+ * for 16-bit fields and 8-bit accesses for 8-bit fields.
+ */
+static inline u8 vp_ioread8(u8 __iomem *addr)
+{
+       return ioread8(addr);
+}
+static inline u16 vp_ioread16 (u16 __iomem *addr)
+{
+       return ioread16(addr);
+}
+
+static inline u32 vp_ioread32(u32 __iomem *addr)
+{
+       return ioread32(addr);
+}
+
+static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
+{
+       iowrite8(value, addr);
+}
+
+static inline void vp_iowrite16(u16 value, u16 __iomem *addr)
+{
+       iowrite16(value, addr);
+}
+
+static inline void vp_iowrite32(u32 value, u32 __iomem *addr)
+{
+       iowrite32(value, addr);
+}
+
+static void vp_iowrite64_twopart(u64 val,
+                                __le32 __iomem *lo, __le32 __iomem *hi)
+{
+       vp_iowrite32((u32)val, lo);
+       vp_iowrite32(val >> 32, hi);
+}
+
 static void __iomem *map_capability(struct pci_dev *dev, int off,
                                    size_t minlen,
                                    u32 align,
@@ -94,22 +138,16 @@ static void __iomem *map_capability(struct pci_dev *dev, int off,
        return p;
 }
 
-static void iowrite64_twopart(u64 val, __le32 __iomem *lo, __le32 __iomem *hi)
-{
-       iowrite32((u32)val, lo);
-       iowrite32(val >> 32, hi);
-}
-
 /* virtio config->get_features() implementation */
 static u64 vp_get_features(struct virtio_device *vdev)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        u64 features;
 
-       iowrite32(0, &vp_dev->common->device_feature_select);
-       features = ioread32(&vp_dev->common->device_feature);
-       iowrite32(1, &vp_dev->common->device_feature_select);
-       features |= ((u64)ioread32(&vp_dev->common->device_feature) << 32);
+       vp_iowrite32(0, &vp_dev->common->device_feature_select);
+       features = vp_ioread32(&vp_dev->common->device_feature);
+       vp_iowrite32(1, &vp_dev->common->device_feature_select);
+       features |= ((u64)vp_ioread32(&vp_dev->common->device_feature) << 32);
 
        return features;
 }
@@ -128,10 +166,10 @@ static int vp_finalize_features(struct virtio_device *vdev)
                return -EINVAL;
        }
 
-       iowrite32(0, &vp_dev->common->guest_feature_select);
-       iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
-       iowrite32(1, &vp_dev->common->guest_feature_select);
-       iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
+       vp_iowrite32(0, &vp_dev->common->guest_feature_select);
+       vp_iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
+       vp_iowrite32(1, &vp_dev->common->guest_feature_select);
+       vp_iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
 
        return 0;
 }
@@ -210,14 +248,14 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
 static u32 vp_generation(struct virtio_device *vdev)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       return ioread8(&vp_dev->common->config_generation);
+       return vp_ioread8(&vp_dev->common->config_generation);
 }
 
 /* config->{get,set}_status() implementations */
 static u8 vp_get_status(struct virtio_device *vdev)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       return ioread8(&vp_dev->common->device_status);
+       return vp_ioread8(&vp_dev->common->device_status);
 }
 
 static void vp_set_status(struct virtio_device *vdev, u8 status)
@@ -225,17 +263,17 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        /* We should never be setting status to 0. */
        BUG_ON(status == 0);
-       iowrite8(status, &vp_dev->common->device_status);
+       vp_iowrite8(status, &vp_dev->common->device_status);
 }
 
 static void vp_reset(struct virtio_device *vdev)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
        /* 0 status means a reset. */
-       iowrite8(0, &vp_dev->common->device_status);
+       vp_iowrite8(0, &vp_dev->common->device_status);
        /* Flush out the status write, and flush in device writes,
         * including MSI-X interrupts, if any. */
-       ioread8(&vp_dev->common->device_status);
+       vp_ioread8(&vp_dev->common->device_status);
        /* Flush pending VQ/configuration callbacks. */
        vp_synchronize_vectors(vdev);
 }
@@ -243,10 +281,10 @@ static void vp_reset(struct virtio_device *vdev)
 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
 {
        /* Setup the vector used for configuration events */
-       iowrite16(vector, &vp_dev->common->msix_config);
+       vp_iowrite16(vector, &vp_dev->common->msix_config);
        /* Verify we had enough resources to assign the vector */
        /* Will also flush the write out to device */
-       return ioread16(&vp_dev->common->msix_config);
+       return vp_ioread16(&vp_dev->common->msix_config);
 }
 
 static size_t vring_pci_size(u16 num)
@@ -286,15 +324,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        u16 num, off;
        int err;
 
-       if (index >= ioread16(&cfg->num_queues))
+       if (index >= vp_ioread16(&cfg->num_queues))
                return ERR_PTR(-ENOENT);
 
        /* Select the queue we're interested in */
-       iowrite16(index, &cfg->queue_select);
+       vp_iowrite16(index, &cfg->queue_select);
 
        /* Check if queue is either not available or already active. */
-       num = ioread16(&cfg->queue_size);
-       if (!num || ioread16(&cfg->queue_enable))
+       num = vp_ioread16(&cfg->queue_size);
+       if (!num || vp_ioread16(&cfg->queue_enable))
                return ERR_PTR(-ENOENT);
 
        if (num & (num - 1)) {
@@ -303,7 +341,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        }
 
        /* get offset of notification word for this vq */
-       off = ioread16(&cfg->queue_notify_off);
+       off = vp_ioread16(&cfg->queue_notify_off);
 
        info->num = num;
        info->msix_vector = msix_vec;
@@ -322,13 +360,13 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        }
 
        /* activate the queue */
-       iowrite16(num, &cfg->queue_size);
-       iowrite64_twopart(virt_to_phys(info->queue),
-                         &cfg->queue_desc_lo, &cfg->queue_desc_hi);
-       iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
-                         &cfg->queue_avail_lo, &cfg->queue_avail_hi);
-       iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
-                         &cfg->queue_used_lo, &cfg->queue_used_hi);
+       vp_iowrite16(num, &cfg->queue_size);
+       vp_iowrite64_twopart(virt_to_phys(info->queue),
+                            &cfg->queue_desc_lo, &cfg->queue_desc_hi);
+       vp_iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
+                            &cfg->queue_avail_lo, &cfg->queue_avail_hi);
+       vp_iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
+                            &cfg->queue_used_lo, &cfg->queue_used_hi);
 
        if (vp_dev->notify_base) {
                /* offset should not wrap */
@@ -357,8 +395,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        }
 
        if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
-               iowrite16(msix_vec, &cfg->queue_msix_vector);
-               msix_vec = ioread16(&cfg->queue_msix_vector);
+               vp_iowrite16(msix_vec, &cfg->queue_msix_vector);
+               msix_vec = vp_ioread16(&cfg->queue_msix_vector);
                if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
                        err = -EBUSY;
                        goto err_assign_vector;
@@ -393,8 +431,8 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
         * this, there's no way to go back except reset.
         */
        list_for_each_entry(vq, &vdev->vqs, list) {
-               iowrite16(vq->index, &vp_dev->common->queue_select);
-               iowrite16(1, &vp_dev->common->queue_enable);
+               vp_iowrite16(vq->index, &vp_dev->common->queue_select);
+               vp_iowrite16(1, &vp_dev->common->queue_enable);
        }
 
        return 0;
@@ -405,13 +443,13 @@ static void del_vq(struct virtio_pci_vq_info *info)
        struct virtqueue *vq = info->vq;
        struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 
-       iowrite16(vq->index, &vp_dev->common->queue_select);
+       vp_iowrite16(vq->index, &vp_dev->common->queue_select);
 
        if (vp_dev->msix_enabled) {
-               iowrite16(VIRTIO_MSI_NO_VECTOR,
-                         &vp_dev->common->queue_msix_vector);
+               vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
+                            &vp_dev->common->queue_msix_vector);
                /* Flush the write out to device */
-               ioread16(&vp_dev->common->queue_msix_vector);
+               vp_ioread16(&vp_dev->common->queue_msix_vector);
        }
 
        if (!vp_dev->notify_base)
@@ -577,9 +615,6 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
        }
        vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
 
-       if (virtio_device_is_legacy_only(vp_dev->vdev.id))
-               return -ENODEV;
-
        /* check for a common config: if not, use legacy mode (bar 0). */
        common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
                                            IORESOURCE_IO | IORESOURCE_MEM);
index ce4f3a7..e5e7c55 100644 (file)
@@ -169,7 +169,6 @@ config AT91SAM9X_WATCHDOG
 
 config CADENCE_WATCHDOG
        tristate "Cadence Watchdog Timer"
-       depends on ARM
        select WATCHDOG_CORE
        help
          Say Y here if you want to include support for the watchdog
@@ -1190,6 +1189,7 @@ config OCTEON_WDT
        tristate "Cavium OCTEON SOC family Watchdog Timer"
        depends on CAVIUM_OCTEON_SOC
        default y
+       select WATCHDOG_CORE
        select EXPORT_UASM if OCTEON_WDT = m
        help
          Hardware driver for OCTEON's on chip watchdog timer.
index 4e37db3..22d8ae6 100644 (file)
@@ -99,12 +99,14 @@ static int secure_register_read(struct bcm_kona_wdt *wdt, uint32_t offset)
 
 static int bcm_kona_wdt_dbg_show(struct seq_file *s, void *data)
 {
-       int ctl_val, cur_val, ret;
+       int ctl_val, cur_val;
        unsigned long flags;
        struct bcm_kona_wdt *wdt = s->private;
 
-       if (!wdt)
-               return seq_puts(s, "No device pointer\n");
+       if (!wdt) {
+               seq_puts(s, "No device pointer\n");
+               return 0;
+       }
 
        spin_lock_irqsave(&wdt->lock, flags);
        ctl_val = secure_register_read(wdt, SECWDOG_CTRL_REG);
@@ -112,7 +114,7 @@ static int bcm_kona_wdt_dbg_show(struct seq_file *s, void *data)
        spin_unlock_irqrestore(&wdt->lock, flags);
 
        if (ctl_val < 0 || cur_val < 0) {
-               ret = seq_puts(s, "Error accessing hardware\n");
+               seq_puts(s, "Error accessing hardware\n");
        } else {
                int ctl, cur, ctl_sec, cur_sec, res;
 
@@ -121,15 +123,18 @@ static int bcm_kona_wdt_dbg_show(struct seq_file *s, void *data)
                cur = cur_val & SECWDOG_COUNT_MASK;
                ctl_sec = TICKS_TO_SECS(ctl, wdt);
                cur_sec = TICKS_TO_SECS(cur, wdt);
-               ret = seq_printf(s, "Resolution: %d / %d\n"
-                               "Control: %d s / %d (%#x) ticks\n"
-                               "Current: %d s / %d (%#x) ticks\n"
-                               "Busy count: %lu\n", res,
-                               wdt->resolution, ctl_sec, ctl, ctl, cur_sec,
-                               cur, cur, wdt->busy_count);
+               seq_printf(s,
+                          "Resolution: %d / %d\n"
+                          "Control: %d s / %d (%#x) ticks\n"
+                          "Current: %d s / %d (%#x) ticks\n"
+                          "Busy count: %lu\n",
+                          res, wdt->resolution,
+                          ctl_sec, ctl, ctl,
+                          cur_sec, cur, cur,
+                          wdt->busy_count);
        }
 
-       return ret;
+       return 0;
 }
 
 static int bcm_kona_dbg_open(struct inode *inode, struct file *file)
index 8453531..14521c8 100644 (file)
@@ -3,6 +3,8 @@
  *
  * Copyright (C) 2007, 2008, 2009, 2010 Cavium Networks
  *
+ * Converted to use WATCHDOG_CORE by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
  * Some parts derived from wdt.c
  *
  *     (c) Copyright 1996-1997 Alan Cox <alan@lxorguk.ukuu.org.uk>,
@@ -103,13 +105,10 @@ MODULE_PARM_DESC(nowayout,
        "Watchdog cannot be stopped once started (default="
                                __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
-static unsigned long octeon_wdt_is_open;
-static char expect_close;
-
-static u32 __initdata nmi_stage1_insns[64];
+static u32 nmi_stage1_insns[64] __initdata;
 /* We need one branch and therefore one relocation per target label. */
-static struct uasm_label __initdata labels[5];
-static struct uasm_reloc __initdata relocs[5];
+static struct uasm_label labels[5] __initdata;
+static struct uasm_reloc relocs[5] __initdata;
 
 enum lable_id {
        label_enter_bootloader = 1
@@ -218,7 +217,8 @@ static void __init octeon_wdt_build_stage1(void)
        pr_debug("\t.set pop\n");
 
        if (len > 32)
-               panic("NMI stage 1 handler exceeds 32 instructions, was %d\n", len);
+               panic("NMI stage 1 handler exceeds 32 instructions, was %d\n",
+                     len);
 }
 
 static int cpu2core(int cpu)
@@ -294,6 +294,7 @@ static void octeon_wdt_write_hex(u64 value, int digits)
 {
        int d;
        int v;
+
        for (d = 0; d < digits; d++) {
                v = (value >> ((digits - d - 1) * 4)) & 0xf;
                if (v >= 10)
@@ -303,7 +304,7 @@ static void octeon_wdt_write_hex(u64 value, int digits)
        }
 }
 
-const char *reg_name[] = {
+static const char reg_name[][3] = {
        "$0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
        "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3",
        "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
@@ -444,7 +445,7 @@ static int octeon_wdt_cpu_callback(struct notifier_block *nfb,
        return NOTIFY_OK;
 }
 
-static void octeon_wdt_ping(void)
+static int octeon_wdt_ping(struct watchdog_device __always_unused *wdog)
 {
        int cpu;
        int coreid;
@@ -457,10 +458,12 @@ static void octeon_wdt_ping(void)
                    !cpumask_test_cpu(cpu, &irq_enabled_cpus)) {
                        /* We have to enable the irq */
                        int irq = OCTEON_IRQ_WDOG0 + coreid;
+
                        enable_irq(irq);
                        cpumask_set_cpu(cpu, &irq_enabled_cpus);
                }
        }
+       return 0;
 }
 
 static void octeon_wdt_calc_parameters(int t)
@@ -489,7 +492,8 @@ static void octeon_wdt_calc_parameters(int t)
        timeout_cnt = ((octeon_get_io_clock_rate() >> 8) * timeout_sec) >> 8;
 }
 
-static int octeon_wdt_set_heartbeat(int t)
+static int octeon_wdt_set_timeout(struct watchdog_device *wdog,
+                                 unsigned int t)
 {
        int cpu;
        int coreid;
@@ -509,158 +513,45 @@ static int octeon_wdt_set_heartbeat(int t)
                cvmx_write_csr(CVMX_CIU_WDOGX(coreid), ciu_wdog.u64);
                cvmx_write_csr(CVMX_CIU_PP_POKEX(coreid), 1);
        }
-       octeon_wdt_ping(); /* Get the irqs back on. */
+       octeon_wdt_ping(wdog); /* Get the irqs back on. */
        return 0;
 }
 
-/**
- *     octeon_wdt_write:
- *     @file: file handle to the watchdog
- *     @buf: buffer to write (unused as data does not matter here
- *     @count: count of bytes
- *     @ppos: pointer to the position to write. No seeks allowed
- *
- *     A write to a watchdog device is defined as a keepalive signal. Any
- *     write of data will do, as we we don't define content meaning.
- */
-
-static ssize_t octeon_wdt_write(struct file *file, const char __user *buf,
-                               size_t count, loff_t *ppos)
-{
-       if (count) {
-               if (!nowayout) {
-                       size_t i;
-
-                       /* In case it was set long ago */
-                       expect_close = 0;
-
-                       for (i = 0; i != count; i++) {
-                               char c;
-                               if (get_user(c, buf + i))
-                                       return -EFAULT;
-                               if (c == 'V')
-                                       expect_close = 1;
-                       }
-               }
-               octeon_wdt_ping();
-       }
-       return count;
-}
-
-/**
- *     octeon_wdt_ioctl:
- *     @file: file handle to the device
- *     @cmd: watchdog command
- *     @arg: argument pointer
- *
- *     The watchdog API defines a common set of functions for all
- *     watchdogs according to their available features. We only
- *     actually usefully support querying capabilities and setting
- *     the timeout.
- */
-
-static long octeon_wdt_ioctl(struct file *file, unsigned int cmd,
-                            unsigned long arg)
-{
-       void __user *argp = (void __user *)arg;
-       int __user *p = argp;
-       int new_heartbeat;
-
-       static struct watchdog_info ident = {
-               .options =              WDIOF_SETTIMEOUT|
-                                       WDIOF_MAGICCLOSE|
-                                       WDIOF_KEEPALIVEPING,
-               .firmware_version =     1,
-               .identity =             "OCTEON",
-       };
-
-       switch (cmd) {
-       case WDIOC_GETSUPPORT:
-               return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
-       case WDIOC_GETSTATUS:
-       case WDIOC_GETBOOTSTATUS:
-               return put_user(0, p);
-       case WDIOC_KEEPALIVE:
-               octeon_wdt_ping();
-               return 0;
-       case WDIOC_SETTIMEOUT:
-               if (get_user(new_heartbeat, p))
-                       return -EFAULT;
-               if (octeon_wdt_set_heartbeat(new_heartbeat))
-                       return -EINVAL;
-               /* Fall through. */
-       case WDIOC_GETTIMEOUT:
-               return put_user(heartbeat, p);
-       default:
-               return -ENOTTY;
-       }
-}
-
-/**
- *     octeon_wdt_open:
- *     @inode: inode of device
- *     @file: file handle to device
- *
- *     The watchdog device has been opened. The watchdog device is single
- *     open and on opening we do a ping to reset the counters.
- */
-
-static int octeon_wdt_open(struct inode *inode, struct file *file)
+static int octeon_wdt_start(struct watchdog_device *wdog)
 {
-       if (test_and_set_bit(0, &octeon_wdt_is_open))
-               return -EBUSY;
-       /*
-        *      Activate
-        */
-       octeon_wdt_ping();
+       octeon_wdt_ping(wdog);
        do_coundown = 1;
-       return nonseekable_open(inode, file);
+       return 0;
 }
 
-/**
- *     octeon_wdt_release:
- *     @inode: inode to board
- *     @file: file handle to board
- *
- *     The watchdog has a configurable API. There is a religious dispute
- *     between people who want their watchdog to be able to shut down and
- *     those who want to be sure if the watchdog manager dies the machine
- *     reboots. In the former case we disable the counters, in the latter
- *     case you have to open it again very soon.
- */
-
-static int octeon_wdt_release(struct inode *inode, struct file *file)
+static int octeon_wdt_stop(struct watchdog_device *wdog)
 {
-       if (expect_close) {
-               do_coundown = 0;
-               octeon_wdt_ping();
-       } else {
-               pr_crit("WDT device closed unexpectedly.  WDT will not stop!\n");
-       }
-       clear_bit(0, &octeon_wdt_is_open);
-       expect_close = 0;
+       do_coundown = 0;
+       octeon_wdt_ping(wdog);
        return 0;
 }
 
-static const struct file_operations octeon_wdt_fops = {
-       .owner          = THIS_MODULE,
-       .llseek         = no_llseek,
-       .write          = octeon_wdt_write,
-       .unlocked_ioctl = octeon_wdt_ioctl,
-       .open           = octeon_wdt_open,
-       .release        = octeon_wdt_release,
+static struct notifier_block octeon_wdt_cpu_notifier = {
+       .notifier_call = octeon_wdt_cpu_callback,
 };
 
-static struct miscdevice octeon_wdt_miscdev = {
-       .minor  = WATCHDOG_MINOR,
-       .name   = "watchdog",
-       .fops   = &octeon_wdt_fops,
+static const struct watchdog_info octeon_wdt_info = {
+       .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING,
+       .identity = "OCTEON",
 };
 
-static struct notifier_block octeon_wdt_cpu_notifier = {
-       .notifier_call = octeon_wdt_cpu_callback,
+static const struct watchdog_ops octeon_wdt_ops = {
+       .owner          = THIS_MODULE,
+       .start          = octeon_wdt_start,
+       .stop           = octeon_wdt_stop,
+       .ping           = octeon_wdt_ping,
+       .set_timeout    = octeon_wdt_set_timeout,
 };
 
+static struct watchdog_device octeon_wdt = {
+       .info   = &octeon_wdt_info,
+       .ops    = &octeon_wdt_ops,
+};
 
 /**
  * Module/ driver initialization.
@@ -685,7 +576,8 @@ static int __init octeon_wdt_init(void)
        max_timeout_sec = 6;
        do {
                max_timeout_sec--;
-               timeout_cnt = ((octeon_get_io_clock_rate() >> 8) * max_timeout_sec) >> 8;
+               timeout_cnt = ((octeon_get_io_clock_rate() >> 8) *
+                             max_timeout_sec) >> 8;
        } while (timeout_cnt > 65535);
 
        BUG_ON(timeout_cnt == 0);
@@ -694,11 +586,15 @@ static int __init octeon_wdt_init(void)
 
        pr_info("Initial granularity %d Sec\n", timeout_sec);
 
-       ret = misc_register(&octeon_wdt_miscdev);
+       octeon_wdt.timeout      = timeout_sec;
+       octeon_wdt.max_timeout  = UINT_MAX;
+
+       watchdog_set_nowayout(&octeon_wdt, nowayout);
+
+       ret = watchdog_register_device(&octeon_wdt);
        if (ret) {
-               pr_err("cannot register miscdev on minor=%d (err=%d)\n",
-                      WATCHDOG_MINOR, ret);
-               goto out;
+               pr_err("watchdog_register_device() failed: %d\n", ret);
+               return ret;
        }
 
        /* Build the NMI handler ... */
@@ -721,8 +617,7 @@ static int __init octeon_wdt_init(void)
        __register_hotcpu_notifier(&octeon_wdt_cpu_notifier);
        cpu_notifier_register_done();
 
-out:
-       return ret;
+       return 0;
 }
 
 /**
@@ -732,7 +627,7 @@ static void __exit octeon_wdt_cleanup(void)
 {
        int cpu;
 
-       misc_deregister(&octeon_wdt_miscdev);
+       watchdog_unregister_device(&octeon_wdt);
 
        cpu_notifier_register_begin();
        __unregister_hotcpu_notifier(&octeon_wdt_cpu_notifier);
index 55e2201..b9c6049 100644 (file)
@@ -216,7 +216,7 @@ static struct platform_driver platform_wdt_driver = {
 module_platform_driver(platform_wdt_driver);
 
 MODULE_AUTHOR("MontaVista Software, Inc. <source@mvista.com>");
-MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_AUTHOR("Wolfram Sang <kernel@pengutronix.de>");
 MODULE_DESCRIPTION("PNX4008 Watchdog Driver");
 
 module_param(heartbeat, uint, 0);
index aa85618..aa03ca8 100644 (file)
@@ -20,9 +20,9 @@
 #include <linux/reboot.h>
 #include <linux/watchdog.h>
 
-#define WDT_RST                0x0
-#define WDT_EN         0x8
-#define WDT_BITE_TIME  0x24
+#define WDT_RST                0x38
+#define WDT_EN         0x40
+#define WDT_BITE_TIME  0x5C
 
 struct qcom_wdt {
        struct watchdog_device  wdd;
@@ -117,6 +117,8 @@ static int qcom_wdt_probe(struct platform_device *pdev)
 {
        struct qcom_wdt *wdt;
        struct resource *res;
+       struct device_node *np = pdev->dev.of_node;
+       u32 percpu_offset;
        int ret;
 
        wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
@@ -124,6 +126,14 @@ static int qcom_wdt_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+       /* We use CPU0's DGT for the watchdog */
+       if (of_property_read_u32(np, "cpu-offset", &percpu_offset))
+               percpu_offset = 0;
+
+       res->start += percpu_offset;
+       res->end += percpu_offset;
+
        wdt->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(wdt->base))
                return PTR_ERR(wdt->base);
@@ -203,9 +213,8 @@ static int qcom_wdt_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id qcom_wdt_of_table[] = {
-       { .compatible = "qcom,kpss-wdt-msm8960", },
-       { .compatible = "qcom,kpss-wdt-apq8064", },
-       { .compatible = "qcom,kpss-wdt-ipq8064", },
+       { .compatible = "qcom,kpss-timer" },
+       { .compatible = "qcom,scss-timer" },
        { },
 };
 MODULE_DEVICE_TABLE(of, qcom_wdt_of_table);
index a62b1b6..e7f0d5b 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Watchdog driver for the RTC based watchdog in STMP3xxx and i.MX23/28
  *
- * Author: Wolfram Sang <w.sang@pengutronix.de>
+ * Author: Wolfram Sang <kernel@pengutronix.de>
  *
  * Copyright (C) 2011-12 Wolfram Sang, Pengutronix
  *
@@ -129,4 +129,4 @@ module_platform_driver(stmp3xxx_wdt_driver);
 
 MODULE_DESCRIPTION("STMP3XXX RTC Watchdog Driver");
 MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_AUTHOR("Wolfram Sang <kernel@pengutronix.de>");
index a270004..7cd226d 100644 (file)
@@ -276,4 +276,8 @@ config XEN_AUTO_XLATE
        help
          Support for auto-translated physmap guests.
 
+config XEN_ACPI
+       def_bool y
+       depends on X86 && ACPI
+
 endmenu
index 40edd1c..e293bc5 100644 (file)
@@ -13,7 +13,7 @@ CFLAGS_efi.o                          += -fshort-wchar
 
 dom0-$(CONFIG_PCI) += pci.o
 dom0-$(CONFIG_USB_SUPPORT) += dbgp.o
-dom0-$(CONFIG_ACPI) += acpi.o $(xen-pad-y)
+dom0-$(CONFIG_XEN_ACPI) += acpi.o $(xen-pad-y)
 xen-pad-$(CONFIG_X86) += xen-acpi-pad.o
 dom0-$(CONFIG_X86) += pcpu.o
 obj-$(CONFIG_XEN_DOM0)                 += $(dom0-y)
index 07ef383..b7f5150 100644 (file)
@@ -204,8 +204,7 @@ static LIST_HEAD(scsiback_free_pages);
 static DEFINE_MUTEX(scsiback_mutex);
 static LIST_HEAD(scsiback_list);
 
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *scsiback_fabric_configfs;
+static const struct target_core_fabric_ops scsiback_ops;
 
 static void scsiback_get(struct vscsibk_info *info)
 {
@@ -1902,7 +1901,7 @@ scsiback_make_tpg(struct se_wwn *wwn,
        tpg->tport = tport;
        tpg->tport_tpgt = tpgt;
 
-       ret = core_tpg_register(&scsiback_fabric_configfs->tf_ops, wwn,
+       ret = core_tpg_register(&scsiback_ops, wwn,
                                &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
        if (ret < 0) {
                kfree(tpg);
@@ -1944,7 +1943,9 @@ static int scsiback_check_false(struct se_portal_group *se_tpg)
        return 0;
 }
 
-static struct target_core_fabric_ops scsiback_ops = {
+static const struct target_core_fabric_ops scsiback_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "xen-pvscsi",
        .get_fabric_name                = scsiback_get_fabric_name,
        .get_fabric_proto_ident         = scsiback_get_fabric_proto_ident,
        .tpg_get_wwn                    = scsiback_get_fabric_wwn,
@@ -1991,62 +1992,10 @@ static struct target_core_fabric_ops scsiback_ops = {
        .fabric_make_nodeacl            = scsiback_make_nodeacl,
        .fabric_drop_nodeacl            = scsiback_drop_nodeacl,
 #endif
-};
-
-static int scsiback_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       pr_debug("fabric module %s on %s/%s on "UTS_RELEASE"\n",
-                VSCSI_VERSION, utsname()->sysname, utsname()->machine);
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "xen-pvscsi");
-       if (IS_ERR(fabric))
-               return PTR_ERR(fabric);
 
-       /*
-        * Setup fabric->tf_ops from our local scsiback_ops
-        */
-       fabric->tf_ops = scsiback_ops;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = scsiback_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = scsiback_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = scsiback_param_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * Register the fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               target_fabric_configfs_free(fabric);
-               return ret;
-       }
-       /*
-        * Setup our local pointer to *fabric
-        */
-       scsiback_fabric_configfs = fabric;
-       pr_debug("Set fabric -> scsiback_fabric_configfs\n");
-       return 0;
-};
-
-static void scsiback_deregister_configfs(void)
-{
-       if (!scsiback_fabric_configfs)
-               return;
-
-       target_fabric_configfs_deregister(scsiback_fabric_configfs);
-       scsiback_fabric_configfs = NULL;
-       pr_debug("Cleared scsiback_fabric_configfs\n");
+       .tfc_wwn_attrs                  = scsiback_wwn_attrs,
+       .tfc_tpg_base_attrs             = scsiback_tpg_attrs,
+       .tfc_tpg_param_attrs            = scsiback_param_attrs,
 };
 
 static const struct xenbus_device_id scsiback_ids[] = {
@@ -2078,6 +2027,9 @@ static int __init scsiback_init(void)
        if (!xen_domain())
                return -ENODEV;
 
+       pr_debug("xen-pvscsi: fabric module %s on %s/%s on "UTS_RELEASE"\n",
+                VSCSI_VERSION, utsname()->sysname, utsname()->machine);
+
        scsiback_cachep = kmem_cache_create("vscsiif_cache",
                sizeof(struct vscsibk_pend), 0, 0, scsiback_init_pend);
        if (!scsiback_cachep)
@@ -2087,7 +2039,7 @@ static int __init scsiback_init(void)
        if (ret)
                goto out_cache_destroy;
 
-       ret = scsiback_register_configfs();
+       ret = target_register_template(&scsiback_ops);
        if (ret)
                goto out_unregister_xenbus;
 
@@ -2110,7 +2062,7 @@ static void __exit scsiback_exit(void)
                        BUG();
                gnttab_free_pages(1, &page);
        }
-       scsiback_deregister_configfs();
+       target_unregister_template(&scsiback_ops);
        xenbus_unregister_driver(&scsiback_driver);
        kmem_cache_destroy(scsiback_cachep);
 }
index 8482f2d..31c0103 100644 (file)
@@ -247,7 +247,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
        if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
                return v9fs_remote_get_acl(dentry, name, buffer, size, type);
 
-       acl = v9fs_get_cached_acl(dentry->d_inode, type);
+       acl = v9fs_get_cached_acl(d_inode(dentry), type);
        if (IS_ERR(acl))
                return PTR_ERR(acl);
        if (acl == NULL)
@@ -285,7 +285,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
        int retval;
        struct posix_acl *acl;
        struct v9fs_session_info *v9ses;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (strcmp(name, "") != 0)
                return -EINVAL;
index a345b2d..bd456c6 100644 (file)
@@ -53,7 +53,7 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry)
                 dentry, dentry);
 
        /* Don't cache negative dentries */
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                return 1;
        return 0;
 }
@@ -83,7 +83,7 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
        if (flags & LOOKUP_RCU)
                return -ECHILD;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        if (!inode)
                goto out_valid;
 
index 76c3b1a..5cc00e5 100644 (file)
@@ -138,6 +138,8 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
                                           &err);
                        if (err)
                                return err;
+                       if (n == 0)
+                               return 0;
 
                        rdir->head = 0;
                        rdir->tail = n;
index 3662f1d..703342e 100644 (file)
@@ -595,7 +595,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
                 dir, dentry, flags);
 
        v9ses = v9fs_inode2v9ses(dir);
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        dfid = v9fs_fid_lookup(dentry->d_parent);
        if (IS_ERR(dfid)) {
                retval = PTR_ERR(dfid);
@@ -864,7 +864,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
        }
 
        /* Only creates */
-       if (!(flags & O_CREAT) || dentry->d_inode)
+       if (!(flags & O_CREAT) || d_really_is_positive(dentry))
                return finish_no_open(file, res);
 
        err = 0;
@@ -881,7 +881,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
        }
 
        v9fs_invalidate_inode_attr(dir);
-       v9inode = V9FS_I(dentry->d_inode);
+       v9inode = V9FS_I(d_inode(dentry));
        mutex_lock(&v9inode->v_mutex);
        if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
            !v9inode->writeback_fid &&
@@ -908,7 +908,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 
        file->private_data = fid;
        if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
-               v9fs_cache_inode_set_cookie(dentry->d_inode, file);
+               v9fs_cache_inode_set_cookie(d_inode(dentry), file);
 
        *opened |= FILE_CREATED;
 out:
@@ -969,8 +969,8 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        p9_debug(P9_DEBUG_VFS, "\n");
        retval = 0;
-       old_inode = old_dentry->d_inode;
-       new_inode = new_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
+       new_inode = d_inode(new_dentry);
        v9ses = v9fs_inode2v9ses(old_inode);
        oldfid = v9fs_fid_lookup(old_dentry);
        if (IS_ERR(oldfid))
@@ -1061,7 +1061,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
        p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
        v9ses = v9fs_dentry2v9ses(dentry);
        if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
-               generic_fillattr(dentry->d_inode, stat);
+               generic_fillattr(d_inode(dentry), stat);
                return 0;
        }
        fid = v9fs_fid_lookup(dentry);
@@ -1072,8 +1072,8 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
        if (IS_ERR(st))
                return PTR_ERR(st);
 
-       v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
-       generic_fillattr(dentry->d_inode, stat);
+       v9fs_stat2inode(st, d_inode(dentry), d_inode(dentry)->i_sb);
+       generic_fillattr(d_inode(dentry), stat);
 
        p9stat_free(st);
        kfree(st);
@@ -1095,7 +1095,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
        struct p9_wstat wstat;
 
        p9_debug(P9_DEBUG_VFS, "\n");
-       retval = inode_change_ok(dentry->d_inode, iattr);
+       retval = inode_change_ok(d_inode(dentry), iattr);
        if (retval)
                return retval;
 
@@ -1128,20 +1128,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 
        /* Write all dirty data */
        if (d_is_reg(dentry))
-               filemap_write_and_wait(dentry->d_inode->i_mapping);
+               filemap_write_and_wait(d_inode(dentry)->i_mapping);
 
        retval = p9_client_wstat(fid, &wstat);
        if (retval < 0)
                return retval;
 
        if ((iattr->ia_valid & ATTR_SIZE) &&
-           iattr->ia_size != i_size_read(dentry->d_inode))
-               truncate_setsize(dentry->d_inode, iattr->ia_size);
+           iattr->ia_size != i_size_read(d_inode(dentry)))
+               truncate_setsize(d_inode(dentry), iattr->ia_size);
 
-       v9fs_invalidate_inode_attr(dentry->d_inode);
+       v9fs_invalidate_inode_attr(d_inode(dentry));
 
-       setattr_copy(dentry->d_inode, iattr);
-       mark_inode_dirty(dentry->d_inode);
+       setattr_copy(d_inode(dentry), iattr);
+       mark_inode_dirty(d_inode(dentry));
        return 0;
 }
 
@@ -1403,7 +1403,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
        retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
        __putname(name);
        if (!retval) {
-               v9fs_refresh_inode(oldfid, old_dentry->d_inode);
+               v9fs_refresh_inode(oldfid, d_inode(old_dentry));
                v9fs_invalidate_inode_attr(dir);
        }
 clunk_fid:
index 6054c16..9861c7c 100644 (file)
@@ -265,7 +265,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
        }
 
        /* Only creates */
-       if (!(flags & O_CREAT) || dentry->d_inode)
+       if (!(flags & O_CREAT) || d_really_is_positive(dentry))
                return  finish_no_open(file, res);
 
        v9ses = v9fs_inode2v9ses(dir);
@@ -481,7 +481,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
        p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
        v9ses = v9fs_dentry2v9ses(dentry);
        if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
-               generic_fillattr(dentry->d_inode, stat);
+               generic_fillattr(d_inode(dentry), stat);
                return 0;
        }
        fid = v9fs_fid_lookup(dentry);
@@ -496,8 +496,8 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
        if (IS_ERR(st))
                return PTR_ERR(st);
 
-       v9fs_stat2inode_dotl(st, dentry->d_inode);
-       generic_fillattr(dentry->d_inode, stat);
+       v9fs_stat2inode_dotl(st, d_inode(dentry));
+       generic_fillattr(d_inode(dentry), stat);
        /* Change block size to what the server returned */
        stat->blksize = st->st_blksize;
 
@@ -557,7 +557,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
        int retval;
        struct p9_fid *fid;
        struct p9_iattr_dotl p9attr;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        p9_debug(P9_DEBUG_VFS, "\n");
 
@@ -795,10 +795,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
                if (IS_ERR(fid))
                        return PTR_ERR(fid);
 
-               v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
+               v9fs_refresh_inode_dotl(fid, d_inode(old_dentry));
        }
-       ihold(old_dentry->d_inode);
-       d_instantiate(dentry, old_dentry->d_inode);
+       ihold(d_inode(old_dentry));
+       d_instantiate(dentry, d_inode(old_dentry));
 
        return err;
 }
index 0afd038..e99a338 100644 (file)
@@ -168,8 +168,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                        retval = PTR_ERR(st);
                        goto release_sb;
                }
-               root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
-               v9fs_stat2inode_dotl(st, root->d_inode);
+               d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
+               v9fs_stat2inode_dotl(st, d_inode(root));
                kfree(st);
        } else {
                struct p9_wstat *st = NULL;
@@ -179,8 +179,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                        goto release_sb;
                }
 
-               root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
-               v9fs_stat2inode(st, root->d_inode, sb);
+               d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
+               v9fs_stat2inode(st, d_inode(root), sb);
 
                p9stat_free(st);
                kfree(st);
index b9acada..335055d 100644 (file)
@@ -298,7 +298,7 @@ out:
 int
 adfs_notify_change(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct super_block *sb = inode->i_sb;
        unsigned int ia_valid = attr->ia_valid;
        int error;
index 5022ac9..a8f463c 100644 (file)
@@ -138,7 +138,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino)
 static int
 affs_remove_link(struct dentry *dentry)
 {
-       struct inode *dir, *inode = dentry->d_inode;
+       struct inode *dir, *inode = d_inode(dentry);
        struct super_block *sb = inode->i_sb;
        struct buffer_head *bh = NULL, *link_bh = NULL;
        u32 link_ino, ino;
@@ -268,11 +268,11 @@ affs_remove_header(struct dentry *dentry)
        struct buffer_head *bh = NULL;
        int retval;
 
-       dir = dentry->d_parent->d_inode;
+       dir = d_inode(dentry->d_parent);
        sb = dir->i_sb;
 
        retval = -ENOENT;
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        if (!inode)
                goto done;
 
@@ -471,10 +471,9 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
 bool
 affs_nofilenametruncate(const struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        return affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_NO_TRUNCATE);
-
 }
 
 /* Check if the name is valid for a affs object. */
index 9628003..a022f4a 100644 (file)
@@ -213,7 +213,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
 int
 affs_notify_change(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
index ec8ca0e..181e05b 100644 (file)
@@ -251,7 +251,7 @@ int
 affs_unlink(struct inode *dir, struct dentry *dentry)
 {
        pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
-                dentry->d_inode->i_ino, dentry);
+                d_inode(dentry)->i_ino, dentry);
 
        return affs_remove_header(dentry);
 }
@@ -320,7 +320,7 @@ int
 affs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
-                dentry->d_inode->i_ino, dentry);
+                d_inode(dentry)->i_ino, dentry);
 
        return affs_remove_header(dentry);
 }
@@ -403,7 +403,7 @@ err:
 int
 affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
 
        pr_debug("%s(%lu, %lu, \"%pd\")\n", __func__, inode->i_ino, dir->i_ino,
                 dentry);
@@ -430,13 +430,13 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
                return retval;
 
        /* Unlink destination if it already exists */
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                retval = affs_remove_header(new_dentry);
                if (retval)
                        return retval;
        }
 
-       bh = affs_bread(sb, old_dentry->d_inode->i_ino);
+       bh = affs_bread(sb, d_inode(old_dentry)->i_ino);
        if (!bh)
                return -EIO;
 
index 4ec35e9..e10e177 100644 (file)
@@ -505,7 +505,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
        _enter("{%x:%u},%p{%pd},",
               vnode->fid.vid, vnode->fid.vnode, dentry, dentry);
 
-       ASSERTCMP(dentry->d_inode, ==, NULL);
+       ASSERTCMP(d_inode(dentry), ==, NULL);
 
        if (dentry->d_name.len >= AFSNAMEMAX) {
                _leave(" = -ENAMETOOLONG");
@@ -563,8 +563,8 @@ success:
        _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }",
               fid.vnode,
               fid.unique,
-              dentry->d_inode->i_ino,
-              dentry->d_inode->i_generation);
+              d_inode(dentry)->i_ino,
+              d_inode(dentry)->i_generation);
 
        return NULL;
 }
@@ -586,9 +586,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
        if (flags & LOOKUP_RCU)
                return -ECHILD;
 
-       vnode = AFS_FS_I(dentry->d_inode);
+       vnode = AFS_FS_I(d_inode(dentry));
 
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                _enter("{v={%x:%u} n=%pd fl=%lx},",
                       vnode->fid.vid, vnode->fid.vnode, dentry,
                       vnode->flags);
@@ -601,7 +601,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
        /* lock down the parent dentry so we can peer at it */
        parent = dget_parent(dentry);
-       dir = AFS_FS_I(parent->d_inode);
+       dir = AFS_FS_I(d_inode(parent));
 
        /* validate the parent directory */
        if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
@@ -623,9 +623,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
        switch (ret) {
        case 0:
                /* the filename maps to something */
-               if (!dentry->d_inode)
+               if (d_really_is_negative(dentry))
                        goto out_bad;
-               if (is_bad_inode(dentry->d_inode)) {
+               if (is_bad_inode(d_inode(dentry))) {
                        printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
                               dentry);
                        goto out_bad;
@@ -647,7 +647,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
                        _debug("%pd: file deleted (uq %u -> %u I:%u)",
                               dentry, fid.unique,
                               vnode->fid.unique,
-                              dentry->d_inode->i_generation);
+                              d_inode(dentry)->i_generation);
                        spin_lock(&vnode->lock);
                        set_bit(AFS_VNODE_DELETED, &vnode->flags);
                        spin_unlock(&vnode->lock);
@@ -658,7 +658,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
        case -ENOENT:
                /* the filename is unknown */
                _debug("%pd: dirent not found", dentry);
-               if (dentry->d_inode)
+               if (d_really_is_positive(dentry))
                        goto not_found;
                goto out_valid;
 
@@ -703,9 +703,9 @@ static int afs_d_delete(const struct dentry *dentry)
        if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
                goto zap;
 
-       if (dentry->d_inode &&
-           (test_bit(AFS_VNODE_DELETED,   &AFS_FS_I(dentry->d_inode)->flags) ||
-            test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(dentry->d_inode)->flags)))
+       if (d_really_is_positive(dentry) &&
+           (test_bit(AFS_VNODE_DELETED,   &AFS_FS_I(d_inode(dentry))->flags) ||
+            test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(d_inode(dentry))->flags)))
                goto zap;
 
        _leave(" = 0 [keep]");
@@ -814,8 +814,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
        if (ret < 0)
                goto rmdir_error;
 
-       if (dentry->d_inode) {
-               vnode = AFS_FS_I(dentry->d_inode);
+       if (d_really_is_positive(dentry)) {
+               vnode = AFS_FS_I(d_inode(dentry));
                clear_nlink(&vnode->vfs_inode);
                set_bit(AFS_VNODE_DELETED, &vnode->flags);
                afs_discard_callback_on_delete(vnode);
@@ -856,8 +856,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
                goto error;
        }
 
-       if (dentry->d_inode) {
-               vnode = AFS_FS_I(dentry->d_inode);
+       if (d_really_is_positive(dentry)) {
+               vnode = AFS_FS_I(d_inode(dentry));
 
                /* make sure we have a callback promise on the victim */
                ret = afs_validate(vnode, key);
@@ -869,7 +869,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
        if (ret < 0)
                goto remove_error;
 
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                /* if the file wasn't deleted due to excess hard links, the
                 * fileserver will break the callback promise on the file - if
                 * it had one - before it returns to us, and if it was deleted,
@@ -879,7 +879,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
                 * or it was outstanding on a different server, then it won't
                 * break it either...
                 */
-               vnode = AFS_FS_I(dentry->d_inode);
+               vnode = AFS_FS_I(d_inode(dentry));
                if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
                        _debug("AFS_VNODE_DELETED");
                if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
@@ -977,7 +977,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
        struct key *key;
        int ret;
 
-       vnode = AFS_FS_I(from->d_inode);
+       vnode = AFS_FS_I(d_inode(from));
        dvnode = AFS_FS_I(dir);
 
        _enter("{%x:%u},{%x:%u},{%pd}",
@@ -1089,7 +1089,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct key *key;
        int ret;
 
-       vnode = AFS_FS_I(old_dentry->d_inode);
+       vnode = AFS_FS_I(d_inode(old_dentry));
        orig_dvnode = AFS_FS_I(old_dir);
        new_dvnode = AFS_FS_I(new_dir);
 
index 8a1d38e..e06f5a2 100644 (file)
@@ -379,7 +379,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 {
        struct inode *inode;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
 
@@ -458,7 +458,7 @@ void afs_evict_inode(struct inode *inode)
  */
 int afs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+       struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
        struct key *key;
        int ret;
 
index 938c5ab..ccd0b21 100644 (file)
@@ -134,7 +134,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
 
        _enter("{%pd}", mntpt);
 
-       BUG_ON(!mntpt->d_inode);
+       BUG_ON(!d_inode(mntpt));
 
        ret = -ENOMEM;
        devname = (char *) get_zeroed_page(GFP_KERNEL);
@@ -145,7 +145,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
        if (!options)
                goto error_no_options;
 
-       vnode = AFS_FS_I(mntpt->d_inode);
+       vnode = AFS_FS_I(d_inode(mntpt));
        if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
                /* if the directory is a pseudo directory, use the d_name */
                static const char afs_root_cell[] = ":root.cell.";
@@ -169,14 +169,14 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
                }
        } else {
                /* read the contents of the AFS special symlink */
-               loff_t size = i_size_read(mntpt->d_inode);
+               loff_t size = i_size_read(d_inode(mntpt));
                char *buf;
 
                ret = -EINVAL;
                if (size > PAGE_SIZE - 1)
                        goto error_no_page;
 
-               page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
+               page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
                if (IS_ERR(page)) {
                        ret = PTR_ERR(page);
                        goto error_no_page;
index c486155..1fb4a51 100644 (file)
@@ -529,7 +529,7 @@ static void afs_destroy_inode(struct inode *inode)
 static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct afs_volume_status vs;
-       struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+       struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
        struct key *key;
        int ret;
 
index d10e619..5b700ef 100644 (file)
@@ -235,12 +235,12 @@ static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
 
 static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi)
 {
-       return sbi->sb->s_root->d_inode->i_ino;
+       return d_inode(sbi->sb->s_root)->i_ino;
 }
 
 static inline int simple_positive(struct dentry *dentry)
 {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
 }
 
 static inline void __autofs4_add_expiring(struct dentry *dentry)
index 11dd118..1cebc3c 100644 (file)
@@ -374,7 +374,7 @@ static struct dentry *should_expire(struct dentry *dentry,
                return NULL;
        }
 
-       if (dentry->d_inode && d_is_symlink(dentry)) {
+       if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
                DPRINTK("checking symlink %p %pd", dentry, dentry);
                /*
                 * A symlink can't be "busy" in the usual sense so
index 1c55388..a3ae0b2 100644 (file)
@@ -71,7 +71,7 @@ void autofs4_kill_sb(struct super_block *sb)
 static int autofs4_show_options(struct seq_file *m, struct dentry *root)
 {
        struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
-       struct inode *root_inode = root->d_sb->s_root->d_inode;
+       struct inode *root_inode = d_inode(root->d_sb->s_root);
 
        if (!sbi)
                return 0;
@@ -352,8 +352,8 @@ struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
 
        inode->i_mode = mode;
        if (sb->s_root) {
-               inode->i_uid = sb->s_root->d_inode->i_uid;
-               inode->i_gid = sb->s_root->d_inode->i_gid;
+               inode->i_uid = d_inode(sb->s_root)->i_uid;
+               inode->i_gid = d_inode(sb->s_root)->i_gid;
        }
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        inode->i_ino = get_next_ino();
index 7e44fdd..c6d7d3d 100644 (file)
@@ -240,7 +240,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry,
                spin_lock(&expiring->d_lock);
 
                /* We've already been dentry_iput or unlinked */
-               if (!expiring->d_inode)
+               if (d_really_is_negative(expiring))
                        goto next;
 
                qstr = &expiring->d_name;
@@ -371,7 +371,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
         * having d_mountpoint() true, so there's no need to call back
         * to the daemon.
         */
-       if (dentry->d_inode && d_is_symlink(dentry)) {
+       if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
                spin_unlock(&sbi->fs_lock);
                goto done;
        }
@@ -459,7 +459,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
                        return 0;
                if (d_mountpoint(dentry))
                        return 0;
-               inode = ACCESS_ONCE(dentry->d_inode);
+               inode = d_inode_rcu(dentry);
                if (inode && S_ISLNK(inode->i_mode))
                        return -EISDIR;
                if (list_empty(&dentry->d_subdirs))
@@ -485,7 +485,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
                 * an incorrect ELOOP error return.
                 */
                if ((!d_mountpoint(dentry) && !simple_empty(dentry)) ||
-                   (dentry->d_inode && d_is_symlink(dentry)))
+                   (d_really_is_positive(dentry) && d_is_symlink(dentry)))
                        status = -EISDIR;
        }
        spin_unlock(&sbi->fs_lock);
@@ -625,8 +625,8 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
        }
        dput(ino->dentry);
 
-       dentry->d_inode->i_size = 0;
-       clear_nlink(dentry->d_inode);
+       d_inode(dentry)->i_size = 0;
+       clear_nlink(d_inode(dentry));
 
        dir->i_mtime = CURRENT_TIME;
 
@@ -719,8 +719,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
                        atomic_dec(&p_ino->count);
        }
        dput(ino->dentry);
-       dentry->d_inode->i_size = 0;
-       clear_nlink(dentry->d_inode);
+       d_inode(dentry)->i_size = 0;
+       clear_nlink(d_inode(dentry));
 
        if (dir->i_nlink)
                drop_nlink(dir);
@@ -839,7 +839,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
 */
 int is_autofs4_dentry(struct dentry *dentry)
 {
-       return dentry && dentry->d_inode &&
+       return dentry && d_really_is_positive(dentry) &&
                dentry->d_op == &autofs4_dentry_operations &&
                dentry->d_fsdata != NULL;
 }
index 1e8ea19..de58cc7 100644 (file)
@@ -18,7 +18,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
        struct autofs_info *ino = autofs4_dentry_ino(dentry);
        if (ino && !autofs4_oz_mode(sbi))
                ino->last_used = jiffies;
-       nd_set_link(nd, dentry->d_inode->i_private);
+       nd_set_link(nd, d_inode(dentry)->i_private);
        return NULL;
 }
 
index 2ad05ab..35b755e 100644 (file)
@@ -322,7 +322,7 @@ static int validate_request(struct autofs_wait_queue **wait,
                 * continue on and create a new request.
                 */
                if (!IS_ROOT(dentry)) {
-                       if (dentry->d_inode && d_unhashed(dentry)) {
+                       if (d_really_is_positive(dentry) && d_unhashed(dentry)) {
                                struct dentry *parent = dentry->d_parent;
                                new = d_lookup(parent, &dentry->d_name);
                                if (new)
@@ -364,7 +364,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
        if (pid == 0 || tgid == 0)
                return -ENOENT;
 
-       if (!dentry->d_inode) {
+       if (d_really_is_negative(dentry)) {
                /*
                 * A wait for a negative dentry is invalid for certain
                 * cases. A direct or offset mount "always" has its mount
index 16e0a48..7943533 100644 (file)
@@ -471,7 +471,7 @@ static void *
 befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct super_block *sb = dentry->d_sb;
-       struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+       struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
        befs_data_stream *data = &befs_ino->i_data.ds;
        befs_off_t len = data->size;
        char *link;
@@ -501,7 +501,7 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
 static void *
 befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+       struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
 
        nd_set_link(nd, befs_ino->i_data.symlink);
        return NULL;
index 7a81827..3ec6113 100644 (file)
@@ -153,7 +153,7 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
 static int bfs_link(struct dentry *old, struct inode *dir,
                                                struct dentry *new)
 {
-       struct inode *inode = old->d_inode;
+       struct inode *inode = d_inode(old);
        struct bfs_sb_info *info = BFS_SB(inode->i_sb);
        int err;
 
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
 static int bfs_unlink(struct inode *dir, struct dentry *dentry)
 {
        int error = -ENOENT;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct buffer_head *bh;
        struct bfs_dirent *de;
        struct bfs_sb_info *info = BFS_SB(inode->i_sb);
@@ -216,7 +216,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        int error = -ENOENT;
 
        old_bh = new_bh = NULL;
-       old_inode = old_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
        if (S_ISDIR(old_inode->i_mode))
                return -EINVAL;
 
@@ -231,7 +231,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                goto end_rename;
 
        error = -EPERM;
-       new_inode = new_dentry->d_inode;
+       new_inode = d_inode(new_dentry);
        new_bh = bfs_find_entry(new_dir, 
                                new_dentry->d_name.name, 
                                new_dentry->d_name.len, &new_de);
index 9dcb054..78f005f 100644 (file)
@@ -591,7 +591,7 @@ static void kill_node(Node *e)
        write_unlock(&entries_lock);
 
        if (dentry) {
-               drop_nlink(dentry->d_inode);
+               drop_nlink(d_inode(dentry));
                d_drop(dentry);
                dput(dentry);
                simple_release_fs(&bm_mnt, &entry_count);
@@ -638,11 +638,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
        case 3:
                /* Delete this handler. */
                root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&root->d_inode->i_mutex);
+               mutex_lock(&d_inode(root)->i_mutex);
 
                kill_node(e);
 
-               mutex_unlock(&root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(root)->i_mutex);
                dput(root);
                break;
        default:
@@ -675,14 +675,14 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
                return PTR_ERR(e);
 
        root = dget(sb->s_root);
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
        dentry = lookup_one_len(e->name, root, strlen(e->name));
        err = PTR_ERR(dentry);
        if (IS_ERR(dentry))
                goto out;
 
        err = -EEXIST;
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                goto out2;
 
        inode = bm_get_inode(sb, S_IFREG | 0644);
@@ -711,7 +711,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
 out2:
        dput(dentry);
 out:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
        dput(root);
 
        if (err) {
@@ -754,12 +754,12 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
        case 3:
                /* Delete all handlers. */
                root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&root->d_inode->i_mutex);
+               mutex_lock(&d_inode(root)->i_mutex);
 
                while (!list_empty(&entries))
                        kill_node(list_entry(entries.next, Node, list));
 
-               mutex_unlock(&root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(root)->i_mutex);
                dput(root);
                break;
        default:
index 897ee05..c7e4163 100644 (file)
@@ -152,7 +152,8 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
        struct inode *inode = file->f_mapping->host;
 
        return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
-                                   blkdev_get_block, NULL, NULL, 0);
+                                   blkdev_get_block, NULL, NULL,
+                                   DIO_SKIP_DIO_COUNT);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
@@ -1716,7 +1717,7 @@ struct block_device *lookup_bdev(const char *pathname)
        if (error)
                return ERR_PTR(error);
 
-       inode = path.dentry->d_inode;
+       inode = d_backing_inode(path.dentry);
        error = -ENOTBLK;
        if (!S_ISBLK(inode->i_mode))
                goto fail;
index 4dabeb8..df9932b 100644 (file)
@@ -87,7 +87,7 @@ BTRFS_WORK_HELPER(scrubwrc_helper);
 BTRFS_WORK_HELPER(scrubnc_helper);
 
 static struct __btrfs_workqueue *
-__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
                         int thresh)
 {
        struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -132,7 +132,7 @@ static inline void
 __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
 
 struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
-                                             int flags,
+                                             unsigned int flags,
                                              int max_active,
                                              int thresh)
 {
index e386c29..ec2ee47 100644 (file)
@@ -66,7 +66,7 @@ BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
 BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
 
 struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
-                                             int flags,
+                                             unsigned int flags,
                                              int max_active,
                                              int thresh);
 void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
index f55721f..9de772e 100644 (file)
@@ -1206,7 +1206,7 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
        struct ulist *roots = NULL;
        struct ulist_iterator uiter;
        struct ulist_node *node;
-       struct seq_list elem = {};
+       struct seq_list elem = SEQ_LIST_INIT(elem);
        int ret = 0;
 
        tmp = ulist_alloc(GFP_NOFS);
@@ -1610,7 +1610,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
        struct ulist *roots = NULL;
        struct ulist_node *ref_node = NULL;
        struct ulist_node *root_node = NULL;
-       struct seq_list tree_mod_seq_elem = {};
+       struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
        struct ulist_iterator ref_uiter;
        struct ulist_iterator root_uiter;
 
index de5e4f2..0ef5cc1 100644 (file)
@@ -66,7 +66,11 @@ struct btrfs_inode {
         */
        struct btrfs_key location;
 
-       /* Lock for counters */
+       /*
+        * Lock for counters and all fields used to determine if the inode is in
+        * the log or not (last_trans, last_sub_trans, last_log_commit,
+        * logged_trans).
+        */
        spinlock_t lock;
 
        /* the extent_tree has caches of all the extent mappings to disk */
@@ -250,6 +254,9 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
 
 static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
 {
+       int ret = 0;
+
+       spin_lock(&BTRFS_I(inode)->lock);
        if (BTRFS_I(inode)->logged_trans == generation &&
            BTRFS_I(inode)->last_sub_trans <=
            BTRFS_I(inode)->last_log_commit &&
@@ -263,9 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
                 */
                smp_mb();
                if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
-                       return 1;
+                       ret = 1;
        }
-       return 0;
+       spin_unlock(&BTRFS_I(inode)->lock);
+       return ret;
 }
 
 #define BTRFS_DIO_ORIG_BIO_SUBMITTED   0x1
index d897ef8..ce7dec8 100644 (file)
@@ -2990,8 +2990,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
                               (unsigned long long)bio->bi_iter.bi_sector,
                               dev_bytenr, bio->bi_bdev);
 
-               mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
-                                      GFP_NOFS);
+               mapped_datav = kmalloc_array(bio->bi_vcnt,
+                                            sizeof(*mapped_datav), GFP_NOFS);
                if (!mapped_datav)
                        goto leave;
                cur_bytenr = dev_bytenr;
@@ -3241,8 +3241,5 @@ void btrfsic_unmount(struct btrfs_root *root,
 
        mutex_unlock(&btrfsic_mutex);
 
-       if (is_vmalloc_addr(state))
-               vfree(state);
-       else
-               kfree(state);
+       kvfree(state);
 }
index e9df886..ce62324 100644 (file)
@@ -622,7 +622,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        cb->orig_bio = bio;
 
        nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
-       cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
+       cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
                                       GFP_NOFS);
        if (!cb->compressed_pages)
                goto fail1;
@@ -750,7 +750,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
 static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
 static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
 
-static struct btrfs_compress_op *btrfs_compress_op[] = {
+static const struct btrfs_compress_op * const btrfs_compress_op[] = {
        &btrfs_zlib_compress,
        &btrfs_lzo_compress,
 };
index d181f70..13a4dc0 100644 (file)
@@ -77,7 +77,7 @@ struct btrfs_compress_op {
                          size_t srclen, size_t destlen);
 };
 
-extern struct btrfs_compress_op btrfs_zlib_compress;
-extern struct btrfs_compress_op btrfs_lzo_compress;
+extern const struct btrfs_compress_op btrfs_zlib_compress;
+extern const struct btrfs_compress_op btrfs_lzo_compress;
 
 #endif
index 6d67f32..0f11ebc 100644 (file)
@@ -578,7 +578,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
        if (!tree_mod_need_log(fs_info, eb))
                return 0;
 
-       tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+       tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
        if (!tm_list)
                return -ENOMEM;
 
@@ -677,7 +677,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
 
        if (log_removal && btrfs_header_level(old_root) > 0) {
                nritems = btrfs_header_nritems(old_root);
-               tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+               tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
                                  flags);
                if (!tm_list) {
                        ret = -ENOMEM;
@@ -814,7 +814,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
        if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
                return 0;
 
-       tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+       tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
                          GFP_NOFS);
        if (!tm_list)
                return -ENOMEM;
@@ -905,8 +905,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
                return 0;
 
        nritems = btrfs_header_nritems(eb);
-       tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
-                         GFP_NOFS);
+       tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
        if (!tm_list)
                return -ENOMEM;
 
@@ -1073,7 +1072,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                        ret = btrfs_dec_ref(trans, root, buf, 1);
                        BUG_ON(ret); /* -ENOMEM */
                }
-               clean_tree_block(trans, root, buf);
+               clean_tree_block(trans, root->fs_info, buf);
                *last_ref = 1;
        }
        return 0;
@@ -1678,7 +1677,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                        continue;
                }
 
-               cur = btrfs_find_tree_block(root, blocknr);
+               cur = btrfs_find_tree_block(root->fs_info, blocknr);
                if (cur)
                        uptodate = btrfs_buffer_uptodate(cur, gen, 0);
                else
@@ -1943,7 +1942,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
                path->locks[level] = 0;
                path->nodes[level] = NULL;
-               clean_tree_block(trans, root, mid);
+               clean_tree_block(trans, root->fs_info, mid);
                btrfs_tree_unlock(mid);
                /* once for the path */
                free_extent_buffer(mid);
@@ -1997,7 +1996,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                if (wret < 0 && wret != -ENOSPC)
                        ret = wret;
                if (btrfs_header_nritems(right) == 0) {
-                       clean_tree_block(trans, root, right);
+                       clean_tree_block(trans, root->fs_info, right);
                        btrfs_tree_unlock(right);
                        del_ptr(root, path, level + 1, pslot + 1);
                        root_sub_used(root, right->len);
@@ -2041,7 +2040,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                BUG_ON(wret == 1);
        }
        if (btrfs_header_nritems(mid) == 0) {
-               clean_tree_block(trans, root, mid);
+               clean_tree_block(trans, root->fs_info, mid);
                btrfs_tree_unlock(mid);
                del_ptr(root, path, level + 1, pslot);
                root_sub_used(root, mid->len);
@@ -2259,7 +2258,7 @@ static void reada_for_search(struct btrfs_root *root,
 
        search = btrfs_node_blockptr(node, slot);
        blocksize = root->nodesize;
-       eb = btrfs_find_tree_block(root, search);
+       eb = btrfs_find_tree_block(root->fs_info, search);
        if (eb) {
                free_extent_buffer(eb);
                return;
@@ -2319,7 +2318,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
        if (slot > 0) {
                block1 = btrfs_node_blockptr(parent, slot - 1);
                gen = btrfs_node_ptr_generation(parent, slot - 1);
-               eb = btrfs_find_tree_block(root, block1);
+               eb = btrfs_find_tree_block(root->fs_info, block1);
                /*
                 * if we get -eagain from btrfs_buffer_uptodate, we
                 * don't want to return eagain here.  That will loop
@@ -2332,7 +2331,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
        if (slot + 1 < nritems) {
                block2 = btrfs_node_blockptr(parent, slot + 1);
                gen = btrfs_node_ptr_generation(parent, slot + 1);
-               eb = btrfs_find_tree_block(root, block2);
+               eb = btrfs_find_tree_block(root->fs_info, block2);
                if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
                        block2 = 0;
                free_extent_buffer(eb);
@@ -2450,7 +2449,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        blocknr = btrfs_node_blockptr(b, slot);
        gen = btrfs_node_ptr_generation(b, slot);
 
-       tmp = btrfs_find_tree_block(root, blocknr);
+       tmp = btrfs_find_tree_block(root->fs_info, blocknr);
        if (tmp) {
                /* first we do an atomic uptodate check */
                if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
@@ -3126,7 +3125,8 @@ again:
  * higher levels
  *
  */
-static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_fs_info *fs_info,
+                          struct btrfs_path *path,
                           struct btrfs_disk_key *key, int level)
 {
        int i;
@@ -3137,7 +3137,7 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
                if (!path->nodes[i])
                        break;
                t = path->nodes[i];
-               tree_mod_log_set_node_key(root->fs_info, t, tslot, 1);
+               tree_mod_log_set_node_key(fs_info, t, tslot, 1);
                btrfs_set_node_key(t, key, tslot);
                btrfs_mark_buffer_dirty(path->nodes[i]);
                if (tslot != 0)
@@ -3151,7 +3151,8 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
  * This function isn't completely safe. It's the caller's responsibility
  * that the new key won't break the order
  */
-void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
+                            struct btrfs_path *path,
                             struct btrfs_key *new_key)
 {
        struct btrfs_disk_key disk_key;
@@ -3173,7 +3174,7 @@ void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
        btrfs_set_item_key(eb, &disk_key, slot);
        btrfs_mark_buffer_dirty(eb);
        if (slot == 0)
-               fixup_low_keys(root, path, &disk_key, 1);
+               fixup_low_keys(fs_info, path, &disk_key, 1);
 }
 
 /*
@@ -3692,7 +3693,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        if (left_nritems)
                btrfs_mark_buffer_dirty(left);
        else
-               clean_tree_block(trans, root, left);
+               clean_tree_block(trans, root->fs_info, left);
 
        btrfs_mark_buffer_dirty(right);
 
@@ -3704,7 +3705,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        if (path->slots[0] >= left_nritems) {
                path->slots[0] -= left_nritems;
                if (btrfs_header_nritems(path->nodes[0]) == 0)
-                       clean_tree_block(trans, root, path->nodes[0]);
+                       clean_tree_block(trans, root->fs_info, path->nodes[0]);
                btrfs_tree_unlock(path->nodes[0]);
                free_extent_buffer(path->nodes[0]);
                path->nodes[0] = right;
@@ -3928,10 +3929,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        if (right_nritems)
                btrfs_mark_buffer_dirty(right);
        else
-               clean_tree_block(trans, root, right);
+               clean_tree_block(trans, root->fs_info, right);
 
        btrfs_item_key(right, &disk_key, 0);
-       fixup_low_keys(root, path, &disk_key, 1);
+       fixup_low_keys(root->fs_info, path, &disk_key, 1);
 
        /* then fixup the leaf pointer in the path */
        if (path->slots[0] < push_items) {
@@ -4168,6 +4169,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
        int mid;
        int slot;
        struct extent_buffer *right;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        int ret = 0;
        int wret;
        int split;
@@ -4271,10 +4273,10 @@ again:
        btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(right, root->root_key.objectid);
        btrfs_set_header_level(right, 0);
-       write_extent_buffer(right, root->fs_info->fsid,
+       write_extent_buffer(right, fs_info->fsid,
                            btrfs_header_fsid(), BTRFS_FSID_SIZE);
 
-       write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
+       write_extent_buffer(right, fs_info->chunk_tree_uuid,
                            btrfs_header_chunk_tree_uuid(right),
                            BTRFS_UUID_SIZE);
 
@@ -4297,7 +4299,7 @@ again:
                        path->nodes[0] = right;
                        path->slots[0] = 0;
                        if (path->slots[1] == 0)
-                               fixup_low_keys(root, path, &disk_key, 1);
+                               fixup_low_keys(fs_info, path, &disk_key, 1);
                }
                btrfs_mark_buffer_dirty(right);
                return ret;
@@ -4615,7 +4617,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
                btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
                btrfs_set_item_key(leaf, &disk_key, slot);
                if (slot == 0)
-                       fixup_low_keys(root, path, &disk_key, 1);
+                       fixup_low_keys(root->fs_info, path, &disk_key, 1);
        }
 
        item = btrfs_item_nr(slot);
@@ -4716,7 +4718,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
 
        if (path->slots[0] == 0) {
                btrfs_cpu_key_to_disk(&disk_key, cpu_key);
-               fixup_low_keys(root, path, &disk_key, 1);
+               fixup_low_keys(root->fs_info, path, &disk_key, 1);
        }
        btrfs_unlock_up_safe(path, 1);
 
@@ -4888,7 +4890,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
                struct btrfs_disk_key disk_key;
 
                btrfs_node_key(parent, &disk_key, 0);
-               fixup_low_keys(root, path, &disk_key, level + 1);
+               fixup_low_keys(root->fs_info, path, &disk_key, level + 1);
        }
        btrfs_mark_buffer_dirty(parent);
 }
@@ -4981,7 +4983,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        btrfs_set_header_level(leaf, 0);
                } else {
                        btrfs_set_path_blocking(path);
-                       clean_tree_block(trans, root, leaf);
+                       clean_tree_block(trans, root->fs_info, leaf);
                        btrfs_del_leaf(trans, root, path, leaf);
                }
        } else {
@@ -4990,7 +4992,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                        struct btrfs_disk_key disk_key;
 
                        btrfs_item_key(leaf, &disk_key, 0);
-                       fixup_low_keys(root, path, &disk_key, 1);
+                       fixup_low_keys(root->fs_info, path, &disk_key, 1);
                }
 
                /* delete the leaf if it is mostly empty */
index f9c89ca..6f364e1 100644 (file)
@@ -1061,6 +1061,12 @@ struct btrfs_block_group_item {
        __le64 flags;
 } __attribute__ ((__packed__));
 
+#define BTRFS_QGROUP_LEVEL_SHIFT               48
+static inline u64 btrfs_qgroup_level(u64 qgroupid)
+{
+       return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+}
+
 /*
  * is subvolume quota turned on?
  */
@@ -1256,6 +1262,20 @@ struct btrfs_caching_control {
        atomic_t count;
 };
 
+struct btrfs_io_ctl {
+       void *cur, *orig;
+       struct page *page;
+       struct page **pages;
+       struct btrfs_root *root;
+       struct inode *inode;
+       unsigned long size;
+       int index;
+       int num_pages;
+       int entries;
+       int bitmaps;
+       unsigned check_crcs:1;
+};
+
 struct btrfs_block_group_cache {
        struct btrfs_key key;
        struct btrfs_block_group_item item;
@@ -1321,6 +1341,9 @@ struct btrfs_block_group_cache {
 
        /* For dirty block groups */
        struct list_head dirty_list;
+       struct list_head io_list;
+
+       struct btrfs_io_ctl io_ctl;
 };
 
 /* delayed seq elem */
@@ -1329,6 +1352,8 @@ struct seq_list {
        u64 seq;
 };
 
+#define SEQ_LIST_INIT(name)    { .list = LIST_HEAD_INIT((name).list), .seq = 0 }
+
 enum btrfs_orphan_cleanup_state {
        ORPHAN_CLEANUP_STARTED  = 1,
        ORPHAN_CLEANUP_DONE     = 2,
@@ -1472,6 +1497,12 @@ struct btrfs_fs_info {
        struct mutex chunk_mutex;
        struct mutex volume_mutex;
 
+       /*
+        * this is taken to make sure we don't set block groups ro after
+        * the free space cache has been allocated on them
+        */
+       struct mutex ro_block_group_mutex;
+
        /* this is used during read/modify/write to make sure
         * no two ios are trying to mod the same stripe at the same
         * time
@@ -1513,6 +1544,7 @@ struct btrfs_fs_info {
 
        spinlock_t delayed_iput_lock;
        struct list_head delayed_iputs;
+       struct rw_semaphore delayed_iput_sem;
 
        /* this protects tree_mod_seq_list */
        spinlock_t tree_mod_seq_lock;
@@ -3295,6 +3327,9 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
 }
 
 /* extent-tree.c */
+
+u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
+
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
                                                 unsigned num_items)
 {
@@ -3385,6 +3420,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         u64 bytenr, u64 num_bytes, u64 parent,
                         u64 root_objectid, u64 owner, u64 offset, int no_quota);
 
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root);
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root);
 int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
@@ -3417,7 +3454,7 @@ enum btrfs_reserve_flush_enum {
        BTRFS_RESERVE_FLUSH_ALL,
 };
 
-int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
@@ -3440,6 +3477,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
                                              unsigned short type);
 void btrfs_free_block_rsv(struct btrfs_root *root,
                          struct btrfs_block_rsv *rsv);
+void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
 int btrfs_block_rsv_add(struct btrfs_root *root,
                        struct btrfs_block_rsv *block_rsv, u64 num_bytes,
                        enum btrfs_reserve_flush_enum flush);
@@ -3486,7 +3524,8 @@ int btrfs_previous_item(struct btrfs_root *root,
                        int type);
 int btrfs_previous_extent_item(struct btrfs_root *root,
                        struct btrfs_path *path, u64 min_objectid);
-void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
+                            struct btrfs_path *path,
                             struct btrfs_key *new_key);
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -4180,7 +4219,8 @@ int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
 static inline int is_fstree(u64 rootid)
 {
        if (rootid == BTRFS_FS_TREE_OBJECTID ||
-           (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
+           ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID &&
+             !btrfs_qgroup_level(rootid)))
                return 1;
        return 0;
 }
index 82f0c7c..cde698a 100644 (file)
@@ -1383,7 +1383,7 @@ out:
 
 
 static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
-                                    struct btrfs_root *root, int nr)
+                                    struct btrfs_fs_info *fs_info, int nr)
 {
        struct btrfs_async_delayed_work *async_work;
 
@@ -1399,7 +1399,7 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
                        btrfs_async_run_delayed_root, NULL, NULL);
        async_work->nr = nr;
 
-       btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
+       btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
        return 0;
 }
 
@@ -1426,6 +1426,7 @@ static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
        struct btrfs_delayed_root *delayed_root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
 
        delayed_root = btrfs_get_delayed_root(root);
 
@@ -1438,7 +1439,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
 
                seq = atomic_read(&delayed_root->items_seq);
 
-               ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
+               ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
                if (ret)
                        return;
 
@@ -1447,7 +1448,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
                return;
        }
 
-       btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
+       btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
 }
 
 /* Will return 0 or -ENOMEM */
index 6d16bea..8f8ed7d 100644 (file)
@@ -489,11 +489,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
  * existing and update must have the same bytenr
  */
 static noinline void
-update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
+update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
+                        struct btrfs_delayed_ref_node *existing,
                         struct btrfs_delayed_ref_node *update)
 {
        struct btrfs_delayed_ref_head *existing_ref;
        struct btrfs_delayed_ref_head *ref;
+       int old_ref_mod;
 
        existing_ref = btrfs_delayed_node_to_head(existing);
        ref = btrfs_delayed_node_to_head(update);
@@ -541,7 +543,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
         * only need the lock for this case cause we could be processing it
         * currently, for refs we just added we know we're a-ok.
         */
+       old_ref_mod = existing_ref->total_ref_mod;
        existing->ref_mod += update->ref_mod;
+       existing_ref->total_ref_mod += update->ref_mod;
+
+       /*
+        * If we are going to from a positive ref mod to a negative or vice
+        * versa we need to make sure to adjust pending_csums accordingly.
+        */
+       if (existing_ref->is_data) {
+               if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
+                       delayed_refs->pending_csums -= existing->num_bytes;
+               if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
+                       delayed_refs->pending_csums += existing->num_bytes;
+       }
        spin_unlock(&existing_ref->lock);
 }
 
@@ -605,6 +620,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        head_ref->is_data = is_data;
        head_ref->ref_root = RB_ROOT;
        head_ref->processing = 0;
+       head_ref->total_ref_mod = count_mod;
 
        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);
@@ -614,7 +630,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        existing = htree_insert(&delayed_refs->href_root,
                                &head_ref->href_node);
        if (existing) {
-               update_existing_head_ref(&existing->node, ref);
+               update_existing_head_ref(delayed_refs, &existing->node, ref);
                /*
                 * we've updated the existing ref, free the newly
                 * allocated ref
@@ -622,6 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                head_ref = existing;
        } else {
+               if (is_data && count_mod < 0)
+                       delayed_refs->pending_csums += num_bytes;
                delayed_refs->num_heads++;
                delayed_refs->num_heads_ready++;
                atomic_inc(&delayed_refs->num_entries);
index a764e23..5eb0892 100644 (file)
@@ -88,6 +88,14 @@ struct btrfs_delayed_ref_head {
        struct rb_node href_node;
 
        struct btrfs_delayed_extent_op *extent_op;
+
+       /*
+        * This is used to track the final ref_mod from all the refs associated
+        * with this head ref, this is not adjusted as delayed refs are run,
+        * this is meant to track if we need to do the csum accounting or not.
+        */
+       int total_ref_mod;
+
        /*
         * when a new extent is allocated, it is just reserved in memory
         * The actual extent isn't inserted into the extent allocation tree
@@ -138,6 +146,8 @@ struct btrfs_delayed_ref_root {
        /* total number of head nodes ready for processing */
        unsigned long num_heads_ready;
 
+       u64 pending_csums;
+
        /*
         * set when the tree is flushing before a transaction commit,
         * used by the throttling code to decide if new updates need
index 5ec03d9..0573848 100644 (file)
@@ -670,8 +670,8 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
        case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
        case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
                srcdev = dev_replace->srcdev;
-               args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
-                       div64_u64(btrfs_device_get_total_bytes(srcdev), 1000));
+               args->status.progress_1000 = div_u64(dev_replace->cursor_left,
+                       div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
                break;
        }
        btrfs_dev_replace_unlock(dev_replace);
@@ -806,7 +806,7 @@ static int btrfs_dev_replace_kthread(void *data)
                btrfs_dev_replace_status(fs_info, status_args);
                progress = status_args->status.progress_1000;
                kfree(status_args);
-               do_div(progress, 10);
+               progress = div_u64(progress, 10);
                printk_in_rcu(KERN_INFO
                        "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
                        dev_replace->srcdev->missing ? "<missing disk>" :
index 639f266..2ef9a4b 100644 (file)
@@ -54,7 +54,7 @@
 #include <asm/cpufeature.h>
 #endif
 
-static struct extent_io_ops btree_extent_io_ops;
+static const struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
@@ -274,10 +274,11 @@ void btrfs_csum_final(u32 crc, char *result)
  * compute the csum for a btree block, and either verify it or write it
  * into the csum field of the block.
  */
-static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+static int csum_tree_block(struct btrfs_fs_info *fs_info,
+                          struct extent_buffer *buf,
                           int verify)
 {
-       u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
        char *result = NULL;
        unsigned long len;
        unsigned long cur_len;
@@ -302,7 +303,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                offset += cur_len;
        }
        if (csum_size > sizeof(inline_result)) {
-               result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
+               result = kzalloc(csum_size, GFP_NOFS);
                if (!result)
                        return 1;
        } else {
@@ -321,7 +322,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                        printk_ratelimited(KERN_WARNING
                                "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
                                "level %d\n",
-                               root->fs_info->sb->s_id, buf->start,
+                               fs_info->sb->s_id, buf->start,
                                val, found, btrfs_header_level(buf));
                        if (result != (char *)&inline_result)
                                kfree(result);
@@ -418,12 +419,6 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
 
                if (memcmp(raw_disk_sb, result, csum_size))
                        ret = 1;
-
-               if (ret && btrfs_super_generation(disk_sb) < 10) {
-                       printk(KERN_WARNING
-                               "BTRFS: super block crcs don't match, older mkfs detected\n");
-                       ret = 0;
-               }
        }
 
        if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
@@ -501,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
  * we only fill in the checksum field in the first page of a multi-page block
  */
 
-static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
+static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
 {
        u64 start = page_offset(page);
        u64 found_start;
@@ -513,14 +508,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
        found_start = btrfs_header_bytenr(eb);
        if (WARN_ON(found_start != start || !PageUptodate(page)))
                return 0;
-       csum_tree_block(root, eb, 0);
+       csum_tree_block(fs_info, eb, 0);
        return 0;
 }
 
-static int check_tree_block_fsid(struct btrfs_root *root,
+static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
                                 struct extent_buffer *eb)
 {
-       struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
        u8 fsid[BTRFS_UUID_SIZE];
        int ret = 1;
 
@@ -640,7 +635,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                ret = -EIO;
                goto err;
        }
-       if (check_tree_block_fsid(root, eb)) {
+       if (check_tree_block_fsid(root->fs_info, eb)) {
                printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
                               eb->fs_info->sb->s_id, eb->start);
                ret = -EIO;
@@ -657,7 +652,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
        btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
                                       eb, found_level);
 
-       ret = csum_tree_block(root, eb, 1);
+       ret = csum_tree_block(root->fs_info, eb, 1);
        if (ret) {
                ret = -EIO;
                goto err;
@@ -882,7 +877,7 @@ static int btree_csum_one_bio(struct bio *bio)
 
        bio_for_each_segment_all(bvec, bio, i) {
                root = BTRFS_I(bvec->bv_page->mapping->host)->root;
-               ret = csum_dirty_buffer(root, bvec->bv_page);
+               ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
                if (ret)
                        break;
        }
@@ -1119,10 +1114,10 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
        return 0;
 }
 
-struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
                                            u64 bytenr)
 {
-       return find_extent_buffer(root->fs_info, bytenr);
+       return find_extent_buffer(fs_info, bytenr);
 }
 
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
@@ -1165,11 +1160,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 
 }
 
-void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+void clean_tree_block(struct btrfs_trans_handle *trans,
+                     struct btrfs_fs_info *fs_info,
                      struct extent_buffer *buf)
 {
-       struct btrfs_fs_info *fs_info = root->fs_info;
-
        if (btrfs_header_generation(buf) ==
            fs_info->running_transaction->transid) {
                btrfs_assert_tree_locked(buf);
@@ -2146,6 +2140,267 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
        }
 }
 
+static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
+{
+       mutex_init(&fs_info->scrub_lock);
+       atomic_set(&fs_info->scrubs_running, 0);
+       atomic_set(&fs_info->scrub_pause_req, 0);
+       atomic_set(&fs_info->scrubs_paused, 0);
+       atomic_set(&fs_info->scrub_cancel_req, 0);
+       init_waitqueue_head(&fs_info->scrub_pause_wait);
+       fs_info->scrub_workers_refcnt = 0;
+}
+
+static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
+{
+       spin_lock_init(&fs_info->balance_lock);
+       mutex_init(&fs_info->balance_mutex);
+       atomic_set(&fs_info->balance_running, 0);
+       atomic_set(&fs_info->balance_pause_req, 0);
+       atomic_set(&fs_info->balance_cancel_req, 0);
+       fs_info->balance_ctl = NULL;
+       init_waitqueue_head(&fs_info->balance_wait_q);
+}
+
+static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info,
+                                  struct btrfs_root *tree_root)
+{
+       fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+       set_nlink(fs_info->btree_inode, 1);
+       /*
+        * we set the i_size on the btree inode to the max possible int.
+        * the real end of the address space is determined by all of
+        * the devices in the system
+        */
+       fs_info->btree_inode->i_size = OFFSET_MAX;
+       fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
+
+       RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
+       extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
+                            fs_info->btree_inode->i_mapping);
+       BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
+       extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
+
+       BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
+
+       BTRFS_I(fs_info->btree_inode)->root = tree_root;
+       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
+              sizeof(struct btrfs_key));
+       set_bit(BTRFS_INODE_DUMMY,
+               &BTRFS_I(fs_info->btree_inode)->runtime_flags);
+       btrfs_insert_inode_hash(fs_info->btree_inode);
+}
+
+static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
+{
+       fs_info->dev_replace.lock_owner = 0;
+       atomic_set(&fs_info->dev_replace.nesting_level, 0);
+       mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
+       mutex_init(&fs_info->dev_replace.lock_management_lock);
+       mutex_init(&fs_info->dev_replace.lock);
+       init_waitqueue_head(&fs_info->replace_wait);
+}
+
+static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
+{
+       spin_lock_init(&fs_info->qgroup_lock);
+       mutex_init(&fs_info->qgroup_ioctl_lock);
+       fs_info->qgroup_tree = RB_ROOT;
+       fs_info->qgroup_op_tree = RB_ROOT;
+       INIT_LIST_HEAD(&fs_info->dirty_qgroups);
+       fs_info->qgroup_seq = 1;
+       fs_info->quota_enabled = 0;
+       fs_info->pending_quota_state = 0;
+       fs_info->qgroup_ulist = NULL;
+       mutex_init(&fs_info->qgroup_rescan_lock);
+}
+
+static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
+               struct btrfs_fs_devices *fs_devices)
+{
+       int max_active = fs_info->thread_pool_size;
+       unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
+
+       fs_info->workers =
+               btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
+                                     max_active, 16);
+
+       fs_info->delalloc_workers =
+               btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
+
+       fs_info->flush_workers =
+               btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
+
+       fs_info->caching_workers =
+               btrfs_alloc_workqueue("cache", flags, max_active, 0);
+
+       /*
+        * a higher idle thresh on the submit workers makes it much more
+        * likely that bios will be send down in a sane order to the
+        * devices
+        */
+       fs_info->submit_workers =
+               btrfs_alloc_workqueue("submit", flags,
+                                     min_t(u64, fs_devices->num_devices,
+                                           max_active), 64);
+
+       fs_info->fixup_workers =
+               btrfs_alloc_workqueue("fixup", flags, 1, 0);
+
+       /*
+        * endios are largely parallel and should have a very
+        * low idle thresh
+        */
+       fs_info->endio_workers =
+               btrfs_alloc_workqueue("endio", flags, max_active, 4);
+       fs_info->endio_meta_workers =
+               btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
+       fs_info->endio_meta_write_workers =
+               btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
+       fs_info->endio_raid56_workers =
+               btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+       fs_info->endio_repair_workers =
+               btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
+       fs_info->rmw_workers =
+               btrfs_alloc_workqueue("rmw", flags, max_active, 2);
+       fs_info->endio_write_workers =
+               btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
+       fs_info->endio_freespace_worker =
+               btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
+       fs_info->delayed_workers =
+               btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
+       fs_info->readahead_workers =
+               btrfs_alloc_workqueue("readahead", flags, max_active, 2);
+       fs_info->qgroup_rescan_workers =
+               btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+       fs_info->extent_workers =
+               btrfs_alloc_workqueue("extent-refs", flags,
+                                     min_t(u64, fs_devices->num_devices,
+                                           max_active), 8);
+
+       if (!(fs_info->workers && fs_info->delalloc_workers &&
+             fs_info->submit_workers && fs_info->flush_workers &&
+             fs_info->endio_workers && fs_info->endio_meta_workers &&
+             fs_info->endio_meta_write_workers &&
+             fs_info->endio_repair_workers &&
+             fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
+             fs_info->endio_freespace_worker && fs_info->rmw_workers &&
+             fs_info->caching_workers && fs_info->readahead_workers &&
+             fs_info->fixup_workers && fs_info->delayed_workers &&
+             fs_info->extent_workers &&
+             fs_info->qgroup_rescan_workers)) {
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+                           struct btrfs_fs_devices *fs_devices)
+{
+       int ret;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *log_tree_root;
+       struct btrfs_super_block *disk_super = fs_info->super_copy;
+       u64 bytenr = btrfs_super_log_root(disk_super);
+
+       if (fs_devices->rw_devices == 0) {
+               printk(KERN_WARNING "BTRFS: log replay required "
+                      "on RO media\n");
+               return -EIO;
+       }
+
+       log_tree_root = btrfs_alloc_root(fs_info);
+       if (!log_tree_root)
+               return -ENOMEM;
+
+       __setup_root(tree_root->nodesize, tree_root->sectorsize,
+                       tree_root->stripesize, log_tree_root, fs_info,
+                       BTRFS_TREE_LOG_OBJECTID);
+
+       log_tree_root->node = read_tree_block(tree_root, bytenr,
+                       fs_info->generation + 1);
+       if (!log_tree_root->node ||
+           !extent_buffer_uptodate(log_tree_root->node)) {
+               printk(KERN_ERR "BTRFS: failed to read log tree\n");
+               free_extent_buffer(log_tree_root->node);
+               kfree(log_tree_root);
+               return -EIO;
+       }
+       /* returns with log_tree_root freed on success */
+       ret = btrfs_recover_log_trees(log_tree_root);
+       if (ret) {
+               btrfs_error(tree_root->fs_info, ret,
+                           "Failed to recover log tree");
+               free_extent_buffer(log_tree_root->node);
+               kfree(log_tree_root);
+               return ret;
+       }
+
+       if (fs_info->sb->s_flags & MS_RDONLY) {
+               ret = btrfs_commit_super(tree_root);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
+                           struct btrfs_root *tree_root)
+{
+       struct btrfs_root *root;
+       struct btrfs_key location;
+       int ret;
+
+       location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
+       location.type = BTRFS_ROOT_ITEM_KEY;
+       location.offset = 0;
+
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(root))
+               return PTR_ERR(root);
+       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+       fs_info->extent_root = root;
+
+       location.objectid = BTRFS_DEV_TREE_OBJECTID;
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(root))
+               return PTR_ERR(root);
+       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+       fs_info->dev_root = root;
+       btrfs_init_devices_late(fs_info);
+
+       location.objectid = BTRFS_CSUM_TREE_OBJECTID;
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(root))
+               return PTR_ERR(root);
+       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+       fs_info->csum_root = root;
+
+       location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (!IS_ERR(root)) {
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->quota_enabled = 1;
+               fs_info->pending_quota_state = 1;
+               fs_info->quota_root = root;
+       }
+
+       location.objectid = BTRFS_UUID_TREE_OBJECTID;
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(root)) {
+               ret = PTR_ERR(root);
+               if (ret != -ENOENT)
+                       return ret;
+       } else {
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->uuid_root = root;
+       }
+
+       return 0;
+}
+
 int open_ctree(struct super_block *sb,
               struct btrfs_fs_devices *fs_devices,
               char *options)
@@ -2160,21 +2415,12 @@ int open_ctree(struct super_block *sb,
        struct btrfs_super_block *disk_super;
        struct btrfs_fs_info *fs_info = btrfs_sb(sb);
        struct btrfs_root *tree_root;
-       struct btrfs_root *extent_root;
-       struct btrfs_root *csum_root;
        struct btrfs_root *chunk_root;
-       struct btrfs_root *dev_root;
-       struct btrfs_root *quota_root;
-       struct btrfs_root *uuid_root;
-       struct btrfs_root *log_tree_root;
        int ret;
        int err = -EINVAL;
        int num_backups_tried = 0;
        int backup_index = 0;
        int max_active;
-       int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
-       bool create_uuid_tree;
-       bool check_uuid_tree;
 
        tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
        chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2241,11 +2487,12 @@ int open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->qgroup_op_lock);
        spin_lock_init(&fs_info->buffer_lock);
        spin_lock_init(&fs_info->unused_bgs_lock);
-       mutex_init(&fs_info->unused_bg_unpin_mutex);
        rwlock_init(&fs_info->tree_mod_log_lock);
+       mutex_init(&fs_info->unused_bg_unpin_mutex);
        mutex_init(&fs_info->reloc_mutex);
        mutex_init(&fs_info->delalloc_root_mutex);
        seqlock_init(&fs_info->profiles_lock);
+       init_rwsem(&fs_info->delayed_iput_sem);
 
        init_completion(&fs_info->kobj_unregister);
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2276,7 +2523,7 @@ int open_ctree(struct super_block *sb,
        fs_info->free_chunk_space = 0;
        fs_info->tree_mod_log = RB_ROOT;
        fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
-       fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
+       fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
        /* readahead state */
        INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
        spin_lock_init(&fs_info->reada_lock);
@@ -2294,55 +2541,18 @@ int open_ctree(struct super_block *sb,
        }
        btrfs_init_delayed_root(fs_info->delayed_root);
 
-       mutex_init(&fs_info->scrub_lock);
-       atomic_set(&fs_info->scrubs_running, 0);
-       atomic_set(&fs_info->scrub_pause_req, 0);
-       atomic_set(&fs_info->scrubs_paused, 0);
-       atomic_set(&fs_info->scrub_cancel_req, 0);
-       init_waitqueue_head(&fs_info->replace_wait);
-       init_waitqueue_head(&fs_info->scrub_pause_wait);
-       fs_info->scrub_workers_refcnt = 0;
+       btrfs_init_scrub(fs_info);
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        fs_info->check_integrity_print_mask = 0;
 #endif
-
-       spin_lock_init(&fs_info->balance_lock);
-       mutex_init(&fs_info->balance_mutex);
-       atomic_set(&fs_info->balance_running, 0);
-       atomic_set(&fs_info->balance_pause_req, 0);
-       atomic_set(&fs_info->balance_cancel_req, 0);
-       fs_info->balance_ctl = NULL;
-       init_waitqueue_head(&fs_info->balance_wait_q);
+       btrfs_init_balance(fs_info);
        btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
 
        sb->s_blocksize = 4096;
        sb->s_blocksize_bits = blksize_bits(4096);
        sb->s_bdi = &fs_info->bdi;
 
-       fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
-       set_nlink(fs_info->btree_inode, 1);
-       /*
-        * we set the i_size on the btree inode to the max possible int.
-        * the real end of the address space is determined by all of
-        * the devices in the system
-        */
-       fs_info->btree_inode->i_size = OFFSET_MAX;
-       fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
-
-       RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
-       extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
-                            fs_info->btree_inode->i_mapping);
-       BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
-       extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
-
-       BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
-
-       BTRFS_I(fs_info->btree_inode)->root = tree_root;
-       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
-              sizeof(struct btrfs_key));
-       set_bit(BTRFS_INODE_DUMMY,
-               &BTRFS_I(fs_info->btree_inode)->runtime_flags);
-       btrfs_insert_inode_hash(fs_info->btree_inode);
+       btrfs_init_btree_inode(fs_info, tree_root);
 
        spin_lock_init(&fs_info->block_group_cache_lock);
        fs_info->block_group_cache_tree = RB_ROOT;
@@ -2363,26 +2573,14 @@ int open_ctree(struct super_block *sb,
        mutex_init(&fs_info->transaction_kthread_mutex);
        mutex_init(&fs_info->cleaner_mutex);
        mutex_init(&fs_info->volume_mutex);
+       mutex_init(&fs_info->ro_block_group_mutex);
        init_rwsem(&fs_info->commit_root_sem);
        init_rwsem(&fs_info->cleanup_work_sem);
        init_rwsem(&fs_info->subvol_sem);
        sema_init(&fs_info->uuid_tree_rescan_sem, 1);
-       fs_info->dev_replace.lock_owner = 0;
-       atomic_set(&fs_info->dev_replace.nesting_level, 0);
-       mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
-       mutex_init(&fs_info->dev_replace.lock_management_lock);
-       mutex_init(&fs_info->dev_replace.lock);
 
-       spin_lock_init(&fs_info->qgroup_lock);
-       mutex_init(&fs_info->qgroup_ioctl_lock);
-       fs_info->qgroup_tree = RB_ROOT;
-       fs_info->qgroup_op_tree = RB_ROOT;
-       INIT_LIST_HEAD(&fs_info->dirty_qgroups);
-       fs_info->qgroup_seq = 1;
-       fs_info->quota_enabled = 0;
-       fs_info->pending_quota_state = 0;
-       fs_info->qgroup_ulist = NULL;
-       mutex_init(&fs_info->qgroup_rescan_lock);
+       btrfs_init_dev_replace_locks(fs_info);
+       btrfs_init_qgroup(fs_info);
 
        btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
        btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2554,75 +2752,9 @@ int open_ctree(struct super_block *sb,
 
        max_active = fs_info->thread_pool_size;
 
-       fs_info->workers =
-               btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
-                                     max_active, 16);
-
-       fs_info->delalloc_workers =
-               btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
-
-       fs_info->flush_workers =
-               btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
-
-       fs_info->caching_workers =
-               btrfs_alloc_workqueue("cache", flags, max_active, 0);
-
-       /*
-        * a higher idle thresh on the submit workers makes it much more
-        * likely that bios will be send down in a sane order to the
-        * devices
-        */
-       fs_info->submit_workers =
-               btrfs_alloc_workqueue("submit", flags,
-                                     min_t(u64, fs_devices->num_devices,
-                                           max_active), 64);
-
-       fs_info->fixup_workers =
-               btrfs_alloc_workqueue("fixup", flags, 1, 0);
-
-       /*
-        * endios are largely parallel and should have a very
-        * low idle thresh
-        */
-       fs_info->endio_workers =
-               btrfs_alloc_workqueue("endio", flags, max_active, 4);
-       fs_info->endio_meta_workers =
-               btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
-       fs_info->endio_meta_write_workers =
-               btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
-       fs_info->endio_raid56_workers =
-               btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
-       fs_info->endio_repair_workers =
-               btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
-       fs_info->rmw_workers =
-               btrfs_alloc_workqueue("rmw", flags, max_active, 2);
-       fs_info->endio_write_workers =
-               btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
-       fs_info->endio_freespace_worker =
-               btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
-       fs_info->delayed_workers =
-               btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
-       fs_info->readahead_workers =
-               btrfs_alloc_workqueue("readahead", flags, max_active, 2);
-       fs_info->qgroup_rescan_workers =
-               btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
-       fs_info->extent_workers =
-               btrfs_alloc_workqueue("extent-refs", flags,
-                                     min_t(u64, fs_devices->num_devices,
-                                           max_active), 8);
-
-       if (!(fs_info->workers && fs_info->delalloc_workers &&
-             fs_info->submit_workers && fs_info->flush_workers &&
-             fs_info->endio_workers && fs_info->endio_meta_workers &&
-             fs_info->endio_meta_write_workers &&
-             fs_info->endio_repair_workers &&
-             fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
-             fs_info->endio_freespace_worker && fs_info->rmw_workers &&
-             fs_info->caching_workers && fs_info->readahead_workers &&
-             fs_info->fixup_workers && fs_info->delayed_workers &&
-             fs_info->extent_workers &&
-             fs_info->qgroup_rescan_workers)) {
-               err = -ENOMEM;
+       ret = btrfs_init_workqueues(fs_info, fs_devices);
+       if (ret) {
+               err = ret;
                goto fail_sb_buffer;
        }
 
@@ -2688,7 +2820,7 @@ int open_ctree(struct super_block *sb,
         * keep the device that is marked to be the target device for the
         * dev_replace procedure
         */
-       btrfs_close_extra_devices(fs_info, fs_devices, 0);
+       btrfs_close_extra_devices(fs_devices, 0);
 
        if (!fs_devices->latest_bdev) {
                printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
@@ -2714,61 +2846,9 @@ retry_root_backup:
        tree_root->commit_root = btrfs_root_node(tree_root);
        btrfs_set_root_refs(&tree_root->root_item, 1);
 
-       location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
-       location.type = BTRFS_ROOT_ITEM_KEY;
-       location.offset = 0;
-
-       extent_root = btrfs_read_tree_root(tree_root, &location);
-       if (IS_ERR(extent_root)) {
-               ret = PTR_ERR(extent_root);
-               goto recovery_tree_root;
-       }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state);
-       fs_info->extent_root = extent_root;
-
-       location.objectid = BTRFS_DEV_TREE_OBJECTID;
-       dev_root = btrfs_read_tree_root(tree_root, &location);
-       if (IS_ERR(dev_root)) {
-               ret = PTR_ERR(dev_root);
-               goto recovery_tree_root;
-       }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state);
-       fs_info->dev_root = dev_root;
-       btrfs_init_devices_late(fs_info);
-
-       location.objectid = BTRFS_CSUM_TREE_OBJECTID;
-       csum_root = btrfs_read_tree_root(tree_root, &location);
-       if (IS_ERR(csum_root)) {
-               ret = PTR_ERR(csum_root);
+       ret = btrfs_read_roots(fs_info, tree_root);
+       if (ret)
                goto recovery_tree_root;
-       }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state);
-       fs_info->csum_root = csum_root;
-
-       location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
-       quota_root = btrfs_read_tree_root(tree_root, &location);
-       if (!IS_ERR(quota_root)) {
-               set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state);
-               fs_info->quota_enabled = 1;
-               fs_info->pending_quota_state = 1;
-               fs_info->quota_root = quota_root;
-       }
-
-       location.objectid = BTRFS_UUID_TREE_OBJECTID;
-       uuid_root = btrfs_read_tree_root(tree_root, &location);
-       if (IS_ERR(uuid_root)) {
-               ret = PTR_ERR(uuid_root);
-               if (ret != -ENOENT)
-                       goto recovery_tree_root;
-               create_uuid_tree = true;
-               check_uuid_tree = false;
-       } else {
-               set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state);
-               fs_info->uuid_root = uuid_root;
-               create_uuid_tree = false;
-               check_uuid_tree =
-                   generation != btrfs_super_uuid_tree_generation(disk_super);
-       }
 
        fs_info->generation = generation;
        fs_info->last_trans_committed = generation;
@@ -2792,7 +2872,7 @@ retry_root_backup:
                goto fail_block_groups;
        }
 
-       btrfs_close_extra_devices(fs_info, fs_devices, 1);
+       btrfs_close_extra_devices(fs_devices, 1);
 
        ret = btrfs_sysfs_add_one(fs_info);
        if (ret) {
@@ -2806,7 +2886,7 @@ retry_root_backup:
                goto fail_sysfs;
        }
 
-       ret = btrfs_read_block_groups(extent_root);
+       ret = btrfs_read_block_groups(fs_info->extent_root);
        if (ret) {
                printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
                goto fail_sysfs;
@@ -2864,48 +2944,11 @@ retry_root_backup:
 
        /* do not make disk changes in broken FS */
        if (btrfs_super_log_root(disk_super) != 0) {
-               u64 bytenr = btrfs_super_log_root(disk_super);
-
-               if (fs_devices->rw_devices == 0) {
-                       printk(KERN_WARNING "BTRFS: log replay required "
-                              "on RO media\n");
-                       err = -EIO;
-                       goto fail_qgroup;
-               }
-
-               log_tree_root = btrfs_alloc_root(fs_info);
-               if (!log_tree_root) {
-                       err = -ENOMEM;
-                       goto fail_qgroup;
-               }
-
-               __setup_root(nodesize, sectorsize, stripesize,
-                            log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
-
-               log_tree_root->node = read_tree_block(tree_root, bytenr,
-                                                     generation + 1);
-               if (!log_tree_root->node ||
-                   !extent_buffer_uptodate(log_tree_root->node)) {
-                       printk(KERN_ERR "BTRFS: failed to read log tree\n");
-                       free_extent_buffer(log_tree_root->node);
-                       kfree(log_tree_root);
-                       goto fail_qgroup;
-               }
-               /* returns with log_tree_root freed on success */
-               ret = btrfs_recover_log_trees(log_tree_root);
+               ret = btrfs_replay_log(fs_info, fs_devices);
                if (ret) {
-                       btrfs_error(tree_root->fs_info, ret,
-                                   "Failed to recover log tree");
-                       free_extent_buffer(log_tree_root->node);
-                       kfree(log_tree_root);
+                       err = ret;
                        goto fail_qgroup;
                }
-
-               if (sb->s_flags & MS_RDONLY) {
-                       ret = btrfs_commit_super(tree_root);
-                       if (ret)
-                               goto fail_qgroup;
-               }
        }
 
        ret = btrfs_find_orphan_roots(tree_root);
@@ -2966,7 +3009,7 @@ retry_root_backup:
 
        btrfs_qgroup_rescan_resume(fs_info);
 
-       if (create_uuid_tree) {
+       if (!fs_info->uuid_root) {
                pr_info("BTRFS: creating UUID tree\n");
                ret = btrfs_create_uuid_tree(fs_info);
                if (ret) {
@@ -2975,8 +3018,9 @@ retry_root_backup:
                        close_ctree(tree_root);
                        return ret;
                }
-       } else if (check_uuid_tree ||
-                  btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
+       } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
+                  fs_info->generation !=
+                               btrfs_super_uuid_tree_generation(disk_super)) {
                pr_info("BTRFS: checking UUID tree\n");
                ret = btrfs_check_uuid_tree(fs_info);
                if (ret) {
@@ -3668,7 +3712,7 @@ void close_ctree(struct btrfs_root *root)
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                ret = btrfs_commit_super(root);
                if (ret)
-                       btrfs_err(root->fs_info, "commit super ret %d", ret);
+                       btrfs_err(fs_info, "commit super ret %d", ret);
        }
 
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
@@ -3680,10 +3724,10 @@ void close_ctree(struct btrfs_root *root)
        fs_info->closing = 2;
        smp_mb();
 
-       btrfs_free_qgroup_config(root->fs_info);
+       btrfs_free_qgroup_config(fs_info);
 
        if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
-               btrfs_info(root->fs_info, "at unmount delalloc count %lld",
+               btrfs_info(fs_info, "at unmount delalloc count %lld",
                       percpu_counter_sum(&fs_info->delalloc_bytes));
        }
 
@@ -3723,7 +3767,7 @@ void close_ctree(struct btrfs_root *root)
 
        btrfs_free_stripe_hash_table(fs_info);
 
-       btrfs_free_block_rsv(root, root->orphan_block_rsv);
+       __btrfs_free_block_rsv(root->orphan_block_rsv);
        root->orphan_block_rsv = NULL;
 
        lock_chunks(root);
@@ -4134,7 +4178,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 
                clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
                while (start <= end) {
-                       eb = btrfs_find_tree_block(root, start);
+                       eb = btrfs_find_tree_block(root->fs_info, start);
                        start += root->nodesize;
                        if (!eb)
                                continue;
@@ -4285,7 +4329,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
        return 0;
 }
 
-static struct extent_io_ops btree_extent_io_ops = {
+static const struct extent_io_ops btree_extent_io_ops = {
        .readpage_end_io_hook = btree_readpage_end_io_hook,
        .readpage_io_failed_hook = btree_io_failed_hook,
        .submit_bio_hook = btree_submit_bio_hook,
index 27d44c0..d4cbfee 100644 (file)
@@ -52,7 +52,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
                                                   u64 bytenr);
 void clean_tree_block(struct btrfs_trans_handle *trans,
-                     struct btrfs_root *root, struct extent_buffer *buf);
+                     struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
 int open_ctree(struct super_block *sb,
               struct btrfs_fs_devices *fs_devices,
               char *options);
@@ -61,7 +61,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root, int max_mirrors);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
-struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
                                            u64 bytenr);
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
                                      struct btrfs_key *location);
index 37d1645..8d05220 100644 (file)
@@ -152,7 +152,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
 
 static struct dentry *btrfs_get_parent(struct dentry *child)
 {
-       struct inode *dir = child->d_inode;
+       struct inode *dir = d_inode(child);
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
@@ -220,8 +220,8 @@ fail:
 static int btrfs_get_name(struct dentry *parent, char *name,
                          struct dentry *child)
 {
-       struct inode *inode = child->d_inode;
-       struct inode *dir = parent->d_inode;
+       struct inode *inode = d_inode(child);
+       struct inode *dir = d_inode(parent);
        struct btrfs_path *path;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_inode_ref *iref;
index 8b353ad..1eef4ee 100644 (file)
@@ -2538,6 +2538,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                 * list before we release it.
                 */
                if (btrfs_delayed_ref_is_head(ref)) {
+                       if (locked_ref->is_data &&
+                           locked_ref->total_ref_mod < 0) {
+                               spin_lock(&delayed_refs->lock);
+                               delayed_refs->pending_csums -= ref->num_bytes;
+                               spin_unlock(&delayed_refs->lock);
+                       }
                        btrfs_delayed_ref_unlock(locked_ref);
                        locked_ref = NULL;
                }
@@ -2561,8 +2567,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                 */
                spin_lock(&delayed_refs->lock);
                avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
-               avg = div64_u64(avg, 4);
-               fs_info->avg_delayed_ref_runtime = avg;
+               fs_info->avg_delayed_ref_runtime = avg >> 2;    /* div by 4 */
                spin_unlock(&delayed_refs->lock);
        }
        return 0;
@@ -2624,7 +2629,26 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
         * We don't ever fill up leaves all the way so multiply by 2 just to be
         * closer to what we're really going to want to ouse.
         */
-       return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
+       return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
+}
+
+/*
+ * Takes the number of bytes to be csumm'ed and figures out how many leaves it
+ * would require to store the csums for that many bytes.
+ */
+u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
+{
+       u64 csum_size;
+       u64 num_csums_per_leaf;
+       u64 num_csums;
+
+       csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
+       num_csums_per_leaf = div64_u64(csum_size,
+                       (u64)btrfs_super_csum_size(root->fs_info->super_copy));
+       num_csums = div64_u64(csum_bytes, root->sectorsize);
+       num_csums += num_csums_per_leaf - 1;
+       num_csums = div64_u64(num_csums, num_csums_per_leaf);
+       return num_csums;
 }
 
 int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2632,7 +2656,9 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
 {
        struct btrfs_block_rsv *global_rsv;
        u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
-       u64 num_bytes;
+       u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
+       u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
+       u64 num_bytes, num_dirty_bgs_bytes;
        int ret = 0;
 
        num_bytes = btrfs_calc_trans_metadata_size(root, 1);
@@ -2640,17 +2666,22 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
        if (num_heads > 1)
                num_bytes += (num_heads - 1) * root->nodesize;
        num_bytes <<= 1;
+       num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
+       num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
+                                                            num_dirty_bgs);
        global_rsv = &root->fs_info->global_block_rsv;
 
        /*
         * If we can't allocate any more chunks lets make sure we have _lots_ of
         * wiggle room since running delayed refs can create more delayed refs.
         */
-       if (global_rsv->space_info->full)
+       if (global_rsv->space_info->full) {
+               num_dirty_bgs_bytes <<= 1;
                num_bytes <<= 1;
+       }
 
        spin_lock(&global_rsv->lock);
-       if (global_rsv->reserved <= num_bytes)
+       if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
                ret = 1;
        spin_unlock(&global_rsv->lock);
        return ret;
@@ -3193,7 +3224,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
        struct inode *inode = NULL;
        u64 alloc_hint = 0;
        int dcs = BTRFS_DC_ERROR;
-       int num_pages = 0;
+       u64 num_pages = 0;
        int retries = 0;
        int ret = 0;
 
@@ -3267,7 +3298,7 @@ again:
                if (ret)
                        goto out_put;
 
-               ret = btrfs_truncate_free_space_cache(root, trans, inode);
+               ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
                if (ret)
                        goto out_put;
        }
@@ -3293,14 +3324,14 @@ again:
         * taking up quite a bit since it's not folded into the other space
         * cache.
         */
-       num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
+       num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024);
        if (!num_pages)
                num_pages = 1;
 
        num_pages *= 16;
        num_pages *= PAGE_CACHE_SIZE;
 
-       ret = btrfs_check_data_free_space(inode, num_pages);
+       ret = btrfs_check_data_free_space(inode, num_pages, num_pages);
        if (ret)
                goto out_put;
 
@@ -3351,16 +3382,156 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+/*
+ * transaction commit does final block group cache writeback during a
+ * critical section where nothing is allowed to change the FS.  This is
+ * required in order for the cache to actually match the block group,
+ * but can introduce a lot of latency into the commit.
+ *
+ * So, btrfs_start_dirty_block_groups is here to kick off block group
+ * cache IO.  There's a chance we'll have to redo some of it if the
+ * block group changes again during the commit, but it greatly reduces
+ * the commit latency by getting rid of the easy block groups while
+ * we're still allowing others to join the commit.
+ */
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
        struct btrfs_block_group_cache *cache;
        struct btrfs_transaction *cur_trans = trans->transaction;
        int ret = 0;
-       struct btrfs_path *path;
+       int should_put;
+       struct btrfs_path *path = NULL;
+       LIST_HEAD(dirty);
+       struct list_head *io = &cur_trans->io_bgs;
+       int num_started = 0;
+       int loops = 0;
+
+       spin_lock(&cur_trans->dirty_bgs_lock);
+       if (!list_empty(&cur_trans->dirty_bgs)) {
+               list_splice_init(&cur_trans->dirty_bgs, &dirty);
+       }
+       spin_unlock(&cur_trans->dirty_bgs_lock);
 
-       if (list_empty(&cur_trans->dirty_bgs))
+again:
+       if (list_empty(&dirty)) {
+               btrfs_free_path(path);
                return 0;
+       }
+
+       /*
+        * make sure all the block groups on our dirty list actually
+        * exist
+        */
+       btrfs_create_pending_block_groups(trans, root);
+
+       if (!path) {
+               path = btrfs_alloc_path();
+               if (!path)
+                       return -ENOMEM;
+       }
+
+       while (!list_empty(&dirty)) {
+               cache = list_first_entry(&dirty,
+                                        struct btrfs_block_group_cache,
+                                        dirty_list);
+
+               /*
+                * cache_write_mutex is here only to save us from balance
+                * deleting this block group while we are writing out the
+                * cache
+                */
+               mutex_lock(&trans->transaction->cache_write_mutex);
+
+               /*
+                * this can happen if something re-dirties a block
+                * group that is already under IO.  Just wait for it to
+                * finish and then do it all again
+                */
+               if (!list_empty(&cache->io_list)) {
+                       list_del_init(&cache->io_list);
+                       btrfs_wait_cache_io(root, trans, cache,
+                                           &cache->io_ctl, path,
+                                           cache->key.objectid);
+                       btrfs_put_block_group(cache);
+               }
+
+
+               /*
+                * btrfs_wait_cache_io uses the cache->dirty_list to decide
+                * if it should update the cache_state.  Don't delete
+                * until after we wait.
+                *
+                * Since we're not running in the commit critical section
+                * we need the dirty_bgs_lock to protect from update_block_group
+                */
+               spin_lock(&cur_trans->dirty_bgs_lock);
+               list_del_init(&cache->dirty_list);
+               spin_unlock(&cur_trans->dirty_bgs_lock);
+
+               should_put = 1;
+
+               cache_save_setup(cache, trans, path);
+
+               if (cache->disk_cache_state == BTRFS_DC_SETUP) {
+                       cache->io_ctl.inode = NULL;
+                       ret = btrfs_write_out_cache(root, trans, cache, path);
+                       if (ret == 0 && cache->io_ctl.inode) {
+                               num_started++;
+                               should_put = 0;
+
+                               /*
+                                * the cache_write_mutex is protecting
+                                * the io_list
+                                */
+                               list_add_tail(&cache->io_list, io);
+                       } else {
+                               /*
+                                * if we failed to write the cache, the
+                                * generation will be bad and life goes on
+                                */
+                               ret = 0;
+                       }
+               }
+               if (!ret)
+                       ret = write_one_cache_group(trans, root, path, cache);
+               mutex_unlock(&trans->transaction->cache_write_mutex);
+
+               /* if its not on the io list, we need to put the block group */
+               if (should_put)
+                       btrfs_put_block_group(cache);
+
+               if (ret)
+                       break;
+       }
+
+       /*
+        * go through delayed refs for all the stuff we've just kicked off
+        * and then loop back (just once)
+        */
+       ret = btrfs_run_delayed_refs(trans, root, 0);
+       if (!ret && loops == 0) {
+               loops++;
+               spin_lock(&cur_trans->dirty_bgs_lock);
+               list_splice_init(&cur_trans->dirty_bgs, &dirty);
+               spin_unlock(&cur_trans->dirty_bgs_lock);
+               goto again;
+       }
+
+       btrfs_free_path(path);
+       return ret;
+}
+
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root)
+{
+       struct btrfs_block_group_cache *cache;
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       int ret = 0;
+       int should_put;
+       struct btrfs_path *path;
+       struct list_head *io = &cur_trans->io_bgs;
+       int num_started = 0;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -3376,16 +3547,61 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                cache = list_first_entry(&cur_trans->dirty_bgs,
                                         struct btrfs_block_group_cache,
                                         dirty_list);
+
+               /*
+                * this can happen if cache_save_setup re-dirties a block
+                * group that is already under IO.  Just wait for it to
+                * finish and then do it all again
+                */
+               if (!list_empty(&cache->io_list)) {
+                       list_del_init(&cache->io_list);
+                       btrfs_wait_cache_io(root, trans, cache,
+                                           &cache->io_ctl, path,
+                                           cache->key.objectid);
+                       btrfs_put_block_group(cache);
+               }
+
+               /*
+                * don't remove from the dirty list until after we've waited
+                * on any pending IO
+                */
                list_del_init(&cache->dirty_list);
-               if (cache->disk_cache_state == BTRFS_DC_CLEAR)
-                       cache_save_setup(cache, trans, path);
+               should_put = 1;
+
+               cache_save_setup(cache, trans, path);
+
                if (!ret)
-                       ret = btrfs_run_delayed_refs(trans, root,
-                                                    (unsigned long) -1);
-               if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
-                       btrfs_write_out_cache(root, trans, cache, path);
+                       ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
+
+               if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
+                       cache->io_ctl.inode = NULL;
+                       ret = btrfs_write_out_cache(root, trans, cache, path);
+                       if (ret == 0 && cache->io_ctl.inode) {
+                               num_started++;
+                               should_put = 0;
+                               list_add_tail(&cache->io_list, io);
+                       } else {
+                               /*
+                                * if we failed to write the cache, the
+                                * generation will be bad and life goes on
+                                */
+                               ret = 0;
+                       }
+               }
                if (!ret)
                        ret = write_one_cache_group(trans, root, path, cache);
+
+               /* if its not on the io list, we need to put the block group */
+               if (should_put)
+                       btrfs_put_block_group(cache);
+       }
+
+       while (!list_empty(io)) {
+               cache = list_first_entry(io, struct btrfs_block_group_cache,
+                                        io_list);
+               list_del_init(&cache->io_list);
+               btrfs_wait_cache_io(root, trans, cache,
+                                   &cache->io_ctl, path, cache->key.objectid);
                btrfs_put_block_group(cache);
        }
 
@@ -3635,19 +3851,21 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
  * This will check the space that the inode allocates from to make sure we have
  * enough space for bytes.
  */
-int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
 {
        struct btrfs_space_info *data_sinfo;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
        u64 used;
-       int ret = 0, committed = 0, alloc_chunk = 1;
+       int ret = 0;
+       int need_commit = 2;
+       int have_pinned_space;
 
        /* make sure bytes are sectorsize aligned */
        bytes = ALIGN(bytes, root->sectorsize);
 
        if (btrfs_is_free_space_inode(inode)) {
-               committed = 1;
+               need_commit = 0;
                ASSERT(current->journal_info);
        }
 
@@ -3669,7 +3887,7 @@ again:
                 * if we don't have enough free bytes in this space then we need
                 * to alloc a new chunk.
                 */
-               if (!data_sinfo->full && alloc_chunk) {
+               if (!data_sinfo->full) {
                        u64 alloc_target;
 
                        data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
@@ -3697,8 +3915,10 @@ alloc:
                        if (ret < 0) {
                                if (ret != -ENOSPC)
                                        return ret;
-                               else
+                               else {
+                                       have_pinned_space = 1;
                                        goto commit_trans;
+                               }
                        }
 
                        if (!data_sinfo)
@@ -3709,26 +3929,39 @@ alloc:
 
                /*
                 * If we don't have enough pinned space to deal with this
-                * allocation don't bother committing the transaction.
+                * allocation, and no removed chunk in current transaction,
+                * don't bother committing the transaction.
                 */
-               if (percpu_counter_compare(&data_sinfo->total_bytes_pinned,
-                                          bytes) < 0)
-                       committed = 1;
+               have_pinned_space = percpu_counter_compare(
+                       &data_sinfo->total_bytes_pinned,
+                       used + bytes - data_sinfo->total_bytes);
                spin_unlock(&data_sinfo->lock);
 
                /* commit the current transaction and try again */
 commit_trans:
-               if (!committed &&
+               if (need_commit &&
                    !atomic_read(&root->fs_info->open_ioctl_trans)) {
-                       committed = 1;
+                       need_commit--;
 
                        trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans))
                                return PTR_ERR(trans);
-                       ret = btrfs_commit_transaction(trans, root);
-                       if (ret)
-                               return ret;
-                       goto again;
+                       if (have_pinned_space >= 0 ||
+                           trans->transaction->have_free_bgs ||
+                           need_commit > 0) {
+                               ret = btrfs_commit_transaction(trans, root);
+                               if (ret)
+                                       return ret;
+                               /*
+                                * make sure that all running delayed iput are
+                                * done
+                                */
+                               down_write(&root->fs_info->delayed_iput_sem);
+                               up_write(&root->fs_info->delayed_iput_sem);
+                               goto again;
+                       } else {
+                               btrfs_end_transaction(trans, root);
+                       }
                }
 
                trace_btrfs_space_reservation(root->fs_info,
@@ -3736,12 +3969,16 @@ commit_trans:
                                              data_sinfo->flags, bytes, 1);
                return -ENOSPC;
        }
+       ret = btrfs_qgroup_reserve(root, write_bytes);
+       if (ret)
+               goto out;
        data_sinfo->bytes_may_use += bytes;
        trace_btrfs_space_reservation(root->fs_info, "space_info",
                                      data_sinfo->flags, bytes, 1);
+out:
        spin_unlock(&data_sinfo->lock);
 
-       return 0;
+       return ret;
 }
 
 /*
@@ -4298,8 +4535,13 @@ out:
 static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
                                        struct btrfs_fs_info *fs_info, u64 used)
 {
-       return (used >= div_factor_fine(space_info->total_bytes, 98) &&
-               !btrfs_fs_closing(fs_info) &&
+       u64 thresh = div_factor_fine(space_info->total_bytes, 98);
+
+       /* If we're just plain full then async reclaim just slows us down. */
+       if (space_info->bytes_used >= thresh)
+               return 0;
+
+       return (used >= thresh && !btrfs_fs_closing(fs_info) &&
                !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
 }
 
@@ -4354,10 +4596,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
                if (!btrfs_need_do_async_reclaim(space_info, fs_info,
                                                 flush_state))
                        return;
-       } while (flush_state <= COMMIT_TRANS);
-
-       if (btrfs_need_do_async_reclaim(space_info, fs_info, flush_state))
-               queue_work(system_unbound_wq, work);
+       } while (flush_state < COMMIT_TRANS);
 }
 
 void btrfs_init_async_reclaim_work(struct work_struct *work)
@@ -4700,6 +4939,11 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
        kfree(rsv);
 }
 
+void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
+{
+       kfree(rsv);
+}
+
 int btrfs_block_rsv_add(struct btrfs_root *root,
                        struct btrfs_block_rsv *block_rsv, u64 num_bytes,
                        enum btrfs_reserve_flush_enum flush)
@@ -4812,10 +5056,10 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 
        num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
                    csum_size * 2;
-       num_bytes += div64_u64(data_used + meta_used, 50);
+       num_bytes += div_u64(data_used + meta_used, 50);
 
        if (num_bytes * 3 > meta_used)
-               num_bytes = div64_u64(meta_used, 3);
+               num_bytes = div_u64(meta_used, 3);
 
        return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
 }
@@ -4998,8 +5242,6 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
                                      u64 qgroup_reserved)
 {
        btrfs_block_rsv_release(root, rsv, (u64)-1);
-       if (qgroup_reserved)
-               btrfs_qgroup_free(root, qgroup_reserved);
 }
 
 /**
@@ -5066,30 +5308,18 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
                                   int reserve)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 csum_size;
-       int num_csums_per_leaf;
-       int num_csums;
-       int old_csums;
+       u64 old_csums, num_csums;
 
        if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
            BTRFS_I(inode)->csum_bytes == 0)
                return 0;
 
-       old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
+       old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
        if (reserve)
                BTRFS_I(inode)->csum_bytes += num_bytes;
        else
                BTRFS_I(inode)->csum_bytes -= num_bytes;
-       csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
-       num_csums_per_leaf = (int)div64_u64(csum_size,
-                                           sizeof(struct btrfs_csum_item) +
-                                           sizeof(struct btrfs_disk_key));
-       num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
-       num_csums = num_csums + num_csums_per_leaf - 1;
-       num_csums = num_csums / num_csums_per_leaf;
-
-       old_csums = old_csums + num_csums_per_leaf - 1;
-       old_csums = old_csums / num_csums_per_leaf;
+       num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
 
        /* No change, no need to reserve more */
        if (old_csums == num_csums)
@@ -5163,8 +5393,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        spin_unlock(&BTRFS_I(inode)->lock);
 
        if (root->fs_info->quota_enabled) {
-               ret = btrfs_qgroup_reserve(root, num_bytes +
-                                          nr_extents * root->nodesize);
+               ret = btrfs_qgroup_reserve(root, nr_extents * root->nodesize);
                if (ret)
                        goto out_fail;
        }
@@ -5172,8 +5401,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
        if (unlikely(ret)) {
                if (root->fs_info->quota_enabled)
-                       btrfs_qgroup_free(root, num_bytes +
-                                               nr_extents * root->nodesize);
+                       btrfs_qgroup_free(root, nr_extents * root->nodesize);
                goto out_fail;
        }
 
@@ -5290,10 +5518,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 
        trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                      btrfs_ino(inode), to_free, 0);
-       if (root->fs_info->quota_enabled) {
-               btrfs_qgroup_free(root, num_bytes +
-                                       dropped * root->nodesize);
-       }
 
        btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
                                to_free);
@@ -5318,7 +5542,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
 {
        int ret;
 
-       ret = btrfs_check_data_free_space(inode, num_bytes);
+       ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes);
        if (ret)
                return ret;
 
@@ -5390,14 +5614,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                if (!alloc && cache->cached == BTRFS_CACHE_NO)
                        cache_block_group(cache, 1);
 
-               spin_lock(&trans->transaction->dirty_bgs_lock);
-               if (list_empty(&cache->dirty_list)) {
-                       list_add_tail(&cache->dirty_list,
-                                     &trans->transaction->dirty_bgs);
-                       btrfs_get_block_group(cache);
-               }
-               spin_unlock(&trans->transaction->dirty_bgs_lock);
-
                byte_in_group = bytenr - cache->key.objectid;
                WARN_ON(byte_in_group > cache->key.offset);
 
@@ -5446,6 +5662,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                                spin_unlock(&info->unused_bgs_lock);
                        }
                }
+
+               spin_lock(&trans->transaction->dirty_bgs_lock);
+               if (list_empty(&cache->dirty_list)) {
+                       list_add_tail(&cache->dirty_list,
+                                     &trans->transaction->dirty_bgs);
+                               trans->transaction->num_dirty_bgs++;
+                       btrfs_get_block_group(cache);
+               }
+               spin_unlock(&trans->transaction->dirty_bgs_lock);
+
                btrfs_put_block_group(cache);
                total -= num_bytes;
                bytenr += num_bytes;
@@ -6956,15 +7182,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
                return -ENOSPC;
        }
 
-       if (btrfs_test_opt(root, DISCARD))
-               ret = btrfs_discard_extent(root, start, len, NULL);
-
        if (pin)
                pin_down_extent(root, cache, start, len, 1);
        else {
+               if (btrfs_test_opt(root, DISCARD))
+                       ret = btrfs_discard_extent(root, start, len, NULL);
                btrfs_add_free_space(cache, start, len);
                btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
        }
+
        btrfs_put_block_group(cache);
 
        trace_btrfs_reserved_extent_free(root, start, len);
@@ -7095,9 +7321,9 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
                                      ins, size);
        if (ret) {
+               btrfs_free_path(path);
                btrfs_free_and_pin_reserved_extent(root, ins->objectid,
                                                   root->nodesize);
-               btrfs_free_path(path);
                return ret;
        }
 
@@ -7217,7 +7443,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        btrfs_set_header_generation(buf, trans->transid);
        btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
        btrfs_tree_lock(buf);
-       clean_tree_block(trans, root, buf);
+       clean_tree_block(trans, root->fs_info, buf);
        clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
 
        btrfs_set_lock_blocking(buf);
@@ -7815,7 +8041,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
        blocksize = root->nodesize;
 
-       next = btrfs_find_tree_block(root, bytenr);
+       next = btrfs_find_tree_block(root->fs_info, bytenr);
        if (!next) {
                next = btrfs_find_create_tree_block(root, bytenr);
                if (!next)
@@ -8016,7 +8242,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        btrfs_set_lock_blocking(eb);
                        path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                }
-               clean_tree_block(trans, root, eb);
+               clean_tree_block(trans, root->fs_info, eb);
        }
 
        if (eb == root->node) {
@@ -8533,10 +8759,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
        BUG_ON(cache->ro);
 
+again:
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
+       /*
+        * we're not allowed to set block groups readonly after the dirty
+        * block groups cache has started writing.  If it already started,
+        * back off and let this transaction commit
+        */
+       mutex_lock(&root->fs_info->ro_block_group_mutex);
+       if (trans->transaction->dirty_bg_run) {
+               u64 transid = trans->transid;
+
+               mutex_unlock(&root->fs_info->ro_block_group_mutex);
+               btrfs_end_transaction(trans, root);
+
+               ret = btrfs_wait_for_commit(root, transid);
+               if (ret)
+                       return ret;
+               goto again;
+       }
+
+
        ret = set_block_group_ro(cache, 0);
        if (!ret)
                goto out;
@@ -8551,6 +8797,7 @@ out:
                alloc_flags = update_block_group_flags(root, cache->flags);
                check_system_chunk(trans, root, alloc_flags);
        }
+       mutex_unlock(&root->fs_info->ro_block_group_mutex);
 
        btrfs_end_transaction(trans, root);
        return ret;
@@ -8720,7 +8967,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
                min_free <<= 1;
        } else if (index == BTRFS_RAID_RAID0) {
                dev_min = fs_devices->rw_devices;
-               do_div(min_free, dev_min);
+               min_free = div64_u64(min_free, dev_min);
        }
 
        /* We need to do this so that we can look at pending chunks */
@@ -8992,6 +9239,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
        INIT_LIST_HEAD(&cache->bg_list);
        INIT_LIST_HEAD(&cache->ro_list);
        INIT_LIST_HEAD(&cache->dirty_list);
+       INIT_LIST_HEAD(&cache->io_list);
        btrfs_init_free_space_ctl(cache);
        atomic_set(&cache->trimming, 0);
 
@@ -9355,7 +9603,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                goto out;
        }
 
+       /*
+        * get the inode first so any iput calls done for the io_list
+        * aren't the final iput (no unlinks allowed now)
+        */
        inode = lookup_free_space_inode(tree_root, block_group, path);
+
+       mutex_lock(&trans->transaction->cache_write_mutex);
+       /*
+        * make sure our free spache cache IO is done before remove the
+        * free space inode
+        */
+       spin_lock(&trans->transaction->dirty_bgs_lock);
+       if (!list_empty(&block_group->io_list)) {
+               list_del_init(&block_group->io_list);
+
+               WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
+
+               spin_unlock(&trans->transaction->dirty_bgs_lock);
+               btrfs_wait_cache_io(root, trans, block_group,
+                                   &block_group->io_ctl, path,
+                                   block_group->key.objectid);
+               btrfs_put_block_group(block_group);
+               spin_lock(&trans->transaction->dirty_bgs_lock);
+       }
+
+       if (!list_empty(&block_group->dirty_list)) {
+               list_del_init(&block_group->dirty_list);
+               btrfs_put_block_group(block_group);
+       }
+       spin_unlock(&trans->transaction->dirty_bgs_lock);
+       mutex_unlock(&trans->transaction->cache_write_mutex);
+
        if (!IS_ERR(inode)) {
                ret = btrfs_orphan_add(trans, inode);
                if (ret) {
@@ -9448,18 +9727,29 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
        spin_lock(&trans->transaction->dirty_bgs_lock);
        if (!list_empty(&block_group->dirty_list)) {
-               list_del_init(&block_group->dirty_list);
-               btrfs_put_block_group(block_group);
+               WARN_ON(1);
+       }
+       if (!list_empty(&block_group->io_list)) {
+               WARN_ON(1);
        }
        spin_unlock(&trans->transaction->dirty_bgs_lock);
-
        btrfs_remove_free_space_cache(block_group);
 
        spin_lock(&block_group->space_info->lock);
        list_del_init(&block_group->ro_list);
+
+       if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+               WARN_ON(block_group->space_info->total_bytes
+                       < block_group->key.offset);
+               WARN_ON(block_group->space_info->bytes_readonly
+                       < block_group->key.offset);
+               WARN_ON(block_group->space_info->disk_total
+                       < block_group->key.offset * factor);
+       }
        block_group->space_info->total_bytes -= block_group->key.offset;
        block_group->space_info->bytes_readonly -= block_group->key.offset;
        block_group->space_info->disk_total -= block_group->key.offset * factor;
+
        spin_unlock(&block_group->space_info->lock);
 
        memcpy(&key, &block_group->key, sizeof(key));
@@ -9647,8 +9937,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 
                /* Reset pinned so btrfs_put_block_group doesn't complain */
+               spin_lock(&space_info->lock);
+               spin_lock(&block_group->lock);
+
+               space_info->bytes_pinned -= block_group->pinned;
+               space_info->bytes_readonly += block_group->pinned;
+               percpu_counter_add(&space_info->total_bytes_pinned,
+                                  -block_group->pinned);
                block_group->pinned = 0;
 
+               spin_unlock(&block_group->lock);
+               spin_unlock(&space_info->lock);
+
                /*
                 * Btrfs_remove_chunk will abort the transaction if things go
                 * horribly wrong.
index d688cfe..782f3bc 100644 (file)
@@ -4514,8 +4514,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                }
                ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
                                              em_len, flags);
-               if (ret)
+               if (ret) {
+                       if (ret == 1)
+                               ret = 0;
                        goto out_free;
+               }
        }
 out_free:
        free_extent_map(em);
index 695b0cc..c668f36 100644 (file)
@@ -97,7 +97,7 @@ struct extent_io_tree {
        u64 dirty_bytes;
        int track_uptodate;
        spinlock_t lock;
-       struct extent_io_ops *ops;
+       const struct extent_io_ops *ops;
 };
 
 struct extent_state {
index 84a2d18..58ece65 100644 (file)
@@ -185,8 +185,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
        nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
        if (!dst) {
                if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
-                       btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
-                                                           GFP_NOFS);
+                       btrfs_bio->csum_allocated = kmalloc_array(nblocks,
+                                       csum_size, GFP_NOFS);
                        if (!btrfs_bio->csum_allocated) {
                                btrfs_free_path(path);
                                return -ENOMEM;
@@ -553,7 +553,7 @@ static noinline void truncate_one_csum(struct btrfs_root *root,
                btrfs_truncate_item(root, path, new_size, 0);
 
                key->offset = end_byte;
-               btrfs_set_item_key_safe(root, path, key);
+               btrfs_set_item_key_safe(root->fs_info, path, key);
        } else {
                BUG();
        }
index faa7d39..b072e17 100644 (file)
@@ -273,11 +273,7 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
                defrag = rb_entry(node, struct inode_defrag, rb_node);
                kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
 
-               if (need_resched()) {
-                       spin_unlock(&fs_info->defrag_inodes_lock);
-                       cond_resched();
-                       spin_lock(&fs_info->defrag_inodes_lock);
-               }
+               cond_resched_lock(&fs_info->defrag_inodes_lock);
 
                node = rb_first(&fs_info->defrag_inodes);
        }
@@ -868,7 +864,7 @@ next_slot:
 
                        memcpy(&new_key, &key, sizeof(new_key));
                        new_key.offset = end;
-                       btrfs_set_item_key_safe(root, path, &new_key);
+                       btrfs_set_item_key_safe(root->fs_info, path, &new_key);
 
                        extent_offset += end - key.offset;
                        btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@ -1126,7 +1122,7 @@ again:
                                     ino, bytenr, orig_offset,
                                     &other_start, &other_end)) {
                        new_key.offset = end;
-                       btrfs_set_item_key_safe(root, path, &new_key);
+                       btrfs_set_item_key_safe(root->fs_info, path, &new_key);
                        fi = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_file_extent_item);
                        btrfs_set_file_extent_generation(leaf, fi,
@@ -1160,7 +1156,7 @@ again:
                                                         trans->transid);
                        path->slots[0]++;
                        new_key.offset = start;
-                       btrfs_set_item_key_safe(root, path, &new_key);
+                       btrfs_set_item_key_safe(root->fs_info, path, &new_key);
 
                        fi = btrfs_item_ptr(leaf, path->slots[0],
                                            struct btrfs_file_extent_item);
@@ -1485,7 +1481,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                        PAGE_CACHE_SIZE / (sizeof(struct page *)));
        nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
        nrptrs = max(nrptrs, 8);
-       pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
+       pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
        if (!pages)
                return -ENOMEM;
 
@@ -1514,7 +1510,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                }
 
                reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
-               ret = btrfs_check_data_free_space(inode, reserve_bytes);
+               ret = btrfs_check_data_free_space(inode, reserve_bytes, write_bytes);
                if (ret == -ENOSPC &&
                    (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
                                              BTRFS_INODE_PREALLOC))) {
@@ -1635,8 +1631,8 @@ again:
                        btrfs_end_write_no_snapshoting(root);
 
                if (only_release_metadata && copied > 0) {
-                       u64 lockstart = round_down(pos, root->sectorsize);
-                       u64 lockend = lockstart +
+                       lockstart = round_down(pos, root->sectorsize);
+                       lockend = lockstart +
                                (dirty_pages << PAGE_CACHE_SHIFT) - 1;
 
                        set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
@@ -1809,7 +1805,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
         * otherwise subsequent syncs to a file that's been synced in this
         * transaction will appear to have already occured.
         */
+       spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->last_sub_trans = root->log_transid;
+       spin_unlock(&BTRFS_I(inode)->lock);
        if (num_written > 0) {
                err = generic_write_sync(file, pos, num_written);
                if (err < 0)
@@ -1864,7 +1862,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
 int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
        struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
        struct btrfs_log_ctx ctx;
@@ -2162,7 +2160,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
                u64 num_bytes;
 
                key.offset = offset;
-               btrfs_set_item_key_safe(root, path, &key);
+               btrfs_set_item_key_safe(root->fs_info, path, &key);
                fi = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_file_extent_item);
                num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@ -2545,7 +2543,6 @@ static long btrfs_fallocate(struct file *file, int mode,
 {
        struct inode *inode = file_inode(file);
        struct extent_state *cached_state = NULL;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 cur_offset;
        u64 last_byte;
        u64 alloc_start;
@@ -2570,14 +2567,9 @@ static long btrfs_fallocate(struct file *file, int mode,
         * Make sure we have enough space before we do the
         * allocation.
         */
-       ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+       ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start);
        if (ret)
                return ret;
-       if (root->fs_info->quota_enabled) {
-               ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
-               if (ret)
-                       goto out_reserve_fail;
-       }
 
        mutex_lock(&inode->i_mutex);
        ret = inode_newsize_ok(inode, alloc_end);
@@ -2667,23 +2659,35 @@ static long btrfs_fallocate(struct file *file, int mode,
                                                        1 << inode->i_blkbits,
                                                        offset + len,
                                                        &alloc_hint);
-
-                       if (ret < 0) {
-                               free_extent_map(em);
-                               break;
-                       }
                } else if (actual_end > inode->i_size &&
                           !(mode & FALLOC_FL_KEEP_SIZE)) {
+                       struct btrfs_trans_handle *trans;
+                       struct btrfs_root *root = BTRFS_I(inode)->root;
+
                        /*
                         * We didn't need to allocate any more space, but we
                         * still extended the size of the file so we need to
-                        * update i_size.
+                        * update i_size and the inode item.
                         */
-                       inode->i_ctime = CURRENT_TIME;
-                       i_size_write(inode, actual_end);
-                       btrfs_ordered_update_i_size(inode, actual_end, NULL);
+                       trans = btrfs_start_transaction(root, 1);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                       } else {
+                               inode->i_ctime = CURRENT_TIME;
+                               i_size_write(inode, actual_end);
+                               btrfs_ordered_update_i_size(inode, actual_end,
+                                                           NULL);
+                               ret = btrfs_update_inode(trans, root, inode);
+                               if (ret)
+                                       btrfs_end_transaction(trans, root);
+                               else
+                                       ret = btrfs_end_transaction(trans,
+                                                                   root);
+                       }
                }
                free_extent_map(em);
+               if (ret < 0)
+                       break;
 
                cur_offset = last_byte;
                if (cur_offset >= alloc_end) {
@@ -2695,9 +2699,6 @@ static long btrfs_fallocate(struct file *file, int mode,
                             &cached_state, GFP_NOFS);
 out:
        mutex_unlock(&inode->i_mutex);
-       if (root->fs_info->quota_enabled)
-               btrfs_qgroup_free(root, alloc_end - alloc_start);
-out_reserve_fail:
        /* Let go of our reservation. */
        btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
        return ret;
index a719785..81fa75a 100644 (file)
@@ -85,7 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
        }
 
        mapping_set_gfp_mask(inode->i_mapping,
-                       mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+                       mapping_gfp_mask(inode->i_mapping) &
+                       ~(GFP_NOFS & ~__GFP_HIGHMEM));
 
        return inode;
 }
@@ -170,13 +171,13 @@ static int __create_free_space_inode(struct btrfs_root *root,
        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
        key.offset = offset;
        key.type = 0;
-
        ret = btrfs_insert_empty_item(trans, root, path, &key,
                                      sizeof(struct btrfs_free_space_header));
        if (ret < 0) {
                btrfs_release_path(path);
                return ret;
        }
+
        leaf = path->nodes[0];
        header = btrfs_item_ptr(leaf, path->slots[0],
                                struct btrfs_free_space_header);
@@ -225,9 +226,37 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
 
 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                                    struct btrfs_trans_handle *trans,
+                                   struct btrfs_block_group_cache *block_group,
                                    struct inode *inode)
 {
        int ret = 0;
+       struct btrfs_path *path = btrfs_alloc_path();
+
+       if (!path) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       if (block_group) {
+               mutex_lock(&trans->transaction->cache_write_mutex);
+               if (!list_empty(&block_group->io_list)) {
+                       list_del_init(&block_group->io_list);
+
+                       btrfs_wait_cache_io(root, trans, block_group,
+                                           &block_group->io_ctl, path,
+                                           block_group->key.objectid);
+                       btrfs_put_block_group(block_group);
+               }
+
+               /*
+                * now that we've truncated the cache away, its no longer
+                * setup or written
+                */
+               spin_lock(&block_group->lock);
+               block_group->disk_cache_state = BTRFS_DC_CLEAR;
+               spin_unlock(&block_group->lock);
+       }
+       btrfs_free_path(path);
 
        btrfs_i_size_write(inode, 0);
        truncate_pagecache(inode, 0);
@@ -235,15 +264,23 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
        /*
         * We don't need an orphan item because truncating the free space cache
         * will never be split across transactions.
+        * We don't need to check for -EAGAIN because we're a free space
+        * cache inode
         */
        ret = btrfs_truncate_inode_items(trans, root, inode,
                                         0, BTRFS_EXTENT_DATA_KEY);
        if (ret) {
+               mutex_unlock(&trans->transaction->cache_write_mutex);
                btrfs_abort_transaction(trans, root, ret);
                return ret;
        }
 
        ret = btrfs_update_inode(trans, root, inode);
+
+       if (block_group)
+               mutex_unlock(&trans->transaction->cache_write_mutex);
+
+fail:
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
 
@@ -269,18 +306,7 @@ static int readahead_cache(struct inode *inode)
        return 0;
 }
 
-struct io_ctl {
-       void *cur, *orig;
-       struct page *page;
-       struct page **pages;
-       struct btrfs_root *root;
-       unsigned long size;
-       int index;
-       int num_pages;
-       unsigned check_crcs:1;
-};
-
-static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
+static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
                       struct btrfs_root *root, int write)
 {
        int num_pages;
@@ -296,45 +322,46 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
            (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
                return -ENOSPC;
 
-       memset(io_ctl, 0, sizeof(struct io_ctl));
+       memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
 
-       io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
+       io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
        if (!io_ctl->pages)
                return -ENOMEM;
 
        io_ctl->num_pages = num_pages;
        io_ctl->root = root;
        io_ctl->check_crcs = check_crcs;
+       io_ctl->inode = inode;
 
        return 0;
 }
 
-static void io_ctl_free(struct io_ctl *io_ctl)
+static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
 {
        kfree(io_ctl->pages);
+       io_ctl->pages = NULL;
 }
 
-static void io_ctl_unmap_page(struct io_ctl *io_ctl)
+static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
 {
        if (io_ctl->cur) {
-               kunmap(io_ctl->page);
                io_ctl->cur = NULL;
                io_ctl->orig = NULL;
        }
 }
 
-static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
+static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
 {
        ASSERT(io_ctl->index < io_ctl->num_pages);
        io_ctl->page = io_ctl->pages[io_ctl->index++];
-       io_ctl->cur = kmap(io_ctl->page);
+       io_ctl->cur = page_address(io_ctl->page);
        io_ctl->orig = io_ctl->cur;
        io_ctl->size = PAGE_CACHE_SIZE;
        if (clear)
                memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
 }
 
-static void io_ctl_drop_pages(struct io_ctl *io_ctl)
+static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
 {
        int i;
 
@@ -349,7 +376,7 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl)
        }
 }
 
-static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
+static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
                                int uptodate)
 {
        struct page *page;
@@ -383,7 +410,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
        return 0;
 }
 
-static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
+static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
 {
        __le64 *val;
 
@@ -406,7 +433,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
        io_ctl->cur += sizeof(u64);
 }
 
-static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
+static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
 {
        __le64 *gen;
 
@@ -435,7 +462,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
        return 0;
 }
 
-static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
+static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
 {
        u32 *tmp;
        u32 crc = ~(u32)0;
@@ -453,13 +480,12 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
                              PAGE_CACHE_SIZE - offset);
        btrfs_csum_final(crc, (char *)&crc);
        io_ctl_unmap_page(io_ctl);
-       tmp = kmap(io_ctl->pages[0]);
+       tmp = page_address(io_ctl->pages[0]);
        tmp += index;
        *tmp = crc;
-       kunmap(io_ctl->pages[0]);
 }
 
-static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
+static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
 {
        u32 *tmp, val;
        u32 crc = ~(u32)0;
@@ -473,10 +499,9 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
        if (index == 0)
                offset = sizeof(u32) * io_ctl->num_pages;
 
-       tmp = kmap(io_ctl->pages[0]);
+       tmp = page_address(io_ctl->pages[0]);
        tmp += index;
        val = *tmp;
-       kunmap(io_ctl->pages[0]);
 
        io_ctl_map_page(io_ctl, 0);
        crc = btrfs_csum_data(io_ctl->orig + offset, crc,
@@ -492,7 +517,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
        return 0;
 }
 
-static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
+static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
                            void *bitmap)
 {
        struct btrfs_free_space_entry *entry;
@@ -522,7 +547,7 @@ static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
        return 0;
 }
 
-static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
+static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
 {
        if (!io_ctl->cur)
                return -ENOSPC;
@@ -545,7 +570,7 @@ static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
        return 0;
 }
 
-static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
+static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
 {
        /*
         * If we're not on the boundary we know we've modified the page and we
@@ -562,7 +587,7 @@ static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
        }
 }
 
-static int io_ctl_read_entry(struct io_ctl *io_ctl,
+static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
                            struct btrfs_free_space *entry, u8 *type)
 {
        struct btrfs_free_space_entry *e;
@@ -589,7 +614,7 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl,
        return 0;
 }
 
-static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
+static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
                              struct btrfs_free_space *entry)
 {
        int ret;
@@ -648,7 +673,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
 {
        struct btrfs_free_space_header *header;
        struct extent_buffer *leaf;
-       struct io_ctl io_ctl;
+       struct btrfs_io_ctl io_ctl;
        struct btrfs_key key;
        struct btrfs_free_space *e, *n;
        LIST_HEAD(bitmaps);
@@ -877,7 +902,7 @@ out:
 }
 
 static noinline_for_stack
-int write_cache_extent_entries(struct io_ctl *io_ctl,
+int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
                              struct btrfs_free_space_ctl *ctl,
                              struct btrfs_block_group_cache *block_group,
                              int *entries, int *bitmaps,
@@ -885,6 +910,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
 {
        int ret;
        struct btrfs_free_cluster *cluster = NULL;
+       struct btrfs_free_cluster *cluster_locked = NULL;
        struct rb_node *node = rb_first(&ctl->free_space_offset);
        struct btrfs_trim_range *trim_entry;
 
@@ -896,6 +922,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
        }
 
        if (!node && cluster) {
+               cluster_locked = cluster;
+               spin_lock(&cluster_locked->lock);
                node = rb_first(&cluster->root);
                cluster = NULL;
        }
@@ -919,9 +947,15 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
                node = rb_next(node);
                if (!node && cluster) {
                        node = rb_first(&cluster->root);
+                       cluster_locked = cluster;
+                       spin_lock(&cluster_locked->lock);
                        cluster = NULL;
                }
        }
+       if (cluster_locked) {
+               spin_unlock(&cluster_locked->lock);
+               cluster_locked = NULL;
+       }
 
        /*
         * Make sure we don't miss any range that was removed from our rbtree
@@ -939,6 +973,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
 
        return 0;
 fail:
+       if (cluster_locked)
+               spin_unlock(&cluster_locked->lock);
        return -ENOSPC;
 }
 
@@ -1000,7 +1036,7 @@ fail:
 static noinline_for_stack int
 write_pinned_extent_entries(struct btrfs_root *root,
                            struct btrfs_block_group_cache *block_group,
-                           struct io_ctl *io_ctl,
+                           struct btrfs_io_ctl *io_ctl,
                            int *entries)
 {
        u64 start, extent_start, extent_end, len;
@@ -1050,7 +1086,7 @@ write_pinned_extent_entries(struct btrfs_root *root,
 }
 
 static noinline_for_stack int
-write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list)
+write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
 {
        struct list_head *pos, *n;
        int ret;
@@ -1083,10 +1119,7 @@ static int flush_dirty_cache(struct inode *inode)
 }
 
 static void noinline_for_stack
-cleanup_write_cache_enospc(struct inode *inode,
-                          struct io_ctl *io_ctl,
-                          struct extent_state **cached_state,
-                          struct list_head *bitmap_list)
+cleanup_bitmap_list(struct list_head *bitmap_list)
 {
        struct list_head *pos, *n;
 
@@ -1095,12 +1128,85 @@ cleanup_write_cache_enospc(struct inode *inode,
                        list_entry(pos, struct btrfs_free_space, list);
                list_del_init(&entry->list);
        }
+}
+
+static void noinline_for_stack
+cleanup_write_cache_enospc(struct inode *inode,
+                          struct btrfs_io_ctl *io_ctl,
+                          struct extent_state **cached_state,
+                          struct list_head *bitmap_list)
+{
        io_ctl_drop_pages(io_ctl);
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
                             i_size_read(inode) - 1, cached_state,
                             GFP_NOFS);
 }
 
+int btrfs_wait_cache_io(struct btrfs_root *root,
+                       struct btrfs_trans_handle *trans,
+                       struct btrfs_block_group_cache *block_group,
+                       struct btrfs_io_ctl *io_ctl,
+                       struct btrfs_path *path, u64 offset)
+{
+       int ret;
+       struct inode *inode = io_ctl->inode;
+
+       if (!inode)
+               return 0;
+
+       if (block_group)
+               root = root->fs_info->tree_root;
+
+       /* Flush the dirty pages in the cache file. */
+       ret = flush_dirty_cache(inode);
+       if (ret)
+               goto out;
+
+       /* Update the cache item to tell everyone this cache file is valid. */
+       ret = update_cache_item(trans, root, inode, path, offset,
+                               io_ctl->entries, io_ctl->bitmaps);
+out:
+       io_ctl_free(io_ctl);
+       if (ret) {
+               invalidate_inode_pages2(inode->i_mapping);
+               BTRFS_I(inode)->generation = 0;
+               if (block_group) {
+#ifdef DEBUG
+                       btrfs_err(root->fs_info,
+                               "failed to write free space cache for block group %llu",
+                               block_group->key.objectid);
+#endif
+               }
+       }
+       btrfs_update_inode(trans, root, inode);
+
+       if (block_group) {
+               /* the dirty list is protected by the dirty_bgs_lock */
+               spin_lock(&trans->transaction->dirty_bgs_lock);
+
+               /* the disk_cache_state is protected by the block group lock */
+               spin_lock(&block_group->lock);
+
+               /*
+                * only mark this as written if we didn't get put back on
+                * the dirty list while waiting for IO.   Otherwise our
+                * cache state won't be right, and we won't get written again
+                */
+               if (!ret && list_empty(&block_group->dirty_list))
+                       block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+               else if (ret)
+                       block_group->disk_cache_state = BTRFS_DC_ERROR;
+
+               spin_unlock(&block_group->lock);
+               spin_unlock(&trans->transaction->dirty_bgs_lock);
+               io_ctl->inode = NULL;
+               iput(inode);
+       }
+
+       return ret;
+
+}
+
 /**
  * __btrfs_write_out_cache - write out cached info to an inode
  * @root - the root the inode belongs to
@@ -1117,20 +1223,22 @@ cleanup_write_cache_enospc(struct inode *inode,
 static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                   struct btrfs_free_space_ctl *ctl,
                                   struct btrfs_block_group_cache *block_group,
+                                  struct btrfs_io_ctl *io_ctl,
                                   struct btrfs_trans_handle *trans,
                                   struct btrfs_path *path, u64 offset)
 {
        struct extent_state *cached_state = NULL;
-       struct io_ctl io_ctl;
        LIST_HEAD(bitmap_list);
        int entries = 0;
        int bitmaps = 0;
        int ret;
+       int must_iput = 0;
 
        if (!i_size_read(inode))
                return -1;
 
-       ret = io_ctl_init(&io_ctl, inode, root, 1);
+       WARN_ON(io_ctl->pages);
+       ret = io_ctl_init(io_ctl, inode, root, 1);
        if (ret)
                return -1;
 
@@ -1143,55 +1251,57 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                        up_write(&block_group->data_rwsem);
                        BTRFS_I(inode)->generation = 0;
                        ret = 0;
+                       must_iput = 1;
                        goto out;
                }
                spin_unlock(&block_group->lock);
        }
 
        /* Lock all pages first so we can lock the extent safely. */
-       io_ctl_prepare_pages(&io_ctl, inode, 0);
+       io_ctl_prepare_pages(io_ctl, inode, 0);
 
        lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
                         0, &cached_state);
 
-       io_ctl_set_generation(&io_ctl, trans->transid);
+       io_ctl_set_generation(io_ctl, trans->transid);
 
        mutex_lock(&ctl->cache_writeout_mutex);
        /* Write out the extent entries in the free space cache */
-       ret = write_cache_extent_entries(&io_ctl, ctl,
+       spin_lock(&ctl->tree_lock);
+       ret = write_cache_extent_entries(io_ctl, ctl,
                                         block_group, &entries, &bitmaps,
                                         &bitmap_list);
-       if (ret) {
-               mutex_unlock(&ctl->cache_writeout_mutex);
-               goto out_nospc;
-       }
+       if (ret)
+               goto out_nospc_locked;
 
        /*
         * Some spaces that are freed in the current transaction are pinned,
         * they will be added into free space cache after the transaction is
         * committed, we shouldn't lose them.
+        *
+        * If this changes while we are working we'll get added back to
+        * the dirty list and redo it.  No locking needed
         */
-       ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries);
-       if (ret) {
-               mutex_unlock(&ctl->cache_writeout_mutex);
-               goto out_nospc;
-       }
+       ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries);
+       if (ret)
+               goto out_nospc_locked;
 
        /*
         * At last, we write out all the bitmaps and keep cache_writeout_mutex
         * locked while doing it because a concurrent trim can be manipulating
         * or freeing the bitmap.
         */
-       ret = write_bitmap_entries(&io_ctl, &bitmap_list);
+       ret = write_bitmap_entries(io_ctl, &bitmap_list);
+       spin_unlock(&ctl->tree_lock);
        mutex_unlock(&ctl->cache_writeout_mutex);
        if (ret)
                goto out_nospc;
 
        /* Zero out the rest of the pages just to make sure */
-       io_ctl_zero_remaining_pages(&io_ctl);
+       io_ctl_zero_remaining_pages(io_ctl);
 
        /* Everything is written out, now we dirty the pages in the file. */
-       ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
+       ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
                                0, i_size_read(inode), &cached_state);
        if (ret)
                goto out_nospc;
@@ -1202,30 +1312,44 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
         * Release the pages and unlock the extent, we will flush
         * them out later
         */
-       io_ctl_drop_pages(&io_ctl);
+       io_ctl_drop_pages(io_ctl);
 
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
                             i_size_read(inode) - 1, &cached_state, GFP_NOFS);
 
-       /* Flush the dirty pages in the cache file. */
-       ret = flush_dirty_cache(inode);
+       /*
+        * at this point the pages are under IO and we're happy,
+        * The caller is responsible for waiting on them and updating the
+        * the cache and the inode
+        */
+       io_ctl->entries = entries;
+       io_ctl->bitmaps = bitmaps;
+
+       ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
        if (ret)
                goto out;
 
-       /* Update the cache item to tell everyone this cache file is valid. */
-       ret = update_cache_item(trans, root, inode, path, offset,
-                               entries, bitmaps);
+       return 0;
+
 out:
-       io_ctl_free(&io_ctl);
+       io_ctl->inode = NULL;
+       io_ctl_free(io_ctl);
        if (ret) {
                invalidate_inode_pages2(inode->i_mapping);
                BTRFS_I(inode)->generation = 0;
        }
        btrfs_update_inode(trans, root, inode);
+       if (must_iput)
+               iput(inode);
        return ret;
 
+out_nospc_locked:
+       cleanup_bitmap_list(&bitmap_list);
+       spin_unlock(&ctl->tree_lock);
+       mutex_unlock(&ctl->cache_writeout_mutex);
+
 out_nospc:
-       cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list);
+       cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);
 
        if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
                up_write(&block_group->data_rwsem);
@@ -1241,7 +1365,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
        struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        struct inode *inode;
        int ret = 0;
-       enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
 
        root = root->fs_info->tree_root;
 
@@ -1250,34 +1373,34 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                spin_unlock(&block_group->lock);
                return 0;
        }
-
-       if (block_group->delalloc_bytes) {
-               block_group->disk_cache_state = BTRFS_DC_WRITTEN;
-               spin_unlock(&block_group->lock);
-               return 0;
-       }
        spin_unlock(&block_group->lock);
 
        inode = lookup_free_space_inode(root, block_group, path);
        if (IS_ERR(inode))
                return 0;
 
-       ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
+       ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
+                                     &block_group->io_ctl, trans,
                                      path, block_group->key.objectid);
        if (ret) {
-               dcs = BTRFS_DC_ERROR;
-               ret = 0;
 #ifdef DEBUG
                btrfs_err(root->fs_info,
                        "failed to write free space cache for block group %llu",
                        block_group->key.objectid);
 #endif
+               spin_lock(&block_group->lock);
+               block_group->disk_cache_state = BTRFS_DC_ERROR;
+               spin_unlock(&block_group->lock);
+
+               block_group->io_ctl.inode = NULL;
+               iput(inode);
        }
 
-       spin_lock(&block_group->lock);
-       block_group->disk_cache_state = dcs;
-       spin_unlock(&block_group->lock);
-       iput(inode);
+       /*
+        * if ret == 0 the caller is expected to call btrfs_wait_cache_io
+        * to wait for IO and put the inode
+        */
+
        return ret;
 }
 
@@ -1298,11 +1421,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
                                   u64 offset)
 {
        u64 bitmap_start;
-       u64 bytes_per_bitmap;
+       u32 bytes_per_bitmap;
 
        bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
        bitmap_start = offset - ctl->start;
-       bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
+       bitmap_start = div_u64(bitmap_start, bytes_per_bitmap);
        bitmap_start *= bytes_per_bitmap;
        bitmap_start += ctl->start;
 
@@ -1521,10 +1644,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
        u64 bitmap_bytes;
        u64 extent_bytes;
        u64 size = block_group->key.offset;
-       u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
-       int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
+       u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
+       u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg);
 
-       max_bitmaps = max(max_bitmaps, 1);
+       max_bitmaps = max_t(u32, max_bitmaps, 1);
 
        ASSERT(ctl->total_bitmaps <= max_bitmaps);
 
@@ -1537,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
                max_bytes = MAX_CACHE_BYTES_PER_GIG;
        else
                max_bytes = MAX_CACHE_BYTES_PER_GIG *
-                       div64_u64(size, 1024 * 1024 * 1024);
+                       div_u64(size, 1024 * 1024 * 1024);
 
        /*
         * we want to account for 1 more bitmap than what we have so we can make
@@ -1552,14 +1675,14 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
        }
 
        /*
-        * we want the extent entry threshold to always be at most 1/2 the maxw
+        * we want the extent entry threshold to always be at most 1/2 the max
         * bytes we can have, or whatever is less than that.
         */
        extent_bytes = max_bytes - bitmap_bytes;
-       extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
+       extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
 
        ctl->extents_thresh =
-               div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
+               div_u64(extent_bytes, sizeof(struct btrfs_free_space));
 }
 
 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
@@ -1673,7 +1796,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
                 */
                if (*bytes >= align) {
                        tmp = entry->offset - ctl->start + align - 1;
-                       do_div(tmp, align);
+                       tmp = div64_u64(tmp, align);
                        tmp = tmp * align + ctl->start;
                        align_off = tmp - entry->offset;
                } else {
@@ -2402,11 +2525,8 @@ static void __btrfs_remove_free_space_cache_locked(
                } else {
                        free_bitmap(ctl, info);
                }
-               if (need_resched()) {
-                       spin_unlock(&ctl->tree_lock);
-                       cond_resched();
-                       spin_lock(&ctl->tree_lock);
-               }
+
+               cond_resched_lock(&ctl->tree_lock);
        }
 }
 
@@ -2431,11 +2551,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
 
                WARN_ON(cluster->block_group != block_group);
                __btrfs_return_cluster_to_free_space(block_group, cluster);
-               if (need_resched()) {
-                       spin_unlock(&ctl->tree_lock);
-                       cond_resched();
-                       spin_lock(&ctl->tree_lock);
-               }
+
+               cond_resched_lock(&ctl->tree_lock);
        }
        __btrfs_remove_free_space_cache_locked(ctl);
        spin_unlock(&ctl->tree_lock);
@@ -3346,11 +3463,17 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 {
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        int ret;
+       struct btrfs_io_ctl io_ctl;
 
        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
                return 0;
 
-       ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
+       memset(&io_ctl, 0, sizeof(io_ctl));
+       ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
+                                     trans, path, 0);
+       if (!ret)
+               ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+
        if (ret) {
                btrfs_delalloc_release_metadata(inode, inode->i_size);
 #ifdef DEBUG
index 88b2238..a16a029 100644 (file)
@@ -48,6 +48,8 @@ struct btrfs_free_space_op {
                           struct btrfs_free_space *info);
 };
 
+struct btrfs_io_ctl;
+
 struct inode *lookup_free_space_inode(struct btrfs_root *root,
                                      struct btrfs_block_group_cache
                                      *block_group, struct btrfs_path *path);
@@ -60,14 +62,19 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
                                       struct btrfs_block_rsv *rsv);
 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                                    struct btrfs_trans_handle *trans,
+                                   struct btrfs_block_group_cache *block_group,
                                    struct inode *inode);
 int load_free_space_cache(struct btrfs_fs_info *fs_info,
                          struct btrfs_block_group_cache *block_group);
+int btrfs_wait_cache_io(struct btrfs_root *root,
+                       struct btrfs_trans_handle *trans,
+                       struct btrfs_block_group_cache *block_group,
+                       struct btrfs_io_ctl *io_ctl,
+                       struct btrfs_path *path, u64 offset);
 int btrfs_write_out_cache(struct btrfs_root *root,
                          struct btrfs_trans_handle *trans,
                          struct btrfs_block_group_cache *block_group,
                          struct btrfs_path *path);
-
 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
                                    struct btrfs_path *path);
 int create_free_ino_inode(struct btrfs_root *root,
index 74faea3..f6a596d 100644 (file)
@@ -456,7 +456,7 @@ again:
        }
 
        if (i_size_read(inode) > 0) {
-               ret = btrfs_truncate_free_space_cache(root, trans, inode);
+               ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
                if (ret) {
                        if (ret != -ENOSPC)
                                btrfs_abort_transaction(trans, root, ret);
index 43192e1..ada4d24 100644 (file)
@@ -59,6 +59,7 @@
 #include "backref.h"
 #include "hash.h"
 #include "props.h"
+#include "qgroup.h"
 
 struct btrfs_iget_args {
        struct btrfs_key *location;
@@ -470,7 +471,7 @@ again:
         */
        if (inode_need_compress(inode)) {
                WARN_ON(pages);
-               pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+               pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
                if (!pages) {
                        /* just bail out to the uncompressed code */
                        goto cont;
@@ -752,7 +753,6 @@ retry:
                        }
                        goto out_free;
                }
-
                /*
                 * here we're doing allocation and writeback of the
                 * compressed pages
@@ -3110,6 +3110,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
        if (empty)
                return;
 
+       down_read(&fs_info->delayed_iput_sem);
+
        spin_lock(&fs_info->delayed_iput_lock);
        list_splice_init(&fs_info->delayed_iputs, &list);
        spin_unlock(&fs_info->delayed_iput_lock);
@@ -3120,6 +3122,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
                iput(delayed->inode);
                kfree(delayed);
        }
+
+       up_read(&root->fs_info->delayed_iput_sem);
 }
 
 /*
@@ -4016,16 +4020,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_trans_handle *trans;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int ret;
 
        trans = __unlink_start_trans(dir);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
+       btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0);
 
-       ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
+       ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
                                 dentry->d_name.name, dentry->d_name.len);
        if (ret)
                goto out;
@@ -4124,7 +4128,7 @@ out:
 
 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int err = 0;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_trans_handle *trans;
@@ -4151,7 +4155,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
                goto out;
 
        /* now the directory is empty */
-       err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
+       err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
                                 dentry->d_name.name, dentry->d_name.len);
        if (!err)
                btrfs_i_size_write(inode, 0);
@@ -4162,6 +4166,21 @@ out:
        return err;
 }
 
+static int truncate_space_check(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               u64 bytes_deleted)
+{
+       int ret;
+
+       bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted);
+       ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
+                                 bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
+       if (!ret)
+               trans->bytes_reserved += bytes_deleted;
+       return ret;
+
+}
+
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
@@ -4197,9 +4216,21 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        int ret;
        int err = 0;
        u64 ino = btrfs_ino(inode);
+       u64 bytes_deleted = 0;
+       bool be_nice = 0;
+       bool should_throttle = 0;
+       bool should_end = 0;
 
        BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
 
+       /*
+        * for non-free space inodes and ref cows, we want to back off from
+        * time to time
+        */
+       if (!btrfs_is_free_space_inode(inode) &&
+           test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+               be_nice = 1;
+
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -4229,6 +4260,19 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        key.type = (u8)-1;
 
 search_again:
+       /*
+        * with a 16K leaf size and 128MB extents, you can actually queue
+        * up a huge file in a single leaf.  Most of the time that
+        * bytes_deleted is > 0, it will be huge by the time we get here
+        */
+       if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+               if (btrfs_should_end_transaction(trans, root)) {
+                       err = -EAGAIN;
+                       goto error;
+               }
+       }
+
+
        path->leave_spinning = 1;
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret < 0) {
@@ -4371,22 +4415,39 @@ delete:
                } else {
                        break;
                }
+               should_throttle = 0;
+
                if (found_extent &&
                    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
                     root == root->fs_info->tree_root)) {
                        btrfs_set_path_blocking(path);
+                       bytes_deleted += extent_num_bytes;
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes, 0,
                                                btrfs_header_owner(leaf),
                                                ino, extent_offset, 0);
                        BUG_ON(ret);
+                       if (btrfs_should_throttle_delayed_refs(trans, root))
+                               btrfs_async_run_delayed_refs(root,
+                                       trans->delayed_ref_updates * 2, 0);
+                       if (be_nice) {
+                               if (truncate_space_check(trans, root,
+                                                        extent_num_bytes)) {
+                                       should_end = 1;
+                               }
+                               if (btrfs_should_throttle_delayed_refs(trans,
+                                                                      root)) {
+                                       should_throttle = 1;
+                               }
+                       }
                }
 
                if (found_type == BTRFS_INODE_ITEM_KEY)
                        break;
 
                if (path->slots[0] == 0 ||
-                   path->slots[0] != pending_del_slot) {
+                   path->slots[0] != pending_del_slot ||
+                   should_throttle || should_end) {
                        if (pending_del_nr) {
                                ret = btrfs_del_items(trans, root, path,
                                                pending_del_slot,
@@ -4399,6 +4460,23 @@ delete:
                                pending_del_nr = 0;
                        }
                        btrfs_release_path(path);
+                       if (should_throttle) {
+                               unsigned long updates = trans->delayed_ref_updates;
+                               if (updates) {
+                                       trans->delayed_ref_updates = 0;
+                                       ret = btrfs_run_delayed_refs(trans, root, updates * 2);
+                                       if (ret && !err)
+                                               err = ret;
+                               }
+                       }
+                       /*
+                        * if we failed to refill our space rsv, bail out
+                        * and let the transaction restart
+                        */
+                       if (should_end) {
+                               err = -EAGAIN;
+                               goto error;
+                       }
                        goto search_again;
                } else {
                        path->slots[0]--;
@@ -4415,7 +4493,18 @@ error:
        if (last_size != (u64)-1 &&
            root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
                btrfs_ordered_update_i_size(inode, last_size, NULL);
+
        btrfs_free_path(path);
+
+       if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+               unsigned long updates = trans->delayed_ref_updates;
+               if (updates) {
+                       trans->delayed_ref_updates = 0;
+                       ret = btrfs_run_delayed_refs(trans, root, updates * 2);
+                       if (ret && !err)
+                               err = ret;
+               }
+       }
        return err;
 }
 
@@ -4826,7 +4915,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 
 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int err;
 
@@ -4924,6 +5013,7 @@ void btrfs_evict_inode(struct inode *inode)
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_block_rsv *rsv, *global_rsv;
+       int steal_from_global = 0;
        u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
        int ret;
 
@@ -4991,9 +5081,20 @@ void btrfs_evict_inode(struct inode *inode)
                 * hard as possible to get this to work.
                 */
                if (ret)
-                       ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
+                       steal_from_global++;
+               else
+                       steal_from_global = 0;
+               ret = 0;
 
-               if (ret) {
+               /*
+                * steal_from_global == 0: we reserved stuff, hooray!
+                * steal_from_global == 1: we didn't reserve stuff, boo!
+                * steal_from_global == 2: we've committed, still not a lot of
+                * room but maybe we'll have room in the global reserve this
+                * time.
+                * steal_from_global == 3: abandon all hope!
+                */
+               if (steal_from_global > 2) {
                        btrfs_warn(root->fs_info,
                                "Could not get space for a delete, will truncate on mount %d",
                                ret);
@@ -5009,10 +5110,40 @@ void btrfs_evict_inode(struct inode *inode)
                        goto no_delete;
                }
 
+               /*
+                * We can't just steal from the global reserve, we need tomake
+                * sure there is room to do it, if not we need to commit and try
+                * again.
+                */
+               if (steal_from_global) {
+                       if (!btrfs_check_space_for_delayed_refs(trans, root))
+                               ret = btrfs_block_rsv_migrate(global_rsv, rsv,
+                                                             min_size);
+                       else
+                               ret = -ENOSPC;
+               }
+
+               /*
+                * Couldn't steal from the global reserve, we have too much
+                * pending stuff built up, commit the transaction and try it
+                * again.
+                */
+               if (ret) {
+                       ret = btrfs_commit_transaction(trans, root);
+                       if (ret) {
+                               btrfs_orphan_del(NULL, inode);
+                               btrfs_free_block_rsv(root, rsv);
+                               goto no_delete;
+                       }
+                       continue;
+               } else {
+                       steal_from_global = 0;
+               }
+
                trans->block_rsv = rsv;
 
                ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
-               if (ret != -ENOSPC)
+               if (ret != -ENOSPC && ret != -EAGAIN)
                        break;
 
                trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -5416,10 +5547,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 static int btrfs_dentry_delete(const struct dentry *dentry)
 {
        struct btrfs_root *root;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (!inode && !IS_ROOT(dentry))
-               inode = dentry->d_parent->d_inode;
+               inode = d_inode(dentry->d_parent);
 
        if (inode) {
                root = BTRFS_I(inode)->root;
@@ -6226,7 +6357,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(dir)->root;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        u64 index;
        int err;
        int drop_inode = 0;
@@ -8129,7 +8260,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset))
                return 0;
 
-       atomic_inc(&inode->i_dio_count);
+       inode_dio_begin(inode);
        smp_mb__after_atomic();
 
        /*
@@ -8169,7 +8300,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                current->journal_info = &outstanding_extents;
        } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                     &BTRFS_I(inode)->runtime_flags)) {
-               inode_dio_done(inode);
+               inode_dio_end(inode);
                flags = DIO_LOCKING | DIO_SKIP_HOLES;
                wakeup = false;
        }
@@ -8188,7 +8319,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
        }
 out:
        if (wakeup)
-               inode_dio_done(inode);
+               inode_dio_end(inode);
        if (relock)
                mutex_lock(&inode->i_mutex);
 
@@ -8581,7 +8712,7 @@ static int btrfs_truncate(struct inode *inode)
                ret = btrfs_truncate_inode_items(trans, root, inode,
                                                 inode->i_size,
                                                 BTRFS_EXTENT_DATA_KEY);
-               if (ret != -ENOSPC) {
+               if (ret != -ENOSPC && ret != -EAGAIN) {
                        err = ret;
                        break;
                }
@@ -8875,7 +9006,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
                         struct dentry *dentry, struct kstat *stat)
 {
        u64 delalloc_bytes;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        u32 blocksize = inode->i_sb->s_blocksize;
 
        generic_fillattr(inode, stat);
@@ -8896,8 +9027,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(old_dir)->root;
        struct btrfs_root *dest = BTRFS_I(new_dir)->root;
-       struct inode *new_inode = new_dentry->d_inode;
-       struct inode *old_inode = old_dentry->d_inode;
+       struct inode *new_inode = d_inode(new_dentry);
+       struct inode *old_inode = d_inode(old_dentry);
        struct timespec ctime = CURRENT_TIME;
        u64 index = 0;
        u64 root_objectid;
@@ -9009,7 +9140,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                        old_dentry->d_name.len);
        } else {
                ret = __btrfs_unlink_inode(trans, root, old_dir,
-                                       old_dentry->d_inode,
+                                       d_inode(old_dentry),
                                        old_dentry->d_name.name,
                                        old_dentry->d_name.len);
                if (!ret)
@@ -9033,12 +9164,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        BUG_ON(new_inode->i_nlink == 0);
                } else {
                        ret = btrfs_unlink_inode(trans, dest, new_dir,
-                                                new_dentry->d_inode,
+                                                d_inode(new_dentry),
                                                 new_dentry->d_name.name,
                                                 new_dentry->d_name.len);
                }
                if (!ret && new_inode->i_nlink == 0)
-                       ret = btrfs_orphan_add(trans, new_dentry->d_inode);
+                       ret = btrfs_orphan_add(trans, d_inode(new_dentry));
                if (ret) {
                        btrfs_abort_transaction(trans, root, ret);
                        goto out_fail;
@@ -9451,6 +9582,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                                btrfs_end_transaction(trans, root);
                        break;
                }
+
                btrfs_drop_extent_cache(inode, cur_offset,
                                        cur_offset + ins.offset -1, 0);
 
index 74609b9..b05653f 100644 (file)
@@ -456,6 +456,13 @@ static noinline int create_subvol(struct inode *dir,
        if (ret)
                return ret;
 
+       /*
+        * Don't create subvolume whose level is not zero. Or qgroup will be
+        * screwed up since it assume subvolme qgroup's level to be 0.
+        */
+       if (btrfs_qgroup_level(objectid))
+               return -ENOSPC;
+
        btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
        /*
         * The same as the snapshot creation, please see the comment
@@ -717,7 +724,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        if (ret)
                goto fail;
 
-       inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+       inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
        if (IS_ERR(inode)) {
                ret = PTR_ERR(inode);
                goto fail;
@@ -761,10 +768,10 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
 {
        int error;
 
-       if (!victim->d_inode)
+       if (d_really_is_negative(victim))
                return -ENOENT;
 
-       BUG_ON(victim->d_parent->d_inode != dir);
+       BUG_ON(d_inode(victim->d_parent) != dir);
        audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
 
        error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
@@ -772,8 +779,8 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
                return error;
        if (IS_APPEND(dir))
                return -EPERM;
-       if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) ||
-           IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+       if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) ||
+           IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim)))
                return -EPERM;
        if (isdir) {
                if (!d_is_dir(victim))
@@ -792,7 +799,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
 /* copy of may_create in fs/namei.c() */
 static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
 {
-       if (child->d_inode)
+       if (d_really_is_positive(child))
                return -EEXIST;
        if (IS_DEADDIR(dir))
                return -ENOENT;
@@ -810,7 +817,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
                                   u64 *async_transid, bool readonly,
                                   struct btrfs_qgroup_inherit *inherit)
 {
-       struct inode *dir  = parent->dentry->d_inode;
+       struct inode *dir  = d_inode(parent->dentry);
        struct dentry *dentry;
        int error;
 
@@ -824,7 +831,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
                goto out_unlock;
 
        error = -EEXIST;
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                goto out_dput;
 
        error = btrfs_may_create(dir, dentry);
@@ -1564,7 +1571,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
                goto out_free;
        }
 
-       do_div(new_size, root->sectorsize);
+       new_size = div_u64(new_size, root->sectorsize);
        new_size *= root->sectorsize;
 
        printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
@@ -2294,7 +2301,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 {
        struct dentry *parent = file->f_path.dentry;
        struct dentry *dentry;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        struct inode *inode;
        struct btrfs_root *root = BTRFS_I(dir)->root;
        struct btrfs_root *dest = NULL;
@@ -2333,12 +2340,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                goto out_unlock_dir;
        }
 
-       if (!dentry->d_inode) {
+       if (d_really_is_negative(dentry)) {
                err = -ENOENT;
                goto out_dput;
        }
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        dest = BTRFS_I(inode)->root;
        if (!capable(CAP_SYS_ADMIN)) {
                /*
@@ -2897,6 +2904,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
        if (src == dst)
                return -EINVAL;
 
+       if (len == 0)
+               return 0;
+
        btrfs_double_lock(src, loff, dst, dst_loff, len);
 
        ret = extent_same_check_offsets(src, loff, len);
@@ -3039,7 +3049,7 @@ out:
 static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                     u64 disko)
 {
-       struct seq_list tree_mod_seq_elem = {};
+       struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
        struct ulist *roots;
        struct ulist_iterator uiter;
        struct ulist_node *root_node = NULL;
@@ -3202,6 +3212,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
        key.offset = off;
 
        while (1) {
+               u64 next_key_min_offset = key.offset + 1;
+
                /*
                 * note the key will change type as we walk through the
                 * tree.
@@ -3282,7 +3294,7 @@ process_slot:
                        } else if (key.offset >= off + len) {
                                break;
                        }
-
+                       next_key_min_offset = key.offset + datal;
                        size = btrfs_item_size_nr(leaf, slot);
                        read_extent_buffer(leaf, buf,
                                           btrfs_item_ptr_offset(leaf, slot),
@@ -3497,7 +3509,7 @@ process_slot:
                                break;
                }
                btrfs_release_path(path);
-               key.offset++;
+               key.offset = next_key_min_offset;
        }
        ret = 0;
 
@@ -3626,6 +3638,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
        if (off + len == src->i_size)
                len = ALIGN(src->i_size, bs) - off;
 
+       if (len == 0) {
+               ret = 0;
+               goto out_unlock;
+       }
+
        /* verify the end result is block aligned */
        if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
            !IS_ALIGNED(destoff, bs))
@@ -4624,6 +4641,11 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
                                                sa->src, sa->dst);
        }
 
+       /* update qgroup status and info */
+       err = btrfs_run_qgroups(trans, root->fs_info);
+       if (err < 0)
+               btrfs_error(root->fs_info, ret,
+                           "failed to update qgroup status and info\n");
        err = btrfs_end_transaction(trans, root);
        if (err && !ret)
                ret = err;
@@ -4669,8 +4691,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
 
        /* FIXME: check if the IDs really exist */
        if (sa->create) {
-               ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
-                                         NULL);
+               ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid);
        } else {
                ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
        }
index 617553c..a2f0513 100644 (file)
@@ -434,7 +434,7 @@ out:
        return ret;
 }
 
-struct btrfs_compress_op btrfs_lzo_compress = {
+const struct btrfs_compress_op btrfs_lzo_compress = {
        .alloc_workspace        = lzo_alloc_workspace,
        .free_workspace         = lzo_free_workspace,
        .compress_pages         = lzo_compress_pages,
index b7816ce..1b10a3c 100644 (file)
@@ -28,8 +28,7 @@ static inline u64 div_factor(u64 num, int factor)
        if (factor == 10)
                return num;
        num *= factor;
-       do_div(num, 10);
-       return num;
+       return div_u64(num, 10);
 }
 
 static inline u64 div_factor_fine(u64 num, int factor)
@@ -37,8 +36,7 @@ static inline u64 div_factor_fine(u64 num, int factor)
        if (factor == 100)
                return num;
        num *= factor;
-       do_div(num, 100);
-       return num;
+       return div_u64(num, 100);
 }
 
 #endif
index 129b1dd..dca137b 100644 (file)
@@ -425,3 +425,5 @@ static const char *prop_compression_extract(struct inode *inode)
 
        return NULL;
 }
+
+
index 058c79e..3d65465 100644 (file)
@@ -644,9 +644,8 @@ out:
 }
 
 static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
-                                   struct btrfs_root *root, u64 qgroupid,
-                                   u64 flags, u64 max_rfer, u64 max_excl,
-                                   u64 rsv_rfer, u64 rsv_excl)
+                                   struct btrfs_root *root,
+                                   struct btrfs_qgroup *qgroup)
 {
        struct btrfs_path *path;
        struct btrfs_key key;
@@ -657,7 +656,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
 
        key.objectid = 0;
        key.type = BTRFS_QGROUP_LIMIT_KEY;
-       key.offset = qgroupid;
+       key.offset = qgroup->qgroupid;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -673,11 +672,11 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
        l = path->nodes[0];
        slot = path->slots[0];
        qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
-       btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
-       btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
-       btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
-       btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
-       btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
+       btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
+       btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
+       btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
+       btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
+       btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
 
        btrfs_mark_buffer_dirty(l);
 
@@ -967,6 +966,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
        fs_info->pending_quota_state = 0;
        quota_root = fs_info->quota_root;
        fs_info->quota_root = NULL;
+       fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
        spin_unlock(&fs_info->qgroup_lock);
 
        btrfs_free_qgroup_config(fs_info);
@@ -982,7 +982,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
        list_del(&quota_root->dirty_list);
 
        btrfs_tree_lock(quota_root->node);
-       clean_tree_block(trans, tree_root, quota_root->node);
+       clean_tree_block(trans, tree_root->fs_info, quota_root->node);
        btrfs_tree_unlock(quota_root->node);
        btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
 
@@ -1001,6 +1001,110 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
                list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 }
 
+/*
+ * The easy accounting, if we are adding/removing the only ref for an extent
+ * then this qgroup and all of the parent qgroups get their refrence and
+ * exclusive counts adjusted.
+ *
+ * Caller should hold fs_info->qgroup_lock.
+ */
+static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
+                                   struct ulist *tmp, u64 ref_root,
+                                   u64 num_bytes, int sign)
+{
+       struct btrfs_qgroup *qgroup;
+       struct btrfs_qgroup_list *glist;
+       struct ulist_node *unode;
+       struct ulist_iterator uiter;
+       int ret = 0;
+
+       qgroup = find_qgroup_rb(fs_info, ref_root);
+       if (!qgroup)
+               goto out;
+
+       qgroup->rfer += sign * num_bytes;
+       qgroup->rfer_cmpr += sign * num_bytes;
+
+       WARN_ON(sign < 0 && qgroup->excl < num_bytes);
+       qgroup->excl += sign * num_bytes;
+       qgroup->excl_cmpr += sign * num_bytes;
+       if (sign > 0)
+               qgroup->reserved -= num_bytes;
+
+       qgroup_dirty(fs_info, qgroup);
+
+       /* Get all of the parent groups that contain this qgroup */
+       list_for_each_entry(glist, &qgroup->groups, next_group) {
+               ret = ulist_add(tmp, glist->group->qgroupid,
+                               ptr_to_u64(glist->group), GFP_ATOMIC);
+               if (ret < 0)
+                       goto out;
+       }
+
+       /* Iterate all of the parents and adjust their reference counts */
+       ULIST_ITER_INIT(&uiter);
+       while ((unode = ulist_next(tmp, &uiter))) {
+               qgroup = u64_to_ptr(unode->aux);
+               qgroup->rfer += sign * num_bytes;
+               qgroup->rfer_cmpr += sign * num_bytes;
+               WARN_ON(sign < 0 && qgroup->excl < num_bytes);
+               qgroup->excl += sign * num_bytes;
+               if (sign > 0)
+                       qgroup->reserved -= num_bytes;
+               qgroup->excl_cmpr += sign * num_bytes;
+               qgroup_dirty(fs_info, qgroup);
+
+               /* Add any parents of the parents */
+               list_for_each_entry(glist, &qgroup->groups, next_group) {
+                       ret = ulist_add(tmp, glist->group->qgroupid,
+                                       ptr_to_u64(glist->group), GFP_ATOMIC);
+                       if (ret < 0)
+                               goto out;
+               }
+       }
+       ret = 0;
+out:
+       return ret;
+}
+
+
+/*
+ * Quick path for updating qgroup with only excl refs.
+ *
+ * In that case, just update all parent will be enough.
+ * Or we needs to do a full rescan.
+ * Caller should also hold fs_info->qgroup_lock.
+ *
+ * Return 0 for quick update, return >0 for need to full rescan
+ * and mark INCONSISTENT flag.
+ * Return < 0 for other error.
+ */
+static int quick_update_accounting(struct btrfs_fs_info *fs_info,
+                                  struct ulist *tmp, u64 src, u64 dst,
+                                  int sign)
+{
+       struct btrfs_qgroup *qgroup;
+       int ret = 1;
+       int err = 0;
+
+       qgroup = find_qgroup_rb(fs_info, src);
+       if (!qgroup)
+               goto out;
+       if (qgroup->excl == qgroup->rfer) {
+               ret = 0;
+               err = __qgroup_excl_accounting(fs_info, tmp, dst,
+                                              qgroup->excl, sign);
+               if (err < 0) {
+                       ret = err;
+                       goto out;
+               }
+       }
+out:
+       if (ret)
+               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+       return ret;
+}
+
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
 {
@@ -1008,8 +1112,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup *member;
        struct btrfs_qgroup_list *list;
+       struct ulist *tmp;
        int ret = 0;
 
+       tmp = ulist_alloc(GFP_NOFS);
+       if (!tmp)
+               return -ENOMEM;
+
+       /* Check the level of src and dst first */
+       if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
+               return -EINVAL;
+
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
@@ -1043,23 +1156,33 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 
        spin_lock(&fs_info->qgroup_lock);
        ret = add_relation_rb(quota_root->fs_info, src, dst);
+       if (ret < 0) {
+               spin_unlock(&fs_info->qgroup_lock);
+               goto out;
+       }
+       ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
        spin_unlock(&fs_info->qgroup_lock);
 out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
+       ulist_free(tmp);
        return ret;
 }
 
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+int __del_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst)
 {
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *parent;
        struct btrfs_qgroup *member;
        struct btrfs_qgroup_list *list;
+       struct ulist *tmp;
        int ret = 0;
        int err;
 
-       mutex_lock(&fs_info->qgroup_ioctl_lock);
+       tmp = ulist_alloc(GFP_NOFS);
+       if (!tmp)
+               return -ENOMEM;
+
        quota_root = fs_info->quota_root;
        if (!quota_root) {
                ret = -EINVAL;
@@ -1088,14 +1211,27 @@ exist:
 
        spin_lock(&fs_info->qgroup_lock);
        del_relation_rb(fs_info, src, dst);
+       ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
        spin_unlock(&fs_info->qgroup_lock);
 out:
+       ulist_free(tmp);
+       return ret;
+}
+
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+                             struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+{
+       int ret = 0;
+
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
+       ret = __del_qgroup_relation(trans, fs_info, src, dst);
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
        return ret;
 }
 
 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-                       struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
+                       struct btrfs_fs_info *fs_info, u64 qgroupid)
 {
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
@@ -1133,6 +1269,7 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
 {
        struct btrfs_root *quota_root;
        struct btrfs_qgroup *qgroup;
+       struct btrfs_qgroup_list *list;
        int ret = 0;
 
        mutex_lock(&fs_info->qgroup_ioctl_lock);
@@ -1147,15 +1284,24 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
                ret = -ENOENT;
                goto out;
        } else {
-               /* check if there are no relations to this qgroup */
-               if (!list_empty(&qgroup->groups) ||
-                   !list_empty(&qgroup->members)) {
+               /* check if there are no children of this qgroup */
+               if (!list_empty(&qgroup->members)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        ret = del_qgroup_item(trans, quota_root, qgroupid);
 
+       while (!list_empty(&qgroup->groups)) {
+               list = list_first_entry(&qgroup->groups,
+                                       struct btrfs_qgroup_list, next_group);
+               ret = __del_qgroup_relation(trans, fs_info,
+                                          qgroupid,
+                                          list->group->qgroupid);
+               if (ret)
+                       goto out;
+       }
+
        spin_lock(&fs_info->qgroup_lock);
        del_qgroup_rb(quota_root->fs_info, qgroupid);
        spin_unlock(&fs_info->qgroup_lock);
@@ -1184,23 +1330,27 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
                ret = -ENOENT;
                goto out;
        }
-       ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
-                                      limit->flags, limit->max_rfer,
-                                      limit->max_excl, limit->rsv_rfer,
-                                      limit->rsv_excl);
+
+       spin_lock(&fs_info->qgroup_lock);
+       if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
+               qgroup->max_rfer = limit->max_rfer;
+       if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+               qgroup->max_excl = limit->max_excl;
+       if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
+               qgroup->rsv_rfer = limit->rsv_rfer;
+       if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
+               qgroup->rsv_excl = limit->rsv_excl;
+       qgroup->lim_flags |= limit->flags;
+
+       spin_unlock(&fs_info->qgroup_lock);
+
+       ret = update_qgroup_limit_item(trans, quota_root, qgroup);
        if (ret) {
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                btrfs_info(fs_info, "unable to update quota limit for %llu",
                       qgroupid);
        }
 
-       spin_lock(&fs_info->qgroup_lock);
-       qgroup->lim_flags = limit->flags;
-       qgroup->max_rfer = limit->max_rfer;
-       qgroup->max_excl = limit->max_excl;
-       qgroup->rsv_rfer = limit->rsv_rfer;
-       qgroup->rsv_excl = limit->rsv_excl;
-       spin_unlock(&fs_info->qgroup_lock);
 out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
@@ -1256,14 +1406,14 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
                return -1;
        if (oper1->bytenr > oper2->bytenr)
                return 1;
-       if (oper1->seq < oper2->seq)
-               return -1;
-       if (oper1->seq > oper2->seq)
-               return 1;
        if (oper1->ref_root < oper2->ref_root)
                return -1;
        if (oper1->ref_root > oper2->ref_root)
                return 1;
+       if (oper1->seq < oper2->seq)
+               return -1;
+       if (oper1->seq > oper2->seq)
+               return 1;
        if (oper1->type < oper2->type)
                return -1;
        if (oper1->type > oper2->type)
@@ -1372,19 +1522,10 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-/*
- * The easy accounting, if we are adding/removing the only ref for an extent
- * then this qgroup and all of the parent qgroups get their refrence and
- * exclusive counts adjusted.
- */
 static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
                                  struct btrfs_qgroup_operation *oper)
 {
-       struct btrfs_qgroup *qgroup;
        struct ulist *tmp;
-       struct btrfs_qgroup_list *glist;
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
        int sign = 0;
        int ret = 0;
 
@@ -1395,9 +1536,7 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
        spin_lock(&fs_info->qgroup_lock);
        if (!fs_info->quota_root)
                goto out;
-       qgroup = find_qgroup_rb(fs_info, oper->ref_root);
-       if (!qgroup)
-               goto out;
+
        switch (oper->type) {
        case BTRFS_QGROUP_OPER_ADD_EXCL:
                sign = 1;
@@ -1408,43 +1547,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
        default:
                ASSERT(0);
        }
-       qgroup->rfer += sign * oper->num_bytes;
-       qgroup->rfer_cmpr += sign * oper->num_bytes;
-
-       WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
-       qgroup->excl += sign * oper->num_bytes;
-       qgroup->excl_cmpr += sign * oper->num_bytes;
-
-       qgroup_dirty(fs_info, qgroup);
-
-       /* Get all of the parent groups that contain this qgroup */
-       list_for_each_entry(glist, &qgroup->groups, next_group) {
-               ret = ulist_add(tmp, glist->group->qgroupid,
-                               ptr_to_u64(glist->group), GFP_ATOMIC);
-               if (ret < 0)
-                       goto out;
-       }
-
-       /* Iterate all of the parents and adjust their reference counts */
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(tmp, &uiter))) {
-               qgroup = u64_to_ptr(unode->aux);
-               qgroup->rfer += sign * oper->num_bytes;
-               qgroup->rfer_cmpr += sign * oper->num_bytes;
-               WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
-               qgroup->excl += sign * oper->num_bytes;
-               qgroup->excl_cmpr += sign * oper->num_bytes;
-               qgroup_dirty(fs_info, qgroup);
-
-               /* Add any parents of the parents */
-               list_for_each_entry(glist, &qgroup->groups, next_group) {
-                       ret = ulist_add(tmp, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               goto out;
-               }
-       }
-       ret = 0;
+       ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
+                                      oper->num_bytes, sign);
 out:
        spin_unlock(&fs_info->qgroup_lock);
        ulist_free(tmp);
@@ -1845,7 +1949,7 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
        struct ulist *roots = NULL;
        struct ulist *qgroups, *tmp;
        struct btrfs_qgroup *qgroup;
-       struct seq_list elem = {};
+       struct seq_list elem = SEQ_LIST_INIT(elem);
        u64 seq;
        int old_roots = 0;
        int new_roots = 0;
@@ -1967,7 +2071,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
        int err;
        struct btrfs_qgroup *qg;
        u64 root_obj = 0;
-       struct seq_list elem = {};
+       struct seq_list elem = SEQ_LIST_INIT(elem);
 
        parents = ulist_alloc(GFP_NOFS);
        if (!parents)
@@ -2153,6 +2257,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
                list_del_init(&qgroup->dirty);
                spin_unlock(&fs_info->qgroup_lock);
                ret = update_qgroup_info_item(trans, quota_root, qgroup);
+               if (ret)
+                       fs_info->qgroup_flags |=
+                                       BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+               ret = update_qgroup_limit_item(trans, quota_root, qgroup);
                if (ret)
                        fs_info->qgroup_flags |=
                                        BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -2219,6 +2327,11 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                                ret = -EINVAL;
                                goto out;
                        }
+
+                       if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
                        ++i_qgroups;
                }
        }
@@ -2230,17 +2343,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
        if (ret)
                goto out;
 
-       if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
-               ret = update_qgroup_limit_item(trans, quota_root, objectid,
-                                              inherit->lim.flags,
-                                              inherit->lim.max_rfer,
-                                              inherit->lim.max_excl,
-                                              inherit->lim.rsv_rfer,
-                                              inherit->lim.rsv_excl);
-               if (ret)
-                       goto out;
-       }
-
        if (srcid) {
                struct btrfs_root *srcroot;
                struct btrfs_key srckey;
@@ -2286,6 +2388,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                goto unlock;
        }
 
+       if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
+               dstgroup->lim_flags = inherit->lim.flags;
+               dstgroup->max_rfer = inherit->lim.max_rfer;
+               dstgroup->max_excl = inherit->lim.max_excl;
+               dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
+               dstgroup->rsv_excl = inherit->lim.rsv_excl;
+
+               ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
+               if (ret) {
+                       fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+                       btrfs_info(fs_info, "unable to update quota limit for %llu",
+                              dstgroup->qgroupid);
+                       goto unlock;
+               }
+       }
+
        if (srcid) {
                srcgroup = find_qgroup_rb(fs_info, srcid);
                if (!srcgroup)
@@ -2302,6 +2420,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                dstgroup->excl_cmpr = level_size;
                srcgroup->excl = level_size;
                srcgroup->excl_cmpr = level_size;
+
+               /* inherit the limit info */
+               dstgroup->lim_flags = srcgroup->lim_flags;
+               dstgroup->max_rfer = srcgroup->max_rfer;
+               dstgroup->max_excl = srcgroup->max_excl;
+               dstgroup->rsv_rfer = srcgroup->rsv_rfer;
+               dstgroup->rsv_excl = srcgroup->rsv_excl;
+
                qgroup_dirty(fs_info, dstgroup);
                qgroup_dirty(fs_info, srcgroup);
        }
@@ -2358,12 +2484,6 @@ out:
        return ret;
 }
 
-/*
- * reserve some space for a qgroup and all its parents. The reservation takes
- * place with start_transaction or dealloc_reserve, similar to ENOSPC
- * accounting. If not enough space is available, EDQUOT is returned.
- * We assume that the requested space is new for all qgroups.
- */
 int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 {
        struct btrfs_root *quota_root;
@@ -2513,7 +2633,7 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 
 /*
  * returns < 0 on error, 0 when more leafs are to be scanned.
- * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ * returns 1 when done.
  */
 static int
 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
@@ -2522,7 +2642,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 {
        struct btrfs_key found;
        struct ulist *roots = NULL;
-       struct seq_list tree_mod_seq_elem = {};
+       struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
        u64 num_bytes;
        u64 seq;
        int new_roots;
@@ -2618,6 +2738,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
        struct ulist *tmp = NULL, *qgroups = NULL;
        struct extent_buffer *scratch_leaf = NULL;
        int err = -ENOMEM;
+       int ret = 0;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -2660,7 +2781,7 @@ out:
        mutex_lock(&fs_info->qgroup_rescan_lock);
        fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
 
-       if (err == 2 &&
+       if (err > 0 &&
            fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
        } else if (err < 0) {
@@ -2668,13 +2789,33 @@ out:
        }
        mutex_unlock(&fs_info->qgroup_rescan_lock);
 
+       /*
+        * only update status, since the previous part has alreay updated the
+        * qgroup info.
+        */
+       trans = btrfs_start_transaction(fs_info->quota_root, 1);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               btrfs_err(fs_info,
+                         "fail to start transaction for status update: %d\n",
+                         err);
+               goto done;
+       }
+       ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
+       if (ret < 0) {
+               err = ret;
+               btrfs_err(fs_info, "fail to update qgroup status: %d\n", err);
+       }
+       btrfs_end_transaction(trans, fs_info->quota_root);
+
        if (err >= 0) {
                btrfs_info(fs_info, "qgroup scan completed%s",
-                       err == 2 ? " (inconsistency flag cleared)" : "");
+                       err > 0 ? " (inconsistency flag cleared)" : "");
        } else {
                btrfs_err(fs_info, "qgroup scan failed with %d", err);
        }
 
+done:
        complete_all(&fs_info->qgroup_rescan_completion);
 }
 
@@ -2709,7 +2850,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                        mutex_unlock(&fs_info->qgroup_rescan_lock);
                        goto err;
                }
-
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
        }
 
index 18cc68c..c5242aa 100644 (file)
@@ -70,8 +70,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-                       struct btrfs_fs_info *fs_info, u64 qgroupid,
-                       char *name);
+                       struct btrfs_fs_info *fs_info, u64 qgroupid);
 int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
                              struct btrfs_fs_info *fs_info, u64 qgroupid);
 int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
index 5264858..fa72068 100644 (file)
@@ -237,12 +237,8 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
        }
 
        x = cmpxchg(&info->stripe_hash_table, NULL, table);
-       if (x) {
-               if (is_vmalloc_addr(x))
-                       vfree(x);
-               else
-                       kfree(x);
-       }
+       if (x)
+               kvfree(x);
        return 0;
 }
 
@@ -453,10 +449,7 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
        if (!info->stripe_hash_table)
                return;
        btrfs_clear_rbio_cache(info);
-       if (is_vmalloc_addr(info->stripe_hash_table))
-               vfree(info->stripe_hash_table);
-       else
-               kfree(info->stripe_hash_table);
+       kvfree(info->stripe_hash_table);
        info->stripe_hash_table = NULL;
 }
 
@@ -1807,8 +1800,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
        int err;
        int i;
 
-       pointers = kzalloc(rbio->real_stripes * sizeof(void *),
-                          GFP_NOFS);
+       pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
        if (!pointers) {
                err = -ENOMEM;
                goto cleanup_io;
index d830853..74b24b0 100644 (file)
@@ -3027,7 +3027,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
        mutex_lock(&inode->i_mutex);
 
        ret = btrfs_check_data_free_space(inode, cluster->end +
-                                         1 - cluster->start);
+                                         1 - cluster->start, 0);
        if (ret)
                goto out;
 
@@ -3430,7 +3430,9 @@ static int block_use_full_backref(struct reloc_control *rc,
 }
 
 static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
-                                   struct inode *inode, u64 ino)
+                                   struct btrfs_block_group_cache *block_group,
+                                   struct inode *inode,
+                                   u64 ino)
 {
        struct btrfs_key key;
        struct btrfs_root *root = fs_info->tree_root;
@@ -3463,7 +3465,7 @@ truncate:
                goto out;
        }
 
-       ret = btrfs_truncate_free_space_cache(root, trans, inode);
+       ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode);
 
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root);
@@ -3509,6 +3511,7 @@ static int find_data_references(struct reloc_control *rc,
         */
        if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
                ret = delete_block_group_cache(rc->extent_root->fs_info,
+                                              rc->block_group,
                                               NULL, ref_objectid);
                if (ret != -ENOENT)
                        return ret;
@@ -4223,7 +4226,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
        btrfs_free_path(path);
 
        if (!IS_ERR(inode))
-               ret = delete_block_group_cache(fs_info, inode, 0);
+               ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
        else
                ret = PTR_ERR(inode);
 
index ec57687..ab58115 100644 (file)
@@ -964,9 +964,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
         * the statistics.
         */
 
-       sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
-                                    sizeof(*sblocks_for_recheck),
-                                    GFP_NOFS);
+       sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
+                                     sizeof(*sblocks_for_recheck), GFP_NOFS);
        if (!sblocks_for_recheck) {
                spin_lock(&sctx->stat_lock);
                sctx->stat.malloc_errors++;
@@ -2319,7 +2318,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
                                       unsigned long *bitmap,
                                       u64 start, u64 len)
 {
-       int offset;
+       u32 offset;
        int nsectors;
        int sectorsize = sparity->sctx->dev_root->sectorsize;
 
@@ -2329,7 +2328,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
        }
 
        start -= sparity->logic_start;
-       offset = (int)do_div(start, sparity->stripe_len);
+       start = div_u64_rem(start, sparity->stripe_len, &offset);
        offset /= sectorsize;
        nsectors = (int)len / sectorsize;
 
@@ -2612,8 +2611,8 @@ static int get_raid56_logic_offset(u64 physical, int num,
        int j = 0;
        u64 stripe_nr;
        u64 last_offset;
-       int stripe_index;
-       int rot;
+       u32 stripe_index;
+       u32 rot;
 
        last_offset = (physical - map->stripes[num].physical) *
                      nr_data_stripes(map);
@@ -2624,12 +2623,11 @@ static int get_raid56_logic_offset(u64 physical, int num,
        for (i = 0; i < nr_data_stripes(map); i++) {
                *offset = last_offset + i * map->stripe_len;
 
-               stripe_nr = *offset;
-               do_div(stripe_nr, map->stripe_len);
-               do_div(stripe_nr, nr_data_stripes(map));
+               stripe_nr = div_u64(*offset, map->stripe_len);
+               stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
 
                /* Work out the disk rotation on this stripe-set */
-               rot = do_div(stripe_nr, map->num_stripes);
+               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
                /* calculate which stripe this data locates */
                rot += i;
                stripe_index = rot % map->num_stripes;
@@ -2995,10 +2993,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        int extent_mirror_num;
        int stop_loop = 0;
 
-       nstripes = length;
        physical = map->stripes[num].physical;
        offset = 0;
-       do_div(nstripes, map->stripe_len);
+       nstripes = div_u64(length, map->stripe_len);
        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
                offset = map->stripe_len * num;
                increment = map->stripe_len * map->num_stripes;
@@ -3563,7 +3560,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
                                                int is_dev_replace)
 {
        int ret = 0;
-       int flags = WQ_FREEZABLE | WQ_UNBOUND;
+       unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
        int max_active = fs_info->thread_pool_size;
 
        if (fs_info->scrub_workers_refcnt == 0) {
index d6033f5..a1216f9 100644 (file)
@@ -3067,48 +3067,6 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
        return NULL;
 }
 
-static int path_loop(struct send_ctx *sctx, struct fs_path *name,
-                    u64 ino, u64 gen, u64 *ancestor_ino)
-{
-       int ret = 0;
-       u64 parent_inode = 0;
-       u64 parent_gen = 0;
-       u64 start_ino = ino;
-
-       *ancestor_ino = 0;
-       while (ino != BTRFS_FIRST_FREE_OBJECTID) {
-               fs_path_reset(name);
-
-               if (is_waiting_for_rm(sctx, ino))
-                       break;
-               if (is_waiting_for_move(sctx, ino)) {
-                       if (*ancestor_ino == 0)
-                               *ancestor_ino = ino;
-                       ret = get_first_ref(sctx->parent_root, ino,
-                                           &parent_inode, &parent_gen, name);
-               } else {
-                       ret = __get_cur_name_and_parent(sctx, ino, gen,
-                                                       &parent_inode,
-                                                       &parent_gen, name);
-                       if (ret > 0) {
-                               ret = 0;
-                               break;
-                       }
-               }
-               if (ret < 0)
-                       break;
-               if (parent_inode == start_ino) {
-                       ret = 1;
-                       if (*ancestor_ino == 0)
-                               *ancestor_ino = ino;
-                       break;
-               }
-               ino = parent_inode;
-               gen = parent_gen;
-       }
-       return ret;
-}
-
 static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 {
        struct fs_path *from_path = NULL;
@@ -3120,7 +3078,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
        struct waiting_dir_move *dm = NULL;
        u64 rmdir_ino = 0;
        int ret;
-       u64 ancestor = 0;
 
        name = fs_path_alloc();
        from_path = fs_path_alloc();
@@ -3152,22 +3109,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                goto out;
 
        sctx->send_progress = sctx->cur_ino + 1;
-       ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
-       if (ret) {
-               LIST_HEAD(deleted_refs);
-               ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
-               ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
-                                          &pm->update_refs, &deleted_refs,
-                                          pm->is_orphan);
-               if (ret < 0)
-                       goto out;
-               if (rmdir_ino) {
-                       dm = get_waiting_dir_move(sctx, pm->ino);
-                       ASSERT(dm);
-                       dm->rmdir_ino = rmdir_ino;
-               }
-               goto out;
-       }
        fs_path_reset(name);
        to_path = name;
        name = NULL;
@@ -3610,10 +3551,27 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                        if (ret < 0)
                                goto out;
                        if (ret) {
+                               struct name_cache_entry *nce;
+
                                ret = orphanize_inode(sctx, ow_inode, ow_gen,
                                                cur->full_path);
                                if (ret < 0)
                                        goto out;
+                               /*
+                                * Make sure we clear our orphanized inode's
+                                * name from the name cache. This is because the
+                                * inode ow_inode might be an ancestor of some
+                                * other inode that will be orphanized as well
+                                * later and has an inode number greater than
+                                * sctx->send_progress. We need to prevent
+                                * future name lookups from using the old name
+                                * and get instead the orphan name.
+                                */
+                               nce = name_cache_search(sctx, ow_inode, ow_gen);
+                               if (nce) {
+                                       name_cache_delete(sctx, nce);
+                                       kfree(nce);
+                               }
                        } else {
                                ret = send_unlink(sctx, cur->full_path);
                                if (ret < 0)
@@ -5852,19 +5810,20 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
                                ret = PTR_ERR(clone_root);
                                goto out;
                        }
-                       clone_sources_to_rollback = i + 1;
                        spin_lock(&clone_root->root_item_lock);
-                       clone_root->send_in_progress++;
-                       if (!btrfs_root_readonly(clone_root)) {
+                       if (!btrfs_root_readonly(clone_root) ||
+                           btrfs_root_dead(clone_root)) {
                                spin_unlock(&clone_root->root_item_lock);
                                srcu_read_unlock(&fs_info->subvol_srcu, index);
                                ret = -EPERM;
                                goto out;
                        }
+                       clone_root->send_in_progress++;
                        spin_unlock(&clone_root->root_item_lock);
                        srcu_read_unlock(&fs_info->subvol_srcu, index);
 
                        sctx->clone_roots[i].root = clone_root;
+                       clone_sources_to_rollback = i + 1;
                }
                vfree(clone_sources_tmp);
                clone_sources_tmp = NULL;
index 05fef19..9e66f5e 100644 (file)
@@ -901,6 +901,15 @@ find_root:
        if (IS_ERR(new_root))
                return ERR_CAST(new_root);
 
+       if (!(sb->s_flags & MS_RDONLY)) {
+               int ret;
+               down_read(&fs_info->cleanup_work_sem);
+               ret = btrfs_orphan_cleanup(new_root);
+               up_read(&fs_info->cleanup_work_sem);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
+
        dir_id = btrfs_root_dirid(&new_root->root_item);
 setup_root:
        location.objectid = dir_id;
@@ -916,7 +925,7 @@ setup_root:
         * a reference to the dentry.  We will have already gotten a reference
         * to the inode in btrfs_fill_super so we're good to go.
         */
-       if (!new && sb->s_root->d_inode == inode) {
+       if (!new && d_inode(sb->s_root) == inode) {
                iput(inode);
                return dget(sb->s_root);
        }
@@ -1221,7 +1230,7 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
 
        root = mount_subtree(mnt, subvol_name);
 
-       if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) {
+       if (!IS_ERR(root) && !is_subvolume_inode(d_inode(root))) {
                struct super_block *s = root->d_sb;
                dput(root);
                root = ERR_PTR(-EINVAL);
@@ -1714,7 +1723,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
                avail_space = device->total_bytes - device->bytes_used;
 
                /* align with stripe_len */
-               do_div(avail_space, BTRFS_STRIPE_LEN);
+               avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
                avail_space *= BTRFS_STRIPE_LEN;
 
                /*
@@ -1886,8 +1895,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
        buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
        /* Mask in the root object ID too, to disambiguate subvols */
-       buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
-       buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
+       buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
+       buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
 
        return 0;
 }
@@ -1908,6 +1917,17 @@ static struct file_system_type btrfs_fs_type = {
 };
 MODULE_ALIAS_FS("btrfs");
 
+static int btrfs_control_open(struct inode *inode, struct file *file)
+{
+       /*
+        * The control file's private_data is used to hold the
+        * transaction when it is started and is used to keep
+        * track of whether a transaction is already in progress.
+        */
+       file->private_data = NULL;
+       return 0;
+}
+
 /*
  * used by btrfsctl to scan devices when no FS is mounted
  */
@@ -2009,6 +2029,7 @@ static const struct super_operations btrfs_super_ops = {
 };
 
 static const struct file_operations btrfs_ctl_fops = {
+       .open = btrfs_control_open,
        .unlocked_ioctl  = btrfs_control_ioctl,
        .compat_ioctl = btrfs_control_ioctl,
        .owner   = THIS_MODULE,
index 94edb0a..e8a4c86 100644 (file)
@@ -459,7 +459,7 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
 static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
 static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
 
-static u64 supported_feature_masks[3] = {
+static const u64 supported_feature_masks[3] = {
        [FEAT_COMPAT]    = BTRFS_FEATURE_COMPAT_SUPP,
        [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
        [FEAT_INCOMPAT]  = BTRFS_FEATURE_INCOMPAT_SUPP,
index f7dd298..3a4bbed 100644 (file)
@@ -61,11 +61,23 @@ static struct btrfs_feature_attr btrfs_attr_##_name = {                          \
        BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
 
 /* convert from attribute */
-#define to_btrfs_feature_attr(a) \
-                       container_of(a, struct btrfs_feature_attr, kobj_attr)
-#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr)
-#define attr_to_btrfs_feature_attr(a) \
-                       to_btrfs_feature_attr(attr_to_btrfs_attr(a))
+static inline struct btrfs_feature_attr *
+to_btrfs_feature_attr(struct kobj_attribute *a)
+{
+       return container_of(a, struct btrfs_feature_attr, kobj_attr);
+}
+
+static inline struct kobj_attribute *attr_to_btrfs_attr(struct attribute *attr)
+{
+       return container_of(attr, struct kobj_attribute, attr);
+}
+
+static inline struct btrfs_feature_attr *
+attr_to_btrfs_feature_attr(struct attribute *attr)
+{
+       return to_btrfs_feature_attr(attr_to_btrfs_attr(attr));
+}
+
 char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
 extern const char * const btrfs_feature_set_names[3];
 extern struct kobj_type space_info_ktype;
index 73f299e..c32a7ba 100644 (file)
@@ -232,7 +232,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
        init_dummy_trans(&trans);
 
        test_msg("Qgroup basic add\n");
-       ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL);
+       ret = btrfs_create_qgroup(NULL, fs_info, 5);
        if (ret) {
                test_msg("Couldn't create a qgroup %d\n", ret);
                return ret;
@@ -301,7 +301,7 @@ static int test_multiple_refs(struct btrfs_root *root)
        test_msg("Qgroup multiple refs test\n");
 
        /* We have 5 created already from the previous test */
-       ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL);
+       ret = btrfs_create_qgroup(NULL, fs_info, 256);
        if (ret) {
                test_msg("Couldn't create a qgroup %d\n", ret);
                return ret;
index 8be4278..5628e25 100644 (file)
@@ -35,7 +35,7 @@
 
 #define BTRFS_ROOT_TRANS_TAG 0
 
-static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
+static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
        [TRANS_STATE_RUNNING]           = 0U,
        [TRANS_STATE_BLOCKED]           = (__TRANS_USERSPACE |
                                           __TRANS_START),
@@ -64,6 +64,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
        if (atomic_dec_and_test(&transaction->use_count)) {
                BUG_ON(!list_empty(&transaction->list));
                WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
+               if (transaction->delayed_refs.pending_csums)
+                       printk(KERN_ERR "pending csums is %llu\n",
+                              transaction->delayed_refs.pending_csums);
                while (!list_empty(&transaction->pending_chunks)) {
                        struct extent_map *em;
 
@@ -93,11 +96,8 @@ static void clear_btree_io_tree(struct extent_io_tree *tree)
                 */
                ASSERT(!waitqueue_active(&state->wq));
                free_extent_state(state);
-               if (need_resched()) {
-                       spin_unlock(&tree->lock);
-                       cond_resched();
-                       spin_lock(&tree->lock);
-               }
+
+               cond_resched_lock(&tree->lock);
        }
        spin_unlock(&tree->lock);
 }
@@ -222,10 +222,12 @@ loop:
        atomic_set(&cur_trans->use_count, 2);
        cur_trans->have_free_bgs = 0;
        cur_trans->start_time = get_seconds();
+       cur_trans->dirty_bg_run = 0;
 
        cur_trans->delayed_refs.href_root = RB_ROOT;
        atomic_set(&cur_trans->delayed_refs.num_entries, 0);
        cur_trans->delayed_refs.num_heads_ready = 0;
+       cur_trans->delayed_refs.pending_csums = 0;
        cur_trans->delayed_refs.num_heads = 0;
        cur_trans->delayed_refs.flushing = 0;
        cur_trans->delayed_refs.run_delayed_start = 0;
@@ -250,6 +252,9 @@ loop:
        INIT_LIST_HEAD(&cur_trans->switch_commits);
        INIT_LIST_HEAD(&cur_trans->pending_ordered);
        INIT_LIST_HEAD(&cur_trans->dirty_bgs);
+       INIT_LIST_HEAD(&cur_trans->io_bgs);
+       mutex_init(&cur_trans->cache_write_mutex);
+       cur_trans->num_dirty_bgs = 0;
        spin_lock_init(&cur_trans->dirty_bgs_lock);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
@@ -721,7 +726,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
        updates = trans->delayed_ref_updates;
        trans->delayed_ref_updates = 0;
        if (updates) {
-               err = btrfs_run_delayed_refs(trans, root, updates);
+               err = btrfs_run_delayed_refs(trans, root, updates * 2);
                if (err) /* Error code will also eval true */
                        return err;
        }
@@ -1057,6 +1062,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
+       struct list_head *io_bgs = &trans->transaction->io_bgs;
        struct list_head *next;
        struct extent_buffer *eb;
        int ret;
@@ -1110,7 +1116,7 @@ again:
                        return ret;
        }
 
-       while (!list_empty(dirty_bgs)) {
+       while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
                ret = btrfs_write_dirty_block_groups(trans, root);
                if (ret)
                        return ret;
@@ -1810,6 +1816,37 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                return ret;
        }
 
+       if (!cur_trans->dirty_bg_run) {
+               int run_it = 0;
+
+               /* this mutex is also taken before trying to set
+                * block groups readonly.  We need to make sure
+                * that nobody has set a block group readonly
+                * after a extents from that block group have been
+                * allocated for cache files.  btrfs_set_block_group_ro
+                * will wait for the transaction to commit if it
+                * finds dirty_bg_run = 1
+                *
+                * The dirty_bg_run flag is also used to make sure only
+                * one process starts all the block group IO.  It wouldn't
+                * hurt to have more than one go through, but there's no
+                * real advantage to it either.
+                */
+               mutex_lock(&root->fs_info->ro_block_group_mutex);
+               if (!cur_trans->dirty_bg_run) {
+                       run_it = 1;
+                       cur_trans->dirty_bg_run = 1;
+               }
+               mutex_unlock(&root->fs_info->ro_block_group_mutex);
+
+               if (run_it)
+                       ret = btrfs_start_dirty_block_groups(trans, root);
+       }
+       if (ret) {
+               btrfs_end_transaction(trans, root);
+               return ret;
+       }
+
        spin_lock(&root->fs_info->trans_lock);
        list_splice(&trans->ordered, &cur_trans->pending_ordered);
        if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
@@ -2003,6 +2040,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        assert_qgroups_uptodate(trans);
        ASSERT(list_empty(&cur_trans->dirty_bgs));
+       ASSERT(list_empty(&cur_trans->io_bgs));
        update_super_roots(root);
 
        btrfs_set_super_log_root(root->fs_info->super_copy, 0);
index 937050a..0b24755 100644 (file)
@@ -64,9 +64,19 @@ struct btrfs_transaction {
        struct list_head pending_ordered;
        struct list_head switch_commits;
        struct list_head dirty_bgs;
+       struct list_head io_bgs;
+       u64 num_dirty_bgs;
+
+       /*
+        * we need to make sure block group deletion doesn't race with
+        * free space cache writeout.  This mutex keeps them from stomping
+        * on each other
+        */
+       struct mutex cache_write_mutex;
        spinlock_t dirty_bgs_lock;
        struct btrfs_delayed_ref_root delayed_refs;
        int aborted;
+       int dirty_bg_run;
 };
 
 #define __TRANS_FREEZABLE      (1U << 0)
@@ -136,9 +146,11 @@ struct btrfs_pending_snapshot {
 static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
                                              struct inode *inode)
 {
+       spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->last_trans = trans->transaction->transid;
        BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
        BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
+       spin_unlock(&BTRFS_I(inode)->lock);
 }
 
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
index c5b8ba3..d049683 100644 (file)
@@ -492,11 +492,19 @@ insert:
 
                if (btrfs_inode_generation(eb, src_item) == 0) {
                        struct extent_buffer *dst_eb = path->nodes[0];
+                       const u64 ino_size = btrfs_inode_size(eb, src_item);
 
+                       /*
+                        * For regular files an ino_size == 0 is used only when
+                        * logging that an inode exists, as part of a directory
+                        * fsync, and the inode wasn't fsynced before. In this
+                        * case don't set the size of the inode in the fs/subvol
+                        * tree, otherwise we would be throwing valid data away.
+                        */
                        if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
-                           S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
+                           S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
+                           ino_size != 0) {
                                struct btrfs_map_token token;
-                               u64 ino_size = btrfs_inode_size(eb, src_item);
 
                                btrfs_init_map_token(&token);
                                btrfs_set_token_inode_size(dst_eb, dst_item,
@@ -1951,6 +1959,104 @@ out:
        return ret;
 }
 
+static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct btrfs_root *log,
+                             struct btrfs_path *path,
+                             const u64 ino)
+{
+       struct btrfs_key search_key;
+       struct btrfs_path *log_path;
+       int i;
+       int nritems;
+       int ret;
+
+       log_path = btrfs_alloc_path();
+       if (!log_path)
+               return -ENOMEM;
+
+       search_key.objectid = ino;
+       search_key.type = BTRFS_XATTR_ITEM_KEY;
+       search_key.offset = 0;
+again:
+       ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+process_leaf:
+       nritems = btrfs_header_nritems(path->nodes[0]);
+       for (i = path->slots[0]; i < nritems; i++) {
+               struct btrfs_key key;
+               struct btrfs_dir_item *di;
+               struct btrfs_dir_item *log_di;
+               u32 total_size;
+               u32 cur;
+
+               btrfs_item_key_to_cpu(path->nodes[0], &key, i);
+               if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
+                       ret = 0;
+                       goto out;
+               }
+
+               di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
+               total_size = btrfs_item_size_nr(path->nodes[0], i);
+               cur = 0;
+               while (cur < total_size) {
+                       u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
+                       u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
+                       u32 this_len = sizeof(*di) + name_len + data_len;
+                       char *name;
+
+                       name = kmalloc(name_len, GFP_NOFS);
+                       if (!name) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+                       read_extent_buffer(path->nodes[0], name,
+                                          (unsigned long)(di + 1), name_len);
+
+                       log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
+                                                   name, name_len, 0);
+                       btrfs_release_path(log_path);
+                       if (!log_di) {
+                               /* Doesn't exist in log tree, so delete it. */
+                               btrfs_release_path(path);
+                               di = btrfs_lookup_xattr(trans, root, path, ino,
+                                                       name, name_len, -1);
+                               kfree(name);
+                               if (IS_ERR(di)) {
+                                       ret = PTR_ERR(di);
+                                       goto out;
+                               }
+                               ASSERT(di);
+                               ret = btrfs_delete_one_dir_name(trans, root,
+                                                               path, di);
+                               if (ret)
+                                       goto out;
+                               btrfs_release_path(path);
+                               search_key = key;
+                               goto again;
+                       }
+                       kfree(name);
+                       if (IS_ERR(log_di)) {
+                               ret = PTR_ERR(log_di);
+                               goto out;
+                       }
+                       cur += this_len;
+                       di = (struct btrfs_dir_item *)((char *)di + this_len);
+               }
+       }
+       ret = btrfs_next_leaf(root, path);
+       if (ret > 0)
+               ret = 0;
+       else if (ret == 0)
+               goto process_leaf;
+out:
+       btrfs_free_path(log_path);
+       btrfs_release_path(path);
+       return ret;
+}
+
+
 /*
  * deletion replay happens before we copy any new directory items
  * out of the log or out of backreferences from inodes.  It
@@ -2104,6 +2210,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 
                        inode_item = btrfs_item_ptr(eb, i,
                                            struct btrfs_inode_item);
+                       ret = replay_xattr_deletes(wc->trans, root, log,
+                                                  path, key.objectid);
+                       if (ret)
+                               break;
                        mode = btrfs_inode_mode(eb, inode_item);
                        if (S_ISDIR(mode)) {
                                ret = replay_dir_deletes(wc->trans,
@@ -2230,7 +2340,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                if (trans) {
                                        btrfs_tree_lock(next);
                                        btrfs_set_lock_blocking(next);
-                                       clean_tree_block(trans, root, next);
+                                       clean_tree_block(trans, root->fs_info,
+                                                       next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
                                }
@@ -2308,7 +2419,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                if (trans) {
                                        btrfs_tree_lock(next);
                                        btrfs_set_lock_blocking(next);
-                                       clean_tree_block(trans, root, next);
+                                       clean_tree_block(trans, root->fs_info,
+                                                       next);
                                        btrfs_wait_tree_block_writeback(next);
                                        btrfs_tree_unlock(next);
                                }
@@ -2384,7 +2496,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                        if (trans) {
                                btrfs_tree_lock(next);
                                btrfs_set_lock_blocking(next);
-                               clean_tree_block(trans, log, next);
+                               clean_tree_block(trans, log->fs_info, next);
                                btrfs_wait_tree_block_writeback(next);
                                btrfs_tree_unlock(next);
                        }
@@ -3020,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct inode *inode,
                          struct btrfs_path *path,
                          struct btrfs_path *dst_path, int key_type,
+                         struct btrfs_log_ctx *ctx,
                          u64 min_offset, u64 *last_offset_ret)
 {
        struct btrfs_key min_key;
@@ -3104,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                src = path->nodes[0];
                nritems = btrfs_header_nritems(src);
                for (i = path->slots[0]; i < nritems; i++) {
+                       struct btrfs_dir_item *di;
+
                        btrfs_item_key_to_cpu(src, &min_key, i);
 
                        if (min_key.objectid != ino || min_key.type != key_type)
@@ -3114,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                                err = ret;
                                goto done;
                        }
+
+                       /*
+                        * We must make sure that when we log a directory entry,
+                        * the corresponding inode, after log replay, has a
+                        * matching link count. For example:
+                        *
+                        * touch foo
+                        * mkdir mydir
+                        * sync
+                        * ln foo mydir/bar
+                        * xfs_io -c "fsync" mydir
+                        * <crash>
+                        * <mount fs and log replay>
+                        *
+                        * Would result in a fsync log that when replayed, our
+                        * file inode would have a link count of 1, but we get
+                        * two directory entries pointing to the same inode.
+                        * After removing one of the names, it would not be
+                        * possible to remove the other name, which resulted
+                        * always in stale file handle errors, and would not
+                        * be possible to rmdir the parent directory, since
+                        * its i_size could never decrement to the value
+                        * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
+                        */
+                       di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
+                       btrfs_dir_item_key_to_cpu(src, di, &tmp);
+                       if (ctx &&
+                           (btrfs_dir_transid(src, di) == trans->transid ||
+                            btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
+                           tmp.type != BTRFS_ROOT_ITEM_KEY)
+                               ctx->log_new_dentries = true;
                }
                path->slots[0] = nritems;
 
@@ -3175,7 +3321,8 @@ done:
 static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, struct inode *inode,
                          struct btrfs_path *path,
-                         struct btrfs_path *dst_path)
+                         struct btrfs_path *dst_path,
+                         struct btrfs_log_ctx *ctx)
 {
        u64 min_key;
        u64 max_key;
@@ -3187,7 +3334,7 @@ again:
        max_key = 0;
        while (1) {
                ret = log_dir_items(trans, root, inode, path,
-                                   dst_path, key_type, min_key,
+                                   dst_path, key_type, ctx, min_key,
                                    &max_key);
                if (ret)
                        return ret;
@@ -3963,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
        if (ret < 0) {
                return ret;
        } else if (ret > 0) {
-               *size_ret = i_size_read(inode);
+               *size_ret = 0;
        } else {
                struct btrfs_inode_item *item;
 
@@ -4070,10 +4217,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
        if (S_ISDIR(inode->i_mode)) {
                int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
 
-               if (inode_only == LOG_INODE_EXISTS) {
-                       max_key_type = BTRFS_INODE_EXTREF_KEY;
-                       max_key.type = max_key_type;
-               }
+               if (inode_only == LOG_INODE_EXISTS)
+                       max_key_type = BTRFS_XATTR_ITEM_KEY;
                ret = drop_objectid_items(trans, log, path, ino, max_key_type);
        } else {
                if (inode_only == LOG_INODE_EXISTS) {
@@ -4098,7 +4243,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                             &BTRFS_I(inode)->runtime_flags)) {
                        if (inode_only == LOG_INODE_EXISTS) {
-                               max_key.type = BTRFS_INODE_EXTREF_KEY;
+                               max_key.type = BTRFS_XATTR_ITEM_KEY;
                                ret = drop_objectid_items(trans, log, path, ino,
                                                          max_key.type);
                        } else {
@@ -4106,20 +4251,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                                          &BTRFS_I(inode)->runtime_flags);
                                clear_bit(BTRFS_INODE_COPY_EVERYTHING,
                                          &BTRFS_I(inode)->runtime_flags);
-                               ret = btrfs_truncate_inode_items(trans, log,
-                                                                inode, 0, 0);
+                               while(1) {
+                                       ret = btrfs_truncate_inode_items(trans,
+                                                        log, inode, 0, 0);
+                                       if (ret != -EAGAIN)
+                                               break;
+                               }
                        }
-               } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                   &BTRFS_I(inode)->runtime_flags) ||
+               } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
+                                             &BTRFS_I(inode)->runtime_flags) ||
                           inode_only == LOG_INODE_EXISTS) {
-                       if (inode_only == LOG_INODE_ALL) {
-                               clear_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                         &BTRFS_I(inode)->runtime_flags);
+                       if (inode_only == LOG_INODE_ALL)
                                fast_search = true;
-                               max_key.type = BTRFS_XATTR_ITEM_KEY;
-                       } else {
-                               max_key.type = BTRFS_INODE_EXTREF_KEY;
-                       }
+                       max_key.type = BTRFS_XATTR_ITEM_KEY;
                        ret = drop_objectid_items(trans, log, path, ino,
                                                  max_key.type);
                } else {
@@ -4277,15 +4421,18 @@ log_extents:
        }
 
        if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
-               ret = log_directory_changes(trans, root, inode, path, dst_path);
+               ret = log_directory_changes(trans, root, inode, path, dst_path,
+                                           ctx);
                if (ret) {
                        err = ret;
                        goto out_unlock;
                }
        }
 
+       spin_lock(&BTRFS_I(inode)->lock);
        BTRFS_I(inode)->logged_trans = trans->transid;
        BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
+       spin_unlock(&BTRFS_I(inode)->lock);
 out_unlock:
        if (unlikely(err))
                btrfs_put_logged_extents(&logged_list);
@@ -4327,9 +4474,9 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                        goto out;
 
        if (!S_ISDIR(inode->i_mode)) {
-               if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
+               if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
                        goto out;
-               inode = parent->d_inode;
+               inode = d_inode(parent);
        }
 
        while (1) {
@@ -4355,7 +4502,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                        break;
                }
 
-               if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
+               if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
                        break;
 
                if (IS_ROOT(parent))
@@ -4364,7 +4511,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                parent = dget_parent(parent);
                dput(old_parent);
                old_parent = parent;
-               inode = parent->d_inode;
+               inode = d_inode(parent);
 
        }
        dput(old_parent);
@@ -4372,6 +4519,181 @@ out:
        return ret;
 }
 
+struct btrfs_dir_list {
+       u64 ino;
+       struct list_head list;
+};
+
+/*
+ * Log the inodes of the new dentries of a directory. See log_dir_items() for
+ * details about the why it is needed.
+ * This is a recursive operation - if an existing dentry corresponds to a
+ * directory, that directory's new entries are logged too (same behaviour as
+ * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
+ * the dentries point to we do not lock their i_mutex, otherwise lockdep
+ * complains about the following circular lock dependency / possible deadlock:
+ *
+ *        CPU0                                        CPU1
+ *        ----                                        ----
+ * lock(&type->i_mutex_dir_key#3/2);
+ *                                            lock(sb_internal#2);
+ *                                            lock(&type->i_mutex_dir_key#3/2);
+ * lock(&sb->s_type->i_mutex_key#14);
+ *
+ * Where sb_internal is the lock (a counter that works as a lock) acquired by
+ * sb_start_intwrite() in btrfs_start_transaction().
+ * Not locking i_mutex of the inodes is still safe because:
+ *
+ * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
+ *    that while logging the inode new references (names) are added or removed
+ *    from the inode, leaving the logged inode item with a link count that does
+ *    not match the number of logged inode reference items. This is fine because
+ *    at log replay time we compute the real number of links and correct the
+ *    link count in the inode item (see replay_one_buffer() and
+ *    link_to_fixup_dir());
+ *
+ * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
+ *    while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
+ *    BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
+ *    has a size that doesn't match the sum of the lengths of all the logged
+ *    names. This does not result in a problem because if a dir_item key is
+ *    logged but its matching dir_index key is not logged, at log replay time we
+ *    don't use it to replay the respective name (see replay_one_name()). On the
+ *    other hand if only the dir_index key ends up being logged, the respective
+ *    name is added to the fs/subvol tree with both the dir_item and dir_index
+ *    keys created (see replay_one_name()).
+ *    The directory's inode item with a wrong i_size is not a problem as well,
+ *    since we don't use it at log replay time to set the i_size in the inode
+ *    item of the fs/subvol tree (see overwrite_item()).
+ */
+static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct inode *start_inode,
+                               struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_root *log = root->log_root;
+       struct btrfs_path *path;
+       LIST_HEAD(dir_list);
+       struct btrfs_dir_list *dir_elem;
+       int ret = 0;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
+       if (!dir_elem) {
+               btrfs_free_path(path);
+               return -ENOMEM;
+       }
+       dir_elem->ino = btrfs_ino(start_inode);
+       list_add_tail(&dir_elem->list, &dir_list);
+
+       while (!list_empty(&dir_list)) {
+               struct extent_buffer *leaf;
+               struct btrfs_key min_key;
+               int nritems;
+               int i;
+
+               dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
+                                           list);
+               if (ret)
+                       goto next_dir_inode;
+
+               min_key.objectid = dir_elem->ino;
+               min_key.type = BTRFS_DIR_ITEM_KEY;
+               min_key.offset = 0;
+again:
+               btrfs_release_path(path);
+               ret = btrfs_search_forward(log, &min_key, path, trans->transid);
+               if (ret < 0) {
+                       goto next_dir_inode;
+               } else if (ret > 0) {
+                       ret = 0;
+                       goto next_dir_inode;
+               }
+
+process_leaf:
+               leaf = path->nodes[0];
+               nritems = btrfs_header_nritems(leaf);
+               for (i = path->slots[0]; i < nritems; i++) {
+                       struct btrfs_dir_item *di;
+                       struct btrfs_key di_key;
+                       struct inode *di_inode;
+                       struct btrfs_dir_list *new_dir_elem;
+                       int log_mode = LOG_INODE_EXISTS;
+                       int type;
+
+                       btrfs_item_key_to_cpu(leaf, &min_key, i);
+                       if (min_key.objectid != dir_elem->ino ||
+                           min_key.type != BTRFS_DIR_ITEM_KEY)
+                               goto next_dir_inode;
+
+                       di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
+                       type = btrfs_dir_type(leaf, di);
+                       if (btrfs_dir_transid(leaf, di) < trans->transid &&
+                           type != BTRFS_FT_DIR)
+                               continue;
+                       btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
+                       if (di_key.type == BTRFS_ROOT_ITEM_KEY)
+                               continue;
+
+                       di_inode = btrfs_iget(root->fs_info->sb, &di_key,
+                                             root, NULL);
+                       if (IS_ERR(di_inode)) {
+                               ret = PTR_ERR(di_inode);
+                               goto next_dir_inode;
+                       }
+
+                       if (btrfs_inode_in_log(di_inode, trans->transid)) {
+                               iput(di_inode);
+                               continue;
+                       }
+
+                       ctx->log_new_dentries = false;
+                       if (type == BTRFS_FT_DIR)
+                               log_mode = LOG_INODE_ALL;
+                       btrfs_release_path(path);
+                       ret = btrfs_log_inode(trans, root, di_inode,
+                                             log_mode, 0, LLONG_MAX, ctx);
+                       iput(di_inode);
+                       if (ret)
+                               goto next_dir_inode;
+                       if (ctx->log_new_dentries) {
+                               new_dir_elem = kmalloc(sizeof(*new_dir_elem),
+                                                      GFP_NOFS);
+                               if (!new_dir_elem) {
+                                       ret = -ENOMEM;
+                                       goto next_dir_inode;
+                               }
+                               new_dir_elem->ino = di_key.objectid;
+                               list_add_tail(&new_dir_elem->list, &dir_list);
+                       }
+                       break;
+               }
+               if (i == nritems) {
+                       ret = btrfs_next_leaf(log, path);
+                       if (ret < 0) {
+                               goto next_dir_inode;
+                       } else if (ret > 0) {
+                               ret = 0;
+                               goto next_dir_inode;
+                       }
+                       goto process_leaf;
+               }
+               if (min_key.offset < (u64)-1) {
+                       min_key.offset++;
+                       goto again;
+               }
+next_dir_inode:
+               list_del(&dir_elem->list);
+               kfree(dir_elem);
+       }
+
+       btrfs_free_path(path);
+       return ret;
+}
+
 /*
  * helper function around btrfs_log_inode to make sure newly created
  * parent directories also end up in the log.  A minimal inode and backref
@@ -4394,6 +4716,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
        const struct dentry * const first_parent = parent;
        const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
                                 last_committed);
+       bool log_dentries = false;
+       struct inode *orig_inode = inode;
 
        sb = inode->i_sb;
 
@@ -4449,11 +4773,14 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                goto end_trans;
        }
 
+       if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
+               log_dentries = true;
+
        while (1) {
-               if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
+               if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
                        break;
 
-               inode = parent->d_inode;
+               inode = d_inode(parent);
                if (root != BTRFS_I(inode)->root)
                        break;
 
@@ -4485,7 +4812,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                dput(old_parent);
                old_parent = parent;
        }
-       ret = 0;
+       if (log_dentries)
+               ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+       else
+               ret = 0;
 end_trans:
        dput(old_parent);
        if (ret < 0) {
@@ -4515,7 +4845,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
        struct dentry *parent = dget_parent(dentry);
        int ret;
 
-       ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
+       ret = btrfs_log_inode_parent(trans, root, d_inode(dentry), parent,
                                     start, end, 0, ctx);
        dput(parent);
 
index 154990c..6916a78 100644 (file)
@@ -29,6 +29,7 @@ struct btrfs_log_ctx {
        int log_ret;
        int log_transid;
        int io_err;
+       bool log_new_dentries;
        struct list_head list;
 };
 
@@ -37,6 +38,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
        ctx->log_ret = 0;
        ctx->log_transid = 0;
        ctx->io_err = 0;
+       ctx->log_new_dentries = false;
        INIT_LIST_HEAD(&ctx->list);
 }
 
index 8222f6f..8bcd2a0 100644 (file)
@@ -366,8 +366,8 @@ loop_lock:
                btrfsic_submit_bio(cur->bi_rw, cur);
                num_run++;
                batch_run++;
-               if (need_resched())
-                       cond_resched();
+
+               cond_resched();
 
                /*
                 * we made progress, there is more work to do and the bdi
@@ -400,8 +400,7 @@ loop_lock:
                                 * against it before looping
                                 */
                                last_waited = ioc->last_waited;
-                               if (need_resched())
-                                       cond_resched();
+                               cond_resched();
                                continue;
                        }
                        spin_lock(&device->io_lock);
@@ -609,8 +608,7 @@ error:
        return ERR_PTR(-ENOMEM);
 }
 
-void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
-                              struct btrfs_fs_devices *fs_devices, int step)
+void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step)
 {
        struct btrfs_device *device, *next;
        struct btrfs_device *latest_dev = NULL;
@@ -1136,11 +1134,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-again:
+
        max_hole_start = search_start;
        max_hole_size = 0;
-       hole_size = 0;
 
+again:
        if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
                ret = -ENOSPC;
                goto out;
@@ -1233,21 +1231,23 @@ next:
         * allocated dev extents, and when shrinking the device,
         * search_end may be smaller than search_start.
         */
-       if (search_end > search_start)
+       if (search_end > search_start) {
                hole_size = search_end - search_start;
 
-       if (hole_size > max_hole_size) {
-               max_hole_start = search_start;
-               max_hole_size = hole_size;
-       }
+               if (contains_pending_extent(trans, device, &search_start,
+                                           hole_size)) {
+                       btrfs_release_path(path);
+                       goto again;
+               }
 
-       if (contains_pending_extent(trans, device, &search_start, hole_size)) {
-               btrfs_release_path(path);
-               goto again;
+               if (hole_size > max_hole_size) {
+                       max_hole_start = search_start;
+                       max_hole_size = hole_size;
+               }
        }
 
        /* See above. */
-       if (hole_size < num_bytes)
+       if (max_hole_size < num_bytes)
                ret = -ENOSPC;
        else
                ret = 0;
@@ -2487,8 +2487,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
 }
 
 static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root,
-                           u64 chunk_tree, u64 chunk_objectid,
+                           struct btrfs_root *root, u64 chunk_objectid,
                            u64 chunk_offset)
 {
        int ret;
@@ -2580,7 +2579,6 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
        struct map_lookup *map;
        u64 dev_extent_len = 0;
        u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
-       u64 chunk_tree = root->fs_info->chunk_root->objectid;
        int i, ret = 0;
 
        /* Just in case */
@@ -2634,8 +2632,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                        }
                }
        }
-       ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
-                              chunk_offset);
+       ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
                goto out;
@@ -2664,8 +2661,8 @@ out:
 }
 
 static int btrfs_relocate_chunk(struct btrfs_root *root,
-                        u64 chunk_tree, u64 chunk_objectid,
-                        u64 chunk_offset)
+                               u64 chunk_objectid,
+                               u64 chunk_offset)
 {
        struct btrfs_root *extent_root;
        struct btrfs_trans_handle *trans;
@@ -2707,7 +2704,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
        struct btrfs_chunk *chunk;
        struct btrfs_key key;
        struct btrfs_key found_key;
-       u64 chunk_tree = chunk_root->root_key.objectid;
        u64 chunk_type;
        bool retried = false;
        int failed = 0;
@@ -2744,7 +2740,7 @@ again:
                btrfs_release_path(path);
 
                if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
-                       ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
+                       ret = btrfs_relocate_chunk(chunk_root,
                                                   found_key.objectid,
                                                   found_key.offset);
                        if (ret == -ENOSPC)
@@ -3022,7 +3018,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
 
                stripe_offset = btrfs_stripe_offset(leaf, stripe);
                stripe_length = btrfs_chunk_length(leaf, chunk);
-               do_div(stripe_length, factor);
+               stripe_length = div_u64(stripe_length, factor);
 
                if (stripe_offset < bargs->pend &&
                    stripe_offset + stripe_length > bargs->pstart)
@@ -3255,7 +3251,6 @@ again:
                }
 
                ret = btrfs_relocate_chunk(chunk_root,
-                                          chunk_root->root_key.objectid,
                                           found_key.objectid,
                                           found_key.offset);
                if (ret && ret != -ENOSPC)
@@ -3957,7 +3952,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        struct btrfs_dev_extent *dev_extent = NULL;
        struct btrfs_path *path;
        u64 length;
-       u64 chunk_tree;
        u64 chunk_objectid;
        u64 chunk_offset;
        int ret;
@@ -4027,13 +4021,11 @@ again:
                        break;
                }
 
-               chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
                chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
                chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
                btrfs_release_path(path);
 
-               ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
-                                          chunk_offset);
+               ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
                if (ret && ret != -ENOSPC)
                        goto done;
                if (ret == -ENOSPC)
@@ -4131,7 +4123,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
        return 0;
 }
 
-static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID10] = {
                .sub_stripes    = 2,
                .dev_stripes    = 1,
@@ -4289,7 +4281,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
                             max_chunk_size);
 
-       devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
+       devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
                               GFP_NOFS);
        if (!devices_info)
                return -ENOMEM;
@@ -4400,8 +4392,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         */
        if (stripe_size * data_stripes > max_chunk_size) {
                u64 mask = (1ULL << 24) - 1;
-               stripe_size = max_chunk_size;
-               do_div(stripe_size, data_stripes);
+
+               stripe_size = div_u64(max_chunk_size, data_stripes);
 
                /* bump the answer up to a 16MB boundary */
                stripe_size = (stripe_size + mask) & ~mask;
@@ -4413,10 +4405,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                        stripe_size = devices_info[ndevs-1].max_avail;
        }
 
-       do_div(stripe_size, dev_stripes);
+       stripe_size = div_u64(stripe_size, dev_stripes);
 
        /* align to BTRFS_STRIPE_LEN */
-       do_div(stripe_size, raid_stripe_len);
+       stripe_size = div_u64(stripe_size, raid_stripe_len);
        stripe_size *= raid_stripe_len;
 
        map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -4954,7 +4946,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        u64 stripe_nr_orig;
        u64 stripe_nr_end;
        u64 stripe_len;
-       int stripe_index;
+       u32 stripe_index;
        int i;
        int ret = 0;
        int num_stripes;
@@ -4995,7 +4987,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
         * stripe_nr counts the total number of stripes we have to stride
         * to get to this block
         */
-       do_div(stripe_nr, stripe_len);
+       stripe_nr = div64_u64(stripe_nr, stripe_len);
 
        stripe_offset = stripe_nr * stripe_len;
        BUG_ON(offset < stripe_offset);
@@ -5011,7 +5003,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                /* allow a write of a full stripe, but make sure we don't
                 * allow straddling of stripes
                 */
-               do_div(raid56_full_stripe_start, full_stripe_len);
+               raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
+                               full_stripe_len);
                raid56_full_stripe_start *= full_stripe_len;
        }
 
@@ -5136,7 +5129,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        stripe_index = 0;
        stripe_nr_orig = stripe_nr;
        stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
-       do_div(stripe_nr_end, map->stripe_len);
+       stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
        stripe_end_offset = stripe_nr_end * map->stripe_len -
                            (offset + *length);
 
@@ -5144,7 +5137,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                if (rw & REQ_DISCARD)
                        num_stripes = min_t(u64, map->num_stripes,
                                            stripe_nr_end - stripe_nr_orig);
-               stripe_index = do_div(stripe_nr, map->num_stripes);
+               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+                               &stripe_index);
                if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)))
                        mirror_num = 1;
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
@@ -5170,9 +5164,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                }
 
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
-               int factor = map->num_stripes / map->sub_stripes;
+               u32 factor = map->num_stripes / map->sub_stripes;
 
-               stripe_index = do_div(stripe_nr, factor);
+               stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
                stripe_index *= map->sub_stripes;
 
                if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
@@ -5198,8 +5192,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                    ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
                     mirror_num > 1)) {
                        /* push stripe_nr back to the start of the full stripe */
-                       stripe_nr = raid56_full_stripe_start;
-                       do_div(stripe_nr, stripe_len * nr_data_stripes(map));
+                       stripe_nr = div_u64(raid56_full_stripe_start,
+                                       stripe_len * nr_data_stripes(map));
 
                        /* RAID[56] write or recovery. Return all stripes */
                        num_stripes = map->num_stripes;
@@ -5209,32 +5203,32 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                        stripe_index = 0;
                        stripe_offset = 0;
                } else {
-                       u64 tmp;
-
                        /*
                         * Mirror #0 or #1 means the original data block.
                         * Mirror #2 is RAID5 parity block.
                         * Mirror #3 is RAID6 Q block.
                         */
-                       stripe_index = do_div(stripe_nr, nr_data_stripes(map));
+                       stripe_nr = div_u64_rem(stripe_nr,
+                                       nr_data_stripes(map), &stripe_index);
                        if (mirror_num > 1)
                                stripe_index = nr_data_stripes(map) +
                                                mirror_num - 2;
 
                        /* We distribute the parity blocks across stripes */
-                       tmp = stripe_nr + stripe_index;
-                       stripe_index = do_div(tmp, map->num_stripes);
+                       div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
+                                       &stripe_index);
                        if (!(rw & (REQ_WRITE | REQ_DISCARD |
                                    REQ_GET_READ_MIRRORS)) && mirror_num <= 1)
                                mirror_num = 1;
                }
        } else {
                /*
-                * after this do_div call, stripe_nr is the number of stripes
-                * on this device we have to walk to find the data, and
-                * stripe_index is the number of our device in the stripe array
+                * after this, stripe_nr is the number of stripes on this
+                * device we have to walk to find the data, and stripe_index is
+                * the number of our device in the stripe array
                 */
-               stripe_index = do_div(stripe_nr, map->num_stripes);
+               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+                               &stripe_index);
                mirror_num = stripe_index + 1;
        }
        BUG_ON(stripe_index >= map->num_stripes);
@@ -5261,7 +5255,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
            need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
            mirror_num > 1)) {
                u64 tmp;
-               int i, rot;
+               unsigned rot;
 
                bbio->raid_map = (u64 *)((void *)bbio->stripes +
                                 sizeof(struct btrfs_bio_stripe) *
@@ -5269,8 +5263,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                                 sizeof(int) * tgtdev_indexes);
 
                /* Work out the disk rotation on this stripe-set */
-               tmp = stripe_nr;
-               rot = do_div(tmp, num_stripes);
+               div_u64_rem(stripe_nr, num_stripes, &rot);
 
                /* Fill in the logical address of each stripe */
                tmp = stripe_nr * nr_data_stripes(map);
@@ -5285,8 +5278,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        }
 
        if (rw & REQ_DISCARD) {
-               int factor = 0;
-               int sub_stripes = 0;
+               u32 factor = 0;
+               u32 sub_stripes = 0;
                u64 stripes_per_dev = 0;
                u32 remaining_stripes = 0;
                u32 last_stripe = 0;
@@ -5437,9 +5430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                        }
                }
                if (found) {
-                       u64 length = map->stripe_len;
-
-                       if (physical_of_found + length <=
+                       if (physical_of_found + map->stripe_len <=
                            dev_replace->cursor_left) {
                                struct btrfs_bio_stripe *tgtdev_stripe =
                                        bbio->stripes + num_stripes;
@@ -5535,15 +5526,15 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        rmap_len = map->stripe_len;
 
        if (map->type & BTRFS_BLOCK_GROUP_RAID10)
-               do_div(length, map->num_stripes / map->sub_stripes);
+               length = div_u64(length, map->num_stripes / map->sub_stripes);
        else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
-               do_div(length, map->num_stripes);
+               length = div_u64(length, map->num_stripes);
        else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-               do_div(length, nr_data_stripes(map));
+               length = div_u64(length, nr_data_stripes(map));
                rmap_len = map->stripe_len * nr_data_stripes(map);
        }
 
-       buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
+       buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
        BUG_ON(!buf); /* -ENOMEM */
 
        for (i = 0; i < map->num_stripes; i++) {
@@ -5554,11 +5545,11 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
                        continue;
 
                stripe_nr = physical - map->stripes[i].physical;
-               do_div(stripe_nr, map->stripe_len);
+               stripe_nr = div_u64(stripe_nr, map->stripe_len);
 
                if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
                        stripe_nr = stripe_nr * map->num_stripes + i;
-                       do_div(stripe_nr, map->sub_stripes);
+                       stripe_nr = div_u64(stripe_nr, map->sub_stripes);
                } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
                        stripe_nr = stripe_nr * map->num_stripes + i;
                } /* else if RAID[56], multiply by nr_data_stripes().
@@ -5835,8 +5826,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
        u64 length = 0;
        u64 map_length;
        int ret;
-       int dev_nr = 0;
-       int total_devs = 1;
+       int dev_nr;
+       int total_devs;
        struct btrfs_bio *bbio = NULL;
 
        length = bio->bi_iter.bi_size;
@@ -5877,11 +5868,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                BUG();
        }
 
-       while (dev_nr < total_devs) {
+       for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
                dev = bbio->stripes[dev_nr].dev;
                if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
                        bbio_error(bbio, first_bio, logical);
-                       dev_nr++;
                        continue;
                }
 
@@ -5894,7 +5884,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                        ret = breakup_stripe_bio(root, bbio, first_bio, dev,
                                                 dev_nr, rw, async_submit);
                        BUG_ON(ret);
-                       dev_nr++;
                        continue;
                }
 
@@ -5909,7 +5898,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                submit_stripe_bio(root, bbio, bio,
                                  bbio->stripes[dev_nr].physical, dev_nr, rw,
                                  async_submit);
-               dev_nr++;
        }
        btrfs_bio_counter_dec(root->fs_info);
        return 0;
index 83069de..ebc3133 100644 (file)
@@ -421,8 +421,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
                          struct btrfs_fs_devices **fs_devices_ret);
 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
-void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
-                              struct btrfs_fs_devices *fs_devices, int step);
+void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step);
 int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
                                         char *device_path,
                                         struct btrfs_device **device);
index 883b936..6f518c9 100644 (file)
@@ -261,7 +261,7 @@ out:
 ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
        struct btrfs_key key, found_key;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
@@ -364,22 +364,42 @@ const struct xattr_handler *btrfs_xattr_handlers[] = {
 /*
  * Check if the attribute is in a supported namespace.
  *
- * This applied after the check for the synthetic attributes in the system
+ * This is applied after the check for the synthetic attributes in the system
  * namespace.
  */
-static bool btrfs_is_valid_xattr(const char *name)
+static int btrfs_is_valid_xattr(const char *name)
 {
-       return !strncmp(name, XATTR_SECURITY_PREFIX,
-                       XATTR_SECURITY_PREFIX_LEN) ||
-              !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
-              !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
-              !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) ||
-               !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN);
+       int len = strlen(name);
+       int prefixlen = 0;
+
+       if (!strncmp(name, XATTR_SECURITY_PREFIX,
+                       XATTR_SECURITY_PREFIX_LEN))
+               prefixlen = XATTR_SECURITY_PREFIX_LEN;
+       else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+               prefixlen = XATTR_SYSTEM_PREFIX_LEN;
+       else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+               prefixlen = XATTR_TRUSTED_PREFIX_LEN;
+       else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+               prefixlen = XATTR_USER_PREFIX_LEN;
+       else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+               prefixlen = XATTR_BTRFS_PREFIX_LEN;
+       else
+               return -EOPNOTSUPP;
+
+       /*
+        * The name cannot consist of just prefix
+        */
+       if (len <= prefixlen)
+               return -EINVAL;
+
+       return 0;
 }
 
 ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
                       void *buffer, size_t size)
 {
+       int ret;
+
        /*
         * If this is a request for a synthetic attribute in the system.*
         * namespace use the generic infrastructure to resolve a handler
@@ -388,15 +408,17 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
                return generic_getxattr(dentry, name, buffer, size);
 
-       if (!btrfs_is_valid_xattr(name))
-               return -EOPNOTSUPP;
-       return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
+       ret = btrfs_is_valid_xattr(name);
+       if (ret)
+               return ret;
+       return __btrfs_getxattr(d_inode(dentry), name, buffer, size);
 }
 
 int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                   size_t size, int flags)
 {
-       struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+       struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
+       int ret;
 
        /*
         * The permission on security.* and system.* is not checked
@@ -413,23 +435,25 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
                return generic_setxattr(dentry, name, value, size, flags);
 
-       if (!btrfs_is_valid_xattr(name))
-               return -EOPNOTSUPP;
+       ret = btrfs_is_valid_xattr(name);
+       if (ret)
+               return ret;
 
        if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
-               return btrfs_set_prop(dentry->d_inode, name,
+               return btrfs_set_prop(d_inode(dentry), name,
                                      value, size, flags);
 
        if (size == 0)
                value = "";  /* empty EA, do not remove */
 
-       return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
+       return __btrfs_setxattr(NULL, d_inode(dentry), name, value, size,
                                flags);
 }
 
 int btrfs_removexattr(struct dentry *dentry, const char *name)
 {
-       struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+       struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
+       int ret;
 
        /*
         * The permission on security.* and system.* is not checked
@@ -446,14 +470,15 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
                return generic_removexattr(dentry, name);
 
-       if (!btrfs_is_valid_xattr(name))
-               return -EOPNOTSUPP;
+       ret = btrfs_is_valid_xattr(name);
+       if (ret)
+               return ret;
 
        if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
-               return btrfs_set_prop(dentry->d_inode, name,
+               return btrfs_set_prop(d_inode(dentry), name,
                                      NULL, 0, XATTR_REPLACE);
 
-       return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
+       return __btrfs_setxattr(NULL, d_inode(dentry), name, NULL, 0,
                                XATTR_REPLACE);
 }
 
index fb22fd8..82990b8 100644 (file)
@@ -403,7 +403,7 @@ next:
        return ret;
 }
 
-struct btrfs_compress_op btrfs_zlib_compress = {
+const struct btrfs_compress_op btrfs_zlib_compress = {
        .alloc_workspace        = zlib_alloc_workspace,
        .free_workspace         = zlib_free_workspace,
        .compress_pages         = zlib_compress_pages,
index fbb08e9..6af790f 100644 (file)
@@ -123,11 +123,11 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
 
        /* check parameters */
        ret = -EOPNOTSUPP;
-       if (!root->d_inode ||
-           !root->d_inode->i_op->lookup ||
-           !root->d_inode->i_op->mkdir ||
-           !root->d_inode->i_op->setxattr ||
-           !root->d_inode->i_op->getxattr ||
+       if (d_is_negative(root) ||
+           !d_backing_inode(root)->i_op->lookup ||
+           !d_backing_inode(root)->i_op->mkdir ||
+           !d_backing_inode(root)->i_op->setxattr ||
+           !d_backing_inode(root)->i_op->getxattr ||
            !root->d_sb->s_op->statfs ||
            !root->d_sb->s_op->sync_fs)
                goto error_unsupported;
index 2324262..afa023d 100644 (file)
@@ -441,12 +441,12 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
 
        fscache_set_store_limit(&object->fscache, ni_size);
 
-       oi_size = i_size_read(object->backer->d_inode);
+       oi_size = i_size_read(d_backing_inode(object->backer));
        if (oi_size == ni_size)
                return 0;
 
        cachefiles_begin_secure(cache, &saved_cred);
-       mutex_lock(&object->backer->d_inode->i_mutex);
+       mutex_lock(&d_inode(object->backer)->i_mutex);
 
        /* if there's an extension to a partial page at the end of the backing
         * file, we need to discard the partial page so that we pick up new
@@ -465,7 +465,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
        ret = notify_change(object->backer, &newattrs, NULL);
 
 truncate_failed:
-       mutex_unlock(&object->backer->d_inode->i_mutex);
+       mutex_unlock(&d_inode(object->backer)->i_mutex);
        cachefiles_end_secure(cache, saved_cred);
 
        if (ret == -EIO) {
index 1e51714..ab857ab 100644 (file)
@@ -286,13 +286,13 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
                if (ret < 0) {
                        cachefiles_io_error(cache, "Unlink security error");
                } else {
-                       ret = vfs_unlink(dir->d_inode, rep, NULL);
+                       ret = vfs_unlink(d_inode(dir), rep, NULL);
 
                        if (preemptive)
                                cachefiles_mark_object_buried(cache, rep);
                }
 
-               mutex_unlock(&dir->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dir)->i_mutex);
 
                if (ret == -EIO)
                        cachefiles_io_error(cache, "Unlink failed");
@@ -303,7 +303,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
 
        /* directories have to be moved to the graveyard */
        _debug("move stale object to graveyard");
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
 
 try_again:
        /* first step is to make up a grave dentry in the graveyard */
@@ -355,7 +355,7 @@ try_again:
                return -EIO;
        }
 
-       if (grave->d_inode) {
+       if (d_is_positive(grave)) {
                unlock_rename(cache->graveyard, dir);
                dput(grave);
                grave = NULL;
@@ -387,8 +387,8 @@ try_again:
        if (ret < 0) {
                cachefiles_io_error(cache, "Rename security error %d", ret);
        } else {
-               ret = vfs_rename(dir->d_inode, rep,
-                                cache->graveyard->d_inode, grave, NULL, 0);
+               ret = vfs_rename(d_inode(dir), rep,
+                                d_inode(cache->graveyard), grave, NULL, 0);
                if (ret != 0 && ret != -ENOMEM)
                        cachefiles_io_error(cache,
                                            "Rename failed with error %d", ret);
@@ -415,18 +415,18 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
        _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
 
        ASSERT(object->dentry);
-       ASSERT(object->dentry->d_inode);
+       ASSERT(d_backing_inode(object->dentry));
        ASSERT(object->dentry->d_parent);
 
        dir = dget_parent(object->dentry);
 
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
 
        if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
                /* object allocation for the same key preemptively deleted this
                 * object's file so that it could create its own file */
                _debug("object preemptively buried");
-               mutex_unlock(&dir->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dir)->i_mutex);
                ret = 0;
        } else {
                /* we need to check that our parent is _still_ our parent - it
@@ -438,7 +438,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
                        /* it got moved, presumably by cachefilesd culling it,
                         * so it's no longer in the key path and we can ignore
                         * it */
-                       mutex_unlock(&dir->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dir)->i_mutex);
                        ret = 0;
                }
        }
@@ -473,7 +473,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
        path.mnt = cache->mnt;
 
        ASSERT(parent->dentry);
-       ASSERT(parent->dentry->d_inode);
+       ASSERT(d_backing_inode(parent->dentry));
 
        if (!(d_is_dir(parent->dentry))) {
                // TODO: convert file to dir
@@ -497,7 +497,7 @@ lookup_again:
        /* search the current directory for the element name */
        _debug("lookup '%s'", name);
 
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
 
        start = jiffies;
        next = lookup_one_len(name, dir, nlen);
@@ -505,21 +505,21 @@ lookup_again:
        if (IS_ERR(next))
                goto lookup_error;
 
-       _debug("next -> %p %s", next, next->d_inode ? "positive" : "negative");
+       _debug("next -> %p %s", next, d_backing_inode(next) ? "positive" : "negative");
 
        if (!key)
-               object->new = !next->d_inode;
+               object->new = !d_backing_inode(next);
 
        /* if this element of the path doesn't exist, then the lookup phase
         * failed, and we can release any readers in the certain knowledge that
         * there's nothing for them to actually read */
-       if (!next->d_inode)
+       if (d_is_negative(next))
                fscache_object_lookup_negative(&object->fscache);
 
        /* we need to create the object if it's negative */
        if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) {
                /* index objects and intervening tree levels must be subdirs */
-               if (!next->d_inode) {
+               if (d_is_negative(next)) {
                        ret = cachefiles_has_space(cache, 1, 0);
                        if (ret < 0)
                                goto create_error;
@@ -529,26 +529,26 @@ lookup_again:
                        if (ret < 0)
                                goto create_error;
                        start = jiffies;
-                       ret = vfs_mkdir(dir->d_inode, next, 0);
+                       ret = vfs_mkdir(d_inode(dir), next, 0);
                        cachefiles_hist(cachefiles_mkdir_histogram, start);
                        if (ret < 0)
                                goto create_error;
 
-                       ASSERT(next->d_inode);
+                       ASSERT(d_backing_inode(next));
 
                        _debug("mkdir -> %p{%p{ino=%lu}}",
-                              next, next->d_inode, next->d_inode->i_ino);
+                              next, d_backing_inode(next), d_backing_inode(next)->i_ino);
 
                } else if (!d_can_lookup(next)) {
                        pr_err("inode %lu is not a directory\n",
-                              next->d_inode->i_ino);
+                              d_backing_inode(next)->i_ino);
                        ret = -ENOBUFS;
                        goto error;
                }
 
        } else {
                /* non-index objects start out life as files */
-               if (!next->d_inode) {
+               if (d_is_negative(next)) {
                        ret = cachefiles_has_space(cache, 1, 0);
                        if (ret < 0)
                                goto create_error;
@@ -558,21 +558,21 @@ lookup_again:
                        if (ret < 0)
                                goto create_error;
                        start = jiffies;
-                       ret = vfs_create(dir->d_inode, next, S_IFREG, true);
+                       ret = vfs_create(d_inode(dir), next, S_IFREG, true);
                        cachefiles_hist(cachefiles_create_histogram, start);
                        if (ret < 0)
                                goto create_error;
 
-                       ASSERT(next->d_inode);
+                       ASSERT(d_backing_inode(next));
 
                        _debug("create -> %p{%p{ino=%lu}}",
-                              next, next->d_inode, next->d_inode->i_ino);
+                              next, d_backing_inode(next), d_backing_inode(next)->i_ino);
 
                } else if (!d_can_lookup(next) &&
                           !d_is_reg(next)
                           ) {
                        pr_err("inode %lu is not a file or directory\n",
-                              next->d_inode->i_ino);
+                              d_backing_inode(next)->i_ino);
                        ret = -ENOBUFS;
                        goto error;
                }
@@ -581,7 +581,7 @@ lookup_again:
        /* process the next component */
        if (key) {
                _debug("advance");
-               mutex_unlock(&dir->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dir)->i_mutex);
                dput(dir);
                dir = next;
                next = NULL;
@@ -617,7 +617,7 @@ lookup_again:
        /* note that we're now using this object */
        ret = cachefiles_mark_object_active(cache, object);
 
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        dput(dir);
        dir = NULL;
 
@@ -646,7 +646,7 @@ lookup_again:
                        const struct address_space_operations *aops;
 
                        ret = -EPERM;
-                       aops = object->dentry->d_inode->i_mapping->a_ops;
+                       aops = d_backing_inode(object->dentry)->i_mapping->a_ops;
                        if (!aops->bmap)
                                goto check_error;
 
@@ -659,7 +659,7 @@ lookup_again:
        object->new = 0;
        fscache_obtained_object(&object->fscache);
 
-       _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino);
+       _leave(" = 0 [%lu]", d_backing_inode(object->dentry)->i_ino);
        return 0;
 
 create_error:
@@ -695,7 +695,7 @@ lookup_error:
                cachefiles_io_error(cache, "Lookup failed");
        next = NULL;
 error:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        dput(next);
 error_out2:
        dput(dir);
@@ -719,7 +719,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
        _enter(",,%s", dirname);
 
        /* search the current directory for the element name */
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
 
        start = jiffies;
        subdir = lookup_one_len(dirname, dir, strlen(dirname));
@@ -731,10 +731,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
        }
 
        _debug("subdir -> %p %s",
-              subdir, subdir->d_inode ? "positive" : "negative");
+              subdir, d_backing_inode(subdir) ? "positive" : "negative");
 
        /* we need to create the subdir if it doesn't exist yet */
-       if (!subdir->d_inode) {
+       if (d_is_negative(subdir)) {
                ret = cachefiles_has_space(cache, 1, 0);
                if (ret < 0)
                        goto mkdir_error;
@@ -746,22 +746,22 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
                ret = security_path_mkdir(&path, subdir, 0700);
                if (ret < 0)
                        goto mkdir_error;
-               ret = vfs_mkdir(dir->d_inode, subdir, 0700);
+               ret = vfs_mkdir(d_inode(dir), subdir, 0700);
                if (ret < 0)
                        goto mkdir_error;
 
-               ASSERT(subdir->d_inode);
+               ASSERT(d_backing_inode(subdir));
 
                _debug("mkdir -> %p{%p{ino=%lu}}",
                       subdir,
-                      subdir->d_inode,
-                      subdir->d_inode->i_ino);
+                      d_backing_inode(subdir),
+                      d_backing_inode(subdir)->i_ino);
        }
 
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
 
        /* we need to make sure the subdir is a directory */
-       ASSERT(subdir->d_inode);
+       ASSERT(d_backing_inode(subdir));
 
        if (!d_can_lookup(subdir)) {
                pr_err("%s is not a directory\n", dirname);
@@ -770,18 +770,18 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
        }
 
        ret = -EPERM;
-       if (!subdir->d_inode->i_op->setxattr ||
-           !subdir->d_inode->i_op->getxattr ||
-           !subdir->d_inode->i_op->lookup ||
-           !subdir->d_inode->i_op->mkdir ||
-           !subdir->d_inode->i_op->create ||
-           (!subdir->d_inode->i_op->rename &&
-            !subdir->d_inode->i_op->rename2) ||
-           !subdir->d_inode->i_op->rmdir ||
-           !subdir->d_inode->i_op->unlink)
+       if (!d_backing_inode(subdir)->i_op->setxattr ||
+           !d_backing_inode(subdir)->i_op->getxattr ||
+           !d_backing_inode(subdir)->i_op->lookup ||
+           !d_backing_inode(subdir)->i_op->mkdir ||
+           !d_backing_inode(subdir)->i_op->create ||
+           (!d_backing_inode(subdir)->i_op->rename &&
+            !d_backing_inode(subdir)->i_op->rename2) ||
+           !d_backing_inode(subdir)->i_op->rmdir ||
+           !d_backing_inode(subdir)->i_op->unlink)
                goto check_error;
 
-       _leave(" = [%lu]", subdir->d_inode->i_ino);
+       _leave(" = [%lu]", d_backing_inode(subdir)->i_ino);
        return subdir;
 
 check_error:
@@ -790,19 +790,19 @@ check_error:
        return ERR_PTR(ret);
 
 mkdir_error:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        dput(subdir);
        pr_err("mkdir %s failed with error %d\n", dirname, ret);
        return ERR_PTR(ret);
 
 lookup_error:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        ret = PTR_ERR(subdir);
        pr_err("Lookup %s failed with error %d\n", dirname, ret);
        return ERR_PTR(ret);
 
 nomem_d_alloc:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        _leave(" = -ENOMEM");
        return ERR_PTR(-ENOMEM);
 }
@@ -827,7 +827,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
        //       dir, filename);
 
        /* look up the victim */
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
 
        start = jiffies;
        victim = lookup_one_len(filename, dir, strlen(filename));
@@ -836,13 +836,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
                goto lookup_error;
 
        //_debug("victim -> %p %s",
-       //       victim, victim->d_inode ? "positive" : "negative");
+       //       victim, d_backing_inode(victim) ? "positive" : "negative");
 
        /* if the object is no longer there then we probably retired the object
         * at the netfs's request whilst the cull was in progress
         */
-       if (!victim->d_inode) {
-               mutex_unlock(&dir->d_inode->i_mutex);
+       if (d_is_negative(victim)) {
+               mutex_unlock(&d_inode(dir)->i_mutex);
                dput(victim);
                _leave(" = -ENOENT [absent]");
                return ERR_PTR(-ENOENT);
@@ -871,13 +871,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
 
 object_in_use:
        read_unlock(&cache->active_lock);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        dput(victim);
        //_leave(" = -EBUSY [in use]");
        return ERR_PTR(-EBUSY);
 
 lookup_error:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        ret = PTR_ERR(victim);
        if (ret == -ENOENT) {
                /* file or dir now absent - probably retired by netfs */
@@ -913,7 +913,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
                return PTR_ERR(victim);
 
        _debug("victim -> %p %s",
-              victim, victim->d_inode ? "positive" : "negative");
+              victim, d_backing_inode(victim) ? "positive" : "negative");
 
        /* okay... the victim is not being used so we can cull it
         * - start by marking it as stale
@@ -936,7 +936,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
        return 0;
 
 error_unlock:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
 error:
        dput(victim);
        if (ret == -ENOENT) {
@@ -971,7 +971,7 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
        if (IS_ERR(victim))
                return PTR_ERR(victim);
 
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        dput(victim);
        //_leave(" = 0");
        return 0;
index c6cd8d7..3cbb0e8 100644 (file)
@@ -74,12 +74,12 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
 static int cachefiles_read_reissue(struct cachefiles_object *object,
                                   struct cachefiles_one_read *monitor)
 {
-       struct address_space *bmapping = object->backer->d_inode->i_mapping;
+       struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping;
        struct page *backpage = monitor->back_page, *backpage2;
        int ret;
 
        _enter("{ino=%lx},{%lx,%lx}",
-              object->backer->d_inode->i_ino,
+              d_backing_inode(object->backer)->i_ino,
               backpage->index, backpage->flags);
 
        /* skip if the page was truncated away completely */
@@ -157,7 +157,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
        object = container_of(op->op.object,
                              struct cachefiles_object, fscache);
 
-       _enter("{ino=%lu}", object->backer->d_inode->i_ino);
+       _enter("{ino=%lu}", d_backing_inode(object->backer)->i_ino);
 
        max = 8;
        spin_lock_irq(&object->work_lock);
@@ -247,7 +247,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
        init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);
 
        /* attempt to get hold of the backing page */
-       bmapping = object->backer->d_inode->i_mapping;
+       bmapping = d_backing_inode(object->backer)->i_mapping;
        newpage = NULL;
 
        for (;;) {
@@ -408,7 +408,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
        if (!object->backer)
                goto enobufs;
 
-       inode = object->backer->d_inode;
+       inode = d_backing_inode(object->backer);
        ASSERT(S_ISREG(inode->i_mode));
        ASSERT(inode->i_mapping->a_ops->bmap);
        ASSERT(inode->i_mapping->a_ops->readpages);
@@ -468,7 +468,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
                                        struct list_head *list)
 {
        struct cachefiles_one_read *monitor = NULL;
-       struct address_space *bmapping = object->backer->d_inode->i_mapping;
+       struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping;
        struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
        int ret = 0;
 
@@ -705,7 +705,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
        if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
                space = 0;
 
-       inode = object->backer->d_inode;
+       inode = d_backing_inode(object->backer);
        ASSERT(S_ISREG(inode->i_mode));
        ASSERT(inode->i_mapping->a_ops->bmap);
        ASSERT(inode->i_mapping->a_ops->readpages);
index 396c18e..31bbc05 100644 (file)
@@ -55,14 +55,14 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
 {
        int ret;
 
-       ret = security_inode_mkdir(root->d_inode, root, 0);
+       ret = security_inode_mkdir(d_backing_inode(root), root, 0);
        if (ret < 0) {
                pr_err("Security denies permission to make dirs: error %d",
                       ret);
                return ret;
        }
 
-       ret = security_inode_create(root->d_inode, root, 0);
+       ret = security_inode_create(d_backing_inode(root), root, 0);
        if (ret < 0)
                pr_err("Security denies permission to create files: error %d",
                       ret);
@@ -95,7 +95,7 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
 
        /* use the cache root dir's security context as the basis with
         * which create files */
-       ret = set_create_files_as(new, root->d_inode);
+       ret = set_create_files_as(new, d_backing_inode(root));
        if (ret < 0) {
                abort_creds(new);
                cachefiles_begin_secure(cache, _saved_cred);
index a8a6874..d31c1a7 100644 (file)
@@ -33,7 +33,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
        int ret;
 
        ASSERT(dentry);
-       ASSERT(dentry->d_inode);
+       ASSERT(d_backing_inode(dentry));
 
        if (!object->fscache.cookie)
                strcpy(type, "C3");
@@ -52,7 +52,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
 
        if (ret != -EEXIST) {
                pr_err("Can't set xattr on %pd [%lu] (err %d)\n",
-                      dentry, dentry->d_inode->i_ino,
+                      dentry, d_backing_inode(dentry)->i_ino,
                       -ret);
                goto error;
        }
@@ -64,7 +64,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
                        goto bad_type_length;
 
                pr_err("Can't read xattr on %pd [%lu] (err %d)\n",
-                      dentry, dentry->d_inode->i_ino,
+                      dentry, d_backing_inode(dentry)->i_ino,
                       -ret);
                goto error;
        }
@@ -84,14 +84,14 @@ error:
 
 bad_type_length:
        pr_err("Cache object %lu type xattr length incorrect\n",
-              dentry->d_inode->i_ino);
+              d_backing_inode(dentry)->i_ino);
        ret = -EIO;
        goto error;
 
 bad_type:
        xtype[2] = 0;
        pr_err("Cache object %pd [%lu] type %s not %s\n",
-              dentry, dentry->d_inode->i_ino,
+              dentry, d_backing_inode(dentry)->i_ino,
               xtype, type);
        ret = -EIO;
        goto error;
@@ -165,7 +165,7 @@ int cachefiles_check_auxdata(struct cachefiles_object *object)
        int ret;
 
        ASSERT(dentry);
-       ASSERT(dentry->d_inode);
+       ASSERT(d_backing_inode(dentry));
        ASSERT(object->fscache.cookie->def->check_aux);
 
        auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL);
@@ -204,7 +204,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
        _enter("%p,#%d", object, auxdata->len);
 
        ASSERT(dentry);
-       ASSERT(dentry->d_inode);
+       ASSERT(d_backing_inode(dentry));
 
        auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, cachefiles_gfp);
        if (!auxbuf) {
@@ -225,7 +225,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
 
                cachefiles_io_error_obj(object,
                                        "Can't read xattr on %lu (err %d)",
-                                       dentry->d_inode->i_ino, -ret);
+                                       d_backing_inode(dentry)->i_ino, -ret);
                goto error;
        }
 
@@ -276,7 +276,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
                        cachefiles_io_error_obj(object,
                                                "Can't update xattr on %lu"
                                                " (error %d)",
-                                               dentry->d_inode->i_ino, -ret);
+                                               d_backing_inode(dentry)->i_ino, -ret);
                        goto error;
                }
        }
@@ -291,7 +291,7 @@ error:
 
 bad_type_length:
        pr_err("Cache object %lu xattr length incorrect\n",
-              dentry->d_inode->i_ino);
+              d_backing_inode(dentry)->i_ino);
        ret = -EIO;
        goto error;
 
@@ -316,7 +316,7 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
                        cachefiles_io_error(cache,
                                            "Can't remove xattr from %lu"
                                            " (error %d)",
-                                           dentry->d_inode->i_ino, -ret);
+                                           d_backing_inode(dentry)->i_ino, -ret);
        }
 
        _leave(" = %d", ret);
index 155ab9c..e162bcd 100644 (file)
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
                     inode, page, (int)pos, (int)len);
 
                r = ceph_update_writeable_page(file, pos, len, page);
+               if (r < 0)
+                       page_cache_release(page);
+               else
+                       *pagep = page;
        } while (r == -EAGAIN);
 
        return r;
@@ -1534,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
 
        osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
 
-       err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
-                                   "inline_version", &inline_version,
-                                   sizeof(inline_version),
-                                   CEPH_OSD_CMPXATTR_OP_GT,
-                                   CEPH_OSD_CMPXATTR_MODE_U64);
-       if (err)
-               goto out_put;
-
-       err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
-                                   "inline_version", &inline_version,
-                                   sizeof(inline_version), 0, 0);
-       if (err)
-               goto out_put;
+       {
+               __le64 xattr_buf = cpu_to_le64(inline_version);
+               err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
+                                           "inline_version", &xattr_buf,
+                                           sizeof(xattr_buf),
+                                           CEPH_OSD_CMPXATTR_OP_GT,
+                                           CEPH_OSD_CMPXATTR_MODE_U64);
+               if (err)
+                       goto out_put;
+       }
+
+       {
+               char xattr_buf[32];
+               int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
+                                        "%llu", inline_version);
+               err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
+                                           "inline_version",
+                                           xattr_buf, xattr_len, 0, 0);
+               if (err)
+                       goto out_put;
+       }
 
        ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
        err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
index 8172775..be5ea6a 100644 (file)
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
        return ret;
 }
 
+static void drop_inode_snap_realm(struct ceph_inode_info *ci)
+{
+       struct ceph_snap_realm *realm = ci->i_snap_realm;
+       spin_lock(&realm->inodes_with_caps_lock);
+       list_del_init(&ci->i_snap_realm_item);
+       ci->i_snap_realm_counter++;
+       ci->i_snap_realm = NULL;
+       spin_unlock(&realm->inodes_with_caps_lock);
+       ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
+                           realm);
+}
+
 /*
  * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
  *
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
        if (removed)
                ceph_put_cap(mdsc, cap);
 
-       if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
-               struct ceph_snap_realm *realm = ci->i_snap_realm;
-               spin_lock(&realm->inodes_with_caps_lock);
-               list_del_init(&ci->i_snap_realm_item);
-               ci->i_snap_realm_counter++;
-               ci->i_snap_realm = NULL;
-               spin_unlock(&realm->inodes_with_caps_lock);
-               ceph_put_snap_realm(mdsc, realm);
-       }
+       /* when reconnect denied, we remove session caps forcibly,
+        * i_wr_ref can be non-zero. If there are ongoing write,
+        * keep i_snap_realm.
+        */
+       if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
+               drop_inode_snap_realm(ci);
+
        if (!__ceph_is_any_real_caps(ci))
                __cap_delay_cancel(mdsc, ci);
 }
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
        int was = ci->i_dirty_caps;
        int dirty = 0;
 
+       if (!ci->i_auth_cap) {
+               pr_warn("__mark_dirty_caps %p %llx mask %s, "
+                       "but no auth cap (session was closed?)\n",
+                       inode, ceph_ino(inode), ceph_cap_string(mask));
+               return 0;
+       }
+
        dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
             ceph_cap_string(mask), ceph_cap_string(was),
             ceph_cap_string(was | mask));
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
                                ci->i_snap_realm->cached_context);
                dout(" inode %p now dirty snapc %p auth cap %p\n",
                     &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
-               WARN_ON(!ci->i_auth_cap);
                BUG_ON(!list_empty(&ci->i_dirty_item));
                spin_lock(&mdsc->cap_dirty_lock);
                list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1545,7 +1561,19 @@ retry_locked:
        if (!mdsc->stopping && inode->i_nlink > 0) {
                if (want) {
                        retain |= CEPH_CAP_ANY;       /* be greedy */
+               } else if (S_ISDIR(inode->i_mode) &&
+                          (issued & CEPH_CAP_FILE_SHARED) &&
+                           __ceph_dir_is_complete(ci)) {
+                       /*
+                        * If a directory is complete, we want to keep
+                        * the exclusive cap. So that MDS does not end up
+                        * revoking the shared cap on every create/unlink
+                        * operation.
+                        */
+                       want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
+                       retain |= want;
                } else {
+
                        retain |= CEPH_CAP_ANY_SHARED;
                        /*
                         * keep RD only if we didn't have the file open RW,
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
                                        wake = 1;
                                }
                        }
+                       /* see comment in __ceph_remove_cap() */
+                       if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
+                               drop_inode_snap_realm(ci);
                }
        spin_unlock(&ci->i_ceph_lock);
 
@@ -3391,7 +3422,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
 int ceph_encode_dentry_release(void **p, struct dentry *dentry,
                               int mds, int drop, int unless)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        struct ceph_mds_request_release *rel = *p;
        struct ceph_dentry_info *di = ceph_dentry(dentry);
        int force = 0;
index 1b23551..31f8314 100644 (file)
@@ -84,7 +84,7 @@ static int mdsc_show(struct seq_file *s, void *p)
                                path = NULL;
                        spin_lock(&req->r_dentry->d_lock);
                        seq_printf(s, " #%llx/%pd (%s)",
-                                  ceph_ino(req->r_dentry->d_parent->d_inode),
+                                  ceph_ino(d_inode(req->r_dentry->d_parent)),
                                   req->r_dentry,
                                   path ? path : "");
                        spin_unlock(&req->r_dentry->d_lock);
index 83e9976..4248307 100644 (file)
@@ -49,9 +49,9 @@ int ceph_init_dentry(struct dentry *dentry)
                goto out_unlock;
        }
 
-       if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+       if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP)
                d_set_d_op(dentry, &ceph_dentry_ops);
-       else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
+       else if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR)
                d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
        else
                d_set_d_op(dentry, &ceph_snap_dentry_ops);
@@ -77,7 +77,7 @@ struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
 
        spin_lock(&dentry->d_lock);
        if (!IS_ROOT(dentry)) {
-               inode = dentry->d_parent->d_inode;
+               inode = d_inode(dentry->d_parent);
                ihold(inode);
        }
        spin_unlock(&dentry->d_lock);
@@ -122,7 +122,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
 {
        struct ceph_file_info *fi = file->private_data;
        struct dentry *parent = file->f_path.dentry;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        struct list_head *p;
        struct dentry *dentry, *last;
        struct ceph_dentry_info *di;
@@ -161,15 +161,15 @@ more:
                }
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
                if (di->lease_shared_gen == shared_gen &&
-                   !d_unhashed(dentry) && dentry->d_inode &&
-                   ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
-                   ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
+                   !d_unhashed(dentry) && d_really_is_positive(dentry) &&
+                   ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR &&
+                   ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH &&
                    fpos_cmp(ctx->pos, di->offset) <= 0)
                        break;
                dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry,
                     dentry, di->offset,
                     ctx->pos, d_unhashed(dentry) ? " unhashed" : "",
-                    !dentry->d_inode ? " null" : "");
+                    !d_inode(dentry) ? " null" : "");
                spin_unlock(&dentry->d_lock);
                p = p->prev;
                dentry = list_entry(p, struct dentry, d_child);
@@ -189,11 +189,11 @@ more:
        }
 
        dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos,
-            dentry, dentry, dentry->d_inode);
+            dentry, dentry, d_inode(dentry));
        if (!dir_emit(ctx, dentry->d_name.name,
                      dentry->d_name.len,
-                     ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
-                     dentry->d_inode->i_mode >> 12)) {
+                     ceph_translate_ino(dentry->d_sb, d_inode(dentry)->i_ino),
+                     d_inode(dentry)->i_mode >> 12)) {
                if (last) {
                        /* remember our position */
                        fi->dentry = last;
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
        /* can we use the dcache? */
        spin_lock(&ci->i_ceph_lock);
        if ((ctx->pos == 2 || fi->dentry) &&
+           ceph_test_mount_opt(fsc, DCACHE) &&
            !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
            ceph_snap(inode) != CEPH_SNAPDIR &&
            __ceph_dir_is_complete_ordered(ci) &&
@@ -336,16 +337,23 @@ more:
                        ceph_mdsc_put_request(req);
                        return err;
                }
-               req->r_inode = inode;
-               ihold(inode);
-               req->r_dentry = dget(file->f_path.dentry);
                /* hints to request -> mds selection code */
                req->r_direct_mode = USE_AUTH_MDS;
                req->r_direct_hash = ceph_frag_value(frag);
                req->r_direct_is_hash = true;
-               req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+               if (fi->last_name) {
+                       req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+                       if (!req->r_path2) {
+                               ceph_mdsc_put_request(req);
+                               return -ENOMEM;
+                       }
+               }
                req->r_readdir_offset = fi->next_offset;
                req->r_args.readdir.frag = cpu_to_le32(frag);
+
+               req->r_inode = inode;
+               ihold(inode);
+               req->r_dentry = dget(file->f_path.dentry);
                err = ceph_mdsc_do_request(mdsc, NULL, req);
                if (err < 0) {
                        ceph_mdsc_put_request(req);
@@ -535,7 +543,7 @@ int ceph_handle_snapdir(struct ceph_mds_request *req,
                        struct dentry *dentry, int err)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
-       struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
+       struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */
 
        /* .snap dir? */
        if (err == -ENOENT &&
@@ -571,8 +579,8 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
                err = 0;
                if (!req->r_reply_info.head->is_dentry) {
                        dout("ENOENT and no trace, dentry %p inode %p\n",
-                            dentry, dentry->d_inode);
-                       if (dentry->d_inode) {
+                            dentry, d_inode(dentry));
+                       if (d_really_is_positive(dentry)) {
                                d_drop(dentry);
                                err = -ENOENT;
                        } else {
@@ -619,7 +627,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                return ERR_PTR(err);
 
        /* can we conclude ENOENT locally? */
-       if (dentry->d_inode == NULL) {
+       if (d_really_is_negative(dentry)) {
                struct ceph_inode_info *ci = ceph_inode(dir);
                struct ceph_dentry_info *di = ceph_dentry(dentry);
 
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                            fsc->mount_options->snapdir_name,
                            dentry->d_name.len) &&
                    !is_root_ceph_dentry(dir, dentry) &&
+                   ceph_test_mount_opt(fsc, DCACHE) &&
                    __ceph_dir_is_complete(ci) &&
                    (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
                        spin_unlock(&ci->i_ceph_lock);
@@ -725,7 +734,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
        ceph_mdsc_put_request(req);
 out:
        if (!err)
-               ceph_init_inode_acls(dentry->d_inode, &acls);
+               ceph_init_inode_acls(d_inode(dentry), &acls);
        else
                d_drop(dentry);
        ceph_release_acls_info(&acls);
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
                err = PTR_ERR(req);
                goto out;
        }
-       req->r_dentry = dget(dentry);
-       req->r_num_caps = 2;
        req->r_path2 = kstrdup(dest, GFP_NOFS);
+       if (!req->r_path2) {
+               err = -ENOMEM;
+               ceph_mdsc_put_request(req);
+               goto out;
+       }
        req->r_locked_dir = dir;
+       req->r_dentry = dget(dentry);
+       req->r_num_caps = 2;
        req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
        err = ceph_mdsc_do_request(mdsc, dir, req);
@@ -821,7 +835,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        ceph_mdsc_put_request(req);
 out:
        if (!err)
-               ceph_init_inode_acls(dentry->d_inode, &acls);
+               ceph_init_inode_acls(d_inode(dentry), &acls);
        else
                d_drop(dentry);
        ceph_release_acls_info(&acls);
@@ -858,8 +872,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
        if (err) {
                d_drop(dentry);
        } else if (!req->r_reply_info.head->is_dentry) {
-               ihold(old_dentry->d_inode);
-               d_instantiate(dentry, old_dentry->d_inode);
+               ihold(d_inode(old_dentry));
+               d_instantiate(dentry, d_inode(old_dentry));
        }
        ceph_mdsc_put_request(req);
        return err;
@@ -892,7 +906,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ceph_mds_request *req;
        int err = -EROFS;
        int op;
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_mds_request *req;
+       int op = CEPH_MDS_OP_RENAME;
        int err;
 
        if (ceph_snap(old_dir) != ceph_snap(new_dir))
                return -EXDEV;
-       if (ceph_snap(old_dir) != CEPH_NOSNAP ||
-           ceph_snap(new_dir) != CEPH_NOSNAP)
-               return -EROFS;
+       if (ceph_snap(old_dir) != CEPH_NOSNAP) {
+               if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
+                       op = CEPH_MDS_OP_RENAMESNAP;
+               else
+                       return -EROFS;
+       }
        dout("rename dir %p dentry %p to dir %p dentry %p\n",
             old_dir, old_dentry, new_dir, new_dentry);
-       req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
+       req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
        if (IS_ERR(req))
                return PTR_ERR(req);
        ihold(old_dir);
@@ -957,8 +975,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
        req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
        /* release LINK_RDCACHE on source inode (mds will lock it) */
        req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
-       if (new_dentry->d_inode)
-               req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode);
+       if (d_really_is_positive(new_dentry))
+               req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry));
        err = ceph_mdsc_do_request(mdsc, old_dir, req);
        if (!err && !req->r_reply_info.head->is_dentry) {
                /*
@@ -1024,7 +1042,7 @@ static int dentry_lease_is_valid(struct dentry *dentry)
                        if (di->lease_renew_after &&
                            time_after(jiffies, di->lease_renew_after)) {
                                /* we should renew */
-                               dir = dentry->d_parent->d_inode;
+                               dir = d_inode(dentry->d_parent);
                                session = ceph_get_mds_session(s);
                                seq = di->lease_seq;
                                di->lease_renew_after = 0;
@@ -1074,22 +1092,22 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
                return -ECHILD;
 
        dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
-            dentry, dentry->d_inode, ceph_dentry(dentry)->offset);
+            dentry, d_inode(dentry), ceph_dentry(dentry)->offset);
 
        dir = ceph_get_dentry_parent_inode(dentry);
 
        /* always trust cached snapped dentries, snapdir dentry */
        if (ceph_snap(dir) != CEPH_NOSNAP) {
                dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
-                    dentry, dentry->d_inode);
+                    dentry, d_inode(dentry));
                valid = 1;
-       } else if (dentry->d_inode &&
-                  ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
+       } else if (d_really_is_positive(dentry) &&
+                  ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) {
                valid = 1;
        } else if (dentry_lease_is_valid(dentry) ||
                   dir_lease_is_valid(dir, dentry)) {
-               if (dentry->d_inode)
-                       valid = ceph_is_any_caps(dentry->d_inode);
+               if (d_really_is_positive(dentry))
+                       valid = ceph_is_any_caps(d_inode(dentry));
                else
                        valid = 1;
        }
@@ -1151,7 +1169,7 @@ static void ceph_d_prune(struct dentry *dentry)
         * we hold d_lock, so d_parent is stable, and d_fsdata is never
         * cleared until d_release
         */
-       ceph_dir_clear_complete(dentry->d_parent->d_inode);
+       ceph_dir_clear_complete(d_inode(dentry->d_parent));
 }
 
 /*
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
                dout("dir_fsync %p wait on tid %llu (until %llu)\n",
                     inode, req->r_tid, last_tid);
                if (req->r_timeout) {
-                       ret = wait_for_completion_timeout(
-                               &req->r_safe_completion, req->r_timeout);
-                       if (ret > 0)
+                       unsigned long time_left = wait_for_completion_timeout(
+                                                       &req->r_safe_completion,
+                                                       req->r_timeout);
+                       if (time_left > 0)
                                ret = 0;
-                       else if (ret == 0)
+                       else
                                ret = -EIO;  /* timed out */
                } else {
                        wait_for_completion(&req->r_safe_completion);
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
        .getattr = ceph_getattr,
        .mkdir = ceph_mkdir,
        .rmdir = ceph_unlink,
+       .rename = ceph_rename,
 };
 
 const struct dentry_operations ceph_dentry_ops = {
index 8d7d782..fe02ae7 100644 (file)
@@ -136,8 +136,8 @@ static struct dentry *__get_parent(struct super_block *sb,
                return ERR_CAST(req);
 
        if (child) {
-               req->r_inode = child->d_inode;
-               ihold(child->d_inode);
+               req->r_inode = d_inode(child);
+               ihold(d_inode(child));
        } else {
                req->r_ino1 = (struct ceph_vino) {
                        .ino = ino,
@@ -164,7 +164,7 @@ static struct dentry *__get_parent(struct super_block *sb,
                return ERR_PTR(err);
        }
        dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
-            child ? ceph_ino(child->d_inode) : ino,
+            child ? ceph_ino(d_inode(child)) : ino,
             dentry, ceph_vinop(inode));
        return dentry;
 }
@@ -172,11 +172,11 @@ static struct dentry *__get_parent(struct super_block *sb,
 static struct dentry *ceph_get_parent(struct dentry *child)
 {
        /* don't re-export snaps */
-       if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
+       if (ceph_snap(d_inode(child)) != CEPH_NOSNAP)
                return ERR_PTR(-EINVAL);
 
        dout("get_parent %p ino %llx.%llx\n",
-            child, ceph_vinop(child->d_inode));
+            child, ceph_vinop(d_inode(child)));
        return __get_parent(child->d_sb, child, 0);
 }
 
@@ -209,32 +209,32 @@ static int ceph_get_name(struct dentry *parent, char *name,
        struct ceph_mds_request *req;
        int err;
 
-       mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
+       mdsc = ceph_inode_to_client(d_inode(child))->mdsc;
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
                                       USE_ANY_MDS);
        if (IS_ERR(req))
                return PTR_ERR(req);
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
 
-       req->r_inode = child->d_inode;
-       ihold(child->d_inode);
-       req->r_ino2 = ceph_vino(parent->d_inode);
-       req->r_locked_dir = parent->d_inode;
+       req->r_inode = d_inode(child);
+       ihold(d_inode(child));
+       req->r_ino2 = ceph_vino(d_inode(parent));
+       req->r_locked_dir = d_inode(parent);
        req->r_num_caps = 2;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
 
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
 
        if (!err) {
                struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
                memcpy(name, rinfo->dname, rinfo->dname_len);
                name[rinfo->dname_len] = 0;
                dout("get_name %p ino %llx.%llx name %s\n",
-                    child, ceph_vinop(child->d_inode), name);
+                    child, ceph_vinop(d_inode(child)), name);
        } else {
                dout("get_name %p ino %llx.%llx err %d\n",
-                    child, ceph_vinop(child->d_inode), err);
+                    child, ceph_vinop(d_inode(child)), err);
        }
 
        ceph_mdsc_put_request(req);
index b9b8eb2..3b6b522 100644 (file)
@@ -291,14 +291,14 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
        }
        if (err)
                goto out_req;
-       if (dn || dentry->d_inode == NULL || d_is_symlink(dentry)) {
+       if (dn || d_really_is_negative(dentry) || d_is_symlink(dentry)) {
                /* make vfs retry on splice, ENOENT, or symlink */
                dout("atomic_open finish_no_open on dn %p\n", dn);
                err = finish_no_open(file, dn);
        } else {
                dout("atomic_open finish_open on dn %p\n", dn);
                if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
-                       ceph_init_inode_acls(dentry->d_inode, &acls);
+                       ceph_init_inode_acls(d_inode(dentry), &acls);
                        *opened |= FILE_CREATED;
                }
                err = finish_open(file, dentry, ceph_open, opened);
index 119c43c..e876e19 100644 (file)
@@ -940,7 +940,7 @@ static void update_dentry_lease(struct dentry *dentry,
             dentry, duration, ttl);
 
        /* make lease_rdcache_gen match directory */
-       dir = dentry->d_parent->d_inode;
+       dir = d_inode(dentry->d_parent);
        di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
 
        if (duration == 0)
@@ -980,7 +980,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
 {
        struct dentry *realdn;
 
-       BUG_ON(dn->d_inode);
+       BUG_ON(d_inode(dn));
 
        /* dn must be unhashed */
        if (!d_unhashed(dn))
@@ -998,13 +998,13 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
                     "inode %p ino %llx.%llx\n",
                     dn, d_count(dn),
                     realdn, d_count(realdn),
-                    realdn->d_inode, ceph_vinop(realdn->d_inode));
+                    d_inode(realdn), ceph_vinop(d_inode(realdn)));
                dput(dn);
                dn = realdn;
        } else {
                BUG_ON(!ceph_dentry(dn));
                dout("dn %p attached to %p ino %llx.%llx\n",
-                    dn, dn->d_inode, ceph_vinop(dn->d_inode));
+                    dn, d_inode(dn), ceph_vinop(d_inode(dn)));
        }
        if ((!prehash || *prehash) && d_unhashed(dn))
                d_rehash(dn);
@@ -1125,11 +1125,11 @@ retry_lookup:
                                        dput(parent);
                                        goto done;
                                }
-                       } else if (dn->d_inode &&
-                                  (ceph_ino(dn->d_inode) != vino.ino ||
-                                   ceph_snap(dn->d_inode) != vino.snap)) {
+                       } else if (d_really_is_positive(dn) &&
+                                  (ceph_ino(d_inode(dn)) != vino.ino ||
+                                   ceph_snap(d_inode(dn)) != vino.snap)) {
                                dout(" dn %p points to wrong inode %p\n",
-                                    dn, dn->d_inode);
+                                    dn, d_inode(dn));
                                d_delete(dn);
                                dput(dn);
                                goto retry_lookup;
@@ -1183,7 +1183,7 @@ retry_lookup:
 
                BUG_ON(!dn);
                BUG_ON(!dir);
-               BUG_ON(dn->d_parent->d_inode != dir);
+               BUG_ON(d_inode(dn->d_parent) != dir);
                BUG_ON(ceph_ino(dir) !=
                       le64_to_cpu(rinfo->diri.in->ino));
                BUG_ON(ceph_snap(dir) !=
@@ -1235,7 +1235,7 @@ retry_lookup:
                /* null dentry? */
                if (!rinfo->head->is_target) {
                        dout("fill_trace null dentry\n");
-                       if (dn->d_inode) {
+                       if (d_really_is_positive(dn)) {
                                ceph_dir_clear_ordered(dir);
                                dout("d_delete %p\n", dn);
                                d_delete(dn);
@@ -1252,7 +1252,7 @@ retry_lookup:
                }
 
                /* attach proper inode */
-               if (!dn->d_inode) {
+               if (d_really_is_negative(dn)) {
                        ceph_dir_clear_ordered(dir);
                        ihold(in);
                        dn = splice_dentry(dn, in, &have_lease);
@@ -1261,9 +1261,9 @@ retry_lookup:
                                goto done;
                        }
                        req->r_dentry = dn;  /* may have spliced */
-               } else if (dn->d_inode && dn->d_inode != in) {
+               } else if (d_really_is_positive(dn) && d_inode(dn) != in) {
                        dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
-                            dn, dn->d_inode, ceph_vinop(dn->d_inode),
+                            dn, d_inode(dn), ceph_vinop(d_inode(dn)),
                             ceph_vinop(in));
                        have_lease = false;
                }
@@ -1363,7 +1363,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                return readdir_prepopulate_inodes_only(req, session);
 
        if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
-               snapdir = ceph_get_snapdir(parent->d_inode);
+               snapdir = ceph_get_snapdir(d_inode(parent));
                parent = d_find_alias(snapdir);
                dout("readdir_prepopulate %d items under SNAPDIR dn %p\n",
                     rinfo->dir_nr, parent);
@@ -1371,7 +1371,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                dout("readdir_prepopulate %d items under dn %p\n",
                     rinfo->dir_nr, parent);
                if (rinfo->dir_dir)
-                       ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
+                       ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir);
        }
 
        /* FIXME: release caps/leases if error occurs */
@@ -1405,11 +1405,11 @@ retry_lookup:
                                err = ret;
                                goto out;
                        }
-               } else if (dn->d_inode &&
-                          (ceph_ino(dn->d_inode) != vino.ino ||
-                           ceph_snap(dn->d_inode) != vino.snap)) {
+               } else if (d_really_is_positive(dn) &&
+                          (ceph_ino(d_inode(dn)) != vino.ino ||
+                           ceph_snap(d_inode(dn)) != vino.snap)) {
                        dout(" dn %p points to wrong inode %p\n",
-                            dn, dn->d_inode);
+                            dn, d_inode(dn));
                        d_delete(dn);
                        dput(dn);
                        goto retry_lookup;
@@ -1423,8 +1423,8 @@ retry_lookup:
                }
 
                /* inode */
-               if (dn->d_inode) {
-                       in = dn->d_inode;
+               if (d_really_is_positive(dn)) {
+                       in = d_inode(dn);
                } else {
                        in = ceph_get_inode(parent->d_sb, vino);
                        if (IS_ERR(in)) {
@@ -1440,13 +1440,13 @@ retry_lookup:
                               req->r_request_started, -1,
                               &req->r_caps_reservation) < 0) {
                        pr_err("fill_inode badness on %p\n", in);
-                       if (!dn->d_inode)
+                       if (d_really_is_negative(dn))
                                iput(in);
                        d_drop(dn);
                        goto next_item;
                }
 
-               if (!dn->d_inode) {
+               if (d_really_is_negative(dn)) {
                        struct dentry *realdn = splice_dentry(dn, in, NULL);
                        if (IS_ERR(realdn)) {
                                err = PTR_ERR(realdn);
@@ -1693,7 +1693,7 @@ retry:
  */
 static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct ceph_inode_info *ci = ceph_inode(dentry->d_inode);
+       struct ceph_inode_info *ci = ceph_inode(d_inode(dentry));
        nd_set_link(nd, ci->i_symlink);
        return NULL;
 }
@@ -1714,7 +1714,7 @@ static const struct inode_operations ceph_symlink_iops = {
  */
 int ceph_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ceph_inode_info *ci = ceph_inode(inode);
        const unsigned int ia_valid = attr->ia_valid;
        struct ceph_mds_request *req;
@@ -1990,7 +1990,7 @@ int ceph_permission(struct inode *inode, int mask)
 int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
                 struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ceph_inode_info *ci = ceph_inode(inode);
        int err;
 
index 71c073f..84f37f3 100644 (file)
@@ -679,7 +679,7 @@ static struct dentry *get_nonsnap_parent(struct dentry *dentry)
         * except to resplice to another snapdir, and either the old or new
         * result is a valid result.
         */
-       while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+       while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
                dentry = dentry->d_parent;
        return dentry;
 }
@@ -716,20 +716,20 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
        } else if (req->r_dentry) {
                /* ignore race with rename; old or new d_parent is okay */
                struct dentry *parent = req->r_dentry->d_parent;
-               struct inode *dir = parent->d_inode;
+               struct inode *dir = d_inode(parent);
 
                if (dir->i_sb != mdsc->fsc->sb) {
                        /* not this fs! */
-                       inode = req->r_dentry->d_inode;
+                       inode = d_inode(req->r_dentry);
                } else if (ceph_snap(dir) != CEPH_NOSNAP) {
                        /* direct snapped/virtual snapdir requests
                         * based on parent dir inode */
                        struct dentry *dn = get_nonsnap_parent(parent);
-                       inode = dn->d_inode;
+                       inode = d_inode(dn);
                        dout("__choose_mds using nonsnap parent %p\n", inode);
                } else {
                        /* dentry target */
-                       inode = req->r_dentry->d_inode;
+                       inode = d_inode(req->r_dentry);
                        if (!inode || mode == USE_AUTH_MDS) {
                                /* dir + name */
                                inode = dir;
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
        spin_unlock(&session->s_cap_lock);
 }
 
+static void cleanup_session_requests(struct ceph_mds_client *mdsc,
+                                    struct ceph_mds_session *session)
+{
+       struct ceph_mds_request *req;
+       struct rb_node *p;
+
+       dout("cleanup_session_requests mds%d\n", session->s_mds);
+       mutex_lock(&mdsc->mutex);
+       while (!list_empty(&session->s_unsafe)) {
+               req = list_first_entry(&session->s_unsafe,
+                                      struct ceph_mds_request, r_unsafe_item);
+               list_del_init(&req->r_unsafe_item);
+               pr_info(" dropping unsafe request %llu\n", req->r_tid);
+               __unregister_request(mdsc, req);
+       }
+       /* zero r_attempts, so kick_requests() will re-send requests */
+       p = rb_first(&mdsc->request_tree);
+       while (p) {
+               req = rb_entry(p, struct ceph_mds_request, r_node);
+               p = rb_next(p);
+               if (req->r_session &&
+                   req->r_session->s_mds == session->s_mds)
+                       req->r_attempts = 0;
+       }
+       mutex_unlock(&mdsc->mutex);
+}
+
 /*
  * Helper to safely iterate over all caps associated with a session, with
  * special care taken to handle a racing __ceph_remove_cap().
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
             cap, ci, &ci->vfs_inode);
        spin_lock(&ci->i_ceph_lock);
        __ceph_remove_cap(cap, false);
-       if (!__ceph_is_any_real_caps(ci)) {
+       if (!ci->i_auth_cap) {
                struct ceph_mds_client *mdsc =
                        ceph_sb_to_client(inode->i_sb)->mdsc;
 
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                        mdsc->num_cap_flushing--;
                        drop = 1;
                }
-               if (drop && ci->i_wrbuffer_ref) {
-                       pr_info(" dropping dirty data for %p %lld\n",
-                               inode, ceph_ino(inode));
-                       ci->i_wrbuffer_ref = 0;
-                       ci->i_wrbuffer_ref_head = 0;
-                       drop++;
-               }
                spin_unlock(&mdsc->cap_dirty_lock);
        }
        spin_unlock(&ci->i_ceph_lock);
@@ -1712,7 +1732,7 @@ retry:
        seq = read_seqbegin(&rename_lock);
        rcu_read_lock();
        for (temp = dentry; !IS_ROOT(temp);) {
-               struct inode *inode = temp->d_inode;
+               struct inode *inode = d_inode(temp);
                if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
                        len++;  /* slash only */
                else if (stop_on_nosnap && inode &&
@@ -1736,7 +1756,7 @@ retry:
                struct inode *inode;
 
                spin_lock(&temp->d_lock);
-               inode = temp->d_inode;
+               inode = d_inode(temp);
                if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
                        dout("build_path path+%d: %p SNAPDIR\n",
                             pos, temp);
@@ -1770,7 +1790,7 @@ retry:
                goto retry;
        }
 
-       *base = ceph_ino(temp->d_inode);
+       *base = ceph_ino(d_inode(temp));
        *plen = len;
        dout("build_path on %p %d built %llx '%.*s'\n",
             dentry, d_count(dentry), *base, len, path);
@@ -1783,8 +1803,8 @@ static int build_dentry_path(struct dentry *dentry,
 {
        char *path;
 
-       if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) {
-               *pino = ceph_ino(dentry->d_parent->d_inode);
+       if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) {
+               *pino = ceph_ino(d_inode(dentry->d_parent));
                *ppath = dentry->d_name.name;
                *ppathlen = dentry->d_name.len;
                return 0;
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
  */
 static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
                                               struct ceph_mds_request *req,
-                                              int mds)
+                                              int mds, bool drop_cap_releases)
 {
        struct ceph_msg *msg;
        struct ceph_mds_request_head *head;
@@ -1925,7 +1945,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
        releases = 0;
        if (req->r_inode_drop)
                releases += ceph_encode_inode_release(&p,
-                     req->r_inode ? req->r_inode : req->r_dentry->d_inode,
+                     req->r_inode ? req->r_inode : d_inode(req->r_dentry),
                      mds, req->r_inode_drop, req->r_inode_unless, 0);
        if (req->r_dentry_drop)
                releases += ceph_encode_dentry_release(&p, req->r_dentry,
@@ -1935,8 +1955,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
                       mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
        if (req->r_old_inode_drop)
                releases += ceph_encode_inode_release(&p,
-                     req->r_old_dentry->d_inode,
+                     d_inode(req->r_old_dentry),
                      mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
+
+       if (drop_cap_releases) {
+               releases = 0;
+               p = msg->front.iov_base + req->r_request_release_offset;
+       }
+
        head->num_releases = cpu_to_le16(releases);
 
        /* time stamp */
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
  */
 static int __prepare_send_request(struct ceph_mds_client *mdsc,
                                  struct ceph_mds_request *req,
-                                 int mds)
+                                 int mds, bool drop_cap_releases)
 {
        struct ceph_mds_request_head *rhead;
        struct ceph_msg *msg;
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
                ceph_msg_put(req->r_request);
                req->r_request = NULL;
        }
-       msg = create_request_message(mdsc, req, mds);
+       msg = create_request_message(mdsc, req, mds, drop_cap_releases);
        if (IS_ERR(msg)) {
                req->r_err = PTR_ERR(msg);
                complete_request(mdsc, req);
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
        if (req->r_request_started == 0)   /* note request start time */
                req->r_request_started = jiffies;
 
-       err = __prepare_send_request(mdsc, req, mds);
+       err = __prepare_send_request(mdsc, req, mds, false);
        if (!err) {
                ceph_msg_get(req->r_request);
                ceph_con_send(&session->s_con, req->r_request);
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session,
        case CEPH_SESSION_CLOSE:
                if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
                        pr_info("mds%d reconnect denied\n", session->s_mds);
+               cleanup_session_requests(mdsc, session);
                remove_session_caps(session);
                wake = 2; /* for good measure */
                wake_up_all(&mdsc->session_close_wq);
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 
        mutex_lock(&mdsc->mutex);
        list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
-               err = __prepare_send_request(mdsc, req, session->s_mds);
+               err = __prepare_send_request(mdsc, req, session->s_mds, true);
                if (!err) {
                        ceph_msg_get(req->r_request);
                        ceph_con_send(&session->s_con, req->r_request);
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
                        continue; /* only old requests */
                if (req->r_session &&
                    req->r_session->s_mds == session->s_mds) {
-                       err = __prepare_send_request(mdsc, req, session->s_mds);
+                       err = __prepare_send_request(mdsc, req,
+                                                    session->s_mds, true);
                        if (!err) {
                                ceph_msg_get(req->r_request);
                                ceph_con_send(&session->s_con, req->r_request);
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
        spin_unlock(&session->s_cap_lock);
 
        /* trim unused caps to reduce MDS's cache rejoin time */
-       shrink_dcache_parent(mdsc->fsc->sb->s_root);
+       if (mdsc->fsc->sb->s_root)
+               shrink_dcache_parent(mdsc->fsc->sb->s_root);
 
        ceph_con_close(&session->s_con);
        ceph_con_open(&session->s_con,
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
                    di->lease_renew_from &&
                    di->lease_renew_after == 0) {
                        unsigned long duration =
-                               le32_to_cpu(h->duration_ms) * HZ / 1000;
+                               msecs_to_jiffies(le32_to_cpu(h->duration_ms));
 
                        di->lease_seq = seq;
                        dentry->d_time = di->lease_renew_from + duration;
index 51cc23e..89e6bc3 100644 (file)
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op)
        case CEPH_MDS_OP_LSSNAP: return "lssnap";
        case CEPH_MDS_OP_MKSNAP: return "mksnap";
        case CEPH_MDS_OP_RMSNAP: return "rmsnap";
+       case CEPH_MDS_OP_RENAMESNAP: return "renamesnap";
        case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
        case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
        }
index a63997b..4e99053 100644 (file)
@@ -44,7 +44,7 @@ static void ceph_put_super(struct super_block *s)
 
 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-       struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
+       struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
        struct ceph_monmap *monmap = fsc->client->monc.monmap;
        struct ceph_statfs st;
        u64 fsid;
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
        fsopt->rsize = CEPH_RSIZE_DEFAULT;
        fsopt->rasize = CEPH_RASIZE_DEFAULT;
        fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+       if (!fsopt->snapdir_name) {
+               err = -ENOMEM;
+               goto out;
+       }
+
        fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
        fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
        fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
        struct ceph_mount_options *fsopt = fsc->mount_options;
-       struct ceph_options *opt = fsc->client->options;
-
-       if (opt->flags & CEPH_OPT_FSID)
-               seq_printf(m, ",fsid=%pU", &opt->fsid);
-       if (opt->flags & CEPH_OPT_NOSHARE)
-               seq_puts(m, ",noshare");
-       if (opt->flags & CEPH_OPT_NOCRC)
-               seq_puts(m, ",nocrc");
-       if (opt->flags & CEPH_OPT_NOMSGAUTH)
-               seq_puts(m, ",nocephx_require_signatures");
-       if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
-               seq_puts(m, ",notcp_nodelay");
-
-       if (opt->name)
-               seq_printf(m, ",name=%s", opt->name);
-       if (opt->key)
-               seq_puts(m, ",secret=<hidden>");
-
-       if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
-               seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
-       if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
-               seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
-       if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
-               seq_printf(m, ",osdkeepalivetimeout=%d",
-                          opt->osd_keepalive_timeout);
+       size_t pos;
+       int ret;
+
+       /* a comma between MNT/MS and client options */
+       seq_putc(m, ',');
+       pos = m->count;
+
+       ret = ceph_print_client_options(m, fsc->client);
+       if (ret)
+               return ret;
+
+       /* retract our comma if no client options */
+       if (m->count == pos)
+               m->count--;
 
        if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
                seq_puts(m, ",dirstat");
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
                seq_puts(m, ",norbytes");
        if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
                seq_puts(m, ",noasyncreaddir");
-       if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
-               seq_puts(m, ",dcache");
-       else
+       if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
                seq_puts(m, ",nodcache");
        if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
                seq_puts(m, ",fsc");
-       else
-               seq_puts(m, ",nofsc");
 
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
        if (fsopt->sb_flags & MS_POSIXACL)
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
                seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
        if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
                seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+
        return 0;
 }
 
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
        if (IS_ERR(req))
                return ERR_CAST(req);
        req->r_path1 = kstrdup(path, GFP_NOFS);
+       if (!req->r_path1) {
+               root = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
        req->r_ino1.ino = CEPH_INO_ROOT;
        req->r_ino1.snap = CEPH_NOSNAP;
        req->r_started = started;
@@ -976,7 +972,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
        if (IS_ERR(res))
                goto out_splat;
        dout("root %p inode %p ino %llx.%llx\n", res,
-            res->d_inode, ceph_vinop(res->d_inode));
+            d_inode(res), ceph_vinop(d_inode(res)));
        return res;
 
 out_splat:
index 04c8124..fa20e13 100644 (file)
@@ -36,7 +36,8 @@
 #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
 #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
 
-#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
+#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES | \
+                                  CEPH_MOUNT_OPT_DCACHE)
 
 #define ceph_set_mount_opt(fsc, opt) \
        (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
 
 /* file.c */
 extern const struct file_operations ceph_file_fops;
-extern const struct address_space_operations ceph_aops;
 
 extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
index 5a492ca..cd7ffad 100644 (file)
@@ -776,12 +776,12 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
                return generic_getxattr(dentry, name, value, size);
 
-       return __ceph_getxattr(dentry->d_inode, name, value, size);
+       return __ceph_getxattr(d_inode(dentry), name, value, size);
 }
 
 ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
        u32 vir_namelen = 0;
@@ -847,7 +847,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
                              const char *value, size_t size, int flags)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_mds_request *req;
        struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
                err = PTR_ERR(req);
                goto out;
        }
-       req->r_inode = inode;
-       ihold(inode);
-       req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
-       req->r_num_caps = 1;
+
        req->r_args.setxattr.flags = cpu_to_le32(flags);
        req->r_path2 = kstrdup(name, GFP_NOFS);
+       if (!req->r_path2) {
+               ceph_mdsc_put_request(req);
+               err = -ENOMEM;
+               goto out;
+       }
 
        req->r_pagelist = pagelist;
        pagelist = NULL;
 
+       req->r_inode = inode;
+       ihold(inode);
+       req->r_num_caps = 1;
+       req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+
        dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
        err = ceph_mdsc_do_request(mdsc, NULL, req);
        ceph_mdsc_put_request(req);
@@ -901,7 +908,7 @@ out:
 int __ceph_setxattr(struct dentry *dentry, const char *name,
                        const void *value, size_t size, int flags)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ceph_vxattr *vxattr;
        struct ceph_inode_info *ci = ceph_inode(inode);
        int issued;
@@ -995,7 +1002,7 @@ out:
 int ceph_setxattr(struct dentry *dentry, const char *name,
                  const void *value, size_t size, int flags)
 {
-       if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+       if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
                return -EROFS;
 
        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
@@ -1011,7 +1018,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
        struct ceph_mds_client *mdsc = fsc->mdsc;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ceph_mds_request *req;
        int err;
 
@@ -1019,12 +1026,14 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
                                       USE_AUTH_MDS);
        if (IS_ERR(req))
                return PTR_ERR(req);
+       req->r_path2 = kstrdup(name, GFP_NOFS);
+       if (!req->r_path2)
+               return -ENOMEM;
+
        req->r_inode = inode;
        ihold(inode);
-       req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
        req->r_num_caps = 1;
-       req->r_path2 = kstrdup(name, GFP_NOFS);
-
+       req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
        err = ceph_mdsc_do_request(mdsc, NULL, req);
        ceph_mdsc_put_request(req);
        return err;
@@ -1032,7 +1041,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
 
 int __ceph_removexattr(struct dentry *dentry, const char *name)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ceph_vxattr *vxattr;
        struct ceph_inode_info *ci = ceph_inode(inode);
        int issued;
@@ -1098,7 +1107,7 @@ out:
 
 int ceph_removexattr(struct dentry *dentry, const char *name)
 {
-       if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+       if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
                return -EROFS;
 
        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
index b8602f1..430e034 100644 (file)
@@ -301,7 +301,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
        if (full_path == NULL)
                goto cdda_exit;
 
-       cifs_sb = CIFS_SB(mntpt->d_inode->i_sb);
+       cifs_sb = CIFS_SB(d_inode(mntpt)->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                mnt = ERR_CAST(tlink);
index eaab4b2..f5089bd 100644 (file)
@@ -607,7 +607,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
        p = s = full_path;
 
        do {
-               struct inode *dir = dentry->d_inode;
+               struct inode *dir = d_inode(dentry);
                struct dentry *child;
 
                if (!dir) {
index fa13d5e..84650a5 100644 (file)
@@ -1898,7 +1898,7 @@ static void
 cifs_writev_requeue(struct cifs_writedata *wdata)
 {
        int i, rc = 0;
-       struct inode *inode = wdata->cfile->dentry->d_inode;
+       struct inode *inode = d_inode(wdata->cfile->dentry);
        struct TCP_Server_Info *server;
        unsigned int rest_len;
 
@@ -1981,7 +1981,7 @@ cifs_writev_complete(struct work_struct *work)
 {
        struct cifs_writedata *wdata = container_of(work,
                                                struct cifs_writedata, work);
-       struct inode *inode = wdata->cfile->dentry->d_inode;
+       struct inode *inode = d_inode(wdata->cfile->dentry);
        int i = 0;
 
        if (wdata->result == 0) {
index b72bc29..338d569 100644 (file)
@@ -745,13 +745,13 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
                goto lookup_out;
        }
 
-       if (direntry->d_inode != NULL) {
+       if (d_really_is_positive(direntry)) {
                cifs_dbg(FYI, "non-NULL inode in lookup\n");
        } else {
                cifs_dbg(FYI, "NULL inode in lookup\n");
        }
        cifs_dbg(FYI, "Full path: %s inode = 0x%p\n",
-                full_path, direntry->d_inode);
+                full_path, d_inode(direntry));
 
        if (pTcon->unix_ext) {
                rc = cifs_get_inode_info_unix(&newInode, full_path,
@@ -792,7 +792,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
        if (flags & LOOKUP_RCU)
                return -ECHILD;
 
-       if (direntry->d_inode) {
+       if (d_really_is_positive(direntry)) {
                if (cifs_revalidate_dentry(direntry))
                        return 0;
                else {
@@ -803,7 +803,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
                         * attributes will have been updated by
                         * cifs_revalidate_dentry().
                         */
-                       if (IS_AUTOMOUNT(direntry->d_inode) &&
+                       if (IS_AUTOMOUNT(d_inode(direntry)) &&
                           !(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) {
                                spin_lock(&direntry->d_lock);
                                direntry->d_flags |= DCACHE_NEED_AUTOMOUNT;
index ca2bc54..cafbf10 100644 (file)
@@ -273,7 +273,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
 {
        struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
@@ -357,7 +357,7 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file)
  */
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
-       struct inode *inode = cifs_file->dentry->d_inode;
+       struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
@@ -386,7 +386,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 
        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
-                        cifs_file->dentry->d_inode);
+                        d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need invalidate mapping on the last
                 * close  because it may cause a error when we open this file
@@ -572,7 +572,7 @@ static int
 cifs_relock_file(struct cifsFileInfo *cfile)
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
 
@@ -620,7 +620,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
                return rc;
        }
 
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;
@@ -874,7 +874,7 @@ cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 {
        bool rc = false;
        struct cifs_fid_locks *cur;
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
 
        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
@@ -899,7 +899,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 {
        int rc = 0;
        struct cifsLockInfo *conf_lock;
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;
 
@@ -927,7 +927,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
 static void
 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
 {
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
@@ -944,7 +944,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
 {
        struct cifsLockInfo *conf_lock;
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;
 
@@ -1125,7 +1125,7 @@ struct lock_to_push {
 static int
 cifs_push_posix_locks(struct cifsFileInfo *cfile)
 {
-       struct inode *inode = cfile->dentry->d_inode;
+       struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
@@ -1214,7 +1214,7 @@ static int
 cifs_push_locks(struct cifsFileInfo *cfile)
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
 
@@ -1382,7 +1382,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;
@@ -1488,7 +1488,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
-       struct inode *inode = cfile->dentry->d_inode;
+       struct inode *inode = d_inode(cfile->dentry);
 
        if (posix_lck) {
                int posix_lock_type;
@@ -1643,7 +1643,7 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
        struct TCP_Server_Info *server;
        unsigned int xid;
        struct dentry *dentry = open_file->dentry;
-       struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
+       struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
        struct cifs_io_parms io_parms;
 
        cifs_sb = CIFS_SB(dentry->d_sb);
@@ -1676,7 +1676,7 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
                                        break;
                        }
 
-                       len = min(server->ops->wp_retry_size(dentry->d_inode),
+                       len = min(server->ops->wp_retry_size(d_inode(dentry)),
                                  (unsigned int)write_size - total_written);
                        /* iov[0] is reserved for smb header */
                        iov[1].iov_base = (char *)write_data + total_written;
@@ -1696,9 +1696,9 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
                                return rc;
                        }
                } else {
-                       spin_lock(&dentry->d_inode->i_lock);
+                       spin_lock(&d_inode(dentry)->i_lock);
                        cifs_update_eof(cifsi, *offset, bytes_written);
-                       spin_unlock(&dentry->d_inode->i_lock);
+                       spin_unlock(&d_inode(dentry)->i_lock);
                        *offset += bytes_written;
                }
        }
@@ -1706,12 +1706,12 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
        cifs_stats_bytes_written(tcon, total_written);
 
        if (total_written > 0) {
-               spin_lock(&dentry->d_inode->i_lock);
-               if (*offset > dentry->d_inode->i_size)
-                       i_size_write(dentry->d_inode, *offset);
-               spin_unlock(&dentry->d_inode->i_lock);
+               spin_lock(&d_inode(dentry)->i_lock);
+               if (*offset > d_inode(dentry)->i_size)
+                       i_size_write(d_inode(dentry), *offset);
+               spin_unlock(&d_inode(dentry)->i_lock);
        }
-       mark_inode_dirty_sync(dentry->d_inode);
+       mark_inode_dirty_sync(d_inode(dentry));
        free_xid(xid);
        return total_written;
 }
@@ -2406,7 +2406,7 @@ cifs_uncached_writev_complete(struct work_struct *work)
 {
        struct cifs_writedata *wdata = container_of(work,
                                        struct cifs_writedata, work);
-       struct inode *inode = wdata->cfile->dentry->d_inode;
+       struct inode *inode = d_inode(wdata->cfile->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
 
        spin_lock(&inode->i_lock);
@@ -3794,7 +3794,7 @@ void cifs_oplock_break(struct work_struct *work)
 {
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
                                                  oplock_break);
-       struct inode *inode = cfile->dentry->d_inode;
+       struct inode *inode = d_inode(cfile->dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
index 3e126d7..55b5811 100644 (file)
@@ -1067,7 +1067,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
        int rc;
        struct cifs_fid fid;
        struct cifs_open_parms oparms;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cifsInode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct tcon_link *tlink;
@@ -1196,7 +1196,7 @@ cifs_drop_nlink(struct inode *inode)
 }
 
 /*
- * If dentry->d_inode is null (usually meaning the cached dentry
+ * If d_inode(dentry) is null (usually meaning the cached dentry
  * is a negative dentry) then we would attempt a standard SMB delete, but
  * if that fails we can not attempt the fall back mechanisms on EACCESS
  * but will return the EACCESS to the caller. Note that the VFS does not call
@@ -1207,7 +1207,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
        int rc = 0;
        unsigned int xid;
        char *full_path = NULL;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cifs_inode;
        struct super_block *sb = dir->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1551,13 +1551,13 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
        cifs_put_tlink(tlink);
 
        if (!rc) {
-               spin_lock(&direntry->d_inode->i_lock);
-               i_size_write(direntry->d_inode, 0);
-               clear_nlink(direntry->d_inode);
-               spin_unlock(&direntry->d_inode->i_lock);
+               spin_lock(&d_inode(direntry)->i_lock);
+               i_size_write(d_inode(direntry), 0);
+               clear_nlink(d_inode(direntry));
+               spin_unlock(&d_inode(direntry)->i_lock);
        }
 
-       cifsInode = CIFS_I(direntry->d_inode);
+       cifsInode = CIFS_I(d_inode(direntry));
        /* force revalidate to go get info when needed */
        cifsInode->time = 0;
 
@@ -1568,7 +1568,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
         */
        cifsInode->time = 0;
 
-       direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
+       d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime =
                current_fs_time(inode->i_sb);
 
 rmdir_exit:
@@ -1727,7 +1727,7 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
 
 unlink_target:
        /* Try unlinking the target dentry if it's not negative */
-       if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
+       if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) {
                if (d_is_dir(target_dentry))
                        tmprc = cifs_rmdir(target_dir, target_dentry);
                else
@@ -1867,7 +1867,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)
 {
        unsigned int xid;
        int rc = 0;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct super_block *sb = dentry->d_sb;
        char *full_path = NULL;
 
@@ -1919,7 +1919,7 @@ int cifs_revalidate_file(struct file *filp)
 int cifs_revalidate_dentry(struct dentry *dentry)
 {
        int rc;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        rc = cifs_revalidate_dentry_attr(dentry);
        if (rc)
@@ -1933,7 +1933,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
        struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int rc;
 
        /*
@@ -2110,7 +2110,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
        int rc;
        unsigned int xid;
        char *full_path = NULL;
-       struct inode *inode = direntry->d_inode;
+       struct inode *inode = d_inode(direntry);
        struct cifsInodeInfo *cifsInode = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct tcon_link *tlink;
@@ -2251,7 +2251,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
        unsigned int xid;
        kuid_t uid = INVALID_UID;
        kgid_t gid = INVALID_GID;
-       struct inode *inode = direntry->d_inode;
+       struct inode *inode = d_inode(direntry);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsInodeInfo *cifsInode = CIFS_I(inode);
        char *full_path = NULL;
@@ -2409,7 +2409,7 @@ cifs_setattr_exit:
 int
 cifs_setattr(struct dentry *direntry, struct iattr *attrs)
 {
-       struct inode *inode = direntry->d_inode;
+       struct inode *inode = d_inode(direntry);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
 
index 2ec6037..252e672 100644 (file)
@@ -586,12 +586,12 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
         * if source file is cached (oplocked) revalidate will not go to server
         * until the file is closed or oplock broken so update nlinks locally
         */
-       if (old_file->d_inode) {
-               cifsInode = CIFS_I(old_file->d_inode);
+       if (d_really_is_positive(old_file)) {
+               cifsInode = CIFS_I(d_inode(old_file));
                if (rc == 0) {
-                       spin_lock(&old_file->d_inode->i_lock);
-                       inc_nlink(old_file->d_inode);
-                       spin_unlock(&old_file->d_inode->i_lock);
+                       spin_lock(&d_inode(old_file)->i_lock);
+                       inc_nlink(d_inode(old_file));
+                       spin_unlock(&d_inode(old_file)->i_lock);
 
                        /*
                         * parent dir timestamps will update from srv within a
@@ -629,7 +629,7 @@ cifs_hl_exit:
 void *
 cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
 {
-       struct inode *inode = direntry->d_inode;
+       struct inode *inode = d_inode(direntry);
        int rc = -ENOMEM;
        unsigned int xid;
        char *full_path = NULL;
index 3379463..8442b8b 100644 (file)
@@ -473,7 +473,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
                                        continue;
 
                                cifs_dbg(FYI, "file id match, oplock break\n");
-                               pCifsInode = CIFS_I(netfile->dentry->d_inode);
+                               pCifsInode = CIFS_I(d_inode(netfile->dentry));
 
                                set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
                                        &pCifsInode->flags);
index c295338..b4a4723 100644 (file)
@@ -78,7 +78,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
 {
        struct dentry *dentry, *alias;
        struct inode *inode;
-       struct super_block *sb = parent->d_inode->i_sb;
+       struct super_block *sb = d_inode(parent)->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
 
        cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
@@ -88,7 +88,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
                return;
 
        if (dentry) {
-               inode = dentry->d_inode;
+               inode = d_inode(dentry);
                if (inode) {
                        /*
                         * If we're generating inode numbers, then we don't
index d297903..7bfdd60 100644 (file)
@@ -722,7 +722,7 @@ cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
 static void
 cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 {
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cfile->fid.netfid = fid->netfid;
        cifs_set_oplock_level(cinode, oplock);
        cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
index 7198eac..2ab297d 100644 (file)
@@ -95,7 +95,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
        unsigned int max_num, num = 0, max_buf;
        struct smb2_lock_element *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;
@@ -231,7 +231,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
        unsigned int xid;
        unsigned int max_num, max_buf;
        struct smb2_lock_element *buf;
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_fid_locks *fdlocks;
 
        xid = get_xid();
index 22dfdf1..1c59070 100644 (file)
@@ -453,7 +453,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
 
        list_for_each(tmp, &tcon->openFileList) {
                cfile = list_entry(tmp, struct cifsFileInfo, tlist);
-               cinode = CIFS_I(cfile->dentry->d_inode);
+               cinode = CIFS_I(d_inode(cfile->dentry));
 
                if (memcmp(cinode->lease_key, rsp->LeaseKey,
                                                        SMB2_LEASE_KEY_SIZE))
@@ -590,7 +590,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
                                        continue;
 
                                cifs_dbg(FYI, "file id match, oplock break\n");
-                               cinode = CIFS_I(cfile->dentry->d_inode);
+                               cinode = CIFS_I(d_inode(cfile->dentry));
 
                                if (!CIFS_CACHE_WRITE(cinode) &&
                                    rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
index eab05e1..54daee5 100644 (file)
@@ -524,7 +524,7 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
 static void
 smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 {
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
 
        cfile->fid.persistent_fid = fid->persistent_fid;
@@ -793,7 +793,7 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
         * If extending file more than one page make sparse. Many Linux fs
         * make files sparse by default when extending via ftruncate
         */
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
 
        if (!set_alloc && (size > inode->i_size + 8192)) {
                __u8 set_sparse = 1;
@@ -1032,7 +1032,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
 
        xid = get_xid();
 
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
        cifsi = CIFS_I(inode);
 
        /* if file not oplocked can't be sure whether asking to extend size */
@@ -1083,7 +1083,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
 
        xid = get_xid();
 
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
        cifsi = CIFS_I(inode);
 
        /* Need to make file sparse, if not already, before freeing range. */
@@ -1115,7 +1115,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
 
        xid = get_xid();
 
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
        cifsi = CIFS_I(inode);
 
        /* if file not oplocked can't be sure whether asking to extend size */
index 72a4d10..ff9e1f8 100644 (file)
@@ -50,9 +50,9 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
 
        if (direntry == NULL)
                return -EIO;
-       if (direntry->d_inode == NULL)
+       if (d_really_is_negative(direntry))
                return -EIO;
-       sb = direntry->d_inode->i_sb;
+       sb = d_inode(direntry)->i_sb;
        if (sb == NULL)
                return -EIO;
 
@@ -111,9 +111,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
 
        if (direntry == NULL)
                return -EIO;
-       if (direntry->d_inode == NULL)
+       if (d_really_is_negative(direntry))
                return -EIO;
-       sb = direntry->d_inode->i_sb;
+       sb = d_inode(direntry)->i_sb;
        if (sb == NULL)
                return -EIO;
 
@@ -177,12 +177,12 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
                        memcpy(pacl, ea_value, value_size);
                        if (pTcon->ses->server->ops->set_acl)
                                rc = pTcon->ses->server->ops->set_acl(pacl,
-                                               value_size, direntry->d_inode,
+                                               value_size, d_inode(direntry),
                                                full_path, CIFS_ACL_DACL);
                        else
                                rc = -EOPNOTSUPP;
                        if (rc == 0) /* force revalidate of the inode */
-                               CIFS_I(direntry->d_inode)->time = 0;
+                               CIFS_I(d_inode(direntry))->time = 0;
                        kfree(pacl);
                }
 #else
@@ -246,9 +246,9 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 
        if (direntry == NULL)
                return -EIO;
-       if (direntry->d_inode == NULL)
+       if (d_really_is_negative(direntry))
                return -EIO;
-       sb = direntry->d_inode->i_sb;
+       sb = d_inode(direntry)->i_sb;
        if (sb == NULL)
                return -EIO;
 
@@ -324,7 +324,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
                                goto get_ea_exit; /* rc already EOPNOTSUPP */
 
                        pacl = pTcon->ses->server->ops->get_acl(cifs_sb,
-                                       direntry->d_inode, full_path, &acllen);
+                                       d_inode(direntry), full_path, &acllen);
                        if (IS_ERR(pacl)) {
                                rc = PTR_ERR(pacl);
                                cifs_dbg(VFS, "%s: error %zd getting sec desc\n",
@@ -382,9 +382,9 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
 
        if (direntry == NULL)
                return -EIO;
-       if (direntry->d_inode == NULL)
+       if (d_really_is_negative(direntry))
                return -EIO;
-       sb = direntry->d_inode->i_sb;
+       sb = d_inode(direntry)->i_sb;
        if (sb == NULL)
                return -EIO;
 
index 46ee6f2..5bb630a 100644 (file)
@@ -94,8 +94,8 @@ static void coda_flag_children(struct dentry *parent, int flag)
        spin_lock(&parent->d_lock);
        list_for_each_entry(de, &parent->d_subdirs, d_child) {
                /* don't know what to do with negative dentries */
-               if (de->d_inode ) 
-                       coda_flag_inode(de->d_inode, flag);
+               if (d_inode(de) ) 
+                       coda_flag_inode(d_inode(de), flag);
        }
        spin_unlock(&parent->d_lock);
        return; 
index 60cb88c..fda9f43 100644 (file)
@@ -201,7 +201,7 @@ err_out:
 static int coda_link(struct dentry *source_de, struct inode *dir_inode, 
          struct dentry *de)
 {
-       struct inode *inode = source_de->d_inode;
+       struct inode *inode = d_inode(source_de);
         const char * name = de->d_name.name;
        int len = de->d_name.len;
        int error;
@@ -266,7 +266,7 @@ static int coda_unlink(struct inode *dir, struct dentry *de)
                return error;
 
        coda_dir_update_mtime(dir);
-       drop_nlink(de->d_inode);
+       drop_nlink(d_inode(de));
        return 0;
 }
 
@@ -279,8 +279,8 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
        error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len);
        if (!error) {
                /* VFS may delete the child */
-               if (de->d_inode)
-                       clear_nlink(de->d_inode);
+               if (d_really_is_positive(de))
+                       clear_nlink(d_inode(de));
 
                /* fix the link count of the parent */
                coda_dir_drop_nlink(dir);
@@ -303,14 +303,14 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
                             coda_i2f(new_dir), old_length, new_length,
                             (const char *) old_name, (const char *)new_name);
        if (!error) {
-               if (new_dentry->d_inode) {
+               if (d_really_is_positive(new_dentry)) {
                        if (d_is_dir(new_dentry)) {
                                coda_dir_drop_nlink(old_dir);
                                coda_dir_inc_nlink(new_dir);
                        }
                        coda_dir_update_mtime(old_dir);
                        coda_dir_update_mtime(new_dir);
-                       coda_flag_inode(new_dentry->d_inode, C_VATTR);
+                       coda_flag_inode(d_inode(new_dentry), C_VATTR);
                } else {
                        coda_flag_inode(old_dir, C_VATTR);
                        coda_flag_inode(new_dir, C_VATTR);
@@ -449,13 +449,13 @@ static int coda_dentry_revalidate(struct dentry *de, unsigned int flags)
        if (flags & LOOKUP_RCU)
                return -ECHILD;
 
-       inode = de->d_inode;
+       inode = d_inode(de);
        if (!inode || is_root_inode(inode))
                goto out;
        if (is_bad_inode(inode))
                goto bad;
 
-       cii = ITOC(de->d_inode);
+       cii = ITOC(d_inode(de));
        if (!(cii->c_flags & (C_PURGE | C_FLUSH)))
                goto out;
 
@@ -487,11 +487,11 @@ static int coda_dentry_delete(const struct dentry * dentry)
 {
        int flags;
 
-       if (!dentry->d_inode
+       if (d_really_is_negative(dentry)
                return 0;
 
-       flags = (ITOC(dentry->d_inode)->c_flags) & C_PURGE;
-       if (is_bad_inode(dentry->d_inode) || flags) {
+       flags = (ITOC(d_inode(dentry))->c_flags) & C_PURGE;
+       if (is_bad_inode(d_inode(dentry)) || flags) {
                return 1;
        }
        return 0;
index 82ec68b..cac1390 100644 (file)
@@ -257,15 +257,15 @@ static void coda_evict_inode(struct inode *inode)
 
 int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
-       int err = coda_revalidate_inode(dentry->d_inode);
+       int err = coda_revalidate_inode(d_inode(dentry));
        if (!err)
-               generic_fillattr(dentry->d_inode, stat);
+               generic_fillattr(d_inode(dentry), stat);
        return err;
 }
 
 int coda_setattr(struct dentry *de, struct iattr *iattr)
 {
-       struct inode *inode = de->d_inode;
+       struct inode *inode = d_inode(de);
        struct coda_vattr vattr;
        int error;
 
index 4326d17..f36a404 100644 (file)
@@ -72,7 +72,7 @@ static long coda_pioctl(struct file *filp, unsigned int cmd,
        if (error)
                return error;
 
-       target_inode = path.dentry->d_inode;
+       target_inode = d_inode(path.dentry);
 
        /* return if it is not a Coda inode */
        if (target_inode->i_sb != inode->i_sb) {
index 5bb6e27..9b1ffaa 100644 (file)
@@ -820,8 +820,8 @@ int coda_downcall(struct venus_comm *vcp, int opcode, union outputArgs *out)
        case CODA_FLUSH:
                coda_cache_clear_all(sb);
                shrink_dcache_sb(sb);
-               if (sb->s_root->d_inode)
-                       coda_flag_inode(sb->s_root->d_inode, C_FLUSH);
+               if (d_really_is_positive(sb->s_root))
+                       coda_flag_inode(d_inode(sb->s_root), C_FLUSH);
                break;
 
        case CODA_PURGEUSER:
index acb3d63..c81ce7f 100644 (file)
@@ -289,7 +289,7 @@ static int configfs_create_dir(struct config_item *item, struct dentry *dentry)
        configfs_set_dir_dirent_depth(p->d_fsdata, dentry->d_fsdata);
        error = configfs_create(dentry, mode, init_dir);
        if (!error) {
-               inc_nlink(p->d_inode);
+               inc_nlink(d_inode(p));
                item->ci_dentry = dentry;
        } else {
                struct configfs_dirent *sd = dentry->d_fsdata;
@@ -375,8 +375,8 @@ static void remove_dir(struct dentry * d)
        list_del_init(&sd->s_sibling);
        spin_unlock(&configfs_dirent_lock);
        configfs_put(sd);
-       if (d->d_inode)
-               simple_rmdir(parent->d_inode,d);
+       if (d_really_is_positive(d))
+               simple_rmdir(d_inode(parent),d);
 
        pr_debug(" o %pd removing done (%d)\n", d, d_count(d));
 
@@ -513,7 +513,7 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
                        /* Abort if racing with mkdir() */
                        if (sd->s_type & CONFIGFS_USET_IN_MKDIR) {
                                if (wait_mutex)
-                                       *wait_mutex = &sd->s_dentry->d_inode->i_mutex;
+                                       *wait_mutex = &d_inode(sd->s_dentry)->i_mutex;
                                return -EAGAIN;
                        }
 
@@ -624,13 +624,13 @@ static void detach_groups(struct config_group *group)
 
                child = sd->s_dentry;
 
-               mutex_lock(&child->d_inode->i_mutex);
+               mutex_lock(&d_inode(child)->i_mutex);
 
                configfs_detach_group(sd->s_element);
-               child->d_inode->i_flags |= S_DEAD;
+               d_inode(child)->i_flags |= S_DEAD;
                dont_mount(child);
 
-               mutex_unlock(&child->d_inode->i_mutex);
+               mutex_unlock(&d_inode(child)->i_mutex);
 
                d_delete(child);
                dput(child);
@@ -672,7 +672,7 @@ static int create_default_group(struct config_group *parent_group,
                        sd = child->d_fsdata;
                        sd->s_type |= CONFIGFS_USET_DEFAULT;
                } else {
-                       BUG_ON(child->d_inode);
+                       BUG_ON(d_inode(child));
                        d_drop(child);
                        dput(child);
                }
@@ -818,11 +818,11 @@ static int configfs_attach_item(struct config_item *parent_item,
                         * the VFS may already have hit and used them. Thus,
                         * we must lock them as rmdir() would.
                         */
-                       mutex_lock(&dentry->d_inode->i_mutex);
+                       mutex_lock(&d_inode(dentry)->i_mutex);
                        configfs_remove_dir(item);
-                       dentry->d_inode->i_flags |= S_DEAD;
+                       d_inode(dentry)->i_flags |= S_DEAD;
                        dont_mount(dentry);
-                       mutex_unlock(&dentry->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dentry)->i_mutex);
                        d_delete(dentry);
                }
        }
@@ -858,16 +858,16 @@ static int configfs_attach_group(struct config_item *parent_item,
                 * We must also lock the inode to remove it safely in case of
                 * error, as rmdir() would.
                 */
-               mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
                configfs_adjust_dir_dirent_depth_before_populate(sd);
                ret = populate_groups(to_config_group(item));
                if (ret) {
                        configfs_detach_item(item);
-                       dentry->d_inode->i_flags |= S_DEAD;
+                       d_inode(dentry)->i_flags |= S_DEAD;
                        dont_mount(dentry);
                }
                configfs_adjust_dir_dirent_depth_after_populate(sd);
-               mutex_unlock(&dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dentry)->i_mutex);
                if (ret)
                        d_delete(dentry);
        }
@@ -1075,7 +1075,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
         * subsystem is really registered, and so we need to lock out
         * configfs_[un]register_subsystem().
         */
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
 
        root_sd = root->d_fsdata;
 
@@ -1111,7 +1111,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
 out_unlock_dirent_lock:
        spin_unlock(&configfs_dirent_lock);
 out_unlock_fs:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
 
        /*
         * If we succeeded, the fs is pinned via other methods.  If not,
@@ -1453,11 +1453,11 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
        down_write(&configfs_rename_sem);
        parent = item->parent->dentry;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
 
        new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
        if (!IS_ERR(new_dentry)) {
-               if (!new_dentry->d_inode) {
+               if (d_really_is_negative(new_dentry)) {
                        error = config_item_set_name(item, "%s", new_name);
                        if (!error) {
                                d_add(new_dentry, NULL);
@@ -1469,7 +1469,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
                        error = -EEXIST;
                dput(new_dentry);
        }
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
        up_write(&configfs_rename_sem);
 
        return error;
@@ -1482,7 +1482,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
        struct configfs_dirent * parent_sd = dentry->d_fsdata;
        int err;
 
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
        /*
         * Fake invisibility if dir belongs to a group/default groups hierarchy
         * being attached
@@ -1495,7 +1495,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
                else
                        err = 0;
        }
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
 
        return err;
 }
@@ -1505,11 +1505,11 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
        struct dentry * dentry = file->f_path.dentry;
        struct configfs_dirent * cursor = file->private_data;
 
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
        spin_lock(&configfs_dirent_lock);
        list_del_init(&cursor->s_sibling);
        spin_unlock(&configfs_dirent_lock);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
 
        release_configfs_dirent(cursor);
 
@@ -1567,7 +1567,7 @@ static int configfs_readdir(struct file *file, struct dir_context *ctx)
                spin_lock(&configfs_dirent_lock);
                dentry = next->s_dentry;
                if (dentry)
-                       inode = dentry->d_inode;
+                       inode = d_inode(dentry);
                if (inode)
                        ino = inode->i_ino;
                spin_unlock(&configfs_dirent_lock);
@@ -1590,7 +1590,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
 {
        struct dentry * dentry = file->f_path.dentry;
 
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
        switch (whence) {
                case 1:
                        offset += file->f_pos;
@@ -1598,7 +1598,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                        if (offset >= 0)
                                break;
                default:
-                       mutex_unlock(&dentry->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dentry)->i_mutex);
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -1624,7 +1624,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                        spin_unlock(&configfs_dirent_lock);
                }
        }
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
        return offset;
 }
 
@@ -1654,7 +1654,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
        sd = root->d_fsdata;
        link_group(to_config_group(sd->s_element), group);
 
-       mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(root)->i_mutex, I_MUTEX_PARENT);
 
        err = -ENOMEM;
        dentry = d_alloc_name(root, group->cg_item.ci_name);
@@ -1664,7 +1664,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                err = configfs_attach_group(sd->s_element, &group->cg_item,
                                            dentry);
                if (err) {
-                       BUG_ON(dentry->d_inode);
+                       BUG_ON(d_inode(dentry));
                        d_drop(dentry);
                        dput(dentry);
                } else {
@@ -1674,7 +1674,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                }
        }
 
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
 
        if (err) {
                unlink_group(group);
@@ -1695,9 +1695,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
                return;
        }
 
-       mutex_lock_nested(&root->d_inode->i_mutex,
+       mutex_lock_nested(&d_inode(root)->i_mutex,
                          I_MUTEX_PARENT);
-       mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
        mutex_lock(&configfs_symlink_mutex);
        spin_lock(&configfs_dirent_lock);
        if (configfs_detach_prep(dentry, NULL)) {
@@ -1706,13 +1706,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
        spin_unlock(&configfs_dirent_lock);
        mutex_unlock(&configfs_symlink_mutex);
        configfs_detach_group(&group->cg_item);
-       dentry->d_inode->i_flags |= S_DEAD;
+       d_inode(dentry)->i_flags |= S_DEAD;
        dont_mount(dentry);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
 
        d_delete(dentry);
 
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
 
        dput(dentry);
 
index 56d2cdc..403269f 100644 (file)
@@ -326,10 +326,10 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib
        umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
        int error = 0;
 
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_NORMAL);
        error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode,
                                     CONFIGFS_ITEM_ATTR);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
 
        return error;
 }
index 5423a6a..8d89f5f 100644 (file)
@@ -56,7 +56,7 @@ static const struct inode_operations configfs_inode_operations ={
 
 int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
 {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
        struct configfs_dirent * sd = dentry->d_fsdata;
        struct iattr * sd_iattr;
        unsigned int ia_valid = iattr->ia_valid;
@@ -186,7 +186,7 @@ int configfs_create(struct dentry * dentry, umode_t mode, void (*init)(struct in
        if (!dentry)
                return -ENOENT;
 
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                return -EEXIST;
 
        sd = dentry->d_fsdata;
@@ -194,7 +194,7 @@ int configfs_create(struct dentry * dentry, umode_t mode, void (*init)(struct in
        if (!inode)
                return -ENOMEM;
 
-       p_inode = dentry->d_parent->d_inode;
+       p_inode = d_inode(dentry->d_parent);
        p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
        configfs_set_inode_lock_class(sd, inode);
 
@@ -236,11 +236,11 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
 
        if (dentry) {
                spin_lock(&dentry->d_lock);
-               if (!d_unhashed(dentry) && dentry->d_inode) {
+               if (!d_unhashed(dentry) && d_really_is_positive(dentry)) {
                        dget_dlock(dentry);
                        __d_drop(dentry);
                        spin_unlock(&dentry->d_lock);
-                       simple_unlink(parent->d_inode, dentry);
+                       simple_unlink(d_inode(parent), dentry);
                } else
                        spin_unlock(&dentry->d_lock);
        }
@@ -251,11 +251,11 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
        struct configfs_dirent * sd;
        struct configfs_dirent * parent_sd = dir->d_fsdata;
 
-       if (dir->d_inode == NULL)
+       if (d_really_is_negative(dir))
                /* no inode means this hasn't been made visible yet */
                return;
 
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
        list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
                if (!sd->s_element)
                        continue;
@@ -268,5 +268,5 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
                        break;
                }
        }
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
 }
index 0bb0aec..6f65f00 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -209,7 +209,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        }
 
        /* Protects against truncate */
-       atomic_inc(&inode->i_dio_count);
+       inode_dio_begin(inode);
 
        retval = dax_io(inode, iter, pos, end, get_block, &bh);
 
@@ -219,7 +219,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
        if ((retval > 0) && end_io)
                end_io(iocb, pos, retval, bh.b_private);
 
-       inode_dio_done(inode);
+       inode_dio_end(inode);
  out:
        return retval;
 }
index 517e649..830a7e7 100644 (file)
@@ -45,7 +45,7 @@ const struct file_operations debugfs_file_operations = {
 
 static void *debugfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       nd_set_link(nd, dentry->d_inode->i_private);
+       nd_set_link(nd, d_inode(dentry)->i_private);
        return NULL;
 }
 
index c9ee0df..c1e7ffb 100644 (file)
@@ -46,7 +46,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb)
 
 static inline int debugfs_positive(struct dentry *dentry)
 {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
 }
 
 struct debugfs_mount_opts {
@@ -124,7 +124,7 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
 static int debugfs_apply_options(struct super_block *sb)
 {
        struct debugfs_fs_info *fsi = sb->s_fs_info;
-       struct inode *inode = sb->s_root->d_inode;
+       struct inode *inode = d_inode(sb->s_root);
        struct debugfs_mount_opts *opts = &fsi->mount_opts;
 
        inode->i_mode &= ~S_IALLUGO;
@@ -188,7 +188,7 @@ static struct vfsmount *debugfs_automount(struct path *path)
 {
        struct vfsmount *(*f)(void *);
        f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
-       return f(path->dentry->d_inode->i_private);
+       return f(d_inode(path->dentry)->i_private);
 }
 
 static const struct dentry_operations debugfs_dops = {
@@ -270,20 +270,20 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        if (!parent)
                parent = debugfs_mount->mnt_root;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        dentry = lookup_one_len(name, parent, strlen(name));
-       if (!IS_ERR(dentry) && dentry->d_inode) {
+       if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
                dput(dentry);
                dentry = ERR_PTR(-EEXIST);
        }
        if (IS_ERR(dentry))
-               mutex_unlock(&parent->d_inode->i_mutex);
+               mutex_unlock(&d_inode(parent)->i_mutex);
        return dentry;
 }
 
 static struct dentry *failed_creating(struct dentry *dentry)
 {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
        dput(dentry);
        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        return NULL;
@@ -291,7 +291,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
 
 static struct dentry *end_creating(struct dentry *dentry)
 {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
        return dentry;
 }
 
@@ -344,7 +344,7 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
        inode->i_fop = fops ? fops : &debugfs_file_operations;
        inode->i_private = data;
        d_instantiate(dentry, inode);
-       fsnotify_create(dentry->d_parent->d_inode, dentry);
+       fsnotify_create(d_inode(dentry->d_parent), dentry);
        return end_creating(dentry);
 }
 EXPORT_SYMBOL_GPL(debugfs_create_file);
@@ -384,7 +384,7 @@ struct dentry *debugfs_create_file_size(const char *name, umode_t mode,
        struct dentry *de = debugfs_create_file(name, mode, parent, data, fops);
 
        if (de)
-               de->d_inode->i_size = file_size;
+               d_inode(de)->i_size = file_size;
        return de;
 }
 EXPORT_SYMBOL_GPL(debugfs_create_file_size);
@@ -426,8 +426,8 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
        /* directory inodes start off with i_nlink == 2 (for "." entry) */
        inc_nlink(inode);
        d_instantiate(dentry, inode);
-       inc_nlink(dentry->d_parent->d_inode);
-       fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+       inc_nlink(d_inode(dentry->d_parent));
+       fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
        return end_creating(dentry);
 }
 EXPORT_SYMBOL_GPL(debugfs_create_dir);
@@ -525,9 +525,9 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
        if (debugfs_positive(dentry)) {
                dget(dentry);
                if (d_is_dir(dentry))
-                       ret = simple_rmdir(parent->d_inode, dentry);
+                       ret = simple_rmdir(d_inode(parent), dentry);
                else
-                       simple_unlink(parent->d_inode, dentry);
+                       simple_unlink(d_inode(parent), dentry);
                if (!ret)
                        d_delete(dentry);
                dput(dentry);
@@ -557,12 +557,12 @@ void debugfs_remove(struct dentry *dentry)
                return;
 
        parent = dentry->d_parent;
-       if (!parent || !parent->d_inode)
+       if (!parent || d_really_is_negative(parent))
                return;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        ret = __debugfs_remove(dentry, parent);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
        if (!ret)
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
@@ -588,12 +588,12 @@ void debugfs_remove_recursive(struct dentry *dentry)
                return;
 
        parent = dentry->d_parent;
-       if (!parent || !parent->d_inode)
+       if (!parent || d_really_is_negative(parent))
                return;
 
        parent = dentry;
  down:
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
  loop:
        /*
         * The parent->d_subdirs is protected by the d_lock. Outside that
@@ -608,7 +608,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
                /* perhaps simple_empty(child) makes more sense */
                if (!list_empty(&child->d_subdirs)) {
                        spin_unlock(&parent->d_lock);
-                       mutex_unlock(&parent->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(parent)->i_mutex);
                        parent = child;
                        goto down;
                }
@@ -629,10 +629,10 @@ void debugfs_remove_recursive(struct dentry *dentry)
        }
        spin_unlock(&parent->d_lock);
 
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
        child = parent;
        parent = parent->d_parent;
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
 
        if (child != dentry)
                /* go up */
@@ -640,7 +640,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
 
        if (!__debugfs_remove(child, parent))
                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
 }
 EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
 
@@ -672,27 +672,27 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
 
        trap = lock_rename(new_dir, old_dir);
        /* Source or destination directories don't exist? */
-       if (!old_dir->d_inode || !new_dir->d_inode)
+       if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir))
                goto exit;
        /* Source does not exist, cyclic rename, or mountpoint? */
-       if (!old_dentry->d_inode || old_dentry == trap ||
+       if (d_really_is_negative(old_dentry) || old_dentry == trap ||
            d_mountpoint(old_dentry))
                goto exit;
        dentry = lookup_one_len(new_name, new_dir, strlen(new_name));
        /* Lookup failed, cyclic rename or target exists? */
-       if (IS_ERR(dentry) || dentry == trap || dentry->d_inode)
+       if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry))
                goto exit;
 
        old_name = fsnotify_oldname_init(old_dentry->d_name.name);
 
-       error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode,
+       error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
                dentry);
        if (error) {
                fsnotify_oldname_free(old_name);
                goto exit;
        }
        d_move(old_dentry, dentry);
-       fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
+       fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name,
                d_is_dir(old_dentry),
                NULL, old_dentry);
        fsnotify_oldname_free(old_name);
index cfe8466..add5663 100644 (file)
@@ -253,7 +253,7 @@ static int mknod_ptmx(struct super_block *sb)
        if (!uid_valid(root_uid) || !gid_valid(root_gid))
                return -EINVAL;
 
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
 
        /* If we have already created ptmx node, return */
        if (fsi->ptmx_dentry) {
@@ -290,7 +290,7 @@ static int mknod_ptmx(struct super_block *sb)
        fsi->ptmx_dentry = dentry;
        rc = 0;
 out:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
        return rc;
 }
 
@@ -298,7 +298,7 @@ static void update_ptmx_mode(struct pts_fs_info *fsi)
 {
        struct inode *inode;
        if (fsi->ptmx_dentry) {
-               inode = fsi->ptmx_dentry->d_inode;
+               inode = d_inode(fsi->ptmx_dentry);
                inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
        }
 }
@@ -602,18 +602,18 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
 
        sprintf(s, "%d", index);
 
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
 
        dentry = d_alloc_name(root, s);
        if (dentry) {
                d_add(dentry, inode);
-               fsnotify_create(root->d_inode, dentry);
+               fsnotify_create(d_inode(root), dentry);
        } else {
                iput(inode);
                inode = ERR_PTR(-ENOMEM);
        }
 
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
 
        return inode;
 }
@@ -658,7 +658,7 @@ void devpts_pty_kill(struct inode *inode)
 
        BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
 
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
 
        dentry = d_find_alias(inode);
 
@@ -667,7 +667,7 @@ void devpts_pty_kill(struct inode *inode)
        dput(dentry);   /* d_alloc_name() in devpts_pty_new() */
        dput(dentry);           /* d_find_alias above */
 
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
 }
 
 static int __init init_devpts_fs(void)
index c3b560b..745d234 100644 (file)
@@ -253,7 +253,9 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
        if (dio->end_io && dio->result)
                dio->end_io(dio->iocb, offset, transferred, dio->private);
 
-       inode_dio_done(dio->inode);
+       if (!(dio->flags & DIO_SKIP_DIO_COUNT))
+               inode_dio_end(dio->inode);
+
        if (is_async) {
                if (dio->rw & WRITE) {
                        int err;
@@ -1195,7 +1197,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
        /*
         * Will be decremented at I/O completion time.
         */
-       atomic_inc(&inode->i_dio_count);
+       if (!(dio->flags & DIO_SKIP_DIO_COUNT))
+               inode_dio_begin(inode);
 
        retval = 0;
        sdio.blkbits = blkbits;
index 719e1ce..97315f2 100644 (file)
@@ -1326,7 +1326,7 @@ static int ecryptfs_read_headers_virt(char *page_virt,
        if (rc)
                goto out;
        if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED))
-               ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
+               ecryptfs_i_size_init(page_virt, d_inode(ecryptfs_dentry));
        offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
        rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
                                    &bytes_read);
@@ -1425,7 +1425,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
 {
        int rc;
        char *page_virt;
-       struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
+       struct inode *ecryptfs_inode = d_inode(ecryptfs_dentry);
        struct ecryptfs_crypt_stat *crypt_stat =
            &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
        struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
index 4000f6b..8db0b46 100644 (file)
@@ -54,11 +54,11 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
                return -ECHILD;
 
        rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                struct inode *lower_inode =
-                       ecryptfs_inode_to_lower(dentry->d_inode);
+                       ecryptfs_inode_to_lower(d_inode(dentry));
 
-               fsstack_copy_attr_all(dentry->d_inode, lower_inode);
+               fsstack_copy_attr_all(d_inode(dentry), lower_inode);
        }
        return rc;
 }
index a65786e..72afcc6 100644 (file)
@@ -130,7 +130,7 @@ struct kmem_cache *ecryptfs_file_info_cache;
 
 static int read_or_initialize_metadata(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
        struct ecryptfs_crypt_stat *crypt_stat;
        int rc;
index b08b518..fc850b5 100644 (file)
@@ -41,13 +41,13 @@ static struct dentry *lock_parent(struct dentry *dentry)
        struct dentry *dir;
 
        dir = dget_parent(dentry);
-       mutex_lock_nested(&(dir->d_inode->i_mutex), I_MUTEX_PARENT);
+       mutex_lock_nested(&(d_inode(dir)->i_mutex), I_MUTEX_PARENT);
        return dir;
 }
 
 static void unlock_dir(struct dentry *dir)
 {
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        dput(dir);
 }
 
@@ -131,7 +131,7 @@ struct inode *ecryptfs_get_inode(struct inode *lower_inode,
 static int ecryptfs_interpose(struct dentry *lower_dentry,
                              struct dentry *dentry, struct super_block *sb)
 {
-       struct inode *inode = ecryptfs_get_inode(lower_dentry->d_inode, sb);
+       struct inode *inode = ecryptfs_get_inode(d_inode(lower_dentry), sb);
 
        if (IS_ERR(inode))
                return PTR_ERR(inode);
@@ -189,21 +189,21 @@ ecryptfs_do_create(struct inode *directory_inode,
 
        lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, true);
+       rc = vfs_create(d_inode(lower_dir_dentry), lower_dentry, mode, true);
        if (rc) {
                printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
                       "rc = [%d]\n", __func__, rc);
                inode = ERR_PTR(rc);
                goto out_lock;
        }
-       inode = __ecryptfs_get_inode(lower_dentry->d_inode,
+       inode = __ecryptfs_get_inode(d_inode(lower_dentry),
                                     directory_inode->i_sb);
        if (IS_ERR(inode)) {
-               vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
+               vfs_unlink(d_inode(lower_dir_dentry), lower_dentry, NULL);
                goto out_lock;
        }
-       fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode);
+       fsstack_copy_attr_times(directory_inode, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(directory_inode, d_inode(lower_dir_dentry));
 out_lock:
        unlock_dir(lower_dir_dentry);
        return inode;
@@ -332,7 +332,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
                                     struct dentry *lower_dentry,
                                     struct inode *dir_inode)
 {
-       struct inode *inode, *lower_inode = lower_dentry->d_inode;
+       struct inode *inode, *lower_inode = d_inode(lower_dentry);
        struct ecryptfs_dentry_info *dentry_info;
        struct vfsmount *lower_mnt;
        int rc = 0;
@@ -347,14 +347,14 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
        }
 
        lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
-       fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
+       fsstack_copy_attr_atime(dir_inode, d_inode(lower_dentry->d_parent));
        BUG_ON(!d_count(lower_dentry));
 
        ecryptfs_set_dentry_private(dentry, dentry_info);
        dentry_info->lower_path.mnt = lower_mnt;
        dentry_info->lower_path.dentry = lower_dentry;
 
-       if (!lower_dentry->d_inode) {
+       if (d_really_is_negative(lower_dentry)) {
                /* We want to add because we couldn't find in lower */
                d_add(dentry, NULL);
                return 0;
@@ -400,11 +400,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
        int rc = 0;
 
        lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
-       mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
        lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
                                      lower_dir_dentry,
                                      ecryptfs_dentry->d_name.len);
-       mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -412,7 +412,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                                ecryptfs_dentry);
                goto out;
        }
-       if (lower_dentry->d_inode)
+       if (d_really_is_positive(lower_dentry))
                goto interpose;
        mount_crypt_stat = &ecryptfs_superblock_to_private(
                                ecryptfs_dentry->d_sb)->mount_crypt_stat;
@@ -429,11 +429,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                       "filename; rc = [%d]\n", __func__, rc);
                goto out;
        }
-       mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
        lower_dentry = lookup_one_len(encrypted_and_encoded_name,
                                      lower_dir_dentry,
                                      encrypted_and_encoded_name_size);
-       mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
        if (IS_ERR(lower_dentry)) {
                rc = PTR_ERR(lower_dentry);
                ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -458,24 +458,24 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
        u64 file_size_save;
        int rc;
 
-       file_size_save = i_size_read(old_dentry->d_inode);
+       file_size_save = i_size_read(d_inode(old_dentry));
        lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
        lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
        dget(lower_old_dentry);
        dget(lower_new_dentry);
        lower_dir_dentry = lock_parent(lower_new_dentry);
-       rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
+       rc = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry),
                      lower_new_dentry, NULL);
-       if (rc || !lower_new_dentry->d_inode)
+       if (rc || d_really_is_negative(lower_new_dentry))
                goto out_lock;
        rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
        if (rc)
                goto out_lock;
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
-       set_nlink(old_dentry->d_inode,
-                 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink);
-       i_size_write(new_dentry->d_inode, file_size_save);
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
+       set_nlink(d_inode(old_dentry),
+                 ecryptfs_inode_to_lower(d_inode(old_dentry))->i_nlink);
+       i_size_write(d_inode(new_dentry), file_size_save);
 out_lock:
        unlock_dir(lower_dir_dentry);
        dput(lower_new_dentry);
@@ -485,7 +485,7 @@ out_lock:
 
 static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
 {
-       return ecryptfs_do_unlink(dir, dentry, dentry->d_inode);
+       return ecryptfs_do_unlink(dir, dentry, d_inode(dentry));
 }
 
 static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
@@ -510,20 +510,20 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
                                                  strlen(symname));
        if (rc)
                goto out_lock;
-       rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
+       rc = vfs_symlink(d_inode(lower_dir_dentry), lower_dentry,
                         encoded_symname);
        kfree(encoded_symname);
-       if (rc || !lower_dentry->d_inode)
+       if (rc || d_really_is_negative(lower_dentry))
                goto out_lock;
        rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
        if (rc)
                goto out_lock;
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
 out_lock:
        unlock_dir(lower_dir_dentry);
        dput(lower_dentry);
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                d_drop(dentry);
        return rc;
 }
@@ -536,18 +536,18 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode);
-       if (rc || !lower_dentry->d_inode)
+       rc = vfs_mkdir(d_inode(lower_dir_dentry), lower_dentry, mode);
+       if (rc || d_really_is_negative(lower_dentry))
                goto out;
        rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
        if (rc)
                goto out;
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
-       set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
+       set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
 out:
        unlock_dir(lower_dir_dentry);
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                d_drop(dentry);
        return rc;
 }
@@ -562,12 +562,12 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
        dget(dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
        dget(lower_dentry);
-       rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
+       rc = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry);
        dput(lower_dentry);
-       if (!rc && dentry->d_inode)
-               clear_nlink(dentry->d_inode);
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
+       if (!rc && d_really_is_positive(dentry))
+               clear_nlink(d_inode(dentry));
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
        unlock_dir(lower_dir_dentry);
        if (!rc)
                d_drop(dentry);
@@ -584,17 +584,17 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev
 
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev);
-       if (rc || !lower_dentry->d_inode)
+       rc = vfs_mknod(d_inode(lower_dir_dentry), lower_dentry, mode, dev);
+       if (rc || d_really_is_negative(lower_dentry))
                goto out;
        rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
        if (rc)
                goto out;
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
 out:
        unlock_dir(lower_dir_dentry);
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                d_drop(dentry);
        return rc;
 }
@@ -617,7 +617,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        dget(lower_new_dentry);
        lower_old_dir_dentry = dget_parent(lower_old_dentry);
        lower_new_dir_dentry = dget_parent(lower_new_dentry);
-       target_inode = new_dentry->d_inode;
+       target_inode = d_inode(new_dentry);
        trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
        /* source should not be ancestor of target */
        if (trap == lower_old_dentry) {
@@ -629,17 +629,17 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                rc = -ENOTEMPTY;
                goto out_lock;
        }
-       rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
-                       lower_new_dir_dentry->d_inode, lower_new_dentry,
+       rc = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry,
+                       d_inode(lower_new_dir_dentry), lower_new_dentry,
                        NULL, 0);
        if (rc)
                goto out_lock;
        if (target_inode)
                fsstack_copy_attr_all(target_inode,
                                      ecryptfs_inode_to_lower(target_inode));
-       fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
+       fsstack_copy_attr_all(new_dir, d_inode(lower_new_dir_dentry));
        if (new_dir != old_dir)
-               fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
+               fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
 out_lock:
        unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
        dput(lower_new_dir_dentry);
@@ -662,7 +662,7 @@ static char *ecryptfs_readlink_lower(struct dentry *dentry, size_t *bufsiz)
                return ERR_PTR(-ENOMEM);
        old_fs = get_fs();
        set_fs(get_ds());
-       rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
+       rc = d_inode(lower_dentry)->i_op->readlink(lower_dentry,
                                                   (char __user *)lower_buf,
                                                   PATH_MAX);
        set_fs(old_fs);
@@ -681,8 +681,8 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
        char *buf = ecryptfs_readlink_lower(dentry, &len);
        if (IS_ERR(buf))
                goto out;
-       fsstack_copy_attr_atime(dentry->d_inode,
-                               ecryptfs_dentry_to_lower(dentry)->d_inode);
+       fsstack_copy_attr_atime(d_inode(dentry),
+                               d_inode(ecryptfs_dentry_to_lower(dentry)));
        buf[len] = '\0';
 out:
        nd_set_link(nd, buf);
@@ -738,7 +738,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
                          struct iattr *lower_ia)
 {
        int rc = 0;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ecryptfs_crypt_stat *crypt_stat;
        loff_t i_size = i_size_read(inode);
        loff_t lower_size_before_truncate;
@@ -751,7 +751,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
        rc = ecryptfs_get_lower_file(dentry, inode);
        if (rc)
                return rc;
-       crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+       crypt_stat = &ecryptfs_inode_to_private(d_inode(dentry))->crypt_stat;
        /* Switch on growing or shrinking file */
        if (ia->ia_size > i_size) {
                char zero[] = { 0x00 };
@@ -858,7 +858,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
        struct iattr lower_ia = { .ia_valid = 0 };
        int rc;
 
-       rc = ecryptfs_inode_newsize_ok(dentry->d_inode, new_length);
+       rc = ecryptfs_inode_newsize_ok(d_inode(dentry), new_length);
        if (rc)
                return rc;
 
@@ -866,9 +866,9 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
        if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
                struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 
-               mutex_lock(&lower_dentry->d_inode->i_mutex);
+               mutex_lock(&d_inode(lower_dentry)->i_mutex);
                rc = notify_change(lower_dentry, &lower_ia, NULL);
-               mutex_unlock(&lower_dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(lower_dentry)->i_mutex);
        }
        return rc;
 }
@@ -900,10 +900,10 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
        struct inode *lower_inode;
        struct ecryptfs_crypt_stat *crypt_stat;
 
-       crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+       crypt_stat = &ecryptfs_inode_to_private(d_inode(dentry))->crypt_stat;
        if (!(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
                ecryptfs_init_crypt_stat(crypt_stat);
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        lower_inode = ecryptfs_inode_to_lower(inode);
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
        mutex_lock(&crypt_stat->cs_mutex);
@@ -967,9 +967,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
        if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
                lower_ia.ia_valid &= ~ATTR_MODE;
 
-       mutex_lock(&lower_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dentry)->i_mutex);
        rc = notify_change(lower_dentry, &lower_ia, NULL);
-       mutex_unlock(&lower_dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
 out:
        fsstack_copy_attr_all(inode, lower_inode);
        return rc;
@@ -983,7 +983,7 @@ static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
 
        mount_crypt_stat = &ecryptfs_superblock_to_private(
                                                dentry->d_sb)->mount_crypt_stat;
-       generic_fillattr(dentry->d_inode, stat);
+       generic_fillattr(d_inode(dentry), stat);
        if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
                char *target;
                size_t targetsiz;
@@ -1007,9 +1007,9 @@ static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 
        rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat);
        if (!rc) {
-               fsstack_copy_attr_all(dentry->d_inode,
-                                     ecryptfs_inode_to_lower(dentry->d_inode));
-               generic_fillattr(dentry->d_inode, stat);
+               fsstack_copy_attr_all(d_inode(dentry),
+                                     ecryptfs_inode_to_lower(d_inode(dentry)));
+               generic_fillattr(d_inode(dentry), stat);
                stat->blocks = lower_stat.blocks;
        }
        return rc;
@@ -1023,14 +1023,14 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        struct dentry *lower_dentry;
 
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
-       if (!lower_dentry->d_inode->i_op->setxattr) {
+       if (!d_inode(lower_dentry)->i_op->setxattr) {
                rc = -EOPNOTSUPP;
                goto out;
        }
 
        rc = vfs_setxattr(lower_dentry, name, value, size, flags);
-       if (!rc && dentry->d_inode)
-               fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
+       if (!rc && d_really_is_positive(dentry))
+               fsstack_copy_attr_all(d_inode(dentry), d_inode(lower_dentry));
 out:
        return rc;
 }
@@ -1041,14 +1041,14 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
 {
        int rc = 0;
 
-       if (!lower_dentry->d_inode->i_op->getxattr) {
+       if (!d_inode(lower_dentry)->i_op->getxattr) {
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&lower_dentry->d_inode->i_mutex);
-       rc = lower_dentry->d_inode->i_op->getxattr(lower_dentry, name, value,
+       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value,
                                                   size);
-       mutex_unlock(&lower_dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
 out:
        return rc;
 }
@@ -1068,13 +1068,13 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
        struct dentry *lower_dentry;
 
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
-       if (!lower_dentry->d_inode->i_op->listxattr) {
+       if (!d_inode(lower_dentry)->i_op->listxattr) {
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&lower_dentry->d_inode->i_mutex);
-       rc = lower_dentry->d_inode->i_op->listxattr(lower_dentry, list, size);
-       mutex_unlock(&lower_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       rc = d_inode(lower_dentry)->i_op->listxattr(lower_dentry, list, size);
+       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
 out:
        return rc;
 }
@@ -1085,13 +1085,13 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
        struct dentry *lower_dentry;
 
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
-       if (!lower_dentry->d_inode->i_op->removexattr) {
+       if (!d_inode(lower_dentry)->i_op->removexattr) {
                rc = -EOPNOTSUPP;
                goto out;
        }
-       mutex_lock(&lower_dentry->d_inode->i_mutex);
-       rc = lower_dentry->d_inode->i_op->removexattr(lower_dentry, name);
-       mutex_unlock(&lower_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       rc = d_inode(lower_dentry)->i_op->removexattr(lower_dentry, name);
+       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
 out:
        return rc;
 }
index f1ea610..866bb18 100644 (file)
@@ -144,7 +144,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
        /* Corresponding dput() and mntput() are done when the
         * lower file is fput() when all eCryptfs files for the inode are
         * released. */
-       flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
+       flags |= IS_RDONLY(d_inode(lower_dentry)) ? O_RDONLY : O_RDWR;
        (*lower_file) = dentry_open(&req.path, flags, cred);
        if (!IS_ERR(*lower_file))
                goto out;
index c095d32..4f4d047 100644 (file)
@@ -546,11 +546,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                goto out_free;
        }
 
-       if (check_ruid && !uid_eq(path.dentry->d_inode->i_uid, current_uid())) {
+       if (check_ruid && !uid_eq(d_inode(path.dentry)->i_uid, current_uid())) {
                rc = -EPERM;
                printk(KERN_ERR "Mount of device (uid: %d) not owned by "
                       "requested user (uid: %d)\n",
-                       i_uid_read(path.dentry->d_inode),
+                       i_uid_read(d_inode(path.dentry)),
                        from_kuid(&init_user_ns, current_uid()));
                goto out_free;
        }
@@ -584,7 +584,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                goto out_free;
        }
 
-       inode = ecryptfs_get_inode(path.dentry->d_inode, s);
+       inode = ecryptfs_get_inode(d_inode(path.dentry), s);
        rc = PTR_ERR(inode);
        if (IS_ERR(inode))
                goto out_free;
index 4626976..cf20852 100644 (file)
@@ -420,7 +420,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
        void *xattr_virt;
        struct dentry *lower_dentry =
                ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_path.dentry;
-       struct inode *lower_inode = lower_dentry->d_inode;
+       struct inode *lower_inode = d_inode(lower_dentry);
        int rc;
 
        if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
index 07ab497..3381b9d 100644 (file)
@@ -145,12 +145,12 @@ out:
 
 static int efivarfs_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct efivar_entry *var = dentry->d_inode->i_private;
+       struct efivar_entry *var = d_inode(dentry)->i_private;
 
        if (efivar_entry_delete(var))
                return -EINVAL;
 
-       drop_nlink(dentry->d_inode);
+       drop_nlink(d_inode(dentry));
        dput(dentry);
        return 0;
 };
index ddbce42..59fedbc 100644 (file)
@@ -144,7 +144,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
 
        name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
 
-       inode = efivarfs_get_inode(sb, root->d_inode, S_IFREG | 0644, 0);
+       inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0);
        if (!inode)
                goto fail_name;
 
index bbee8f0..40ba9cc 100644 (file)
@@ -111,9 +111,9 @@ struct dentry *efs_get_parent(struct dentry *child)
        struct dentry *parent = ERR_PTR(-ENOENT);
        efs_ino_t ino;
 
-       ino = efs_find_entry(child->d_inode, "..", 2);
+       ino = efs_find_entry(d_inode(child), "..", 2);
        if (ino)
-               parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino));
+               parent = d_obtain_alias(efs_iget(d_inode(child)->i_sb, ino));
 
        return parent;
 }
index d7defd5..4deb0b0 100644 (file)
@@ -379,7 +379,7 @@ ino_t exofs_parent_ino(struct dentry *child)
        struct exofs_dir_entry *de;
        ino_t ino;
 
-       de = exofs_dotdot(child->d_inode, &page);
+       de = exofs_dotdot(d_inode(child), &page);
        if (!de)
                return 0;
 
@@ -429,7 +429,7 @@ int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de,
 
 int exofs_add_link(struct dentry *dentry, struct inode *inode)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        const unsigned char *name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
        unsigned chunk_size = exofs_chunk_size(dir);
index 35073aa..786e4cc 100644 (file)
@@ -1028,7 +1028,7 @@ static int _do_truncate(struct inode *inode, loff_t newsize)
  */
 int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        /* if we are about to modify an object, and it hasn't been
index 2890746..5ae25e4 100644 (file)
@@ -141,7 +141,7 @@ out_fail:
 static int exofs_link(struct dentry *old_dentry, struct inode *dir,
                struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
 
        inode->i_ctime = CURRENT_TIME;
        inode_inc_link_count(inode);
@@ -191,7 +191,7 @@ out_dir:
 
 static int exofs_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct exofs_dir_entry *de;
        struct page *page;
        int err = -ENOENT;
@@ -213,7 +213,7 @@ out:
 
 static int exofs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int err = -ENOTEMPTY;
 
        if (exofs_empty_dir(inode)) {
@@ -230,8 +230,8 @@ static int exofs_rmdir(struct inode *dir, struct dentry *dentry)
 static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
                struct inode *new_dir, struct dentry *new_dentry)
 {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct page *dir_page = NULL;
        struct exofs_dir_entry *dir_de = NULL;
        struct page *old_page;
index fcc2e56..b795c56 100644 (file)
@@ -958,7 +958,7 @@ static struct dentry *exofs_get_parent(struct dentry *child)
        if (!ino)
                return ERR_PTR(-ESTALE);
 
-       return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(exofs_iget(d_inode(child)->i_sb, ino));
 }
 
 static struct inode *exofs_nfs_get_inode(struct super_block *sb,
index 832e262..6f6f3a4 100644 (file)
@@ -37,7 +37,7 @@
 
 static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct exofs_i_info *oi = exofs_i(dentry->d_inode);
+       struct exofs_i_info *oi = exofs_i(d_inode(dentry));
 
        nd_set_link(nd, (char *)oi->i_data);
        return NULL;
index 6e1d4ab..796b491 100644 (file)
@@ -486,7 +486,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
  */
 int ext2_add_link (struct dentry *dentry, struct inode *inode)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        const char *name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
        unsigned chunk_size = ext2_chunk_size(dir);
index 6c14bb8..5c04a0d 100644 (file)
@@ -278,7 +278,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
        avefreeb = free_blocks / ngroups;
        ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 
-       if ((parent == sb->s_root->d_inode) ||
+       if ((parent == d_inode(sb->s_root)) ||
            (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) {
                struct ext2_group_desc *best_desc = NULL;
                int best_ndir = inodes_per_group;
index 5d92139..f460ae3 100644 (file)
@@ -1544,7 +1544,7 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
 
 int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        error = inode_change_ok(inode, iattr);
index ce42293..3e074a9 100644 (file)
@@ -79,10 +79,10 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, uns
 struct dentry *ext2_get_parent(struct dentry *child)
 {
        struct qstr dotdot = QSTR_INIT("..", 2);
-       unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot);
+       unsigned long ino = ext2_inode_by_name(d_inode(child), &dotdot);
        if (!ino)
                return ERR_PTR(-ENOENT);
-       return d_obtain_alias(ext2_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(ext2_iget(d_inode(child)->i_sb, ino));
 } 
 
 /*
@@ -208,7 +208,7 @@ out_fail:
 static int ext2_link (struct dentry * old_dentry, struct inode * dir,
        struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        int err;
 
        dquot_initialize(dir);
@@ -275,7 +275,7 @@ out_dir:
 
 static int ext2_unlink(struct inode * dir, struct dentry *dentry)
 {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
        struct ext2_dir_entry_2 * de;
        struct page * page;
        int err = -ENOENT;
@@ -299,7 +299,7 @@ out:
 
 static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
 {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
        int err = -ENOTEMPTY;
 
        if (ext2_empty_dir(inode)) {
@@ -316,8 +316,8 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
 static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
        struct inode * new_dir, struct dentry * new_dentry )
 {
-       struct inode * old_inode = old_dentry->d_inode;
-       struct inode * new_inode = new_dentry->d_inode;
+       struct inode * old_inode = d_inode(old_dentry);
+       struct inode * new_inode = d_inode(new_dentry);
        struct page * dir_page = NULL;
        struct ext2_dir_entry_2 * dir_de = NULL;
        struct page * old_page;
index 565cf81..20608f1 100644 (file)
@@ -23,7 +23,7 @@
 
 static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct ext2_inode_info *ei = EXT2_I(dentry->d_inode);
+       struct ext2_inode_info *ei = EXT2_I(d_inode(dentry));
        nd_set_link(nd, (char *)ei->i_data);
        return NULL;
 }
index 9142614..0b6bfd3 100644 (file)
@@ -243,7 +243,7 @@ cleanup:
 static int
 ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct buffer_head *bh = NULL;
        struct ext2_xattr_entry *entry;
        char *end;
@@ -319,7 +319,7 @@ cleanup:
 /*
  * Inode operation listxattr()
  *
- * dentry->d_inode->i_mutex: don't care
+ * d_inode(dentry)->i_mutex: don't care
  */
 ssize_t
 ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
index c0ebc4d..702fc68 100644 (file)
@@ -28,7 +28,7 @@ ext2_xattr_security_get(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
+       return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
                              buffer, size);
 }
 
@@ -38,7 +38,7 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
+       return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
                              value, size, flags);
 }
 
index 7e19257..42b6e98 100644 (file)
@@ -32,7 +32,7 @@ ext2_xattr_trusted_get(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
+       return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
                              buffer, size);
 }
 
@@ -42,7 +42,7 @@ ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
+       return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
                              value, size, flags);
 }
 
index f470e44..ecdc460 100644 (file)
@@ -36,7 +36,7 @@ ext2_xattr_user_get(struct dentry *dentry, const char *name,
                return -EINVAL;
        if (!test_opt(dentry->d_sb, XATTR_USER))
                return -EOPNOTSUPP;
-       return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+       return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_USER,
                              name, buffer, size);
 }
 
@@ -49,7 +49,7 @@ ext2_xattr_user_set(struct dentry *dentry, const char *name,
        if (!test_opt(dentry->d_sb, XATTR_USER))
                return -EOPNOTSUPP;
 
-       return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+       return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_USER,
                              name, value, size, flags);
 }
 
index a1b8102..3ad242e 100644 (file)
@@ -210,7 +210,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
        avefreeb = freeb / ngroups;
        ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 
-       if ((parent == sb->s_root->d_inode) ||
+       if ((parent == d_inode(sb->s_root)) ||
            (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) {
                int best_ndir = inodes_per_group;
                int best_group = -1;
index 13c0868..2ee2dc4 100644 (file)
@@ -3240,7 +3240,7 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
  */
 int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error, rc = 0;
        const unsigned int ia_valid = attr->ia_valid;
 
index f197736..4264b9b 100644 (file)
@@ -1049,19 +1049,19 @@ struct dentry *ext3_get_parent(struct dentry *child)
        struct ext3_dir_entry_2 * de;
        struct buffer_head *bh;
 
-       bh = ext3_find_entry(child->d_inode, &dotdot, &de);
+       bh = ext3_find_entry(d_inode(child), &dotdot, &de);
        if (!bh)
                return ERR_PTR(-ENOENT);
        ino = le32_to_cpu(de->inode);
        brelse(bh);
 
-       if (!ext3_valid_inum(child->d_inode->i_sb, ino)) {
-               ext3_error(child->d_inode->i_sb, "ext3_get_parent",
+       if (!ext3_valid_inum(d_inode(child)->i_sb, ino)) {
+               ext3_error(d_inode(child)->i_sb, "ext3_get_parent",
                           "bad inode number: %lu", ino);
                return ERR_PTR(-EIO);
        }
 
-       return d_obtain_alias(ext3_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(ext3_iget(d_inode(child)->i_sb, ino));
 }
 
 #define S_SHIFT 12
@@ -1243,7 +1243,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                             struct inode *inode, struct ext3_dir_entry_2 *de,
                             struct buffer_head * bh)
 {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
        const char      *name = dentry->d_name.name;
        int             namelen = dentry->d_name.len;
        unsigned long   offset = 0;
@@ -1330,7 +1330,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
                            struct inode *inode, struct buffer_head *bh)
 {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
        const char      *name = dentry->d_name.name;
        int             namelen = dentry->d_name.len;
        struct buffer_head *bh2;
@@ -1435,7 +1435,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
 static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
        struct inode *inode)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        struct buffer_head * bh;
        struct ext3_dir_entry_2 *de;
        struct super_block * sb;
@@ -1489,7 +1489,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
        struct dx_entry *entries, *at;
        struct dx_hash_info hinfo;
        struct buffer_head * bh;
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        struct super_block * sb = dir->i_sb;
        struct ext3_dir_entry_2 *de;
        int err;
@@ -2111,7 +2111,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
        /* Initialize quotas before so that eventual writes go in
         * separate transaction */
        dquot_initialize(dir);
-       dquot_initialize(dentry->d_inode);
+       dquot_initialize(d_inode(dentry));
 
        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
@@ -2125,7 +2125,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
        if (IS_DIRSYNC(dir))
                handle->h_sync = 1;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        retval = -EIO;
        if (le32_to_cpu(de->inode) != inode->i_ino)
@@ -2173,7 +2173,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
        dquot_initialize(dir);
-       dquot_initialize(dentry->d_inode);
+       dquot_initialize(d_inode(dentry));
 
        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
@@ -2187,7 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
        if (!bh)
                goto end_unlink;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        retval = -EIO;
        if (le32_to_cpu(de->inode) != inode->i_ino)
@@ -2328,7 +2328,7 @@ static int ext3_link (struct dentry * old_dentry,
                struct inode * dir, struct dentry *dentry)
 {
        handle_t *handle;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        int err, retries = 0;
 
        if (inode->i_nlink >= EXT3_LINK_MAX)
@@ -2391,8 +2391,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
 
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
-       if (new_dentry->d_inode)
-               dquot_initialize(new_dentry->d_inode);
+       if (d_really_is_positive(new_dentry))
+               dquot_initialize(d_inode(new_dentry));
        handle = ext3_journal_start(old_dir, 2 *
                                        EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
                                        EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
@@ -2409,12 +2409,12 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
         *  and merrily kill the link to whatever was created under the
         *  same name. Goodbye sticky bit ;-<
         */
-       old_inode = old_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
        retval = -ENOENT;
        if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
                goto end_rename;
 
-       new_inode = new_dentry->d_inode;
+       new_inode = d_inode(new_dentry);
        new_bh = ext3_find_entry(new_dir, &new_dentry->d_name, &new_de);
        if (new_bh) {
                if (!new_inode) {
index f037b4b..a9312f0 100644 (file)
@@ -1170,7 +1170,7 @@ static int parse_options (char *options, struct super_block *sb,
                                return 0;
                        }
 
-                       journal_inode = path.dentry->d_inode;
+                       journal_inode = d_inode(path.dentry);
                        if (!S_ISBLK(journal_inode->i_mode)) {
                                ext3_msg(sb, KERN_ERR, "error: journal path %s "
                                        "is not a block device", journal_path);
@@ -2947,7 +2947,7 @@ static int ext3_write_info(struct super_block *sb, int type)
        handle_t *handle;
 
        /* Data block + inode block */
-       handle = ext3_journal_start(sb->s_root->d_inode, 2);
+       handle = ext3_journal_start(d_inode(sb->s_root), 2);
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_commit_info(sb, type);
@@ -2994,7 +2994,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
         * When we journal data on quota file, we have to flush journal to see
         * all updates to the file when we bypass pagecache...
         */
-       if (ext3_should_journal_data(path->dentry->d_inode)) {
+       if (ext3_should_journal_data(d_inode(path->dentry))) {
                /*
                 * We don't need to lock updates but journal_flush() could
                 * otherwise be livelocked...
index 6b01c3e..ea96df3 100644 (file)
@@ -23,7 +23,7 @@
 
 static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
+       struct ext3_inode_info *ei = EXT3_I(d_inode(dentry));
        nd_set_link(nd, (char*)ei->i_data);
        return NULL;
 }
index 24215dc..7cf3650 100644 (file)
@@ -137,7 +137,7 @@ ext3_xattr_handler(int name_index)
 /*
  * Inode operation listxattr()
  *
- * dentry->d_inode->i_mutex: don't care
+ * d_inode(dentry)->i_mutex: don't care
  */
 ssize_t
 ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -355,7 +355,7 @@ ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
 static int
 ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct buffer_head *bh = NULL;
        int error;
 
@@ -391,7 +391,7 @@ cleanup:
 static int
 ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ext3_xattr_ibody_header *header;
        struct ext3_inode *raw_inode;
        struct ext3_iloc iloc;
@@ -432,7 +432,7 @@ ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
        int i_error, b_error;
 
-       down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
+       down_read(&EXT3_I(d_inode(dentry))->xattr_sem);
        i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
        if (i_error < 0) {
                b_error = 0;
@@ -445,7 +445,7 @@ ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
                if (b_error < 0)
                        i_error = 0;
        }
-       up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
+       up_read(&EXT3_I(d_inode(dentry))->xattr_sem);
        return i_error + b_error;
 }
 
index 722c2bf..c9506d5 100644 (file)
@@ -29,7 +29,7 @@ ext3_xattr_security_get(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+       return ext3_xattr_get(d_inode(dentry), EXT3_XATTR_INDEX_SECURITY,
                              name, buffer, size);
 }
 
@@ -39,7 +39,7 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+       return ext3_xattr_set(d_inode(dentry), EXT3_XATTR_INDEX_SECURITY,
                              name, value, size, flags);
 }
 
index d75727c..206cc66 100644 (file)
@@ -32,7 +32,7 @@ ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
+       return ext3_xattr_get(d_inode(dentry), EXT3_XATTR_INDEX_TRUSTED,
                              name, buffer, size);
 }
 
@@ -42,7 +42,7 @@ ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
+       return ext3_xattr_set(d_inode(dentry), EXT3_XATTR_INDEX_TRUSTED, name,
                              value, size, flags);
 }
 
index 5612af3..021508a 100644 (file)
@@ -34,7 +34,7 @@ ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
                return -EINVAL;
        if (!test_opt(dentry->d_sb, XATTR_USER))
                return -EOPNOTSUPP;
-       return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+       return ext3_xattr_get(d_inode(dentry), EXT3_XATTR_INDEX_USER,
                              name, buffer, size);
 }
 
@@ -46,7 +46,7 @@ ext3_xattr_user_set(struct dentry *dentry, const char *name,
                return -EINVAL;
        if (!test_opt(dentry->d_sb, XATTR_USER))
                return -EOPNOTSUPP;
-       return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+       return ext3_xattr_set(d_inode(dentry), EXT3_XATTR_INDEX_USER,
                              name, value, size, flags);
 }
 
index e9d632e..8850254 100644 (file)
@@ -55,7 +55,7 @@ static int ext4_sync_parent(struct inode *inode)
                dentry = d_find_any_alias(inode);
                if (!dentry)
                        break;
-               next = igrab(dentry->d_parent->d_inode);
+               next = igrab(d_inode(dentry->d_parent));
                dput(dentry);
                if (!next)
                        break;
index 2cf18a2..1eaa6cb 100644 (file)
@@ -443,7 +443,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
        ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
 
        if (S_ISDIR(mode) &&
-           ((parent == sb->s_root->d_inode) ||
+           ((parent == d_inode(sb->s_root)) ||
             (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
                int best_ndir = inodes_per_group;
                int ret = -1;
index 3580629..9588240 100644 (file)
@@ -682,11 +682,11 @@ retry:
                 * via ext4_inode_block_unlocked_dio(). Check inode's state
                 * while holding extra i_dio_count ref.
                 */
-               atomic_inc(&inode->i_dio_count);
+               inode_dio_begin(inode);
                smp_mb();
                if (unlikely(ext4_test_inode_state(inode,
                                                    EXT4_STATE_DIOREAD_LOCK))) {
-                       inode_dio_done(inode);
+                       inode_dio_end(inode);
                        goto locked;
                }
                if (IS_DAX(inode))
@@ -697,7 +697,7 @@ retry:
                                                   inode->i_sb->s_bdev, iter,
                                                   offset, ext4_get_block, NULL,
                                                   NULL, 0);
-               inode_dio_done(inode);
+               inode_dio_end(inode);
        } else {
 locked:
                if (IS_DAX(inode))
index feb2caf..095c7a2 100644 (file)
@@ -1000,7 +1000,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
                                     struct ext4_iloc *iloc,
                                     void *inline_start, int inline_size)
 {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
        const char      *name = dentry->d_name.name;
        int             namelen = dentry->d_name.len;
        int             err;
@@ -1254,7 +1254,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
        int ret, inline_size;
        void *inline_start;
        struct ext4_iloc iloc;
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
 
        ret = ext4_get_inode_loc(dir, &iloc);
        if (ret)
index 366476e..cbd0654 100644 (file)
@@ -3077,7 +3077,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
         * overwrite DIO as i_dio_count needs to be incremented under i_mutex.
         */
        if (iov_iter_rw(iter) == WRITE)
-               atomic_inc(&inode->i_dio_count);
+               inode_dio_begin(inode);
 
        /* If we do a overwrite dio, i_mutex locking can be released */
        overwrite = *((int *)iocb->private);
@@ -3182,7 +3182,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 
 retake_lock:
        if (iov_iter_rw(iter) == WRITE)
-               inode_dio_done(inode);
+               inode_dio_end(inode);
        /* take i_mutex locking again if we do a ovewrite dio */
        if (overwrite) {
                up_read(&EXT4_I(inode)->i_data_sem);
@@ -4637,7 +4637,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
  */
 int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error, rc = 0;
        int orphan = 0;
        const unsigned int ia_valid = attr->ia_valid;
@@ -4785,7 +4785,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
        struct inode *inode;
        unsigned long long delalloc_blocks;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        generic_fillattr(inode, stat);
 
        /*
index 3cb267a..b52374e 100644 (file)
@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode)
                EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
        owner[0] = i_uid_read(inode);
        owner[1] = i_gid_read(inode);
-       tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
+       tmp_inode = ext4_new_inode(handle, d_inode(inode->i_sb->s_root),
                                   S_IFREG, NULL, goal, owner);
        if (IS_ERR(tmp_inode)) {
                retval = PTR_ERR(tmp_inode);
index ef22cd9..7223b0b 100644 (file)
@@ -1664,7 +1664,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
        struct ext4_dir_entry_2 * de;
        struct buffer_head *bh;
 
-       bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
+       bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
        if (IS_ERR(bh))
                return (struct dentry *) bh;
        if (!bh)
@@ -1672,13 +1672,13 @@ struct dentry *ext4_get_parent(struct dentry *child)
        ino = le32_to_cpu(de->inode);
        brelse(bh);
 
-       if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
-               EXT4_ERROR_INODE(child->d_inode,
+       if (!ext4_valid_inum(d_inode(child)->i_sb, ino)) {
+               EXT4_ERROR_INODE(d_inode(child),
                                 "bad parent inode number: %u", ino);
                return ERR_PTR(-EIO);
        }
 
-       return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
+       return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino));
 }
 
 /*
@@ -1988,7 +1988,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                             struct inode *inode, struct ext4_dir_entry_2 *de,
                             struct buffer_head *bh)
 {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
        const char      *name = dentry->d_name.name;
        int             namelen = dentry->d_name.len;
        unsigned int    blocksize = dir->i_sb->s_blocksize;
@@ -2048,7 +2048,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
 static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
                            struct inode *inode, struct buffer_head *bh)
 {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
        struct ext4_fname_crypto_ctx *ctx = NULL;
        int res;
@@ -2202,7 +2202,7 @@ out_frames:
 static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                          struct inode *inode)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        struct buffer_head *bh = NULL;
        struct ext4_dir_entry_2 *de;
        struct ext4_dir_entry_tail *t;
@@ -2287,7 +2287,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
        struct dx_entry *entries, *at;
        struct dx_hash_info hinfo;
        struct buffer_head *bh;
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        struct super_block *sb = dir->i_sb;
        struct ext4_dir_entry_2 *de;
        int err;
@@ -3063,7 +3063,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
        /* Initialize quotas before so that eventual writes go in
         * separate transaction */
        dquot_initialize(dir);
-       dquot_initialize(dentry->d_inode);
+       dquot_initialize(d_inode(dentry));
 
        retval = -ENOENT;
        bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
@@ -3072,7 +3072,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
        if (!bh)
                goto end_rmdir;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        retval = -EIO;
        if (le32_to_cpu(de->inode) != inode->i_ino)
@@ -3132,7 +3132,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
        dquot_initialize(dir);
-       dquot_initialize(dentry->d_inode);
+       dquot_initialize(d_inode(dentry));
 
        retval = -ENOENT;
        bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
@@ -3141,7 +3141,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
        if (!bh)
                goto end_unlink;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        retval = -EIO;
        if (le32_to_cpu(de->inode) != inode->i_ino)
@@ -3339,7 +3339,7 @@ static int ext4_link(struct dentry *old_dentry,
                     struct inode *dir, struct dentry *dentry)
 {
        handle_t *handle;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        int err, retries = 0;
 
        if (inode->i_nlink >= EXT4_LINK_MAX)
@@ -3613,12 +3613,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct ext4_renament old = {
                .dir = old_dir,
                .dentry = old_dentry,
-               .inode = old_dentry->d_inode,
+               .inode = d_inode(old_dentry),
        };
        struct ext4_renament new = {
                .dir = new_dir,
                .dentry = new_dentry,
-               .inode = new_dentry->d_inode,
+               .inode = d_inode(new_dentry),
        };
        int force_reread;
        int retval;
@@ -3809,12 +3809,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct ext4_renament old = {
                .dir = old_dir,
                .dentry = old_dentry,
-               .inode = old_dentry->d_inode,
+               .inode = d_inode(old_dentry),
        };
        struct ext4_renament new = {
                .dir = new_dir,
                .dentry = new_dentry,
-               .inode = new_dentry->d_inode,
+               .inode = d_inode(new_dentry),
        };
        u8 new_file_type;
        int retval;
index 821f22d..f06d058 100644 (file)
@@ -1556,7 +1556,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                        return -1;
                }
 
-               journal_inode = path.dentry->d_inode;
+               journal_inode = d_inode(path.dentry);
                if (!S_ISBLK(journal_inode->i_mode)) {
                        ext4_msg(sb, KERN_ERR, "error: journal path %s "
                                "is not a block device", journal_path);
@@ -5217,7 +5217,7 @@ static int ext4_write_info(struct super_block *sb, int type)
        handle_t *handle;
 
        /* Data block + inode block */
-       handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
+       handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_commit_info(sb, type);
@@ -5265,7 +5265,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
         * all updates to the file when we bypass pagecache...
         */
        if (EXT4_SB(sb)->s_journal &&
-           ext4_should_journal_data(path->dentry->d_inode)) {
+           ext4_should_journal_data(d_inode(path->dentry))) {
                /*
                 * We don't need to lock updates but journal_flush() could
                 * otherwise be livelocked...
index 136ca0e..19f78f2 100644 (file)
@@ -28,7 +28,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
        struct page *cpage = NULL;
        char *caddr, *paddr = NULL;
        struct ext4_str cstr, pstr;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ext4_fname_crypto_ctx *ctx = NULL;
        struct ext4_encrypted_symlink_data *sd;
        loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
@@ -43,8 +43,8 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
                return ctx;
 
        if (ext4_inode_is_fast_symlink(inode)) {
-               caddr = (char *) EXT4_I(dentry->d_inode)->i_data;
-               max_size = sizeof(EXT4_I(dentry->d_inode)->i_data);
+               caddr = (char *) EXT4_I(inode)->i_data;
+               max_size = sizeof(EXT4_I(inode)->i_data);
        } else {
                cpage = read_mapping_page(inode->i_mapping, 0, NULL);
                if (IS_ERR(cpage)) {
@@ -113,7 +113,7 @@ static void ext4_put_link(struct dentry *dentry, struct nameidata *nd,
 
 static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
+       struct ext4_inode_info *ei = EXT4_I(d_inode(dentry));
        nd_set_link(nd, (char *) ei->i_data);
        return NULL;
 }
index 759842f..16e28c0 100644 (file)
@@ -178,7 +178,7 @@ ext4_xattr_handler(int name_index)
 /*
  * Inode operation listxattr()
  *
- * dentry->d_inode->i_mutex: don't care
+ * d_inode(dentry)->i_mutex: don't care
  */
 ssize_t
 ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -423,7 +423,7 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
 static int
 ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct buffer_head *bh = NULL;
        int error;
        struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
@@ -460,7 +460,7 @@ cleanup:
 static int
 ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ext4_xattr_ibody_header *header;
        struct ext4_inode *raw_inode;
        struct ext4_iloc iloc;
@@ -501,7 +501,7 @@ ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
        int ret, ret2;
 
-       down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
+       down_read(&EXT4_I(d_inode(dentry))->xattr_sem);
        ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
        if (ret < 0)
                goto errout;
@@ -514,7 +514,7 @@ ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
                goto errout;
        ret += ret2;
 errout:
-       up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
+       up_read(&EXT4_I(d_inode(dentry))->xattr_sem);
        return ret;
 }
 
index d2a2006..95d90e0 100644 (file)
@@ -33,7 +33,7 @@ ext4_xattr_security_get(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+       return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
                              name, buffer, size);
 }
 
@@ -43,7 +43,7 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+       return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
                              name, value, size, flags);
 }
 
index 95f1f4a..891ee2d 100644 (file)
@@ -36,7 +36,7 @@ ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+       return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
                              name, buffer, size);
 }
 
@@ -46,7 +46,7 @@ ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+       return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
                              name, value, size, flags);
 }
 
index 0edb761..6ed932b 100644 (file)
@@ -37,7 +37,7 @@ ext4_xattr_user_get(struct dentry *dentry, const char *name,
                return -EINVAL;
        if (!test_opt(dentry->d_sb, XATTR_USER))
                return -EOPNOTSUPP;
-       return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+       return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_USER,
                              name, buffer, size);
 }
 
@@ -49,7 +49,7 @@ ext4_xattr_user_set(struct dentry *dentry, const char *name,
                return -EINVAL;
        if (!test_opt(dentry->d_sb, XATTR_USER))
                return -EOPNOTSUPP;
-       return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+       return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_USER,
                              name, value, size, flags);
 }
 
index c06a25e..d8921cf 100644 (file)
@@ -1482,7 +1482,7 @@ bool f2fs_empty_dir(struct inode *);
 
 static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
 {
-       return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
+       return __f2fs_add_link(d_inode(dentry->d_parent), &dentry->d_name,
                                inode, inode->i_ino, inode->i_mode);
 }
 
index a6f3f61..2b52e48 100644 (file)
@@ -574,7 +574,7 @@ void f2fs_truncate(struct inode *inode)
 int f2fs_getattr(struct vfsmount *mnt,
                         struct dentry *dentry, struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        generic_fillattr(inode, stat);
        stat->blocks <<= 3;
        return 0;
@@ -613,7 +613,7 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
 
 int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct f2fs_inode_info *fi = F2FS_I(inode);
        int err;
 
index 407dde3..7e3794e 100644 (file)
@@ -151,7 +151,7 @@ out:
 static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
                struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
        int err;
 
@@ -182,10 +182,10 @@ out:
 struct dentry *f2fs_get_parent(struct dentry *child)
 {
        struct qstr dotdot = QSTR_INIT("..", 2);
-       unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot);
+       unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot);
        if (!ino)
                return ERR_PTR(-ENOENT);
-       return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(f2fs_iget(d_inode(child)->i_sb, ino));
 }
 
 static int __recover_dot_dentries(struct inode *dir, nid_t pino)
@@ -263,7 +263,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
 static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct f2fs_dir_entry *de;
        struct page *page;
        int err = -ENOENT;
@@ -403,7 +403,7 @@ out_fail:
 
 static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        if (f2fs_empty_dir(inode))
                return f2fs_unlink(dir, dentry);
        return -ENOTEMPTY;
@@ -451,8 +451,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct page *old_dir_page;
        struct page *old_page, *new_page;
        struct f2fs_dir_entry *old_dir_entry = NULL;
@@ -578,8 +578,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                             struct inode *new_dir, struct dentry *new_dentry)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct page *old_dir_page, *new_dir_page;
        struct page *old_page, *new_page;
        struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
index b0fd2f2..9757f65 100644 (file)
@@ -83,7 +83,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
        }
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL);
+       return f2fs_getxattr(d_inode(dentry), type, name, buffer, size, NULL);
 }
 
 static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
@@ -108,7 +108,7 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
        if (strcmp(name, "") == 0)
                return -EINVAL;
 
-       return f2fs_setxattr(dentry->d_inode, type, name,
+       return f2fs_setxattr(d_inode(dentry), type, name,
                                        value, size, NULL, flags);
 }
 
@@ -130,7 +130,7 @@ static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
 static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
                void *buffer, size_t size, int type)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (strcmp(name, "") != 0)
                return -EINVAL;
@@ -143,7 +143,7 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
 static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
                const void *value, size_t size, int flags, int type)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (strcmp(name, "") != 0)
                return -EINVAL;
@@ -444,7 +444,7 @@ cleanup:
 
 ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct f2fs_xattr_entry *entry;
        void *base_addr;
        int error = 0;
index cf50d93..442d50a 100644 (file)
@@ -305,7 +305,7 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
 
 int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        generic_fillattr(inode, stat);
        stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size;
 
@@ -377,7 +377,7 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
 int fat_setattr(struct dentry *dentry, struct iattr *attr)
 {
        struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        unsigned int ia_valid;
        int error;
 
index cc6a854..b7e2b33 100644 (file)
@@ -308,7 +308,7 @@ out:
 static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct super_block *sb = dir->i_sb;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct fat_slot_info sinfo;
        int err;
 
@@ -402,7 +402,7 @@ out:
 /***** Unlink a file */
 static int msdos_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct super_block *sb = inode->i_sb;
        struct fat_slot_info sinfo;
        int err;
@@ -440,8 +440,8 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
        int err, old_attrs, is_dir, update_dotdot, corrupt = 0;
 
        old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
-       old_inode = old_dentry->d_inode;
-       new_inode = new_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
+       new_inode = d_inode(new_dentry);
 
        err = fat_scan(old_dir, old_name, &old_sinfo);
        if (err) {
index 7e0974e..7092584 100644 (file)
@@ -33,7 +33,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
 {
        int ret = 1;
        spin_lock(&dentry->d_lock);
-       if (dentry->d_time != dentry->d_parent->d_inode->i_version)
+       if (dentry->d_time != d_inode(dentry->d_parent)->i_version)
                ret = 0;
        spin_unlock(&dentry->d_lock);
        return ret;
@@ -45,7 +45,7 @@ static int vfat_revalidate(struct dentry *dentry, unsigned int flags)
                return -ECHILD;
 
        /* This is not negative dentry. Always valid. */
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                return 1;
        return vfat_revalidate_shortname(dentry);
 }
@@ -65,7 +65,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
         * positive dentry isn't good idea. So it's unsupported like
         * rename("filename", "FILENAME") for now.
         */
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                return 1;
 
        /*
@@ -801,7 +801,7 @@ out:
 
 static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct super_block *sb = dir->i_sb;
        struct fat_slot_info sinfo;
        int err;
@@ -832,7 +832,7 @@ out:
 
 static int vfat_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct super_block *sb = dir->i_sb;
        struct fat_slot_info sinfo;
        int err;
@@ -915,8 +915,8 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct super_block *sb = old_dir->i_sb;
 
        old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
-       old_inode = old_dentry->d_inode;
-       new_inode = new_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
+       new_inode = d_inode(new_dentry);
        mutex_lock(&MSDOS_SB(sb)->s_lock);
        err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo);
        if (err)
index 93e1493..eb19265 100644 (file)
@@ -266,7 +266,7 @@ struct inode *fat_rebuild_parent(struct super_block *sb, int parent_logstart)
  * Find the parent for a directory that is not currently connected to
  * the filesystem root.
  *
- * On entry, the caller holds child_dir->d_inode->i_mutex.
+ * On entry, the caller holds d_inode(child_dir)->i_mutex.
  */
 static struct dentry *fat_get_parent(struct dentry *child_dir)
 {
@@ -276,7 +276,7 @@ static struct dentry *fat_get_parent(struct dentry *child_dir)
        struct inode *parent_inode = NULL;
        struct msdos_sb_info *sbi = MSDOS_SB(sb);
 
-       if (!fat_get_dotdot_entry(child_dir->d_inode, &bh, &de)) {
+       if (!fat_get_dotdot_entry(d_inode(child_dir), &bh, &de)) {
                int parent_logstart = fat_get_start(sbi, de);
                parent_inode = fat_dget(sb, parent_logstart);
                if (!parent_inode && sbi->options.nfs == FAT_NFS_NOSTALE_RO)
index c36aeaf..8b9229e 100644 (file)
@@ -76,7 +76,7 @@ const struct address_space_operations vxfs_immed_aops = {
 static void *
 vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np)
 {
-       struct vxfs_inode_info          *vip = VXFS_INO(dp->d_inode);
+       struct vxfs_inode_info          *vip = VXFS_INO(d_inode(dp));
        nd_set_link(np, vip->vii_immed.vi_immed);
        return NULL;
 }
index 205e0d5..f863ac6 100644 (file)
@@ -244,7 +244,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
                return 0;
 
        parent = fuse_control_sb->s_root;
-       inc_nlink(parent->d_inode);
+       inc_nlink(d_inode(parent));
        sprintf(name, "%u", fc->dev);
        parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
                                     &simple_dir_inode_operations,
@@ -283,11 +283,11 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
 
        for (i = fc->ctl_ndents - 1; i >= 0; i--) {
                struct dentry *dentry = fc->ctl_dentry[i];
-               dentry->d_inode->i_private = NULL;
+               d_inode(dentry)->i_private = NULL;
                d_drop(dentry);
                dput(dentry);
        }
-       drop_nlink(fuse_control_sb->s_root->d_inode);
+       drop_nlink(d_inode(fuse_control_sb->s_root));
 }
 
 static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
index 1545b71..0572bca 100644 (file)
@@ -192,7 +192,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
        struct fuse_inode *fi;
        int ret;
 
-       inode = ACCESS_ONCE(entry->d_inode);
+       inode = d_inode_rcu(entry);
        if (inode && is_bad_inode(inode))
                goto invalid;
        else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
@@ -220,7 +220,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                attr_version = fuse_get_attr_version(fc);
 
                parent = dget_parent(entry);
-               fuse_lookup_init(fc, &args, get_node_id(parent->d_inode),
+               fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
                                 &entry->d_name, &outarg);
                ret = fuse_simple_request(fc, &args);
                dput(parent);
@@ -254,7 +254,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                                return -ECHILD;
                } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
                        parent = dget_parent(entry);
-                       fuse_advise_use_readdirplus(parent->d_inode);
+                       fuse_advise_use_readdirplus(d_inode(parent));
                        dput(parent);
                }
        }
@@ -487,7 +487,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
                        entry = res;
        }
 
-       if (!(flags & O_CREAT) || entry->d_inode)
+       if (!(flags & O_CREAT) || d_really_is_positive(entry))
                goto no_open;
 
        /* Only creates */
@@ -653,7 +653,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
        args.in.args[0].value = entry->d_name.name;
        err = fuse_simple_request(fc, &args);
        if (!err) {
-               struct inode *inode = entry->d_inode;
+               struct inode *inode = d_inode(entry);
                struct fuse_inode *fi = get_fuse_inode(inode);
 
                spin_lock(&fc->lock);
@@ -689,7 +689,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
        args.in.args[0].value = entry->d_name.name;
        err = fuse_simple_request(fc, &args);
        if (!err) {
-               clear_nlink(entry->d_inode);
+               clear_nlink(d_inode(entry));
                fuse_invalidate_attr(dir);
                fuse_invalidate_entry_cache(entry);
        } else if (err == -EINTR)
@@ -721,12 +721,12 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
        err = fuse_simple_request(fc, &args);
        if (!err) {
                /* ctime changes */
-               fuse_invalidate_attr(oldent->d_inode);
-               fuse_update_ctime(oldent->d_inode);
+               fuse_invalidate_attr(d_inode(oldent));
+               fuse_update_ctime(d_inode(oldent));
 
                if (flags & RENAME_EXCHANGE) {
-                       fuse_invalidate_attr(newent->d_inode);
-                       fuse_update_ctime(newent->d_inode);
+                       fuse_invalidate_attr(d_inode(newent));
+                       fuse_update_ctime(d_inode(newent));
                }
 
                fuse_invalidate_attr(olddir);
@@ -734,10 +734,10 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
                        fuse_invalidate_attr(newdir);
 
                /* newent will end up negative */
-               if (!(flags & RENAME_EXCHANGE) && newent->d_inode) {
-                       fuse_invalidate_attr(newent->d_inode);
+               if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
+                       fuse_invalidate_attr(d_inode(newent));
                        fuse_invalidate_entry_cache(newent);
-                       fuse_update_ctime(newent->d_inode);
+                       fuse_update_ctime(d_inode(newent));
                }
        } else if (err == -EINTR) {
                /* If request was interrupted, DEITY only knows if the
@@ -746,7 +746,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
                   directory), then there can be inconsistency between
                   the dcache and the real filesystem.  Tough luck. */
                fuse_invalidate_entry(oldent);
-               if (newent->d_inode)
+               if (d_really_is_positive(newent))
                        fuse_invalidate_entry(newent);
        }
 
@@ -788,7 +788,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
 {
        int err;
        struct fuse_link_in inarg;
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
        struct fuse_conn *fc = get_fuse_conn(inode);
        FUSE_ARGS(args);
 
@@ -961,9 +961,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
        fuse_invalidate_attr(parent);
        fuse_invalidate_entry(entry);
 
-       if (child_nodeid != 0 && entry->d_inode) {
-               mutex_lock(&entry->d_inode->i_mutex);
-               if (get_node_id(entry->d_inode) != child_nodeid) {
+       if (child_nodeid != 0 && d_really_is_positive(entry)) {
+               mutex_lock(&d_inode(entry)->i_mutex);
+               if (get_node_id(d_inode(entry)) != child_nodeid) {
                        err = -ENOENT;
                        goto badentry;
                }
@@ -977,13 +977,13 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
                                err = -ENOTEMPTY;
                                goto badentry;
                        }
-                       entry->d_inode->i_flags |= S_DEAD;
+                       d_inode(entry)->i_flags |= S_DEAD;
                }
                dont_mount(entry);
-               clear_nlink(entry->d_inode);
+               clear_nlink(d_inode(entry));
                err = 0;
  badentry:
-               mutex_unlock(&entry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(entry)->i_mutex);
                if (!err)
                        d_delete(entry);
        } else {
@@ -1169,7 +1169,7 @@ static int fuse_direntplus_link(struct file *file,
        struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
        struct dentry *dentry;
        struct dentry *alias;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        struct fuse_conn *fc;
        struct inode *inode;
 
@@ -1205,7 +1205,7 @@ static int fuse_direntplus_link(struct file *file,
        name.hash = full_name_hash(name.name, name.len);
        dentry = d_lookup(parent, &name);
        if (dentry) {
-               inode = dentry->d_inode;
+               inode = d_inode(dentry);
                if (!inode) {
                        d_drop(dentry);
                } else if (get_node_id(inode) != o->nodeid ||
@@ -1367,7 +1367,7 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
 
 static char *read_link(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct fuse_conn *fc = get_fuse_conn(inode);
        FUSE_ARGS(args);
        char *link;
@@ -1712,7 +1712,7 @@ error:
 
 static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
 
        if (!fuse_allow_current_process(get_fuse_conn(inode)))
                return -EACCES;
@@ -1726,7 +1726,7 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
                        struct kstat *stat)
 {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
        struct fuse_conn *fc = get_fuse_conn(inode);
 
        if (!fuse_allow_current_process(fc))
@@ -1738,7 +1738,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
 static int fuse_setxattr(struct dentry *entry, const char *name,
                         const void *value, size_t size, int flags)
 {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
        struct fuse_conn *fc = get_fuse_conn(inode);
        FUSE_ARGS(args);
        struct fuse_setxattr_in inarg;
@@ -1774,7 +1774,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
 static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
                             void *value, size_t size)
 {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
        struct fuse_conn *fc = get_fuse_conn(inode);
        FUSE_ARGS(args);
        struct fuse_getxattr_in inarg;
@@ -1815,7 +1815,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
 
 static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
        struct fuse_conn *fc = get_fuse_conn(inode);
        FUSE_ARGS(args);
        struct fuse_getxattr_in inarg;
@@ -1857,7 +1857,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 
 static int fuse_removexattr(struct dentry *entry, const char *name)
 {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
        struct fuse_conn *fc = get_fuse_conn(inode);
        FUSE_ARGS(args);
        int err;
index e8799c1..082ac1c 100644 (file)
@@ -421,7 +421,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
        memset(&outarg, 0, sizeof(outarg));
        args.in.numargs = 0;
        args.in.h.opcode = FUSE_STATFS;
-       args.in.h.nodeid = get_node_id(dentry->d_inode);
+       args.in.h.nodeid = get_node_id(d_inode(dentry));
        args.out.numargs = 1;
        args.out.args[0].size = sizeof(outarg);
        args.out.args[0].value = &outarg;
@@ -740,7 +740,7 @@ static struct dentry *fuse_fh_to_parent(struct super_block *sb,
 
 static struct dentry *fuse_get_parent(struct dentry *child)
 {
-       struct inode *child_inode = child->d_inode;
+       struct inode *child_inode = d_inode(child);
        struct fuse_conn *fc = get_fuse_conn(child_inode);
        struct inode *inode;
        struct dentry *parent;
index 589f4ea..30822b1 100644 (file)
@@ -48,9 +48,9 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
                return -ECHILD;
 
        parent = dget_parent(dentry);
-       sdp = GFS2_SB(parent->d_inode);
-       dip = GFS2_I(parent->d_inode);
-       inode = dentry->d_inode;
+       sdp = GFS2_SB(d_inode(parent));
+       dip = GFS2_I(d_inode(parent));
+       inode = d_inode(dentry);
 
        if (inode) {
                if (is_bad_inode(inode))
@@ -68,7 +68,7 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
                        goto fail;
        } 
 
-       error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
+       error = gfs2_dir_check(d_inode(parent), &dentry->d_name, ip);
        switch (error) {
        case 0:
                if (!inode)
@@ -113,10 +113,10 @@ static int gfs2_dentry_delete(const struct dentry *dentry)
 {
        struct gfs2_inode *ginode;
 
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                return 0;
 
-       ginode = GFS2_I(dentry->d_inode);
+       ginode = GFS2_I(d_inode(dentry));
        if (!ginode->i_iopen_gh.gh_gl)
                return 0;
 
index c41d255..5d15e94 100644 (file)
@@ -49,7 +49,7 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len,
        fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
        *len = GFS2_SMALL_FH_SIZE;
 
-       if (!parent || inode == sb->s_root->d_inode)
+       if (!parent || inode == d_inode(sb->s_root))
                return *len;
 
        ip = GFS2_I(parent);
@@ -88,8 +88,8 @@ static int get_name_filldir(struct dir_context *ctx, const char *name,
 static int gfs2_get_name(struct dentry *parent, char *name,
                         struct dentry *child)
 {
-       struct inode *dir = parent->d_inode;
-       struct inode *inode = child->d_inode;
+       struct inode *dir = d_inode(parent);
+       struct inode *inode = d_inode(child);
        struct gfs2_inode *dip, *ip;
        struct get_name_filldir gnfd = {
                .ctx.actor = get_name_filldir,
@@ -128,7 +128,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
 
 static struct dentry *gfs2_get_parent(struct dentry *child)
 {
-       return d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
+       return d_obtain_alias(gfs2_lookupi(d_inode(child), &gfs2_qdotdot, 1));
 }
 
 static struct dentry *gfs2_get_dentry(struct super_block *sb,
index 08bc84d..1b3ca7a 100644 (file)
@@ -295,7 +295,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
 
        if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) ||
            (name->len == 2 && memcmp(name->name, "..", 2) == 0 &&
-            dir == sb->s_root->d_inode)) {
+            dir == d_inode(sb->s_root))) {
                igrab(dir);
                return dir;
        }
@@ -687,7 +687,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        }
        gfs2_set_inode_flags(inode);
 
-       if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) ||
+       if ((GFS2_I(d_inode(sdp->sd_root_dir)) == dip) ||
            (dip->i_diskflags & GFS2_DIF_TOPDIR))
                aflags |= GFS2_AF_ORLOV;
 
@@ -888,7 +888,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 {
        struct gfs2_inode *dip = GFS2_I(dir);
        struct gfs2_sbd *sdp = GFS2_SB(dir);
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder ghs[2];
        struct buffer_head *dibh;
@@ -1055,7 +1055,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
 static int gfs2_unlink_inode(struct gfs2_inode *dip,
                             const struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct gfs2_inode *ip = GFS2_I(inode);
        int error;
 
@@ -1091,7 +1091,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct gfs2_inode *dip = GFS2_I(dir);
        struct gfs2_sbd *sdp = GFS2_SB(dir);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder ghs[3];
        struct gfs2_rgrpd *rgd;
@@ -1241,7 +1241,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
                return PTR_ERR(d);
        if (d != NULL)
                dentry = d;
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                if (!(*opened & FILE_OPENED))
                        return finish_no_open(file, d);
                dput(d);
@@ -1282,7 +1282,7 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
                        error = -EINVAL;
                        break;
                }
-               if (dir == sb->s_root->d_inode) {
+               if (dir == d_inode(sb->s_root)) {
                        error = 0;
                        break;
                }
@@ -1321,7 +1321,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
 {
        struct gfs2_inode *odip = GFS2_I(odir);
        struct gfs2_inode *ndip = GFS2_I(ndir);
-       struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
+       struct gfs2_inode *ip = GFS2_I(d_inode(odentry));
        struct gfs2_inode *nip = NULL;
        struct gfs2_sbd *sdp = GFS2_SB(odir);
        struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
@@ -1332,8 +1332,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        unsigned int x;
        int error;
 
-       if (ndentry->d_inode) {
-               nip = GFS2_I(ndentry->d_inode);
+       if (d_really_is_positive(ndentry)) {
+               nip = GFS2_I(d_inode(ndentry));
                if (ip == nip)
                        return 0;
        }
@@ -1457,7 +1457,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        /* Check out the dir to be renamed */
 
        if (dir_rename) {
-               error = gfs2_permission(odentry->d_inode, MAY_WRITE);
+               error = gfs2_permission(d_inode(odentry), MAY_WRITE);
                if (error)
                        goto out_gunlock;
        }
@@ -1550,7 +1550,7 @@ out:
 
 static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+       struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
        struct gfs2_holder i_gh;
        struct buffer_head *dibh;
        unsigned int size;
@@ -1742,7 +1742,7 @@ out:
 
 static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder i_gh;
        int error;
@@ -1798,7 +1798,7 @@ out:
 static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
                        struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int error;
@@ -1821,7 +1821,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
 static int gfs2_setxattr(struct dentry *dentry, const char *name,
                         const void *data, size_t size, int flags)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int ret;
@@ -1841,7 +1841,7 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
 static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
                             void *data, size_t size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int ret;
@@ -1862,7 +1862,7 @@ static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
 
 static int gfs2_removexattr(struct dentry *dentry, const char *name)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int ret;
index efc8e25..35b49f4 100644 (file)
@@ -647,7 +647,7 @@ out_unlock:
 
 static int init_journal(struct gfs2_sbd *sdp, int undo)
 {
-       struct inode *master = sdp->sd_master_dir->d_inode;
+       struct inode *master = d_inode(sdp->sd_master_dir);
        struct gfs2_holder ji_gh;
        struct gfs2_inode *ip;
        int jindex = 1;
@@ -782,7 +782,7 @@ static struct lock_class_key gfs2_quota_imutex_key;
 static int init_inodes(struct gfs2_sbd *sdp, int undo)
 {
        int error = 0;
-       struct inode *master = sdp->sd_master_dir->d_inode;
+       struct inode *master = d_inode(sdp->sd_master_dir);
 
        if (undo)
                goto fail_qinode;
@@ -848,7 +848,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
        char buf[30];
        int error = 0;
        struct gfs2_inode *ip;
-       struct inode *master = sdp->sd_master_dir->d_inode;
+       struct inode *master = d_inode(sdp->sd_master_dir);
 
        if (sdp->sd_args.ar_spectator)
                return 0;
@@ -1357,7 +1357,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
                return ERR_PTR(error);
        }
        s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
-                path.dentry->d_inode->i_sb->s_bdev);
+                d_inode(path.dentry)->i_sb->s_bdev);
        path_put(&path);
        if (IS_ERR(s)) {
                pr_warn("gfs2 mount does not exist\n");
index 1666382..859c6ed 100644 (file)
@@ -1171,7 +1171,7 @@ static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *s
 
 static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
-       struct super_block *sb = dentry->d_inode->i_sb;
+       struct super_block *sb = d_inode(dentry)->i_sb;
        struct gfs2_sbd *sdp = sb->s_fs_info;
        struct gfs2_statfs_change_host sc;
        int error;
index fd260ce..4c096fa 100644 (file)
@@ -420,7 +420,7 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
 
 ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-       struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+       struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
        struct gfs2_ea_request er;
        struct gfs2_holder i_gh;
        int error;
@@ -586,7 +586,7 @@ out:
 static int gfs2_xattr_get(struct dentry *dentry, const char *name,
                void *buffer, size_t size, int type)
 {
-       struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+       struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
        struct gfs2_ea_location el;
        int error;
 
@@ -1230,7 +1230,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
 static int gfs2_xattr_set(struct dentry *dentry, const char *name,
                const void *value, size_t size, int flags, int type)
 {
-       return __gfs2_xattr_set(dentry->d_inode, name, value,
+       return __gfs2_xattr_set(d_inode(dentry), name, value,
                                size, flags, type);
 }
 
index e057ec5..8d931b1 100644 (file)
@@ -16,7 +16,7 @@
 int hfs_setxattr(struct dentry *dentry, const char *name,
                 const void *value, size_t size, int flags)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct hfs_find_data fd;
        hfs_cat_rec rec;
        struct hfs_cat_file *file;
@@ -59,7 +59,7 @@ out:
 ssize_t hfs_getxattr(struct dentry *dentry, const char *name,
                         void *value, size_t size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct hfs_find_data fd;
        hfs_cat_rec rec;
        struct hfs_cat_file *file;
@@ -105,7 +105,7 @@ out:
 
 ssize_t hfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (!S_ISREG(inode->i_mode) || HFS_IS_RSRC(inode))
                return -EOPNOTSUPP;
index 36d1a6a..70788e0 100644 (file)
@@ -253,7 +253,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  */
 static int hfs_remove(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int res;
 
        if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
@@ -285,18 +285,18 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        int res;
 
        /* Unlink destination if it already exists */
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                res = hfs_remove(new_dir, new_dentry);
                if (res)
                        return res;
        }
 
-       res = hfs_cat_move(old_dentry->d_inode->i_ino,
+       res = hfs_cat_move(d_inode(old_dentry)->i_ino,
                           old_dir, &old_dentry->d_name,
                           new_dir, &new_dentry->d_name);
        if (!res)
                hfs_cat_build_key(old_dir->i_sb,
-                                 (btree_key *)&HFS_I(old_dentry->d_inode)->cat_key,
+                                 (btree_key *)&HFS_I(d_inode(old_dentry))->cat_key,
                                  new_dir->i_ino, &new_dentry->d_name);
        return res;
 }
index 75fd5d8..b99ebdd 100644 (file)
@@ -600,7 +600,7 @@ static int hfs_file_release(struct inode *inode, struct file *file)
 
 int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct hfs_sb_info *hsb = HFS_SB(inode->i_sb);
        int error;
 
index 91b91fd..2875961 100644 (file)
@@ -21,7 +21,7 @@ static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags)
        if (flags & LOOKUP_RCU)
                return -ECHILD;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        if(!inode)
                return 1;
 
index 3074609..d0f39dc 100644 (file)
@@ -81,7 +81,7 @@ again:
                                        HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->
                                                create_date ||
                                entry.file.create_date ==
-                                       HFSPLUS_I(sb->s_root->d_inode)->
+                                       HFSPLUS_I(d_inode(sb->s_root))->
                                                create_date) &&
                                HFSPLUS_SB(sb)->hidden_dir) {
                        struct qstr str;
@@ -296,8 +296,8 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
                        struct dentry *dst_dentry)
 {
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(dst_dir->i_sb);
-       struct inode *inode = src_dentry->d_inode;
-       struct inode *src_dir = src_dentry->d_parent->d_inode;
+       struct inode *inode = d_inode(src_dentry);
+       struct inode *src_dir = d_inode(src_dentry->d_parent);
        struct qstr str;
        char name[32];
        u32 cnid, id;
@@ -353,7 +353,7 @@ out:
 static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct qstr str;
        char name[32];
        u32 cnid;
@@ -410,7 +410,7 @@ out:
 static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int res;
 
        if (inode->i_size != 2)
@@ -529,7 +529,7 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
        int res;
 
        /* Unlink destination if it already exists */
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                if (d_is_dir(new_dentry))
                        res = hfsplus_rmdir(new_dir, new_dentry);
                else
index b0afedb..6dd107d 100644 (file)
@@ -243,7 +243,7 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
 
 static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        error = inode_change_ok(inode, attr);
index 8e98f5d..0624ce4 100644 (file)
@@ -26,7 +26,7 @@
 static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
 {
        struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
        struct hfsplus_vh *vh = sbi->s_vhdr;
        struct hfsplus_vh *bvh = sbi->s_backup_vhdr;
index 89f262d..416b1db 100644 (file)
@@ -440,7 +440,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name,
                return -ENOMEM;
        strcpy(xattr_name, prefix);
        strcpy(xattr_name + prefixlen, name);
-       res = __hfsplus_setxattr(dentry->d_inode, xattr_name, value, size,
+       res = __hfsplus_setxattr(d_inode(dentry), xattr_name, value, size,
                                 flags);
        kfree(xattr_name);
        return res;
@@ -600,7 +600,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
        strcpy(xattr_name, prefix);
        strcpy(xattr_name + prefixlen, name);
 
-       res = __hfsplus_getxattr(dentry->d_inode, xattr_name, value, size);
+       res = __hfsplus_getxattr(d_inode(dentry), xattr_name, value, size);
        kfree(xattr_name);
        return res;
 
@@ -620,7 +620,7 @@ static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry,
                                                char *buffer, size_t size)
 {
        ssize_t res = 0;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct hfs_find_data fd;
        u16 entry_type;
        u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)];
@@ -688,7 +688,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
        ssize_t err;
        ssize_t res = 0;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct hfs_find_data fd;
        u16 key_len = 0;
        struct hfsplus_attr_key attr_key;
@@ -868,7 +868,7 @@ static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
         * creates), so we pass the name through unmodified (after
         * ensuring it doesn't conflict with another namespace).
         */
-       return __hfsplus_getxattr(dentry->d_inode, name, buffer, size);
+       return __hfsplus_getxattr(d_inode(dentry), name, buffer, size);
 }
 
 static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
@@ -890,7 +890,7 @@ static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
         * creates), so we pass the name through unmodified (after
         * ensuring it doesn't conflict with another namespace).
         */
-       return __hfsplus_setxattr(dentry->d_inode, name, buffer, size, flags);
+       return __hfsplus_setxattr(d_inode(dentry), name, buffer, size, flags);
 }
 
 static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list,
index b83a034..ef26317 100644 (file)
@@ -807,7 +807,7 @@ static int hostfs_permission(struct inode *ino, int desired)
 
 static int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct hostfs_iattr attrs;
        char *name;
        int err;
index 7ce4b74..933c737 100644 (file)
@@ -257,7 +257,7 @@ void hpfs_write_inode_nolock(struct inode *i)
 
 int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error = -EINVAL;
 
        hpfs_lock(inode->i_sb);
index bdbc2c3..a0872f2 100644 (file)
@@ -359,7 +359,7 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
        unsigned len = dentry->d_name.len;
        struct quad_buffer_head qbh;
        struct hpfs_dirent *de;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        dnode_secno dno;
        int r;
        int rep = 0;
@@ -433,7 +433,7 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
        unsigned len = dentry->d_name.len;
        struct quad_buffer_head qbh;
        struct hpfs_dirent *de;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        dnode_secno dno;
        int n_items = 0;
        int err;
@@ -522,8 +522,8 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        unsigned old_len = old_dentry->d_name.len;
        const unsigned char *new_name = new_dentry->d_name.name;
        unsigned new_len = new_dentry->d_name.len;
-       struct inode *i = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *i = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct quad_buffer_head qbh, qbh1;
        struct hpfs_dirent *dep, *nde;
        struct hpfs_dirent de;
index 043ac9d..fa2bd53 100644 (file)
@@ -153,9 +153,9 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
                return ERR_PTR(-ENOENT);
 
        parent = HPPFS_I(ino)->proc_dentry;
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        proc_dentry = lookup_one_len(name->name, parent, name->len);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
 
        if (IS_ERR(proc_dentry))
                return proc_dentry;
@@ -637,25 +637,25 @@ static const struct super_operations hppfs_sbops = {
 static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
                          int buflen)
 {
-       struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
-       return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
+       struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
+       return d_inode(proc_dentry)->i_op->readlink(proc_dentry, buffer,
                                                    buflen);
 }
 
 static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+       struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
 
-       return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
+       return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, nd);
 }
 
 static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
                           void *cookie)
 {
-       struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+       struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
 
-       if (proc_dentry->d_inode->i_op->put_link)
-               proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
+       if (d_inode(proc_dentry)->i_op->put_link)
+               d_inode(proc_dentry)->i_op->put_link(proc_dentry, nd, cookie);
 }
 
 static const struct inode_operations hppfs_dir_iops = {
@@ -670,7 +670,7 @@ static const struct inode_operations hppfs_link_iops = {
 
 static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
 {
-       struct inode *proc_ino = dentry->d_inode;
+       struct inode *proc_ino = d_inode(dentry);
        struct inode *inode = new_inode(sb);
 
        if (!inode) {
index 2640d88..87724c1 100644 (file)
@@ -393,7 +393,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 
 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct hstate *h = hstate_inode(inode);
        int error;
        unsigned int ia_valid = attr->ia_valid;
@@ -587,7 +587,7 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
-       struct hstate *h = hstate_inode(dentry->d_inode);
+       struct hstate *h = hstate_inode(d_inode(dentry));
 
        buf->f_type = HUGETLBFS_MAGIC;
        buf->f_bsize = huge_page_size(h);
index f00b16f..ea37cd1 100644 (file)
@@ -1587,7 +1587,7 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
 void touch_atime(const struct path *path)
 {
        struct vfsmount *mnt = path->mnt;
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_inode(path->dentry);
        struct timespec now;
 
        if (inode->i_flags & S_NOATIME)
@@ -1639,7 +1639,7 @@ EXPORT_SYMBOL(touch_atime);
  */
 int should_remove_suid(struct dentry *dentry)
 {
-       umode_t mode = dentry->d_inode->i_mode;
+       umode_t mode = d_inode(dentry)->i_mode;
        int kill = 0;
 
        /* suid always must be killed */
@@ -1675,7 +1675,7 @@ static int __remove_suid(struct dentry *dentry, int kill)
 int file_remove_suid(struct file *file)
 {
        struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int killsuid;
        int killpriv;
        int error = 0;
@@ -1945,20 +1945,6 @@ void inode_dio_wait(struct inode *inode)
 }
 EXPORT_SYMBOL(inode_dio_wait);
 
-/*
- * inode_dio_done - signal finish of a direct I/O requests
- * @inode: inode the direct I/O happens on
- *
- * This is called once we've finished processing a direct I/O request,
- * and is used to wake up callers waiting for direct I/O to be quiesced.
- */
-void inode_dio_done(struct inode *inode)
-{
-       if (atomic_dec_and_test(&inode->i_dio_count))
-               wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
-}
-EXPORT_SYMBOL(inode_dio_done);
-
 /*
  * inode_set_flags - atomically set some inode flags
  *
index 12088d8..0c5f721 100644 (file)
@@ -44,7 +44,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
 {
        unsigned long parent_block = 0;
        unsigned long parent_offset = 0;
-       struct inode *child_inode = child->d_inode;
+       struct inode *child_inode = d_inode(child);
        struct iso_inode_info *e_child_inode = ISOFS_I(child_inode);
        struct iso_directory_record *de = NULL;
        struct buffer_head * bh = NULL;
index f21b6fb..1ba5c97 100644 (file)
@@ -224,14 +224,14 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
 {
        struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb);
        struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
-       struct jffs2_inode_info *dead_f = JFFS2_INODE_INFO(dentry->d_inode);
+       struct jffs2_inode_info *dead_f = JFFS2_INODE_INFO(d_inode(dentry));
        int ret;
        uint32_t now = get_seconds();
 
        ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
                              dentry->d_name.len, dead_f, now);
        if (dead_f->inocache)
-               set_nlink(dentry->d_inode, dead_f->inocache->pino_nlink);
+               set_nlink(d_inode(dentry), dead_f->inocache->pino_nlink);
        if (!ret)
                dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
        return ret;
@@ -241,8 +241,8 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
 
 static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct dentry *dentry)
 {
-       struct jffs2_sb_info *c = JFFS2_SB_INFO(old_dentry->d_inode->i_sb);
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode);
+       struct jffs2_sb_info *c = JFFS2_SB_INFO(d_inode(old_dentry)->i_sb);
+       struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(old_dentry));
        struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
        int ret;
        uint8_t type;
@@ -256,7 +256,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
                return -EPERM;
 
        /* XXX: This is ugly */
-       type = (old_dentry->d_inode->i_mode & S_IFMT) >> 12;
+       type = (d_inode(old_dentry)->i_mode & S_IFMT) >> 12;
        if (!type) type = DT_REG;
 
        now = get_seconds();
@@ -264,11 +264,11 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
 
        if (!ret) {
                mutex_lock(&f->sem);
-               set_nlink(old_dentry->d_inode, ++f->inocache->pino_nlink);
+               set_nlink(d_inode(old_dentry), ++f->inocache->pino_nlink);
                mutex_unlock(&f->sem);
-               d_instantiate(dentry, old_dentry->d_inode);
+               d_instantiate(dentry, d_inode(old_dentry));
                dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
-               ihold(old_dentry->d_inode);
+               ihold(d_inode(old_dentry));
        }
        return ret;
 }
@@ -585,7 +585,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
 {
        struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb);
        struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
+       struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry));
        struct jffs2_full_dirent *fd;
        int ret;
        uint32_t now = get_seconds();
@@ -599,7 +599,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
                              dentry->d_name.len, f, now);
        if (!ret) {
                dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
-               clear_nlink(dentry->d_inode);
+               clear_nlink(d_inode(dentry));
                drop_nlink(dir_i);
        }
        return ret;
@@ -770,8 +770,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
         * the VFS can't check whether the victim is empty. The filesystem
         * needs to do that for itself.
         */
-       if (new_dentry->d_inode) {
-               victim_f = JFFS2_INODE_INFO(new_dentry->d_inode);
+       if (d_really_is_positive(new_dentry)) {
+               victim_f = JFFS2_INODE_INFO(d_inode(new_dentry));
                if (d_is_dir(new_dentry)) {
                        struct jffs2_full_dirent *fd;
 
@@ -794,12 +794,12 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
        /* Make a hard link */
 
        /* XXX: This is ugly */
-       type = (old_dentry->d_inode->i_mode & S_IFMT) >> 12;
+       type = (d_inode(old_dentry)->i_mode & S_IFMT) >> 12;
        if (!type) type = DT_REG;
 
        now = get_seconds();
        ret = jffs2_do_link(c, JFFS2_INODE_INFO(new_dir_i),
-                           old_dentry->d_inode->i_ino, type,
+                           d_inode(old_dentry)->i_ino, type,
                            new_dentry->d_name.name, new_dentry->d_name.len, now);
 
        if (ret)
@@ -808,9 +808,9 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
        if (victim_f) {
                /* There was a victim. Kill it off nicely */
                if (d_is_dir(new_dentry))
-                       clear_nlink(new_dentry->d_inode);
+                       clear_nlink(d_inode(new_dentry));
                else
-                       drop_nlink(new_dentry->d_inode);
+                       drop_nlink(d_inode(new_dentry));
                /* Don't oops if the victim was a dirent pointing to an
                   inode which didn't exist. */
                if (victim_f->inocache) {
@@ -836,9 +836,9 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
 
        if (ret) {
                /* Oh shit. We really ought to make a single node which can do both atomically */
-               struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode);
+               struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(old_dentry));
                mutex_lock(&f->sem);
-               inc_nlink(old_dentry->d_inode);
+               inc_nlink(d_inode(old_dentry));
                if (f->inocache && !d_is_dir(old_dentry))
                        f->inocache->pino_nlink++;
                mutex_unlock(&f->sem);
@@ -846,8 +846,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
                pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n",
                          __func__, ret);
                /* Might as well let the VFS know */
-               d_instantiate(new_dentry, old_dentry->d_inode);
-               ihold(old_dentry->d_inode);
+               d_instantiate(new_dentry, d_inode(old_dentry));
+               ihold(d_inode(old_dentry));
                new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
                return ret;
        }
index 601afd1..fe5ea08 100644 (file)
@@ -190,7 +190,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
 
 int jffs2_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int rc;
 
        rc = inode_change_ok(inode, iattr);
index aca97f3..d4b43fb 100644 (file)
@@ -54,7 +54,7 @@ static int jffs2_security_getxattr(struct dentry *dentry, const char *name,
        if (!strcmp(name, ""))
                return -EINVAL;
 
-       return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+       return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
                                 name, buffer, size);
 }
 
@@ -64,7 +64,7 @@ static int jffs2_security_setxattr(struct dentry *dentry, const char *name,
        if (!strcmp(name, ""))
                return -EINVAL;
 
-       return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+       return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
                                 name, buffer, size, flags);
 }
 
index 3d76f28..d86c5e3 100644 (file)
@@ -140,14 +140,14 @@ static struct dentry *jffs2_get_parent(struct dentry *child)
 
        BUG_ON(!d_is_dir(child));
 
-       f = JFFS2_INODE_INFO(child->d_inode);
+       f = JFFS2_INODE_INFO(d_inode(child));
 
        pino = f->inocache->pino_nlink;
 
        JFFS2_DEBUG("Parent of directory ino #%u is #%u\n",
                    f->inocache->ino, pino);
 
-       return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino));
+       return d_obtain_alias(jffs2_iget(d_inode(child)->i_sb, pino));
 }
 
 static const struct export_operations jffs2_export_ops = {
index c7c77b0..1fefa25 100644 (file)
@@ -31,7 +31,7 @@ const struct inode_operations jffs2_symlink_inode_operations =
 
 static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
+       struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry));
        char *p = (char *)f->target;
 
        /*
index 762c7a3..f092fee 100644 (file)
@@ -960,7 +960,7 @@ static const struct xattr_handler *xprefix_to_handler(int xprefix) {
 
 ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
        struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
        struct jffs2_inode_cache *ic = f->inocache;
@@ -1266,7 +1266,6 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
        if (rc) {
                JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
                              __func__, rc, totlen);
-               rc = rc ? rc : -EBADFD;
                goto out;
        }
        rc = save_xattr_ref(c, ref);
index 1c86819..ceaf9c6 100644 (file)
@@ -21,7 +21,7 @@ static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name,
 {
        if (!strcmp(name, ""))
                return -EINVAL;
-       return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+       return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
                                 name, buffer, size);
 }
 
@@ -30,7 +30,7 @@ static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name,
 {
        if (!strcmp(name, ""))
                return -EINVAL;
-       return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+       return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
                                 name, buffer, size, flags);
 }
 
index 916b5c9..a71391e 100644 (file)
@@ -21,7 +21,7 @@ static int jffs2_user_getxattr(struct dentry *dentry, const char *name,
 {
        if (!strcmp(name, ""))
                return -EINVAL;
-       return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+       return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
                                 name, buffer, size);
 }
 
@@ -30,7 +30,7 @@ static int jffs2_user_setxattr(struct dentry *dentry, const char *name,
 {
        if (!strcmp(name, ""))
                return -EINVAL;
-       return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+       return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
                                 name, buffer, size, flags);
 }
 
index ae46788..e98d39d 100644 (file)
@@ -100,7 +100,7 @@ static int jfs_release(struct inode *inode, struct file *file)
 
 int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int rc;
 
        rc = inode_change_ok(inode, iattr);
index 38fdc53..66db7bc 100644 (file)
@@ -346,7 +346,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
 {
        int rc;
        tid_t tid;              /* transaction id */
-       struct inode *ip = dentry->d_inode;
+       struct inode *ip = d_inode(dentry);
        ino_t ino;
        struct component_name dname;
        struct inode *iplist[2];
@@ -472,7 +472,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 {
        int rc;
        tid_t tid;              /* transaction id */
-       struct inode *ip = dentry->d_inode;
+       struct inode *ip = d_inode(dentry);
        ino_t ino;
        struct component_name dname;    /* object name */
        struct inode *iplist[2];
@@ -791,7 +791,7 @@ static int jfs_link(struct dentry *old_dentry,
 {
        int rc;
        tid_t tid;
-       struct inode *ip = old_dentry->d_inode;
+       struct inode *ip = d_inode(old_dentry);
        ino_t ino;
        struct component_name dname;
        struct btstack btstack;
@@ -879,7 +879,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
        struct component_name dname;
        int ssize;              /* source pathname size */
        struct btstack btstack;
-       struct inode *ip = dentry->d_inode;
+       struct inode *ip = d_inode(dentry);
        unchar *i_fastsymlink;
        s64 xlen = 0;
        int bmask = 0, xsize;
@@ -1086,8 +1086,8 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        dquot_initialize(old_dir);
        dquot_initialize(new_dir);
 
-       old_ip = old_dentry->d_inode;
-       new_ip = new_dentry->d_inode;
+       old_ip = d_inode(old_dentry);
+       new_ip = d_inode(new_dentry);
 
        if ((rc = get_UCSname(&old_dname, old_dentry)))
                goto out1;
@@ -1500,9 +1500,9 @@ struct dentry *jfs_get_parent(struct dentry *dentry)
        unsigned long parent_ino;
 
        parent_ino =
-               le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot);
+               le32_to_cpu(JFS_IP(d_inode(dentry))->i_dtroot.header.idotdot);
 
-       return d_obtain_alias(jfs_iget(dentry->d_inode->i_sb, parent_ino));
+       return d_obtain_alias(jfs_iget(d_inode(dentry)->i_sb, parent_ino));
 }
 
 const struct inode_operations jfs_dir_inode_operations = {
@@ -1578,7 +1578,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags)
         * positive dentry isn't good idea. So it's unsupported like
         * rename("filename", "FILENAME") for now.
         */
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                return 1;
 
        /*
index 205b946..80f42bc 100644 (file)
@@ -24,7 +24,7 @@
 
 static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       char *s = JFS_IP(dentry->d_inode)->i_inline;
+       char *s = JFS_IP(d_inode(dentry))->i_inline;
        nd_set_link(nd, s);
        return NULL;
 }
index 46325d5..48b15a6 100644 (file)
@@ -849,7 +849,7 @@ int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
 int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                 size_t value_len, int flags)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct jfs_inode_info *ji = JFS_IP(inode);
        int rc;
        tid_t tid;
@@ -872,7 +872,7 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 
        tid = txBegin(inode->i_sb, 0);
        mutex_lock(&ji->commit_mutex);
-       rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len,
+       rc = __jfs_setxattr(tid, d_inode(dentry), name, value, value_len,
                            flags);
        if (!rc)
                rc = txCommit(tid, 1, &inode, 0);
@@ -959,7 +959,7 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data,
                        return -EOPNOTSUPP;
        }
 
-       err = __jfs_getxattr(dentry->d_inode, name, data, buf_size);
+       err = __jfs_getxattr(d_inode(dentry), name, data, buf_size);
 
        return err;
 }
@@ -976,7 +976,7 @@ static inline int can_list(struct jfs_ea *ea)
 
 ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        char *buffer;
        ssize_t size = 0;
        int xattr_size;
@@ -1029,7 +1029,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
 
 int jfs_removexattr(struct dentry *dentry, const char *name)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct jfs_inode_info *ji = JFS_IP(inode);
        int rc;
        tid_t tid;
@@ -1047,7 +1047,7 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
 
        tid = txBegin(inode->i_sb, 0);
        mutex_lock(&ji->commit_mutex);
-       rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+       rc = __jfs_setxattr(tid, d_inode(dentry), name, NULL, 0, XATTR_REPLACE);
        if (!rc)
                rc = txCommit(tid, 1, &inode, 0);
        txEnd(tid);
index 6acc964..f131fc2 100644 (file)
@@ -444,7 +444,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
                return -ECHILD;
 
        /* Always perform fresh lookup for negatives */
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                goto out_bad_unlocked;
 
        kn = dentry->d_fsdata;
index 9000874..2da8493 100644 (file)
@@ -111,7 +111,7 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
 
 int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct kernfs_node *kn = dentry->d_fsdata;
        int error;
 
@@ -172,11 +172,11 @@ int kernfs_iop_setxattr(struct dentry *dentry, const char *name,
 
        if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
                const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
-               error = security_inode_setsecurity(dentry->d_inode, suffix,
+               error = security_inode_setsecurity(d_inode(dentry), suffix,
                                                value, size, flags);
                if (error)
                        return error;
-               error = security_inode_getsecctx(dentry->d_inode,
+               error = security_inode_getsecctx(d_inode(dentry),
                                                &secdata, &secdata_len);
                if (error)
                        return error;
@@ -271,7 +271,7 @@ int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
                   struct kstat *stat)
 {
        struct kernfs_node *kn = dentry->d_fsdata;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        mutex_lock(&kernfs_mutex);
        kernfs_refresh_inode(kn, inode);
index 0ab6512..cb1fb4b 100644 (file)
 
 static inline int simple_positive(struct dentry *dentry)
 {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
 }
 
 int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
                   struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        generic_fillattr(inode, stat);
        stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
        return 0;
@@ -94,7 +94,7 @@ EXPORT_SYMBOL(dcache_dir_close);
 loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
 {
        struct dentry *dentry = file->f_path.dentry;
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
        switch (whence) {
                case 1:
                        offset += file->f_pos;
@@ -102,7 +102,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                        if (offset >= 0)
                                break;
                default:
-                       mutex_unlock(&dentry->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dentry)->i_mutex);
                        return -EINVAL;
        }
        if (offset != file->f_pos) {
@@ -129,7 +129,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                        spin_unlock(&dentry->d_lock);
                }
        }
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
        return offset;
 }
 EXPORT_SYMBOL(dcache_dir_lseek);
@@ -169,7 +169,7 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
                spin_unlock(&next->d_lock);
                spin_unlock(&dentry->d_lock);
                if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
-                             next->d_inode->i_ino, dt_type(next->d_inode)))
+                             d_inode(next)->i_ino, dt_type(d_inode(next))))
                        return 0;
                spin_lock(&dentry->d_lock);
                spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
@@ -270,7 +270,7 @@ EXPORT_SYMBOL(simple_open);
 
 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
 
        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
        inc_nlink(inode);
@@ -304,7 +304,7 @@ EXPORT_SYMBOL(simple_empty);
 
 int simple_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
        drop_nlink(inode);
@@ -318,7 +318,7 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
        if (!simple_empty(dentry))
                return -ENOTEMPTY;
 
-       drop_nlink(dentry->d_inode);
+       drop_nlink(d_inode(dentry));
        simple_unlink(dir, dentry);
        drop_nlink(dir);
        return 0;
@@ -328,16 +328,16 @@ EXPORT_SYMBOL(simple_rmdir);
 int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
                struct inode *new_dir, struct dentry *new_dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        int they_are_dirs = d_is_dir(old_dentry);
 
        if (!simple_empty(new_dentry))
                return -ENOTEMPTY;
 
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                simple_unlink(new_dir, new_dentry);
                if (they_are_dirs) {
-                       drop_nlink(new_dentry->d_inode);
+                       drop_nlink(d_inode(new_dentry));
                        drop_nlink(old_dir);
                }
        } else if (they_are_dirs) {
@@ -368,7 +368,7 @@ EXPORT_SYMBOL(simple_rename);
  */
 int simple_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        error = inode_change_ok(inode, iattr);
index 665ef5a..a563ddb 100644 (file)
@@ -31,7 +31,7 @@
 static struct hlist_head       nlm_files[FILE_NRHASH];
 static DEFINE_MUTEX(nlm_file_mutex);
 
-#ifdef NFSD_DEBUG
+#ifdef CONFIG_SUNRPC_DEBUG
 static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
 {
        u32 *fhp = (u32*)f->data;
index 6bdc347..4cf38f1 100644 (file)
@@ -213,7 +213,7 @@ static void abort_transaction(struct inode *inode, struct logfs_transaction *ta)
 static int logfs_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct logfs_super *super = logfs_super(dir->i_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct logfs_transaction *ta;
        struct page *page;
        pgoff_t index;
@@ -271,7 +271,7 @@ static inline int logfs_empty_dir(struct inode *dir)
 
 static int logfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (!logfs_empty_dir(inode))
                return -ENOTEMPTY;
@@ -537,7 +537,7 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
 static int logfs_link(struct dentry *old_dentry, struct inode *dir,
                struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
 
        inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
        ihold(inode);
@@ -607,7 +607,7 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry,
        /* 2. write target dd */
        mutex_lock(&super->s_dirop_mutex);
        logfs_add_transaction(new_dir, ta);
-       err = logfs_write_dir(new_dir, new_dentry, old_dentry->d_inode);
+       err = logfs_write_dir(new_dir, new_dentry, d_inode(old_dentry));
        if (!err)
                err = write_inode(new_dir);
 
@@ -658,8 +658,8 @@ static int logfs_rename_target(struct inode *old_dir, struct dentry *old_dentry,
                               struct inode *new_dir, struct dentry *new_dentry)
 {
        struct logfs_super *super = logfs_super(old_dir->i_sb);
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        int isdir = S_ISDIR(old_inode->i_mode);
        struct logfs_disk_dentry dd;
        struct logfs_transaction *ta;
@@ -719,7 +719,7 @@ out:
 static int logfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry)
 {
-       if (new_dentry->d_inode)
+       if (d_really_is_positive(new_dentry))
                return logfs_rename_target(old_dir, old_dentry,
                                           new_dir, new_dentry);
        return logfs_rename_cross(old_dir, old_dentry, new_dir, new_dentry);
index b2c13f7..1a6f016 100644 (file)
@@ -241,7 +241,7 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
 static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int err = 0;
 
        err = inode_change_ok(inode, attr);
index dfaf6fa..118e4e7 100644 (file)
@@ -156,7 +156,7 @@ minix_dirent *minix_find_entry(struct dentry *dentry, struct page **res_page)
 {
        const char * name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
-       struct inode * dir = dentry->d_parent->d_inode;
+       struct inode * dir = d_inode(dentry->d_parent);
        struct super_block * sb = dir->i_sb;
        struct minix_sb_info * sbi = minix_sb(sb);
        unsigned long n;
@@ -203,7 +203,7 @@ found:
 
 int minix_add_link(struct dentry *dentry, struct inode *inode)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        const char * name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
        struct super_block * sb = dir->i_sb;
index 6d63e27..94f0eb9 100644 (file)
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
 
 static int minix_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        error = inode_change_ok(inode, attr);
index 3f57af1..1182d1e 100644 (file)
@@ -626,8 +626,8 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
 int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
        struct super_block *sb = dentry->d_sb;
-       generic_fillattr(dentry->d_inode, stat);
-       if (INODE_VERSION(dentry->d_inode) == MINIX_V1)
+       generic_fillattr(d_inode(dentry), stat);
+       if (INODE_VERSION(d_inode(dentry)) == MINIX_V1)
                stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
        else
                stat->blocks = (sb->s_blocksize / 512) * V2_minix_blocks(stat->size, sb);
index cd950e2..a795a11 100644 (file)
@@ -104,7 +104,7 @@ out_fail:
 static int minix_link(struct dentry * old_dentry, struct inode * dir,
        struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
 
        inode->i_ctime = CURRENT_TIME_SEC;
        inode_inc_link_count(inode);
@@ -151,7 +151,7 @@ out_dir:
 static int minix_unlink(struct inode * dir, struct dentry *dentry)
 {
        int err = -ENOENT;
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
        struct page * page;
        struct minix_dir_entry * de;
 
@@ -171,7 +171,7 @@ end_unlink:
 
 static int minix_rmdir(struct inode * dir, struct dentry *dentry)
 {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
        int err = -ENOTEMPTY;
 
        if (minix_empty_dir(inode)) {
@@ -187,8 +187,8 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
 static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
                           struct inode * new_dir, struct dentry *new_dentry)
 {
-       struct inode * old_inode = old_dentry->d_inode;
-       struct inode * new_inode = new_dentry->d_inode;
+       struct inode * old_inode = d_inode(old_dentry);
+       struct inode * new_inode = d_inode(new_dentry);
        struct page * dir_page = NULL;
        struct minix_dir_entry * dir_de = NULL;
        struct page * old_page;
index ffab2e0..4a8d998 100644 (file)
@@ -1590,7 +1590,8 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
 
        if (should_follow_link(path->dentry, follow)) {
                if (nd->flags & LOOKUP_RCU) {
-                       if (unlikely(unlazy_walk(nd, path->dentry))) {
+                       if (unlikely(nd->path.mnt != path->mnt ||
+                                    unlazy_walk(nd, path->dentry))) {
                                err = -ECHILD;
                                goto out_err;
                        }
@@ -3045,7 +3046,8 @@ finish_lookup:
 
        if (should_follow_link(path->dentry, !symlink_ok)) {
                if (nd->flags & LOOKUP_RCU) {
-                       if (unlikely(unlazy_walk(nd, path->dentry))) {
+                       if (unlikely(nd->path.mnt != path->mnt ||
+                                    unlazy_walk(nd, path->dentry))) {
                                error = -ECHILD;
                                goto out;
                        }
index e7ca827..80021c7 100644 (file)
@@ -127,7 +127,7 @@ static inline int ncp_case_sensitive(const struct inode *i)
 static int 
 ncp_hash_dentry(const struct dentry *dentry, struct qstr *this)
 {
-       struct inode *inode = ACCESS_ONCE(dentry->d_inode);
+       struct inode *inode = d_inode_rcu(dentry);
 
        if (!inode)
                return 0;
@@ -162,7 +162,7 @@ ncp_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
        if (len != name->len)
                return 1;
 
-       pinode = ACCESS_ONCE(parent->d_inode);
+       pinode = d_inode_rcu(parent);
        if (!pinode)
                return 1;
 
@@ -180,7 +180,7 @@ ncp_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
 static int
 ncp_delete_dentry(const struct dentry * dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (inode) {
                if (is_bad_inode(inode))
@@ -224,7 +224,7 @@ ncp_force_unlink(struct inode *dir, struct dentry* dentry)
        memset(&info, 0, sizeof(info));
        
         /* remove the Read-Only flag on the NW server */
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        old_nwattr = NCP_FINFO(inode)->nwattr;
        info.attributes = old_nwattr & ~(aRONLY|aDELETEINHIBIT|aRENAMEINHIBIT);
@@ -254,7 +254,7 @@ ncp_force_rename(struct inode *old_dir, struct dentry* old_dentry, char *_old_na
 {
        struct nw_modify_dos_info info;
         int res=0x90,res2;
-       struct inode *old_inode = old_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
        __le32 old_nwattr = NCP_FINFO(old_inode)->nwattr;
        __le32 new_nwattr = 0; /* shut compiler warning */
        int old_nwattr_changed = 0;
@@ -268,8 +268,8 @@ ncp_force_rename(struct inode *old_dir, struct dentry* old_dentry, char *_old_na
        res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(old_inode), old_inode, NULL, DM_ATTRIBUTES, &info);
        if (!res2)
                old_nwattr_changed = 1;
-       if (new_dentry && new_dentry->d_inode) {
-               new_nwattr = NCP_FINFO(new_dentry->d_inode)->nwattr;
+       if (new_dentry && d_really_is_positive(new_dentry)) {
+               new_nwattr = NCP_FINFO(d_inode(new_dentry))->nwattr;
                info.attributes = new_nwattr & ~(aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT);
                res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(new_dir), new_dir, _new_name, DM_ATTRIBUTES, &info);
                if (!res2)
@@ -324,9 +324,9 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
                return -ECHILD;
 
        parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
 
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                goto finished;
 
        server = NCP_SERVER(dir);
@@ -367,7 +367,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
         * what we remember, it's not valid any more.
         */
        if (!res) {
-               struct inode *inode = dentry->d_inode;
+               struct inode *inode = d_inode(dentry);
 
                mutex_lock(&inode->i_mutex);
                if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) {
@@ -388,7 +388,7 @@ finished:
 
 static time_t ncp_obtain_mtime(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ncp_server *server = NCP_SERVER(inode);
        struct nw_info_struct i;
 
@@ -404,7 +404,7 @@ static time_t ncp_obtain_mtime(struct dentry *dentry)
 static inline void
 ncp_invalidate_dircache_entries(struct dentry *parent)
 {
-       struct ncp_server *server = NCP_SERVER(parent->d_inode);
+       struct ncp_server *server = NCP_SERVER(d_inode(parent));
        struct dentry *dentry;
 
        spin_lock(&parent->d_lock);
@@ -418,7 +418,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
 static int ncp_readdir(struct file *file, struct dir_context *ctx)
 {
        struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct page *page = NULL;
        struct ncp_server *server = NCP_SERVER(inode);
        union  ncp_dir_cache *cache = NULL;
@@ -491,13 +491,13 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
                                goto invalid_cache;
                        }
                        spin_unlock(&dentry->d_lock);
-                       if (!dent->d_inode) {
+                       if (d_really_is_negative(dent)) {
                                dput(dent);
                                goto invalid_cache;
                        }
                        over = !dir_emit(ctx, dent->d_name.name,
                                        dent->d_name.len,
-                                       dent->d_inode->i_ino, DT_UNKNOWN);
+                                       d_inode(dent)->i_ino, DT_UNKNOWN);
                        dput(dent);
                        if (over)
                                goto finished;
@@ -571,7 +571,7 @@ static void ncp_d_prune(struct dentry *dentry)
 {
        if (!dentry->d_fsdata)  /* not referenced from page cache */
                return;
-       NCP_FINFO(dentry->d_parent->d_inode)->flags &= ~NCPI_DIR_CACHE;
+       NCP_FINFO(d_inode(dentry->d_parent))->flags &= ~NCPI_DIR_CACHE;
 }
 
 static int
@@ -580,7 +580,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
                int inval_childs)
 {
        struct dentry *newdent, *dentry = file->f_path.dentry;
-       struct inode *dir = dentry->d_inode;
+       struct inode *dir = d_inode(dentry);
        struct ncp_cache_control ctl = *ctrl;
        struct qstr qname;
        int valid = 0;
@@ -621,7 +621,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
                dentry_update_name_case(newdent, &qname);
        }
 
-       if (!newdent->d_inode) {
+       if (d_really_is_negative(newdent)) {
                struct inode *inode;
 
                entry->opened = 0;
@@ -637,7 +637,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
                        spin_unlock(&dentry->d_lock);
                }
        } else {
-               struct inode *inode = newdent->d_inode;
+               struct inode *inode = d_inode(newdent);
 
                mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
                ncp_update_inode2(inode, entry);
@@ -659,10 +659,10 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
                        ctl.cache = kmap(ctl.page);
        }
        if (ctl.cache) {
-               if (newdent->d_inode) {
+               if (d_really_is_positive(newdent)) {
                        newdent->d_fsdata = newdent;
                        ctl.cache->dentry[ctl.idx] = newdent;
-                       ino = newdent->d_inode->i_ino;
+                       ino = d_inode(newdent)->i_ino;
                        ncp_new_dentry(newdent);
                }
                valid = 1;
@@ -807,7 +807,7 @@ int ncp_conn_logged_in(struct super_block *sb)
                }
                dent = sb->s_root;
                if (dent) {
-                       struct inode* ino = dent->d_inode;
+                       struct inode* ino = d_inode(dent);
                        if (ino) {
                                ncp_update_known_namespace(server, volNumber, NULL);
                                NCP_FINFO(ino)->volNumber = volNumber;
@@ -815,7 +815,7 @@ int ncp_conn_logged_in(struct super_block *sb)
                                NCP_FINFO(ino)->DosDirNum = DosDirNum;
                                result = 0;
                        } else {
-                               ncp_dbg(1, "sb->s_root->d_inode == NULL!\n");
+                               ncp_dbg(1, "d_inode(sb->s_root) == NULL!\n");
                        }
                } else {
                        ncp_dbg(1, "sb->s_root == NULL!\n");
@@ -1055,7 +1055,7 @@ out:
 
 static int ncp_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ncp_server *server;
        int error;
 
index 01a9e16..9605a2f 100644 (file)
@@ -812,7 +812,7 @@ static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
        if (!d) {
                goto dflt;
        }
-       i = d->d_inode;
+       i = d_inode(d);
        if (!i) {
                goto dflt;
        }
@@ -865,7 +865,7 @@ dflt:;
 
 int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int result = 0;
        __le32 info_mask;
        struct nw_modify_dos_info info;
@@ -878,7 +878,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
                goto out;
 
        result = -EPERM;
-       if (IS_DEADDIR(dentry->d_inode))
+       if (IS_DEADDIR(d_inode(dentry)))
                goto out;
 
        /* ageing the dentry to force validation */
index cf7e043..79b1130 100644 (file)
@@ -376,7 +376,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                struct dentry* dentry = inode->i_sb->s_root;
 
                                if (dentry) {
-                                       struct inode* s_inode = dentry->d_inode;
+                                       struct inode* s_inode = d_inode(dentry);
 
                                        if (s_inode) {
                                                sr.volNumber = NCP_FINFO(s_inode)->volNumber;
@@ -384,7 +384,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                                sr.namespace = server->name_space[sr.volNumber];
                                                result = 0;
                                        } else
-                                               ncp_dbg(1, "s_root->d_inode==NULL\n");
+                                               ncp_dbg(1, "d_inode(s_root)==NULL\n");
                                } else
                                        ncp_dbg(1, "s_root==NULL\n");
                        } else {
@@ -431,7 +431,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                if (result == 0) {
                                        dentry = inode->i_sb->s_root;
                                        if (dentry) {
-                                               struct inode* s_inode = dentry->d_inode;
+                                               struct inode* s_inode = d_inode(dentry);
 
                                                if (s_inode) {
                                                        NCP_FINFO(s_inode)->volNumber = vnum;
@@ -439,7 +439,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                                        NCP_FINFO(s_inode)->DosDirNum = dosde;
                                                        server->root_setuped = 1;
                                                } else {
-                                                       ncp_dbg(1, "s_root->d_inode==NULL\n");
+                                                       ncp_dbg(1, "d_inode(s_root)==NULL\n");
                                                        result = -EIO;
                                                }
                                        } else {
index 2b502a0..88dbbc9 100644 (file)
@@ -727,7 +727,7 @@ int
 ncp_del_file_or_subdir2(struct ncp_server *server,
                        struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        __u8  volnum;
        __le32 dirent;
 
index 1a63bfd..421b6f9 100644 (file)
@@ -156,7 +156,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
                goto failfree;
        }
 
-       inode=dentry->d_inode;
+       inode=d_inode(dentry);
 
        if (ncp_make_open(inode, O_WRONLY))
                goto failfree;
index 1e987ac..8664417 100644 (file)
@@ -22,7 +22,7 @@ nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
 obj-$(CONFIG_NFS_V4) += nfsv4.o
 CFLAGS_nfs4trace.o += -I$(src)
 nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
-         delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
+         delegation.o nfs4idmap.o callback.o callback_xdr.o callback_proc.o \
          nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \
          dns_resolve.o nfs4trace.o
 nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
index 1cac3c1..d2554fe 100644 (file)
@@ -890,6 +890,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
        .free_deviceid_node             = bl_free_deviceid_node,
        .pg_read_ops                    = &bl_pg_read_ops,
        .pg_write_ops                   = &bl_pg_write_ops,
+       .sync                           = pnfs_generic_sync,
 };
 
 static int __init nfs4blocklayout_init(void)
index 5aed4f9..e535599 100644 (file)
@@ -33,7 +33,7 @@ bl_free_deviceid_node(struct nfs4_deviceid_node *d)
                container_of(d, struct pnfs_block_dev, node);
 
        bl_free_device(dev);
-       kfree(dev);
+       kfree_rcu(dev, node.rcu);
 }
 
 static int
index 351be92..8d129bb 100644 (file)
@@ -128,7 +128,7 @@ nfs41_callback_svc(void *vrqstp)
                if (try_to_freeze())
                        continue;
 
-               prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE);
+               prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
                spin_lock_bh(&serv->sv_cb_lock);
                if (!list_empty(&serv->sv_cb_list)) {
                        req = list_first_entry(&serv->sv_cb_list,
@@ -142,10 +142,10 @@ nfs41_callback_svc(void *vrqstp)
                                error);
                } else {
                        spin_unlock_bh(&serv->sv_cb_lock);
-                       /* schedule_timeout to game the hung task watchdog */
-                       schedule_timeout(60 * HZ);
+                       schedule();
                        finish_wait(&serv->sv_cb_waitq, &wq);
                }
+               flush_signals(current);
        }
        return 0;
 }
index 1987415..892aeff 100644 (file)
@@ -31,7 +31,6 @@
 #include <linux/lockd/bind.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
-#include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/inet.h>
 #include <linux/in6.h>
index a6ad688..029d688 100644 (file)
@@ -378,7 +378,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
                if (freeme == NULL)
                        goto out;
        }
-       list_add_rcu(&delegation->super_list, &server->delegations);
+       list_add_tail_rcu(&delegation->super_list, &server->delegations);
        rcu_assign_pointer(nfsi->delegation, delegation);
        delegation = NULL;
 
@@ -514,7 +514,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
 
        delegation = nfs_inode_detach_delegation(inode);
        if (delegation != NULL)
-               nfs_do_return_delegation(inode, delegation, 0);
+               nfs_do_return_delegation(inode, delegation, 1);
 }
 
 /**
index c19e16f..b2c8b31 100644 (file)
@@ -416,15 +416,14 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 {
        struct nfs_inode *nfsi;
 
-       if (dentry->d_inode == NULL)
-               goto different;
+       if (d_really_is_negative(dentry))
+               return 0;
 
-       nfsi = NFS_I(dentry->d_inode);
+       nfsi = NFS_I(d_inode(dentry));
        if (entry->fattr->fileid == nfsi->fileid)
                return 1;
        if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
                return 1;
-different:
        return 0;
 }
 
@@ -473,7 +472,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
        struct qstr filename = QSTR_INIT(entry->name, entry->len);
        struct dentry *dentry;
        struct dentry *alias;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        struct inode *inode;
        int status;
 
@@ -497,9 +496,9 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
                        goto out;
                if (nfs_same_file(dentry, entry)) {
                        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-                       status = nfs_refresh_inode(dentry->d_inode, entry->fattr);
+                       status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
                        if (!status)
-                               nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label);
+                               nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label);
                        goto out;
                } else {
                        d_invalidate(dentry);
@@ -544,6 +543,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
        if (scratch == NULL)
                return -ENOMEM;
 
+       if (buflen == 0)
+               goto out_nopages;
+
        xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
        xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
@@ -565,6 +567,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
                        break;
        } while (!entry->eof);
 
+out_nopages:
        if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
                array = nfs_readdir_get_array(page);
                if (!IS_ERR(array)) {
@@ -870,7 +873,7 @@ static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
 static int nfs_readdir(struct file *file, struct dir_context *ctx)
 {
        struct dentry   *dentry = file->f_path.dentry;
-       struct inode    *inode = dentry->d_inode;
+       struct inode    *inode = d_inode(dentry);
        nfs_readdir_descriptor_t my_desc,
                        *desc = &my_desc;
        struct nfs_open_dir_context *dir_ctx = file->private_data;
@@ -1118,15 +1121,15 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 
        if (flags & LOOKUP_RCU) {
                parent = ACCESS_ONCE(dentry->d_parent);
-               dir = ACCESS_ONCE(parent->d_inode);
+               dir = d_inode_rcu(parent);
                if (!dir)
                        return -ECHILD;
        } else {
                parent = dget_parent(dentry);
-               dir = parent->d_inode;
+               dir = d_inode(parent);
        }
        nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        if (!inode) {
                if (nfs_neg_need_reval(dir, dentry, flags)) {
@@ -1242,7 +1245,7 @@ out_error:
 }
 
 /*
- * A weaker form of d_revalidate for revalidating just the dentry->d_inode
+ * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
  * when we don't really care about the dentry name. This is called when a
  * pathwalk ends on a dentry that was not found via a normal lookup in the
  * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
@@ -1253,7 +1256,7 @@ out_error:
 static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
 {
        int error;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        /*
         * I believe we can only get a negative dentry here in the case of a
@@ -1287,7 +1290,7 @@ static int nfs_dentry_delete(const struct dentry *dentry)
                dentry, dentry->d_flags);
 
        /* Unhash any dentry with a stale inode */
-       if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
+       if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
                return 1;
 
        if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
@@ -1491,7 +1494,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
        int err;
 
        /* Expect a negative dentry */
-       BUG_ON(dentry->d_inode);
+       BUG_ON(d_inode(dentry));
 
        dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
                        dir->i_sb->s_id, dir->i_ino, dentry);
@@ -1587,7 +1590,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
        if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
                goto no_open;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        /* We can't create new files in nfs_open_revalidate(), so we
         * optimize away revalidation of negative dentries.
@@ -1598,12 +1601,12 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 
                if (flags & LOOKUP_RCU) {
                        parent = ACCESS_ONCE(dentry->d_parent);
-                       dir = ACCESS_ONCE(parent->d_inode);
+                       dir = d_inode_rcu(parent);
                        if (!dir)
                                return -ECHILD;
                } else {
                        parent = dget_parent(dentry);
-                       dir = parent->d_inode;
+                       dir = d_inode(parent);
                }
                if (!nfs_neg_need_reval(dir, dentry, flags))
                        ret = 1;
@@ -1643,14 +1646,14 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
                                struct nfs4_label *label)
 {
        struct dentry *parent = dget_parent(dentry);
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        struct inode *inode;
        int error = -EACCES;
 
        d_drop(dentry);
 
        /* We may have been initialized further down */
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                goto out;
        if (fhandle->size == 0) {
                error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
@@ -1768,7 +1771,7 @@ EXPORT_SYMBOL_GPL(nfs_mkdir);
 
 static void nfs_dentry_handle_enoent(struct dentry *dentry)
 {
-       if (dentry->d_inode != NULL && !d_unhashed(dentry))
+       if (d_really_is_positive(dentry) && !d_unhashed(dentry))
                d_delete(dentry);
 }
 
@@ -1780,13 +1783,13 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
                        dir->i_sb->s_id, dir->i_ino, dentry);
 
        trace_nfs_rmdir_enter(dir, dentry);
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                nfs_wait_on_sillyrename(dentry);
                error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
                /* Ensure the VFS deletes this inode */
                switch (error) {
                case 0:
-                       clear_nlink(dentry->d_inode);
+                       clear_nlink(d_inode(dentry));
                        break;
                case -ENOENT:
                        nfs_dentry_handle_enoent(dentry);
@@ -1808,8 +1811,8 @@ EXPORT_SYMBOL_GPL(nfs_rmdir);
  */
 static int nfs_safe_remove(struct dentry *dentry)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
-       struct inode *inode = dentry->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
+       struct inode *inode = d_inode(dentry);
        int error = -EBUSY;
                
        dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
@@ -1853,7 +1856,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
        if (d_count(dentry) > 1) {
                spin_unlock(&dentry->d_lock);
                /* Start asynchronous writeout of the inode */
-               write_inode_now(dentry->d_inode, 0);
+               write_inode_now(d_inode(dentry), 0);
                error = nfs_sillyrename(dir, dentry);
                goto out;
        }
@@ -1931,7 +1934,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
         * No big deal if we can't add this page to the page cache here.
         * READLINK will get the missing page from the server if needed.
         */
-       if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0,
+       if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
                                                        GFP_KERNEL)) {
                SetPageUptodate(page);
                unlock_page(page);
@@ -1950,7 +1953,7 @@ EXPORT_SYMBOL_GPL(nfs_symlink);
 int
 nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        int error;
 
        dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
@@ -1997,8 +2000,8 @@ EXPORT_SYMBOL_GPL(nfs_link);
 int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                      struct inode *new_dir, struct dentry *new_dentry)
 {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct dentry *dentry = NULL, *rehash = NULL;
        struct rpc_task *task;
        int error = -EBUSY;
index 682f65f..38678d9 100644 (file)
@@ -129,22 +129,25 @@ nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
        int i;
        ssize_t count;
 
-       WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count);
-
-       count = dreq->mirrors[hdr->pgio_mirror_idx].count;
-       if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
-               count = hdr->io_start + hdr->good_bytes - dreq->io_start;
-               dreq->mirrors[hdr->pgio_mirror_idx].count = count;
-       }
-
-       /* update the dreq->count by finding the minimum agreed count from all
-        * mirrors */
-       count = dreq->mirrors[0].count;
+       if (dreq->mirror_count == 1) {
+               dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
+               dreq->count += hdr->good_bytes;
+       } else {
+               /* mirrored writes */
+               count = dreq->mirrors[hdr->pgio_mirror_idx].count;
+               if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
+                       count = hdr->io_start + hdr->good_bytes - dreq->io_start;
+                       dreq->mirrors[hdr->pgio_mirror_idx].count = count;
+               }
+               /* update the dreq->count by finding the minimum agreed count from all
+                * mirrors */
+               count = dreq->mirrors[0].count;
 
-       for (i = 1; i < dreq->mirror_count; i++)
-               count = min(count, dreq->mirrors[i].count);
+               for (i = 1; i < dreq->mirror_count; i++)
+                       count = min(count, dreq->mirrors[i].count);
 
-       dreq->count = count;
+               dreq->count = count;
+       }
 }
 
 /*
@@ -258,18 +261,11 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
        if (!IS_SWAPFILE(inode))
                return 0;
 
-#ifndef CONFIG_NFS_SWAP
-       dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
-                       iocb->ki_filp, (long long) pos, iter->nr_segs);
-
-       return -EINVAL;
-#else
        VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
 
        if (iov_iter_rw(iter) == READ)
                return nfs_file_direct_read(iocb, iter, pos);
        return nfs_file_direct_write(iocb, iter);
-#endif /* CONFIG_NFS_SWAP */
 }
 
 static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -386,7 +382,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
        if (write)
                nfs_zap_mapping(inode, inode->i_mapping);
 
-       inode_dio_done(inode);
+       inode_dio_end(inode);
 
        if (dreq->iocb) {
                long res = (long) dreq->error;
@@ -403,8 +399,8 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
 static void nfs_direct_readpage_release(struct nfs_page *req)
 {
        dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
-               req->wb_context->dentry->d_inode->i_sb->s_id,
-               (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+               d_inode(req->wb_context->dentry)->i_sb->s_id,
+               (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
                req->wb_bytes,
                (long long)req_offset(req));
        nfs_release_request(req);
@@ -486,7 +482,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
                             &nfs_direct_read_completion_ops);
        get_dreq(dreq);
        desc.pg_dreq = dreq;
-       atomic_inc(&inode->i_dio_count);
+       inode_dio_begin(inode);
 
        while (iov_iter_count(iter)) {
                struct page **pagevec;
@@ -538,7 +534,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
         * generic layer handle the completion.
         */
        if (requested_bytes == 0) {
-               inode_dio_done(inode);
+               inode_dio_end(inode);
                nfs_direct_req_release(dreq);
                return result < 0 ? result : -EIO;
        }
@@ -872,7 +868,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
                              &nfs_direct_write_completion_ops);
        desc.pg_dreq = dreq;
        get_dreq(dreq);
-       atomic_inc(&inode->i_dio_count);
+       inode_dio_begin(inode);
 
        NFS_I(inode)->write_io += iov_iter_count(iter);
        while (iov_iter_count(iter)) {
@@ -928,7 +924,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
         * generic layer handle the completion.
         */
        if (requested_bytes == 0) {
-               inode_dio_done(inode);
+               inode_dio_end(inode);
                nfs_direct_req_release(dreq);
                return result < 0 ? result : -EIO;
        }
@@ -1030,6 +1026,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
                        if (i_size_read(inode) < iocb->ki_pos)
                                i_size_write(inode, iocb->ki_pos);
                        spin_unlock(&inode->i_lock);
+                       generic_write_sync(file, pos, result);
                }
        }
        nfs_direct_req_release(dreq);
index c40e436..8b8d83a 100644 (file)
@@ -280,6 +280,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 
        trace_nfs_fsync_enter(inode);
 
+       nfs_inode_dio_wait(inode);
        do {
                ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                if (ret != 0)
@@ -782,7 +783,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
         * Flush all pending writes before doing anything
         * with locks..
         */
-       nfs_sync_mapping(filp->f_mapping);
+       vfs_fsync(filp, 0);
 
        l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
        if (!IS_ERR(l_ctx)) {
index 91e88a7..a46bf6d 100644 (file)
@@ -258,7 +258,8 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
            hdr->res.verf->committed != NFS_DATA_SYNC)
                return;
 
-       pnfs_set_layoutcommit(hdr);
+       pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
+                       hdr->mds_offset + hdr->res.count);
        dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
                (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
@@ -373,7 +374,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
        }
 
        if (data->verf.committed == NFS_UNSTABLE)
-               pnfs_commit_set_layoutcommit(data);
+               pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
 
        return 0;
 }
@@ -1086,7 +1087,7 @@ filelayout_alloc_deviceid_node(struct nfs_server *server,
 }
 
 static void
-filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
+filelayout_free_deviceid_node(struct nfs4_deviceid_node *d)
 {
        nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
 }
@@ -1137,7 +1138,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
        .read_pagelist          = filelayout_read_pagelist,
        .write_pagelist         = filelayout_write_pagelist,
        .alloc_deviceid_node    = filelayout_alloc_deviceid_node,
-       .free_deviceid_node     = filelayout_free_deveiceid_node,
+       .free_deviceid_node     = filelayout_free_deviceid_node,
+       .sync                   = pnfs_nfs_generic_sync,
 };
 
 static int __init nfs4filelayout_init(void)
index 4f372e2..4946ef4 100644 (file)
@@ -55,7 +55,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
                        nfs4_pnfs_ds_put(ds);
        }
        kfree(dsaddr->stripe_indices);
-       kfree(dsaddr);
+       kfree_rcu(dsaddr, id_node.rcu);
 }
 
 /* Decode opaque device data and return the result */
index 315cc68..7d05089 100644 (file)
 #include <linux/module.h>
 
 #include <linux/sunrpc/metrics.h>
-#include <linux/nfs_idmap.h>
 
 #include "flexfilelayout.h"
 #include "../nfs4session.h"
+#include "../nfs4idmap.h"
 #include "../internal.h"
 #include "../delegation.h"
 #include "../nfs4trace.h"
@@ -891,7 +891,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
 static void
 ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
 {
-       pnfs_set_layoutcommit(hdr);
+       pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
+                       hdr->mds_offset + hdr->res.count);
        dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
                (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
 }
@@ -1074,7 +1075,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
        }
 
        if (data->verf.committed == NFS_UNSTABLE)
-               pnfs_commit_set_layoutcommit(data);
+               pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
 
        return 0;
 }
@@ -1414,7 +1415,7 @@ ff_layout_get_ds_info(struct inode *inode)
 }
 
 static void
-ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d)
+ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)
 {
        nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
                                                  id_node));
@@ -1498,7 +1499,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
        .pg_read_ops            = &ff_layout_pg_read_ops,
        .pg_write_ops           = &ff_layout_pg_write_ops,
        .get_ds_info            = ff_layout_get_ds_info,
-       .free_deviceid_node     = ff_layout_free_deveiceid_node,
+       .free_deviceid_node     = ff_layout_free_deviceid_node,
        .mark_request_commit    = pnfs_layout_mark_request_commit,
        .clear_request_commit   = pnfs_generic_clear_request_commit,
        .scan_commit_lists      = pnfs_generic_scan_commit_lists,
@@ -1508,6 +1509,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
        .write_pagelist         = ff_layout_write_pagelist,
        .alloc_deviceid_node    = ff_layout_alloc_deviceid_node,
        .encode_layoutreturn    = ff_layout_encode_layoutreturn,
+       .sync                   = pnfs_nfs_generic_sync,
 };
 
 static int __init nfs4flexfilelayout_init(void)
index e2c01f2..77a2d02 100644 (file)
@@ -30,7 +30,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
 {
        nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
        nfs4_pnfs_ds_put(mirror_ds->ds);
-       kfree(mirror_ds);
+       kfree_rcu(mirror_ds, id_node.rcu);
 }
 
 /* Decode opaque device data and construct new_ds using it */
index 9ac3846..a608ffd 100644 (file)
@@ -56,11 +56,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
                 * This again causes shrink_dcache_for_umount_subtree() to
                 * Oops, since the test for IS_ROOT() will fail.
                 */
-               spin_lock(&sb->s_root->d_inode->i_lock);
+               spin_lock(&d_inode(sb->s_root)->i_lock);
                spin_lock(&sb->s_root->d_lock);
                hlist_del_init(&sb->s_root->d_u.d_alias);
                spin_unlock(&sb->s_root->d_lock);
-               spin_unlock(&sb->s_root->d_inode->i_lock);
+               spin_unlock(&d_inode(sb->s_root)->i_lock);
        }
        return 0;
 }
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
deleted file mode 100644 (file)
index 857e2a9..0000000
+++ /dev/null
@@ -1,792 +0,0 @@
-/*
- * fs/nfs/idmap.c
- *
- *  UID and GID to name mapping for clients.
- *
- *  Copyright (c) 2002 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Marius Aamodt Eriksen <marius@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/types.h>
-#include <linux/parser.h>
-#include <linux/fs.h>
-#include <linux/nfs_idmap.h>
-#include <net/net_namespace.h>
-#include <linux/sunrpc/rpc_pipe_fs.h>
-#include <linux/nfs_fs.h>
-#include <linux/nfs_fs_sb.h>
-#include <linux/key.h>
-#include <linux/keyctl.h>
-#include <linux/key-type.h>
-#include <keys/user-type.h>
-#include <linux/module.h>
-
-#include "internal.h"
-#include "netns.h"
-#include "nfs4trace.h"
-
-#define NFS_UINT_MAXLEN 11
-
-static const struct cred *id_resolver_cache;
-static struct key_type key_type_id_resolver_legacy;
-
-struct idmap_legacy_upcalldata {
-       struct rpc_pipe_msg pipe_msg;
-       struct idmap_msg idmap_msg;
-       struct key_construction *key_cons;
-       struct idmap *idmap;
-};
-
-struct idmap {
-       struct rpc_pipe_dir_object idmap_pdo;
-       struct rpc_pipe         *idmap_pipe;
-       struct idmap_legacy_upcalldata *idmap_upcall_data;
-       struct mutex            idmap_mutex;
-};
-
-/**
- * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
- * @fattr: fully initialised struct nfs_fattr
- * @owner_name: owner name string cache
- * @group_name: group name string cache
- */
-void nfs_fattr_init_names(struct nfs_fattr *fattr,
-               struct nfs4_string *owner_name,
-               struct nfs4_string *group_name)
-{
-       fattr->owner_name = owner_name;
-       fattr->group_name = group_name;
-}
-
-static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr)
-{
-       fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME;
-       kfree(fattr->owner_name->data);
-}
-
-static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
-{
-       fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME;
-       kfree(fattr->group_name->data);
-}
-
-static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
-{
-       struct nfs4_string *owner = fattr->owner_name;
-       kuid_t uid;
-
-       if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
-               return false;
-       if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) {
-               fattr->uid = uid;
-               fattr->valid |= NFS_ATTR_FATTR_OWNER;
-       }
-       return true;
-}
-
-static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
-{
-       struct nfs4_string *group = fattr->group_name;
-       kgid_t gid;
-
-       if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
-               return false;
-       if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) {
-               fattr->gid = gid;
-               fattr->valid |= NFS_ATTR_FATTR_GROUP;
-       }
-       return true;
-}
-
-/**
- * nfs_fattr_free_names - free up the NFSv4 owner and group strings
- * @fattr: a fully initialised nfs_fattr structure
- */
-void nfs_fattr_free_names(struct nfs_fattr *fattr)
-{
-       if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)
-               nfs_fattr_free_owner_name(fattr);
-       if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)
-               nfs_fattr_free_group_name(fattr);
-}
-
-/**
- * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free
- * @server: pointer to the filesystem nfs_server structure
- * @fattr: a fully initialised nfs_fattr structure
- *
- * This helper maps the cached NFSv4 owner/group strings in fattr into
- * their numeric uid/gid equivalents, and then frees the cached strings.
- */
-void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr)
-{
-       if (nfs_fattr_map_owner_name(server, fattr))
-               nfs_fattr_free_owner_name(fattr);
-       if (nfs_fattr_map_group_name(server, fattr))
-               nfs_fattr_free_group_name(fattr);
-}
-
-int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
-{
-       unsigned long val;
-       char buf[16];
-
-       if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
-               return 0;
-       memcpy(buf, name, namelen);
-       buf[namelen] = '\0';
-       if (kstrtoul(buf, 0, &val) != 0)
-               return 0;
-       *res = val;
-       return 1;
-}
-EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric);
-
-static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
-{
-       return snprintf(buf, buflen, "%u", id);
-}
-
-static struct key_type key_type_id_resolver = {
-       .name           = "id_resolver",
-       .preparse       = user_preparse,
-       .free_preparse  = user_free_preparse,
-       .instantiate    = generic_key_instantiate,
-       .revoke         = user_revoke,
-       .destroy        = user_destroy,
-       .describe       = user_describe,
-       .read           = user_read,
-};
-
-static int nfs_idmap_init_keyring(void)
-{
-       struct cred *cred;
-       struct key *keyring;
-       int ret = 0;
-
-       printk(KERN_NOTICE "NFS: Registering the %s key type\n",
-               key_type_id_resolver.name);
-
-       cred = prepare_kernel_cred(NULL);
-       if (!cred)
-               return -ENOMEM;
-
-       keyring = keyring_alloc(".id_resolver",
-                               GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
-                               (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                               KEY_USR_VIEW | KEY_USR_READ,
-                               KEY_ALLOC_NOT_IN_QUOTA, NULL);
-       if (IS_ERR(keyring)) {
-               ret = PTR_ERR(keyring);
-               goto failed_put_cred;
-       }
-
-       ret = register_key_type(&key_type_id_resolver);
-       if (ret < 0)
-               goto failed_put_key;
-
-       ret = register_key_type(&key_type_id_resolver_legacy);
-       if (ret < 0)
-               goto failed_reg_legacy;
-
-       set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
-       cred->thread_keyring = keyring;
-       cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
-       id_resolver_cache = cred;
-       return 0;
-
-failed_reg_legacy:
-       unregister_key_type(&key_type_id_resolver);
-failed_put_key:
-       key_put(keyring);
-failed_put_cred:
-       put_cred(cred);
-       return ret;
-}
-
-static void nfs_idmap_quit_keyring(void)
-{
-       key_revoke(id_resolver_cache->thread_keyring);
-       unregister_key_type(&key_type_id_resolver);
-       unregister_key_type(&key_type_id_resolver_legacy);
-       put_cred(id_resolver_cache);
-}
-
-/*
- * Assemble the description to pass to request_key()
- * This function will allocate a new string and update dest to point
- * at it.  The caller is responsible for freeing dest.
- *
- * On error 0 is returned.  Otherwise, the length of dest is returned.
- */
-static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
-                               const char *type, size_t typelen, char **desc)
-{
-       char *cp;
-       size_t desclen = typelen + namelen + 2;
-
-       *desc = kmalloc(desclen, GFP_KERNEL);
-       if (!*desc)
-               return -ENOMEM;
-
-       cp = *desc;
-       memcpy(cp, type, typelen);
-       cp += typelen;
-       *cp++ = ':';
-
-       memcpy(cp, name, namelen);
-       cp += namelen;
-       *cp = '\0';
-       return desclen;
-}
-
-static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
-                                        const char *type, struct idmap *idmap)
-{
-       char *desc;
-       struct key *rkey;
-       ssize_t ret;
-
-       ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
-       if (ret <= 0)
-               return ERR_PTR(ret);
-
-       rkey = request_key(&key_type_id_resolver, desc, "");
-       if (IS_ERR(rkey)) {
-               mutex_lock(&idmap->idmap_mutex);
-               rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
-                                               desc, "", 0, idmap);
-               mutex_unlock(&idmap->idmap_mutex);
-       }
-       if (!IS_ERR(rkey))
-               set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
-
-       kfree(desc);
-       return rkey;
-}
-
-static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
-                                const char *type, void *data,
-                                size_t data_size, struct idmap *idmap)
-{
-       const struct cred *saved_cred;
-       struct key *rkey;
-       struct user_key_payload *payload;
-       ssize_t ret;
-
-       saved_cred = override_creds(id_resolver_cache);
-       rkey = nfs_idmap_request_key(name, namelen, type, idmap);
-       revert_creds(saved_cred);
-
-       if (IS_ERR(rkey)) {
-               ret = PTR_ERR(rkey);
-               goto out;
-       }
-
-       rcu_read_lock();
-       rkey->perm |= KEY_USR_VIEW;
-
-       ret = key_validate(rkey);
-       if (ret < 0)
-               goto out_up;
-
-       payload = rcu_dereference(rkey->payload.rcudata);
-       if (IS_ERR_OR_NULL(payload)) {
-               ret = PTR_ERR(payload);
-               goto out_up;
-       }
-
-       ret = payload->datalen;
-       if (ret > 0 && ret <= data_size)
-               memcpy(data, payload->data, ret);
-       else
-               ret = -EINVAL;
-
-out_up:
-       rcu_read_unlock();
-       key_put(rkey);
-out:
-       return ret;
-}
-
-/* ID -> Name */
-static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
-                                    size_t buflen, struct idmap *idmap)
-{
-       char id_str[NFS_UINT_MAXLEN];
-       int id_len;
-       ssize_t ret;
-
-       id_len = snprintf(id_str, sizeof(id_str), "%u", id);
-       ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
-       if (ret < 0)
-               return -EINVAL;
-       return ret;
-}
-
-/* Name -> ID */
-static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type,
-                              __u32 *id, struct idmap *idmap)
-{
-       char id_str[NFS_UINT_MAXLEN];
-       long id_long;
-       ssize_t data_size;
-       int ret = 0;
-
-       data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap);
-       if (data_size <= 0) {
-               ret = -EINVAL;
-       } else {
-               ret = kstrtol(id_str, 10, &id_long);
-               *id = (__u32)id_long;
-       }
-       return ret;
-}
-
-/* idmap classic begins here */
-
-enum {
-       Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
-};
-
-static const match_table_t nfs_idmap_tokens = {
-       { Opt_find_uid, "uid:%s" },
-       { Opt_find_gid, "gid:%s" },
-       { Opt_find_user, "user:%s" },
-       { Opt_find_group, "group:%s" },
-       { Opt_find_err, NULL }
-};
-
-static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
-static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
-                                  size_t);
-static void idmap_release_pipe(struct inode *);
-static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
-
-static const struct rpc_pipe_ops idmap_upcall_ops = {
-       .upcall         = rpc_pipe_generic_upcall,
-       .downcall       = idmap_pipe_downcall,
-       .release_pipe   = idmap_release_pipe,
-       .destroy_msg    = idmap_pipe_destroy_msg,
-};
-
-static struct key_type key_type_id_resolver_legacy = {
-       .name           = "id_legacy",
-       .preparse       = user_preparse,
-       .free_preparse  = user_free_preparse,
-       .instantiate    = generic_key_instantiate,
-       .revoke         = user_revoke,
-       .destroy        = user_destroy,
-       .describe       = user_describe,
-       .read           = user_read,
-       .request_key    = nfs_idmap_legacy_upcall,
-};
-
-static void nfs_idmap_pipe_destroy(struct dentry *dir,
-               struct rpc_pipe_dir_object *pdo)
-{
-       struct idmap *idmap = pdo->pdo_data;
-       struct rpc_pipe *pipe = idmap->idmap_pipe;
-
-       if (pipe->dentry) {
-               rpc_unlink(pipe->dentry);
-               pipe->dentry = NULL;
-       }
-}
-
-static int nfs_idmap_pipe_create(struct dentry *dir,
-               struct rpc_pipe_dir_object *pdo)
-{
-       struct idmap *idmap = pdo->pdo_data;
-       struct rpc_pipe *pipe = idmap->idmap_pipe;
-       struct dentry *dentry;
-
-       dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-       pipe->dentry = dentry;
-       return 0;
-}
-
-static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = {
-       .create = nfs_idmap_pipe_create,
-       .destroy = nfs_idmap_pipe_destroy,
-};
-
-int
-nfs_idmap_new(struct nfs_client *clp)
-{
-       struct idmap *idmap;
-       struct rpc_pipe *pipe;
-       int error;
-
-       idmap = kzalloc(sizeof(*idmap), GFP_KERNEL);
-       if (idmap == NULL)
-               return -ENOMEM;
-
-       rpc_init_pipe_dir_object(&idmap->idmap_pdo,
-                       &nfs_idmap_pipe_dir_object_ops,
-                       idmap);
-
-       pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
-       if (IS_ERR(pipe)) {
-               error = PTR_ERR(pipe);
-               goto err;
-       }
-       idmap->idmap_pipe = pipe;
-       mutex_init(&idmap->idmap_mutex);
-
-       error = rpc_add_pipe_dir_object(clp->cl_net,
-                       &clp->cl_rpcclient->cl_pipedir_objects,
-                       &idmap->idmap_pdo);
-       if (error)
-               goto err_destroy_pipe;
-
-       clp->cl_idmap = idmap;
-       return 0;
-err_destroy_pipe:
-       rpc_destroy_pipe_data(idmap->idmap_pipe);
-err:
-       kfree(idmap);
-       return error;
-}
-
-void
-nfs_idmap_delete(struct nfs_client *clp)
-{
-       struct idmap *idmap = clp->cl_idmap;
-
-       if (!idmap)
-               return;
-       clp->cl_idmap = NULL;
-       rpc_remove_pipe_dir_object(clp->cl_net,
-                       &clp->cl_rpcclient->cl_pipedir_objects,
-                       &idmap->idmap_pdo);
-       rpc_destroy_pipe_data(idmap->idmap_pipe);
-       kfree(idmap);
-}
-
-int nfs_idmap_init(void)
-{
-       int ret;
-       ret = nfs_idmap_init_keyring();
-       if (ret != 0)
-               goto out;
-out:
-       return ret;
-}
-
-void nfs_idmap_quit(void)
-{
-       nfs_idmap_quit_keyring();
-}
-
-static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
-                                    struct idmap_msg *im,
-                                    struct rpc_pipe_msg *msg)
-{
-       substring_t substr;
-       int token, ret;
-
-       im->im_type = IDMAP_TYPE_GROUP;
-       token = match_token(desc, nfs_idmap_tokens, &substr);
-
-       switch (token) {
-       case Opt_find_uid:
-               im->im_type = IDMAP_TYPE_USER;
-       case Opt_find_gid:
-               im->im_conv = IDMAP_CONV_NAMETOID;
-               ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
-               break;
-
-       case Opt_find_user:
-               im->im_type = IDMAP_TYPE_USER;
-       case Opt_find_group:
-               im->im_conv = IDMAP_CONV_IDTONAME;
-               ret = match_int(&substr, &im->im_id);
-               break;
-
-       default:
-               ret = -EINVAL;
-               goto out;
-       }
-
-       msg->data = im;
-       msg->len  = sizeof(struct idmap_msg);
-
-out:
-       return ret;
-}
-
-static bool
-nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
-               struct idmap_legacy_upcalldata *data)
-{
-       if (idmap->idmap_upcall_data != NULL) {
-               WARN_ON_ONCE(1);
-               return false;
-       }
-       idmap->idmap_upcall_data = data;
-       return true;
-}
-
-static void
-nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
-{
-       struct key_construction *cons = idmap->idmap_upcall_data->key_cons;
-
-       kfree(idmap->idmap_upcall_data);
-       idmap->idmap_upcall_data = NULL;
-       complete_request_key(cons, ret);
-}
-
-static void
-nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
-{
-       if (idmap->idmap_upcall_data != NULL)
-               nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
-}
-
-static int nfs_idmap_legacy_upcall(struct key_construction *cons,
-                                  const char *op,
-                                  void *aux)
-{
-       struct idmap_legacy_upcalldata *data;
-       struct rpc_pipe_msg *msg;
-       struct idmap_msg *im;
-       struct idmap *idmap = (struct idmap *)aux;
-       struct key *key = cons->key;
-       int ret = -ENOMEM;
-
-       /* msg and im are freed in idmap_pipe_destroy_msg */
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
-       if (!data)
-               goto out1;
-
-       msg = &data->pipe_msg;
-       im = &data->idmap_msg;
-       data->idmap = idmap;
-       data->key_cons = cons;
-
-       ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
-       if (ret < 0)
-               goto out2;
-
-       ret = -EAGAIN;
-       if (!nfs_idmap_prepare_pipe_upcall(idmap, data))
-               goto out2;
-
-       ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
-       if (ret < 0)
-               nfs_idmap_abort_pipe_upcall(idmap, ret);
-
-       return ret;
-out2:
-       kfree(data);
-out1:
-       complete_request_key(cons, ret);
-       return ret;
-}
-
-static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
-{
-       return key_instantiate_and_link(key, data, datalen,
-                                       id_resolver_cache->thread_keyring,
-                                       authkey);
-}
-
-static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
-               struct idmap_msg *upcall,
-               struct key *key, struct key *authkey)
-{
-       char id_str[NFS_UINT_MAXLEN];
-       size_t len;
-       int ret = -ENOKEY;
-
-       /* ret = -ENOKEY */
-       if (upcall->im_type != im->im_type || upcall->im_conv != im->im_conv)
-               goto out;
-       switch (im->im_conv) {
-       case IDMAP_CONV_NAMETOID:
-               if (strcmp(upcall->im_name, im->im_name) != 0)
-                       break;
-               /* Note: here we store the NUL terminator too */
-               len = sprintf(id_str, "%d", im->im_id) + 1;
-               ret = nfs_idmap_instantiate(key, authkey, id_str, len);
-               break;
-       case IDMAP_CONV_IDTONAME:
-               if (upcall->im_id != im->im_id)
-                       break;
-               len = strlen(im->im_name);
-               ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
-               break;
-       default:
-               ret = -EINVAL;
-       }
-out:
-       return ret;
-}
-
-static ssize_t
-idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
-{
-       struct rpc_inode *rpci = RPC_I(file_inode(filp));
-       struct idmap *idmap = (struct idmap *)rpci->private;
-       struct key_construction *cons;
-       struct idmap_msg im;
-       size_t namelen_in;
-       int ret = -ENOKEY;
-
-       /* If instantiation is successful, anyone waiting for key construction
-        * will have been woken up and someone else may now have used
-        * idmap_key_cons - so after this point we may no longer touch it.
-        */
-       if (idmap->idmap_upcall_data == NULL)
-               goto out_noupcall;
-
-       cons = idmap->idmap_upcall_data->key_cons;
-
-       if (mlen != sizeof(im)) {
-               ret = -ENOSPC;
-               goto out;
-       }
-
-       if (copy_from_user(&im, src, mlen) != 0) {
-               ret = -EFAULT;
-               goto out;
-       }
-
-       if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
-               ret = -ENOKEY;
-               goto out;
-       }
-
-       namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
-       if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
-               ret = -EINVAL;
-               goto out;
-}
-
-       ret = nfs_idmap_read_and_verify_message(&im,
-                       &idmap->idmap_upcall_data->idmap_msg,
-                       cons->key, cons->authkey);
-       if (ret >= 0) {
-               key_set_timeout(cons->key, nfs_idmap_cache_timeout);
-               ret = mlen;
-       }
-
-out:
-       nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
-out_noupcall:
-       return ret;
-}
-
-static void
-idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
-{
-       struct idmap_legacy_upcalldata *data = container_of(msg,
-                       struct idmap_legacy_upcalldata,
-                       pipe_msg);
-       struct idmap *idmap = data->idmap;
-
-       if (msg->errno)
-               nfs_idmap_abort_pipe_upcall(idmap, msg->errno);
-}
-
-static void
-idmap_release_pipe(struct inode *inode)
-{
-       struct rpc_inode *rpci = RPC_I(inode);
-       struct idmap *idmap = (struct idmap *)rpci->private;
-
-       nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
-}
-
-int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
-{
-       struct idmap *idmap = server->nfs_client->cl_idmap;
-       __u32 id = -1;
-       int ret = 0;
-
-       if (!nfs_map_string_to_numeric(name, namelen, &id))
-               ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
-       if (ret == 0) {
-               *uid = make_kuid(&init_user_ns, id);
-               if (!uid_valid(*uid))
-                       ret = -ERANGE;
-       }
-       trace_nfs4_map_name_to_uid(name, namelen, id, ret);
-       return ret;
-}
-
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
-{
-       struct idmap *idmap = server->nfs_client->cl_idmap;
-       __u32 id = -1;
-       int ret = 0;
-
-       if (!nfs_map_string_to_numeric(name, namelen, &id))
-               ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
-       if (ret == 0) {
-               *gid = make_kgid(&init_user_ns, id);
-               if (!gid_valid(*gid))
-                       ret = -ERANGE;
-       }
-       trace_nfs4_map_group_to_gid(name, namelen, id, ret);
-       return ret;
-}
-
-int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
-{
-       struct idmap *idmap = server->nfs_client->cl_idmap;
-       int ret = -EINVAL;
-       __u32 id;
-
-       id = from_kuid(&init_user_ns, uid);
-       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-               ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
-       if (ret < 0)
-               ret = nfs_map_numeric_to_string(id, buf, buflen);
-       trace_nfs4_map_uid_to_name(buf, ret, id, ret);
-       return ret;
-}
-int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
-{
-       struct idmap *idmap = server->nfs_client->cl_idmap;
-       int ret = -EINVAL;
-       __u32 id;
-
-       id = from_kgid(&init_user_ns, gid);
-       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-               ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
-       if (ret < 0)
-               ret = nfs_map_numeric_to_string(id, buf, buflen);
-       trace_nfs4_map_gid_to_group(buf, ret, id, ret);
-       return ret;
-}
index d42dff6..f734562 100644 (file)
@@ -133,6 +133,13 @@ void nfs_evict_inode(struct inode *inode)
        nfs_clear_inode(inode);
 }
 
+int nfs_sync_inode(struct inode *inode)
+{
+       nfs_inode_dio_wait(inode);
+       return nfs_wb_all(inode);
+}
+EXPORT_SYMBOL_GPL(nfs_sync_inode);
+
 /**
  * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
  */
@@ -192,7 +199,6 @@ void nfs_zap_caches(struct inode *inode)
        nfs_zap_caches_locked(inode);
        spin_unlock(&inode->i_lock);
 }
-EXPORT_SYMBOL_GPL(nfs_zap_caches);
 
 void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
 {
@@ -495,7 +501,7 @@ EXPORT_SYMBOL_GPL(nfs_fhget);
 int
 nfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct nfs_fattr *fattr;
        int error = -ENOMEM;
 
@@ -525,10 +531,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
        trace_nfs_setattr_enter(inode);
 
        /* Write all dirty data */
-       if (S_ISREG(inode->i_mode)) {
-               nfs_inode_dio_wait(inode);
-               nfs_wb_all(inode);
-       }
+       if (S_ISREG(inode->i_mode))
+               nfs_sync_inode(inode);
 
        fattr = nfs_alloc_fattr();
        if (fattr == NULL)
@@ -621,7 +625,7 @@ static void nfs_request_parent_use_readdirplus(struct dentry *dentry)
        struct dentry *parent;
 
        parent = dget_parent(dentry);
-       nfs_force_use_readdirplus(parent->d_inode);
+       nfs_force_use_readdirplus(d_inode(parent));
        dput(parent);
 }
 
@@ -637,15 +641,16 @@ static bool nfs_need_revalidate_inode(struct inode *inode)
 
 int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
        int err = 0;
 
        trace_nfs_getattr_enter(inode);
        /* Flush out writes to the server in order to update c/mtime.  */
        if (S_ISREG(inode->i_mode)) {
-               nfs_inode_dio_wait(inode);
-               err = filemap_write_and_wait(inode->i_mapping);
+               mutex_lock(&inode->i_mutex);
+               err = nfs_sync_inode(inode);
+               mutex_unlock(&inode->i_mutex);
                if (err)
                        goto out;
        }
@@ -708,7 +713,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
 struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
 {
        struct nfs_lock_context *res, *new = NULL;
-       struct inode *inode = ctx->dentry->d_inode;
+       struct inode *inode = d_inode(ctx->dentry);
 
        spin_lock(&inode->i_lock);
        res = __nfs_find_lock_context(ctx);
@@ -736,7 +741,7 @@ EXPORT_SYMBOL_GPL(nfs_get_lock_context);
 void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
 {
        struct nfs_open_context *ctx = l_ctx->open_context;
-       struct inode *inode = ctx->dentry->d_inode;
+       struct inode *inode = d_inode(ctx->dentry);
 
        if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
                return;
@@ -763,7 +768,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
                return;
        if (!is_sync)
                return;
-       inode = ctx->dentry->d_inode;
+       inode = d_inode(ctx->dentry);
        if (!list_empty(&NFS_I(inode)->open_files))
                return;
        server = NFS_SERVER(inode);
@@ -810,7 +815,7 @@ EXPORT_SYMBOL_GPL(get_nfs_open_context);
 
 static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 {
-       struct inode *inode = ctx->dentry->d_inode;
+       struct inode *inode = d_inode(ctx->dentry);
        struct super_block *sb = ctx->dentry->d_sb;
 
        if (!list_empty(&ctx->list)) {
@@ -842,7 +847,7 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context);
  */
 void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
 {
-       struct inode *inode = ctx->dentry->d_inode;
+       struct inode *inode = d_inode(ctx->dentry);
        struct nfs_inode *nfsi = NFS_I(inode);
 
        spin_lock(&inode->i_lock);
@@ -885,7 +890,7 @@ static void nfs_file_clear_open_context(struct file *filp)
        struct nfs_open_context *ctx = nfs_file_open_context(filp);
 
        if (ctx) {
-               struct inode *inode = ctx->dentry->d_inode;
+               struct inode *inode = d_inode(ctx->dentry);
 
                filp->private_data = NULL;
                spin_lock(&inode->i_lock);
@@ -1588,6 +1593,19 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
 }
 EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc);
 
+
+static inline bool nfs_fileid_valid(struct nfs_inode *nfsi,
+                                   struct nfs_fattr *fattr)
+{
+       bool ret1 = true, ret2 = true;
+
+       if (fattr->valid & NFS_ATTR_FATTR_FILEID)
+               ret1 = (nfsi->fileid == fattr->fileid);
+       if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
+               ret2 = (nfsi->fileid == fattr->mounted_on_fileid);
+       return ret1 || ret2;
+}
+
 /*
  * Many nfs protocol calls return the new file attributes after
  * an operation.  Here we update the inode to reflect the state
@@ -1614,7 +1632,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        nfs_display_fhandle_hash(NFS_FH(inode)),
                        atomic_read(&inode->i_count), fattr->valid);
 
-       if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
+       if (!nfs_fileid_valid(nfsi, fattr)) {
                printk(KERN_ERR "NFS: server %s error: fileid changed\n"
                        "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
                        NFS_SERVER(inode)->nfs_client->cl_hostname,
@@ -1819,7 +1837,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 struct inode *nfs_alloc_inode(struct super_block *sb)
 {
        struct nfs_inode *nfsi;
-       nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
+       nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
        if (!nfsi)
                return NULL;
        nfsi->flags = 0UL;
index b5a0afc..c8162c6 100644 (file)
@@ -139,7 +139,7 @@ EXPORT_SYMBOL_GPL(nfs_path);
 struct vfsmount *nfs_d_automount(struct path *path)
 {
        struct vfsmount *mnt;
-       struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
+       struct nfs_server *server = NFS_SERVER(d_inode(path->dentry));
        struct nfs_fh *fh = NULL;
        struct nfs_fattr *fattr = NULL;
 
@@ -180,16 +180,16 @@ out_nofree:
 static int
 nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
-       if (NFS_FH(dentry->d_inode)->size != 0)
+       if (NFS_FH(d_inode(dentry))->size != 0)
                return nfs_getattr(mnt, dentry, stat);
-       generic_fillattr(dentry->d_inode, stat);
+       generic_fillattr(d_inode(dentry), stat);
        return 0;
 }
 
 static int
 nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       if (NFS_FH(dentry->d_inode)->size != 0)
+       if (NFS_FH(d_inode(dentry))->size != 0)
                return nfs_setattr(dentry, attr);
        return -EACCES;
 }
@@ -279,7 +279,7 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
        struct dentry *parent = dget_parent(dentry);
 
        /* Look it up again to get its attributes */
-       err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr, NULL);
+       err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, fh, fattr, NULL);
        dput(parent);
        if (err != 0)
                return ERR_PTR(err);
index 658e586..1ebe2fc 100644 (file)
@@ -279,7 +279,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
 ssize_t
 nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        ssize_t result = 0;
        int error;
 
index 1f11d25..cb28cce 100644 (file)
@@ -120,7 +120,7 @@ static int
 nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                        struct iattr *sattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct nfs3_sattrargs   arg = {
                .fh             = NFS_FH(inode),
                .sattr          = sattr,
@@ -386,13 +386,13 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                 * not sure this buys us anything (and I'd have
                 * to revamp the NFSv3 XDR code) */
                status = nfs3_proc_setattr(dentry, data->res.fattr, sattr);
-               nfs_post_op_update_inode(dentry->d_inode, data->res.fattr);
+               nfs_post_op_update_inode(d_inode(dentry), data->res.fattr);
                dprintk("NFS reply setattr (post-create): %d\n", status);
                if (status != 0)
                        goto out_release_acls;
        }
 
-       status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+       status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
 out_release_acls:
        posix_acl_release(acl);
@@ -570,7 +570,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
        if (status != 0)
                goto out_release_acls;
 
-       status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+       status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
 out_release_acls:
        posix_acl_release(acl);
@@ -623,7 +623,7 @@ static int
 nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                  u64 cookie, struct page **pages, unsigned int count, int plus)
 {
-       struct inode            *dir = dentry->d_inode;
+       struct inode            *dir = d_inode(dentry);
        __be32                  *verf = NFS_I(dir)->cookieverf;
        struct nfs3_readdirargs arg = {
                .fh             = NFS_FH(dir),
@@ -715,7 +715,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
        if (status != 0)
                goto out_release_acls;
 
-       status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+       status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
 
 out_release_acls:
        posix_acl_release(acl);
index cb17072..3a9e752 100644 (file)
@@ -36,13 +36,16 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
                                 loff_t offset, loff_t len)
 {
        struct inode *inode = file_inode(filep);
+       struct nfs_server *server = NFS_SERVER(inode);
        struct nfs42_falloc_args args = {
                .falloc_fh      = NFS_FH(inode),
                .falloc_offset  = offset,
                .falloc_length  = len,
+               .falloc_bitmask = server->cache_consistency_bitmask,
+       };
+       struct nfs42_falloc_res res = {
+               .falloc_server  = server,
        };
-       struct nfs42_falloc_res res;
-       struct nfs_server *server = NFS_SERVER(inode);
        int status;
 
        msg->rpc_argp = &args;
@@ -52,8 +55,17 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
        if (status)
                return status;
 
-       return nfs4_call_sync(server->client, server, msg,
-                             &args.seq_args, &res.seq_res, 0);
+       res.falloc_fattr = nfs_alloc_fattr();
+       if (!res.falloc_fattr)
+               return -ENOMEM;
+
+       status = nfs4_call_sync(server->client, server, msg,
+                               &args.seq_args, &res.seq_res, 0);
+       if (status == 0)
+               status = nfs_post_op_update_inode(inode, res.falloc_fattr);
+
+       kfree(res.falloc_fattr);
+       return status;
 }
 
 static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
@@ -84,9 +96,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
        if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
                return -EOPNOTSUPP;
 
+       mutex_lock(&inode->i_mutex);
+
        err = nfs42_proc_fallocate(&msg, filep, offset, len);
        if (err == -EOPNOTSUPP)
                NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
+
+       mutex_unlock(&inode->i_mutex);
        return err;
 }
 
@@ -101,9 +117,16 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
        if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE))
                return -EOPNOTSUPP;
 
+       nfs_wb_all(inode);
+       mutex_lock(&inode->i_mutex);
+
        err = nfs42_proc_fallocate(&msg, filep, offset, len);
+       if (err == 0)
+               truncate_pagecache_range(inode, offset, (offset + len) -1);
        if (err == -EOPNOTSUPP)
                NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
+
+       mutex_unlock(&inode->i_mutex);
        return err;
 }
 
index 038a7e1..1a25b27 100644 (file)
 
 #define NFS4_enc_allocate_sz           (compound_encode_hdr_maxsz + \
                                         encode_putfh_maxsz + \
-                                        encode_allocate_maxsz)
+                                        encode_allocate_maxsz + \
+                                        encode_getattr_maxsz)
 #define NFS4_dec_allocate_sz           (compound_decode_hdr_maxsz + \
                                         decode_putfh_maxsz + \
-                                        decode_allocate_maxsz)
+                                        decode_allocate_maxsz + \
+                                        decode_getattr_maxsz)
 #define NFS4_enc_deallocate_sz         (compound_encode_hdr_maxsz + \
                                         encode_putfh_maxsz + \
-                                        encode_deallocate_maxsz)
+                                        encode_deallocate_maxsz + \
+                                        encode_getattr_maxsz)
 #define NFS4_dec_deallocate_sz         (compound_decode_hdr_maxsz + \
                                         decode_putfh_maxsz + \
-                                        decode_deallocate_maxsz)
+                                        decode_deallocate_maxsz + \
+                                        decode_getattr_maxsz)
 #define NFS4_enc_seek_sz               (compound_encode_hdr_maxsz + \
                                         encode_putfh_maxsz + \
                                         encode_seek_maxsz)
@@ -92,6 +96,7 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
        encode_sequence(xdr, &args->seq_args, &hdr);
        encode_putfh(xdr, args->falloc_fh, &hdr);
        encode_allocate(xdr, args, &hdr);
+       encode_getfattr(xdr, args->falloc_bitmask, &hdr);
        encode_nops(&hdr);
 }
 
@@ -110,6 +115,7 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
        encode_sequence(xdr, &args->seq_args, &hdr);
        encode_putfh(xdr, args->falloc_fh, &hdr);
        encode_deallocate(xdr, args, &hdr);
+       encode_getfattr(xdr, args->falloc_bitmask, &hdr);
        encode_nops(&hdr);
 }
 
@@ -183,6 +189,9 @@ static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
        if (status)
                goto out;
        status = decode_allocate(xdr, res);
+       if (status)
+               goto out;
+       decode_getfattr(xdr, res->falloc_fattr, res->falloc_server);
 out:
        return status;
 }
@@ -207,6 +216,9 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
        if (status)
                goto out;
        status = decode_deallocate(xdr, res);
+       if (status)
+               goto out;
+       decode_getfattr(xdr, res->falloc_fattr, res->falloc_server);
 out:
        return status;
 }
index 86d6214..e42be52 100644 (file)
@@ -4,7 +4,6 @@
  */
 #include <linux/module.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
 #include <linux/nfs_mount.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/auth.h>
@@ -15,6 +14,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "nfs4session.h"
+#include "nfs4idmap.h"
 #include "pnfs.h"
 #include "netns.h"
 
@@ -1130,7 +1130,7 @@ error:
  */
 static int nfs_probe_destination(struct nfs_server *server)
 {
-       struct inode *inode = server->super->s_root->d_inode;
+       struct inode *inode = d_inode(server->super->s_root);
        struct nfs_fattr *fattr;
        int error;
 
index 0181cde..f58c17b 100644 (file)
@@ -10,6 +10,8 @@
 #include "fscache.h"
 #include "pnfs.h"
 
+#include "nfstrace.h"
+
 #ifdef CONFIG_NFS_V4_2
 #include "nfs42.h"
 #endif
@@ -46,7 +48,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
        openflags &= ~(O_CREAT|O_EXCL);
 
        parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
 
        ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
        err = PTR_ERR(ctx);
@@ -57,7 +59,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
        if (openflags & O_TRUNC) {
                attr.ia_valid |= ATTR_SIZE;
                attr.ia_size = 0;
-               nfs_wb_all(inode);
+               nfs_sync_inode(inode);
        }
 
        inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened);
@@ -74,7 +76,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
                        goto out_drop;
                }
        }
-       if (inode != dentry->d_inode)
+       if (inode != d_inode(dentry))
                goto out_drop;
 
        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -100,6 +102,9 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        int ret;
        struct inode *inode = file_inode(file);
 
+       trace_nfs_fsync_enter(inode);
+
+       nfs_inode_dio_wait(inode);
        do {
                ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                if (ret != 0)
@@ -107,7 +112,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                mutex_lock(&inode->i_mutex);
                ret = nfs_file_fsync_commit(file, start, end, datasync);
                if (!ret)
-                       ret = pnfs_layoutcommit_inode(inode, true);
+                       ret = pnfs_sync_inode(inode, !!datasync);
                mutex_unlock(&inode->i_mutex);
                /*
                 * If nfs_file_fsync_commit detected a server reboot, then
@@ -118,6 +123,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                end = LLONG_MAX;
        } while (ret == -EAGAIN);
 
+       trace_nfs_fsync_exit(inode, ret);
        return ret;
 }
 
@@ -152,15 +158,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
        if (ret < 0)
                return ret;
 
-       mutex_lock(&inode->i_mutex);
        if (mode & FALLOC_FL_PUNCH_HOLE)
-               ret = nfs42_proc_deallocate(filep, offset, len);
-       else
-               ret = nfs42_proc_allocate(filep, offset, len);
-       mutex_unlock(&inode->i_mutex);
-
-       nfs_zap_caches(inode);
-       return ret;
+               return nfs42_proc_deallocate(filep, offset, len);
+       return nfs42_proc_allocate(filep, offset, len);
 }
 #endif /* CONFIG_NFS_V4_2 */
 
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
new file mode 100644 (file)
index 0000000..2e1737c
--- /dev/null
@@ -0,0 +1,792 @@
+/*
+ * fs/nfs/idmap.c
+ *
+ *  UID and GID to name mapping for clients.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <linux/types.h>
+#include <linux/parser.h>
+#include <linux/fs.h>
+#include <net/net_namespace.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
+#include <linux/module.h>
+
+#include "internal.h"
+#include "netns.h"
+#include "nfs4idmap.h"
+#include "nfs4trace.h"
+
+#define NFS_UINT_MAXLEN 11
+
+static const struct cred *id_resolver_cache;
+static struct key_type key_type_id_resolver_legacy;
+
+struct idmap_legacy_upcalldata {
+       struct rpc_pipe_msg pipe_msg;
+       struct idmap_msg idmap_msg;
+       struct key_construction *key_cons;
+       struct idmap *idmap;
+};
+
+struct idmap {
+       struct rpc_pipe_dir_object idmap_pdo;
+       struct rpc_pipe         *idmap_pipe;
+       struct idmap_legacy_upcalldata *idmap_upcall_data;
+       struct mutex            idmap_mutex;
+};
+
+/**
+ * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
+ * @fattr: fully initialised struct nfs_fattr
+ * @owner_name: owner name string cache
+ * @group_name: group name string cache
+ */
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+               struct nfs4_string *owner_name,
+               struct nfs4_string *group_name)
+{
+       fattr->owner_name = owner_name;
+       fattr->group_name = group_name;
+}
+
+static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr)
+{
+       fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME;
+       kfree(fattr->owner_name->data);
+}
+
+static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
+{
+       fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME;
+       kfree(fattr->group_name->data);
+}
+
+static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+       struct nfs4_string *owner = fattr->owner_name;
+       kuid_t uid;
+
+       if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
+               return false;
+       if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) {
+               fattr->uid = uid;
+               fattr->valid |= NFS_ATTR_FATTR_OWNER;
+       }
+       return true;
+}
+
+static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+       struct nfs4_string *group = fattr->group_name;
+       kgid_t gid;
+
+       if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
+               return false;
+       if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) {
+               fattr->gid = gid;
+               fattr->valid |= NFS_ATTR_FATTR_GROUP;
+       }
+       return true;
+}
+
+/**
+ * nfs_fattr_free_names - free up the NFSv4 owner and group strings
+ * @fattr: a fully initialised nfs_fattr structure
+ */
+void nfs_fattr_free_names(struct nfs_fattr *fattr)
+{
+       if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)
+               nfs_fattr_free_owner_name(fattr);
+       if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)
+               nfs_fattr_free_group_name(fattr);
+}
+
+/**
+ * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free
+ * @server: pointer to the filesystem nfs_server structure
+ * @fattr: a fully initialised nfs_fattr structure
+ *
+ * This helper maps the cached NFSv4 owner/group strings in fattr into
+ * their numeric uid/gid equivalents, and then frees the cached strings.
+ */
+void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+       if (nfs_fattr_map_owner_name(server, fattr))
+               nfs_fattr_free_owner_name(fattr);
+       if (nfs_fattr_map_group_name(server, fattr))
+               nfs_fattr_free_group_name(fattr);
+}
+
+int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
+{
+       unsigned long val;
+       char buf[16];
+
+       if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
+               return 0;
+       memcpy(buf, name, namelen);
+       buf[namelen] = '\0';
+       if (kstrtoul(buf, 0, &val) != 0)
+               return 0;
+       *res = val;
+       return 1;
+}
+EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric);
+
+static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
+{
+       return snprintf(buf, buflen, "%u", id);
+}
+
+static struct key_type key_type_id_resolver = {
+       .name           = "id_resolver",
+       .preparse       = user_preparse,
+       .free_preparse  = user_free_preparse,
+       .instantiate    = generic_key_instantiate,
+       .revoke         = user_revoke,
+       .destroy        = user_destroy,
+       .describe       = user_describe,
+       .read           = user_read,
+};
+
+static int nfs_idmap_init_keyring(void)
+{
+       struct cred *cred;
+       struct key *keyring;
+       int ret = 0;
+
+       printk(KERN_NOTICE "NFS: Registering the %s key type\n",
+               key_type_id_resolver.name);
+
+       cred = prepare_kernel_cred(NULL);
+       if (!cred)
+               return -ENOMEM;
+
+       keyring = keyring_alloc(".id_resolver",
+                               GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
+                               (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                               KEY_USR_VIEW | KEY_USR_READ,
+                               KEY_ALLOC_NOT_IN_QUOTA, NULL);
+       if (IS_ERR(keyring)) {
+               ret = PTR_ERR(keyring);
+               goto failed_put_cred;
+       }
+
+       ret = register_key_type(&key_type_id_resolver);
+       if (ret < 0)
+               goto failed_put_key;
+
+       ret = register_key_type(&key_type_id_resolver_legacy);
+       if (ret < 0)
+               goto failed_reg_legacy;
+
+       set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
+       cred->thread_keyring = keyring;
+       cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+       id_resolver_cache = cred;
+       return 0;
+
+failed_reg_legacy:
+       unregister_key_type(&key_type_id_resolver);
+failed_put_key:
+       key_put(keyring);
+failed_put_cred:
+       put_cred(cred);
+       return ret;
+}
+
+static void nfs_idmap_quit_keyring(void)
+{
+       key_revoke(id_resolver_cache->thread_keyring);
+       unregister_key_type(&key_type_id_resolver);
+       unregister_key_type(&key_type_id_resolver_legacy);
+       put_cred(id_resolver_cache);
+}
+
+/*
+ * Assemble the description to pass to request_key()
+ * This function will allocate a new string and update dest to point
+ * at it.  The caller is responsible for freeing dest.
+ *
+ * On error 0 is returned.  Otherwise, the length of dest is returned.
+ */
+static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
+                               const char *type, size_t typelen, char **desc)
+{
+       char *cp;
+       size_t desclen = typelen + namelen + 2;
+
+       *desc = kmalloc(desclen, GFP_KERNEL);
+       if (!*desc)
+               return -ENOMEM;
+
+       cp = *desc;
+       memcpy(cp, type, typelen);
+       cp += typelen;
+       *cp++ = ':';
+
+       memcpy(cp, name, namelen);
+       cp += namelen;
+       *cp = '\0';
+       return desclen;
+}
+
+static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
+                                        const char *type, struct idmap *idmap)
+{
+       char *desc;
+       struct key *rkey;
+       ssize_t ret;
+
+       ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
+       if (ret <= 0)
+               return ERR_PTR(ret);
+
+       rkey = request_key(&key_type_id_resolver, desc, "");
+       if (IS_ERR(rkey)) {
+               mutex_lock(&idmap->idmap_mutex);
+               rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
+                                               desc, "", 0, idmap);
+               mutex_unlock(&idmap->idmap_mutex);
+       }
+       if (!IS_ERR(rkey))
+               set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
+
+       kfree(desc);
+       return rkey;
+}
+
+static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
+                                const char *type, void *data,
+                                size_t data_size, struct idmap *idmap)
+{
+       const struct cred *saved_cred;
+       struct key *rkey;
+       struct user_key_payload *payload;
+       ssize_t ret;
+
+       saved_cred = override_creds(id_resolver_cache);
+       rkey = nfs_idmap_request_key(name, namelen, type, idmap);
+       revert_creds(saved_cred);
+
+       if (IS_ERR(rkey)) {
+               ret = PTR_ERR(rkey);
+               goto out;
+       }
+
+       rcu_read_lock();
+       rkey->perm |= KEY_USR_VIEW;
+
+       ret = key_validate(rkey);
+       if (ret < 0)
+               goto out_up;
+
+       payload = rcu_dereference(rkey->payload.rcudata);
+       if (IS_ERR_OR_NULL(payload)) {
+               ret = PTR_ERR(payload);
+               goto out_up;
+       }
+
+       ret = payload->datalen;
+       if (ret > 0 && ret <= data_size)
+               memcpy(data, payload->data, ret);
+       else
+               ret = -EINVAL;
+
+out_up:
+       rcu_read_unlock();
+       key_put(rkey);
+out:
+       return ret;
+}
+
+/* ID -> Name */
+static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
+                                    size_t buflen, struct idmap *idmap)
+{
+       char id_str[NFS_UINT_MAXLEN];
+       int id_len;
+       ssize_t ret;
+
+       id_len = snprintf(id_str, sizeof(id_str), "%u", id);
+       ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
+       if (ret < 0)
+               return -EINVAL;
+       return ret;
+}
+
+/* Name -> ID */
+static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type,
+                              __u32 *id, struct idmap *idmap)
+{
+       char id_str[NFS_UINT_MAXLEN];
+       long id_long;
+       ssize_t data_size;
+       int ret = 0;
+
+       data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap);
+       if (data_size <= 0) {
+               ret = -EINVAL;
+       } else {
+               ret = kstrtol(id_str, 10, &id_long);
+               *id = (__u32)id_long;
+       }
+       return ret;
+}
+
+/* idmap classic begins here */
+
+enum {
+       Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
+};
+
+static const match_table_t nfs_idmap_tokens = {
+       { Opt_find_uid, "uid:%s" },
+       { Opt_find_gid, "gid:%s" },
+       { Opt_find_user, "user:%s" },
+       { Opt_find_group, "group:%s" },
+       { Opt_find_err, NULL }
+};
+
+static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
+static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
+                                  size_t);
+static void idmap_release_pipe(struct inode *);
+static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
+
+static const struct rpc_pipe_ops idmap_upcall_ops = {
+       .upcall         = rpc_pipe_generic_upcall,
+       .downcall       = idmap_pipe_downcall,
+       .release_pipe   = idmap_release_pipe,
+       .destroy_msg    = idmap_pipe_destroy_msg,
+};
+
+static struct key_type key_type_id_resolver_legacy = {
+       .name           = "id_legacy",
+       .preparse       = user_preparse,
+       .free_preparse  = user_free_preparse,
+       .instantiate    = generic_key_instantiate,
+       .revoke         = user_revoke,
+       .destroy        = user_destroy,
+       .describe       = user_describe,
+       .read           = user_read,
+       .request_key    = nfs_idmap_legacy_upcall,
+};
+
+static void nfs_idmap_pipe_destroy(struct dentry *dir,
+               struct rpc_pipe_dir_object *pdo)
+{
+       struct idmap *idmap = pdo->pdo_data;
+       struct rpc_pipe *pipe = idmap->idmap_pipe;
+
+       if (pipe->dentry) {
+               rpc_unlink(pipe->dentry);
+               pipe->dentry = NULL;
+       }
+}
+
+static int nfs_idmap_pipe_create(struct dentry *dir,
+               struct rpc_pipe_dir_object *pdo)
+{
+       struct idmap *idmap = pdo->pdo_data;
+       struct rpc_pipe *pipe = idmap->idmap_pipe;
+       struct dentry *dentry;
+
+       dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+       pipe->dentry = dentry;
+       return 0;
+}
+
+static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = {
+       .create = nfs_idmap_pipe_create,
+       .destroy = nfs_idmap_pipe_destroy,
+};
+
+int
+nfs_idmap_new(struct nfs_client *clp)
+{
+       struct idmap *idmap;
+       struct rpc_pipe *pipe;
+       int error;
+
+       idmap = kzalloc(sizeof(*idmap), GFP_KERNEL);
+       if (idmap == NULL)
+               return -ENOMEM;
+
+       rpc_init_pipe_dir_object(&idmap->idmap_pdo,
+                       &nfs_idmap_pipe_dir_object_ops,
+                       idmap);
+
+       pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
+       if (IS_ERR(pipe)) {
+               error = PTR_ERR(pipe);
+               goto err;
+       }
+       idmap->idmap_pipe = pipe;
+       mutex_init(&idmap->idmap_mutex);
+
+       error = rpc_add_pipe_dir_object(clp->cl_net,
+                       &clp->cl_rpcclient->cl_pipedir_objects,
+                       &idmap->idmap_pdo);
+       if (error)
+               goto err_destroy_pipe;
+
+       clp->cl_idmap = idmap;
+       return 0;
+err_destroy_pipe:
+       rpc_destroy_pipe_data(idmap->idmap_pipe);
+err:
+       kfree(idmap);
+       return error;
+}
+
+void
+nfs_idmap_delete(struct nfs_client *clp)
+{
+       struct idmap *idmap = clp->cl_idmap;
+
+       if (!idmap)
+               return;
+       clp->cl_idmap = NULL;
+       rpc_remove_pipe_dir_object(clp->cl_net,
+                       &clp->cl_rpcclient->cl_pipedir_objects,
+                       &idmap->idmap_pdo);
+       rpc_destroy_pipe_data(idmap->idmap_pipe);
+       kfree(idmap);
+}
+
+int nfs_idmap_init(void)
+{
+       int ret;
+       ret = nfs_idmap_init_keyring();
+       if (ret != 0)
+               goto out;
+out:
+       return ret;
+}
+
+void nfs_idmap_quit(void)
+{
+       nfs_idmap_quit_keyring();
+}
+
+static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
+                                    struct idmap_msg *im,
+                                    struct rpc_pipe_msg *msg)
+{
+       substring_t substr;
+       int token, ret;
+
+       im->im_type = IDMAP_TYPE_GROUP;
+       token = match_token(desc, nfs_idmap_tokens, &substr);
+
+       switch (token) {
+       case Opt_find_uid:
+               im->im_type = IDMAP_TYPE_USER;
+       case Opt_find_gid:
+               im->im_conv = IDMAP_CONV_NAMETOID;
+               ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
+               break;
+
+       case Opt_find_user:
+               im->im_type = IDMAP_TYPE_USER;
+       case Opt_find_group:
+               im->im_conv = IDMAP_CONV_IDTONAME;
+               ret = match_int(&substr, &im->im_id);
+               break;
+
+       default:
+               ret = -EINVAL;
+               goto out;
+       }
+
+       msg->data = im;
+       msg->len  = sizeof(struct idmap_msg);
+
+out:
+       return ret;
+}
+
+static bool
+nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
+               struct idmap_legacy_upcalldata *data)
+{
+       if (idmap->idmap_upcall_data != NULL) {
+               WARN_ON_ONCE(1);
+               return false;
+       }
+       idmap->idmap_upcall_data = data;
+       return true;
+}
+
+static void
+nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
+{
+       struct key_construction *cons = idmap->idmap_upcall_data->key_cons;
+
+       kfree(idmap->idmap_upcall_data);
+       idmap->idmap_upcall_data = NULL;
+       complete_request_key(cons, ret);
+}
+
+static void
+nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
+{
+       if (idmap->idmap_upcall_data != NULL)
+               nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
+}
+
+static int nfs_idmap_legacy_upcall(struct key_construction *cons,
+                                  const char *op,
+                                  void *aux)
+{
+       struct idmap_legacy_upcalldata *data;
+       struct rpc_pipe_msg *msg;
+       struct idmap_msg *im;
+       struct idmap *idmap = (struct idmap *)aux;
+       struct key *key = cons->key;
+       int ret = -ENOMEM;
+
+       /* msg and im are freed in idmap_pipe_destroy_msg */
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               goto out1;
+
+       msg = &data->pipe_msg;
+       im = &data->idmap_msg;
+       data->idmap = idmap;
+       data->key_cons = cons;
+
+       ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
+       if (ret < 0)
+               goto out2;
+
+       ret = -EAGAIN;
+       if (!nfs_idmap_prepare_pipe_upcall(idmap, data))
+               goto out2;
+
+       ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
+       if (ret < 0)
+               nfs_idmap_abort_pipe_upcall(idmap, ret);
+
+       return ret;
+out2:
+       kfree(data);
+out1:
+       complete_request_key(cons, ret);
+       return ret;
+}
+
+static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
+{
+       return key_instantiate_and_link(key, data, datalen,
+                                       id_resolver_cache->thread_keyring,
+                                       authkey);
+}
+
+static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
+               struct idmap_msg *upcall,
+               struct key *key, struct key *authkey)
+{
+       char id_str[NFS_UINT_MAXLEN];
+       size_t len;
+       int ret = -ENOKEY;
+
+       /* ret = -ENOKEY */
+       if (upcall->im_type != im->im_type || upcall->im_conv != im->im_conv)
+               goto out;
+       switch (im->im_conv) {
+       case IDMAP_CONV_NAMETOID:
+               if (strcmp(upcall->im_name, im->im_name) != 0)
+                       break;
+               /* Note: here we store the NUL terminator too */
+               len = sprintf(id_str, "%d", im->im_id) + 1;
+               ret = nfs_idmap_instantiate(key, authkey, id_str, len);
+               break;
+       case IDMAP_CONV_IDTONAME:
+               if (upcall->im_id != im->im_id)
+                       break;
+               len = strlen(im->im_name);
+               ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+out:
+       return ret;
+}
+
+static ssize_t
+idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+{
+       struct rpc_inode *rpci = RPC_I(file_inode(filp));
+       struct idmap *idmap = (struct idmap *)rpci->private;
+       struct key_construction *cons;
+       struct idmap_msg im;
+       size_t namelen_in;
+       int ret = -ENOKEY;
+
+       /* If instantiation is successful, anyone waiting for key construction
+        * will have been woken up and someone else may now have used
+        * idmap_key_cons - so after this point we may no longer touch it.
+        */
+       if (idmap->idmap_upcall_data == NULL)
+               goto out_noupcall;
+
+       cons = idmap->idmap_upcall_data->key_cons;
+
+       if (mlen != sizeof(im)) {
+               ret = -ENOSPC;
+               goto out;
+       }
+
+       if (copy_from_user(&im, src, mlen) != 0) {
+               ret = -EFAULT;
+               goto out;
+       }
+
+       if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
+               ret = -ENOKEY;
+               goto out;
+       }
+
+       namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
+       if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
+               ret = -EINVAL;
+               goto out;
+}
+
+       ret = nfs_idmap_read_and_verify_message(&im,
+                       &idmap->idmap_upcall_data->idmap_msg,
+                       cons->key, cons->authkey);
+       if (ret >= 0) {
+               key_set_timeout(cons->key, nfs_idmap_cache_timeout);
+               ret = mlen;
+       }
+
+out:
+       nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
+out_noupcall:
+       return ret;
+}
+
+static void
+idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+       struct idmap_legacy_upcalldata *data = container_of(msg,
+                       struct idmap_legacy_upcalldata,
+                       pipe_msg);
+       struct idmap *idmap = data->idmap;
+
+       if (msg->errno)
+               nfs_idmap_abort_pipe_upcall(idmap, msg->errno);
+}
+
+static void
+idmap_release_pipe(struct inode *inode)
+{
+       struct rpc_inode *rpci = RPC_I(inode);
+       struct idmap *idmap = (struct idmap *)rpci->private;
+
+       nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
+}
+
+int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
+{
+       struct idmap *idmap = server->nfs_client->cl_idmap;
+       __u32 id = -1;
+       int ret = 0;
+
+       if (!nfs_map_string_to_numeric(name, namelen, &id))
+               ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
+       if (ret == 0) {
+               *uid = make_kuid(&init_user_ns, id);
+               if (!uid_valid(*uid))
+                       ret = -ERANGE;
+       }
+       trace_nfs4_map_name_to_uid(name, namelen, id, ret);
+       return ret;
+}
+
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
+{
+       struct idmap *idmap = server->nfs_client->cl_idmap;
+       __u32 id = -1;
+       int ret = 0;
+
+       if (!nfs_map_string_to_numeric(name, namelen, &id))
+               ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
+       if (ret == 0) {
+               *gid = make_kgid(&init_user_ns, id);
+               if (!gid_valid(*gid))
+                       ret = -ERANGE;
+       }
+       trace_nfs4_map_group_to_gid(name, namelen, id, ret);
+       return ret;
+}
+
+int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
+{
+       struct idmap *idmap = server->nfs_client->cl_idmap;
+       int ret = -EINVAL;
+       __u32 id;
+
+       id = from_kuid(&init_user_ns, uid);
+       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
+               ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
+       if (ret < 0)
+               ret = nfs_map_numeric_to_string(id, buf, buflen);
+       trace_nfs4_map_uid_to_name(buf, ret, id, ret);
+       return ret;
+}
+int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
+{
+       struct idmap *idmap = server->nfs_client->cl_idmap;
+       int ret = -EINVAL;
+       __u32 id;
+
+       id = from_kgid(&init_user_ns, gid);
+       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
+               ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
+       if (ret < 0)
+               ret = nfs_map_numeric_to_string(id, buf, buflen);
+       trace_nfs4_map_gid_to_group(buf, ret, id, ret);
+       return ret;
+}
diff --git a/fs/nfs/nfs4idmap.h b/fs/nfs/nfs4idmap.h
new file mode 100644 (file)
index 0000000..de44d73
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * fs/nfs/nfs4idmap.h
+ *
+ *  UID and GID to name mapping for clients.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef NFS_IDMAP_H
+#define NFS_IDMAP_H
+
+#include <linux/uidgid.h>
+#include <uapi/linux/nfs_idmap.h>
+
+
+/* Forward declaration to make this header independent of others */
+struct nfs_client;
+struct nfs_server;
+struct nfs_fattr;
+struct nfs4_string;
+
+int nfs_idmap_init(void);
+void nfs_idmap_quit(void);
+int nfs_idmap_new(struct nfs_client *);
+void nfs_idmap_delete(struct nfs_client *);
+
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+               struct nfs4_string *owner_name,
+               struct nfs4_string *group_name);
+void nfs_fattr_free_names(struct nfs_fattr *);
+void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *);
+
+int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *);
+int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *);
+int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t);
+int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t);
+
+int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res);
+
+extern unsigned int nfs_idmap_cache_timeout;
+#endif /* NFS_IDMAP_H */
index 3d83cb1..f592672 100644 (file)
@@ -375,7 +375,7 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
        dprintk("%s: getting locations for %pd2\n",
                __func__, dentry);
 
-       err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
+       err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page);
        dput(parent);
        if (err != 0 ||
            fs_locations->nlocations <= 0 ||
@@ -396,7 +396,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
 {
        rpc_authflavor_t flavor = server->client->cl_auth->au_flavor;
        struct dentry *parent = dget_parent(dentry);
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        struct qstr *name = &dentry->d_name;
        struct rpc_clnt *client;
        struct vfsmount *mnt;
index 627f37c..45b35b9 100644 (file)
@@ -51,7 +51,6 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/module.h>
-#include <linux/nfs_idmap.h>
 #include <linux/xattr.h>
 #include <linux/utsname.h>
 #include <linux/freezer.h>
@@ -63,6 +62,7 @@
 #include "callback.h"
 #include "pnfs.h"
 #include "netns.h"
+#include "nfs4idmap.h"
 #include "nfs4session.h"
 #include "fscache.h"
 
@@ -185,7 +185,8 @@ const u32 nfs4_fattr_bitmap[3] = {
        | FATTR4_WORD1_SPACE_USED
        | FATTR4_WORD1_TIME_ACCESS
        | FATTR4_WORD1_TIME_METADATA
-       | FATTR4_WORD1_TIME_MODIFY,
+       | FATTR4_WORD1_TIME_MODIFY
+       | FATTR4_WORD1_MOUNTED_ON_FILEID,
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
        FATTR4_WORD2_SECURITY_LABEL
 #endif
@@ -293,7 +294,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
                *p++ = xdr_one;                         /* bitmap length */
                *p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
                *p++ = htonl(8);              /* attribute buffer length */
-               p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode));
+               p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry)));
        }
        
        *p++ = xdr_one;                                  /* next */
@@ -305,7 +306,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
        *p++ = xdr_one;                         /* bitmap length */
        *p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
        *p++ = htonl(8);              /* attribute buffer length */
-       p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
+       p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent)));
 
        readdir->pgbase = (char *)p - (char *)start;
        readdir->count -= readdir->pgbase;
@@ -1004,7 +1005,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
                gfp_t gfp_mask)
 {
        struct dentry *parent = dget_parent(dentry);
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        struct nfs_server *server = NFS_SERVER(dir);
        struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
        struct nfs4_opendata *p;
@@ -1057,7 +1058,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        case NFS4_OPEN_CLAIM_FH:
        case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
        case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
-               p->o_arg.fh = NFS_FH(dentry->d_inode);
+               p->o_arg.fh = NFS_FH(d_inode(dentry));
        }
        if (attrs != NULL && attrs->ia_valid != 0) {
                __u32 verf[2];
@@ -1794,7 +1795,7 @@ static const struct rpc_call_ops nfs4_open_confirm_ops = {
  */
 static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 {
-       struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
+       struct nfs_server *server = NFS_SERVER(d_inode(data->dir));
        struct rpc_task *task;
        struct  rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
@@ -1951,7 +1952,7 @@ static const struct rpc_call_ops nfs4_open_ops = {
 
 static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 {
-       struct inode *dir = data->dir->d_inode;
+       struct inode *dir = d_inode(data->dir);
        struct nfs_server *server = NFS_SERVER(dir);
        struct nfs_openargs *o_arg = &data->o_arg;
        struct nfs_openres *o_res = &data->o_res;
@@ -1998,7 +1999,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
 
 static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
 {
-       struct inode *dir = data->dir->d_inode;
+       struct inode *dir = d_inode(data->dir);
        struct nfs_openres *o_res = &data->o_res;
         int status;
 
@@ -2067,7 +2068,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
  */
 static int _nfs4_proc_open(struct nfs4_opendata *data)
 {
-       struct inode *dir = data->dir->d_inode;
+       struct inode *dir = d_inode(data->dir);
        struct nfs_server *server = NFS_SERVER(dir);
        struct nfs_openargs *o_arg = &data->o_arg;
        struct nfs_openres *o_res = &data->o_res;
@@ -2314,7 +2315,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
                set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
 
        dentry = opendata->dentry;
-       if (dentry->d_inode == NULL) {
+       if (d_really_is_negative(dentry)) {
                /* FIXME: Is this d_drop() ever needed? */
                d_drop(dentry);
                dentry = d_add_unique(dentry, igrab(state->inode));
@@ -2325,7 +2326,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
                        ctx->dentry = dget(dentry);
                }
                nfs_set_verifier(dentry,
-                               nfs_save_change_attribute(opendata->dir->d_inode));
+                               nfs_save_change_attribute(d_inode(opendata->dir)));
        }
 
        ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
@@ -2333,7 +2334,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
                goto out;
 
        ctx->state = state;
-       if (dentry->d_inode == state->inode) {
+       if (d_inode(dentry) == state->inode) {
                nfs_inode_attach_open_context(ctx);
                if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
                        nfs4_schedule_stateid_recovery(server, state);
@@ -2374,10 +2375,10 @@ static int _nfs4_do_open(struct inode *dir,
        status = nfs4_recover_expired_lease(server);
        if (status != 0)
                goto err_put_state_owner;
-       if (dentry->d_inode != NULL)
-               nfs4_return_incompatible_delegation(dentry->d_inode, fmode);
+       if (d_really_is_positive(dentry))
+               nfs4_return_incompatible_delegation(d_inode(dentry), fmode);
        status = -ENOMEM;
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                claim = NFS4_OPEN_CLAIM_FH;
        opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr,
                        label, claim, GFP_KERNEL);
@@ -2400,8 +2401,8 @@ static int _nfs4_do_open(struct inode *dir,
                }
                opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
        }
-       if (dentry->d_inode != NULL)
-               opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
+       if (d_really_is_positive(dentry))
+               opendata->state = nfs4_get_open_state(d_inode(dentry), sp);
 
        status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx);
        if (status != 0)
@@ -3095,16 +3096,13 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
                         struct nfs_fsinfo *info,
                         bool auth_probe)
 {
-       int status;
+       int status = 0;
 
-       switch (auth_probe) {
-       case false:
+       if (!auth_probe)
                status = nfs4_lookup_root(server, fhandle, info);
-               if (status != -NFS4ERR_WRONGSEC)
-                       break;
-       default:
+
+       if (auth_probe || status == NFS4ERR_WRONGSEC)
                status = nfs4_do_find_root_sec(server, fhandle, info);
-       }
 
        if (status == 0)
                status = nfs4_server_capabilities(server, fhandle);
@@ -3254,7 +3252,7 @@ static int
 nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                  struct iattr *sattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct rpc_cred *cred = NULL;
        struct nfs4_state *state = NULL;
        struct nfs4_label *label = NULL;
@@ -3871,13 +3869,13 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                u64 cookie, struct page **pages, unsigned int count, int plus)
 {
-       struct inode            *dir = dentry->d_inode;
+       struct inode            *dir = d_inode(dentry);
        struct nfs4_readdir_arg args = {
                .fh = NFS_FH(dir),
                .pages = pages,
                .pgbase = 0,
                .count = count,
-               .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
+               .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask,
                .plus = plus,
        };
        struct nfs4_readdir_res res;
@@ -3914,8 +3912,8 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
        do {
                err = _nfs4_proc_readdir(dentry, cred, cookie,
                                pages, count, plus);
-               trace_nfs4_readdir(dentry->d_inode, err);
-               err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), err,
+               trace_nfs4_readdir(d_inode(dentry), err);
+               err = nfs4_handle_exception(NFS_SERVER(d_inode(dentry)), err,
                                &exception);
        } while (exception.retry);
        return err;
@@ -4830,7 +4828,7 @@ nfs4_set_security_label(struct dentry *dentry, const void *buf, size_t buflen)
        struct nfs4_label ilabel, *olabel = NULL;
        struct nfs_fattr fattr;
        struct rpc_cred *cred;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int status;
 
        if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
@@ -5670,7 +5668,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
        data->rpc_status = task->tk_status;
        switch (task->tk_status) {
        case 0:
-               renew_lease(NFS_SERVER(data->ctx->dentry->d_inode),
+               renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
                                data->timestamp);
                if (data->arg.new_lock) {
                        data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
@@ -6112,7 +6110,7 @@ static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
        if (strcmp(key, "") != 0)
                return -EINVAL;
 
-       return nfs4_proc_set_acl(dentry->d_inode, buf, buflen);
+       return nfs4_proc_set_acl(d_inode(dentry), buf, buflen);
 }
 
 static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
@@ -6121,7 +6119,7 @@ static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
        if (strcmp(key, "") != 0)
                return -EINVAL;
 
-       return nfs4_proc_get_acl(dentry->d_inode, buf, buflen);
+       return nfs4_proc_get_acl(d_inode(dentry), buf, buflen);
 }
 
 static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
@@ -6130,7 +6128,7 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
 {
        size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
 
-       if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
+       if (!nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))))
                return 0;
 
        if (list && len <= list_len)
@@ -6158,7 +6156,7 @@ static int nfs4_xattr_get_nfs4_label(struct dentry *dentry, const char *key,
                                   void *buf, size_t buflen, int type)
 {
        if (security_ismaclabel(key))
-               return nfs4_get_security_label(dentry->d_inode, buf, buflen);
+               return nfs4_get_security_label(d_inode(dentry), buf, buflen);
        return -EOPNOTSUPP;
 }
 
@@ -6168,10 +6166,10 @@ static size_t nfs4_xattr_list_nfs4_label(struct dentry *dentry, char *list,
 {
        size_t len = 0;
 
-       if (nfs_server_capable(dentry->d_inode, NFS_CAP_SECURITY_LABEL)) {
-               len = security_inode_listsecurity(dentry->d_inode, NULL, 0);
+       if (nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
+               len = security_inode_listsecurity(d_inode(dentry), NULL, 0);
                if (list && len <= list_len)
-                       security_inode_listsecurity(dentry->d_inode, list, len);
+                       security_inode_listsecurity(d_inode(dentry), list, len);
        }
        return len;
 }
@@ -7944,6 +7942,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
 {
        struct nfs4_getdeviceinfo_args args = {
                .pdev = pdev,
+               .notify_types = NOTIFY_DEVICEID4_CHANGE |
+                       NOTIFY_DEVICEID4_DELETE,
        };
        struct nfs4_getdeviceinfo_res res = {
                .pdev = pdev,
@@ -7958,6 +7958,11 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
 
        dprintk("--> %s\n", __func__);
        status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+       if (res.notification & ~args.notify_types)
+               dprintk("%s: unsupported notification\n", __func__);
+       if (res.notification != args.notify_types)
+               pdev->nocache = 1;
+
        dprintk("<-- %s status=%d\n", __func__, status);
 
        return status;
index f95e3b5..2782cfc 100644 (file)
@@ -42,7 +42,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
@@ -57,6 +56,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "nfs4idmap.h"
 #include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
@@ -1902,7 +1902,7 @@ static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred)
                goto out;
        }
 
-       inode = server->super->s_root->d_inode;
+       inode = d_inode(server->super->s_root);
        result = nfs4_proc_get_locations(inode, locations, page, cred);
        if (result) {
                dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
@@ -2021,7 +2021,7 @@ restart:
 
                rcu_read_unlock();
 
-               inode = server->super->s_root->d_inode;
+               inode = d_inode(server->super->s_root);
                status = nfs4_proc_fsid_present(inode, cred);
                if (status != -NFS4ERR_MOVED)
                        goto restart;   /* wasn't this one */
index 75090fe..6fb7cb6 100644 (file)
@@ -3,12 +3,12 @@
  */
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/nfs_idmap.h>
 #include <linux/nfs4_mount.h>
 #include <linux/nfs_fs.h>
 #include "delegation.h"
 #include "internal.h"
 #include "nfs4_fs.h"
+#include "nfs4idmap.h"
 #include "dns_resolve.h"
 #include "pnfs.h"
 #include "nfs.h"
@@ -91,10 +91,11 @@ static void nfs4_evict_inode(struct inode *inode)
 {
        truncate_inode_pages_final(&inode->i_data);
        clear_inode(inode);
-       pnfs_return_layout(inode);
-       pnfs_destroy_layout(NFS_I(inode));
        /* If we are holding a delegation, return it! */
        nfs_inode_return_delegation_noreclaim(inode);
+       /* Note that above delegreturn would trigger pnfs return-on-close */
+       pnfs_return_layout(inode);
+       pnfs_destroy_layout(NFS_I(inode));
        /* First call standard NFS clear_inode() code */
        nfs_clear_inode(inode);
 }
index b6ebe7e..0fbd3ab 100644 (file)
@@ -6,10 +6,10 @@
  * Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com>
  */
 #include <linux/sysctl.h>
-#include <linux/nfs_idmap.h>
 #include <linux/nfs_fs.h>
 
 #include "nfs4_fs.h"
+#include "nfs4idmap.h"
 #include "callback.h"
 
 static const int nfs_set_port_min = 0;
index 1c32adb..470af1a 100644 (file)
@@ -418,7 +418,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
                                __entry->fileid = 0;
                                __entry->fhandle = 0;
                        }
-                       __entry->dir = NFS_FILEID(ctx->dentry->d_parent->d_inode);
+                       __entry->dir = NFS_FILEID(d_inode(ctx->dentry->d_parent));
                        __assign_str(name, ctx->dentry->d_name.name);
                ),
 
@@ -1110,7 +1110,7 @@ TRACE_EVENT(nfs4_layoutget,
                ),
 
                TP_fast_assign(
-                       const struct inode *inode = ctx->dentry->d_inode;
+                       const struct inode *inode = d_inode(ctx->dentry);
                        __entry->dev = inode->i_sb->s_dev;
                        __entry->fileid = NFS_FILEID(inode);
                        __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
index 5c399ec..0aea978 100644 (file)
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
 
 #include "nfs4_fs.h"
 #include "internal.h"
+#include "nfs4idmap.h"
 #include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
@@ -1920,7 +1920,7 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
 
        p = reserve_space(xdr, 4 + 4);
        *p++ = cpu_to_be32(1);                  /* bitmap length */
-       *p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE);
+       *p++ = cpu_to_be32(args->notify_types);
 }
 
 static void
@@ -5753,8 +5753,9 @@ out_overflow:
 
 #if defined(CONFIG_NFS_V4_1)
 static int decode_getdeviceinfo(struct xdr_stream *xdr,
-                               struct pnfs_device *pdev)
+                               struct nfs4_getdeviceinfo_res *res)
 {
+       struct pnfs_device *pdev = res->pdev;
        __be32 *p;
        uint32_t len, type;
        int status;
@@ -5802,12 +5803,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
                if (unlikely(!p))
                        goto out_overflow;
 
-               if (be32_to_cpup(p++) &
-                   ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE)) {
-                       dprintk("%s: unsupported notification\n",
-                               __func__);
-               }
-
+               res->notification = be32_to_cpup(p++);
                for (i = 1; i < len; i++) {
                        if (be32_to_cpup(p++)) {
                                dprintk("%s: unsupported notification\n",
@@ -7061,7 +7057,7 @@ static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
        status = decode_sequence(xdr, &res->seq_res, rqstp);
        if (status != 0)
                goto out;
-       status = decode_getdeviceinfo(xdr, res->pdev);
+       status = decode_getdeviceinfo(xdr, res);
 out:
        return status;
 }
@@ -7365,6 +7361,11 @@ nfs4_stat_to_errno(int stat)
        .p_name   = #proc,                                      \
 }
 
+#define STUB(proc)             \
+[NFSPROC4_CLNT_##proc] = {     \
+       .p_name = #proc,        \
+}
+
 struct rpc_procinfo    nfs4_procedures[] = {
        PROC(READ,              enc_read,               dec_read),
        PROC(WRITE,             enc_write,              dec_write),
@@ -7417,6 +7418,7 @@ struct rpc_procinfo       nfs4_procedures[] = {
        PROC(SECINFO_NO_NAME,   enc_secinfo_no_name,    dec_secinfo_no_name),
        PROC(TEST_STATEID,      enc_test_stateid,       dec_test_stateid),
        PROC(FREE_STATEID,      enc_free_stateid,       dec_free_stateid),
+       STUB(GETDEVICELIST),
        PROC(BIND_CONN_TO_SESSION,
                        enc_bind_conn_to_session, dec_bind_conn_to_session),
        PROC(DESTROY_CLIENTID,  enc_destroy_clientid,   dec_destroy_clientid),
index 4eb0aea..c74f7af 100644 (file)
@@ -7,3 +7,6 @@
 
 #define CREATE_TRACE_POINTS
 #include "nfstrace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
index 24e1d74..5aaed36 100644 (file)
@@ -57,7 +57,7 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d)
 
        dprintk("%s: free od=%p\n", __func__, de->od.od);
        osduld_put_device(de->od.od);
-       kfree(de);
+       kfree_rcu(d, rcu);
 }
 
 struct objio_segment {
@@ -637,6 +637,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
        .pg_read_ops             = &objio_pg_read_ops,
        .pg_write_ops            = &objio_pg_write_ops,
 
+       .sync                    = pnfs_generic_sync,
+
        .free_deviceid_node      = objio_free_deviceid_node,
 
        .encode_layoutcommit     = objlayout_encode_layoutcommit,
index d57190a..282b393 100644 (file)
@@ -938,7 +938,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
        if (prev) {
                if (!nfs_match_open_context(req->wb_context, prev->wb_context))
                        return false;
-               flctx = req->wb_context->dentry->d_inode->i_flctx;
+               flctx = d_inode(req->wb_context->dentry)->i_flctx;
                if (flctx != NULL &&
                    !(list_empty_careful(&flctx->flc_posix) &&
                      list_empty_careful(&flctx->flc_flock)) &&
index 4f802b0..2306062 100644 (file)
@@ -1090,6 +1090,7 @@ bool pnfs_roc(struct inode *ino)
        pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
        spin_unlock(&ino->i_lock);
        pnfs_free_lseg_list(&tmp_list);
+       pnfs_layoutcommit_inode(ino, true);
        return true;
 
 out_noroc:
@@ -1104,8 +1105,10 @@ out_noroc:
                }
        }
        spin_unlock(&ino->i_lock);
-       if (layoutreturn)
+       if (layoutreturn) {
+               pnfs_layoutcommit_inode(ino, true);
                pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+       }
        return false;
 }
 
@@ -1841,7 +1844,8 @@ void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
 {
        trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
        if (!hdr->pnfs_error) {
-               pnfs_set_layoutcommit(hdr);
+               pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
+                               hdr->mds_offset + hdr->res.count);
                hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
        } else
                pnfs_ld_handle_write_error(hdr);
@@ -1902,7 +1906,6 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
        pnfs_put_lseg(hdr->lseg);
        nfs_pgio_header_free(hdr);
 }
-EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
 
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
@@ -2032,7 +2035,6 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
        pnfs_put_lseg(hdr->lseg);
        nfs_pgio_header_free(hdr);
 }
-EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
 
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
@@ -2099,64 +2101,34 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
 
 void
-pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
+pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
+               loff_t end_pos)
 {
-       struct inode *inode = hdr->inode;
        struct nfs_inode *nfsi = NFS_I(inode);
-       loff_t end_pos = hdr->mds_offset + hdr->res.count;
        bool mark_as_dirty = false;
 
        spin_lock(&inode->i_lock);
        if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
-               mark_as_dirty = true;
-               dprintk("%s: Set layoutcommit for inode %lu ",
-                       __func__, inode->i_ino);
-       }
-       if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
-               /* references matched in nfs4_layoutcommit_release */
-               pnfs_get_lseg(hdr->lseg);
-       }
-       if (end_pos > nfsi->layout->plh_lwb)
                nfsi->layout->plh_lwb = end_pos;
-       spin_unlock(&inode->i_lock);
-       dprintk("%s: lseg %p end_pos %llu\n",
-               __func__, hdr->lseg, nfsi->layout->plh_lwb);
-
-       /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
-        * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
-       if (mark_as_dirty)
-               mark_inode_dirty_sync(inode);
-}
-EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
-
-void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data)
-{
-       struct inode *inode = data->inode;
-       struct nfs_inode *nfsi = NFS_I(inode);
-       bool mark_as_dirty = false;
-
-       spin_lock(&inode->i_lock);
-       if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
                mark_as_dirty = true;
                dprintk("%s: Set layoutcommit for inode %lu ",
                        __func__, inode->i_ino);
-       }
-       if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) {
+       } else if (end_pos > nfsi->layout->plh_lwb)
+               nfsi->layout->plh_lwb = end_pos;
+       if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
                /* references matched in nfs4_layoutcommit_release */
-               pnfs_get_lseg(data->lseg);
+               pnfs_get_lseg(lseg);
        }
-       if (data->lwb > nfsi->layout->plh_lwb)
-               nfsi->layout->plh_lwb = data->lwb;
        spin_unlock(&inode->i_lock);
        dprintk("%s: lseg %p end_pos %llu\n",
-               __func__, data->lseg, nfsi->layout->plh_lwb);
+               __func__, lseg, nfsi->layout->plh_lwb);
 
        /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
         * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
        if (mark_as_dirty)
                mark_inode_dirty_sync(inode);
 }
-EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit);
+EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
 
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
 {
@@ -2216,7 +2188,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
        pnfs_list_write_lseg(inode, &data->lseg_list);
 
        end_pos = nfsi->layout->plh_lwb;
-       nfsi->layout->plh_lwb = 0;
 
        nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
        spin_unlock(&inode->i_lock);
@@ -2233,11 +2204,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
                status = ld->prepare_layoutcommit(&data->args);
                if (status) {
                        spin_lock(&inode->i_lock);
-                       if (end_pos < nfsi->layout->plh_lwb)
+                       set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
+                       if (end_pos > nfsi->layout->plh_lwb)
                                nfsi->layout->plh_lwb = end_pos;
                        spin_unlock(&inode->i_lock);
                        put_rpccred(data->cred);
-                       set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
                        goto clear_layoutcommitting;
                }
        }
@@ -2258,6 +2229,13 @@ clear_layoutcommitting:
 }
 EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
 
+int
+pnfs_generic_sync(struct inode *inode, bool datasync)
+{
+       return pnfs_layoutcommit_inode(inode, true);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_sync);
+
 struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
 {
        struct nfs4_threshold *thp;
index 635f086..1e6308f 100644 (file)
@@ -155,6 +155,8 @@ struct pnfs_layoutdriver_type {
                               int how,
                               struct nfs_commit_info *cinfo);
 
+       int (*sync)(struct inode *inode, bool datasync);
+
        /*
         * Return PNFS_ATTEMPTED to indicate the layout code has attempted
         * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
@@ -203,6 +205,7 @@ struct pnfs_device {
        struct page **pages;
        unsigned int  pgbase;
        unsigned int  pglen;    /* reply buffer length */
+       unsigned char nocache : 1;/* May not be cached */
 };
 
 #define NFS4_PNFS_GETDEVLIST_MAXNUM 16
@@ -263,10 +266,11 @@ bool pnfs_roc(struct inode *ino);
 void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_pgio_header *);
-void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data);
+void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
+int pnfs_generic_sync(struct inode *inode, bool datasync);
+int pnfs_nfs_generic_sync(struct inode *inode, bool datasync);
 int _pnfs_return_layout(struct inode *);
 int pnfs_commit_and_return_layout(struct inode *);
 void pnfs_ld_write_done(struct nfs_pgio_header *);
@@ -291,6 +295,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
 enum {
        NFS_DEVICEID_INVALID = 0,       /* set when MDS clientid recalled */
        NFS_DEVICEID_UNAVAILABLE,       /* device temporarily unavailable */
+       NFS_DEVICEID_NOCACHE,           /* device may not be cached */
 };
 
 /* pnfs_dev.c */
@@ -302,6 +307,7 @@ struct nfs4_deviceid_node {
        unsigned long                   flags;
        unsigned long                   timestamp_unavailable;
        struct nfs4_deviceid            deviceid;
+       struct rcu_head                 rcu;
        atomic_t                        ref;
 };
 
@@ -426,7 +432,7 @@ static inline bool
 pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
                         struct nfs_commit_info *cinfo, u32 ds_commit_idx)
 {
-       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(req->wb_context->dentry);
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
        if (lseg == NULL || ld->mark_request_commit == NULL)
@@ -438,7 +444,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
 static inline bool
 pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
 {
-       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(req->wb_context->dentry);
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
 
        if (ld == NULL || ld->clear_request_commit == NULL)
@@ -486,6 +492,14 @@ pnfs_ld_read_whole_page(struct inode *inode)
        return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE;
 }
 
+static inline int
+pnfs_sync_inode(struct inode *inode, bool datasync)
+{
+       if (!pnfs_enabled_sb(NFS_SERVER(inode)))
+               return 0;
+       return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync);
+}
+
 static inline bool
 pnfs_layoutcommit_outstanding(struct inode *inode)
 {
@@ -568,6 +582,12 @@ pnfs_ld_read_whole_page(struct inode *inode)
        return false;
 }
 
+static inline int
+pnfs_sync_inode(struct inode *inode, bool datasync)
+{
+       return 0;
+}
+
 static inline bool
 pnfs_roc(struct inode *ino)
 {
index aa2ec00..2961fcd 100644 (file)
@@ -149,6 +149,8 @@ nfs4_get_device_info(struct nfs_server *server,
         */
        d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev,
                        gfp_flags);
+       if (d && pdev->nocache)
+               set_bit(NFS_DEVICEID_NOCACHE, &d->flags);
 
 out_free_pages:
        for (i = 0; i < max_pages; i++)
@@ -175,8 +177,8 @@ __nfs4_find_get_deviceid(struct nfs_server *server,
        rcu_read_lock();
        d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id,
                        hash);
-       if (d != NULL)
-               atomic_inc(&d->ref);
+       if (d != NULL && !atomic_inc_not_zero(&d->ref))
+               d = NULL;
        rcu_read_unlock();
        return d;
 }
@@ -235,12 +237,11 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
                return;
        }
        hlist_del_init_rcu(&d->node);
+       clear_bit(NFS_DEVICEID_NOCACHE, &d->flags);
        spin_unlock(&nfs4_deviceid_lock);
-       synchronize_rcu();
 
        /* balance the initial ref set in pnfs_insert_deviceid */
-       if (atomic_dec_and_test(&d->ref))
-               d->ld->free_deviceid_node(d);
+       nfs4_put_deviceid_node(d);
 }
 EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
 
@@ -271,6 +272,11 @@ EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node);
 bool
 nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
 {
+       if (test_bit(NFS_DEVICEID_NOCACHE, &d->flags)) {
+               if (atomic_add_unless(&d->ref, -1, 2))
+                       return false;
+               nfs4_delete_deviceid(d->ld, d->nfs_client, &d->deviceid);
+       }
        if (!atomic_dec_and_test(&d->ref))
                return false;
        d->ld->free_deviceid_node(d);
@@ -314,6 +320,7 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash)
                if (d->nfs_client == clp && atomic_read(&d->ref)) {
                        hlist_del_init_rcu(&d->node);
                        hlist_add_head(&d->tmpnode, &tmp);
+                       clear_bit(NFS_DEVICEID_NOCACHE, &d->flags);
                }
        rcu_read_unlock();
        spin_unlock(&nfs4_deviceid_lock);
@@ -321,12 +328,10 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash)
        if (hlist_empty(&tmp))
                return;
 
-       synchronize_rcu();
        while (!hlist_empty(&tmp)) {
                d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode);
                hlist_del(&d->tmpnode);
-               if (atomic_dec_and_test(&d->ref))
-                       d->ld->free_deviceid_node(d);
+               nfs4_put_deviceid_node(d);
        }
 }
 
index 54e36b3..f37e25b 100644 (file)
@@ -561,7 +561,7 @@ static bool load_v3_ds_connect(void)
        return(get_v3_ds_connect != NULL);
 }
 
-void __exit nfs4_pnfs_v3_ds_connect_unload(void)
+void nfs4_pnfs_v3_ds_connect_unload(void)
 {
        if (get_v3_ds_connect) {
                symbol_put(nfs3_set_ds_client);
@@ -868,3 +868,13 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
        nfs_request_add_commit_list(req, list, cinfo);
 }
 EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
+
+int
+pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
+{
+       if (datasync)
+               return 0;
+       return pnfs_layoutcommit_inode(inode, true);
+}
+EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
+
index c63189a..b417bbc 100644 (file)
@@ -118,7 +118,7 @@ static int
 nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                 struct iattr *sattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct nfs_sattrargs    arg = { 
                .fh     = NFS_FH(inode),
                .sattr  = sattr
@@ -487,7 +487,7 @@ static int
 nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                 u64 cookie, struct page **pages, unsigned int count, int plus)
 {
-       struct inode            *dir = dentry->d_inode;
+       struct inode            *dir = d_inode(dentry);
        struct nfs_readdirargs  arg = {
                .fh             = NFS_FH(dir),
                .cookie         = cookie,
index b8f5c63..ae0ff7a 100644 (file)
@@ -117,7 +117,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 
 static void nfs_readpage_release(struct nfs_page *req)
 {
-       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(req->wb_context->dentry);
 
        dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
                (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
@@ -284,7 +284,7 @@ int nfs_readpage(struct file *file, struct page *page)
        dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
                page, PAGE_CACHE_SIZE, page_file_index(page));
        nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
-       nfs_inc_stats(inode, NFSIOS_READPAGES);
+       nfs_add_stats(inode, NFSIOS_READPAGES, 1);
 
        /*
         * Try to flush any pending writes to the file..
index 322b2de..f175b83 100644 (file)
@@ -43,7 +43,6 @@
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/namei.h>
-#include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/inet.h>
 #include <linux/in6.h>
@@ -433,7 +432,7 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct nfs_server *server = NFS_SB(dentry->d_sb);
        unsigned char blockbits;
        unsigned long blockres;
-       struct nfs_fh *fh = NFS_FH(dentry->d_inode);
+       struct nfs_fh *fh = NFS_FH(d_inode(dentry));
        struct nfs_fsstat res;
        int error = -ENOMEM;
 
@@ -447,7 +446,7 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 
                pd_dentry = dget_parent(dentry);
                if (pd_dentry != NULL) {
-                       nfs_zap_caches(pd_dentry->d_inode);
+                       nfs_zap_caches(d_inode(pd_dentry));
                        dput(pd_dentry);
                }
        }
@@ -2193,7 +2192,7 @@ nfs_compare_remount_data(struct nfs_server *nfss,
            data->version != nfss->nfs_client->rpc_ops->version ||
            data->minorversion != nfss->nfs_client->cl_minorversion ||
            data->retrans != nfss->client->cl_timeout->to_retries ||
-           data->selected_flavor != nfss->client->cl_auth->au_flavor ||
+           !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) ||
            data->acregmin != nfss->acregmin / HZ ||
            data->acregmax != nfss->acregmax / HZ ||
            data->acdirmin != nfss->acdirmin / HZ ||
@@ -2241,7 +2240,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
        data->wsize = nfss->wsize;
        data->retrans = nfss->client->cl_timeout->to_retries;
        data->selected_flavor = nfss->client->cl_auth->au_flavor;
-       data->auth_info = nfss->auth_info;
        data->acregmin = nfss->acregmin / HZ;
        data->acregmax = nfss->acregmax / HZ;
        data->acdirmin = nfss->acdirmin / HZ;
@@ -2526,7 +2524,7 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
                          struct nfs_mount_info *mount_info)
 {
        /* clone any lsm security options from the parent to the new sb */
-       if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
+       if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
                return -ESTALE;
        return security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
 }
index 05c9e02..2d56200 100644 (file)
@@ -45,7 +45,7 @@ error:
 
 static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct page *page;
        void *err;
 
index de54129..fa538b2 100644 (file)
@@ -143,7 +143,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
                nfs_free_dname(data);
                ret = nfs_copy_dname(alias, data);
                spin_lock(&alias->d_lock);
-               if (ret == 0 && alias->d_inode != NULL &&
+               if (ret == 0 && d_really_is_positive(alias) &&
                    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
                        devname_garbage = alias->d_fsdata;
                        alias->d_fsdata = data;
@@ -190,7 +190,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
        parent = dget_parent(dentry);
        if (parent == NULL)
                goto out_free;
-       dir = parent->d_inode;
+       dir = d_inode(parent);
        /* Non-exclusive lock protects against concurrent lookup() calls */
        spin_lock(&dir->i_lock);
        if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
@@ -210,21 +210,21 @@ out_free:
 
 void nfs_wait_on_sillyrename(struct dentry *dentry)
 {
-       struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
+       struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
 
        wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1);
 }
 
 void nfs_block_sillyrename(struct dentry *dentry)
 {
-       struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
+       struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
 
        wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1);
 }
 
 void nfs_unblock_sillyrename(struct dentry *dentry)
 {
-       struct inode *dir = dentry->d_inode;
+       struct inode *dir = d_inode(dentry);
        struct nfs_inode *nfsi = NFS_I(dir);
        struct nfs_unlinkdata *data;
 
@@ -367,8 +367,8 @@ static void nfs_async_rename_release(void *calldata)
        struct nfs_renamedata   *data = calldata;
        struct super_block *sb = data->old_dir->i_sb;
 
-       if (data->old_dentry->d_inode)
-               nfs_mark_for_revalidate(data->old_dentry->d_inode);
+       if (d_really_is_positive(data->old_dentry))
+               nfs_mark_for_revalidate(d_inode(data->old_dentry));
 
        dput(data->old_dentry);
        dput(data->new_dentry);
@@ -529,10 +529,10 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
        if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
                goto out;
 
-       fileid = NFS_FILEID(dentry->d_inode);
+       fileid = NFS_FILEID(d_inode(dentry));
 
        /* Return delegation in anticipation of the rename */
-       NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode);
+       NFS_PROTO(d_inode(dentry))->return_delegation(d_inode(dentry));
 
        sdentry = NULL;
        do {
@@ -554,7 +554,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
                 */
                if (IS_ERR(sdentry))
                        goto out;
-       } while (sdentry->d_inode != NULL); /* need negative lookup */
+       } while (d_inode(sdentry) != NULL); /* need negative lookup */
 
        /* queue unlink first. Can't do this from rpc_release as it
         * has to allocate memory
index 7599310..d12a4be 100644 (file)
@@ -580,7 +580,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
        int ret;
 
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
-       nfs_inc_stats(inode, NFSIOS_WRITEPAGES);
+       nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 
        nfs_pageio_cond_complete(pgio, page_file_index(page));
        ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
@@ -702,7 +702,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
  */
 static void nfs_inode_remove_request(struct nfs_page *req)
 {
-       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(req->wb_context->dentry);
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_page *head;
 
@@ -861,7 +861,7 @@ static void
 nfs_clear_request_commit(struct nfs_page *req)
 {
        if (test_bit(PG_CLEAN, &req->wb_flags)) {
-               struct inode *inode = req->wb_context->dentry->d_inode;
+               struct inode *inode = d_inode(req->wb_context->dentry);
                struct nfs_commit_info cinfo;
 
                nfs_init_cinfo_from_inode(&cinfo, inode);
@@ -1591,7 +1591,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
                     struct nfs_commit_info *cinfo)
 {
        struct nfs_page *first = nfs_list_entry(head->next);
-       struct inode *inode = first->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(first->wb_context->dentry);
 
        /* Set up the RPC argument and reply structs
         * NB: take care not to mess about with data->commit et al. */
@@ -1690,7 +1690,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
 
                dprintk("NFS:       commit (%s/%llu %d@%lld)",
                        req->wb_context->dentry->d_sb->s_id,
-                       (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+                       (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
                        req->wb_bytes,
                        (long long)req_offset(req));
                if (status < 0) {
@@ -1840,17 +1840,16 @@ EXPORT_SYMBOL_GPL(nfs_write_inode);
  */
 int nfs_wb_all(struct inode *inode)
 {
-       struct writeback_control wbc = {
-               .sync_mode = WB_SYNC_ALL,
-               .nr_to_write = LONG_MAX,
-               .range_start = 0,
-               .range_end = LLONG_MAX,
-       };
        int ret;
 
        trace_nfs_writeback_inode_enter(inode);
 
-       ret = sync_inode(inode, &wbc);
+       ret = filemap_write_and_wait(inode->i_mapping);
+       if (!ret) {
+               ret = nfs_commit_inode(inode, FLUSH_SYNC);
+               if (!ret)
+                       pnfs_sync_inode(inode, true);
+       }
 
        trace_nfs_writeback_inode_exit(inode, ret);
        return ret;
index fc2d108..a0b77fc 100644 (file)
@@ -108,7 +108,7 @@ config NFSD_V4_SECURITY_LABEL
 
 config NFSD_FAULT_INJECTION
        bool "NFS server manual fault injection"
-       depends on NFSD_V4 && DEBUG_KERNEL
+       depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
        help
          This option enables support for manually injecting faults
          into the NFS server.  This is intended to be used for
index c3e3b6e..f79521a 100644 (file)
@@ -599,7 +599,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
                                goto out4;
                }
 
-               err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags,
+               err = check_export(d_inode(exp.ex_path.dentry), &exp.ex_flags,
                                   exp.ex_uuid);
                if (err)
                        goto out4;
@@ -691,8 +691,7 @@ static int svc_export_match(struct cache_head *a, struct cache_head *b)
        struct svc_export *orig = container_of(a, struct svc_export, h);
        struct svc_export *new = container_of(b, struct svc_export, h);
        return orig->ex_client == new->ex_client &&
-               orig->ex_path.dentry == new->ex_path.dentry &&
-               orig->ex_path.mnt == new->ex_path.mnt;
+               path_equal(&orig->ex_path, &new->ex_path);
 }
 
 static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
@@ -891,7 +890,7 @@ exp_rootfh(struct net *net, struct auth_domain *clp, char *name,
                printk("nfsd: exp_rootfh path not found %s", name);
                return err;
        }
-       inode = path.dentry->d_inode;
+       inode = d_inode(path.dentry);
 
        dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
                 name, path.dentry, clp->name,
@@ -1159,6 +1158,7 @@ static struct flags {
        { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
        { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
        { NFSEXP_V4ROOT, {"v4root", ""}},
+       { NFSEXP_PNFS, {"pnfs", ""}},
        { 0, {"", ""}}
 };
 
index ac54ea6..d54701f 100644 (file)
@@ -42,7 +42,7 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
        if (nfserr)
                RETURN_STATUS(nfserr);
 
-       inode = fh->fh_dentry->d_inode;
+       inode = d_inode(fh->fh_dentry);
 
        if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
                RETURN_STATUS(nfserr_inval);
@@ -103,7 +103,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
        if (nfserr)
                goto out;
 
-       inode = fh->fh_dentry->d_inode;
+       inode = d_inode(fh->fh_dentry);
        if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
                error = -EOPNOTSUPP;
                goto out_errno;
@@ -266,9 +266,9 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
         * nfsd_dispatch actually ensures the following cannot happen.
         * However, it seems fragile to depend on that.
         */
-       if (dentry == NULL || dentry->d_inode == NULL)
+       if (dentry == NULL || d_really_is_negative(dentry))
                return 0;
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
        *p++ = htonl(resp->mask);
index 34cbbab..882b1a1 100644 (file)
@@ -39,7 +39,7 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
        if (nfserr)
                RETURN_STATUS(nfserr);
 
-       inode = fh->fh_dentry->d_inode;
+       inode = d_inode(fh->fh_dentry);
 
        if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
                RETURN_STATUS(nfserr_inval);
@@ -94,7 +94,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
        if (nfserr)
                goto out;
 
-       inode = fh->fh_dentry->d_inode;
+       inode = d_inode(fh->fh_dentry);
        if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
                error = -EOPNOTSUPP;
                goto out_errno;
@@ -174,8 +174,8 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
        struct dentry *dentry = resp->fh.fh_dentry;
 
        p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
-       if (resp->status == 0 && dentry && dentry->d_inode) {
-               struct inode *inode = dentry->d_inode;
+       if (resp->status == 0 && dentry && d_really_is_positive(dentry)) {
+               struct inode *inode = d_inode(dentry);
                struct kvec *head = rqstp->rq_res.head;
                unsigned int base;
                int n;
index 12f2aab..7b755b7 100644 (file)
@@ -166,7 +166,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
                                  rqstp->rq_vec, argp->vlen,
                                  &resp->count);
        if (nfserr == 0) {
-               struct inode    *inode = resp->fh.fh_dentry->d_inode;
+               struct inode    *inode = d_inode(resp->fh.fh_dentry);
 
                resp->eof = (argp->offset + resp->count) >= inode->i_size;
        }
@@ -551,7 +551,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
         * different read/write sizes for file systems known to have
         * problems with large blocks */
        if (nfserr == 0) {
-               struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+               struct super_block *sb = d_inode(argp->fh.fh_dentry)->i_sb;
 
                /* Note that we don't care for remote fs's here */
                if (sb->s_magic == MSDOS_SUPER_MAGIC) {
@@ -587,7 +587,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
        nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
 
        if (nfserr == 0) {
-               struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+               struct super_block *sb = d_inode(argp->fh.fh_dentry)->i_sb;
 
                /* Note that we don't care for remote fs's here */
                switch (sb->s_magic) {
index 39c5eb3..e4b2b43 100644 (file)
@@ -146,7 +146,7 @@ static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp)
        default:
        case FSIDSOURCE_DEV:
                p = xdr_encode_hyper(p, (u64)huge_encode_dev
-                                    (fhp->fh_dentry->d_inode->i_sb->s_dev));
+                                    (d_inode(fhp->fh_dentry)->i_sb->s_dev));
                break;
        case FSIDSOURCE_FSID:
                p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
@@ -203,14 +203,14 @@ static __be32 *
 encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
        struct dentry *dentry = fhp->fh_dentry;
-       if (dentry && dentry->d_inode) {
+       if (dentry && d_really_is_positive(dentry)) {
                __be32 err;
                struct kstat stat;
 
                err = fh_getattr(fhp, &stat);
                if (!err) {
                        *p++ = xdr_one;         /* attributes follow */
-                       lease_get_mtime(dentry->d_inode, &stat.mtime);
+                       lease_get_mtime(d_inode(dentry), &stat.mtime);
                        return encode_fattr3(rqstp, p, fhp, &stat);
                }
        }
@@ -233,7 +233,7 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
 {
        struct dentry   *dentry = fhp->fh_dentry;
 
-       if (dentry && dentry->d_inode && fhp->fh_post_saved) {
+       if (dentry && d_really_is_positive(dentry) && fhp->fh_post_saved) {
                if (fhp->fh_pre_saved) {
                        *p++ = xdr_one;
                        p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size);
@@ -260,11 +260,11 @@ void fill_post_wcc(struct svc_fh *fhp)
                printk("nfsd: inode locked twice during operation.\n");
 
        err = fh_getattr(fhp, &fhp->fh_post_attr);
-       fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
+       fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version;
        if (err) {
                fhp->fh_post_saved = 0;
                /* Grab the ctime anyway - set_change_info might use it */
-               fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime;
+               fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime;
        } else
                fhp->fh_post_saved = 1;
 }
@@ -628,7 +628,7 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
                                        struct nfsd3_attrstat *resp)
 {
        if (resp->status == 0) {
-               lease_get_mtime(resp->fh.fh_dentry->d_inode,
+               lease_get_mtime(d_inode(resp->fh.fh_dentry),
                                &resp->stat.mtime);
                p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat);
        }
@@ -828,7 +828,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
                return rv;
        if (d_mountpoint(dchild))
                goto out;
-       if (!dchild->d_inode)
+       if (d_really_is_negative(dchild))
                goto out;
        rv = fh_compose(fhp, exp, dchild, &cd->fh);
 out:
index 59fd766..67242bf 100644 (file)
@@ -139,7 +139,7 @@ int
 nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
                struct nfs4_acl **acl)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error = 0;
        struct posix_acl *pacl = NULL, *dpacl = NULL;
        unsigned int flags = 0;
@@ -499,43 +499,13 @@ static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_s
        state->mask.allow |= astate->allow;
 }
 
-/*
- * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS,
- * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate
- * to traditional read/write/execute permissions.
- *
- * It's problematic to reject acls that use certain mode bits, because it
- * places the burden on users to learn the rules about which bits one
- * particular server sets, without giving the user a lot of help--we return an
- * error that could mean any number of different things.  To make matters
- * worse, the problematic bits might be introduced by some application that's
- * automatically mapping from some other acl model.
- *
- * So wherever possible we accept anything, possibly erring on the side of
- * denying more permissions than necessary.
- *
- * However we do reject *explicit* DENY's of a few bits representing
- * permissions we could never deny:
- */
-
-static inline int check_deny(u32 mask, int isowner)
-{
-       if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL))
-               return -EINVAL;
-       if (!isowner)
-               return 0;
-       if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL))
-               return -EINVAL;
-       return 0;
-}
-
 static struct posix_acl *
 posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
 {
        struct posix_acl_entry *pace;
        struct posix_acl *pacl;
        int nace;
-       int i, error = 0;
+       int i;
 
        /*
         * ACLs with no ACEs are treated differently in the inheritable
@@ -560,17 +530,11 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
 
        pace = pacl->a_entries;
        pace->e_tag = ACL_USER_OBJ;
-       error = check_deny(state->owner.deny, 1);
-       if (error)
-               goto out_err;
        low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
 
        for (i=0; i < state->users->n; i++) {
                pace++;
                pace->e_tag = ACL_USER;
-               error = check_deny(state->users->aces[i].perms.deny, 0);
-               if (error)
-                       goto out_err;
                low_mode_from_nfs4(state->users->aces[i].perms.allow,
                                        &pace->e_perm, flags);
                pace->e_uid = state->users->aces[i].uid;
@@ -579,18 +543,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
 
        pace++;
        pace->e_tag = ACL_GROUP_OBJ;
-       error = check_deny(state->group.deny, 0);
-       if (error)
-               goto out_err;
        low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
        add_to_mask(state, &state->group);
 
        for (i=0; i < state->groups->n; i++) {
                pace++;
                pace->e_tag = ACL_GROUP;
-               error = check_deny(state->groups->aces[i].perms.deny, 0);
-               if (error)
-                       goto out_err;
                low_mode_from_nfs4(state->groups->aces[i].perms.allow,
                                        &pace->e_perm, flags);
                pace->e_gid = state->groups->aces[i].gid;
@@ -605,15 +563,9 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
 
        pace++;
        pace->e_tag = ACL_OTHER;
-       error = check_deny(state->other.deny, 0);
-       if (error)
-               goto out_err;
        low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
 
        return pacl;
-out_err:
-       posix_acl_release(pacl);
-       return ERR_PTR(error);
 }
 
 static inline void allow_bits(struct posix_ace_state *astate, u32 mask)
@@ -828,7 +780,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
                return error;
 
        dentry = fhp->fh_dentry;
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        if (!inode->i_op->set_acl || !IS_POSIXACL(inode))
                return nfserr_attrnotsupp;
index 92b9d97..864e200 100644 (file)
@@ -52,7 +52,7 @@
 static inline void
 nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
 {
-       struct inode *inode = resfh->fh_dentry->d_inode;
+       struct inode *inode = d_inode(resfh->fh_dentry);
        int status;
 
        mutex_lock(&inode->i_mutex);
@@ -110,7 +110,7 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
         * in current environment or not.
         */
        if (bmval[0] & FATTR4_WORD0_ACL) {
-               if (!IS_POSIXACL(dentry->d_inode))
+               if (!IS_POSIXACL(d_inode(dentry)))
                        return nfserr_attrnotsupp;
        }
 
@@ -209,7 +209,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
 
 static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
 {
-       umode_t mode = fh->fh_dentry->d_inode->i_mode;
+       umode_t mode = d_inode(fh->fh_dentry)->i_mode;
 
        if (S_ISREG(mode))
                return nfs_ok;
@@ -470,7 +470,7 @@ out:
                fh_put(resfh);
                kfree(resfh);
        }
-       nfsd4_cleanup_open_state(cstate, open, status);
+       nfsd4_cleanup_open_state(cstate, open);
        nfsd4_bump_seqid(cstate, status);
        return status;
 }
@@ -881,7 +881,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                    &exp, &dentry);
        if (err)
                return err;
-       if (dentry->d_inode == NULL) {
+       if (d_really_is_negative(dentry)) {
                exp_put(exp);
                err = nfserr_noent;
        } else
@@ -1030,6 +1030,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
                return status;
        }
+       if (!file)
+               return nfserr_bad_stateid;
 
        status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
                                     fallocate->falloc_offset,
@@ -1069,6 +1071,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
                return status;
        }
+       if (!file)
+               return nfserr_bad_stateid;
 
        switch (seek->seek_whence) {
        case NFS4_CONTENT_DATA:
@@ -1308,7 +1312,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
        if (atomic_read(&ls->ls_stid.sc_file->fi_lo_recalls))
                goto out_put_stid;
 
-       nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode,
+       nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
                                     current_fh, lgp);
        if (nfserr)
                goto out_put_stid;
@@ -1342,7 +1346,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
        ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type);
        if (!ops)
                goto out;
-       inode = current_fh->fh_dentry->d_inode;
+       inode = d_inode(current_fh->fh_dentry);
 
        nfserr = nfserr_inval;
        if (new_size <= seg->offset) {
@@ -1815,7 +1819,7 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
                bmap0 &= ~FATTR4_WORD0_FILEHANDLE;
        }
        if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) {
-               ret += NFSD4_MAX_SEC_LABEL_LEN + 12;
+               ret += NFS4_MAXLABELLEN + 12;
                bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL;
        }
        /*
@@ -2282,13 +2286,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
                .op_func = (nfsd4op_func)nfsd4_allocate,
                .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
                .op_name = "OP_ALLOCATE",
-               .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_DEALLOCATE] = {
                .op_func = (nfsd4op_func)nfsd4_deallocate,
                .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
                .op_name = "OP_DEALLOCATE",
-               .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
        },
        [OP_SEEK] = {
                .op_func = (nfsd4op_func)nfsd4_seek,
index 1c307f0..d88ea7b 100644 (file)
@@ -192,14 +192,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 
        dir = nn->rec_file->f_path.dentry;
        /* lock the parent */
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
 
        dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
        if (IS_ERR(dentry)) {
                status = PTR_ERR(dentry);
                goto out_unlock;
        }
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                /*
                 * In the 4.1 case, where we're called from
                 * reclaim_complete(), records from the previous reboot
@@ -209,11 +209,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
                 * as well be forgiving and just succeed silently.
                 */
                goto out_put;
-       status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
+       status = vfs_mkdir(d_inode(dir), dentry, S_IRWXU);
 out_put:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        if (status == 0) {
                if (nn->in_grace) {
                        crp = nfs4_client_to_reclaim(dname, nn);
@@ -285,7 +285,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
        }
 
        status = iterate_dir(nn->rec_file, &ctx.ctx);
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
        while (!list_empty(&ctx.names)) {
                struct name_list *entry;
                entry = list_entry(ctx.names.next, struct name_list, list);
@@ -302,7 +302,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
                list_del(&entry->list);
                kfree(entry);
        }
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        nfs4_reset_creds(original_cred);
        return status;
 }
@@ -316,20 +316,20 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
        dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
 
        dir = nn->rec_file->f_path.dentry;
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_one_len(name, dir, namlen);
        if (IS_ERR(dentry)) {
                status = PTR_ERR(dentry);
                goto out_unlock;
        }
        status = -ENOENT;
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                goto out;
-       status = vfs_rmdir(dir->d_inode, dentry);
+       status = vfs_rmdir(d_inode(dir), dentry);
 out:
        dput(dentry);
 out_unlock:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
        return status;
 }
 
@@ -385,7 +385,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
        if (nfs4_has_reclaimed_state(child->d_name.name, nn))
                return 0;
 
-       status = vfs_rmdir(parent->d_inode, child);
+       status = vfs_rmdir(d_inode(parent), child);
        if (status)
                printk("failed to remove client recovery directory %pd\n",
                                child);
index 326a545..38f2d7a 100644 (file)
@@ -1139,7 +1139,7 @@ hash_sessionid(struct nfs4_sessionid *sessionid)
        return sid->sequence % SESSION_HASH_SIZE;
 }
 
-#ifdef NFSD_DEBUG
+#ifdef CONFIG_SUNRPC_DEBUG
 static inline void
 dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
 {
@@ -4049,7 +4049,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
                status = nfserr_bad_stateid;
                if (nfsd4_is_deleg_cur(open))
                        goto out;
-               status = nfserr_jukebox;
        }
 
        /*
@@ -4118,7 +4117,7 @@ out:
 }
 
 void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
-                             struct nfsd4_open *open, __be32 status)
+                             struct nfsd4_open *open)
 {
        if (open->op_openowner) {
                struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
@@ -4473,7 +4472,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
        struct nfs4_ol_stateid *stp = NULL;
        struct nfs4_delegation *dp = NULL;
        struct svc_fh *current_fh = &cstate->current_fh;
-       struct inode *ino = current_fh->fh_dentry->d_inode;
+       struct inode *ino = d_inode(current_fh->fh_dentry);
        struct nfsd_net *nn = net_generic(net, nfsd_net_id);
        struct file *file = NULL;
        __be32 status;
@@ -5171,7 +5170,7 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
        struct nfs4_file *fi = ost->st_stid.sc_file;
        struct nfs4_openowner *oo = openowner(ost->st_stateowner);
        struct nfs4_client *cl = oo->oo_owner.so_client;
-       struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
+       struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
        struct nfs4_lockowner *lo;
        unsigned int strhashval;
 
index 5fb7e78..158badf 100644 (file)
@@ -424,7 +424,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                len += 4;
                dummy32 = be32_to_cpup(p++);
                READ_BUF(dummy32);
-               if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN)
+               if (dummy32 > NFS4_MAXLABELLEN)
                        return nfserr_badlabel;
                len += (XDR_QUADLEN(dummy32) << 2);
                READMEM(buf, dummy32);
@@ -2020,7 +2020,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr,
         * dentries/path components in an array.
         */
        for (;;) {
-               if (cur.dentry == root->dentry && cur.mnt == root->mnt)
+               if (path_equal(&cur, root))
                        break;
                if (cur.dentry == cur.mnt->mnt_root) {
                        if (follow_up(&cur))
@@ -2292,7 +2292,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
        if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) ||
                        bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
-               err = security_inode_getsecctx(dentry->d_inode,
+               err = security_inode_getsecctx(d_inode(dentry),
                                                &context, &contextlen);
                contextsupport = (err == 0);
                if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
@@ -2384,7 +2384,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
                p = xdr_reserve_space(xdr, 8);
                if (!p)
                        goto out_resource;
-               p = encode_change(p, &stat, dentry->d_inode);
+               p = encode_change(p, &stat, d_inode(dentry));
        }
        if (bmval0 & FATTR4_WORD0_SIZE) {
                p = xdr_reserve_space(xdr, 8);
@@ -2807,7 +2807,7 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
        dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
        if (IS_ERR(dentry))
                return nfserrno(PTR_ERR(dentry));
-       if (!dentry->d_inode) {
+       if (d_really_is_negative(dentry)) {
                /*
                 * nfsd_buffered_readdir drops the i_mutex between
                 * readdir and calling this callback, leaving a window
@@ -3324,7 +3324,7 @@ static __be32 nfsd4_encode_splice_read(
        }
 
        eof = (read->rd_offset + maxcount >=
-              read->rd_fhp->fh_dentry->d_inode->i_size);
+              d_inode(read->rd_fhp->fh_dentry)->i_size);
 
        *(p++) = htonl(eof);
        *(p++) = htonl(maxcount);
@@ -3401,7 +3401,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
        xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
 
        eof = (read->rd_offset + maxcount >=
-              read->rd_fhp->fh_dentry->d_inode->i_size);
+              d_inode(read->rd_fhp->fh_dentry)->i_size);
 
        tmp = htonl(eof);
        write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
@@ -3422,6 +3422,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
        unsigned long maxcount;
        struct xdr_stream *xdr = &resp->xdr;
        struct file *file = read->rd_filp;
+       struct svc_fh *fhp = read->rd_fhp;
        int starting_len = xdr->buf->len;
        struct raparms *ra;
        __be32 *p;
@@ -3445,12 +3446,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
        maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
        maxcount = min_t(unsigned long, maxcount, read->rd_length);
 
-       if (!read->rd_filp) {
+       if (read->rd_filp)
+               err = nfsd_permission(resp->rqstp, fhp->fh_export,
+                               fhp->fh_dentry,
+                               NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
+       else
                err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
                                                &file, &ra);
-               if (err)
-                       goto err_truncate;
-       }
+       if (err)
+               goto err_truncate;
 
        if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
                err = nfsd4_encode_splice_read(resp, read, file, maxcount);
index aa47d75..9690cb4 100644 (file)
@@ -1250,15 +1250,15 @@ static int __init init_nfsd(void)
        int retval;
        printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
 
-       retval = register_cld_notifier();
-       if (retval)
-               return retval;
        retval = register_pernet_subsys(&nfsd_net_ops);
        if (retval < 0)
-               goto out_unregister_notifier;
-       retval = nfsd4_init_slabs();
+               return retval;
+       retval = register_cld_notifier();
        if (retval)
                goto out_unregister_pernet;
+       retval = nfsd4_init_slabs();
+       if (retval)
+               goto out_unregister_notifier;
        retval = nfsd4_init_pnfs();
        if (retval)
                goto out_free_slabs;
@@ -1290,10 +1290,10 @@ out_exit_pnfs:
        nfsd4_exit_pnfs();
 out_free_slabs:
        nfsd4_free_slabs();
-out_unregister_pernet:
-       unregister_pernet_subsys(&nfsd_net_ops);
 out_unregister_notifier:
        unregister_cld_notifier();
+out_unregister_pernet:
+       unregister_pernet_subsys(&nfsd_net_ops);
        return retval;
 }
 
@@ -1308,8 +1308,8 @@ static void __exit exit_nfsd(void)
        nfsd4_exit_pnfs();
        nfsd_fault_inject_cleanup();
        unregister_filesystem(&nfsd_fs_type);
-       unregister_pernet_subsys(&nfsd_net_ops);
        unregister_cld_notifier();
+       unregister_pernet_subsys(&nfsd_net_ops);
 }
 
 MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
index 565c4da..cf98052 100644 (file)
@@ -24,7 +24,7 @@
 #include "export.h"
 
 #undef ifdebug
-#ifdef NFSD_DEBUG
+#ifdef CONFIG_SUNRPC_DEBUG
 # define ifdebug(flag)         if (nfsd_debug & NFSDDBG_##flag)
 #else
 # define ifdebug(flag)         if (0)
index e9fa966..350041a 100644 (file)
@@ -38,7 +38,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
                /* make sure parents give x permission to user */
                int err;
                parent = dget_parent(tdentry);
-               err = inode_permission(parent->d_inode, MAY_EXEC);
+               err = inode_permission(d_inode(parent), MAY_EXEC);
                if (err < 0) {
                        dput(parent);
                        break;
@@ -340,7 +340,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
        if (error)
                goto out;
 
-       error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
+       error = nfsd_mode_check(rqstp, d_inode(dentry)->i_mode, type);
        if (error)
                goto out;
 
@@ -412,8 +412,8 @@ static inline void _fh_update_old(struct dentry *dentry,
                                  struct svc_export *exp,
                                  struct knfsd_fh *fh)
 {
-       fh->ofh_ino = ino_t_to_u32(dentry->d_inode->i_ino);
-       fh->ofh_generation = dentry->d_inode->i_generation;
+       fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino);
+       fh->ofh_generation = d_inode(dentry)->i_generation;
        if (d_is_dir(dentry) ||
            (exp->ex_flags & NFSEXP_NOSUBTREECHECK))
                fh->ofh_dirino = 0;
@@ -426,7 +426,7 @@ static bool is_root_export(struct svc_export *exp)
 
 static struct super_block *exp_sb(struct svc_export *exp)
 {
-       return exp->ex_path.dentry->d_inode->i_sb;
+       return d_inode(exp->ex_path.dentry)->i_sb;
 }
 
 static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
@@ -520,12 +520,12 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
         *
         */
 
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
        dev_t ex_dev = exp_sb(exp)->s_dev;
 
        dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
                MAJOR(ex_dev), MINOR(ex_dev),
-               (long) exp->ex_path.dentry->d_inode->i_ino,
+               (long) d_inode(exp->ex_path.dentry)->i_ino,
                dentry,
                (inode ? inode->i_ino : 0));
 
@@ -558,7 +558,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
                fhp->fh_handle.ofh_dev =  old_encode_dev(ex_dev);
                fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
                fhp->fh_handle.ofh_xino =
-                       ino_t_to_u32(exp->ex_path.dentry->d_inode->i_ino);
+                       ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino);
                fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
                if (inode)
                        _fh_update_old(dentry, exp, &fhp->fh_handle);
@@ -570,7 +570,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
                mk_fsid(fhp->fh_handle.fh_fsid_type,
                        fhp->fh_handle.fh_fsid,
                        ex_dev,
-                       exp->ex_path.dentry->d_inode->i_ino,
+                       d_inode(exp->ex_path.dentry)->i_ino,
                        exp->ex_fsid, exp->ex_uuid);
 
                if (inode)
@@ -597,7 +597,7 @@ fh_update(struct svc_fh *fhp)
                goto out_bad;
 
        dentry = fhp->fh_dentry;
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                goto out_negative;
        if (fhp->fh_handle.fh_version != 1) {
                _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
index f229204..1e90dad 100644 (file)
@@ -225,7 +225,7 @@ fill_pre_wcc(struct svc_fh *fhp)
 {
        struct inode    *inode;
 
-       inode = fhp->fh_dentry->d_inode;
+       inode = d_inode(fhp->fh_dentry);
        if (!fhp->fh_pre_saved) {
                fhp->fh_pre_mtime = inode->i_mtime;
                fhp->fh_pre_ctime = inode->i_ctime;
@@ -264,7 +264,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
                return;
        }
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        mutex_lock_nested(&inode->i_mutex, subclass);
        fill_pre_wcc(fhp);
        fhp->fh_locked = 1;
@@ -284,7 +284,7 @@ fh_unlock(struct svc_fh *fhp)
 {
        if (fhp->fh_locked) {
                fill_post_wcc(fhp);
-               mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
                fhp->fh_locked = 0;
        }
 }
index b868073..aecbcd3 100644 (file)
@@ -223,7 +223,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
        }
        fh_init(newfhp, NFS_FHSIZE);
        nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp);
-       if (!nfserr && !dchild->d_inode)
+       if (!nfserr && d_really_is_negative(dchild))
                nfserr = nfserr_noent;
        dput(dchild);
        if (nfserr) {
@@ -241,7 +241,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
                }
        }
 
-       inode = newfhp->fh_dentry->d_inode;
+       inode = d_inode(newfhp->fh_dentry);
 
        /* Unfudge the mode bits */
        if (attr->ia_valid & ATTR_MODE) {
index 412d706..79d964a 100644 (file)
@@ -187,7 +187,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
        *p++ = htonl((u32) stat->ino);
        *p++ = htonl((u32) stat->atime.tv_sec);
        *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0);
-       lease_get_mtime(dentry->d_inode, &time); 
+       lease_get_mtime(d_inode(dentry), &time); 
        *p++ = htonl((u32) time.tv_sec);
        *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); 
        *p++ = htonl((u32) stat->ctime.tv_sec);
index 3685265..84d770b 100644 (file)
@@ -174,7 +174,7 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
                return 1;
        if (!(exp->ex_flags & NFSEXP_V4ROOT))
                return 0;
-       return dentry->d_inode != NULL;
+       return d_inode(dentry) != NULL;
 }
 
 __be32
@@ -270,7 +270,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
         * dentry may be negative, it may need to be updated.
         */
        err = fh_compose(resfh, exp, dentry, fhp);
-       if (!err && !dentry->d_inode)
+       if (!err && d_really_is_negative(dentry))
                err = nfserr_noent;
 out:
        dput(dentry);
@@ -284,7 +284,7 @@ out:
 static int
 commit_metadata(struct svc_fh *fhp)
 {
-       struct inode *inode = fhp->fh_dentry->d_inode;
+       struct inode *inode = d_inode(fhp->fh_dentry);
        const struct export_operations *export_ops = inode->i_sb->s_export_op;
 
        if (!EX_ISSYNC(fhp->fh_export))
@@ -364,7 +364,7 @@ static __be32
 nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
                struct iattr *iap)
 {
-       struct inode *inode = fhp->fh_dentry->d_inode;
+       struct inode *inode = d_inode(fhp->fh_dentry);
        int host_err;
 
        if (iap->ia_size < inode->i_size) {
@@ -426,7 +426,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        }
 
        dentry = fhp->fh_dentry;
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        /* Ignore any mode updates on symlinks */
        if (S_ISLNK(inode->i_mode))
@@ -495,7 +495,7 @@ out:
  */
 int nfsd4_is_junction(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (inode == NULL)
                return 0;
@@ -521,9 +521,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
        dentry = fhp->fh_dentry;
 
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
        host_error = security_inode_setsecctx(dentry, label->data, label->len);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
        return nfserrno(host_error);
 }
 #else
@@ -706,7 +706,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
 
        path.mnt = fhp->fh_export->ex_path.mnt;
        path.dentry = fhp->fh_dentry;
-       inode = path.dentry->d_inode;
+       inode = d_inode(path.dentry);
 
        /* Disallow write access to files with the append-only bit set
         * or any access when mandatory locking enabled
@@ -1211,7 +1211,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out;
 
        dentry = fhp->fh_dentry;
-       dirp = dentry->d_inode;
+       dirp = d_inode(dentry);
 
        err = nfserr_notdir;
        if (!dirp->i_op->lookup)
@@ -1250,7 +1250,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
         * Make sure the child dentry is still negative ...
         */
        err = nfserr_exist;
-       if (dchild->d_inode) {
+       if (d_really_is_positive(dchild)) {
                dprintk("nfsd_create: dentry %pd/%pd not negative!\n",
                        dentry, dchild);
                goto out; 
@@ -1353,7 +1353,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out;
 
        dentry = fhp->fh_dentry;
-       dirp = dentry->d_inode;
+       dirp = d_inode(dentry);
 
        /* Get all the sanity checks out of the way before
         * we lock the parent. */
@@ -1376,7 +1376,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out_nfserr;
 
        /* If file doesn't exist, check for permissions to create one */
-       if (!dchild->d_inode) {
+       if (d_really_is_negative(dchild)) {
                err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
                if (err)
                        goto out;
@@ -1397,7 +1397,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                v_atime = verifier[1]&0x7fffffff;
        }
        
-       if (dchild->d_inode) {
+       if (d_really_is_positive(dchild)) {
                err = 0;
 
                switch (createmode) {
@@ -1420,17 +1420,17 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                        }
                        break;
                case NFS3_CREATE_EXCLUSIVE:
-                       if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
-                           && dchild->d_inode->i_atime.tv_sec == v_atime
-                           && dchild->d_inode->i_size  == 0 ) {
+                       if (   d_inode(dchild)->i_mtime.tv_sec == v_mtime
+                           && d_inode(dchild)->i_atime.tv_sec == v_atime
+                           && d_inode(dchild)->i_size  == 0 ) {
                                if (created)
                                        *created = 1;
                                break;
                        }
                case NFS4_CREATE_EXCLUSIVE4_1:
-                       if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
-                           && dchild->d_inode->i_atime.tv_sec == v_atime
-                           && dchild->d_inode->i_size  == 0 ) {
+                       if (   d_inode(dchild)->i_mtime.tv_sec == v_mtime
+                           && d_inode(dchild)->i_atime.tv_sec == v_atime
+                           && d_inode(dchild)->i_size  == 0 ) {
                                if (created)
                                        *created = 1;
                                goto set_attr;
@@ -1513,7 +1513,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
 
        path.mnt = fhp->fh_export->ex_path.mnt;
        path.dentry = fhp->fh_dentry;
-       inode = path.dentry->d_inode;
+       inode = d_inode(path.dentry);
 
        err = nfserr_inval;
        if (!inode->i_op->readlink)
@@ -1576,7 +1576,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (IS_ERR(dnew))
                goto out_nfserr;
 
-       host_err = vfs_symlink(dentry->d_inode, dnew, path);
+       host_err = vfs_symlink(d_inode(dentry), dnew, path);
        err = nfserrno(host_err);
        if (!err)
                err = nfserrno(commit_metadata(fhp));
@@ -1632,7 +1632,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
 
        fh_lock_nested(ffhp, I_MUTEX_PARENT);
        ddir = ffhp->fh_dentry;
-       dirp = ddir->d_inode;
+       dirp = d_inode(ddir);
 
        dnew = lookup_one_len(name, ddir, len);
        host_err = PTR_ERR(dnew);
@@ -1642,7 +1642,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        dold = tfhp->fh_dentry;
 
        err = nfserr_noent;
-       if (!dold->d_inode)
+       if (d_really_is_negative(dold))
                goto out_dput;
        host_err = vfs_link(dold, dirp, dnew, NULL);
        if (!host_err) {
@@ -1689,10 +1689,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                goto out;
 
        fdentry = ffhp->fh_dentry;
-       fdir = fdentry->d_inode;
+       fdir = d_inode(fdentry);
 
        tdentry = tfhp->fh_dentry;
-       tdir = tdentry->d_inode;
+       tdir = d_inode(tdentry);
 
        err = nfserr_perm;
        if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
@@ -1717,7 +1717,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                goto out_nfserr;
 
        host_err = -ENOENT;
-       if (!odentry->d_inode)
+       if (d_really_is_negative(odentry))
                goto out_dput_old;
        host_err = -EINVAL;
        if (odentry == trap)
@@ -1790,21 +1790,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
 
        fh_lock_nested(fhp, I_MUTEX_PARENT);
        dentry = fhp->fh_dentry;
-       dirp = dentry->d_inode;
+       dirp = d_inode(dentry);
 
        rdentry = lookup_one_len(fname, dentry, flen);
        host_err = PTR_ERR(rdentry);
        if (IS_ERR(rdentry))
                goto out_nfserr;
 
-       if (!rdentry->d_inode) {
+       if (d_really_is_negative(rdentry)) {
                dput(rdentry);
                err = nfserr_noent;
                goto out;
        }
 
        if (!type)
-               type = rdentry->d_inode->i_mode & S_IFMT;
+               type = d_inode(rdentry)->i_mode & S_IFMT;
 
        if (type != S_IFDIR)
                host_err = vfs_unlink(dirp, rdentry, NULL);
@@ -2015,7 +2015,7 @@ __be32
 nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
                                        struct dentry *dentry, int acc)
 {
-       struct inode    *inode = dentry->d_inode;
+       struct inode    *inode = d_inode(dentry);
        int             err;
 
        if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
index 0bda93e..f982ae8 100644 (file)
@@ -40,7 +40,6 @@
 #include "state.h"
 #include "nfsd.h"
 
-#define NFSD4_MAX_SEC_LABEL_LEN        2048
 #define NFSD4_MAX_TAGLEN       128
 #define XDR_LEN(n)                     (((n) + 3) & ~3)
 
@@ -632,7 +631,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 {
        BUG_ON(!fhp->fh_pre_saved);
        cinfo->atomic = fhp->fh_post_saved;
-       cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
+       cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry));
 
        cinfo->before_change = fhp->fh_pre_change;
        cinfo->after_change = fhp->fh_post_change;
@@ -683,7 +682,7 @@ extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
                struct svc_fh *current_fh, struct nfsd4_open *open);
 extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
 extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
-               struct nfsd4_open *open, __be32 status);
+               struct nfsd4_open *open);
 extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
 extern __be32 nfsd4_close(struct svc_rqst *rqstp,
index 197a63e..0ee0bed 100644 (file)
@@ -435,7 +435,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
  */
 int nilfs_add_link(struct dentry *dentry, struct inode *inode)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        const unsigned char *name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
        unsigned chunk_size = nilfs_chunk_size(dir);
index be936df..258d9fe 100644 (file)
@@ -835,7 +835,7 @@ void nilfs_evict_inode(struct inode *inode)
 int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
        struct nilfs_transaction_info ti;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct super_block *sb = inode->i_sb;
        int err;
 
index 0f84b25..2218083 100644 (file)
@@ -192,7 +192,7 @@ out_fail:
 static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
                      struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        struct nilfs_transaction_info ti;
        int err;
 
@@ -283,7 +283,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
        if (!de)
                goto out;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        err = -EIO;
        if (le64_to_cpu(de->inode) != inode->i_ino)
                goto out;
@@ -318,7 +318,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
 
        if (!err) {
                nilfs_mark_inode_dirty(dir);
-               nilfs_mark_inode_dirty(dentry->d_inode);
+               nilfs_mark_inode_dirty(d_inode(dentry));
                err = nilfs_transaction_commit(dir->i_sb);
        } else
                nilfs_transaction_abort(dir->i_sb);
@@ -328,7 +328,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
 
 static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct nilfs_transaction_info ti;
        int err;
 
@@ -358,8 +358,8 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
 static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir,  struct dentry *new_dentry)
 {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct page *dir_page = NULL;
        struct nilfs_dir_entry *dir_de = NULL;
        struct page *old_page;
@@ -453,13 +453,13 @@ static struct dentry *nilfs_get_parent(struct dentry *child)
        struct qstr dotdot = QSTR_INIT("..", 2);
        struct nilfs_root *root;
 
-       ino = nilfs_inode_by_name(child->d_inode, &dotdot);
+       ino = nilfs_inode_by_name(d_inode(child), &dotdot);
        if (!ino)
                return ERR_PTR(-ENOENT);
 
-       root = NILFS_I(child->d_inode)->i_root;
+       root = NILFS_I(d_inode(child))->i_root;
 
-       inode = nilfs_iget(child->d_inode->i_sb, root, ino);
+       inode = nilfs_iget(d_inode(child)->i_sb, root, ino);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
 
index c1725f2..f47585b 100644 (file)
@@ -610,7 +610,7 @@ static int nilfs_unfreeze(struct super_block *sb)
 static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct super_block *sb = dentry->d_sb;
-       struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
+       struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root;
        struct the_nilfs *nilfs = root->nilfs;
        u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
        unsigned long long blocks;
@@ -681,7 +681,7 @@ static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
 {
        struct super_block *sb = dentry->d_sb;
        struct the_nilfs *nilfs = sb->s_fs_info;
-       struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
+       struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root;
 
        if (!nilfs_test_opt(nilfs, BARRIER))
                seq_puts(seq, ",nobarrier");
@@ -1190,7 +1190,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 
                sb->s_flags &= ~MS_RDONLY;
 
-               root = NILFS_I(sb->s_root->d_inode)->i_root;
+               root = NILFS_I(d_inode(sb->s_root))->i_root;
                err = nilfs_attach_log_writer(sb, root);
                if (err)
                        goto restore_opts;
index af1b24f..99521e7 100644 (file)
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -13,7 +13,7 @@ static const struct file_operations ns_file_operations = {
 
 static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
 
        return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
@@ -22,7 +22,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
 
 static void ns_prune_dentry(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        if (inode) {
                struct ns_common *ns = inode->i_private;
                atomic_long_set(&ns->stashed, 0);
index 1d0c21d..d284f07 100644 (file)
@@ -2889,7 +2889,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
  */
 int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *vi = dentry->d_inode;
+       struct inode *vi = d_inode(dentry);
        int err;
        unsigned int ia_valid = attr->ia_valid;
 
index b3973c2..0f35b80 100644 (file)
@@ -292,14 +292,14 @@ const struct inode_operations ntfs_dir_inode_ops = {
  * The code is based on the ext3 ->get_parent() implementation found in
  * fs/ext3/namei.c::ext3_get_parent().
  *
- * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_mutex down.
+ * Note: ntfs_get_parent() is called with @d_inode(child_dent)->i_mutex down.
  *
  * Return the dentry of the parent directory on success or the error code on
  * error (IS_ERR() is true).
  */
 static struct dentry *ntfs_get_parent(struct dentry *child_dent)
 {
-       struct inode *vi = child_dent->d_inode;
+       struct inode *vi = d_inode(child_dent);
        ntfs_inode *ni = NTFS_I(vi);
        MFT_RECORD *mrec;
        ntfs_attr_search_ctx *ctx;
index 4fda7a5..2903730 100644 (file)
@@ -42,8 +42,8 @@
 void ocfs2_dentry_attach_gen(struct dentry *dentry)
 {
        unsigned long gen =
-               OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
-       BUG_ON(dentry->d_inode);
+               OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen;
+       BUG_ON(d_inode(dentry));
        dentry->d_fsdata = (void *)gen;
 }
 
@@ -57,7 +57,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
        if (flags & LOOKUP_RCU)
                return -ECHILD;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        osb = OCFS2_SB(dentry->d_sb);
 
        trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len,
@@ -71,7 +71,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
                unsigned long gen = (unsigned long) dentry->d_fsdata;
                unsigned long pgen;
                spin_lock(&dentry->d_lock);
-               pgen = OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
+               pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen;
                spin_unlock(&dentry->d_lock);
                trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
                                                       dentry->d_name.name,
@@ -146,7 +146,7 @@ static int ocfs2_match_dentry(struct dentry *dentry,
        if (skip_unhashed && d_unhashed(dentry))
                return 0;
 
-       parent = dentry->d_parent->d_inode;
+       parent = d_inode(dentry->d_parent);
        /* Negative parent dentry? */
        if (!parent)
                return 0;
@@ -243,7 +243,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
        if (!inode)
                return 0;
 
-       if (!dentry->d_inode && dentry->d_fsdata) {
+       if (d_really_is_negative(dentry) && dentry->d_fsdata) {
                /* Converting a negative dentry to positive
                   Clear dentry->d_fsdata */
                dentry->d_fsdata = dl = NULL;
@@ -446,7 +446,7 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
 {
        int ret;
        struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        /*
         * Move within the same directory, so the actual lock info won't
index f0344b7..3d8639f 100644 (file)
@@ -72,7 +72,7 @@ static inline int ocfs2_add_entry(handle_t *handle,
                                  struct buffer_head *parent_fe_bh,
                                  struct ocfs2_dir_lookup_result *lookup)
 {
-       return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
+       return __ocfs2_add_entry(handle, d_inode(dentry->d_parent),
                                 dentry->d_name.name, dentry->d_name.len,
                                 inode, blkno, parent_fe_bh, lookup);
 }
index 061ba6a..b5cf27d 100644 (file)
@@ -208,7 +208,7 @@ static int dlmfs_file_release(struct inode *inode,
 static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
 {
        int error;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        attr->ia_valid &= ~ATTR_SIZE;
        error = inode_change_ok(inode, attr);
@@ -549,7 +549,7 @@ static int dlmfs_unlink(struct inode *dir,
                        struct dentry *dentry)
 {
        int status;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        mlog(0, "unlink inode %lu\n", inode->i_ino);
 
index 540dc4b..827fc98 100644 (file)
@@ -147,7 +147,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
        int status;
        u64 blkno;
        struct dentry *parent;
-       struct inode *dir = child->d_inode;
+       struct inode *dir = d_inode(child);
 
        trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name,
                               (unsigned long long)OCFS2_I(dir)->ip_blkno);
index 913fc25..d8b670c 100644 (file)
@@ -1126,7 +1126,7 @@ out:
 int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 {
        int status = 0, size_change;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct super_block *sb = inode->i_sb;
        struct ocfs2_super *osb = OCFS2_SB(sb);
        struct buffer_head *bh = NULL;
@@ -1275,8 +1275,8 @@ int ocfs2_getattr(struct vfsmount *mnt,
                  struct dentry *dentry,
                  struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
-       struct super_block *sb = dentry->d_inode->i_sb;
+       struct inode *inode = d_inode(dentry);
+       struct super_block *sb = d_inode(dentry)->i_sb;
        struct ocfs2_super *osb = sb->s_fs_info;
        int err;
 
@@ -2114,7 +2114,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
 {
        int ret = 0, meta_level = 0;
        struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        loff_t end;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        int full_coherency = !(osb->s_mount_opt &
index be71ca0..b254416 100644 (file)
@@ -1209,7 +1209,7 @@ int ocfs2_drop_inode(struct inode *inode)
  */
 int ocfs2_inode_revalidate(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int status = 0;
 
        trace_ocfs2_inode_revalidate(inode,
index 09f90cb..176fe6a 100644 (file)
@@ -689,8 +689,8 @@ static int ocfs2_link(struct dentry *old_dentry,
                      struct dentry *dentry)
 {
        handle_t *handle;
-       struct inode *inode = old_dentry->d_inode;
-       struct inode *old_dir = old_dentry->d_parent->d_inode;
+       struct inode *inode = d_inode(old_dentry);
+       struct inode *old_dir = d_inode(old_dentry->d_parent);
        int err;
        struct buffer_head *fe_bh = NULL;
        struct buffer_head *old_dir_bh = NULL;
@@ -879,7 +879,7 @@ static int ocfs2_unlink(struct inode *dir,
        int status;
        int child_locked = 0;
        bool is_unlinkable = false;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct inode *orphan_dir = NULL;
        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
        u64 blkno;
@@ -898,7 +898,7 @@ static int ocfs2_unlink(struct inode *dir,
 
        dquot_initialize(dir);
 
-       BUG_ON(dentry->d_parent->d_inode != dir);
+       BUG_ON(d_inode(dentry->d_parent) != dir);
 
        if (inode == osb->root_inode)
                return -EPERM;
@@ -1209,8 +1209,8 @@ static int ocfs2_rename(struct inode *old_dir,
 {
        int status = 0, rename_lock = 0, parents_locked = 0, target_exists = 0;
        int old_child_locked = 0, new_child_locked = 0, update_dot_dot = 0;
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct inode *orphan_dir = NULL;
        struct ocfs2_dinode *newfe = NULL;
        char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
@@ -1454,7 +1454,7 @@ static int ocfs2_rename(struct inode *old_dir,
                        should_add_orphan = true;
                }
        } else {
-               BUG_ON(new_dentry->d_parent->d_inode != new_dir);
+               BUG_ON(d_inode(new_dentry->d_parent) != new_dir);
 
                status = ocfs2_check_dir_for_entry(new_dir,
                                                   new_dentry->d_name.name,
index df3a500..d8c6af1 100644 (file)
@@ -4194,7 +4194,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
                           bool preserve)
 {
        int ret;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        struct buffer_head *new_bh = NULL;
 
        if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
@@ -4263,7 +4263,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                         struct dentry *new_dentry, bool preserve)
 {
        int error;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        struct buffer_head *old_bh = NULL;
        struct inode *new_orphan_inode = NULL;
        struct posix_acl *default_acl, *acl;
@@ -4357,7 +4357,7 @@ out:
 /* copied from may_create in VFS. */
 static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
 {
-       if (child->d_inode)
+       if (d_really_is_positive(child))
                return -EEXIST;
        if (IS_DEADDIR(dir))
                return -ENOENT;
@@ -4375,7 +4375,7 @@ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
 static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
                             struct dentry *new_dentry, bool preserve)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        int error;
 
        if (!inode)
@@ -4463,7 +4463,7 @@ int ocfs2_reflink_ioctl(struct inode *inode,
        }
 
        error = ocfs2_vfs_reflink(old_path.dentry,
-                                 new_path.dentry->d_inode,
+                                 d_inode(new_path.dentry),
                                  new_dentry, preserve);
 out_dput:
        done_path_create(&new_path, new_dentry);
index 4ca7533..d03bfbf 100644 (file)
@@ -1020,7 +1020,7 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
        int ret = 0, i_ret = 0, b_ret = 0;
        struct buffer_head *di_bh = NULL;
        struct ocfs2_dinode *di = NULL;
-       struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
+       struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry));
 
        if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
                return -EOPNOTSUPP;
@@ -1028,7 +1028,7 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
        if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
                return ret;
 
-       ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
+       ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0);
        if (ret < 0) {
                mlog_errno(ret);
                return ret;
@@ -1037,7 +1037,7 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
        di = (struct ocfs2_dinode *)di_bh->b_data;
 
        down_read(&oi->ip_xattr_sem);
-       i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
+       i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size);
        if (i_ret < 0)
                b_ret = 0;
        else {
@@ -1045,13 +1045,13 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
                        buffer += i_ret;
                        size -= i_ret;
                }
-               b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
+               b_ret = ocfs2_xattr_block_list(d_inode(dentry), di,
                                               buffer, size);
                if (b_ret < 0)
                        i_ret = 0;
        }
        up_read(&oi->ip_xattr_sem);
-       ocfs2_inode_unlock(dentry->d_inode, 0);
+       ocfs2_inode_unlock(d_inode(dentry), 0);
 
        brelse(di_bh);
 
@@ -7257,7 +7257,7 @@ static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+       return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
                               name, buffer, size);
 }
 
@@ -7267,7 +7267,7 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
        if (strcmp(name, "") == 0)
                return -EINVAL;
 
-       return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+       return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
                               name, value, size, flags);
 }
 
@@ -7347,7 +7347,7 @@ static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
 {
        if (strcmp(name, "") == 0)
                return -EINVAL;
-       return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+       return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
                               name, buffer, size);
 }
 
@@ -7357,7 +7357,7 @@ static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
        if (strcmp(name, "") == 0)
                return -EINVAL;
 
-       return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+       return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
                               name, value, size, flags);
 }
 
@@ -7399,7 +7399,7 @@ static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
                return -EINVAL;
        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
                return -EOPNOTSUPP;
-       return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
+       return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name,
                               buffer, size);
 }
 
@@ -7413,7 +7413,7 @@ static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
        if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
                return -EOPNOTSUPP;
 
-       return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
+       return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_USER,
                               name, value, size, flags);
 }
 
index 1b8e9e8..f833bf8 100644 (file)
@@ -110,7 +110,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
 
 static int omfs_add_link(struct dentry *dentry, struct inode *inode)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        const char *name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
        struct omfs_inode *oi;
@@ -155,7 +155,7 @@ out:
 
 static int omfs_delete_entry(struct dentry *dentry)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        struct inode *dirty;
        const char *name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
@@ -237,7 +237,7 @@ static int omfs_dir_is_empty(struct inode *inode)
 
 static int omfs_remove(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int ret;
 
 
@@ -373,8 +373,8 @@ static bool omfs_fill_chain(struct inode *dir, struct dir_context *ctx,
 static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                struct inode *new_dir, struct dentry *new_dentry)
 {
-       struct inode *new_inode = new_dentry->d_inode;
-       struct inode *old_inode = old_dentry->d_inode;
+       struct inode *new_inode = d_inode(new_dentry);
+       struct inode *old_inode = d_inode(old_dentry);
        int err;
 
        if (new_inode) {
index f993be7..d9e26cf 100644 (file)
@@ -346,7 +346,7 @@ const struct file_operations omfs_file_operations = {
 
 static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        error = inode_change_ok(inode, attr);
index 6796f04..98e5a52 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,8 +231,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                return -EINVAL;
 
        /* Return error if mode is not supported */
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+       if (mode & ~FALLOC_FL_SUPPORTED_MASK)
                return -EOPNOTSUPP;
 
        /* Punch hole and zero range are mutually exclusive */
@@ -250,6 +249,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
            (mode & ~FALLOC_FL_COLLAPSE_RANGE))
                return -EINVAL;
 
+       /* Insert range should only be used exclusively. */
+       if ((mode & FALLOC_FL_INSERT_RANGE) &&
+           (mode & ~FALLOC_FL_INSERT_RANGE))
+               return -EINVAL;
+
        if (!(file->f_mode & FMODE_WRITE))
                return -EBADF;
 
index 822da5b..8865f79 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -627,7 +627,7 @@ static struct vfsmount *pipe_mnt __read_mostly;
 static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
        return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
-                               dentry->d_inode->i_ino);
+                               d_inode(dentry)->i_ino);
 }
 
 static const struct dentry_operations pipefs_dentry_operations = {
index 3a48bb7..84bb65b 100644 (file)
@@ -774,12 +774,12 @@ posix_acl_xattr_get(struct dentry *dentry, const char *name,
        struct posix_acl *acl;
        int error;
 
-       if (!IS_POSIXACL(dentry->d_inode))
+       if (!IS_POSIXACL(d_backing_inode(dentry)))
                return -EOPNOTSUPP;
        if (d_is_symlink(dentry))
                return -EOPNOTSUPP;
 
-       acl = get_acl(dentry->d_inode, type);
+       acl = get_acl(d_backing_inode(dentry), type);
        if (IS_ERR(acl))
                return PTR_ERR(acl);
        if (acl == NULL)
@@ -795,7 +795,7 @@ static int
 posix_acl_xattr_set(struct dentry *dentry, const char *name,
                const void *value, size_t size, int flags, int type)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        struct posix_acl *acl = NULL;
        int ret;
 
@@ -834,7 +834,7 @@ posix_acl_xattr_list(struct dentry *dentry, char *list, size_t list_size,
        const char *xname;
        size_t size;
 
-       if (!IS_POSIXACL(dentry->d_inode))
+       if (!IS_POSIXACL(d_backing_inode(dentry)))
                return -EOPNOTSUPP;
        if (d_is_symlink(dentry))
                return -EOPNOTSUPP;
index 7a3b82f..093ca14 100644 (file)
@@ -169,7 +169,7 @@ static int get_task_root(struct task_struct *task, struct path *root)
 
 static int proc_cwd_link(struct dentry *dentry, struct path *path)
 {
-       struct task_struct *task = get_proc_task(dentry->d_inode);
+       struct task_struct *task = get_proc_task(d_inode(dentry));
        int result = -ENOENT;
 
        if (task) {
@@ -186,7 +186,7 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path)
 
 static int proc_root_link(struct dentry *dentry, struct path *path)
 {
-       struct task_struct *task = get_proc_task(dentry->d_inode);
+       struct task_struct *task = get_proc_task(d_inode(dentry));
        int result = -ENOENT;
 
        if (task) {
@@ -514,7 +514,7 @@ static int proc_fd_access_allowed(struct inode *inode)
 int proc_setattr(struct dentry *dentry, struct iattr *attr)
 {
        int error;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (attr->ia_valid & ATTR_MODE)
                return -EPERM;
@@ -1362,7 +1362,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
        struct mm_struct *mm;
        struct file *exe_file;
 
-       task = get_proc_task(dentry->d_inode);
+       task = get_proc_task(d_inode(dentry));
        if (!task)
                return -ENOENT;
        mm = get_task_mm(task);
@@ -1382,7 +1382,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
 
 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct path path;
        int error = -EACCES;
 
@@ -1427,7 +1427,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
 {
        int error = -EACCES;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct path path;
 
        /* Are we allowed to snoop on the tasks file descriptors? */
@@ -1497,7 +1497,7 @@ out_unlock:
 
 int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct task_struct *task;
        const struct cred *cred;
        struct pid_namespace *pid = dentry->d_sb->s_fs_info;
@@ -1554,7 +1554,7 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
        if (flags & LOOKUP_RCU)
                return -ECHILD;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        task = get_proc_task(inode);
 
        if (task) {
@@ -1588,7 +1588,7 @@ int pid_delete_dentry(const struct dentry *dentry)
         * If so, then don't put the dentry on the lru list,
         * kill it immediately.
         */
-       return proc_inode_is_dead(dentry->d_inode);
+       return proc_inode_is_dead(d_inode(dentry));
 }
 
 const struct dentry_operations pid_dentry_operations =
@@ -1626,12 +1626,12 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
                child = d_alloc(dir, &qname);
                if (!child)
                        goto end_instantiate;
-               if (instantiate(dir->d_inode, child, task, ptr) < 0) {
+               if (instantiate(d_inode(dir), child, task, ptr) < 0) {
                        dput(child);
                        goto end_instantiate;
                }
        }
-       inode = child->d_inode;
+       inode = d_inode(child);
        ino = inode->i_ino;
        type = inode->i_mode >> 12;
        dput(child);
@@ -1674,7 +1674,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
                goto out_notask;
        }
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        task = get_proc_task(inode);
        if (!task)
                goto out_notask;
@@ -1727,7 +1727,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
        int rc;
 
        rc = -ENOENT;
-       task = get_proc_task(dentry->d_inode);
+       task = get_proc_task(d_inode(dentry));
        if (!task)
                goto out;
 
@@ -2863,13 +2863,13 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
                return 0;
 
        if (pos == TGID_OFFSET - 2) {
-               struct inode *inode = ns->proc_self->d_inode;
+               struct inode *inode = d_inode(ns->proc_self);
                if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
                        return 0;
                ctx->pos = pos = pos + 1;
        }
        if (pos == TGID_OFFSET - 1) {
-               struct inode *inode = ns->proc_thread_self->d_inode;
+               struct inode *inode = d_inode(ns->proc_thread_self);
                if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
                        return 0;
                ctx->pos = pos = pos + 1;
@@ -3188,7 +3188,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
 
 static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct task_struct *p = get_proc_task(inode);
        generic_fillattr(inode, stat);
 
index af84ad0..6e5fcd0 100644 (file)
@@ -91,7 +91,7 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
        if (flags & LOOKUP_RCU)
                return -ECHILD;
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        task = get_proc_task(inode);
        fd = proc_fd(inode);
 
@@ -151,14 +151,14 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
        struct task_struct *task;
        int ret = -ENOENT;
 
-       task = get_proc_task(dentry->d_inode);
+       task = get_proc_task(d_inode(dentry));
        if (task) {
                files = get_files_struct(task);
                put_task_struct(task);
        }
 
        if (files) {
-               int fd = proc_fd(dentry->d_inode);
+               int fd = proc_fd(d_inode(dentry));
                struct file *fd_file;
 
                spin_lock(&files->file_lock);
index be65b20..df6327a 100644 (file)
@@ -101,7 +101,7 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
 
 static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct proc_dir_entry *de = PDE(inode);
        int error;
 
@@ -120,7 +120,7 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
 static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
                        struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct proc_dir_entry *de = PDE(inode);
        if (de && de->nlink)
                set_nlink(inode, de->nlink);
index 7697b66..8272aab 100644 (file)
@@ -396,7 +396,7 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
 
 static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct proc_dir_entry *pde = PDE(dentry->d_inode);
+       struct proc_dir_entry *pde = PDE(d_inode(dentry));
        if (unlikely(!use_pde(pde)))
                return ERR_PTR(-EINVAL);
        nd_set_link(nd, pde->data);
index c9eac45..e512642 100644 (file)
@@ -32,7 +32,7 @@ static const struct proc_ns_operations *ns_entries[] = {
 
 static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
        struct task_struct *task;
        struct path ns_path;
@@ -53,7 +53,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
 
 static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
        struct task_struct *task;
        char name[50];
index 1bde894..350984a 100644 (file)
@@ -142,7 +142,7 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
 static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
                struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct net *net;
 
        net = get_proc_task_net(inode);
index f92d5dd..fea2561 100644 (file)
@@ -604,7 +604,7 @@ static bool proc_sys_fill_cache(struct file *file,
                        return false;
                }
        }
-       inode = child->d_inode;
+       inode = d_inode(child);
        ino  = inode->i_ino;
        type = inode->i_mode >> 12;
        dput(child);
@@ -710,7 +710,7 @@ static int proc_sys_permission(struct inode *inode, int mask)
 
 static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
@@ -727,7 +727,7 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
 
 static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ctl_table_header *head = grab_header(inode);
        struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 
@@ -773,12 +773,12 @@ static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
 {
        if (flags & LOOKUP_RCU)
                return -ECHILD;
-       return !PROC_I(dentry->d_inode)->sysctl->unregistering;
+       return !PROC_I(d_inode(dentry))->sysctl->unregistering;
 }
 
 static int proc_sys_delete(const struct dentry *dentry)
 {
-       return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
+       return !!PROC_I(d_inode(dentry))->sysctl->unregistering;
 }
 
 static int sysctl_is_seen(struct ctl_table_header *p)
@@ -805,7 +805,7 @@ static int proc_sys_compare(const struct dentry *parent, const struct dentry *de
        /* Although proc doesn't have negative dentries, rcu-walk means
         * that inode here can be NULL */
        /* AV: can it, indeed? */
-       inode = ACCESS_ONCE(dentry->d_inode);
+       inode = d_inode_rcu(dentry);
        if (!inode)
                return 1;
        if (name->len != len)
index e74ac9f..b7fa4bf 100644 (file)
@@ -195,7 +195,7 @@ void __init proc_root_init(void)
 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
 )
 {
-       generic_fillattr(dentry->d_inode, stat);
+       generic_fillattr(d_inode(dentry), stat);
        stat->nlink = proc_root.nlink + nr_processes();
        return 0;
 }
index 4348bb8..6195b4a 100644 (file)
@@ -46,7 +46,7 @@ static unsigned self_inum;
 
 int proc_setup_self(struct super_block *s)
 {
-       struct inode *root_inode = s->s_root->d_inode;
+       struct inode *root_inode = d_inode(s->s_root);
        struct pid_namespace *ns = s->s_fs_info;
        struct dentry *self;
        
index 59075b5..a837199 100644 (file)
@@ -47,7 +47,7 @@ static unsigned thread_self_inum;
 
 int proc_setup_thread_self(struct super_block *s)
 {
-       struct inode *root_inode = s->s_root->d_inode;
+       struct inode *root_inode = d_inode(s->s_root);
        struct pid_namespace *ns = s->s_fs_info;
        struct dentry *thread_self;
 
index 56e1ffd..dc43b5f 100644 (file)
@@ -190,7 +190,7 @@ static const struct file_operations pstore_file_operations = {
  */
 static int pstore_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct pstore_private *p = dentry->d_inode->i_private;
+       struct pstore_private *p = d_inode(dentry)->i_private;
        int err;
 
        err = pstore_check_syslog_permissions(p);
@@ -199,7 +199,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
 
        if (p->psi->erase)
                p->psi->erase(p->type, p->id, p->count,
-                             dentry->d_inode->i_ctime, p->psi);
+                             d_inode(dentry)->i_ctime, p->psi);
        else
                return -EPERM;
 
@@ -376,7 +376,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
                break;
        }
 
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
 
        dentry = d_alloc_name(root, name);
        if (!dentry)
@@ -396,12 +396,12 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
        list_add(&private->list, &allpstore);
        spin_unlock_irqrestore(&allpstore_lock, flags);
 
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
 
        return 0;
 
 fail_lockedalloc:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
        kfree(private);
 fail_alloc:
        iput(inode);
index 44e7392..32d2e1a 100644 (file)
@@ -182,7 +182,7 @@ static const char *qnx6_checkroot(struct super_block *s)
        static char match_root[2][3] = {".\0\0", "..\0"};
        int i, error = 0;
        struct qnx6_dir_entry *dir_entry;
-       struct inode *root = s->s_root->d_inode;
+       struct inode *root = d_inode(s->s_root);
        struct address_space *mapping = root->i_mapping;
        struct page *page = read_mapping_page(mapping, 0, NULL);
        if (IS_ERR(page))
index ecc25cf..20d1f74 100644 (file)
@@ -2328,7 +2328,7 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id,
        if (path->dentry->d_sb != sb)
                error = -EXDEV;
        else
-               error = vfs_load_quota_inode(path->dentry->d_inode, type,
+               error = vfs_load_quota_inode(d_inode(path->dentry), type,
                                             format_id, DQUOT_USAGE_ENABLED |
                                             DQUOT_LIMITS_ENABLED);
        return error;
@@ -2392,20 +2392,20 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
        struct dentry *dentry;
        int error;
 
-       mutex_lock(&sb->s_root->d_inode->i_mutex);
+       mutex_lock(&d_inode(sb->s_root)->i_mutex);
        dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
-       mutex_unlock(&sb->s_root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(sb->s_root)->i_mutex);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       if (!dentry->d_inode) {
+       if (d_really_is_negative(dentry)) {
                error = -ENOENT;
                goto out;
        }
 
        error = security_quota_on(dentry);
        if (!error)
-               error = vfs_load_quota_inode(dentry->d_inode, type, format_id,
+               error = vfs_load_quota_inode(d_inode(dentry), type, format_id,
                                DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 
 out:
index 0b38bef..ba1323a 100644 (file)
@@ -163,7 +163,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
  */
 static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        unsigned int old_ia_valid = ia->ia_valid;
        int ret = 0;
 
index 0a7dc94..4a024e2 100644 (file)
@@ -53,8 +53,8 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
 static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *deh)
 {
        struct dentry *privroot = REISERFS_SB(dir->i_sb)->priv_root;
-       return (privroot->d_inode &&
-               deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
+       return (d_really_is_positive(privroot) &&
+               deh->deh_objectid == INODE_PKEY(d_inode(privroot))->k_objectid);
 }
 
 int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
index 742242b..f6f2fba 100644 (file)
@@ -3308,7 +3308,7 @@ static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 
 int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        unsigned int ia_valid;
        int error;
 
index cd11358..b55a074 100644 (file)
@@ -400,7 +400,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child)
        struct inode *inode = NULL;
        struct reiserfs_dir_entry de;
        INITIALIZE_PATH(path_to_entry);
-       struct inode *dir = child->d_inode;
+       struct inode *dir = d_inode(child);
 
        if (dir->i_nlink == 0) {
                return ERR_PTR(-ENOENT);
@@ -917,7 +917,7 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
                goto end_rmdir;
        }
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        reiserfs_update_inode_transaction(inode);
        reiserfs_update_inode_transaction(dir);
@@ -987,7 +987,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 
        dquot_initialize(dir);
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        /*
         * in this transaction we can be doing at max two balancings and
@@ -1174,7 +1174,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
                         struct dentry *dentry)
 {
        int retval;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        struct reiserfs_transaction_handle th;
        /*
         * We need blocks for transaction + update of quotas for
@@ -1311,8 +1311,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        dquot_initialize(old_dir);
        dquot_initialize(new_dir);
 
-       old_inode = old_dentry->d_inode;
-       new_dentry_inode = new_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
+       new_dentry_inode = d_inode(new_dentry);
 
        /*
         * make sure that oldname still exists and points to an object we
index 68b5f18..0111ad0 100644 (file)
@@ -1687,7 +1687,7 @@ static __u32 find_hash_out(struct super_block *s)
        __u32 hash = DEFAULT_HASH;
        __u32 deh_hashval, teahash, r5hash, yurahash;
 
-       inode = s->s_root->d_inode;
+       inode = d_inode(s->s_root);
 
        make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3);
        retval = search_by_entry_key(s, &key, &path, &de);
@@ -2347,7 +2347,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
                err = -EXDEV;
                goto out;
        }
-       inode = path->dentry->d_inode;
+       inode = d_inode(path->dentry);
        /*
         * We must not pack tails for quota files on reiserfs for quota
         * IO to work
index 4e781e6..e87f9b5 100644 (file)
@@ -87,9 +87,9 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
 
        BUG_ON(!mutex_is_locked(&dir->i_mutex));
 
-       mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
        error = dir->i_op->unlink(dir, dentry);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
 
        if (!error)
                d_delete(dentry);
@@ -102,11 +102,11 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
 
        BUG_ON(!mutex_is_locked(&dir->i_mutex));
 
-       mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
        error = dir->i_op->rmdir(dir, dentry);
        if (!error)
-               dentry->d_inode->i_flags |= S_DEAD;
-       mutex_unlock(&dentry->d_inode->i_mutex);
+               d_inode(dentry)->i_flags |= S_DEAD;
+       mutex_unlock(&d_inode(dentry)->i_mutex);
        if (!error)
                d_delete(dentry);
 
@@ -120,26 +120,26 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
        struct dentry *privroot = REISERFS_SB(sb)->priv_root;
        struct dentry *xaroot;
 
-       if (!privroot->d_inode)
+       if (d_really_is_negative(privroot))
                return ERR_PTR(-ENODATA);
 
-       mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(privroot)->i_mutex, I_MUTEX_XATTR);
 
        xaroot = dget(REISERFS_SB(sb)->xattr_root);
        if (!xaroot)
                xaroot = ERR_PTR(-ENODATA);
-       else if (!xaroot->d_inode) {
+       else if (d_really_is_negative(xaroot)) {
                int err = -ENODATA;
 
                if (xattr_may_create(flags))
-                       err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
+                       err = xattr_mkdir(d_inode(privroot), xaroot, 0700);
                if (err) {
                        dput(xaroot);
                        xaroot = ERR_PTR(err);
                }
        }
 
-       mutex_unlock(&privroot->d_inode->i_mutex);
+       mutex_unlock(&d_inode(privroot)->i_mutex);
        return xaroot;
 }
 
@@ -156,21 +156,21 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
                 le32_to_cpu(INODE_PKEY(inode)->k_objectid),
                 inode->i_generation);
 
-       mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(xaroot)->i_mutex, I_MUTEX_XATTR);
 
        xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
-       if (!IS_ERR(xadir) && !xadir->d_inode) {
+       if (!IS_ERR(xadir) && d_really_is_negative(xadir)) {
                int err = -ENODATA;
 
                if (xattr_may_create(flags))
-                       err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
+                       err = xattr_mkdir(d_inode(xaroot), xadir, 0700);
                if (err) {
                        dput(xadir);
                        xadir = ERR_PTR(err);
                }
        }
 
-       mutex_unlock(&xaroot->d_inode->i_mutex);
+       mutex_unlock(&d_inode(xaroot)->i_mutex);
        dput(xaroot);
        return xadir;
 }
@@ -195,7 +195,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
                container_of(ctx, struct reiserfs_dentry_buf, ctx);
        struct dentry *dentry;
 
-       WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
+       WARN_ON_ONCE(!mutex_is_locked(&d_inode(dbuf->xadir)->i_mutex));
 
        if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
                return -ENOSPC;
@@ -207,7 +207,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
        dentry = lookup_one_len(name, dbuf->xadir, namelen);
        if (IS_ERR(dentry)) {
                return PTR_ERR(dentry);
-       } else if (!dentry->d_inode) {
+       } else if (d_really_is_negative(dentry)) {
                /* A directory entry exists, but no file? */
                reiserfs_error(dentry->d_sb, "xattr-20003",
                               "Corrupted directory: xattr %pd listed but "
@@ -249,16 +249,16 @@ static int reiserfs_for_each_xattr(struct inode *inode,
        if (IS_ERR(dir)) {
                err = PTR_ERR(dir);
                goto out;
-       } else if (!dir->d_inode) {
+       } else if (d_really_is_negative(dir)) {
                err = 0;
                goto out_dir;
        }
 
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
 
        buf.xadir = dir;
        while (1) {
-               err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx);
+               err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
                if (err)
                        break;
                if (!buf.count)
@@ -276,7 +276,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                        break;
                buf.count = 0;
        }
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
 
        cleanup_dentry_buf(&buf);
 
@@ -298,13 +298,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                if (!err) {
                        int jerror;
 
-                       mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
+                       mutex_lock_nested(&d_inode(dir->d_parent)->i_mutex,
                                          I_MUTEX_XATTR);
                        err = action(dir, data);
                        reiserfs_write_lock(inode->i_sb);
                        jerror = journal_end(&th);
                        reiserfs_write_unlock(inode->i_sb);
-                       mutex_unlock(&dir->d_parent->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dir->d_parent)->i_mutex);
                        err = jerror ?: err;
                }
        }
@@ -319,7 +319,7 @@ out:
 
 static int delete_one_xattr(struct dentry *dentry, void *data)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
 
        /* This is the xattr dir, handle specially. */
        if (d_is_dir(dentry))
@@ -384,27 +384,27 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
        if (IS_ERR(xadir))
                return ERR_CAST(xadir);
 
-       mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
        xafile = lookup_one_len(name, xadir, strlen(name));
        if (IS_ERR(xafile)) {
                err = PTR_ERR(xafile);
                goto out;
        }
 
-       if (xafile->d_inode && (flags & XATTR_CREATE))
+       if (d_really_is_positive(xafile) && (flags & XATTR_CREATE))
                err = -EEXIST;
 
-       if (!xafile->d_inode) {
+       if (d_really_is_negative(xafile)) {
                err = -ENODATA;
                if (xattr_may_create(flags))
-                       err = xattr_create(xadir->d_inode, xafile,
+                       err = xattr_create(d_inode(xadir), xafile,
                                              0700|S_IFREG);
        }
 
        if (err)
                dput(xafile);
 out:
-       mutex_unlock(&xadir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(xadir)->i_mutex);
        dput(xadir);
        if (err)
                return ERR_PTR(err);
@@ -469,21 +469,21 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
        if (IS_ERR(xadir))
                return PTR_ERR(xadir);
 
-       mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
        dentry = lookup_one_len(name, xadir, strlen(name));
        if (IS_ERR(dentry)) {
                err = PTR_ERR(dentry);
                goto out_dput;
        }
 
-       if (dentry->d_inode) {
-               err = xattr_unlink(xadir->d_inode, dentry);
+       if (d_really_is_positive(dentry)) {
+               err = xattr_unlink(d_inode(xadir), dentry);
                update_ctime(inode);
        }
 
        dput(dentry);
 out_dput:
-       mutex_unlock(&xadir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(xadir)->i_mutex);
        dput(xadir);
        return err;
 }
@@ -533,7 +533,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
                else
                        chunk = buffer_size - buffer_pos;
 
-               page = reiserfs_get_page(dentry->d_inode, file_pos);
+               page = reiserfs_get_page(d_inode(dentry), file_pos);
                if (IS_ERR(page)) {
                        err = PTR_ERR(page);
                        goto out_unlock;
@@ -573,18 +573,18 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
        }
 
        new_size = buffer_size + sizeof(struct reiserfs_xattr_header);
-       if (!err && new_size < i_size_read(dentry->d_inode)) {
+       if (!err && new_size < i_size_read(d_inode(dentry))) {
                struct iattr newattrs = {
                        .ia_ctime = current_fs_time(inode->i_sb),
                        .ia_size = new_size,
                        .ia_valid = ATTR_SIZE | ATTR_CTIME,
                };
 
-               mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
-               inode_dio_wait(dentry->d_inode);
+               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_XATTR);
+               inode_dio_wait(d_inode(dentry));
 
                err = reiserfs_setattr(dentry, &newattrs);
-               mutex_unlock(&dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dentry)->i_mutex);
        } else
                update_ctime(inode);
 out_unlock:
@@ -657,7 +657,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
 
        down_read(&REISERFS_I(inode)->i_xattr_sem);
 
-       isize = i_size_read(dentry->d_inode);
+       isize = i_size_read(d_inode(dentry));
 
        /* Just return the size needed */
        if (buffer == NULL) {
@@ -680,7 +680,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
                else
                        chunk = isize - file_pos;
 
-               page = reiserfs_get_page(dentry->d_inode, file_pos);
+               page = reiserfs_get_page(d_inode(dentry), file_pos);
                if (IS_ERR(page)) {
                        err = PTR_ERR(page);
                        goto out_unlock;
@@ -775,7 +775,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
 
        handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
-       if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+       if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
                return -EOPNOTSUPP;
 
        return handler->get(dentry, name, buffer, size, handler->flags);
@@ -784,7 +784,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
 /*
  * Inode operation setxattr()
  *
- * dentry->d_inode->i_mutex down
+ * d_inode(dentry)->i_mutex down
  */
 int
 reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
@@ -794,7 +794,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 
        handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
-       if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+       if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
                return -EOPNOTSUPP;
 
        return handler->set(dentry, name, value, size, flags, handler->flags);
@@ -803,7 +803,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 /*
  * Inode operation removexattr()
  *
- * dentry->d_inode->i_mutex down
+ * d_inode(dentry)->i_mutex down
  */
 int reiserfs_removexattr(struct dentry *dentry, const char *name)
 {
@@ -811,7 +811,7 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name)
 
        handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
 
-       if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+       if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
                return -EOPNOTSUPP;
 
        return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
@@ -875,14 +875,14 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
                .size = buffer ? size : 0,
        };
 
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                return -EINVAL;
 
        if (!dentry->d_sb->s_xattr ||
-           get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+           get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
                return -EOPNOTSUPP;
 
-       dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE);
+       dir = open_xa_dir(d_inode(dentry), XATTR_REPLACE);
        if (IS_ERR(dir)) {
                err = PTR_ERR(dir);
                if (err == -ENODATA)
@@ -890,9 +890,9 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
                goto out;
        }
 
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
-       err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+       err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
+       mutex_unlock(&d_inode(dir)->i_mutex);
 
        if (!err)
                err = buf.pos;
@@ -905,12 +905,12 @@ out:
 static int create_privroot(struct dentry *dentry)
 {
        int err;
-       struct inode *inode = dentry->d_parent->d_inode;
+       struct inode *inode = d_inode(dentry->d_parent);
 
        WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
 
        err = xattr_mkdir(inode, dentry, 0700);
-       if (err || !dentry->d_inode) {
+       if (err || d_really_is_negative(dentry)) {
                reiserfs_warning(dentry->d_sb, "jdm-20006",
                                 "xattrs/ACLs enabled and couldn't "
                                 "find/create .reiserfs_priv. "
@@ -918,7 +918,7 @@ static int create_privroot(struct dentry *dentry)
                return -EOPNOTSUPP;
        }
 
-       dentry->d_inode->i_flags |= S_PRIVATE;
+       d_inode(dentry)->i_flags |= S_PRIVATE;
        reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr "
                      "storage.\n", PRIVROOT_NAME);
 
@@ -997,17 +997,17 @@ int reiserfs_lookup_privroot(struct super_block *s)
        int err = 0;
 
        /* If we don't have the privroot located yet - go find it */
-       mutex_lock(&s->s_root->d_inode->i_mutex);
+       mutex_lock(&d_inode(s->s_root)->i_mutex);
        dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
                                strlen(PRIVROOT_NAME));
        if (!IS_ERR(dentry)) {
                REISERFS_SB(s)->priv_root = dentry;
                d_set_d_op(dentry, &xattr_lookup_poison_ops);
-               if (dentry->d_inode)
-                       dentry->d_inode->i_flags |= S_PRIVATE;
+               if (d_really_is_positive(dentry))
+                       d_inode(dentry)->i_flags |= S_PRIVATE;
        } else
                err = PTR_ERR(dentry);
-       mutex_unlock(&s->s_root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(s->s_root)->i_mutex);
 
        return err;
 }
@@ -1026,15 +1026,15 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
        if (err)
                goto error;
 
-       if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
-               mutex_lock(&s->s_root->d_inode->i_mutex);
+       if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) {
+               mutex_lock(&d_inode(s->s_root)->i_mutex);
                err = create_privroot(REISERFS_SB(s)->priv_root);
-               mutex_unlock(&s->s_root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(s->s_root)->i_mutex);
        }
 
-       if (privroot->d_inode) {
+       if (d_really_is_positive(privroot)) {
                s->s_xattr = reiserfs_xattr_handlers;
-               mutex_lock(&privroot->d_inode->i_mutex);
+               mutex_lock(&d_inode(privroot)->i_mutex);
                if (!REISERFS_SB(s)->xattr_root) {
                        struct dentry *dentry;
 
@@ -1045,7 +1045,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                        else
                                err = PTR_ERR(dentry);
                }
-               mutex_unlock(&privroot->d_inode->i_mutex);
+               mutex_unlock(&d_inode(privroot)->i_mutex);
        }
 
 error:
index f620e96..15dde62 100644 (file)
@@ -78,7 +78,7 @@ static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode)
 
        if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) {
                nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
-               if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode)
+               if (d_really_is_negative(REISERFS_SB(inode->i_sb)->xattr_root))
                        nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
        }
 
index e7f8939..9a3b061 100644 (file)
@@ -15,10 +15,10 @@ security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
        if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
                return -EINVAL;
 
-       if (IS_PRIVATE(dentry->d_inode))
+       if (IS_PRIVATE(d_inode(dentry)))
                return -EPERM;
 
-       return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
+       return reiserfs_xattr_get(d_inode(dentry), name, buffer, size);
 }
 
 static int
@@ -28,10 +28,10 @@ security_set(struct dentry *dentry, const char *name, const void *buffer,
        if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
                return -EINVAL;
 
-       if (IS_PRIVATE(dentry->d_inode))
+       if (IS_PRIVATE(d_inode(dentry)))
                return -EPERM;
 
-       return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
+       return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
 }
 
 static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
@@ -39,7 +39,7 @@ static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
 {
        const size_t len = namelen + 1;
 
-       if (IS_PRIVATE(dentry->d_inode))
+       if (IS_PRIVATE(d_inode(dentry)))
                return 0;
 
        if (list && len <= list_len) {
index 5eeb0c4..e4f1343 100644 (file)
@@ -14,10 +14,10 @@ trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
        if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
                return -EINVAL;
 
-       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
+       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
                return -EPERM;
 
-       return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
+       return reiserfs_xattr_get(d_inode(dentry), name, buffer, size);
 }
 
 static int
@@ -27,10 +27,10 @@ trusted_set(struct dentry *dentry, const char *name, const void *buffer,
        if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
                return -EINVAL;
 
-       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
+       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
                return -EPERM;
 
-       return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
+       return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
 }
 
 static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
@@ -38,7 +38,7 @@ static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
 {
        const size_t len = name_len + 1;
 
-       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
+       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
                return 0;
 
        if (list && len <= list_size) {
index e50eab0..d0b08d3 100644 (file)
@@ -15,7 +15,7 @@ user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
                return -EINVAL;
        if (!reiserfs_xattrs_user(dentry->d_sb))
                return -EOPNOTSUPP;
-       return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
+       return reiserfs_xattr_get(d_inode(dentry), name, buffer, size);
 }
 
 static int
@@ -27,7 +27,7 @@ user_set(struct dentry *dentry, const char *name, const void *buffer,
 
        if (!reiserfs_xattrs_user(dentry->d_sb))
                return -EOPNOTSUPP;
-       return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
+       return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
 }
 
 static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
index 5e1101f..8073b65 100644 (file)
@@ -110,7 +110,7 @@ static struct dentry *squashfs_fh_to_parent(struct super_block *sb,
 
 static struct dentry *squashfs_get_parent(struct dentry *child)
 {
-       struct inode *inode = child->d_inode;
+       struct inode *inode = d_inode(child);
        unsigned int parent_ino = squashfs_i(inode)->parent;
 
        return squashfs_export_iget(inode->i_sb, parent_ino);
index 92fcde7..e5e0ddf 100644 (file)
@@ -39,7 +39,7 @@ static const struct xattr_handler *squashfs_xattr_handler(int);
 ssize_t squashfs_listxattr(struct dentry *d, char *buffer,
        size_t buffer_size)
 {
-       struct inode *inode = d->d_inode;
+       struct inode *inode = d_inode(d);
        struct super_block *sb = inode->i_sb;
        struct squashfs_sb_info *msblk = sb->s_fs_info;
        u64 start = SQUASHFS_XATTR_BLK(squashfs_i(inode)->xattr)
@@ -229,7 +229,7 @@ static int squashfs_user_get(struct dentry *d, const char *name, void *buffer,
        if (name[0] == '\0')
                return  -EINVAL;
 
-       return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_USER, name,
+       return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_USER, name,
                buffer, size);
 }
 
@@ -259,7 +259,7 @@ static int squashfs_trusted_get(struct dentry *d, const char *name,
        if (name[0] == '\0')
                return  -EINVAL;
 
-       return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_TRUSTED, name,
+       return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_TRUSTED, name,
                buffer, size);
 }
 
@@ -286,7 +286,7 @@ static int squashfs_security_get(struct dentry *d, const char *name,
        if (name[0] == '\0')
                return  -EINVAL;
 
-       return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_SECURITY, name,
+       return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_SECURITY, name,
                buffer, size);
 }
 
index 19636af..cccc1aa 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL(generic_fillattr);
  */
 int vfs_getattr_nosec(struct path *path, struct kstat *stat)
 {
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
 
        if (inode->i_op->getattr)
                return inode->i_op->getattr(path->mnt, path->dentry, stat);
@@ -326,7 +326,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
 retry:
        error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty);
        if (!error) {
-               struct inode *inode = path.dentry->d_inode;
+               struct inode *inode = d_backing_inode(path.dentry);
 
                error = empty ? -ENOENT : -EINVAL;
                if (inode->i_op->readlink) {
index d42291d..8f3555f 100644 (file)
@@ -132,7 +132,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
 {
        const char * name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
-       struct inode * dir = dentry->d_parent->d_inode;
+       struct inode * dir = d_inode(dentry->d_parent);
        unsigned long start, n;
        unsigned long npages = dir_pages(dir);
        struct page *page = NULL;
@@ -176,7 +176,7 @@ found:
 
 int sysv_add_link(struct dentry *dentry, struct inode *inode)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        const char * name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
        struct page *page = NULL;
index a48e304..82ddc09 100644 (file)
@@ -30,7 +30,7 @@ const struct file_operations sysv_file_operations = {
 
 static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        error = inode_change_ok(inode, attr);
index 66bc316..2fde40a 100644 (file)
@@ -443,7 +443,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
 int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
        struct super_block *s = dentry->d_sb;
-       generic_fillattr(dentry->d_inode, stat);
+       generic_fillattr(d_inode(dentry), stat);
        stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
        stat->blksize = s->s_blocksize;
        return 0;
index 731b2bb..11e83ed 100644 (file)
@@ -118,7 +118,7 @@ out_fail:
 static int sysv_link(struct dentry * old_dentry, struct inode * dir, 
        struct dentry * dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
 
        inode->i_ctime = CURRENT_TIME_SEC;
        inode_inc_link_count(inode);
@@ -166,7 +166,7 @@ out_dir:
 
 static int sysv_unlink(struct inode * dir, struct dentry * dentry)
 {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
        struct page * page;
        struct sysv_dir_entry * de;
        int err = -ENOENT;
@@ -187,7 +187,7 @@ out:
 
 static int sysv_rmdir(struct inode * dir, struct dentry * dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int err = -ENOTEMPTY;
 
        if (sysv_empty_dir(inode)) {
@@ -208,8 +208,8 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry)
 static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
                  struct inode * new_dir, struct dentry * new_dentry)
 {
-       struct inode * old_inode = old_dentry->d_inode;
-       struct inode * new_inode = new_dentry->d_inode;
+       struct inode * old_inode = d_inode(old_dentry);
+       struct inode * new_inode = d_inode(new_dentry);
        struct page * dir_page = NULL;
        struct sysv_dir_entry * dir_de = NULL;
        struct page * old_page;
index 00d2f8a..d3fa0d7 100644 (file)
@@ -10,7 +10,7 @@
 
 static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       nd_set_link(nd, (char *)SYSV_I(dentry->d_inode)->i_data);
+       nd_set_link(nd, (char *)SYSV_I(d_inode(dentry))->i_data);
        return NULL;
 }
 
index 02d1ee7..27060fc 100644 (file)
@@ -499,7 +499,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
                      struct dentry *dentry)
 {
        struct ubifs_info *c = dir->i_sb->s_fs_info;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        struct ubifs_inode *ui = ubifs_inode(inode);
        struct ubifs_inode *dir_ui = ubifs_inode(dir);
        int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
@@ -554,7 +554,7 @@ out_cancel:
 static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct ubifs_info *c = dir->i_sb->s_fs_info;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ubifs_inode *dir_ui = ubifs_inode(dir);
        int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
        int err, budgeted = 1;
@@ -646,7 +646,7 @@ static int check_dir_empty(struct ubifs_info *c, struct inode *dir)
 static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        struct ubifs_info *c = dir->i_sb->s_fs_info;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
        int err, budgeted = 1;
        struct ubifs_inode *dir_ui = ubifs_inode(dir);
@@ -662,7 +662,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
                inode->i_ino, dir->i_ino);
        ubifs_assert(mutex_is_locked(&dir->i_mutex));
        ubifs_assert(mutex_is_locked(&inode->i_mutex));
-       err = check_dir_empty(c, dentry->d_inode);
+       err = check_dir_empty(c, d_inode(dentry));
        if (err)
                return err;
 
@@ -970,8 +970,8 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry)
 {
        struct ubifs_info *c = old_dir->i_sb->s_fs_info;
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode);
        int err, release, sync = 0, move = (new_dir != old_dir);
        int is_dir = S_ISDIR(old_inode->i_mode);
@@ -1136,7 +1136,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
                  struct kstat *stat)
 {
        loff_t size;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ubifs_inode *ui = ubifs_inode(inode);
 
        mutex_lock(&ui->ui_mutex);
index 3ba3fef..35efc10 100644 (file)
@@ -1257,7 +1257,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
 int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
 {
        int err;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct ubifs_info *c = inode->i_sb->s_fs_info;
 
        dbg_gen("ino %lu, mode %#x, ia_valid %#x",
@@ -1302,7 +1302,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset,
 
 static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct ubifs_inode *ui = ubifs_inode(dentry->d_inode);
+       struct ubifs_inode *ui = ubifs_inode(d_inode(dentry));
 
        nd_set_link(nd, ui->data);
        return NULL;
index 90ae1a8..0b9da5b 100644 (file)
@@ -930,8 +930,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
        union ubifs_key key;
        struct ubifs_dent_node *dent, *dent2;
        int err, dlen1, dlen2, ilen, lnum, offs, len;
-       const struct inode *old_inode = old_dentry->d_inode;
-       const struct inode *new_inode = new_dentry->d_inode;
+       const struct inode *old_inode = d_inode(old_dentry);
+       const struct inode *new_inode = d_inode(new_dentry);
        int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
        int last_reference = !!(new_inode && new_inode->i_nlink == 0);
        int move = (old_dir != new_dir);
index 3659b19..96f3448 100644 (file)
@@ -364,15 +364,15 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
                   const void *value, size_t size, int flags)
 {
        dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd",
-               name, dentry->d_inode->i_ino, dentry, size);
+               name, d_inode(dentry)->i_ino, dentry, size);
 
-       return setxattr(dentry->d_inode, name, value, size, flags);
+       return setxattr(d_inode(dentry), name, value, size, flags);
 }
 
 ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
                       size_t size)
 {
-       struct inode *inode, *host = dentry->d_inode;
+       struct inode *inode, *host = d_inode(dentry);
        struct ubifs_info *c = host->i_sb->s_fs_info;
        struct qstr nm = QSTR_INIT(name, strlen(name));
        struct ubifs_inode *ui;
@@ -432,7 +432,7 @@ out_unlock:
 ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
        union ubifs_key key;
-       struct inode *host = dentry->d_inode;
+       struct inode *host = d_inode(dentry);
        struct ubifs_info *c = host->i_sb->s_fs_info;
        struct ubifs_inode *host_ui = ubifs_inode(host);
        struct ubifs_dent_node *xent, *pxent = NULL;
@@ -535,7 +535,7 @@ out_cancel:
 
 int ubifs_removexattr(struct dentry *dentry, const char *name)
 {
-       struct inode *inode, *host = dentry->d_inode;
+       struct inode *inode, *host = d_inode(dentry);
        struct ubifs_info *c = host->i_sb->s_fs_info;
        struct qstr nm = QSTR_INIT(name, strlen(name));
        struct ubifs_dent_node *xent;
index 5dadad9..7a95b8f 100644 (file)
@@ -249,7 +249,7 @@ const struct file_operations udf_file_operations = {
 
 static int udf_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        int error;
 
        error = inode_change_ok(inode, attr);
index 3966197..5c03f0d 100644 (file)
@@ -551,7 +551,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
 static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
 {
        struct udf_inode_info *iinfo = UDF_I(inode);
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        struct udf_fileident_bh fibh;
        struct fileIdentDesc cfi, *fi;
        int err;
@@ -767,7 +767,7 @@ static int empty_dir(struct inode *dir)
 static int udf_rmdir(struct inode *dir, struct dentry *dentry)
 {
        int retval;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct udf_fileident_bh fibh;
        struct fileIdentDesc *fi, cfi;
        struct kernel_lb_addr tloc;
@@ -809,7 +809,7 @@ out:
 static int udf_unlink(struct inode *dir, struct dentry *dentry)
 {
        int retval;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct udf_fileident_bh fibh;
        struct fileIdentDesc *fi;
        struct fileIdentDesc cfi;
@@ -999,7 +999,7 @@ out_no_entry:
 static int udf_link(struct dentry *old_dentry, struct inode *dir,
                    struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        struct udf_fileident_bh fibh;
        struct fileIdentDesc cfi, *fi;
        int err;
@@ -1038,8 +1038,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
                      struct inode *new_dir, struct dentry *new_dentry)
 {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct udf_fileident_bh ofibh, nfibh;
        struct fileIdentDesc *ofi = NULL, *nfi = NULL, *dir_fi = NULL;
        struct fileIdentDesc ocfi, ncfi;
@@ -1179,7 +1179,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
        struct fileIdentDesc cfi;
        struct udf_fileident_bh fibh;
 
-       if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
+       if (!udf_find_entry(d_inode(child), &dotdot, &fibh, &cfi))
                return ERR_PTR(-EACCES);
 
        if (fibh.sbh != fibh.ebh)
@@ -1187,7 +1187,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
        brelse(fibh.sbh);
 
        tloc = lelb_to_cpu(cfi.icb.extLocation);
-       inode = udf_iget(child->d_inode->i_sb, &tloc);
+       inode = udf_iget(d_inode(child)->i_sb, &tloc);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
 
index 0ecc2ce..1bfe8ca 100644 (file)
@@ -311,7 +311,7 @@ found:
  */
 int ufs_add_link(struct dentry *dentry, struct inode *inode)
 {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
        const unsigned char *name = dentry->d_name.name;
        int namelen = dentry->d_name.len;
        struct super_block *sb = dir->i_sb;
index fd65deb..e491a93 100644 (file)
@@ -165,7 +165,7 @@ out_fail:
 static int ufs_link (struct dentry * old_dentry, struct inode * dir,
        struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        int error;
 
        lock_ufs(dir->i_sb);
@@ -222,7 +222,7 @@ out_fail:
 
 static int ufs_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
        struct ufs_dir_entry *de;
        struct page *page;
        int err = -ENOENT;
@@ -244,7 +244,7 @@ out:
 
 static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
 {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
        int err= -ENOTEMPTY;
 
        lock_ufs(dir->i_sb);
@@ -263,8 +263,8 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
 static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
                      struct inode *new_dir, struct dentry *new_dentry)
 {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
        struct page *dir_page = NULL;
        struct ufs_dir_entry * dir_de = NULL;
        struct page *old_page;
index 8092d37..b3bc3e7 100644 (file)
@@ -144,10 +144,10 @@ static struct dentry *ufs_get_parent(struct dentry *child)
        struct qstr dot_dot = QSTR_INIT("..", 2);
        ino_t ino;
 
-       ino = ufs_inode_by_name(child->d_inode, &dot_dot);
+       ino = ufs_inode_by_name(d_inode(child), &dot_dot);
        if (!ino)
                return ERR_PTR(-ENOENT);
-       return d_obtain_alias(ufs_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(ufs_iget(d_inode(child)->i_sb, ino));
 }
 
 static const struct export_operations ufs_export_ops = {
index d283628..5b537e2 100644 (file)
@@ -34,7 +34,7 @@
 
 static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       struct ufs_inode_info *p = UFS_I(dentry->d_inode);
+       struct ufs_inode_info *p = UFS_I(d_inode(dentry));
        nd_set_link(nd, (char*)p->i_u1.i_symlink);
        return NULL;
 }
index f04f89f..2115470 100644 (file)
@@ -492,7 +492,7 @@ out:
 
 int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        unsigned int ia_valid = attr->ia_valid;
        int error;
 
index a6fbf44..516162b 100644 (file)
@@ -260,6 +260,7 @@ xfs_alloc_fix_len(
                rlen = rlen - (k - args->mod);
        else
                rlen = rlen - args->prod + (args->mod - k);
+       /* casts to (int) catch length underflows */
        if ((int)rlen < (int)args->minlen)
                return;
        ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
@@ -286,7 +287,8 @@ xfs_alloc_fix_minleft(
        if (diff >= 0)
                return 1;
        args->len += diff;              /* shrink the allocated space */
-       if (args->len >= args->minlen)
+       /* casts to (int) catch length underflows */
+       if ((int)args->len >= (int)args->minlen)
                return 1;
        args->agbno = NULLAGBLOCK;
        return 0;
@@ -315,6 +317,9 @@ xfs_alloc_fixup_trees(
        xfs_agblock_t   nfbno2;         /* second new free startblock */
        xfs_extlen_t    nflen1=0;       /* first new free length */
        xfs_extlen_t    nflen2=0;       /* second new free length */
+       struct xfs_mount *mp;
+
+       mp = cnt_cur->bc_mp;
 
        /*
         * Look up the record in the by-size tree if necessary.
@@ -323,13 +328,13 @@ xfs_alloc_fixup_trees(
 #ifdef DEBUG
                if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 #endif
        } else {
                if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        }
        /*
         * Look up the record in the by-block tree if necessary.
@@ -338,13 +343,13 @@ xfs_alloc_fixup_trees(
 #ifdef DEBUG
                if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                        i == 1 && nfbno1 == fbno && nflen1 == flen);
 #endif
        } else {
                if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        }
 
 #ifdef DEBUG
@@ -355,7 +360,7 @@ xfs_alloc_fixup_trees(
                bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
                cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                        bnoblock->bb_numrecs == cntblock->bb_numrecs);
        }
 #endif
@@ -386,25 +391,25 @@ xfs_alloc_fixup_trees(
         */
        if ((error = xfs_btree_delete(cnt_cur, &i)))
                return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        /*
         * Add new by-size btree entry(s).
         */
        if (nfbno1 != NULLAGBLOCK) {
                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
                if ((error = xfs_btree_insert(cnt_cur, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        }
        if (nfbno2 != NULLAGBLOCK) {
                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
                if ((error = xfs_btree_insert(cnt_cur, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        }
        /*
         * Fix up the by-block btree entry(s).
@@ -415,7 +420,7 @@ xfs_alloc_fixup_trees(
                 */
                if ((error = xfs_btree_delete(bno_cur, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        } else {
                /*
                 * Update the by-block entry to start later|be shorter.
@@ -429,10 +434,10 @@ xfs_alloc_fixup_trees(
                 */
                if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
                if ((error = xfs_btree_insert(bno_cur, &i)))
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        }
        return 0;
 }
@@ -682,7 +687,7 @@ xfs_alloc_ag_vextent_exact(
        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
        if (error)
                goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
        ASSERT(fbno <= args->agbno);
 
        /*
@@ -783,7 +788,7 @@ xfs_alloc_find_best_extent(
                error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
                if (error)
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 
                /*
@@ -946,7 +951,7 @@ restart:
                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
                                                &ltlen, &i)))
                                        goto error0;
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                                if (ltlen >= args->minlen)
                                        break;
                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
@@ -966,7 +971,7 @@ restart:
                         */
                        if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
                                goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
                                                  &ltbnoa, &ltlena);
                        if (ltlena < args->minlen)
@@ -999,7 +1004,7 @@ restart:
                cnt_cur->bc_ptrs[0] = besti;
                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
                args->len = blen;
                if (!xfs_alloc_fix_minleft(args)) {
@@ -1088,7 +1093,7 @@ restart:
                if (bno_cur_lt) {
                        if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
                                goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        xfs_alloc_compute_aligned(args, ltbno, ltlen,
                                                  &ltbnoa, &ltlena);
                        if (ltlena >= args->minlen)
@@ -1104,7 +1109,7 @@ restart:
                if (bno_cur_gt) {
                        if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
                                goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        xfs_alloc_compute_aligned(args, gtbno, gtlen,
                                                  &gtbnoa, &gtlena);
                        if (gtlena >= args->minlen)
@@ -1303,7 +1308,7 @@ restart:
                        error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
                        if (error)
                                goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 
                        xfs_alloc_compute_aligned(args, fbno, flen,
                                                  &rbno, &rlen);
@@ -1342,7 +1347,7 @@ restart:
         * This can't happen in the second case above.
         */
        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-       XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+       XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
                        (rlen <= flen && rbno + rlen <= fbno + flen), error0);
        if (rlen < args->maxlen) {
                xfs_agblock_t   bestfbno;
@@ -1362,13 +1367,13 @@ restart:
                        if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
                                        &i)))
                                goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                        if (flen < bestrlen)
                                break;
                        xfs_alloc_compute_aligned(args, fbno, flen,
                                                  &rbno, &rlen);
                        rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-                       XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
                                (rlen <= flen && rbno + rlen <= fbno + flen),
                                error0);
                        if (rlen > bestrlen) {
@@ -1383,7 +1388,7 @@ restart:
                if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
                                &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                rlen = bestrlen;
                rbno = bestrbno;
                flen = bestflen;
@@ -1408,7 +1413,7 @@ restart:
        if (!xfs_alloc_fix_minleft(args))
                goto out_nominleft;
        rlen = args->len;
-       XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
+       XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
        /*
         * Allocate and initialize a cursor for the by-block tree.
         */
@@ -1422,7 +1427,7 @@ restart:
        cnt_cur = bno_cur = NULL;
        args->len = rlen;
        args->agbno = rbno;
-       XFS_WANT_CORRUPTED_GOTO(
+       XFS_WANT_CORRUPTED_GOTO(args->mp,
                args->agbno + args->len <=
                        be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
                error0);
@@ -1467,7 +1472,7 @@ xfs_alloc_ag_vextent_small(
        if (i) {
                if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
        }
        /*
         * Nothing in the btree, try the freelist.  Make sure
@@ -1493,7 +1498,7 @@ xfs_alloc_ag_vextent_small(
                        }
                        args->len = 1;
                        args->agbno = fbno;
-                       XFS_WANT_CORRUPTED_GOTO(
+                       XFS_WANT_CORRUPTED_GOTO(args->mp,
                                args->agbno + args->len <=
                                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
                                error0);
@@ -1579,7 +1584,7 @@ xfs_free_ag_extent(
                 */
                if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * It's not contiguous, though.
                 */
@@ -1591,7 +1596,8 @@ xfs_free_ag_extent(
                         * space was invalid, it's (partly) already free.
                         * Very bad.
                         */
-                       XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
+                       XFS_WANT_CORRUPTED_GOTO(mp,
+                                               ltbno + ltlen <= bno, error0);
                }
        }
        /*
@@ -1606,7 +1612,7 @@ xfs_free_ag_extent(
                 */
                if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * It's not contiguous, though.
                 */
@@ -1618,7 +1624,7 @@ xfs_free_ag_extent(
                         * space was invalid, it's (partly) already free.
                         * Very bad.
                         */
-                       XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
+                       XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
                }
        }
        /*
@@ -1635,31 +1641,31 @@ xfs_free_ag_extent(
                 */
                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                if ((error = xfs_btree_delete(cnt_cur, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Delete the old by-size entry on the right.
                 */
                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                if ((error = xfs_btree_delete(cnt_cur, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Delete the old by-block entry for the right block.
                 */
                if ((error = xfs_btree_delete(bno_cur, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Move the by-block cursor back to the left neighbor.
                 */
                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 #ifdef DEBUG
                /*
                 * Check that this is the right record: delete didn't
@@ -1672,7 +1678,7 @@ xfs_free_ag_extent(
                        if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
                                        &i)))
                                goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(
+                       XFS_WANT_CORRUPTED_GOTO(mp,
                                i == 1 && xxbno == ltbno && xxlen == ltlen,
                                error0);
                }
@@ -1695,17 +1701,17 @@ xfs_free_ag_extent(
                 */
                if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                if ((error = xfs_btree_delete(cnt_cur, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Back up the by-block cursor to the left neighbor, and
                 * update its length.
                 */
                if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                nbno = ltbno;
                nlen = len + ltlen;
                if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
@@ -1721,10 +1727,10 @@ xfs_free_ag_extent(
                 */
                if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                if ((error = xfs_btree_delete(cnt_cur, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                /*
                 * Update the starting block and length of the right
                 * neighbor in the by-block tree.
@@ -1743,7 +1749,7 @@ xfs_free_ag_extent(
                nlen = len;
                if ((error = xfs_btree_insert(bno_cur, &i)))
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
        }
        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
        bno_cur = NULL;
@@ -1752,10 +1758,10 @@ xfs_free_ag_extent(
         */
        if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
                goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
        if ((error = xfs_btree_insert(cnt_cur, &i)))
                goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
        cnt_cur = NULL;
 
index 15105db..04e79d5 100644 (file)
@@ -86,8 +86,83 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
                        int move_count);
 STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
 
+/*
+ * attr3 block 'firstused' conversion helpers.
+ *
+ * firstused refers to the offset of the first used byte of the nameval region
+ * of an attr leaf block. The region starts at the tail of the block and expands
+ * backwards towards the middle. As such, firstused is initialized to the block
+ * size for an empty leaf block and is reduced from there.
+ *
+ * The attr3 block size is pegged to the fsb size and the maximum fsb is 64k.
+ * The in-core firstused field is 32-bit and thus supports the maximum fsb size.
+ * The on-disk field is only 16-bit, however, and overflows at 64k. Since this
+ * only occurs at exactly 64k, we use zero as a magic on-disk value to represent
+ * the attr block size. The following helpers manage the conversion between the
+ * in-core and on-disk formats.
+ */
+
+static void
+xfs_attr3_leaf_firstused_from_disk(
+       struct xfs_da_geometry          *geo,
+       struct xfs_attr3_icleaf_hdr     *to,
+       struct xfs_attr_leafblock       *from)
+{
+       struct xfs_attr3_leaf_hdr       *hdr3;
+
+       if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) {
+               hdr3 = (struct xfs_attr3_leaf_hdr *) from;
+               to->firstused = be16_to_cpu(hdr3->firstused);
+       } else {
+               to->firstused = be16_to_cpu(from->hdr.firstused);
+       }
+
+       /*
+        * Convert from the magic fsb size value to actual blocksize. This
+        * should only occur for empty blocks when the block size overflows
+        * 16-bits.
+        */
+       if (to->firstused == XFS_ATTR3_LEAF_NULLOFF) {
+               ASSERT(!to->count && !to->usedbytes);
+               ASSERT(geo->blksize > USHRT_MAX);
+               to->firstused = geo->blksize;
+       }
+}
+
+static void
+xfs_attr3_leaf_firstused_to_disk(
+       struct xfs_da_geometry          *geo,
+       struct xfs_attr_leafblock       *to,
+       struct xfs_attr3_icleaf_hdr     *from)
+{
+       struct xfs_attr3_leaf_hdr       *hdr3;
+       uint32_t                        firstused;
+
+       /* magic value should only be seen on disk */
+       ASSERT(from->firstused != XFS_ATTR3_LEAF_NULLOFF);
+
+       /*
+        * Scale down the 32-bit in-core firstused value to the 16-bit on-disk
+        * value. This only overflows at the max supported value of 64k. Use the
+        * magic on-disk value to represent block size in this case.
+        */
+       firstused = from->firstused;
+       if (firstused > USHRT_MAX) {
+               ASSERT(from->firstused == geo->blksize);
+               firstused = XFS_ATTR3_LEAF_NULLOFF;
+       }
+
+       if (from->magic == XFS_ATTR3_LEAF_MAGIC) {
+               hdr3 = (struct xfs_attr3_leaf_hdr *) to;
+               hdr3->firstused = cpu_to_be16(firstused);
+       } else {
+               to->hdr.firstused = cpu_to_be16(firstused);
+       }
+}
+
 void
 xfs_attr3_leaf_hdr_from_disk(
+       struct xfs_da_geometry          *geo,
        struct xfs_attr3_icleaf_hdr     *to,
        struct xfs_attr_leafblock       *from)
 {
@@ -104,7 +179,7 @@ xfs_attr3_leaf_hdr_from_disk(
                to->magic = be16_to_cpu(hdr3->info.hdr.magic);
                to->count = be16_to_cpu(hdr3->count);
                to->usedbytes = be16_to_cpu(hdr3->usedbytes);
-               to->firstused = be16_to_cpu(hdr3->firstused);
+               xfs_attr3_leaf_firstused_from_disk(geo, to, from);
                to->holes = hdr3->holes;
 
                for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
@@ -118,7 +193,7 @@ xfs_attr3_leaf_hdr_from_disk(
        to->magic = be16_to_cpu(from->hdr.info.magic);
        to->count = be16_to_cpu(from->hdr.count);
        to->usedbytes = be16_to_cpu(from->hdr.usedbytes);
-       to->firstused = be16_to_cpu(from->hdr.firstused);
+       xfs_attr3_leaf_firstused_from_disk(geo, to, from);
        to->holes = from->hdr.holes;
 
        for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
@@ -129,10 +204,11 @@ xfs_attr3_leaf_hdr_from_disk(
 
 void
 xfs_attr3_leaf_hdr_to_disk(
+       struct xfs_da_geometry          *geo,
        struct xfs_attr_leafblock       *to,
        struct xfs_attr3_icleaf_hdr     *from)
 {
-       int     i;
+       int                             i;
 
        ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC ||
               from->magic == XFS_ATTR3_LEAF_MAGIC);
@@ -145,7 +221,7 @@ xfs_attr3_leaf_hdr_to_disk(
                hdr3->info.hdr.magic = cpu_to_be16(from->magic);
                hdr3->count = cpu_to_be16(from->count);
                hdr3->usedbytes = cpu_to_be16(from->usedbytes);
-               hdr3->firstused = cpu_to_be16(from->firstused);
+               xfs_attr3_leaf_firstused_to_disk(geo, to, from);
                hdr3->holes = from->holes;
                hdr3->pad1 = 0;
 
@@ -160,7 +236,7 @@ xfs_attr3_leaf_hdr_to_disk(
        to->hdr.info.magic = cpu_to_be16(from->magic);
        to->hdr.count = cpu_to_be16(from->count);
        to->hdr.usedbytes = cpu_to_be16(from->usedbytes);
-       to->hdr.firstused = cpu_to_be16(from->firstused);
+       xfs_attr3_leaf_firstused_to_disk(geo, to, from);
        to->hdr.holes = from->holes;
        to->hdr.pad1 = 0;
 
@@ -178,7 +254,7 @@ xfs_attr3_leaf_verify(
        struct xfs_attr_leafblock *leaf = bp->b_addr;
        struct xfs_attr3_icleaf_hdr ichdr;
 
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
 
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
@@ -757,9 +833,10 @@ xfs_attr_shortform_allfit(
        struct xfs_attr3_icleaf_hdr leafhdr;
        int                     bytes;
        int                     i;
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
 
        leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
        entry = xfs_attr3_leaf_entryp(leaf);
 
        bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -812,7 +889,7 @@ xfs_attr3_leaf_to_shortform(
        memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
 
        leaf = (xfs_attr_leafblock_t *)tmpbuffer;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
        entry = xfs_attr3_leaf_entryp(leaf);
 
        /* XXX (dgc): buffer is about to be marked stale - why zero it? */
@@ -923,7 +1000,7 @@ xfs_attr3_leaf_to_node(
        btree = dp->d_ops->node_tree_p(node);
 
        leaf = bp2->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &icleafhdr, leaf);
        entries = xfs_attr3_leaf_entryp(leaf);
 
        /* both on-disk, don't endian-flip twice */
@@ -988,7 +1065,7 @@ xfs_attr3_leaf_create(
        }
        ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
 
-       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+       xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
        xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1);
 
        *bpp = bp;
@@ -1073,7 +1150,7 @@ xfs_attr3_leaf_add(
        trace_xfs_attr_leaf_add(args);
 
        leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
        ASSERT(args->index >= 0 && args->index <= ichdr.count);
        entsize = xfs_attr_leaf_newentsize(args, NULL);
 
@@ -1126,7 +1203,7 @@ xfs_attr3_leaf_add(
        tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
 
 out_log_hdr:
-       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+       xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
        xfs_trans_log_buf(args->trans, bp,
                XFS_DA_LOGRANGE(leaf, &leaf->hdr,
                                xfs_attr3_leaf_hdr_size(leaf)));
@@ -1294,7 +1371,7 @@ xfs_attr3_leaf_compact(
                                                ichdr_dst->freemap[0].base;
 
        /* write the header back to initialise the underlying buffer */
-       xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
+       xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst);
 
        /*
         * Copy all entry's in the same (sorted) order,
@@ -1344,9 +1421,10 @@ xfs_attr_leaf_order(
 {
        struct xfs_attr3_icleaf_hdr ichdr1;
        struct xfs_attr3_icleaf_hdr ichdr2;
+       struct xfs_mount *mp = leaf1_bp->b_target->bt_mount;
 
-       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr);
-       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr);
        return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2);
 }
 
@@ -1388,8 +1466,8 @@ xfs_attr3_leaf_rebalance(
        ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
        leaf1 = blk1->bp->b_addr;
        leaf2 = blk2->bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
-       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr1, leaf1);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, leaf2);
        ASSERT(ichdr2.count == 0);
        args = state->args;
 
@@ -1490,8 +1568,8 @@ xfs_attr3_leaf_rebalance(
                                        ichdr1.count, count);
        }
 
-       xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
-       xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
+       xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf1, &ichdr1);
+       xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf2, &ichdr2);
        xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1);
        xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1);
 
@@ -1684,7 +1762,7 @@ xfs_attr3_leaf_toosmall(
         */
        blk = &state->path.blk[ state->path.active-1 ];
        leaf = blk->bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf);
        bytes = xfs_attr3_leaf_hdr_size(leaf) +
                ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
                ichdr.usedbytes;
@@ -1740,7 +1818,7 @@ xfs_attr3_leaf_toosmall(
                if (error)
                        return error;
 
-               xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
+               xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, bp->b_addr);
 
                bytes = state->args->geo->blksize -
                        (state->args->geo->blksize >> 2) -
@@ -1805,7 +1883,7 @@ xfs_attr3_leaf_remove(
        trace_xfs_attr_leaf_remove(args);
 
        leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
 
        ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
        ASSERT(args->index >= 0 && args->index < ichdr.count);
@@ -1918,12 +1996,11 @@ xfs_attr3_leaf_remove(
                                tmp = be16_to_cpu(entry->nameidx);
                }
                ichdr.firstused = tmp;
-               if (!ichdr.firstused)
-                       ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN;
+               ASSERT(ichdr.firstused != 0);
        } else {
                ichdr.holes = 1;        /* mark as needing compaction */
        }
-       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+       xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
        xfs_trans_log_buf(args->trans, bp,
                          XFS_DA_LOGRANGE(leaf, &leaf->hdr,
                                          xfs_attr3_leaf_hdr_size(leaf)));
@@ -1957,8 +2034,8 @@ xfs_attr3_leaf_unbalance(
 
        drop_leaf = drop_blk->bp->b_addr;
        save_leaf = save_blk->bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf);
-       xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf);
        entry = xfs_attr3_leaf_entryp(drop_leaf);
 
        /*
@@ -2012,7 +2089,7 @@ xfs_attr3_leaf_unbalance(
                tmphdr.firstused = state->args->geo->blksize;
 
                /* write the header to the temp buffer to initialise it */
-               xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
+               xfs_attr3_leaf_hdr_to_disk(state->args->geo, tmp_leaf, &tmphdr);
 
                if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
                                         drop_blk->bp, &drophdr)) {
@@ -2039,7 +2116,7 @@ xfs_attr3_leaf_unbalance(
                kmem_free(tmp_leaf);
        }
 
-       xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
+       xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr);
        xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
                                           state->args->geo->blksize - 1);
 
@@ -2085,7 +2162,7 @@ xfs_attr3_leaf_lookup_int(
        trace_xfs_attr_leaf_lookup(args);
 
        leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
        entries = xfs_attr3_leaf_entryp(leaf);
        ASSERT(ichdr.count < args->geo->blksize / 8);
 
@@ -2190,7 +2267,7 @@ xfs_attr3_leaf_getvalue(
        int                     valuelen;
 
        leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
        ASSERT(ichdr.count < args->geo->blksize / 8);
        ASSERT(args->index < ichdr.count);
 
@@ -2391,8 +2468,9 @@ xfs_attr_leaf_lasthash(
 {
        struct xfs_attr3_icleaf_hdr ichdr;
        struct xfs_attr_leaf_entry *entries;
+       struct xfs_mount *mp = bp->b_target->bt_mount;
 
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
        entries = xfs_attr3_leaf_entryp(bp->b_addr);
        if (count)
                *count = ichdr.count;
@@ -2486,7 +2564,7 @@ xfs_attr3_leaf_clearflag(
        ASSERT(entry->flags & XFS_ATTR_INCOMPLETE);
 
 #ifdef DEBUG
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
        ASSERT(args->index < ichdr.count);
        ASSERT(args->index >= 0);
 
@@ -2550,7 +2628,7 @@ xfs_attr3_leaf_setflag(
 
        leaf = bp->b_addr;
 #ifdef DEBUG
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
        ASSERT(args->index < ichdr.count);
        ASSERT(args->index >= 0);
 #endif
@@ -2629,11 +2707,11 @@ xfs_attr3_leaf_flipflags(
        entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2];
 
 #ifdef DEBUG
-       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr1, leaf1);
        ASSERT(args->index < ichdr1.count);
        ASSERT(args->index >= 0);
 
-       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr2, leaf2);
        ASSERT(args->index2 < ichdr2.count);
        ASSERT(args->index2 >= 0);
 
index e2929da..025c4b8 100644 (file)
@@ -100,9 +100,11 @@ int        xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
 int    xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
                        xfs_dablk_t bno, xfs_daddr_t mappedbno,
                        struct xfs_buf **bpp);
-void   xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
+void   xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo,
+                                    struct xfs_attr3_icleaf_hdr *to,
                                     struct xfs_attr_leafblock *from);
-void   xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,
+void   xfs_attr3_leaf_hdr_to_disk(struct xfs_da_geometry *geo,
+                                  struct xfs_attr_leafblock *to,
                                   struct xfs_attr3_icleaf_hdr *from);
 
 #endif /* __XFS_ATTR_LEAF_H__ */
index 61ec015..aeffeaa 100644 (file)
@@ -244,30 +244,6 @@ xfs_bmap_forkoff_reset(
        }
 }
 
-/*
- * Debug/sanity checking code
- */
-
-STATIC int
-xfs_bmap_sanity_check(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp,
-       int                     level)
-{
-       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-
-       if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
-           block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
-               return 0;
-
-       if (be16_to_cpu(block->bb_level) != level ||
-           be16_to_cpu(block->bb_numrecs) == 0 ||
-           be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
-               return 0;
-
-       return 1;
-}
-
 #ifdef DEBUG
 STATIC struct xfs_buf *
 xfs_bmap_get_bp(
@@ -410,9 +386,6 @@ xfs_bmap_check_leaf_extents(
                                goto error_norelse;
                }
                block = XFS_BUF_TO_BLOCK(bp);
-               XFS_WANT_CORRUPTED_GOTO(
-                       xfs_bmap_sanity_check(mp, bp, level),
-                       error0);
                if (level == 0)
                        break;
 
@@ -424,7 +397,8 @@ xfs_bmap_check_leaf_extents(
                xfs_check_block(block, mp, 0, 0);
                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
                bno = be64_to_cpu(*pp);
-               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                                       XFS_FSB_SANITY_CHECK(mp, bno), error0);
                if (bp_release) {
                        bp_release = 0;
                        xfs_trans_brelse(NULL, bp);
@@ -1029,7 +1003,7 @@ xfs_bmap_add_attrfork_btree(
                if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
                        goto error0;
                /* must be at least one entry */
-               XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
                if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
                        goto error0;
                if (stat == 0) {
@@ -1311,14 +1285,12 @@ xfs_bmap_read_extents(
                if (error)
                        return error;
                block = XFS_BUF_TO_BLOCK(bp);
-               XFS_WANT_CORRUPTED_GOTO(
-                       xfs_bmap_sanity_check(mp, bp, level),
-                       error0);
                if (level == 0)
                        break;
                pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
                bno = be64_to_cpu(*pp);
-               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                       XFS_FSB_SANITY_CHECK(mp, bno), error0);
                xfs_trans_brelse(tp, bp);
        }
        /*
@@ -1345,9 +1317,6 @@ xfs_bmap_read_extents(
                                XFS_ERRLEVEL_LOW, ip->i_mount, block);
                        goto error0;
                }
-               XFS_WANT_CORRUPTED_GOTO(
-                       xfs_bmap_sanity_check(mp, bp, 0),
-                       error0);
                /*
                 * Read-ahead the next leaf block, if any.
                 */
@@ -1755,7 +1724,9 @@ xfs_bmap_add_extent_delay_real(
        xfs_filblks_t           temp=0; /* value for da_new calculations */
        xfs_filblks_t           temp2=0;/* value for da_new calculations */
        int                     tmp_rval;       /* partial logging flags */
+       struct xfs_mount        *mp;
 
+       mp  = bma->tp ? bma->tp->t_mountp : NULL;
        ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
 
        ASSERT(bma->idx >= 0);
@@ -1866,15 +1837,15 @@ xfs_bmap_add_extent_delay_real(
                                        RIGHT.br_blockcount, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_btree_delete(bma->cur, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_btree_decrement(bma->cur, 0, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
                                        LEFT.br_startblock,
                                        LEFT.br_blockcount +
@@ -1907,7 +1878,7 @@ xfs_bmap_add_extent_delay_real(
                                        &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
                                        LEFT.br_startblock,
                                        LEFT.br_blockcount +
@@ -1938,7 +1909,7 @@ xfs_bmap_add_extent_delay_real(
                                        RIGHT.br_blockcount, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
                                        new->br_startblock,
                                        PREV.br_blockcount +
@@ -1968,12 +1939,12 @@ xfs_bmap_add_extent_delay_real(
                                        &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        error = xfs_btree_insert(bma->cur, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
                break;
 
@@ -2001,7 +1972,7 @@ xfs_bmap_add_extent_delay_real(
                                        &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
                                        LEFT.br_startblock,
                                        LEFT.br_blockcount +
@@ -2038,12 +2009,12 @@ xfs_bmap_add_extent_delay_real(
                                        &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        error = xfs_btree_insert(bma->cur, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
 
                if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2084,7 +2055,7 @@ xfs_bmap_add_extent_delay_real(
                                        RIGHT.br_blockcount, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, new->br_startoff,
                                        new->br_startblock,
                                        new->br_blockcount +
@@ -2122,12 +2093,12 @@ xfs_bmap_add_extent_delay_real(
                                        &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        error = xfs_btree_insert(bma->cur, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
 
                if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2191,12 +2162,12 @@ xfs_bmap_add_extent_delay_real(
                                        &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                        bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        error = xfs_btree_insert(bma->cur, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
 
                if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2212,9 +2183,8 @@ xfs_bmap_add_extent_delay_real(
                diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
                        (bma->cur ? bma->cur->bc_private.b.allocated : 0));
                if (diff > 0) {
-                       error = xfs_icsb_modify_counters(bma->ip->i_mount,
-                                       XFS_SBS_FDBLOCKS,
-                                       -((int64_t)diff), 0);
+                       error = xfs_mod_fdblocks(bma->ip->i_mount,
+                                                -((int64_t)diff), false);
                        ASSERT(!error);
                        if (error)
                                goto done;
@@ -2265,9 +2235,8 @@ xfs_bmap_add_extent_delay_real(
                        temp += bma->cur->bc_private.b.allocated;
                ASSERT(temp <= da_old);
                if (temp < da_old)
-                       xfs_icsb_modify_counters(bma->ip->i_mount,
-                                       XFS_SBS_FDBLOCKS,
-                                       (int64_t)(da_old - temp), 0);
+                       xfs_mod_fdblocks(bma->ip->i_mount,
+                                       (int64_t)(da_old - temp), false);
        }
 
        /* clear out the allocated field, done with it now in any case. */
@@ -2309,6 +2278,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        /* left is 0, right is 1, prev is 2 */
        int                     rval=0; /* return value (logging flags) */
        int                     state = 0;/* state bits, accessed thru macros */
+       struct xfs_mount        *mp = tp->t_mountp;
 
        *logflagsp = 0;
 
@@ -2421,19 +2391,19 @@ xfs_bmap_add_extent_unwritten_real(
                                        RIGHT.br_startblock,
                                        RIGHT.br_blockcount, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_btree_delete(cur, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_btree_decrement(cur, 0, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_btree_delete(cur, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_btree_decrement(cur, 0, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                LEFT.br_startblock,
                                LEFT.br_blockcount + PREV.br_blockcount +
@@ -2464,13 +2434,13 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_btree_delete(cur, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_btree_decrement(cur, 0, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                LEFT.br_startblock,
                                LEFT.br_blockcount + PREV.br_blockcount,
@@ -2499,13 +2469,13 @@ xfs_bmap_add_extent_unwritten_real(
                                        RIGHT.br_startblock,
                                        RIGHT.br_blockcount, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_btree_delete(cur, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_btree_decrement(cur, 0, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
                                new->br_startblock,
                                new->br_blockcount + RIGHT.br_blockcount,
@@ -2532,7 +2502,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        new->br_startblock, new->br_blockcount,
                                        &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_bmbt_update(cur, new->br_startoff,
                                new->br_startblock, new->br_blockcount,
                                newext)))
@@ -2569,7 +2539,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_bmbt_update(cur,
                                PREV.br_startoff + new->br_blockcount,
                                PREV.br_startblock + new->br_blockcount,
@@ -2611,7 +2581,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_bmbt_update(cur,
                                PREV.br_startoff + new->br_blockcount,
                                PREV.br_startblock + new->br_blockcount,
@@ -2621,7 +2591,7 @@ xfs_bmap_add_extent_unwritten_real(
                        cur->bc_rec.b = *new;
                        if ((error = xfs_btree_insert(cur, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
                break;
 
@@ -2651,7 +2621,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock,
                                        PREV.br_blockcount, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
                                PREV.br_startblock,
                                PREV.br_blockcount - new->br_blockcount,
@@ -2689,7 +2659,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
                                PREV.br_startblock,
                                PREV.br_blockcount - new->br_blockcount,
@@ -2699,11 +2669,11 @@ xfs_bmap_add_extent_unwritten_real(
                                        new->br_startblock, new->br_blockcount,
                                        &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                        cur->bc_rec.b.br_state = XFS_EXT_NORM;
                        if ((error = xfs_btree_insert(cur, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
                break;
 
@@ -2737,7 +2707,7 @@ xfs_bmap_add_extent_unwritten_real(
                                        PREV.br_startblock, PREV.br_blockcount,
                                        &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        /* new right extent - oldext */
                        if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
                                r[1].br_startblock, r[1].br_blockcount,
@@ -2749,7 +2719,7 @@ xfs_bmap_add_extent_unwritten_real(
                                new->br_startoff - PREV.br_startoff;
                        if ((error = xfs_btree_insert(cur, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        /*
                         * Reset the cursor to the position of the new extent
                         * we are about to insert as we can't trust it after
@@ -2759,12 +2729,12 @@ xfs_bmap_add_extent_unwritten_real(
                                        new->br_startblock, new->br_blockcount,
                                        &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                        /* new middle extent - newext */
                        cur->bc_rec.b.br_state = new->br_state;
                        if ((error = xfs_btree_insert(cur, &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
                break;
 
@@ -2944,8 +2914,8 @@ xfs_bmap_add_extent_hole_delay(
        }
        if (oldlen != newlen) {
                ASSERT(oldlen > newlen);
-               xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-                       (int64_t)(oldlen - newlen), 0);
+               xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
+                                false);
                /*
                 * Nothing to do for disk quota accounting here.
                 */
@@ -2968,7 +2938,9 @@ xfs_bmap_add_extent_hole_real(
        xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
        int                     rval=0; /* return value (logging flags) */
        int                     state;  /* state bits, accessed thru macros */
+       struct xfs_mount        *mp;
 
+       mp = bma->tp ? bma->tp->t_mountp : NULL;
        ifp = XFS_IFORK_PTR(bma->ip, whichfork);
 
        ASSERT(bma->idx >= 0);
@@ -3056,15 +3028,15 @@ xfs_bmap_add_extent_hole_real(
                                        &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_btree_delete(bma->cur, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_btree_decrement(bma->cur, 0, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, left.br_startoff,
                                        left.br_startblock,
                                        left.br_blockcount +
@@ -3097,7 +3069,7 @@ xfs_bmap_add_extent_hole_real(
                                        &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, left.br_startoff,
                                        left.br_startblock,
                                        left.br_blockcount +
@@ -3131,7 +3103,7 @@ xfs_bmap_add_extent_hole_real(
                                        right.br_blockcount, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        error = xfs_bmbt_update(bma->cur, new->br_startoff,
                                        new->br_startblock,
                                        new->br_blockcount +
@@ -3161,12 +3133,12 @@ xfs_bmap_add_extent_hole_real(
                                        new->br_blockcount, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                        bma->cur->bc_rec.b.br_state = new->br_state;
                        error = xfs_btree_insert(bma->cur, &i);
                        if (error)
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
                break;
        }
@@ -4160,18 +4132,15 @@ xfs_bmapi_reserve_delalloc(
        ASSERT(indlen > 0);
 
        if (rt) {
-               error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-                                         -((int64_t)extsz), 0);
+               error = xfs_mod_frextents(mp, -((int64_t)extsz));
        } else {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                -((int64_t)alen), 0);
+               error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
        }
 
        if (error)
                goto out_unreserve_quota;
 
-       error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                        -((int64_t)indlen), 0);
+       error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
        if (error)
                goto out_unreserve_blocks;
 
@@ -4198,9 +4167,9 @@ xfs_bmapi_reserve_delalloc(
 
 out_unreserve_blocks:
        if (rt)
-               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
+               xfs_mod_frextents(mp, extsz);
        else
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
+               xfs_mod_fdblocks(mp, alen, false);
 out_unreserve_quota:
        if (XFS_IS_QUOTA_ON(mp))
                xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
@@ -4801,7 +4770,7 @@ xfs_bmap_del_extent(
                                        got.br_startblock, got.br_blockcount,
                                        &i)))
                                goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                }
                da_old = da_new = 0;
        } else {
@@ -4835,7 +4804,7 @@ xfs_bmap_del_extent(
                }
                if ((error = xfs_btree_delete(cur, &i)))
                        goto done;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                break;
 
        case 2:
@@ -4935,7 +4904,8 @@ xfs_bmap_del_extent(
                                                        got.br_startblock,
                                                        temp, &i)))
                                                goto done;
-                                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                                       XFS_WANT_CORRUPTED_GOTO(mp,
+                                                               i == 1, done);
                                        /*
                                         * Update the btree record back
                                         * to the original value.
@@ -4956,7 +4926,7 @@ xfs_bmap_del_extent(
                                        error = -ENOSPC;
                                        goto done;
                                }
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                        } else
                                flags |= xfs_ilog_fext(whichfork);
                        XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -5012,10 +4982,8 @@ xfs_bmap_del_extent(
         * Nothing to do for disk quota accounting here.
         */
        ASSERT(da_old >= da_new);
-       if (da_old > da_new) {
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                       (int64_t)(da_old - da_new), 0);
-       }
+       if (da_old > da_new)
+               xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
 done:
        *logflagsp = flags;
        return error;
@@ -5284,14 +5252,13 @@ xfs_bunmapi(
 
                                rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
                                do_div(rtexts, mp->m_sb.sb_rextsize);
-                               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-                                               (int64_t)rtexts, 0);
+                               xfs_mod_frextents(mp, (int64_t)rtexts);
                                (void)xfs_trans_reserve_quota_nblks(NULL,
                                        ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_RTBLKS);
                        } else {
-                               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                               (int64_t)del.br_blockcount, 0);
+                               xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
+                                                false);
                                (void)xfs_trans_reserve_quota_nblks(NULL,
                                        ip, -((long)del.br_blockcount), 0,
                                        XFS_QMOPT_RES_REGBLKS);
@@ -5453,6 +5420,7 @@ xfs_bmse_merge(
        struct xfs_bmbt_irec            left;
        xfs_filblks_t                   blockcount;
        int                             error, i;
+       struct xfs_mount                *mp = ip->i_mount;
 
        xfs_bmbt_get_all(gotp, &got);
        xfs_bmbt_get_all(leftp, &left);
@@ -5487,19 +5455,19 @@ xfs_bmse_merge(
                                   got.br_blockcount, &i);
        if (error)
                return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
        error = xfs_btree_delete(cur, &i);
        if (error)
                return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
        /* lookup and update size of the previous extent */
        error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
                                   left.br_blockcount, &i);
        if (error)
                return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
        left.br_blockcount = blockcount;
 
@@ -5518,50 +5486,92 @@ xfs_bmse_shift_one(
        int                             *current_ext,
        struct xfs_bmbt_rec_host        *gotp,
        struct xfs_btree_cur            *cur,
-       int                             *logflags)
+       int                             *logflags,
+       enum shift_direction            direction)
 {
        struct xfs_ifork                *ifp;
+       struct xfs_mount                *mp;
        xfs_fileoff_t                   startoff;
-       struct xfs_bmbt_rec_host        *leftp;
+       struct xfs_bmbt_rec_host        *adj_irecp;
        struct xfs_bmbt_irec            got;
-       struct xfs_bmbt_irec            left;
+       struct xfs_bmbt_irec            adj_irec;
        int                             error;
        int                             i;
+       int                             total_extents;
 
+       mp = ip->i_mount;
        ifp = XFS_IFORK_PTR(ip, whichfork);
+       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
 
        xfs_bmbt_get_all(gotp, &got);
-       startoff = got.br_startoff - offset_shift_fsb;
 
        /* delalloc extents should be prevented by caller */
-       XFS_WANT_CORRUPTED_RETURN(!isnullstartblock(got.br_startblock));
+       XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
 
-       /*
-        * Check for merge if we've got an extent to the left, otherwise make
-        * sure there's enough room at the start of the file for the shift.
-        */
-       if (*current_ext) {
-               /* grab the left extent and check for a large enough hole */
-               leftp = xfs_iext_get_ext(ifp, *current_ext - 1);
-               xfs_bmbt_get_all(leftp, &left);
+       if (direction == SHIFT_LEFT) {
+               startoff = got.br_startoff - offset_shift_fsb;
+
+               /*
+                * Check for merge if we've got an extent to the left,
+                * otherwise make sure there's enough room at the start
+                * of the file for the shift.
+                */
+               if (!*current_ext) {
+                       if (got.br_startoff < offset_shift_fsb)
+                               return -EINVAL;
+                       goto update_current_ext;
+               }
+               /*
+                * grab the left extent and check for a large
+                * enough hole.
+                */
+               adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
+               xfs_bmbt_get_all(adj_irecp, &adj_irec);
 
-               if (startoff < left.br_startoff + left.br_blockcount)
+               if (startoff <
+                   adj_irec.br_startoff + adj_irec.br_blockcount)
                        return -EINVAL;
 
                /* check whether to merge the extent or shift it down */
-               if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) {
+               if (xfs_bmse_can_merge(&adj_irec, &got,
+                                      offset_shift_fsb)) {
                        return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
-                                             *current_ext, gotp, leftp, cur,
-                                             logflags);
+                                             *current_ext, gotp, adj_irecp,
+                                             cur, logflags);
                }
-       } else if (got.br_startoff < offset_shift_fsb)
-               return -EINVAL;
-
+       } else {
+               startoff = got.br_startoff + offset_shift_fsb;
+               /* nothing to move if this is the last extent */
+               if (*current_ext >= (total_extents - 1))
+                       goto update_current_ext;
+               /*
+                * If this is not the last extent in the file, make sure there
+                * is enough room between current extent and next extent for
+                * accommodating the shift.
+                */
+               adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
+               xfs_bmbt_get_all(adj_irecp, &adj_irec);
+               if (startoff + got.br_blockcount > adj_irec.br_startoff)
+                       return -EINVAL;
+               /*
+                * Unlike a left shift (which involves a hole punch),
+                * a right shift does not modify extent neighbors
+                * in any way. We should never find mergeable extents
+                * in this scenario. Check anyways and warn if we
+                * encounter two extents that could be one.
+                */
+               if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
+                       WARN_ON_ONCE(1);
+       }
        /*
         * Increment the extent index for the next iteration, update the start
         * offset of the in-core extent and update the btree if applicable.
         */
-       (*current_ext)++;
+update_current_ext:
+       if (direction == SHIFT_LEFT)
+               (*current_ext)++;
+       else
+               (*current_ext)--;
        xfs_bmbt_set_startoff(gotp, startoff);
        *logflags |= XFS_ILOG_CORE;
        if (!cur) {
@@ -5573,18 +5583,18 @@ xfs_bmse_shift_one(
                                   got.br_blockcount, &i);
        if (error)
                return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 
        got.br_startoff = startoff;
        return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
-                               got.br_blockcount, got.br_state);
+                              got.br_blockcount, got.br_state);
 }
 
 /*
- * Shift extent records to the left to cover a hole.
+ * Shift extent records to the left/right to cover/create a hole.
  *
  * The maximum number of extents to be shifted in a single operation is
- * @num_exts. @start_fsb specifies the file offset to start the shift and the
+ * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
  * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
  * is the length by which each extent is shifted. If there is no hole to shift
  * the extents into, this will be considered invalid operation and we abort
@@ -5594,12 +5604,13 @@ int
 xfs_bmap_shift_extents(
        struct xfs_trans        *tp,
        struct xfs_inode        *ip,
-       xfs_fileoff_t           start_fsb,
+       xfs_fileoff_t           *next_fsb,
        xfs_fileoff_t           offset_shift_fsb,
        int                     *done,
-       xfs_fileoff_t           *next_fsb,
+       xfs_fileoff_t           stop_fsb,
        xfs_fsblock_t           *firstblock,
        struct xfs_bmap_free    *flist,
+       enum shift_direction    direction,
        int                     num_exts)
 {
        struct xfs_btree_cur            *cur = NULL;
@@ -5609,10 +5620,11 @@ xfs_bmap_shift_extents(
        struct xfs_ifork                *ifp;
        xfs_extnum_t                    nexts = 0;
        xfs_extnum_t                    current_ext;
+       xfs_extnum_t                    total_extents;
+       xfs_extnum_t                    stop_extent;
        int                             error = 0;
        int                             whichfork = XFS_DATA_FORK;
        int                             logflags = 0;
-       int                             total_extents;
 
        if (unlikely(XFS_TEST_ERROR(
            (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5628,6 +5640,8 @@ xfs_bmap_shift_extents(
 
        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
+       ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
 
        ifp = XFS_IFORK_PTR(ip, whichfork);
        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -5644,44 +5658,84 @@ xfs_bmap_shift_extents(
                cur->bc_private.b.flags = 0;
        }
 
+       /*
+        * There may be delalloc extents in the data fork before the range we
+        * are collapsing out, so we cannot use the count of real extents here.
+        * Instead we have to calculate it from the incore fork.
+        */
+       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+       if (total_extents == 0) {
+               *done = 1;
+               goto del_cursor;
+       }
+
+       /*
+        * In case of first right shift, we need to initialize next_fsb
+        */
+       if (*next_fsb == NULLFSBLOCK) {
+               gotp = xfs_iext_get_ext(ifp, total_extents - 1);
+               xfs_bmbt_get_all(gotp, &got);
+               *next_fsb = got.br_startoff;
+               if (stop_fsb > *next_fsb) {
+                       *done = 1;
+                       goto del_cursor;
+               }
+       }
+
+       /* Lookup the extent index at which we have to stop */
+       if (direction == SHIFT_RIGHT) {
+               gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
+               /* Make stop_extent exclusive of shift range */
+               stop_extent--;
+       } else
+               stop_extent = total_extents;
+
        /*
         * Look up the extent index for the fsb where we start shifting. We can
         * henceforth iterate with current_ext as extent list changes are locked
         * out via ilock.
         *
         * gotp can be null in 2 cases: 1) if there are no extents or 2)
-        * start_fsb lies in a hole beyond which there are no extents. Either
+        * *next_fsb lies in a hole beyond which there are no extents. Either
         * way, we are done.
         */
-       gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext);
+       gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
        if (!gotp) {
                *done = 1;
                goto del_cursor;
        }
 
-       /*
-        * There may be delalloc extents in the data fork before the range we
-        * are collapsing out, so we cannot use the count of real extents here.
-        * Instead we have to calculate it from the incore fork.
-        */
-       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-       while (nexts++ < num_exts && current_ext < total_extents) {
+       /* some sanity checking before we finally start shifting extents */
+       if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
+            (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
+               error = -EIO;
+               goto del_cursor;
+       }
+
+       while (nexts++ < num_exts) {
                error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
-                                       &current_ext, gotp, cur, &logflags);
+                                          &current_ext, gotp, cur, &logflags,
+                                          direction);
                if (error)
                        goto del_cursor;
+               /*
+                * If there was an extent merge during the shift, the extent
+                * count can change. Update the total and grade the next record.
+                */
+               if (direction == SHIFT_LEFT) {
+                       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+                       stop_extent = total_extents;
+               }
 
-               /* update total extent count and grab the next record */
-               total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-               if (current_ext >= total_extents)
+               if (current_ext == stop_extent) {
+                       *done = 1;
+                       *next_fsb = NULLFSBLOCK;
                        break;
+               }
                gotp = xfs_iext_get_ext(ifp, current_ext);
        }
 
-       /* Check if we are done */
-       if (current_ext == total_extents) {
-               *done = 1;
-       } else if (next_fsb) {
+       if (!*done) {
                xfs_bmbt_get_all(gotp, &got);
                *next_fsb = got.br_startoff;
        }
@@ -5696,3 +5750,189 @@ del_cursor:
 
        return error;
 }
+
+/*
+ * Splits an extent into two extents at split_fsb block such that it is
+ * the first block of the current_ext. @current_ext is a target extent
+ * to be split. @split_fsb is a block where the extents is split.
+ * If split_fsb lies in a hole or the first block of extents, just return 0.
+ */
+STATIC int
+xfs_bmap_split_extent_at(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           split_fsb,
+       xfs_fsblock_t           *firstfsb,
+       struct xfs_bmap_free    *free_list)
+{
+       int                             whichfork = XFS_DATA_FORK;
+       struct xfs_btree_cur            *cur = NULL;
+       struct xfs_bmbt_rec_host        *gotp;
+       struct xfs_bmbt_irec            got;
+       struct xfs_bmbt_irec            new; /* split extent */
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_ifork                *ifp;
+       xfs_fsblock_t                   gotblkcnt; /* new block count for got */
+       xfs_extnum_t                    current_ext;
+       int                             error = 0;
+       int                             logflags = 0;
+       int                             i = 0;
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
+                                XFS_ERRLEVEL_LOW, mp);
+               return -EFSCORRUPTED;
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               /* Read in all the extents */
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * gotp can be null in 2 cases: 1) if there are no extents
+        * or 2) split_fsb lies in a hole beyond which there are
+        * no extents. Either way, we are done.
+        */
+       gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
+       if (!gotp)
+               return 0;
+
+       xfs_bmbt_get_all(gotp, &got);
+
+       /*
+        * Check split_fsb lies in a hole or the start boundary offset
+        * of the extent.
+        */
+       if (got.br_startoff >= split_fsb)
+               return 0;
+
+       gotblkcnt = split_fsb - got.br_startoff;
+       new.br_startoff = split_fsb;
+       new.br_startblock = got.br_startblock + gotblkcnt;
+       new.br_blockcount = got.br_blockcount - gotblkcnt;
+       new.br_state = got.br_state;
+
+       if (ifp->if_flags & XFS_IFBROOT) {
+               cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+               cur->bc_private.b.firstblock = *firstfsb;
+               cur->bc_private.b.flist = free_list;
+               cur->bc_private.b.flags = 0;
+               error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                               got.br_startblock,
+                               got.br_blockcount,
+                               &i);
+               if (error)
+                       goto del_cursor;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
+       }
+
+       xfs_bmbt_set_blockcount(gotp, gotblkcnt);
+       got.br_blockcount = gotblkcnt;
+
+       logflags = XFS_ILOG_CORE;
+       if (cur) {
+               error = xfs_bmbt_update(cur, got.br_startoff,
+                               got.br_startblock,
+                               got.br_blockcount,
+                               got.br_state);
+               if (error)
+                       goto del_cursor;
+       } else
+               logflags |= XFS_ILOG_DEXT;
+
+       /* Add new extent */
+       current_ext++;
+       xfs_iext_insert(ip, current_ext, 1, &new, 0);
+       XFS_IFORK_NEXT_SET(ip, whichfork,
+                          XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+
+       if (cur) {
+               error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
+                               new.br_startblock, new.br_blockcount,
+                               &i);
+               if (error)
+                       goto del_cursor;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
+               cur->bc_rec.b.br_state = new.br_state;
+
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto del_cursor;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
+       }
+
+       /*
+        * Convert to a btree if necessary.
+        */
+       if (xfs_bmap_needs_btree(ip, whichfork)) {
+               int tmp_logflags; /* partial log flag return val */
+
+               ASSERT(cur == NULL);
+               error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
+                               &cur, 0, &tmp_logflags, whichfork);
+               logflags |= tmp_logflags;
+       }
+
+del_cursor:
+       if (cur) {
+               cur->bc_private.b.allocated = 0;
+               xfs_btree_del_cursor(cur,
+                               error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       }
+
+       if (logflags)
+               xfs_trans_log_inode(tp, ip, logflags);
+       return error;
+}
+
+int
+xfs_bmap_split_extent(
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           split_fsb)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       struct xfs_bmap_free    free_list;
+       xfs_fsblock_t           firstfsb;
+       int                     committed;
+       int                     error;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+                       XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+       xfs_bmap_init(&free_list, &firstfsb);
+
+       error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
+                       &firstfsb, &free_list);
+       if (error)
+               goto out;
+
+       error = xfs_bmap_finish(&tp, &free_list, &committed);
+       if (error)
+               goto out;
+
+       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+
+out:
+       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       return error;
+}
index b9d8a49..6aaa0c1 100644 (file)
@@ -166,6 +166,11 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
  */
 #define XFS_BMAP_MAX_SHIFT_EXTENTS     1
 
+enum shift_direction {
+       SHIFT_LEFT = 0,
+       SHIFT_RIGHT,
+};
+
 #ifdef DEBUG
 void   xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
                int whichfork, unsigned long caller_ip);
@@ -211,8 +216,10 @@ int        xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                xfs_extnum_t num);
 uint   xfs_default_attroffset(struct xfs_inode *ip);
 int    xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
-               xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb,
-               int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock,
-               struct xfs_bmap_free *flist, int num_exts);
+               xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
+               int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
+               struct xfs_bmap_free *flist, enum shift_direction direction,
+               int num_exts);
+int    xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
 
 #endif /* __XFS_BMAP_H__ */
index 81cad43..c72283d 100644 (file)
@@ -168,7 +168,7 @@ xfs_btree_check_lptr(
        xfs_fsblock_t           bno,    /* btree block disk address */
        int                     level)  /* btree block level */
 {
-       XFS_WANT_CORRUPTED_RETURN(
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
                level > 0 &&
                bno != NULLFSBLOCK &&
                XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
@@ -187,7 +187,7 @@ xfs_btree_check_sptr(
 {
        xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
 
-       XFS_WANT_CORRUPTED_RETURN(
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
                level > 0 &&
                bno != NULLAGBLOCK &&
                bno != 0 &&
@@ -1825,7 +1825,7 @@ xfs_btree_lookup(
                        error = xfs_btree_increment(cur, 0, &i);
                        if (error)
                                goto error0;
-                       XFS_WANT_CORRUPTED_RETURN(i == 1);
+                       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
                        XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
                        *stat = 1;
                        return 0;
@@ -2285,7 +2285,7 @@ xfs_btree_rshift(
        if (error)
                goto error0;
        i = xfs_btree_lastrec(tcur, level);
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
 
        error = xfs_btree_increment(tcur, level, &i);
        if (error)
@@ -3138,7 +3138,7 @@ xfs_btree_insert(
                        goto error0;
                }
 
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
                level++;
 
                /*
@@ -3582,15 +3582,15 @@ xfs_btree_delrec(
                 * Actually any entry but the first would suffice.
                 */
                i = xfs_btree_lastrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
 
                error = xfs_btree_increment(tcur, level, &i);
                if (error)
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
 
                i = xfs_btree_lastrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
 
                /* Grab a pointer to the block. */
                right = xfs_btree_get_block(tcur, level, &rbp);
@@ -3634,12 +3634,12 @@ xfs_btree_delrec(
                rrecs = xfs_btree_get_numrecs(right);
                if (!xfs_btree_ptr_is_null(cur, &lptr)) {
                        i = xfs_btree_firstrec(tcur, level);
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
 
                        error = xfs_btree_decrement(tcur, level, &i);
                        if (error)
                                goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
                }
        }
 
@@ -3653,13 +3653,13 @@ xfs_btree_delrec(
                 * previous block.
                 */
                i = xfs_btree_firstrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
 
                error = xfs_btree_decrement(tcur, level, &i);
                if (error)
                        goto error0;
                i = xfs_btree_firstrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
 
                /* Grab a pointer to the block. */
                left = xfs_btree_get_block(tcur, level, &lbp);
index 9cb0115..2385f8c 100644 (file)
@@ -538,12 +538,12 @@ xfs_da3_root_split(
        oldroot = blk1->bp->b_addr;
        if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
            oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
-               struct xfs_da3_icnode_hdr nodehdr;
+               struct xfs_da3_icnode_hdr icnodehdr;
 
-               dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot);
+               dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
                btree = dp->d_ops->node_tree_p(oldroot);
-               size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);
-               level = nodehdr.level;
+               size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
+               level = icnodehdr.level;
 
                /*
                 * we are about to copy oldroot to bp, so set up the type
index 0a49b02..74bcbab 100644 (file)
@@ -725,7 +725,13 @@ struct xfs_attr3_icleaf_hdr {
        __uint16_t      magic;
        __uint16_t      count;
        __uint16_t      usedbytes;
-       __uint16_t      firstused;
+       /*
+        * firstused is 32-bit here instead of 16-bit like the on-disk variant
+        * to support maximum fsb size of 64k without overflow issues throughout
+        * the attr code. Instead, the overflow condition is handled on
+        * conversion to/from disk.
+        */
+       __uint32_t      firstused;
        __u8            holes;
        struct {
                __uint16_t      base;
@@ -733,6 +739,12 @@ struct xfs_attr3_icleaf_hdr {
        } freemap[XFS_ATTR_LEAF_MAPSIZE];
 };
 
+/*
+ * Special value to represent fs block size in the leaf header firstused field.
+ * Only used when block size overflows the 2-bytes available on disk.
+ */
+#define XFS_ATTR3_LEAF_NULLOFF 0
+
 /*
  * Flags used in the leaf_entry[i].flags field.
  * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
index 5ff31be..de1ea16 100644 (file)
@@ -89,7 +89,7 @@ __xfs_dir3_data_check(
                 * so just ensure that the count falls somewhere inside the
                 * block right now.
                 */
-               XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) <
+               XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) <
                        ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
                break;
        case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
@@ -107,21 +107,21 @@ __xfs_dir3_data_check(
        bf = ops->data_bestfree_p(hdr);
        count = lastfree = freeseen = 0;
        if (!bf[0].length) {
-               XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);
+               XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset);
                freeseen |= 1 << 0;
        }
        if (!bf[1].length) {
-               XFS_WANT_CORRUPTED_RETURN(!bf[1].offset);
+               XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset);
                freeseen |= 1 << 1;
        }
        if (!bf[2].length) {
-               XFS_WANT_CORRUPTED_RETURN(!bf[2].offset);
+               XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset);
                freeseen |= 1 << 2;
        }
 
-       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >=
+       XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >=
                                                be16_to_cpu(bf[1].length));
-       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >=
+       XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >=
                                                be16_to_cpu(bf[2].length));
        /*
         * Loop over the data/unused entries.
@@ -134,18 +134,18 @@ __xfs_dir3_data_check(
                 * doesn't need to be there.
                 */
                if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       XFS_WANT_CORRUPTED_RETURN(lastfree == 0);
-                       XFS_WANT_CORRUPTED_RETURN(
+                       XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0);
+                       XFS_WANT_CORRUPTED_RETURN(mp,
                                be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
                                               (char *)dup - (char *)hdr);
                        dfp = xfs_dir2_data_freefind(hdr, bf, dup);
                        if (dfp) {
                                i = (int)(dfp - bf);
-                               XFS_WANT_CORRUPTED_RETURN(
+                               XFS_WANT_CORRUPTED_RETURN(mp,
                                        (freeseen & (1 << i)) == 0);
                                freeseen |= 1 << i;
                        } else {
-                               XFS_WANT_CORRUPTED_RETURN(
+                               XFS_WANT_CORRUPTED_RETURN(mp,
                                        be16_to_cpu(dup->length) <=
                                                be16_to_cpu(bf[2].length));
                        }
@@ -160,13 +160,13 @@ __xfs_dir3_data_check(
                 * The linear search is crude but this is DEBUG code.
                 */
                dep = (xfs_dir2_data_entry_t *)p;
-               XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0);
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0);
+               XFS_WANT_CORRUPTED_RETURN(mp,
                        !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                        be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
                                               (char *)dep - (char *)hdr);
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                                ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
                count++;
                lastfree = 0;
@@ -183,14 +183,15 @@ __xfs_dir3_data_check(
                                    be32_to_cpu(lep[i].hashval) == hash)
                                        break;
                        }
-                       XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
+                       XFS_WANT_CORRUPTED_RETURN(mp,
+                                                 i < be32_to_cpu(btp->count));
                }
                p += ops->data_entsize(dep->namelen);
        }
        /*
         * Need to have seen all the entries and all the bestfree slots.
         */
-       XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
+       XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7);
        if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
            hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
                for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
@@ -198,13 +199,13 @@ __xfs_dir3_data_check(
                            cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                                stale++;
                        if (i > 0)
-                               XFS_WANT_CORRUPTED_RETURN(
+                               XFS_WANT_CORRUPTED_RETURN(mp,
                                        be32_to_cpu(lep[i].hashval) >=
                                                be32_to_cpu(lep[i - 1].hashval));
                }
-               XFS_WANT_CORRUPTED_RETURN(count ==
+               XFS_WANT_CORRUPTED_RETURN(mp, count ==
                        be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
-               XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale));
+               XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale));
        }
        return 0;
 }
index 8eb7189..4daaa66 100644 (file)
@@ -264,68 +264,6 @@ typedef struct xfs_dsb {
        /* must be padded to 64 bit alignment */
 } xfs_dsb_t;
 
-/*
- * Sequence number values for the fields.
- */
-typedef enum {
-       XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
-       XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
-       XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
-       XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
-       XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
-       XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
-       XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
-       XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
-       XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
-       XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
-       XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
-       XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
-       XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
-       XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
-       XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
-       XFS_SBS_PQUOTINO, XFS_SBS_LSN,
-       XFS_SBS_FIELDCOUNT
-} xfs_sb_field_t;
-
-/*
- * Mask values, defined based on the xfs_sb_field_t values.
- * Only define the ones we're using.
- */
-#define        XFS_SB_MVAL(x)          (1LL << XFS_SBS_ ## x)
-#define        XFS_SB_UUID             XFS_SB_MVAL(UUID)
-#define        XFS_SB_FNAME            XFS_SB_MVAL(FNAME)
-#define        XFS_SB_ROOTINO          XFS_SB_MVAL(ROOTINO)
-#define        XFS_SB_RBMINO           XFS_SB_MVAL(RBMINO)
-#define        XFS_SB_RSUMINO          XFS_SB_MVAL(RSUMINO)
-#define        XFS_SB_VERSIONNUM       XFS_SB_MVAL(VERSIONNUM)
-#define XFS_SB_UQUOTINO                XFS_SB_MVAL(UQUOTINO)
-#define XFS_SB_GQUOTINO                XFS_SB_MVAL(GQUOTINO)
-#define XFS_SB_QFLAGS          XFS_SB_MVAL(QFLAGS)
-#define XFS_SB_SHARED_VN       XFS_SB_MVAL(SHARED_VN)
-#define XFS_SB_UNIT            XFS_SB_MVAL(UNIT)
-#define XFS_SB_WIDTH           XFS_SB_MVAL(WIDTH)
-#define XFS_SB_ICOUNT          XFS_SB_MVAL(ICOUNT)
-#define XFS_SB_IFREE           XFS_SB_MVAL(IFREE)
-#define XFS_SB_FDBLOCKS                XFS_SB_MVAL(FDBLOCKS)
-#define XFS_SB_FEATURES2       (XFS_SB_MVAL(FEATURES2) | \
-                                XFS_SB_MVAL(BAD_FEATURES2))
-#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
-#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
-#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
-#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
-#define XFS_SB_CRC             XFS_SB_MVAL(CRC)
-#define XFS_SB_PQUOTINO                XFS_SB_MVAL(PQUOTINO)
-#define        XFS_SB_NUM_BITS         ((int)XFS_SBS_FIELDCOUNT)
-#define        XFS_SB_ALL_BITS         ((1LL << XFS_SB_NUM_BITS) - 1)
-#define        XFS_SB_MOD_BITS         \
-       (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
-        XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
-        XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
-        XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
-        XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
-        XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
-        XFS_SB_PQUOTINO)
-
 
 /*
  * Misc. Flags - warning - these will be cleared by xfs_repair unless
index 116ef1d..07349a1 100644 (file)
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
         */
        newlen = args.mp->m_ialloc_inos;
        if (args.mp->m_maxicount &&
-           args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
+           percpu_counter_read(&args.mp->m_icount) + newlen >
+                                                       args.mp->m_maxicount)
                return -ENOSPC;
        args.minlen = args.maxlen = args.mp->m_ialloc_blks;
        /*
@@ -700,7 +701,7 @@ xfs_ialloc_next_rec(
                error = xfs_inobt_get_rec(cur, rec, &i);
                if (error)
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
        }
 
        return 0;
@@ -724,7 +725,7 @@ xfs_ialloc_get_rec(
                error = xfs_inobt_get_rec(cur, rec, &i);
                if (error)
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
        }
 
        return 0;
@@ -783,12 +784,12 @@ xfs_dialloc_ag_inobt(
                error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
                if (error)
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 
                error = xfs_inobt_get_rec(cur, &rec, &j);
                if (error)
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, j == 1, error0);
 
                if (rec.ir_freecount > 0) {
                        /*
@@ -944,19 +945,19 @@ newino:
        error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
        if (error)
                goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
 
        for (;;) {
                error = xfs_inobt_get_rec(cur, &rec, &i);
                if (error)
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                if (rec.ir_freecount > 0)
                        break;
                error = xfs_btree_increment(cur, 0, &i);
                if (error)
                        goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
        }
 
 alloc_inode:
@@ -1016,7 +1017,7 @@ xfs_dialloc_ag_finobt_near(
                error = xfs_inobt_get_rec(lcur, rec, &i);
                if (error)
                        return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(lcur->bc_mp, i == 1);
 
                /*
                 * See if we've landed in the parent inode record. The finobt
@@ -1039,10 +1040,10 @@ xfs_dialloc_ag_finobt_near(
                error = xfs_inobt_get_rec(rcur, &rrec, &j);
                if (error)
                        goto error_rcur;
-               XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
+               XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, j == 1, error_rcur);
        }
 
-       XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
+       XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, i == 1 || j == 1, error_rcur);
        if (i == 1 && j == 1) {
                /*
                 * Both the left and right records are valid. Choose the closer
@@ -1095,7 +1096,7 @@ xfs_dialloc_ag_finobt_newino(
                        error = xfs_inobt_get_rec(cur, rec, &i);
                        if (error)
                                return error;
-                       XFS_WANT_CORRUPTED_RETURN(i == 1);
+                       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
                        return 0;
                }
        }
@@ -1106,12 +1107,12 @@ xfs_dialloc_ag_finobt_newino(
        error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
        if (error)
                return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
 
        error = xfs_inobt_get_rec(cur, rec, &i);
        if (error)
                return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
 
        return 0;
 }
@@ -1133,19 +1134,19 @@ xfs_dialloc_ag_update_inobt(
        error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
        if (error)
                return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
 
        error = xfs_inobt_get_rec(cur, &rec, &i);
        if (error)
                return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
        ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
                                   XFS_INODES_PER_CHUNK) == 0);
 
        rec.ir_free &= ~XFS_INOBT_MASK(offset);
        rec.ir_freecount--;
 
-       XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, (rec.ir_free == frec->ir_free) &&
                                  (rec.ir_freecount == frec->ir_freecount));
 
        return xfs_inobt_update(cur, &rec);
@@ -1340,7 +1341,8 @@ xfs_dialloc(
         * inode.
         */
        if (mp->m_maxicount &&
-           mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
+           percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
+                                                       mp->m_maxicount) {
                noroom = 1;
                okalloc = 0;
        }
@@ -1475,14 +1477,14 @@ xfs_difree_inobt(
                        __func__, error);
                goto error0;
        }
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
        error = xfs_inobt_get_rec(cur, &rec, &i);
        if (error) {
                xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
                        __func__, error);
                goto error0;
        }
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
        /*
         * Get the offset in the inode chunk.
         */
@@ -1592,7 +1594,7 @@ xfs_difree_finobt(
                 * freed an inode in a previously fully allocated chunk. If not,
                 * something is out of sync.
                 */
-               XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
+               XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
 
                error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
                                             ibtrec->ir_free, &i);
@@ -1613,12 +1615,12 @@ xfs_difree_finobt(
        error = xfs_inobt_get_rec(cur, &rec, &i);
        if (error)
                goto error;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
 
        rec.ir_free |= XFS_INOBT_MASK(offset);
        rec.ir_freecount++;
 
-       XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
+       XFS_WANT_CORRUPTED_GOTO(mp, (rec.ir_free == ibtrec->ir_free) &&
                                (rec.ir_freecount == ibtrec->ir_freecount),
                                error);
 
index b0a5fe9..dc4bfc5 100644 (file)
@@ -111,14 +111,6 @@ xfs_mount_validate_sb(
        bool            check_inprogress,
        bool            check_version)
 {
-
-       /*
-        * If the log device and data device have the
-        * same device number, the log is internal.
-        * Consequently, the sb_logstart should be non-zero.  If
-        * we have a zero sb_logstart in this case, we may be trying to mount
-        * a volume filesystem in a non-volume manner.
-        */
        if (sbp->sb_magicnum != XFS_SB_MAGIC) {
                xfs_warn(mp, "bad magic number");
                return -EWRONGFS;
@@ -743,17 +735,15 @@ xfs_initialize_perag_data(
                btree += pag->pagf_btreeblks;
                xfs_perag_put(pag);
        }
-       /*
-        * Overwrite incore superblock counters with just-read data
-        */
+
+       /* Overwrite incore superblock counters with just-read data */
        spin_lock(&mp->m_sb_lock);
        sbp->sb_ifree = ifree;
        sbp->sb_icount = ialloc;
        sbp->sb_fdblocks = bfree + bfreelst + btree;
        spin_unlock(&mp->m_sb_lock);
 
-       /* Fixup the per-cpu counters as well. */
-       xfs_icsb_reinit_counters(mp);
+       xfs_reinit_percpu_counters(mp);
 
        return 0;
 }
@@ -771,6 +761,10 @@ xfs_log_sb(
        struct xfs_mount        *mp = tp->t_mountp;
        struct xfs_buf          *bp = xfs_trans_getsb(tp, mp, 0);
 
+       mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+       mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
+       mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+
        xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
        xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
        xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
index 1d8eef9..a56960d 100644 (file)
@@ -1232,6 +1232,117 @@ xfs_vm_releasepage(
        return try_to_free_buffers(page);
 }
 
+/*
+ * When we map a DIO buffer, we may need to attach an ioend that describes the
+ * type of write IO we are doing. This passes to the completion function the
+ * operations it needs to perform. If the mapping is for an overwrite wholly
+ * within the EOF then we don't need an ioend and so we don't allocate one.
+ * This avoids the unnecessary overhead of allocating and freeing ioends for
+ * workloads that don't require transactions on IO completion.
+ *
+ * If we get multiple mappings in a single IO, we might be mapping different
+ * types. But because the direct IO can only have a single private pointer, we
+ * need to ensure that:
+ *
+ * a) i) the ioend spans the entire region of unwritten mappings; or
+ *    ii) the ioend spans all the mappings that cross or are beyond EOF; and
+ * b) if it contains unwritten extents, it is *permanently* marked as such
+ *
+ * We could do this by chaining ioends like buffered IO does, but we only
+ * actually get one IO completion callback from the direct IO, and that spans
+ * the entire IO regardless of how many mappings and IOs are needed to complete
+ * the DIO. There is only going to be one reference to the ioend and its life
+ * cycle is constrained by the DIO completion code. hence we don't need
+ * reference counting here.
+ */
+static void
+xfs_map_direct(
+       struct inode            *inode,
+       struct buffer_head      *bh_result,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       struct xfs_ioend        *ioend;
+       xfs_off_t               size = bh_result->b_size;
+       int                     type;
+
+       if (ISUNWRITTEN(imap))
+               type = XFS_IO_UNWRITTEN;
+       else
+               type = XFS_IO_OVERWRITE;
+
+       trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
+
+       if (bh_result->b_private) {
+               ioend = bh_result->b_private;
+               ASSERT(ioend->io_size > 0);
+               ASSERT(offset >= ioend->io_offset);
+               if (offset + size > ioend->io_offset + ioend->io_size)
+                       ioend->io_size = offset - ioend->io_offset + size;
+
+               if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
+                       ioend->io_type = XFS_IO_UNWRITTEN;
+
+               trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
+                                             ioend->io_size, ioend->io_type,
+                                             imap);
+       } else if (type == XFS_IO_UNWRITTEN ||
+                  offset + size > i_size_read(inode)) {
+               ioend = xfs_alloc_ioend(inode, type);
+               ioend->io_offset = offset;
+               ioend->io_size = size;
+
+               bh_result->b_private = ioend;
+               set_buffer_defer_completion(bh_result);
+
+               trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
+                                          imap);
+       } else {
+               trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
+                                           imap);
+       }
+}
+
+/*
+ * If this is O_DIRECT or the mpage code calling tell them how large the mapping
+ * is, so that we can avoid repeated get_blocks calls.
+ *
+ * If the mapping spans EOF, then we have to break the mapping up as the mapping
+ * for blocks beyond EOF must be marked new so that sub block regions can be
+ * correctly zeroed. We can't do this for mappings within EOF unless the mapping
+ * was just allocated or is unwritten, otherwise the callers would overwrite
+ * existing data with zeros. Hence we have to split the mapping into a range up
+ * to and including EOF, and a second mapping for beyond EOF.
+ */
+static void
+xfs_map_trim_size(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset,
+       ssize_t                 size)
+{
+       xfs_off_t               mapping_size;
+
+       mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
+       mapping_size <<= inode->i_blkbits;
+
+       ASSERT(mapping_size > 0);
+       if (mapping_size > size)
+               mapping_size = size;
+       if (offset < i_size_read(inode) &&
+           offset + mapping_size >= i_size_read(inode)) {
+               /* limit mapping to block that spans EOF */
+               mapping_size = roundup_64(i_size_read(inode) - offset,
+                                         1 << inode->i_blkbits);
+       }
+       if (mapping_size > LONG_MAX)
+               mapping_size = LONG_MAX;
+
+       bh_result->b_size = mapping_size;
+}
+
 STATIC int
 __xfs_get_blocks(
        struct inode            *inode,
@@ -1320,31 +1431,37 @@ __xfs_get_blocks(
 
                        xfs_iunlock(ip, lockmode);
                }
-
-               trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+               trace_xfs_get_blocks_alloc(ip, offset, size,
+                               ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
+                                                  : XFS_IO_DELALLOC, &imap);
        } else if (nimaps) {
-               trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+               trace_xfs_get_blocks_found(ip, offset, size,
+                               ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
+                                                  : XFS_IO_OVERWRITE, &imap);
                xfs_iunlock(ip, lockmode);
        } else {
                trace_xfs_get_blocks_notfound(ip, offset, size);
                goto out_unlock;
        }
 
+       /* trim mapping down to size requested */
+       if (direct || size > (1 << inode->i_blkbits))
+               xfs_map_trim_size(inode, iblock, bh_result,
+                                 &imap, offset, size);
+
+       /*
+        * For unwritten extents do not report a disk address in the buffered
+        * read case (treat as if we're reading into a hole).
+        */
        if (imap.br_startblock != HOLESTARTBLOCK &&
-           imap.br_startblock != DELAYSTARTBLOCK) {
-               /*
-                * For unwritten extents do not report a disk address on
-                * the read case (treat as if we're reading into a hole).
-                */
-               if (create || !ISUNWRITTEN(&imap))
-                       xfs_map_buffer(inode, bh_result, &imap, offset);
-               if (create && ISUNWRITTEN(&imap)) {
-                       if (direct) {
-                               bh_result->b_private = inode;
-                               set_buffer_defer_completion(bh_result);
-                       }
+           imap.br_startblock != DELAYSTARTBLOCK &&
+           (create || !ISUNWRITTEN(&imap))) {
+               xfs_map_buffer(inode, bh_result, &imap, offset);
+               if (ISUNWRITTEN(&imap))
                        set_buffer_unwritten(bh_result);
-               }
+               /* direct IO needs special help */
+               if (create && direct)
+                       xfs_map_direct(inode, bh_result, &imap, offset);
        }
 
        /*
@@ -1377,39 +1494,6 @@ __xfs_get_blocks(
                }
        }
 
-       /*
-        * If this is O_DIRECT or the mpage code calling tell them how large
-        * the mapping is, so that we can avoid repeated get_blocks calls.
-        *
-        * If the mapping spans EOF, then we have to break the mapping up as the
-        * mapping for blocks beyond EOF must be marked new so that sub block
-        * regions can be correctly zeroed. We can't do this for mappings within
-        * EOF unless the mapping was just allocated or is unwritten, otherwise
-        * the callers would overwrite existing data with zeros. Hence we have
-        * to split the mapping into a range up to and including EOF, and a
-        * second mapping for beyond EOF.
-        */
-       if (direct || size > (1 << inode->i_blkbits)) {
-               xfs_off_t               mapping_size;
-
-               mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
-               mapping_size <<= inode->i_blkbits;
-
-               ASSERT(mapping_size > 0);
-               if (mapping_size > size)
-                       mapping_size = size;
-               if (offset < i_size_read(inode) &&
-                   offset + mapping_size >= i_size_read(inode)) {
-                       /* limit mapping to block that spans EOF */
-                       mapping_size = roundup_64(i_size_read(inode) - offset,
-                                                 1 << inode->i_blkbits);
-               }
-               if (mapping_size > LONG_MAX)
-                       mapping_size = LONG_MAX;
-
-               bh_result->b_size = mapping_size;
-       }
-
        return 0;
 
 out_unlock:
@@ -1440,9 +1524,11 @@ xfs_get_blocks_direct(
 /*
  * Complete a direct I/O write request.
  *
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
- * need to issue a transaction to convert the range from unwritten to written
- * extents.
+ * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
+ * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
+ * wholly within the EOF and so there is nothing for us to do. Note that in this
+ * case the completion can be called in interrupt context, whereas if we have an
+ * ioend we will always be called in task context (i.e. from a workqueue).
  */
 STATIC void
 xfs_end_io_direct_write(
@@ -1454,43 +1540,71 @@ xfs_end_io_direct_write(
        struct inode            *inode = file_inode(iocb->ki_filp);
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ioend        *ioend = private;
 
-       if (XFS_FORCED_SHUTDOWN(mp))
+       trace_xfs_gbmap_direct_endio(ip, offset, size,
+                                    ioend ? ioend->io_type : 0, NULL);
+
+       if (!ioend) {
+               ASSERT(offset + size <= i_size_read(inode));
                return;
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               goto out_end_io;
 
        /*
-        * While the generic direct I/O code updates the inode size, it does
-        * so only after the end_io handler is called, which means our
-        * end_io handler thinks the on-disk size is outside the in-core
-        * size.  To prevent this just update it a little bit earlier here.
+        * dio completion end_io functions are only called on writes if more
+        * than 0 bytes was written.
         */
+       ASSERT(size > 0);
+
+       /*
+        * The ioend only maps whole blocks, while the IO may be sector aligned.
+        * Hence the ioend offset/size may not match the IO offset/size exactly.
+        * Because we don't map overwrites within EOF into the ioend, the offset
+        * may not match, but only if the endio spans EOF.  Either way, write
+        * the IO sizes into the ioend so that completion processing does the
+        * right thing.
+        */
+       ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
+       ioend->io_size = size;
+       ioend->io_offset = offset;
+
+       /*
+        * The ioend tells us whether we are doing unwritten extent conversion
+        * or an append transaction that updates the on-disk file size. These
+        * cases are the only cases where we should *potentially* be needing
+        * to update the VFS inode size.
+        *
+        * We need to update the in-core inode size here so that we don't end up
+        * with the on-disk inode size being outside the in-core inode size. We
+        * have no other method of updating EOF for AIO, so always do it here
+        * if necessary.
+        *
+        * We need to lock the test/set EOF update as we can be racing with
+        * other IO completions here to update the EOF. Failing to serialise
+        * here can result in EOF moving backwards and Bad Things Happen when
+        * that occurs.
+        */
+       spin_lock(&ip->i_flags_lock);
        if (offset + size > i_size_read(inode))
                i_size_write(inode, offset + size);
+       spin_unlock(&ip->i_flags_lock);
 
        /*
-        * For direct I/O we do not know if we need to allocate blocks or not,
-        * so we can't preallocate an append transaction, as that results in
-        * nested reservations and log space deadlocks. Hence allocate the
-        * transaction here. While this is sub-optimal and can block IO
-        * completion for some time, we're stuck with doing it this way until
-        * we can pass the ioend to the direct IO allocation callbacks and
-        * avoid nesting that way.
+        * If we are doing an append IO that needs to update the EOF on disk,
+        * do the transaction reserve now so we can use common end io
+        * processing. Stashing the error (if there is one) in the ioend will
+        * result in the ioend processing passing on the error if it is
+        * possible as we can't return it from here.
         */
-       if (private && size > 0) {
-               xfs_iomap_write_unwritten(ip, offset, size);
-       } else if (offset + size > ip->i_d.di_size) {
-               struct xfs_trans        *tp;
-               int                     error;
-
-               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-               if (error) {
-                       xfs_trans_cancel(tp, 0);
-                       return;
-               }
+       if (ioend->io_type == XFS_IO_OVERWRITE)
+               ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
 
-               xfs_setfilesize(ip, tp, offset, size);
-       }
+out_end_io:
+       xfs_end_io(&ioend->io_work);
+       return;
 }
 
 STATIC ssize_t
index 83af4c1..f9c1c64 100644 (file)
@@ -132,9 +132,10 @@ xfs_attr3_leaf_inactive(
        int                     size;
        int                     tmp;
        int                     i;
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
 
        leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
 
        /*
         * Count the number of "remote" value extents.
index a43d370..65fb37a 100644 (file)
@@ -225,6 +225,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
        int error, i;
        struct xfs_buf *bp;
        struct xfs_inode        *dp = context->dp;
+       struct xfs_mount        *mp = dp->i_mount;
 
        trace_xfs_attr_node_list(context);
 
@@ -256,7 +257,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
                        case XFS_ATTR_LEAF_MAGIC:
                        case XFS_ATTR3_LEAF_MAGIC:
                                leaf = bp->b_addr;
-                               xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+                               xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo,
+                                                            &leafhdr, leaf);
                                entries = xfs_attr3_leaf_entryp(leaf);
                                if (cursor->hashval > be32_to_cpu(
                                                entries[leafhdr.count - 1].hashval)) {
@@ -340,7 +342,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
                        xfs_trans_brelse(NULL, bp);
                        return error;
                }
-               xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+               xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
                if (context->seen_enough || leafhdr.forw == 0)
                        break;
                cursor->blkno = leafhdr.forw;
@@ -368,11 +370,12 @@ xfs_attr3_leaf_list_int(
        struct xfs_attr_leaf_entry      *entry;
        int                             retval;
        int                             i;
+       struct xfs_mount                *mp = context->dp->i_mount;
 
        trace_xfs_attr_list_leaf(context);
 
        leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
        entries = xfs_attr3_leaf_entryp(leaf);
 
        cursor = context->cursor;
index 22a5dcb..a52bbd3 100644 (file)
@@ -1376,22 +1376,19 @@ out:
 }
 
 /*
- * xfs_collapse_file_space()
- *     This routine frees disk space and shift extent for the given file.
- *     The first thing we do is to free data blocks in the specified range
- *     by calling xfs_free_file_space(). It would also sync dirty data
- *     and invalidate page cache over the region on which collapse range
- *     is working. And Shift extent records to the left to cover a hole.
- * RETURNS:
- *     0 on success
- *     errno on error
- *
+ * @next_fsb will keep track of the extent currently undergoing shift.
+ * @stop_fsb will keep track of the extent at which we have to stop.
+ * If we are shifting left, we will start with block (offset + len) and
+ * shift each extent till last extent.
+ * If we are shifting right, we will start with last extent inside file space
+ * and continue until we reach the block corresponding to offset.
  */
-int
-xfs_collapse_file_space(
-       struct xfs_inode        *ip,
-       xfs_off_t               offset,
-       xfs_off_t               len)
+static int
+xfs_shift_file_space(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       xfs_off_t               len,
+       enum shift_direction    direction)
 {
        int                     done = 0;
        struct xfs_mount        *mp = ip->i_mount;
@@ -1400,21 +1397,26 @@ xfs_collapse_file_space(
        struct xfs_bmap_free    free_list;
        xfs_fsblock_t           first_block;
        int                     committed;
-       xfs_fileoff_t           start_fsb;
+       xfs_fileoff_t           stop_fsb;
        xfs_fileoff_t           next_fsb;
        xfs_fileoff_t           shift_fsb;
 
-       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
 
-       trace_xfs_collapse_file_space(ip);
+       if (direction == SHIFT_LEFT) {
+               next_fsb = XFS_B_TO_FSB(mp, offset + len);
+               stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
+       } else {
+               /*
+                * If right shift, delegate the work of initialization of
+                * next_fsb to xfs_bmap_shift_extent as it has ilock held.
+                */
+               next_fsb = NULLFSBLOCK;
+               stop_fsb = XFS_B_TO_FSB(mp, offset);
+       }
 
-       next_fsb = XFS_B_TO_FSB(mp, offset + len);
        shift_fsb = XFS_B_TO_FSB(mp, len);
 
-       error = xfs_free_file_space(ip, offset, len);
-       if (error)
-               return error;
-
        /*
         * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
         * into the accessible region of the file.
@@ -1427,20 +1429,28 @@ xfs_collapse_file_space(
 
        /*
         * Writeback and invalidate cache for the remainder of the file as we're
-        * about to shift down every extent from the collapse range to EOF. The
-        * free of the collapse range above might have already done some of
-        * this, but we shouldn't rely on it to do anything outside of the range
-        * that was freed.
+        * about to shift down every extent from offset to EOF.
         */
        error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                            offset + len, -1);
+                                            offset, -1);
        if (error)
                return error;
        error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-                                       (offset + len) >> PAGE_CACHE_SHIFT, -1);
+                                       offset >> PAGE_CACHE_SHIFT, -1);
        if (error)
                return error;
 
+       /*
+        * The extent shiting code works on extent granularity. So, if
+        * stop_fsb is not the starting block of extent, we need to split
+        * the extent at stop_fsb.
+        */
+       if (direction == SHIFT_RIGHT) {
+               error = xfs_bmap_split_extent(ip, stop_fsb);
+               if (error)
+                       return error;
+       }
+
        while (!error && !done) {
                tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
                /*
@@ -1464,7 +1474,7 @@ xfs_collapse_file_space(
                if (error)
                        goto out;
 
-               xfs_trans_ijoin(tp, ip, 0);
+               xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
 
                xfs_bmap_init(&free_list, &first_block);
 
@@ -1472,10 +1482,9 @@ xfs_collapse_file_space(
                 * We are using the write transaction in which max 2 bmbt
                 * updates are allowed
                 */
-               start_fsb = next_fsb;
-               error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb,
-                               &done, &next_fsb, &first_block, &free_list,
-                               XFS_BMAP_MAX_SHIFT_EXTENTS);
+               error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
+                               &done, stop_fsb, &first_block, &free_list,
+                               direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
                if (error)
                        goto out;
 
@@ -1484,17 +1493,69 @@ xfs_collapse_file_space(
                        goto out;
 
                error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
        }
 
        return error;
 
 out:
        xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
        return error;
 }
 
+/*
+ * xfs_collapse_file_space()
+ *     This routine frees disk space and shift extent for the given file.
+ *     The first thing we do is to free data blocks in the specified range
+ *     by calling xfs_free_file_space(). It would also sync dirty data
+ *     and invalidate page cache over the region on which collapse range
+ *     is working. And Shift extent records to the left to cover a hole.
+ * RETURNS:
+ *     0 on success
+ *     errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       xfs_off_t               len)
+{
+       int error;
+
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       trace_xfs_collapse_file_space(ip);
+
+       error = xfs_free_file_space(ip, offset, len);
+       if (error)
+               return error;
+
+       return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
+}
+
+/*
+ * xfs_insert_file_space()
+ *     This routine create hole space by shifting extents for the given file.
+ *     The first thing we do is to sync dirty data and invalidate page cache
+ *     over the region on which insert range is working. And split an extent
+ *     to two extents at given offset by calling xfs_bmap_split_extent.
+ *     And shift all extent records which are laying between [offset,
+ *     last allocated extent] to the right to reserve hole range.
+ * RETURNS:
+ *     0 on success
+ *     errno on error
+ */
+int
+xfs_insert_file_space(
+       struct xfs_inode        *ip,
+       loff_t                  offset,
+       loff_t                  len)
+{
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       trace_xfs_insert_file_space(ip);
+
+       return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
+}
+
 /*
  * We need to check that the format of the data fork in the temporary inode is
  * valid for the target inode before doing the swap. This is not a problem with
@@ -1599,13 +1660,6 @@ xfs_swap_extent_flush(
        /* Verify O_DIRECT for ftmp */
        if (VFS_I(ip)->i_mapping->nrpages)
                return -EINVAL;
-
-       /*
-        * Don't try to swap extents on mmap()d files because we can't lock
-        * out races against page faults safely.
-        */
-       if (mapping_mapped(VFS_I(ip)->i_mapping))
-               return -EBUSY;
        return 0;
 }
 
@@ -1633,13 +1687,14 @@ xfs_swap_extents(
        }
 
        /*
-        * Lock up the inodes against other IO and truncate to begin with.
-        * Then we can ensure the inodes are flushed and have no page cache
-        * safely. Once we have done this we can take the ilocks and do the rest
-        * of the checks.
+        * Lock the inodes against other IO, page faults and truncate to
+        * begin with.  Then we can ensure the inodes are flushed and have no
+        * page cache safely. Once we have done this we can take the ilocks and
+        * do the rest of the checks.
         */
-       lock_flags = XFS_IOLOCK_EXCL;
+       lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
        xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
+       xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
 
        /* Verify that both files have the same format */
        if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
@@ -1666,8 +1721,16 @@ xfs_swap_extents(
                xfs_trans_cancel(tp, 0);
                goto out_unlock;
        }
+
+       /*
+        * Lock and join the inodes to the tansaction so that transaction commit
+        * or cancel will unlock the inodes from this point onwards.
+        */
        xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
        lock_flags |= XFS_ILOCK_EXCL;
+       xfs_trans_ijoin(tp, ip, lock_flags);
+       xfs_trans_ijoin(tp, tip, lock_flags);
+
 
        /* Verify all data are being swapped */
        if (sxp->sx_offset != 0 ||
@@ -1720,9 +1783,6 @@ xfs_swap_extents(
                        goto out_trans_cancel;
        }
 
-       xfs_trans_ijoin(tp, ip, lock_flags);
-       xfs_trans_ijoin(tp, tip, lock_flags);
-
        /*
         * Before we've swapped the forks, lets set the owners of the forks
         * appropriately. We have to do this as we are demand paging the btree
@@ -1856,5 +1916,5 @@ out_unlock:
 
 out_trans_cancel:
        xfs_trans_cancel(tp, 0);
-       goto out_unlock;
+       goto out;
 }
index 736429a..af97d9a 100644 (file)
@@ -63,6 +63,8 @@ int   xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
                            xfs_off_t len);
 int    xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
                                xfs_off_t len);
+int    xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
+                               xfs_off_t len);
 
 /* EOF block manipulation functions */
 bool   xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
index 507d96a..092d652 100644 (file)
@@ -537,9 +537,9 @@ xfs_buf_item_push(
 
        /* has a previous flush failed due to IO errors? */
        if ((bp->b_flags & XBF_WRITE_FAIL) &&
-           ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
+           ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
                xfs_warn(bp->b_target->bt_mount,
-"Detected failing async write on buffer block 0x%llx. Retrying async write.",
+"Failing async write on buffer block 0x%llx. Retrying async write.",
                         (long long)bp->b_bn);
        }
 
index 799e5a2..e85a951 100644 (file)
@@ -84,7 +84,7 @@ xfs_trim_extents(
                error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
                if (error)
                        goto out_del_cursor;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_del_cursor);
                ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
 
                /*
index 3ee186a..338e50b 100644 (file)
@@ -131,7 +131,7 @@ xfs_error_report(
 {
        if (level <= xfs_error_level) {
                xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
-               "Internal error %s at line %d of file %s.  Caller %pF",
+               "Internal error %s at line %d of file %s.  Caller %pS",
                            tag, linenum, filename, ra);
 
                xfs_stack_trace();
index 279a76e..c0394ed 100644 (file)
@@ -40,25 +40,25 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
 /*
  * Macros to set EFSCORRUPTED & return/branch.
  */
-#define        XFS_WANT_CORRUPTED_GOTO(x,l)    \
+#define        XFS_WANT_CORRUPTED_GOTO(mp, x, l)       \
        { \
                int fs_is_ok = (x); \
                ASSERT(fs_is_ok); \
                if (unlikely(!fs_is_ok)) { \
                        XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
-                                        XFS_ERRLEVEL_LOW, NULL); \
+                                        XFS_ERRLEVEL_LOW, mp); \
                        error = -EFSCORRUPTED; \
                        goto l; \
                } \
        }
 
-#define        XFS_WANT_CORRUPTED_RETURN(x)    \
+#define        XFS_WANT_CORRUPTED_RETURN(mp, x)        \
        { \
                int fs_is_ok = (x); \
                ASSERT(fs_is_ok); \
                if (unlikely(!fs_is_ok)) { \
                        XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
-                                        XFS_ERRLEVEL_LOW, NULL); \
+                                        XFS_ERRLEVEL_LOW, mp); \
                        return -EFSCORRUPTED; \
                } \
        }
index b97359b..652cd3c 100644 (file)
@@ -215,7 +215,7 @@ xfs_fs_get_parent(
        int                     error;
        struct xfs_inode        *cip;
 
-       error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
+       error = xfs_lookup(XFS_I(d_inode(child)), &xfs_name_dotdot, &cip, NULL);
        if (unlikely(error))
                return ERR_PTR(error);
 
index 1f12ad0..8121e75 100644 (file)
@@ -559,7 +559,7 @@ restart:
        if (error <= 0)
                return error;
 
-       error = xfs_break_layouts(inode, iolock);
+       error = xfs_break_layouts(inode, iolock, true);
        if (error)
                return error;
 
@@ -569,21 +569,42 @@ restart:
         * write.  If zeroing is needed and we are currently holding the
         * iolock shared, we need to update it to exclusive which implies
         * having to redo all checks before.
+        *
+        * We need to serialise against EOF updates that occur in IO
+        * completions here. We want to make sure that nobody is changing the
+        * size while we do this check until we have placed an IO barrier (i.e.
+        * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+        * The spinlock effectively forms a memory barrier once we have the
+        * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+        * and hence be able to correctly determine if we need to run zeroing.
         */
+       spin_lock(&ip->i_flags_lock);
        if (iocb->ki_pos > i_size_read(inode)) {
                bool    zero = false;
 
+               spin_unlock(&ip->i_flags_lock);
                if (*iolock == XFS_IOLOCK_SHARED) {
                        xfs_rw_iunlock(ip, *iolock);
                        *iolock = XFS_IOLOCK_EXCL;
                        xfs_rw_ilock(ip, *iolock);
                        iov_iter_reexpand(from, count);
+
+                       /*
+                        * We now have an IO submission barrier in place, but
+                        * AIO can do EOF updates during IO completion and hence
+                        * we now need to wait for all of them to drain. Non-AIO
+                        * DIO will have drained before we are given the
+                        * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+                        * no-op.
+                        */
+                       inode_dio_wait(inode);
                        goto restart;
                }
                error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
                if (error)
                        return error;
-       }
+       } else
+               spin_unlock(&ip->i_flags_lock);
 
        /*
         * Updating the timestamps will grab the ilock again from
@@ -645,6 +666,8 @@ xfs_file_dio_aio_write(
        int                     iolock;
        size_t                  count = iov_iter_count(from);
        loff_t                  pos = iocb->ki_pos;
+       loff_t                  end;
+       struct iov_iter         data;
        struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                        mp->m_rtdev_targp : mp->m_ddev_targp;
 
@@ -685,10 +708,11 @@ xfs_file_dio_aio_write(
                goto out;
        count = iov_iter_count(from);
        pos = iocb->ki_pos;
+       end = pos + count - 1;
 
        if (mapping->nrpages) {
                ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                                   pos, pos + count - 1);
+                                                  pos, end);
                if (ret)
                        goto out;
                /*
@@ -698,7 +722,7 @@ xfs_file_dio_aio_write(
                 */
                ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
                                        pos >> PAGE_CACHE_SHIFT,
-                                       (pos + count - 1) >> PAGE_CACHE_SHIFT);
+                                       end >> PAGE_CACHE_SHIFT);
                WARN_ON_ONCE(ret);
                ret = 0;
        }
@@ -715,8 +739,22 @@ xfs_file_dio_aio_write(
        }
 
        trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_direct_write(iocb, from, pos);
 
+       data = *from;
+       ret = mapping->a_ops->direct_IO(iocb, &data, pos);
+
+       /* see generic_file_direct_write() for why this is necessary */
+       if (mapping->nrpages) {
+               invalidate_inode_pages2_range(mapping,
+                                             pos >> PAGE_CACHE_SHIFT,
+                                             end >> PAGE_CACHE_SHIFT);
+       }
+
+       if (ret > 0) {
+               pos += ret;
+               iov_iter_advance(from, ret);
+               iocb->ki_pos = pos;
+       }
 out:
        xfs_rw_iunlock(ip, iolock);
 
@@ -822,6 +860,11 @@ xfs_file_write_iter(
        return ret;
 }
 
+#define        XFS_FALLOC_FL_SUPPORTED                                         \
+               (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
+                FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
+                FALLOC_FL_INSERT_RANGE)
+
 STATIC long
 xfs_file_fallocate(
        struct file             *file,
@@ -835,18 +878,21 @@ xfs_file_fallocate(
        enum xfs_prealloc_flags flags = 0;
        uint                    iolock = XFS_IOLOCK_EXCL;
        loff_t                  new_size = 0;
+       bool                    do_file_insert = 0;
 
        if (!S_ISREG(inode->i_mode))
                return -EINVAL;
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+       if (mode & ~XFS_FALLOC_FL_SUPPORTED)
                return -EOPNOTSUPP;
 
        xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock);
+       error = xfs_break_layouts(inode, &iolock, false);
        if (error)
                goto out_unlock;
 
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       iolock |= XFS_MMAPLOCK_EXCL;
+
        if (mode & FALLOC_FL_PUNCH_HOLE) {
                error = xfs_free_file_space(ip, offset, len);
                if (error)
@@ -873,6 +919,27 @@ xfs_file_fallocate(
                error = xfs_collapse_file_space(ip, offset, len);
                if (error)
                        goto out_unlock;
+       } else if (mode & FALLOC_FL_INSERT_RANGE) {
+               unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+               new_size = i_size_read(inode) + len;
+               if (offset & blksize_mask || len & blksize_mask) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+
+               /* check the new inode size does not wrap through zero */
+               if (new_size > inode->i_sb->s_maxbytes) {
+                       error = -EFBIG;
+                       goto out_unlock;
+               }
+
+               /* Offset should be less than i_size */
+               if (offset >= i_size_read(inode)) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+               do_file_insert = 1;
        } else {
                flags |= XFS_PREALLOC_SET;
 
@@ -907,8 +974,19 @@ xfs_file_fallocate(
                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = new_size;
                error = xfs_setattr_size(ip, &iattr);
+               if (error)
+                       goto out_unlock;
        }
 
+       /*
+        * Perform hole insertion now that the file size has been
+        * updated so that if we crash during the operation we don't
+        * leave shifted extents past EOF and hence losing access to
+        * the data that is contained within them.
+        */
+       if (do_file_insert)
+               error = xfs_insert_file_space(ip, offset, len);
+
 out_unlock:
        xfs_iunlock(ip, iolock);
        return error;
@@ -996,20 +1074,6 @@ xfs_file_mmap(
        return 0;
 }
 
-/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-       struct vm_area_struct   *vma,
-       struct vm_fault         *vmf)
-{
-       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
 /*
  * This type is designed to indicate the type of offset we would like
  * to search from page cache for xfs_seek_hole_data().
@@ -1385,6 +1449,55 @@ xfs_file_llseek(
        }
 }
 
+/*
+ * Locking for serialisation of IO during page faults. This results in a lock
+ * ordering of:
+ *
+ * mmap_sem (MM)
+ *   i_mmap_lock (XFS - truncate serialisation)
+ *     page_lock (MM)
+ *       i_lock (XFS - extent map serialisation)
+ */
+STATIC int
+xfs_filemap_fault(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+
+       trace_xfs_filemap_fault(ip);
+
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = filemap_fault(vma, vmf);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+       return error;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
+ */
+STATIC int
+xfs_filemap_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+
+       trace_xfs_filemap_page_mkwrite(ip);
+
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+       return error;
+}
+
 const struct file_operations xfs_file_operations = {
        .llseek         = xfs_file_llseek,
        .read_iter      = xfs_file_read_iter,
@@ -1415,7 +1528,7 @@ const struct file_operations xfs_dir_file_operations = {
 };
 
 static const struct vm_operations_struct xfs_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = xfs_filemap_fault,
        .map_pages      = filemap_map_pages,
-       .page_mkwrite   = xfs_vm_page_mkwrite,
+       .page_mkwrite   = xfs_filemap_page_mkwrite,
 };
index a2e86e8..da82f1c 100644 (file)
@@ -294,7 +294,7 @@ xfs_filestream_get_parent(
        if (!parent)
                goto out_dput;
 
-       dir = igrab(parent->d_inode);
+       dir = igrab(d_inode(parent));
        dput(parent);
 
 out_dput:
@@ -322,7 +322,7 @@ xfs_filestream_lookup_ag(
 
        pip = xfs_filestream_get_parent(ip);
        if (!pip)
-               goto out;
+               return NULLAGNUMBER;
 
        mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
        if (mru) {
index 74efe5b..cb7e8a2 100644 (file)
@@ -637,12 +637,13 @@ xfs_fs_counts(
        xfs_mount_t             *mp,
        xfs_fsop_counts_t       *cnt)
 {
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+       cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
+       cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
+       cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
+                                                       XFS_ALLOC_SET_ASIDE(mp);
+
        spin_lock(&mp->m_sb_lock);
-       cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
        cnt->freertx = mp->m_sb.sb_frextents;
-       cnt->freeino = mp->m_sb.sb_ifree;
-       cnt->allocino = mp->m_sb.sb_icount;
        spin_unlock(&mp->m_sb_lock);
        return 0;
 }
@@ -692,14 +693,9 @@ xfs_reserve_blocks(
         * what to do. This means that the amount of free space can
         * change while we do this, so we need to retry if we end up
         * trying to reserve more space than is available.
-        *
-        * We also use the xfs_mod_incore_sb() interface so that we
-        * don't have to care about whether per cpu counter are
-        * enabled, disabled or even compiled in....
         */
 retry:
        spin_lock(&mp->m_sb_lock);
-       xfs_icsb_sync_counters_locked(mp, 0);
 
        /*
         * If our previous reservation was larger than the current value,
@@ -716,7 +712,8 @@ retry:
        } else {
                __int64_t       free;
 
-               free =  mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+               free = percpu_counter_sum(&mp->m_fdblocks) -
+                                                       XFS_ALLOC_SET_ASIDE(mp);
                if (!free)
                        goto out; /* ENOSPC and fdblks_delta = 0 */
 
@@ -755,8 +752,7 @@ out:
                 * the extra reserve blocks from the reserve.....
                 */
                int error;
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                fdblks_delta, 0);
+               error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
                if (error == -ENOSPC)
                        goto retry;
        }
index 9771b7e..76a9f27 100644 (file)
@@ -439,11 +439,11 @@ again:
        *ipp = ip;
 
        /*
-        * If we have a real type for an on-disk inode, we can set ops(&unlock)
+        * If we have a real type for an on-disk inode, we can setup the inode
         * now.  If it's a new inode being created, xfs_ialloc will handle it.
         */
        if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
-               xfs_setup_inode(ip);
+               xfs_setup_existing_inode(ip);
        return 0;
 
 out_error_or_again:
index 6163767..d6ebc85 100644 (file)
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
 }
 
 /*
- * The xfs inode contains 2 locks: a multi-reader lock called the
- * i_iolock and a multi-reader lock called the i_lock.  This routine
- * allows either or both of the locks to be obtained.
+ * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and
+ * the i_lock.  This routine allows various combinations of the locks to be
+ * obtained.
  *
- * The 2 locks should always be ordered so that the IO lock is
- * obtained first in order to prevent deadlock.
+ * The 3 locks should always be ordered so that the IO lock is obtained first,
+ * the mmap lock second and the ilock last in order to prevent deadlock.
  *
- * ip -- the inode being locked
- * lock_flags -- this parameter indicates the inode's locks
- *       to be locked.  It can be:
- *             XFS_IOLOCK_SHARED,
- *             XFS_IOLOCK_EXCL,
- *             XFS_ILOCK_SHARED,
- *             XFS_ILOCK_EXCL,
- *             XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
- *             XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
- *             XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
- *             XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
+ * Basic locking order:
+ *
+ * i_iolock -> i_mmap_lock -> page_lock -> i_ilock
+ *
+ * mmap_sem locking order:
+ *
+ * i_iolock -> page lock -> mmap_sem
+ * mmap_sem -> i_mmap_lock -> page_lock
+ *
+ * The difference in mmap_sem locking order mean that we cannot hold the
+ * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
+ * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
+ * in get_user_pages() to map the user pages into the kernel address space for
+ * direct IO. Similarly the i_iolock cannot be taken inside a page fault because
+ * page faults already hold the mmap_sem.
+ *
+ * Hence to serialise fully against both syscall and mmap based IO, we need to
+ * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
+ * taken in places where we need to invalidate the page cache in a race
+ * free manner (e.g. truncate, hole punch and other extent manipulation
+ * functions).
  */
 void
 xfs_ilock(
@@ -150,6 +160,8 @@ xfs_ilock(
         */
        ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +171,11 @@ xfs_ilock(
        else if (lock_flags & XFS_IOLOCK_SHARED)
                mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
 
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+
        if (lock_flags & XFS_ILOCK_EXCL)
                mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
        else if (lock_flags & XFS_ILOCK_SHARED)
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
         */
        ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
                if (!mrtryaccess(&ip->i_iolock))
                        goto out;
        }
+
+       if (lock_flags & XFS_MMAPLOCK_EXCL) {
+               if (!mrtryupdate(&ip->i_mmaplock))
+                       goto out_undo_iolock;
+       } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+               if (!mrtryaccess(&ip->i_mmaplock))
+                       goto out_undo_iolock;
+       }
+
        if (lock_flags & XFS_ILOCK_EXCL) {
                if (!mrtryupdate(&ip->i_lock))
-                       goto out_undo_iolock;
+                       goto out_undo_mmaplock;
        } else if (lock_flags & XFS_ILOCK_SHARED) {
                if (!mrtryaccess(&ip->i_lock))
-                       goto out_undo_iolock;
+                       goto out_undo_mmaplock;
        }
        return 1;
 
- out_undo_iolock:
+out_undo_mmaplock:
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrunlock_excl(&ip->i_mmaplock);
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mrunlock_shared(&ip->i_mmaplock);
+out_undo_iolock:
        if (lock_flags & XFS_IOLOCK_EXCL)
                mrunlock_excl(&ip->i_iolock);
        else if (lock_flags & XFS_IOLOCK_SHARED)
                mrunlock_shared(&ip->i_iolock);
- out:
+out:
        return 0;
 }
 
@@ -244,6 +277,8 @@ xfs_iunlock(
         */
        ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
               (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
        ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
               (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
        ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +289,11 @@ xfs_iunlock(
        else if (lock_flags & XFS_IOLOCK_SHARED)
                mrunlock_shared(&ip->i_iolock);
 
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrunlock_excl(&ip->i_mmaplock);
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mrunlock_shared(&ip->i_mmaplock);
+
        if (lock_flags & XFS_ILOCK_EXCL)
                mrunlock_excl(&ip->i_lock);
        else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +311,14 @@ xfs_ilock_demote(
        xfs_inode_t             *ip,
        uint                    lock_flags)
 {
-       ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
+       ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
+       ASSERT((lock_flags &
+               ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
 
        if (lock_flags & XFS_ILOCK_EXCL)
                mrdemote(&ip->i_lock);
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrdemote(&ip->i_mmaplock);
        if (lock_flags & XFS_IOLOCK_EXCL)
                mrdemote(&ip->i_iolock);
 
@@ -294,6 +337,12 @@ xfs_isilocked(
                return rwsem_is_locked(&ip->i_lock.mr_lock);
        }
 
+       if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
+               if (!(lock_flags & XFS_MMAPLOCK_SHARED))
+                       return !!ip->i_mmaplock.mr_writer;
+               return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+       }
+
        if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
                if (!(lock_flags & XFS_IOLOCK_SHARED))
                        return !!ip->i_iolock.mr_writer;
@@ -314,14 +363,27 @@ int xfs_lock_delays;
 #endif
 
 /*
- * Bump the subclass so xfs_lock_inodes() acquires each lock with
- * a different value
+ * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
+ * value. This shouldn't be called for page fault locking, but we also need to
+ * ensure we don't overrun the number of lockdep subclasses for the iolock or
+ * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
  */
 static inline int
 xfs_lock_inumorder(int lock_mode, int subclass)
 {
-       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
+       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+               ASSERT(subclass + XFS_LOCK_INUMORDER <
+                       (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+       }
+
+       if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
+               ASSERT(subclass + XFS_LOCK_INUMORDER <
+                       (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
+               lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
+                                                       XFS_MMAPLOCK_SHIFT;
+       }
+
        if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
                lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
 
@@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass)
 }
 
 /*
- * The following routine will lock n inodes in exclusive mode.
- * We assume the caller calls us with the inodes in i_ino order.
+ * The following routine will lock n inodes in exclusive mode.  We assume the
+ * caller calls us with the inodes in i_ino order.
  *
- * We need to detect deadlock where an inode that we lock
- * is in the AIL and we start waiting for another inode that is locked
- * by a thread in a long running transaction (such as truncate). This can
- * result in deadlock since the long running trans might need to wait
- * for the inode we just locked in order to push the tail and free space
- * in the log.
+ * We need to detect deadlock where an inode that we lock is in the AIL and we
+ * start waiting for another inode that is locked by a thread in a long running
+ * transaction (such as truncate). This can result in deadlock since the long
+ * running trans might need to wait for the inode we just locked in order to
+ * push the tail and free space in the log.
  */
 void
 xfs_lock_inodes(
@@ -348,30 +409,27 @@ xfs_lock_inodes(
        int             attempts = 0, i, j, try_lock;
        xfs_log_item_t  *lp;
 
-       ASSERT(ips && (inodes >= 2)); /* we need at least two */
+       /* currently supports between 2 and 5 inodes */
+       ASSERT(ips && inodes >= 2 && inodes <= 5);
 
        try_lock = 0;
        i = 0;
-
 again:
        for (; i < inodes; i++) {
                ASSERT(ips[i]);
 
-               if (i && (ips[i] == ips[i-1]))  /* Already locked */
+               if (i && (ips[i] == ips[i - 1]))        /* Already locked */
                        continue;
 
                /*
-                * If try_lock is not set yet, make sure all locked inodes
-                * are not in the AIL.
-                * If any are, set try_lock to be used later.
+                * If try_lock is not set yet, make sure all locked inodes are
+                * not in the AIL.  If any are, set try_lock to be used later.
                 */
-
                if (!try_lock) {
                        for (j = (i - 1); j >= 0 && !try_lock; j--) {
                                lp = (xfs_log_item_t *)ips[j]->i_itemp;
-                               if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+                               if (lp && (lp->li_flags & XFS_LI_IN_AIL))
                                        try_lock++;
-                               }
                        }
                }
 
@@ -381,51 +439,42 @@ again:
                 * we can't get any, we must release all we have
                 * and try again.
                 */
+               if (!try_lock) {
+                       xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
+                       continue;
+               }
+
+               /* try_lock means we have an inode locked that is in the AIL. */
+               ASSERT(i != 0);
+               if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
+                       continue;
 
-               if (try_lock) {
-                       /* try_lock must be 0 if i is 0. */
+               /*
+                * Unlock all previous guys and try again.  xfs_iunlock will try
+                * to push the tail if the inode is in the AIL.
+                */
+               attempts++;
+               for (j = i - 1; j >= 0; j--) {
                        /*
-                        * try_lock means we have an inode locked
-                        * that is in the AIL.
+                        * Check to see if we've already unlocked this one.  Not
+                        * the first one going back, and the inode ptr is the
+                        * same.
                         */
-                       ASSERT(i != 0);
-                       if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
-                               attempts++;
-
-                               /*
-                                * Unlock all previous guys and try again.
-                                * xfs_iunlock will try to push the tail
-                                * if the inode is in the AIL.
-                                */
-
-                               for(j = i - 1; j >= 0; j--) {
-
-                                       /*
-                                        * Check to see if we've already
-                                        * unlocked this one.
-                                        * Not the first one going back,
-                                        * and the inode ptr is the same.
-                                        */
-                                       if ((j != (i - 1)) && ips[j] ==
-                                                               ips[j+1])
-                                               continue;
-
-                                       xfs_iunlock(ips[j], lock_mode);
-                               }
+                       if (j != (i - 1) && ips[j] == ips[j + 1])
+                               continue;
+
+                       xfs_iunlock(ips[j], lock_mode);
+               }
 
-                               if ((attempts % 5) == 0) {
-                                       delay(1); /* Don't just spin the CPU */
+               if ((attempts % 5) == 0) {
+                       delay(1); /* Don't just spin the CPU */
 #ifdef DEBUG
-                                       xfs_lock_delays++;
+                       xfs_lock_delays++;
 #endif
-                               }
-                               i = 0;
-                               try_lock = 0;
-                               goto again;
-                       }
-               } else {
-                       xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
                }
+               i = 0;
+               try_lock = 0;
+               goto again;
        }
 
 #ifdef DEBUG
@@ -440,10 +489,10 @@ again:
 }
 
 /*
- * xfs_lock_two_inodes() can only be used to lock one type of lock
- * at a time - the iolock or the ilock, but not both at once. If
- * we lock both at once, lockdep will report false positives saying
- * we have violated locking orders.
+ * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
  */
 void
 xfs_lock_two_inodes(
@@ -455,8 +504,12 @@ xfs_lock_two_inodes(
        int                     attempts = 0;
        xfs_log_item_t          *lp;
 
-       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-               ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
+       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+               ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+               ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+       } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
+               ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+
        ASSERT(ip0->i_ino != ip1->i_ino);
 
        if (ip0->i_ino > ip1->i_ino) {
@@ -818,7 +871,7 @@ xfs_ialloc(
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_log_inode(tp, ip, flags);
 
-       /* now that we have an i_mode we can setup inode ops and unlock */
+       /* now that we have an i_mode we can setup the inode structure */
        xfs_setup_inode(ip);
 
        *ipp = ip;
@@ -1235,12 +1288,14 @@ xfs_create(
        xfs_trans_cancel(tp, cancel_flags);
  out_release_inode:
        /*
-        * Wait until after the current transaction is aborted to
-        * release the inode.  This prevents recursive transactions
-        * and deadlocks from xfs_inactive.
+        * Wait until after the current transaction is aborted to finish the
+        * setup of the inode and release the inode.  This prevents recursive
+        * transactions and deadlocks from xfs_inactive.
         */
-       if (ip)
+       if (ip) {
+               xfs_finish_inode_setup(ip);
                IRELE(ip);
+       }
 
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);
@@ -1345,12 +1400,14 @@ xfs_create_tmpfile(
        xfs_trans_cancel(tp, cancel_flags);
  out_release_inode:
        /*
-        * Wait until after the current transaction is aborted to
-        * release the inode.  This prevents recursive transactions
-        * and deadlocks from xfs_inactive.
+        * Wait until after the current transaction is aborted to finish the
+        * setup of the inode and release the inode.  This prevents recursive
+        * transactions and deadlocks from xfs_inactive.
         */
-       if (ip)
+       if (ip) {
+               xfs_finish_inode_setup(ip);
                IRELE(ip);
+       }
 
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);
@@ -2611,19 +2668,22 @@ xfs_remove(
 /*
  * Enter all inodes for a rename transaction into a sorted array.
  */
+#define __XFS_SORT_INODES      5
 STATIC void
 xfs_sort_for_rename(
-       xfs_inode_t     *dp1,   /* in: old (source) directory inode */
-       xfs_inode_t     *dp2,   /* in: new (target) directory inode */
-       xfs_inode_t     *ip1,   /* in: inode of old entry */
-       xfs_inode_t     *ip2,   /* in: inode of new entry, if it
-                                  already exists, NULL otherwise. */
-       xfs_inode_t     **i_tab,/* out: array of inode returned, sorted */
-       int             *num_inodes)  /* out: number of inodes in array */
+       struct xfs_inode        *dp1,   /* in: old (source) directory inode */
+       struct xfs_inode        *dp2,   /* in: new (target) directory inode */
+       struct xfs_inode        *ip1,   /* in: inode of old entry */
+       struct xfs_inode        *ip2,   /* in: inode of new entry */
+       struct xfs_inode        *wip,   /* in: whiteout inode */
+       struct xfs_inode        **i_tab,/* out: sorted array of inodes */
+       int                     *num_inodes)  /* in/out: inodes in array */
 {
-       xfs_inode_t             *temp;
        int                     i, j;
 
+       ASSERT(*num_inodes == __XFS_SORT_INODES);
+       memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
+
        /*
         * i_tab contains a list of pointers to inodes.  We initialize
         * the table here & we'll sort it.  We will then use it to
@@ -2631,25 +2691,24 @@ xfs_sort_for_rename(
         *
         * Note that the table may contain duplicates.  e.g., dp1 == dp2.
         */
-       i_tab[0] = dp1;
-       i_tab[1] = dp2;
-       i_tab[2] = ip1;
-       if (ip2) {
-               *num_inodes = 4;
-               i_tab[3] = ip2;
-       } else {
-               *num_inodes = 3;
-               i_tab[3] = NULL;
-       }
+       i = 0;
+       i_tab[i++] = dp1;
+       i_tab[i++] = dp2;
+       i_tab[i++] = ip1;
+       if (ip2)
+               i_tab[i++] = ip2;
+       if (wip)
+               i_tab[i++] = wip;
+       *num_inodes = i;
 
        /*
         * Sort the elements via bubble sort.  (Remember, there are at
-        * most 4 elements to sort, so this is adequate.)
+        * most 5 elements to sort, so this is adequate.)
         */
        for (i = 0; i < *num_inodes; i++) {
                for (j = 1; j < *num_inodes; j++) {
                        if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
-                               temp = i_tab[j];
+                               struct xfs_inode *temp = i_tab[j];
                                i_tab[j] = i_tab[j-1];
                                i_tab[j-1] = temp;
                        }
@@ -2657,6 +2716,31 @@ xfs_sort_for_rename(
        }
 }
 
+static int
+xfs_finish_rename(
+       struct xfs_trans        *tp,
+       struct xfs_bmap_free    *free_list)
+{
+       int                     committed = 0;
+       int                     error;
+
+       /*
+        * If this is a synchronous mount, make sure that the rename transaction
+        * goes to disk before returning to the user.
+        */
+       if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+               xfs_trans_set_sync(tp);
+
+       error = xfs_bmap_finish(&tp, free_list, &committed);
+       if (error) {
+               xfs_bmap_cancel(free_list);
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+               return error;
+       }
+
+       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+}
+
 /*
  * xfs_cross_rename()
  *
@@ -2685,14 +2769,14 @@ xfs_cross_rename(
                                ip2->i_ino,
                                first_block, free_list, spaceres);
        if (error)
-               goto out;
+               goto out_trans_abort;
 
        /* Swap inode number for dirent in second parent */
        error = xfs_dir_replace(tp, dp2, name2,
                                ip1->i_ino,
                                first_block, free_list, spaceres);
        if (error)
-               goto out;
+               goto out_trans_abort;
 
        /*
         * If we're renaming one or more directories across different parents,
@@ -2707,16 +2791,16 @@ xfs_cross_rename(
                                                dp1->i_ino, first_block,
                                                free_list, spaceres);
                        if (error)
-                               goto out;
+                               goto out_trans_abort;
 
                        /* transfer ip2 ".." reference to dp1 */
                        if (!S_ISDIR(ip1->i_d.di_mode)) {
                                error = xfs_droplink(tp, dp2);
                                if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                                error = xfs_bumplink(tp, dp1);
                                if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                        }
 
                        /*
@@ -2734,16 +2818,16 @@ xfs_cross_rename(
                                                dp2->i_ino, first_block,
                                                free_list, spaceres);
                        if (error)
-                               goto out;
+                               goto out_trans_abort;
 
                        /* transfer ip1 ".." reference to dp2 */
                        if (!S_ISDIR(ip2->i_d.di_mode)) {
                                error = xfs_droplink(tp, dp1);
                                if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                                error = xfs_bumplink(tp, dp2);
                                if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                        }
 
                        /*
@@ -2771,66 +2855,108 @@ xfs_cross_rename(
        }
        xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
-out:
+       return xfs_finish_rename(tp, free_list);
+
+out_trans_abort:
+       xfs_bmap_cancel(free_list);
+       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
        return error;
 }
 
+/*
+ * xfs_rename_alloc_whiteout()
+ *
+ * Return a referenced, unlinked, unlocked inode that that can be used as a
+ * whiteout in a rename transaction. We use a tmpfile inode here so that if we
+ * crash between allocating the inode and linking it into the rename transaction
+ * recovery will free the inode and we won't leak it.
+ */
+static int
+xfs_rename_alloc_whiteout(
+       struct xfs_inode        *dp,
+       struct xfs_inode        **wip)
+{
+       struct xfs_inode        *tmpfile;
+       int                     error;
+
+       error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile);
+       if (error)
+               return error;
+
+       /* Satisfy xfs_bumplink that this is a real tmpfile */
+       xfs_finish_inode_setup(tmpfile);
+       VFS_I(tmpfile)->i_state |= I_LINKABLE;
+
+       *wip = tmpfile;
+       return 0;
+}
+
 /*
  * xfs_rename
  */
 int
 xfs_rename(
-       xfs_inode_t     *src_dp,
-       struct xfs_name *src_name,
-       xfs_inode_t     *src_ip,
-       xfs_inode_t     *target_dp,
-       struct xfs_name *target_name,
-       xfs_inode_t     *target_ip,
-       unsigned int    flags)
+       struct xfs_inode        *src_dp,
+       struct xfs_name         *src_name,
+       struct xfs_inode        *src_ip,
+       struct xfs_inode        *target_dp,
+       struct xfs_name         *target_name,
+       struct xfs_inode        *target_ip,
+       unsigned int            flags)
 {
-       xfs_trans_t     *tp = NULL;
-       xfs_mount_t     *mp = src_dp->i_mount;
-       int             new_parent;             /* moving to a new dir */
-       int             src_is_directory;       /* src_name is a directory */
-       int             error;
-       xfs_bmap_free_t free_list;
-       xfs_fsblock_t   first_block;
-       int             cancel_flags;
-       int             committed;
-       xfs_inode_t     *inodes[4];
-       int             spaceres;
-       int             num_inodes;
+       struct xfs_mount        *mp = src_dp->i_mount;
+       struct xfs_trans        *tp;
+       struct xfs_bmap_free    free_list;
+       xfs_fsblock_t           first_block;
+       struct xfs_inode        *wip = NULL;            /* whiteout inode */
+       struct xfs_inode        *inodes[__XFS_SORT_INODES];
+       int                     num_inodes = __XFS_SORT_INODES;
+       bool                    new_parent = (src_dp != target_dp);
+       bool                    src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+       int                     cancel_flags = 0;
+       int                     spaceres;
+       int                     error;
 
        trace_xfs_rename(src_dp, target_dp, src_name, target_name);
 
-       new_parent = (src_dp != target_dp);
-       src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+       if ((flags & RENAME_EXCHANGE) && !target_ip)
+               return -EINVAL;
 
-       xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
+       /*
+        * If we are doing a whiteout operation, allocate the whiteout inode
+        * we will be placing at the target and ensure the type is set
+        * appropriately.
+        */
+       if (flags & RENAME_WHITEOUT) {
+               ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
+               error = xfs_rename_alloc_whiteout(target_dp, &wip);
+               if (error)
+                       return error;
+
+               /* setup target dirent info as whiteout */
+               src_name->type = XFS_DIR3_FT_CHRDEV;
+       }
+
+       xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
                                inodes, &num_inodes);
 
-       xfs_bmap_init(&free_list, &first_block);
        tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
        spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
        if (error == -ENOSPC) {
                spaceres = 0;
                error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
        }
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               goto std_return;
-       }
+       if (error)
+               goto out_trans_cancel;
+       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
 
        /*
         * Attach the dquots to the inodes
         */
        error = xfs_qm_vop_rename_dqattach(inodes);
-       if (error) {
-               xfs_trans_cancel(tp, cancel_flags);
-               goto std_return;
-       }
+       if (error)
+               goto out_trans_cancel;
 
        /*
         * Lock all the participating inodes. Depending upon whether
@@ -2851,6 +2977,8 @@ xfs_rename(
        xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
        if (target_ip)
                xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
+       if (wip)
+               xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
 
        /*
         * If we are using project inheritance, we only allow renames
@@ -2860,24 +2988,16 @@ xfs_rename(
        if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
                     (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
                error = -EXDEV;
-               goto error_return;
+               goto out_trans_cancel;
        }
 
-       /*
-        * Handle RENAME_EXCHANGE flags
-        */
-       if (flags & RENAME_EXCHANGE) {
-               if (target_ip == NULL) {
-                       error = -EINVAL;
-                       goto error_return;
-               }
-               error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
-                                        target_dp, target_name, target_ip,
-                                        &free_list, &first_block, spaceres);
-               if (error)
-                       goto abort_return;
-               goto finish_rename;
-       }
+       xfs_bmap_init(&free_list, &first_block);
+
+       /* RENAME_EXCHANGE is unique from here on. */
+       if (flags & RENAME_EXCHANGE)
+               return xfs_cross_rename(tp, src_dp, src_name, src_ip,
+                                       target_dp, target_name, target_ip,
+                                       &free_list, &first_block, spaceres);
 
        /*
         * Set up the target.
@@ -2890,7 +3010,7 @@ xfs_rename(
                if (!spaceres) {
                        error = xfs_dir_canenter(tp, target_dp, target_name);
                        if (error)
-                               goto error_return;
+                               goto out_trans_cancel;
                }
                /*
                 * If target does not exist and the rename crosses
@@ -2901,9 +3021,9 @@ xfs_rename(
                                                src_ip->i_ino, &first_block,
                                                &free_list, spaceres);
                if (error == -ENOSPC)
-                       goto error_return;
+                       goto out_bmap_cancel;
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
 
                xfs_trans_ichgtime(tp, target_dp,
                                        XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2911,7 +3031,7 @@ xfs_rename(
                if (new_parent && src_is_directory) {
                        error = xfs_bumplink(tp, target_dp);
                        if (error)
-                               goto abort_return;
+                               goto out_trans_abort;
                }
        } else { /* target_ip != NULL */
                /*
@@ -2926,7 +3046,7 @@ xfs_rename(
                        if (!(xfs_dir_isempty(target_ip)) ||
                            (target_ip->i_d.di_nlink > 2)) {
                                error = -EEXIST;
-                               goto error_return;
+                               goto out_trans_cancel;
                        }
                }
 
@@ -2943,7 +3063,7 @@ xfs_rename(
                                        src_ip->i_ino,
                                        &first_block, &free_list, spaceres);
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
 
                xfs_trans_ichgtime(tp, target_dp,
                                        XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2954,7 +3074,7 @@ xfs_rename(
                 */
                error = xfs_droplink(tp, target_ip);
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
 
                if (src_is_directory) {
                        /*
@@ -2962,7 +3082,7 @@ xfs_rename(
                         */
                        error = xfs_droplink(tp, target_ip);
                        if (error)
-                               goto abort_return;
+                               goto out_trans_abort;
                }
        } /* target_ip != NULL */
 
@@ -2979,7 +3099,7 @@ xfs_rename(
                                        &first_block, &free_list, spaceres);
                ASSERT(error != -EEXIST);
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
        }
 
        /*
@@ -3005,49 +3125,67 @@ xfs_rename(
                 */
                error = xfs_droplink(tp, src_dp);
                if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
        }
 
-       error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+       /*
+        * For whiteouts, we only need to update the source dirent with the
+        * inode number of the whiteout inode rather than removing it
+        * altogether.
+        */
+       if (wip) {
+               error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
                                        &first_block, &free_list, spaceres);
+       } else
+               error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+                                          &first_block, &free_list, spaceres);
        if (error)
-               goto abort_return;
-
-       xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
-       if (new_parent)
-               xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
+               goto out_trans_abort;
 
-finish_rename:
        /*
-        * If this is a synchronous mount, make sure that the
-        * rename transaction goes to disk before returning to
-        * the user.
+        * For whiteouts, we need to bump the link count on the whiteout inode.
+        * This means that failures all the way up to this point leave the inode
+        * on the unlinked list and so cleanup is a simple matter of dropping
+        * the remaining reference to it. If we fail here after bumping the link
+        * count, we're shutting down the filesystem so we'll never see the
+        * intermediate state on disk.
         */
-       if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
-               xfs_trans_set_sync(tp);
-       }
+       if (wip) {
+               ASSERT(wip->i_d.di_nlink == 0);
+               error = xfs_bumplink(tp, wip);
+               if (error)
+                       goto out_trans_abort;
+               error = xfs_iunlink_remove(tp, wip);
+               if (error)
+                       goto out_trans_abort;
+               xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
 
-       error = xfs_bmap_finish(&tp, &free_list, &committed);
-       if (error) {
-               xfs_bmap_cancel(&free_list);
-               xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
-                                XFS_TRANS_ABORT));
-               goto std_return;
+               /*
+                * Now we have a real link, clear the "I'm a tmpfile" state
+                * flag from the inode so it doesn't accidentally get misused in
+                * future.
+                */
+               VFS_I(wip)->i_state &= ~I_LINKABLE;
        }
 
-       /*
-        * trans_commit will unlock src_ip, target_ip & decrement
-        * the vnode references.
-        */
-       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
+       if (new_parent)
+               xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
 
- abort_return:
+       error = xfs_finish_rename(tp, &free_list);
+       if (wip)
+               IRELE(wip);
+       return error;
+
+out_trans_abort:
        cancel_flags |= XFS_TRANS_ABORT;
- error_return:
+out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
+out_trans_cancel:
        xfs_trans_cancel(tp, cancel_flags);
- std_return:
+       if (wip)
+               IRELE(wip);
        return error;
 }
 
index a1cd55f..8f22d20 100644 (file)
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
        struct xfs_inode_log_item *i_itemp;     /* logging information */
        mrlock_t                i_lock;         /* inode lock */
        mrlock_t                i_iolock;       /* inode IO lock */
+       mrlock_t                i_mmaplock;     /* inode mmap IO lock */
        atomic_t                i_pincount;     /* inode pin count */
        spinlock_t              i_flags_lock;   /* inode i_flags lock */
        /* Miscellaneous state. */
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define        XFS_IOLOCK_SHARED       (1<<1)
 #define        XFS_ILOCK_EXCL          (1<<2)
 #define        XFS_ILOCK_SHARED        (1<<3)
+#define        XFS_MMAPLOCK_EXCL       (1<<4)
+#define        XFS_MMAPLOCK_SHARED     (1<<5)
 
 #define XFS_LOCK_MASK          (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
-                               | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
+                               | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
+                               | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
 
 #define XFS_LOCK_FLAGS \
        { XFS_IOLOCK_EXCL,      "IOLOCK_EXCL" }, \
        { XFS_IOLOCK_SHARED,    "IOLOCK_SHARED" }, \
        { XFS_ILOCK_EXCL,       "ILOCK_EXCL" }, \
-       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }
+       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }, \
+       { XFS_MMAPLOCK_EXCL,    "MMAPLOCK_EXCL" }, \
+       { XFS_MMAPLOCK_SHARED,  "MMAPLOCK_SHARED" }
 
 
 /*
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
 #define XFS_IOLOCK_SHIFT       16
 #define        XFS_IOLOCK_PARENT       (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
 
+#define XFS_MMAPLOCK_SHIFT     20
+
 #define XFS_ILOCK_SHIFT                24
 #define        XFS_ILOCK_PARENT        (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
 #define        XFS_ILOCK_RTBITMAP      (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
 #define        XFS_ILOCK_RTSUM         (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
 
-#define XFS_IOLOCK_DEP_MASK    0x00ff0000
+#define XFS_IOLOCK_DEP_MASK    0x000f0000
+#define XFS_MMAPLOCK_DEP_MASK  0x00f00000
 #define XFS_ILOCK_DEP_MASK     0xff000000
-#define XFS_LOCK_DEP_MASK      (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
+#define XFS_LOCK_DEP_MASK      (XFS_IOLOCK_DEP_MASK | \
+                                XFS_MMAPLOCK_DEP_MASK | \
+                                XFS_ILOCK_DEP_MASK)
 
-#define XFS_IOLOCK_DEP(flags)  (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
-#define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
+#define XFS_IOLOCK_DEP(flags)  (((flags) & XFS_IOLOCK_DEP_MASK) \
+                                       >> XFS_IOLOCK_SHIFT)
+#define XFS_MMAPLOCK_DEP(flags)        (((flags) & XFS_MMAPLOCK_DEP_MASK) \
+                                       >> XFS_MMAPLOCK_SHIFT)
+#define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) \
+                                       >> XFS_ILOCK_SHIFT)
 
 /*
  * For multiple groups support: if S_ISGID bit is set in the parent
@@ -391,6 +406,28 @@ int        xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
 int    xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
 
 
+/* from xfs_iops.c */
+/*
+ * When setting up a newly allocated inode, we need to call
+ * xfs_finish_inode_setup() once the inode is fully instantiated at
+ * the VFS level to prevent the rest of the world seeing the inode
+ * before we've completed instantiation. Otherwise we can do it
+ * the moment the inode lookup is complete.
+ */
+extern void xfs_setup_inode(struct xfs_inode *ip);
+static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
+{
+       xfs_iflags_clear(ip, XFS_INEW);
+       barrier();
+       unlock_new_inode(VFS_I(ip));
+}
+
+static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
+{
+       xfs_setup_inode(ip);
+       xfs_finish_inode_setup(ip);
+}
+
 #define IHOLD(ip) \
 do { \
        ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
index ac4feae..87f67c6 100644 (file)
@@ -82,7 +82,7 @@ xfs_find_handle(
                error = user_lpath((const char __user *)hreq->path, &path);
                if (error)
                        return error;
-               inode = path.dentry->d_inode;
+               inode = d_inode(path.dentry);
        }
        ip = XFS_I(inode);
 
@@ -210,7 +210,7 @@ xfs_open_by_handle(
        dentry = xfs_handlereq_to_dentry(parfilp, hreq);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
 
        /* Restrict xfs_open_by_handle to directories & regular files. */
        if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
@@ -303,7 +303,7 @@ xfs_readlink_by_handle(
                goto out_dput;
        }
 
-       error = xfs_readlink(XFS_I(dentry->d_inode), link);
+       error = xfs_readlink(XFS_I(d_inode(dentry)), link);
        if (error)
                goto out_kfree;
        error = readlink_copy(hreq->ohandle, olen, link);
@@ -376,7 +376,7 @@ xfs_fssetdm_by_handle(
                return PTR_ERR(dentry);
        }
 
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+       if (IS_IMMUTABLE(d_inode(dentry)) || IS_APPEND(d_inode(dentry))) {
                error = -EPERM;
                goto out;
        }
@@ -386,7 +386,7 @@ xfs_fssetdm_by_handle(
                goto out;
        }
 
-       error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+       error = xfs_set_dmattrs(XFS_I(d_inode(dentry)), fsd.fsd_dmevmask,
                                 fsd.fsd_dmstate);
 
  out:
@@ -429,7 +429,7 @@ xfs_attrlist_by_handle(
                goto out_dput;
 
        cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+       error = xfs_attr_list(XFS_I(d_inode(dentry)), kbuf, al_hreq.buflen,
                                        al_hreq.flags, cursor);
        if (error)
                goto out_kfree;
@@ -559,7 +559,7 @@ xfs_attrmulti_by_handle(
                switch (ops[i].am_opcode) {
                case ATTR_OP_GET:
                        ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                        ops[i].am_attrvalue, &ops[i].am_length,
                                        ops[i].am_flags);
                        break;
@@ -568,7 +568,7 @@ xfs_attrmulti_by_handle(
                        if (ops[i].am_error)
                                break;
                        ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                        ops[i].am_attrvalue, ops[i].am_length,
                                        ops[i].am_flags);
                        mnt_drop_write_file(parfilp);
@@ -578,7 +578,7 @@ xfs_attrmulti_by_handle(
                        if (ops[i].am_error)
                                break;
                        ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                        ops[i].am_flags);
                        mnt_drop_write_file(parfilp);
                        break;
@@ -631,7 +631,7 @@ xfs_ioc_space(
 
        if (filp->f_flags & O_DSYNC)
                flags |= XFS_PREALLOC_SYNC;
-       if (ioflags & XFS_IO_INVIS)     
+       if (ioflags & XFS_IO_INVIS)
                flags |= XFS_PREALLOC_INVISIBLE;
 
        error = mnt_want_write_file(filp);
@@ -639,10 +639,13 @@ xfs_ioc_space(
                return error;
 
        xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock);
+       error = xfs_break_layouts(inode, &iolock, false);
        if (error)
                goto out_unlock;
 
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       iolock |= XFS_MMAPLOCK_EXCL;
+
        switch (bf->l_whence) {
        case 0: /*SEEK_SET*/
                break;
index bfc7c7c..b88bdc8 100644 (file)
@@ -375,7 +375,7 @@ xfs_compat_attrlist_by_handle(
                goto out_dput;
 
        cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+       error = xfs_attr_list(XFS_I(d_inode(dentry)), kbuf, al_hreq.buflen,
                                        al_hreq.flags, cursor);
        if (error)
                goto out_kfree;
@@ -445,7 +445,7 @@ xfs_compat_attrmulti_by_handle(
                switch (ops[i].am_opcode) {
                case ATTR_OP_GET:
                        ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                        compat_ptr(ops[i].am_attrvalue),
                                        &ops[i].am_length, ops[i].am_flags);
                        break;
@@ -454,7 +454,7 @@ xfs_compat_attrmulti_by_handle(
                        if (ops[i].am_error)
                                break;
                        ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                        compat_ptr(ops[i].am_attrvalue),
                                        ops[i].am_length, ops[i].am_flags);
                        mnt_drop_write_file(parfilp);
@@ -464,7 +464,7 @@ xfs_compat_attrmulti_by_handle(
                        if (ops[i].am_error)
                                break;
                        ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                        ops[i].am_flags);
                        mnt_drop_write_file(parfilp);
                        break;
@@ -504,7 +504,7 @@ xfs_compat_fssetdm_by_handle(
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+       if (IS_IMMUTABLE(d_inode(dentry)) || IS_APPEND(d_inode(dentry))) {
                error = -EPERM;
                goto out;
        }
@@ -514,7 +514,7 @@ xfs_compat_fssetdm_by_handle(
                goto out;
        }
 
-       error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+       error = xfs_set_dmattrs(XFS_I(d_inode(dentry)), fsd.fsd_dmevmask,
                                 fsd.fsd_dmstate);
 
 out:
index ccb1dd0..38e633b 100644 (file)
@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size(
        alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
                                       alloc_blocks);
 
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
-       freesp = mp->m_sb.sb_fdblocks;
+       freesp = percpu_counter_read_positive(&mp->m_fdblocks);
        if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
                shift = 2;
                if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
index e53a903..f4cd720 100644 (file)
@@ -187,6 +187,8 @@ xfs_generic_create(
        else
                d_instantiate(dentry, inode);
 
+       xfs_finish_inode_setup(ip);
+
  out_free_acl:
        if (default_acl)
                posix_acl_release(default_acl);
@@ -195,6 +197,7 @@ xfs_generic_create(
        return error;
 
  out_cleanup_inode:
+       xfs_finish_inode_setup(ip);
        if (!tmpfile)
                xfs_cleanup_inode(dir, inode, dentry);
        iput(inode);
@@ -301,7 +304,7 @@ xfs_vn_link(
        struct inode    *dir,
        struct dentry   *dentry)
 {
-       struct inode    *inode = old_dentry->d_inode;
+       struct inode    *inode = d_inode(old_dentry);
        struct xfs_name name;
        int             error;
 
@@ -326,7 +329,7 @@ xfs_vn_unlink(
 
        xfs_dentry_to_name(&name, dentry, 0);
 
-       error = xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+       error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry)));
        if (error)
                return error;
 
@@ -367,9 +370,11 @@ xfs_vn_symlink(
                goto out_cleanup_inode;
 
        d_instantiate(dentry, inode);
+       xfs_finish_inode_setup(cip);
        return 0;
 
  out_cleanup_inode:
+       xfs_finish_inode_setup(cip);
        xfs_cleanup_inode(dir, inode, dentry);
        iput(inode);
  out:
@@ -384,22 +389,22 @@ xfs_vn_rename(
        struct dentry   *ndentry,
        unsigned int    flags)
 {
-       struct inode    *new_inode = ndentry->d_inode;
+       struct inode    *new_inode = d_inode(ndentry);
        int             omode = 0;
        struct xfs_name oname;
        struct xfs_name nname;
 
-       if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+       if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                return -EINVAL;
 
        /* if we are exchanging files, we need to set i_mode of both files */
        if (flags & RENAME_EXCHANGE)
-               omode = ndentry->d_inode->i_mode;
+               omode = d_inode(ndentry)->i_mode;
 
        xfs_dentry_to_name(&oname, odentry, omode);
-       xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode);
+       xfs_dentry_to_name(&nname, ndentry, d_inode(odentry)->i_mode);
 
-       return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+       return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)),
                          XFS_I(ndir), &nname,
                          new_inode ? XFS_I(new_inode) : NULL, flags);
 }
@@ -421,7 +426,7 @@ xfs_vn_follow_link(
        if (!link)
                goto out_err;
 
-       error = xfs_readlink(XFS_I(dentry->d_inode), link);
+       error = xfs_readlink(XFS_I(d_inode(dentry)), link);
        if (unlikely(error))
                goto out_kfree;
 
@@ -441,7 +446,7 @@ xfs_vn_getattr(
        struct dentry           *dentry,
        struct kstat            *stat)
 {
-       struct inode            *inode = dentry->d_inode;
+       struct inode            *inode = d_inode(dentry);
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
 
@@ -766,6 +771,7 @@ xfs_setattr_size(
                return error;
 
        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
        ASSERT(S_ISREG(ip->i_d.di_mode));
        ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
                ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
@@ -829,55 +835,27 @@ xfs_setattr_size(
        inode_dio_wait(inode);
 
        /*
-        * Do all the page cache truncate work outside the transaction context
-        * as the "lock" order is page lock->log space reservation.  i.e.
-        * locking pages inside the transaction can ABBA deadlock with
-        * writeback. We have to do the VFS inode size update before we truncate
-        * the pagecache, however, to avoid racing with page faults beyond the
-        * new EOF they are not serialised against truncate operations except by
-        * page locks and size updates.
+        * We've already locked out new page faults, so now we can safely remove
+        * pages from the page cache knowing they won't get refaulted until we
+        * drop the XFS_MMAP_EXCL lock after the extent manipulations are
+        * complete. The truncate_setsize() call also cleans partial EOF page
+        * PTEs on extending truncates and hence ensures sub-page block size
+        * filesystems are correctly handled, too.
         *
-        * Hence we are in a situation where a truncate can fail with ENOMEM
-        * from xfs_trans_reserve(), but having already truncated the in-memory
-        * version of the file (i.e. made user visible changes). There's not
-        * much we can do about this, except to hope that the caller sees ENOMEM
-        * and retries the truncate operation.
+        * We have to do all the page cache truncate work outside the
+        * transaction context as the "lock" order is page lock->log space
+        * reservation as defined by extent allocation in the writeback path.
+        * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
+        * having already truncated the in-memory version of the file (i.e. made
+        * user visible changes). There's not much we can do about this, except
+        * to hope that the caller sees ENOMEM and retries the truncate
+        * operation.
         */
        error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
        if (error)
                return error;
        truncate_setsize(inode, newsize);
 
-       /*
-        * The "we can't serialise against page faults" pain gets worse.
-        *
-        * If the file is mapped then we have to clean the page at the old EOF
-        * when extending the file. Extending the file can expose changes the
-        * underlying page mapping (e.g. from beyond EOF to a hole or
-        * unwritten), and so on the next attempt to write to that page we need
-        * to remap it for write. i.e. we need .page_mkwrite() to be called.
-        * Hence we need to clean the page to clean the pte and so a new write
-        * fault will be triggered appropriately.
-        *
-        * If we do it before we change the inode size, then we can race with a
-        * page fault that maps the page with exactly the same problem. If we do
-        * it after we change the file size, then a new page fault can come in
-        * and allocate space before we've run the rest of the truncate
-        * transaction. That's kinda grotesque, but it's better than have data
-        * over a hole, and so that's the lesser evil that has been chosen here.
-        *
-        * The real solution, however, is to have some mechanism for locking out
-        * page faults while a truncate is in progress.
-        */
-       if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
-               error = filemap_write_and_wait_range(
-                               VFS_I(ip)->i_mapping,
-                               round_down(oldsize, PAGE_CACHE_SIZE),
-                               round_up(oldsize, PAGE_CACHE_SIZE) - 1);
-               if (error)
-                       return error;
-       }
-
        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
        if (error)
@@ -968,16 +946,20 @@ xfs_vn_setattr(
        struct dentry           *dentry,
        struct iattr            *iattr)
 {
-       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
+       struct xfs_inode        *ip = XFS_I(d_inode(dentry));
        int                     error;
 
        if (iattr->ia_valid & ATTR_SIZE) {
                uint            iolock = XFS_IOLOCK_EXCL;
 
                xfs_ilock(ip, iolock);
-               error = xfs_break_layouts(dentry->d_inode, &iolock);
-               if (!error)
+               error = xfs_break_layouts(d_inode(dentry), &iolock, true);
+               if (!error) {
+                       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+                       iolock |= XFS_MMAPLOCK_EXCL;
+
                        error = xfs_setattr_size(ip, iattr);
+               }
                xfs_iunlock(ip, iolock);
        } else {
                error = xfs_setattr_nonsize(ip, iattr, 0);
@@ -1228,16 +1210,12 @@ xfs_diflags_to_iflags(
 }
 
 /*
- * Initialize the Linux inode, set up the operation vectors and
- * unlock the inode.
+ * Initialize the Linux inode and set up the operation vectors.
  *
- * When reading existing inodes from disk this is called directly
- * from xfs_iget, when creating a new inode it is called from
- * xfs_ialloc after setting up the inode.
- *
- * We are always called with an uninitialised linux inode here.
- * We need to initialise the necessary fields and take a reference
- * on it.
+ * When reading existing inodes from disk this is called directly from xfs_iget,
+ * when creating a new inode it is called from xfs_ialloc after setting up the
+ * inode. These callers have different criteria for clearing XFS_INEW, so leave
+ * it up to the caller to deal with unlocking the inode appropriately.
  */
 void
 xfs_setup_inode(
@@ -1324,9 +1302,4 @@ xfs_setup_inode(
                inode_has_no_xattr(inode);
                cache_no_acl(inode);
        }
-
-       xfs_iflags_clear(ip, XFS_INEW);
-       barrier();
-
-       unlock_new_inode(inode);
 }
index ea7a98e..a0f84ab 100644 (file)
@@ -25,8 +25,6 @@ extern const struct file_operations xfs_dir_file_operations;
 
 extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
 
-extern void xfs_setup_inode(struct xfs_inode *);
-
 /*
  * Internal setattr interfaces.
  */
index 82e3142..8042989 100644 (file)
@@ -229,7 +229,7 @@ xfs_bulkstat_grab_ichunk(
        error = xfs_inobt_get_rec(cur, irec, &stat);
        if (error)
                return error;
-       XFS_WANT_CORRUPTED_RETURN(stat == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
 
        /* Check if the record contains the inode in request */
        if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
index c31d2c2..7c7842c 100644 (file)
@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t;
 #undef XFS_NATIVE_HOST
 #endif
 
-/*
- * Feature macros (disable/enable)
- */
-#ifdef CONFIG_SMP
-#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#else
-#undef  HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#endif
-
 #define irix_sgid_inherit      xfs_params.sgid_inherit.val
 #define irix_symlink_mode      xfs_params.symlink_mode.val
 #define xfs_panic_mask         xfs_params.panic_mask.val
index a5a945f..4f5784f 100644 (file)
@@ -4463,10 +4463,10 @@ xlog_do_recover(
        xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
        ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
        ASSERT(xfs_sb_good_version(sbp));
+       xfs_reinit_percpu_counters(log->l_mp);
+
        xfs_buf_relse(bp);
 
-       /* We've re-read the superblock so re-initialize per-cpu counters */
-       xfs_icsb_reinit_counters(log->l_mp);
 
        xlog_recover_check_summary(log);
 
index 4fa80e6..2ce7ee3 100644 (file)
 #include "xfs_sysfs.h"
 
 
-#ifdef HAVE_PERCPU_SB
-STATIC void    xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
-                                               int);
-STATIC void    xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
-                                               int);
-STATIC void    xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
-#else
-
-#define xfs_icsb_balance_counter(mp, a, b)             do { } while (0)
-#define xfs_icsb_balance_counter_locked(mp, a, b)      do { } while (0)
-#endif
-
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
 static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
                goto reread;
        }
 
-       /* Initialize per-cpu counters */
-       xfs_icsb_reinit_counters(mp);
+       xfs_reinit_percpu_counters(mp);
 
        /* no need to be quiet anymore, so reset the buf ops */
        bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
        if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
                return 0;
 
-       xfs_icsb_sync_counters(mp, 0);
-
        /*
         * we don't need to do this if we are updating the superblock
         * counters on every modification.
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
        return xfs_sync_sb(mp, true);
 }
 
-/*
- * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
- * a delta to a specified field in the in-core superblock.  Simply
- * switch on the field indicated and apply the delta to that field.
- * Fields are not allowed to dip below zero, so if the delta would
- * do this do not apply it and return EINVAL.
- *
- * The m_sb_lock must be held when this routine is called.
- */
-STATIC int
-xfs_mod_incore_sb_unlocked(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int64_t         delta,
-       int             rsvd)
+int
+xfs_mod_icount(
+       struct xfs_mount        *mp,
+       int64_t                 delta)
 {
-       int             scounter;       /* short counter for 32 bit fields */
-       long long       lcounter;       /* long counter for 64 bit fields */
-       long long       res_used, rem;
-
-       /*
-        * With the in-core superblock spin lock held, switch
-        * on the indicated field.  Apply the delta to the
-        * proper field.  If the fields value would dip below
-        * 0, then do not apply the delta and return EINVAL.
-        */
-       switch (field) {
-       case XFS_SBS_ICOUNT:
-               lcounter = (long long)mp->m_sb.sb_icount;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_icount = lcounter;
-               return 0;
-       case XFS_SBS_IFREE:
-               lcounter = (long long)mp->m_sb.sb_ifree;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_ifree = lcounter;
-               return 0;
-       case XFS_SBS_FDBLOCKS:
-               lcounter = (long long)
-                       mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-               res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
-
-               if (delta > 0) {                /* Putting blocks back */
-                       if (res_used > delta) {
-                               mp->m_resblks_avail += delta;
-                       } else {
-                               rem = delta - res_used;
-                               mp->m_resblks_avail = mp->m_resblks;
-                               lcounter += rem;
-                       }
-               } else {                                /* Taking blocks away */
-                       lcounter += delta;
-                       if (lcounter >= 0) {
-                               mp->m_sb.sb_fdblocks = lcounter +
-                                                       XFS_ALLOC_SET_ASIDE(mp);
-                               return 0;
-                       }
-
-                       /*
-                        * We are out of blocks, use any available reserved
-                        * blocks if were allowed to.
-                        */
-                       if (!rsvd)
-                               return -ENOSPC;
-
-                       lcounter = (long long)mp->m_resblks_avail + delta;
-                       if (lcounter >= 0) {
-                               mp->m_resblks_avail = lcounter;
-                               return 0;
-                       }
-                       printk_once(KERN_WARNING
-                               "Filesystem \"%s\": reserve blocks depleted! "
-                               "Consider increasing reserve pool size.",
-                               mp->m_fsname);
-                       return -ENOSPC;
-               }
-
-               mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
-               return 0;
-       case XFS_SBS_FREXTENTS:
-               lcounter = (long long)mp->m_sb.sb_frextents;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       return -ENOSPC;
-               }
-               mp->m_sb.sb_frextents = lcounter;
-               return 0;
-       case XFS_SBS_DBLOCKS:
-               lcounter = (long long)mp->m_sb.sb_dblocks;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_dblocks = lcounter;
-               return 0;
-       case XFS_SBS_AGCOUNT:
-               scounter = mp->m_sb.sb_agcount;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_agcount = scounter;
-               return 0;
-       case XFS_SBS_IMAX_PCT:
-               scounter = mp->m_sb.sb_imax_pct;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_imax_pct = scounter;
-               return 0;
-       case XFS_SBS_REXTSIZE:
-               scounter = mp->m_sb.sb_rextsize;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rextsize = scounter;
-               return 0;
-       case XFS_SBS_RBMBLOCKS:
-               scounter = mp->m_sb.sb_rbmblocks;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rbmblocks = scounter;
-               return 0;
-       case XFS_SBS_RBLOCKS:
-               lcounter = (long long)mp->m_sb.sb_rblocks;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rblocks = lcounter;
-               return 0;
-       case XFS_SBS_REXTENTS:
-               lcounter = (long long)mp->m_sb.sb_rextents;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rextents = lcounter;
-               return 0;
-       case XFS_SBS_REXTSLOG:
-               scounter = mp->m_sb.sb_rextslog;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rextslog = scounter;
-               return 0;
-       default:
+       /* deltas are +/-64, hence the large batch size of 128. */
+       __percpu_counter_add(&mp->m_icount, delta, 128);
+       if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
                ASSERT(0);
+               percpu_counter_add(&mp->m_icount, -delta);
                return -EINVAL;
        }
+       return 0;
 }
 
-/*
- * xfs_mod_incore_sb() is used to change a field in the in-core
- * superblock structure by the specified delta.  This modification
- * is protected by the m_sb_lock.  Just use the xfs_mod_incore_sb_unlocked()
- * routine to do the work.
- */
 int
-xfs_mod_incore_sb(
+xfs_mod_ifree(
        struct xfs_mount        *mp,
-       xfs_sb_field_t          field,
-       int64_t                 delta,
-       int                     rsvd)
+       int64_t                 delta)
 {
-       int                     status;
-
-#ifdef HAVE_PERCPU_SB
-       ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
-#endif
-       spin_lock(&mp->m_sb_lock);
-       status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-       spin_unlock(&mp->m_sb_lock);
-
-       return status;
+       percpu_counter_add(&mp->m_ifree, delta);
+       if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
+               ASSERT(0);
+               percpu_counter_add(&mp->m_ifree, -delta);
+               return -EINVAL;
+       }
+       return 0;
 }
 
-/*
- * Change more than one field in the in-core superblock structure at a time.
- *
- * The fields and changes to those fields are specified in the array of
- * xfs_mod_sb structures passed in.  Either all of the specified deltas
- * will be applied or none of them will.  If any modified field dips below 0,
- * then all modifications will be backed out and EINVAL will be returned.
- *
- * Note that this function may not be used for the superblock values that
- * are tracked with the in-memory per-cpu counters - a direct call to
- * xfs_icsb_modify_counters is required for these.
- */
 int
-xfs_mod_incore_sb_batch(
+xfs_mod_fdblocks(
        struct xfs_mount        *mp,
-       xfs_mod_sb_t            *msb,
-       uint                    nmsb,
-       int                     rsvd)
+       int64_t                 delta,
+       bool                    rsvd)
 {
-       xfs_mod_sb_t            *msbp;
-       int                     error = 0;
+       int64_t                 lcounter;
+       long long               res_used;
+       s32                     batch;
+
+       if (delta > 0) {
+               /*
+                * If the reserve pool is depleted, put blocks back into it
+                * first. Most of the time the pool is full.
+                */
+               if (likely(mp->m_resblks == mp->m_resblks_avail)) {
+                       percpu_counter_add(&mp->m_fdblocks, delta);
+                       return 0;
+               }
+
+               spin_lock(&mp->m_sb_lock);
+               res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
+
+               if (res_used > delta) {
+                       mp->m_resblks_avail += delta;
+               } else {
+                       delta -= res_used;
+                       mp->m_resblks_avail = mp->m_resblks;
+                       percpu_counter_add(&mp->m_fdblocks, delta);
+               }
+               spin_unlock(&mp->m_sb_lock);
+               return 0;
+       }
 
        /*
-        * Loop through the array of mod structures and apply each individually.
-        * If any fail, then back out all those which have already been applied.
-        * Do all of this within the scope of the m_sb_lock so that all of the
-        * changes will be atomic.
+        * Taking blocks away, need to be more accurate the closer we
+        * are to zero.
+        *
+        * batch size is set to a maximum of 1024 blocks - if we are
+        * allocating of freeing extents larger than this then we aren't
+        * going to be hammering the counter lock so a lock per update
+        * is not a problem.
+        *
+        * If the counter has a value of less than 2 * max batch size,
+        * then make everything serialise as we are real close to
+        * ENOSPC.
+        */
+#define __BATCH        1024
+       if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+               batch = 1;
+       else
+               batch = __BATCH;
+#undef __BATCH
+
+       __percpu_counter_add(&mp->m_fdblocks, delta, batch);
+       if (percpu_counter_compare(&mp->m_fdblocks,
+                                  XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+               /* we had space! */
+               return 0;
+       }
+
+       /*
+        * lock up the sb for dipping into reserves before releasing the space
+        * that took us to ENOSPC.
         */
        spin_lock(&mp->m_sb_lock);
-       for (msbp = msb; msbp < (msb + nmsb); msbp++) {
-               ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
-                      msbp->msb_field > XFS_SBS_FDBLOCKS);
+       percpu_counter_add(&mp->m_fdblocks, -delta);
+       if (!rsvd)
+               goto fdblocks_enospc;
 
-               error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-                                                  msbp->msb_delta, rsvd);
-               if (error)
-                       goto unwind;
+       lcounter = (long long)mp->m_resblks_avail + delta;
+       if (lcounter >= 0) {
+               mp->m_resblks_avail = lcounter;
+               spin_unlock(&mp->m_sb_lock);
+               return 0;
        }
+       printk_once(KERN_WARNING
+               "Filesystem \"%s\": reserve blocks depleted! "
+               "Consider increasing reserve pool size.",
+               mp->m_fsname);
+fdblocks_enospc:
        spin_unlock(&mp->m_sb_lock);
-       return 0;
+       return -ENOSPC;
+}
 
-unwind:
-       while (--msbp >= msb) {
-               error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-                                                  -msbp->msb_delta, rsvd);
-               ASSERT(error == 0);
-       }
+int
+xfs_mod_frextents(
+       struct xfs_mount        *mp,
+       int64_t                 delta)
+{
+       int64_t                 lcounter;
+       int                     ret = 0;
+
+       spin_lock(&mp->m_sb_lock);
+       lcounter = mp->m_sb.sb_frextents + delta;
+       if (lcounter < 0)
+               ret = -ENOSPC;
+       else
+               mp->m_sb.sb_frextents = lcounter;
        spin_unlock(&mp->m_sb_lock);
-       return error;
+       return ret;
 }
 
 /*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
        }
        return 0;
 }
-
-#ifdef HAVE_PERCPU_SB
-/*
- * Per-cpu incore superblock counters
- *
- * Simple concept, difficult implementation
- *
- * Basically, replace the incore superblock counters with a distributed per cpu
- * counter for contended fields (e.g.  free block count).
- *
- * Difficulties arise in that the incore sb is used for ENOSPC checking, and
- * hence needs to be accurately read when we are running low on space. Hence
- * there is a method to enable and disable the per-cpu counters based on how
- * much "stuff" is available in them.
- *
- * Basically, a counter is enabled if there is enough free resource to justify
- * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
- * ENOSPC), then we disable the counters to synchronise all callers and
- * re-distribute the available resources.
- *
- * If, once we redistributed the available resources, we still get a failure,
- * we disable the per-cpu counter and go through the slow path.
- *
- * The slow path is the current xfs_mod_incore_sb() function.  This means that
- * when we disable a per-cpu counter, we need to drain its resources back to
- * the global superblock. We do this after disabling the counter to prevent
- * more threads from queueing up on the counter.
- *
- * Essentially, this means that we still need a lock in the fast path to enable
- * synchronisation between the global counters and the per-cpu counters. This
- * is not a problem because the lock will be local to a CPU almost all the time
- * and have little contention except when we get to ENOSPC conditions.
- *
- * Basically, this lock becomes a barrier that enables us to lock out the fast
- * path while we do things like enabling and disabling counters and
- * synchronising the counters.
- *
- * Locking rules:
- *
- *     1. m_sb_lock before picking up per-cpu locks
- *     2. per-cpu locks always picked up via for_each_online_cpu() order
- *     3. accurate counter sync requires m_sb_lock + per cpu locks
- *     4. modifying per-cpu counters requires holding per-cpu lock
- *     5. modifying global counters requires holding m_sb_lock
- *     6. enabling or disabling a counter requires holding the m_sb_lock 
- *        and _none_ of the per-cpu locks.
- *
- * Disabled counters are only ever re-enabled by a balance operation
- * that results in more free resources per CPU than a given threshold.
- * To ensure counters don't remain disabled, they are rebalanced when
- * the global resource goes above a higher threshold (i.e. some hysteresis
- * is present to prevent thrashing).
- */
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * hot-plug CPU notifier support.
- *
- * We need a notifier per filesystem as we need to be able to identify
- * the filesystem to balance the counters out. This is achieved by
- * having a notifier block embedded in the xfs_mount_t and doing pointer
- * magic to get the mount pointer from the notifier block address.
- */
-STATIC int
-xfs_icsb_cpu_notify(
-       struct notifier_block *nfb,
-       unsigned long action,
-       void *hcpu)
-{
-       xfs_icsb_cnts_t *cntp;
-       xfs_mount_t     *mp;
-
-       mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
-       cntp = (xfs_icsb_cnts_t *)
-                       per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               /* Easy Case - initialize the area and locks, and
-                * then rebalance when online does everything else for us. */
-               memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-               break;
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               xfs_icsb_lock(mp);
-               xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-               xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-               xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
-               xfs_icsb_unlock(mp);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               /* Disable all the counters, then fold the dead cpu's
-                * count into the total on the global superblock and
-                * re-enable the counters. */
-               xfs_icsb_lock(mp);
-               spin_lock(&mp->m_sb_lock);
-               xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
-               xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
-               xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
-
-               mp->m_sb.sb_icount += cntp->icsb_icount;
-               mp->m_sb.sb_ifree += cntp->icsb_ifree;
-               mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
-
-               memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-
-               xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
-               xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
-               xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
-               spin_unlock(&mp->m_sb_lock);
-               xfs_icsb_unlock(mp);
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int
-xfs_icsb_init_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
-       if (mp->m_sb_cnts == NULL)
-               return -ENOMEM;
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-       }
-
-       mutex_init(&mp->m_icsb_mutex);
-
-       /*
-        * start with all counters disabled so that the
-        * initial balance kicks us off correctly
-        */
-       mp->m_icsb_counters = -1;
-
-#ifdef CONFIG_HOTPLUG_CPU
-       mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
-       mp->m_icsb_notifier.priority = 0;
-       register_hotcpu_notifier(&mp->m_icsb_notifier);
-#endif /* CONFIG_HOTPLUG_CPU */
-
-       return 0;
-}
-
-void
-xfs_icsb_reinit_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_lock(mp);
-       /*
-        * start with all counters disabled so that the
-        * initial balance kicks us off correctly
-        */
-       mp->m_icsb_counters = -1;
-       xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-       xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-       xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
-       xfs_icsb_unlock(mp);
-}
-
-void
-xfs_icsb_destroy_counters(
-       xfs_mount_t     *mp)
-{
-       if (mp->m_sb_cnts) {
-               unregister_hotcpu_notifier(&mp->m_icsb_notifier);
-               free_percpu(mp->m_sb_cnts);
-       }
-       mutex_destroy(&mp->m_icsb_mutex);
-}
-
-STATIC void
-xfs_icsb_lock_cntr(
-       xfs_icsb_cnts_t *icsbp)
-{
-       while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
-               ndelay(1000);
-       }
-}
-
-STATIC void
-xfs_icsb_unlock_cntr(
-       xfs_icsb_cnts_t *icsbp)
-{
-       clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
-}
-
-
-STATIC void
-xfs_icsb_lock_all_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               xfs_icsb_lock_cntr(cntp);
-       }
-}
-
-STATIC void
-xfs_icsb_unlock_all_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               xfs_icsb_unlock_cntr(cntp);
-       }
-}
-
-STATIC void
-xfs_icsb_count(
-       xfs_mount_t     *mp,
-       xfs_icsb_cnts_t *cnt,
-       int             flags)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
-
-       if (!(flags & XFS_ICSB_LAZY_COUNT))
-               xfs_icsb_lock_all_counters(mp);
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               cnt->icsb_icount += cntp->icsb_icount;
-               cnt->icsb_ifree += cntp->icsb_ifree;
-               cnt->icsb_fdblocks += cntp->icsb_fdblocks;
-       }
-
-       if (!(flags & XFS_ICSB_LAZY_COUNT))
-               xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC int
-xfs_icsb_counter_disabled(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field)
-{
-       ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-       return test_bit(field, &mp->m_icsb_counters);
-}
-
-STATIC void
-xfs_icsb_disable_counter(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field)
-{
-       xfs_icsb_cnts_t cnt;
-
-       ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
-       /*
-        * If we are already disabled, then there is nothing to do
-        * here. We check before locking all the counters to avoid
-        * the expensive lock operation when being called in the
-        * slow path and the counter is already disabled. This is
-        * safe because the only time we set or clear this state is under
-        * the m_icsb_mutex.
-        */
-       if (xfs_icsb_counter_disabled(mp, field))
-               return;
-
-       xfs_icsb_lock_all_counters(mp);
-       if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
-               /* drain back to superblock */
-
-               xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
-               switch(field) {
-               case XFS_SBS_ICOUNT:
-                       mp->m_sb.sb_icount = cnt.icsb_icount;
-                       break;
-               case XFS_SBS_IFREE:
-                       mp->m_sb.sb_ifree = cnt.icsb_ifree;
-                       break;
-               case XFS_SBS_FDBLOCKS:
-                       mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-                       break;
-               default:
-                       BUG();
-               }
-       }
-
-       xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC void
-xfs_icsb_enable_counter(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       uint64_t        count,
-       uint64_t        resid)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
-       xfs_icsb_lock_all_counters(mp);
-       for_each_online_cpu(i) {
-               cntp = per_cpu_ptr(mp->m_sb_cnts, i);
-               switch (field) {
-               case XFS_SBS_ICOUNT:
-                       cntp->icsb_icount = count + resid;
-                       break;
-               case XFS_SBS_IFREE:
-                       cntp->icsb_ifree = count + resid;
-                       break;
-               case XFS_SBS_FDBLOCKS:
-                       cntp->icsb_fdblocks = count + resid;
-                       break;
-               default:
-                       BUG();
-                       break;
-               }
-               resid = 0;
-       }
-       clear_bit(field, &mp->m_icsb_counters);
-       xfs_icsb_unlock_all_counters(mp);
-}
-
-void
-xfs_icsb_sync_counters_locked(
-       xfs_mount_t     *mp,
-       int             flags)
-{
-       xfs_icsb_cnts_t cnt;
-
-       xfs_icsb_count(mp, &cnt, flags);
-
-       if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
-               mp->m_sb.sb_icount = cnt.icsb_icount;
-       if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
-               mp->m_sb.sb_ifree = cnt.icsb_ifree;
-       if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
-               mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-}
-
-/*
- * Accurate update of per-cpu counters to incore superblock
- */
-void
-xfs_icsb_sync_counters(
-       xfs_mount_t     *mp,
-       int             flags)
-{
-       spin_lock(&mp->m_sb_lock);
-       xfs_icsb_sync_counters_locked(mp, flags);
-       spin_unlock(&mp->m_sb_lock);
-}
-
-/*
- * Balance and enable/disable counters as necessary.
- *
- * Thresholds for re-enabling counters are somewhat magic.  inode counts are
- * chosen to be the same number as single on disk allocation chunk per CPU, and
- * free blocks is something far enough zero that we aren't going thrash when we
- * get near ENOSPC. We also need to supply a minimum we require per cpu to
- * prevent looping endlessly when xfs_alloc_space asks for more than will
- * be distributed to a single CPU but each CPU has enough blocks to be
- * reenabled.
- *
- * Note that we can be called when counters are already disabled.
- * xfs_icsb_disable_counter() optimises the counter locking in this case to
- * prevent locking every per-cpu counter needlessly.
- */
-
-#define XFS_ICSB_INO_CNTR_REENABLE     (uint64_t)64
-#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
-               (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
-STATIC void
-xfs_icsb_balance_counter_locked(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int             min_per_cpu)
-{
-       uint64_t        count, resid;
-       int             weight = num_online_cpus();
-       uint64_t        min = (uint64_t)min_per_cpu;
-
-       /* disable counter and sync counter */
-       xfs_icsb_disable_counter(mp, field);
-
-       /* update counters  - first CPU gets residual*/
-       switch (field) {
-       case XFS_SBS_ICOUNT:
-               count = mp->m_sb.sb_icount;
-               resid = do_div(count, weight);
-               if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-                       return;
-               break;
-       case XFS_SBS_IFREE:
-               count = mp->m_sb.sb_ifree;
-               resid = do_div(count, weight);
-               if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-                       return;
-               break;
-       case XFS_SBS_FDBLOCKS:
-               count = mp->m_sb.sb_fdblocks;
-               resid = do_div(count, weight);
-               if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
-                       return;
-               break;
-       default:
-               BUG();
-               count = resid = 0;      /* quiet, gcc */
-               break;
-       }
-
-       xfs_icsb_enable_counter(mp, field, count, resid);
-}
-
-STATIC void
-xfs_icsb_balance_counter(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  fields,
-       int             min_per_cpu)
-{
-       spin_lock(&mp->m_sb_lock);
-       xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
-       spin_unlock(&mp->m_sb_lock);
-}
-
-int
-xfs_icsb_modify_counters(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int64_t         delta,
-       int             rsvd)
-{
-       xfs_icsb_cnts_t *icsbp;
-       long long       lcounter;       /* long counter for 64 bit fields */
-       int             ret = 0;
-
-       might_sleep();
-again:
-       preempt_disable();
-       icsbp = this_cpu_ptr(mp->m_sb_cnts);
-
-       /*
-        * if the counter is disabled, go to slow path
-        */
-       if (unlikely(xfs_icsb_counter_disabled(mp, field)))
-               goto slow_path;
-       xfs_icsb_lock_cntr(icsbp);
-       if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
-               xfs_icsb_unlock_cntr(icsbp);
-               goto slow_path;
-       }
-
-       switch (field) {
-       case XFS_SBS_ICOUNT:
-               lcounter = icsbp->icsb_icount;
-               lcounter += delta;
-               if (unlikely(lcounter < 0))
-                       goto balance_counter;
-               icsbp->icsb_icount = lcounter;
-               break;
-
-       case XFS_SBS_IFREE:
-               lcounter = icsbp->icsb_ifree;
-               lcounter += delta;
-               if (unlikely(lcounter < 0))
-                       goto balance_counter;
-               icsbp->icsb_ifree = lcounter;
-               break;
-
-       case XFS_SBS_FDBLOCKS:
-               BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
-
-               lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-               lcounter += delta;
-               if (unlikely(lcounter < 0))
-                       goto balance_counter;
-               icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
-               break;
-       default:
-               BUG();
-               break;
-       }
-       xfs_icsb_unlock_cntr(icsbp);
-       preempt_enable();
-       return 0;
-
-slow_path:
-       preempt_enable();
-
-       /*
-        * serialise with a mutex so we don't burn lots of cpu on
-        * the superblock lock. We still need to hold the superblock
-        * lock, however, when we modify the global structures.
-        */
-       xfs_icsb_lock(mp);
-
-       /*
-        * Now running atomically.
-        *
-        * If the counter is enabled, someone has beaten us to rebalancing.
-        * Drop the lock and try again in the fast path....
-        */
-       if (!(xfs_icsb_counter_disabled(mp, field))) {
-               xfs_icsb_unlock(mp);
-               goto again;
-       }
-
-       /*
-        * The counter is currently disabled. Because we are
-        * running atomically here, we know a rebalance cannot
-        * be in progress. Hence we can go straight to operating
-        * on the global superblock. We do not call xfs_mod_incore_sb()
-        * here even though we need to get the m_sb_lock. Doing so
-        * will cause us to re-enter this function and deadlock.
-        * Hence we get the m_sb_lock ourselves and then call
-        * xfs_mod_incore_sb_unlocked() as the unlocked path operates
-        * directly on the global counters.
-        */
-       spin_lock(&mp->m_sb_lock);
-       ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-       spin_unlock(&mp->m_sb_lock);
-
-       /*
-        * Now that we've modified the global superblock, we
-        * may be able to re-enable the distributed counters
-        * (e.g. lots of space just got freed). After that
-        * we are done.
-        */
-       if (ret != -ENOSPC)
-               xfs_icsb_balance_counter(mp, field, 0);
-       xfs_icsb_unlock(mp);
-       return ret;
-
-balance_counter:
-       xfs_icsb_unlock_cntr(icsbp);
-       preempt_enable();
-
-       /*
-        * We may have multiple threads here if multiple per-cpu
-        * counters run dry at the same time. This will mean we can
-        * do more balances than strictly necessary but it is not
-        * the common slowpath case.
-        */
-       xfs_icsb_lock(mp);
-
-       /*
-        * running atomically.
-        *
-        * This will leave the counter in the correct state for future
-        * accesses. After the rebalance, we simply try again and our retry
-        * will either succeed through the fast path or slow path without
-        * another balance operation being required.
-        */
-       xfs_icsb_balance_counter(mp, field, delta);
-       xfs_icsb_unlock(mp);
-       goto again;
-}
-
-#endif
index 0d8abd6..8c995a2 100644 (file)
@@ -18,8 +18,6 @@
 #ifndef __XFS_MOUNT_H__
 #define        __XFS_MOUNT_H__
 
-#ifdef __KERNEL__
-
 struct xlog;
 struct xfs_inode;
 struct xfs_mru_cache;
@@ -29,44 +27,6 @@ struct xfs_quotainfo;
 struct xfs_dir_ops;
 struct xfs_da_geometry;
 
-#ifdef HAVE_PERCPU_SB
-
-/*
- * Valid per-cpu incore superblock counters. Note that if you add new counters,
- * you may need to define new counter disabled bit field descriptors as there
- * are more possible fields in the superblock that can fit in a bitfield on a
- * 32 bit platform. The XFS_SBS_* values for the current current counters just
- * fit.
- */
-typedef struct xfs_icsb_cnts {
-       uint64_t        icsb_fdblocks;
-       uint64_t        icsb_ifree;
-       uint64_t        icsb_icount;
-       unsigned long   icsb_flags;
-} xfs_icsb_cnts_t;
-
-#define XFS_ICSB_FLAG_LOCK     (1 << 0)        /* counter lock bit */
-
-#define XFS_ICSB_LAZY_COUNT    (1 << 1)        /* accuracy not needed */
-
-extern int     xfs_icsb_init_counters(struct xfs_mount *);
-extern void    xfs_icsb_reinit_counters(struct xfs_mount *);
-extern void    xfs_icsb_destroy_counters(struct xfs_mount *);
-extern void    xfs_icsb_sync_counters(struct xfs_mount *, int);
-extern void    xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
-extern int     xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
-                                               int64_t, int);
-
-#else
-#define xfs_icsb_init_counters(mp)             (0)
-#define xfs_icsb_destroy_counters(mp)          do { } while (0)
-#define xfs_icsb_reinit_counters(mp)           do { } while (0)
-#define xfs_icsb_sync_counters(mp, flags)      do { } while (0)
-#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
-#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
-       xfs_mod_incore_sb(mp, field, delta, rsvd)
-#endif
-
 /* dynamic preallocation free space thresholds, 5% down to 1% */
 enum {
        XFS_LOWSP_1_PCNT = 0,
@@ -81,8 +41,13 @@ typedef struct xfs_mount {
        struct super_block      *m_super;
        xfs_tid_t               m_tid;          /* next unused tid for fs */
        struct xfs_ail          *m_ail;         /* fs active log item list */
-       xfs_sb_t                m_sb;           /* copy of fs superblock */
+
+       struct xfs_sb           m_sb;           /* copy of fs superblock */
        spinlock_t              m_sb_lock;      /* sb counter lock */
+       struct percpu_counter   m_icount;       /* allocated inodes counter */
+       struct percpu_counter   m_ifree;        /* free inodes counter */
+       struct percpu_counter   m_fdblocks;     /* free block counter */
+
        struct xfs_buf          *m_sb_bp;       /* buffer for superblock */
        char                    *m_fsname;      /* filesystem name */
        int                     m_fsname_len;   /* strlen of fs name */
@@ -152,12 +117,6 @@ typedef struct xfs_mount {
        const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
        uint                    m_chsize;       /* size of next field */
        atomic_t                m_active_trans; /* number trans frozen */
-#ifdef HAVE_PERCPU_SB
-       xfs_icsb_cnts_t __percpu *m_sb_cnts;    /* per-cpu superblock counters */
-       unsigned long           m_icsb_counters; /* disabled per-cpu counters */
-       struct notifier_block   m_icsb_notifier; /* hotplug cpu notifier */
-       struct mutex            m_icsb_mutex;   /* balancer sync lock */
-#endif
        struct xfs_mru_cache    *m_filestream;  /* per-mount filestream data */
        struct delayed_work     m_reclaim_work; /* background inode reclaim */
        struct delayed_work     m_eofblocks_work; /* background eof blocks
@@ -300,35 +259,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
        return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
-/*
- * Per-cpu superblock locking functions
- */
-#ifdef HAVE_PERCPU_SB
-static inline void
-xfs_icsb_lock(xfs_mount_t *mp)
-{
-       mutex_lock(&mp->m_icsb_mutex);
-}
-
-static inline void
-xfs_icsb_unlock(xfs_mount_t *mp)
-{
-       mutex_unlock(&mp->m_icsb_mutex);
-}
-#else
-#define xfs_icsb_lock(mp)
-#define xfs_icsb_unlock(mp)
-#endif
-
-/*
- * This structure is for use by the xfs_mod_incore_sb_batch() routine.
- * xfs_growfs can specify a few fields which are more than int limit
- */
-typedef struct xfs_mod_sb {
-       xfs_sb_field_t  msb_field;      /* Field to modify, see below */
-       int64_t         msb_delta;      /* Change to make to specified field */
-} xfs_mod_sb_t;
-
 /*
  * Per-ag incore structure, copies of information in agf and agi, to improve the
  * performance of allocation group selection.
@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int     xfs_mountfs(xfs_mount_t *mp);
 extern int     xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
                                     xfs_agnumber_t *maxagi);
-
 extern void    xfs_unmountfs(xfs_mount_t *);
-extern int     xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
-extern int     xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
-                       uint, int);
+
+extern int     xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
+extern int     xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
+extern int     xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
+                                bool reserved);
+extern int     xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
+
 extern int     xfs_mount_log_sb(xfs_mount_t *);
 extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
 extern int     xfs_readsb(xfs_mount_t *, int);
@@ -399,6 +332,4 @@ extern int  xfs_dev_is_read_only(struct xfs_mount *, char *);
 
 extern void    xfs_set_low_space_thresholds(struct xfs_mount *);
 
-#endif /* __KERNEL__ */
-
 #endif /* __XFS_MOUNT_H__ */
index 30ecca3..f8a674d 100644 (file)
@@ -437,7 +437,7 @@ xfs_mru_cache_insert(
        if (!mru || !mru->lists)
                return -EINVAL;
 
-       if (radix_tree_preload(GFP_KERNEL))
+       if (radix_tree_preload(GFP_NOFS))
                return -ENOMEM;
 
        INIT_LIST_HEAD(&elem->list_node);
index 365dd57..981a657 100644 (file)
@@ -31,7 +31,8 @@
 int
 xfs_break_layouts(
        struct inode            *inode,
-       uint                    *iolock)
+       uint                    *iolock,
+       bool                    with_imutex)
 {
        struct xfs_inode        *ip = XFS_I(inode);
        int                     error;
@@ -40,8 +41,12 @@ xfs_break_layouts(
 
        while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
                xfs_iunlock(ip, *iolock);
+               if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
+                       mutex_unlock(&inode->i_mutex);
                error = break_layout(inode, true);
                *iolock = XFS_IOLOCK_EXCL;
+               if (with_imutex)
+                       mutex_lock(&inode->i_mutex);
                xfs_ilock(ip, *iolock);
        }
 
index b7fbfce..8147ac1 100644 (file)
@@ -8,9 +8,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
 int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
                struct iattr *iattr);
 
-int xfs_break_layouts(struct inode *inode, uint *iolock);
+int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex);
 #else
-static inline int xfs_break_layouts(struct inode *inode, uint *iolock)
+static inline int
+xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex)
 {
        return 0;
 }
index fbbb9e6..5538468 100644 (file)
@@ -719,6 +719,7 @@ xfs_qm_qino_alloc(
        xfs_trans_t     *tp;
        int             error;
        int             committed;
+       bool            need_alloc = true;
 
        *ip = NULL;
        /*
@@ -747,6 +748,7 @@ xfs_qm_qino_alloc(
                                return error;
                        mp->m_sb.sb_gquotino = NULLFSINO;
                        mp->m_sb.sb_pquotino = NULLFSINO;
+                       need_alloc = false;
                }
        }
 
@@ -758,7 +760,7 @@ xfs_qm_qino_alloc(
                return error;
        }
 
-       if (!*ip) {
+       if (need_alloc) {
                error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
                                                                &committed);
                if (error) {
@@ -794,11 +796,14 @@ xfs_qm_qino_alloc(
        spin_unlock(&mp->m_sb_lock);
        xfs_log_sb(tp);
 
-       if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       if (error) {
+               ASSERT(XFS_FORCED_SHUTDOWN(mp));
                xfs_alert(mp, "%s failed (error %d)!", __func__, error);
-               return error;
        }
-       return 0;
+       if (need_alloc)
+               xfs_finish_inode_setup(*ip);
+       return error;
 }
 
 
index 8fcc4cc..858e1e6 100644 (file)
@@ -109,8 +109,6 @@ static struct xfs_kobj xfs_dbg_kobj;        /* global debug sysfs attrs */
 #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
 #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
 #define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
-#define MNTOPT_DELAYLOG    "delaylog"  /* Delayed logging enabled */
-#define MNTOPT_NODELAYLOG  "nodelaylog"        /* Delayed logging disabled */
 #define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
 #define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
 
@@ -361,28 +359,10 @@ xfs_parseargs(
                } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
                        mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
                        mp->m_qflags &= ~XFS_GQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
-                       xfs_warn(mp,
-       "delaylog is the default now, option is deprecated.");
-               } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
-                       xfs_warn(mp,
-       "nodelaylog support has been removed, option is deprecated.");
                } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
                        mp->m_flags |= XFS_MOUNT_DISCARD;
                } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
                        mp->m_flags &= ~XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, "ihashsize")) {
-                       xfs_warn(mp,
-       "ihashsize no longer used, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisdsync")) {
-                       xfs_warn(mp,
-       "osyncisdsync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisosync")) {
-                       xfs_warn(mp,
-       "osyncisosync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "irixsgid")) {
-                       xfs_warn(mp,
-       "irixsgid is now a sysctl(2) variable, option is deprecated.");
                } else {
                        xfs_warn(mp, "unknown mount option [%s].", this_char);
                        return -EINVAL;
@@ -986,6 +966,8 @@ xfs_fs_inode_init_once(
        atomic_set(&ip->i_pincount, 0);
        spin_lock_init(&ip->i_flags_lock);
 
+       mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+                    "xfsino", ip->i_ino);
        mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
                     "xfsino", ip->i_ino);
 }
@@ -1033,23 +1015,6 @@ xfs_free_fsname(
        kfree(mp->m_logname);
 }
 
-STATIC void
-xfs_fs_put_super(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_filestream_unmount(mp);
-       xfs_unmountfs(mp);
-
-       xfs_freesb(mp);
-       xfs_icsb_destroy_counters(mp);
-       xfs_destroy_mount_workqueues(mp);
-       xfs_close_devices(mp);
-       xfs_free_fsname(mp);
-       kfree(mp);
-}
-
 STATIC int
 xfs_fs_sync_fs(
        struct super_block      *sb,
@@ -1083,8 +1048,11 @@ xfs_fs_statfs(
 {
        struct xfs_mount        *mp = XFS_M(dentry->d_sb);
        xfs_sb_t                *sbp = &mp->m_sb;
-       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
+       struct xfs_inode        *ip = XFS_I(d_inode(dentry));
        __uint64_t              fakeinos, id;
+       __uint64_t              icount;
+       __uint64_t              ifree;
+       __uint64_t              fdblocks;
        xfs_extlen_t            lsize;
        __int64_t               ffree;
 
@@ -1095,17 +1063,21 @@ xfs_fs_statfs(
        statp->f_fsid.val[0] = (u32)id;
        statp->f_fsid.val[1] = (u32)(id >> 32);
 
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+       icount = percpu_counter_sum(&mp->m_icount);
+       ifree = percpu_counter_sum(&mp->m_ifree);
+       fdblocks = percpu_counter_sum(&mp->m_fdblocks);
 
        spin_lock(&mp->m_sb_lock);
        statp->f_bsize = sbp->sb_blocksize;
        lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
        statp->f_blocks = sbp->sb_dblocks - lsize;
-       statp->f_bfree = statp->f_bavail =
-                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       spin_unlock(&mp->m_sb_lock);
+
+       statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       statp->f_bavail = statp->f_bfree;
+
        fakeinos = statp->f_bfree << sbp->sb_inopblog;
-       statp->f_files =
-           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+       statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
        if (mp->m_maxicount)
                statp->f_files = min_t(typeof(statp->f_files),
                                        statp->f_files,
@@ -1117,10 +1089,9 @@ xfs_fs_statfs(
                                        sbp->sb_icount);
 
        /* make sure statp->f_ffree does not underflow */
-       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+       ffree = statp->f_files - (icount - ifree);
        statp->f_ffree = max_t(__int64_t, ffree, 0);
 
-       spin_unlock(&mp->m_sb_lock);
 
        if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
            ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
@@ -1256,6 +1227,12 @@ xfs_fs_remount(
 
        /* ro -> rw */
        if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+               if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
+                       xfs_warn(mp,
+               "ro->rw transition prohibited on norecovery mount");
+                       return -EINVAL;
+               }
+
                mp->m_flags &= ~XFS_MOUNT_RDONLY;
 
                /*
@@ -1401,6 +1378,51 @@ xfs_finish_flags(
        return 0;
 }
 
+static int
+xfs_init_percpu_counters(
+       struct xfs_mount        *mp)
+{
+       int             error;
+
+       error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
+       if (error)
+               return -ENOMEM;
+
+       error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
+       if (error)
+               goto free_icount;
+
+       error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
+       if (error)
+               goto free_ifree;
+
+       return 0;
+
+free_ifree:
+       percpu_counter_destroy(&mp->m_ifree);
+free_icount:
+       percpu_counter_destroy(&mp->m_icount);
+       return -ENOMEM;
+}
+
+void
+xfs_reinit_percpu_counters(
+       struct xfs_mount        *mp)
+{
+       percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
+       percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
+       percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
+}
+
+static void
+xfs_destroy_percpu_counters(
+       struct xfs_mount        *mp)
+{
+       percpu_counter_destroy(&mp->m_icount);
+       percpu_counter_destroy(&mp->m_ifree);
+       percpu_counter_destroy(&mp->m_fdblocks);
+}
+
 STATIC int
 xfs_fs_fill_super(
        struct super_block      *sb,
@@ -1449,7 +1471,7 @@ xfs_fs_fill_super(
        if (error)
                goto out_close_devices;
 
-       error = xfs_icsb_init_counters(mp);
+       error = xfs_init_percpu_counters(mp);
        if (error)
                goto out_destroy_workqueues;
 
@@ -1507,7 +1529,7 @@ xfs_fs_fill_super(
  out_free_sb:
        xfs_freesb(mp);
  out_destroy_counters:
-       xfs_icsb_destroy_counters(mp);
+       xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
        xfs_destroy_mount_workqueues(mp);
  out_close_devices:
@@ -1524,6 +1546,24 @@ out_destroy_workqueues:
        goto out_free_sb;
 }
 
+STATIC void
+xfs_fs_put_super(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_notice(mp, "Unmounting Filesystem");
+       xfs_filestream_unmount(mp);
+       xfs_unmountfs(mp);
+
+       xfs_freesb(mp);
+       xfs_destroy_percpu_counters(mp);
+       xfs_destroy_mount_workqueues(mp);
+       xfs_close_devices(mp);
+       xfs_free_fsname(mp);
+       kfree(mp);
+}
+
 STATIC struct dentry *
 xfs_fs_mount(
        struct file_system_type *fs_type,
index 2b830c2..499058f 100644 (file)
@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations;
 extern const struct xattr_handler *xfs_xattr_handlers[];
 extern const struct quotactl_ops xfs_quotactl_operations;
 
+extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
+
 #define XFS_M(sb)              ((struct xfs_mount *)((sb)->s_fs_info))
 
 #endif /* __XFS_SUPER_H__ */
index 25791df..3df411e 100644 (file)
@@ -177,7 +177,7 @@ xfs_symlink(
        int                     pathlen;
        struct xfs_bmap_free    free_list;
        xfs_fsblock_t           first_block;
-       bool                    unlock_dp_on_error = false;
+       bool                    unlock_dp_on_error = false;
        uint                    cancel_flags;
        int                     committed;
        xfs_fileoff_t           first_fsb;
@@ -221,7 +221,7 @@ xfs_symlink(
                        XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
                        &udqp, &gdqp, &pdqp);
        if (error)
-               goto std_return;
+               return error;
 
        tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
        cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
@@ -241,7 +241,7 @@ xfs_symlink(
        }
        if (error) {
                cancel_flags = 0;
-               goto error_return;
+               goto out_trans_cancel;
        }
 
        xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
@@ -252,7 +252,7 @@ xfs_symlink(
         */
        if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
                error = -EPERM;
-               goto error_return;
+               goto out_trans_cancel;
        }
 
        /*
@@ -261,7 +261,7 @@ xfs_symlink(
        error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
                                                pdqp, resblks, 1, 0);
        if (error)
-               goto error_return;
+               goto out_trans_cancel;
 
        /*
         * Check for ability to enter directory entry, if no space reserved.
@@ -269,7 +269,7 @@ xfs_symlink(
        if (!resblks) {
                error = xfs_dir_canenter(tp, dp, link_name);
                if (error)
-                       goto error_return;
+                       goto out_trans_cancel;
        }
        /*
         * Initialize the bmap freelist prior to calling either
@@ -282,15 +282,14 @@ xfs_symlink(
         */
        error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
                               prid, resblks > 0, &ip, NULL);
-       if (error) {
-               if (error == -ENOSPC)
-                       goto error_return;
-               goto error1;
-       }
+       if (error)
+               goto out_trans_cancel;
 
        /*
-        * An error after we've joined dp to the transaction will result in the
-        * transaction cancel unlocking dp so don't do it explicitly in the
+        * Now we join the directory inode to the transaction.  We do not do it
+        * earlier because xfs_dir_ialloc might commit the previous transaction
+        * (and release all the locks).  An error from here on will result in
+        * the transaction cancel unlocking dp so don't do it explicitly in the
         * error path.
         */
        xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
@@ -330,7 +329,7 @@ xfs_symlink(
                                  XFS_BMAPI_METADATA, &first_block, resblks,
                                  mval, &nmaps, &free_list);
                if (error)
-                       goto error2;
+                       goto out_bmap_cancel;
 
                if (resblks)
                        resblks -= fs_blocks;
@@ -348,7 +347,7 @@ xfs_symlink(
                                               BTOBB(byte_cnt), 0);
                        if (!bp) {
                                error = -ENOMEM;
-                               goto error2;
+                               goto out_bmap_cancel;
                        }
                        bp->b_ops = &xfs_symlink_buf_ops;
 
@@ -378,7 +377,7 @@ xfs_symlink(
        error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
                                        &first_block, &free_list, resblks);
        if (error)
-               goto error2;
+               goto out_bmap_cancel;
        xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
@@ -392,10 +391,13 @@ xfs_symlink(
        }
 
        error = xfs_bmap_finish(&tp, &free_list, &committed);
-       if (error) {
-               goto error2;
-       }
+       if (error)
+               goto out_bmap_cancel;
+
        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       if (error)
+               goto out_release_inode;
+
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);
        xfs_qm_dqrele(pdqp);
@@ -403,20 +405,28 @@ xfs_symlink(
        *ipp = ip;
        return 0;
 
- error2:
-       IRELE(ip);
- error1:
+out_bmap_cancel:
        xfs_bmap_cancel(&free_list);
        cancel_flags |= XFS_TRANS_ABORT;
- error_return:
+out_trans_cancel:
        xfs_trans_cancel(tp, cancel_flags);
+out_release_inode:
+       /*
+        * Wait until after the current transaction is aborted to finish the
+        * setup of the inode and release the inode.  This prevents recursive
+        * transactions and deadlocks from xfs_inactive.
+        */
+       if (ip) {
+               xfs_finish_inode_setup(ip);
+               IRELE(ip);
+       }
+
        xfs_qm_dqrele(udqp);
        xfs_qm_dqrele(gdqp);
        xfs_qm_dqrele(pdqp);
 
        if (unlock_dp_on_error)
                xfs_iunlock(dp, XFS_ILOCK_EXCL);
- std_return:
        return error;
 }
 
index 51372e3..615781b 100644 (file)
@@ -115,7 +115,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
                __entry->refcount = refcount;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+       TP_printk("dev %d:%d agno %u refcount %d caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __entry->refcount,
@@ -239,7 +239,7 @@ TRACE_EVENT(xfs_iext_insert,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 "offset %lld block %lld count %lld flag %d caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -283,7 +283,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 "offset %lld block %lld count %lld flag %d caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -329,7 +329,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
+                 "lock %d flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->bno,
                  __entry->nblks,
@@ -402,7 +402,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
+                 "lock %d flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->bno,
                  __entry->buffer_length,
@@ -447,7 +447,7 @@ TRACE_EVENT(xfs_buf_ioerror,
                __entry->caller_ip = caller_ip;
        ),
        TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d error %d flags %s caller %pf",
+                 "lock %d error %d flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->bno,
                  __entry->buffer_length,
@@ -613,7 +613,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
                __entry->lock_flags = lock_flags;
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
+       TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -664,6 +664,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
 DEFINE_INODE_EVENT(xfs_free_file_space);
 DEFINE_INODE_EVENT(xfs_zero_file_space);
 DEFINE_INODE_EVENT(xfs_collapse_file_space);
+DEFINE_INODE_EVENT(xfs_insert_file_space);
 DEFINE_INODE_EVENT(xfs_readdir);
 #ifdef CONFIG_XFS_POSIX_ACL
 DEFINE_INODE_EVENT(xfs_get_acl);
@@ -685,6 +686,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
 DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
 
+DEFINE_INODE_EVENT(xfs_filemap_fault);
+DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
+
 DECLARE_EVENT_CLASS(xfs_iref_class,
        TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
        TP_ARGS(ip, caller_ip),
@@ -702,7 +706,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
                __entry->pincount = atomic_read(&ip->i_pincount);
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
+       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->count,
@@ -1217,6 +1221,11 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
        TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1333,7 +1342,7 @@ TRACE_EVENT(xfs_bunmap,
                __entry->flags = flags;
        ),
        TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
-                 "flags %s caller %pf",
+                 "flags %s caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->size,
@@ -1466,7 +1475,7 @@ TRACE_EVENT(xfs_agf,
        ),
        TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
                  "levels b %u c %u flfirst %u fllast %u flcount %u "
-                 "freeblks %u longest %u caller %pf",
+                 "freeblks %u longest %u caller %ps",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->agno,
                  __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
index eb90cd5..220ef2c 100644 (file)
@@ -173,7 +173,7 @@ xfs_trans_reserve(
        uint                    rtextents)
 {
        int             error = 0;
-       int             rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+       bool            rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
 
        /* Mark this thread as being in a transaction */
        current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -184,8 +184,7 @@ xfs_trans_reserve(
         * fail if the count would go below zero.
         */
        if (blocks > 0) {
-               error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
-                                         -((int64_t)blocks), rsvd);
+               error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
                if (error != 0) {
                        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
                        return -ENOSPC;
@@ -236,8 +235,7 @@ xfs_trans_reserve(
         * fail if the count would go below zero.
         */
        if (rtextents > 0) {
-               error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
-                                         -((int64_t)rtextents), rsvd);
+               error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
                if (error) {
                        error = -ENOSPC;
                        goto undo_log;
@@ -268,8 +266,7 @@ undo_log:
 
 undo_blocks:
        if (blocks > 0) {
-               xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
-                                        (int64_t)blocks, rsvd);
+               xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
                tp->t_blk_res = 0;
        }
 
@@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas(
                                  sizeof(sbp->sb_frextents) - 1);
 }
 
+STATIC int
+xfs_sb_mod8(
+       uint8_t                 *field,
+       int8_t                  delta)
+{
+       int8_t                  counter = *field;
+
+       counter += delta;
+       if (counter < 0) {
+               ASSERT(0);
+               return -EINVAL;
+       }
+       *field = counter;
+       return 0;
+}
+
+STATIC int
+xfs_sb_mod32(
+       uint32_t                *field,
+       int32_t                 delta)
+{
+       int32_t                 counter = *field;
+
+       counter += delta;
+       if (counter < 0) {
+               ASSERT(0);
+               return -EINVAL;
+       }
+       *field = counter;
+       return 0;
+}
+
+STATIC int
+xfs_sb_mod64(
+       uint64_t                *field,
+       int64_t                 delta)
+{
+       int64_t                 counter = *field;
+
+       counter += delta;
+       if (counter < 0) {
+               ASSERT(0);
+               return -EINVAL;
+       }
+       *field = counter;
+       return 0;
+}
+
 /*
  * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
  * and apply superblock counter changes to the in-core superblock.  The
@@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas(
  * applied to the in-core superblock.  The idea is that that has already been
  * done.
  *
- * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
- * However, we have to ensure that we only modify each superblock field only
- * once because the application of the delta values may not be atomic. That can
- * lead to ENOSPC races occurring if we have two separate modifcations of the
- * free space counter to put back the entire reservation and then take away
- * what we used.
- *
  * If we are not logging superblock counters, then the inode allocated/free and
  * used block counts are not updated in the on disk superblock. In this case,
  * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
@@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas(
  */
 void
 xfs_trans_unreserve_and_mod_sb(
-       xfs_trans_t     *tp)
+       struct xfs_trans        *tp)
 {
-       xfs_mod_sb_t    msb[9]; /* If you add cases, add entries */
-       xfs_mod_sb_t    *msbp;
-       xfs_mount_t     *mp = tp->t_mountp;
-       /* REFERENCED */
-       int             error;
-       int             rsvd;
-       int64_t         blkdelta = 0;
-       int64_t         rtxdelta = 0;
-       int64_t         idelta = 0;
-       int64_t         ifreedelta = 0;
-
-       msbp = msb;
-       rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+       struct xfs_mount        *mp = tp->t_mountp;
+       bool                    rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+       int64_t                 blkdelta = 0;
+       int64_t                 rtxdelta = 0;
+       int64_t                 idelta = 0;
+       int64_t                 ifreedelta = 0;
+       int                     error;
 
        /* calculate deltas */
        if (tp->t_blk_res > 0)
@@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb(
 
        /* apply the per-cpu counters */
        if (blkdelta) {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                blkdelta, rsvd);
+               error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
                if (error)
                        goto out;
        }
 
        if (idelta) {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
-                                                idelta, rsvd);
+               error = xfs_mod_icount(mp, idelta);
                if (error)
                        goto out_undo_fdblocks;
        }
 
        if (ifreedelta) {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE,
-                                                ifreedelta, rsvd);
+               error = xfs_mod_ifree(mp, ifreedelta);
                if (error)
                        goto out_undo_icount;
        }
 
+       if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
+               return;
+
        /* apply remaining deltas */
-       if (rtxdelta != 0) {
-               msbp->msb_field = XFS_SBS_FREXTENTS;
-               msbp->msb_delta = rtxdelta;
-               msbp++;
+       spin_lock(&mp->m_sb_lock);
+       if (rtxdelta) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
+               if (error)
+                       goto out_undo_ifree;
        }
 
-       if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
-               if (tp->t_dblocks_delta != 0) {
-                       msbp->msb_field = XFS_SBS_DBLOCKS;
-                       msbp->msb_delta = tp->t_dblocks_delta;
-                       msbp++;
-               }
-               if (tp->t_agcount_delta != 0) {
-                       msbp->msb_field = XFS_SBS_AGCOUNT;
-                       msbp->msb_delta = tp->t_agcount_delta;
-                       msbp++;
-               }
-               if (tp->t_imaxpct_delta != 0) {
-                       msbp->msb_field = XFS_SBS_IMAX_PCT;
-                       msbp->msb_delta = tp->t_imaxpct_delta;
-                       msbp++;
-               }
-               if (tp->t_rextsize_delta != 0) {
-                       msbp->msb_field = XFS_SBS_REXTSIZE;
-                       msbp->msb_delta = tp->t_rextsize_delta;
-                       msbp++;
-               }
-               if (tp->t_rbmblocks_delta != 0) {
-                       msbp->msb_field = XFS_SBS_RBMBLOCKS;
-                       msbp->msb_delta = tp->t_rbmblocks_delta;
-                       msbp++;
-               }
-               if (tp->t_rblocks_delta != 0) {
-                       msbp->msb_field = XFS_SBS_RBLOCKS;
-                       msbp->msb_delta = tp->t_rblocks_delta;
-                       msbp++;
-               }
-               if (tp->t_rextents_delta != 0) {
-                       msbp->msb_field = XFS_SBS_REXTENTS;
-                       msbp->msb_delta = tp->t_rextents_delta;
-                       msbp++;
-               }
-               if (tp->t_rextslog_delta != 0) {
-                       msbp->msb_field = XFS_SBS_REXTSLOG;
-                       msbp->msb_delta = tp->t_rextslog_delta;
-                       msbp++;
-               }
+       if (tp->t_dblocks_delta != 0) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
+               if (error)
+                       goto out_undo_frextents;
        }
-
-       /*
-        * If we need to change anything, do it.
-        */
-       if (msbp > msb) {
-               error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
-                       (uint)(msbp - msb), rsvd);
+       if (tp->t_agcount_delta != 0) {
+               error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
                if (error)
-                       goto out_undo_ifreecount;
+                       goto out_undo_dblocks;
        }
-
+       if (tp->t_imaxpct_delta != 0) {
+               error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
+               if (error)
+                       goto out_undo_agcount;
+       }
+       if (tp->t_rextsize_delta != 0) {
+               error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
+                                    tp->t_rextsize_delta);
+               if (error)
+                       goto out_undo_imaxpct;
+       }
+       if (tp->t_rbmblocks_delta != 0) {
+               error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
+                                    tp->t_rbmblocks_delta);
+               if (error)
+                       goto out_undo_rextsize;
+       }
+       if (tp->t_rblocks_delta != 0) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
+               if (error)
+                       goto out_undo_rbmblocks;
+       }
+       if (tp->t_rextents_delta != 0) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
+                                    tp->t_rextents_delta);
+               if (error)
+                       goto out_undo_rblocks;
+       }
+       if (tp->t_rextslog_delta != 0) {
+               error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
+                                    tp->t_rextslog_delta);
+               if (error)
+                       goto out_undo_rextents;
+       }
+       spin_unlock(&mp->m_sb_lock);
        return;
 
-out_undo_ifreecount:
+out_undo_rextents:
+       if (tp->t_rextents_delta)
+               xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
+out_undo_rblocks:
+       if (tp->t_rblocks_delta)
+               xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
+out_undo_rbmblocks:
+       if (tp->t_rbmblocks_delta)
+               xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
+out_undo_rextsize:
+       if (tp->t_rextsize_delta)
+               xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
+out_undo_imaxpct:
+       if (tp->t_rextsize_delta)
+               xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
+out_undo_agcount:
+       if (tp->t_agcount_delta)
+               xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
+out_undo_dblocks:
+       if (tp->t_dblocks_delta)
+               xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
+out_undo_frextents:
+       if (rtxdelta)
+               xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
+out_undo_ifree:
+       spin_unlock(&mp->m_sb_lock);
        if (ifreedelta)
-               xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
+               xfs_mod_ifree(mp, -ifreedelta);
 out_undo_icount:
        if (idelta)
-               xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
+               xfs_mod_icount(mp, -idelta);
 out_undo_fdblocks:
        if (blkdelta)
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
+               xfs_mod_fdblocks(mp, -blkdelta, rsvd);
 out:
        ASSERT(error == 0);
        return;
index 69f6e47..c036815 100644 (file)
@@ -35,7 +35,7 @@ static int
 xfs_xattr_get(struct dentry *dentry, const char *name,
                void *value, size_t size, int xflags)
 {
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+       struct xfs_inode *ip = XFS_I(d_inode(dentry));
        int error, asize = size;
 
        if (strcmp(name, "") == 0)
@@ -57,7 +57,7 @@ static int
 xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
                size_t size, int flags, int xflags)
 {
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+       struct xfs_inode *ip = XFS_I(d_inode(dentry));
 
        if (strcmp(name, "") == 0)
                return -EINVAL;
@@ -197,7 +197,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
 {
        struct xfs_attr_list_context context;
        struct attrlist_cursor_kern cursor = { 0 };
-       struct inode            *inode = dentry->d_inode;
+       struct inode            *inode = d_inode(dentry);
        int                     error;
 
        /*
index 444671e..dd86c5f 100644 (file)
@@ -3,11 +3,15 @@
 
 #include <linux/io.h>
 
+#include <asm/acpi.h>
+
+#ifndef acpi_os_ioremap
 static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
                                            acpi_size size)
 {
        return ioremap_cache(phys, size);
 }
+#endif
 
 void __iomem *__init_refok
 acpi_os_map_iomem(acpi_physical_address phys, acpi_size size);
index b95dc32..4188a4d 100644 (file)
@@ -196,7 +196,7 @@ struct acpi_processor_flags {
 struct acpi_processor {
        acpi_handle handle;
        u32 acpi_id;
-       u32 phys_id;    /* CPU hardware ID such as APIC ID for x86 */
+       phys_cpuid_t phys_id;   /* CPU hardware ID such as APIC ID for x86 */
        u32 id;         /* CPU logical ID allocated by OS */
        u32 pblk;
        int performance_platform_limit;
@@ -310,8 +310,8 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
 #endif                         /* CONFIG_CPU_FREQ */
 
 /* in processor_core.c */
-int acpi_get_phys_id(acpi_handle, int type, u32 acpi_id);
-int acpi_map_cpuid(int phys_id, u32 acpi_id);
+phys_cpuid_t acpi_get_phys_id(acpi_handle, int type, u32 acpi_id);
+int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id);
 int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id);
 
 /* in processor_pdc.c */
diff --git a/include/dt-bindings/dma/jz4780-dma.h b/include/dt-bindings/dma/jz4780-dma.h
new file mode 100644 (file)
index 0000000..df017fd
--- /dev/null
@@ -0,0 +1,49 @@
+#ifndef __DT_BINDINGS_DMA_JZ4780_DMA_H__
+#define __DT_BINDINGS_DMA_JZ4780_DMA_H__
+
+/*
+ * Request type numbers for the JZ4780 DMA controller (written to the DRTn
+ * register for the channel).
+ */
+#define JZ4780_DMA_I2S1_TX     0x4
+#define JZ4780_DMA_I2S1_RX     0x5
+#define JZ4780_DMA_I2S0_TX     0x6
+#define JZ4780_DMA_I2S0_RX     0x7
+#define JZ4780_DMA_AUTO                0x8
+#define JZ4780_DMA_SADC_RX     0x9
+#define JZ4780_DMA_UART4_TX    0xc
+#define JZ4780_DMA_UART4_RX    0xd
+#define JZ4780_DMA_UART3_TX    0xe
+#define JZ4780_DMA_UART3_RX    0xf
+#define JZ4780_DMA_UART2_TX    0x10
+#define JZ4780_DMA_UART2_RX    0x11
+#define JZ4780_DMA_UART1_TX    0x12
+#define JZ4780_DMA_UART1_RX    0x13
+#define JZ4780_DMA_UART0_TX    0x14
+#define JZ4780_DMA_UART0_RX    0x15
+#define JZ4780_DMA_SSI0_TX     0x16
+#define JZ4780_DMA_SSI0_RX     0x17
+#define JZ4780_DMA_SSI1_TX     0x18
+#define JZ4780_DMA_SSI1_RX     0x19
+#define JZ4780_DMA_MSC0_TX     0x1a
+#define JZ4780_DMA_MSC0_RX     0x1b
+#define JZ4780_DMA_MSC1_TX     0x1c
+#define JZ4780_DMA_MSC1_RX     0x1d
+#define JZ4780_DMA_MSC2_TX     0x1e
+#define JZ4780_DMA_MSC2_RX     0x1f
+#define JZ4780_DMA_PCM0_TX     0x20
+#define JZ4780_DMA_PCM0_RX     0x21
+#define JZ4780_DMA_SMB0_TX     0x24
+#define JZ4780_DMA_SMB0_RX     0x25
+#define JZ4780_DMA_SMB1_TX     0x26
+#define JZ4780_DMA_SMB1_RX     0x27
+#define JZ4780_DMA_SMB2_TX     0x28
+#define JZ4780_DMA_SMB2_RX     0x29
+#define JZ4780_DMA_SMB3_TX     0x2a
+#define JZ4780_DMA_SMB3_RX     0x2b
+#define JZ4780_DMA_SMB4_TX     0x2c
+#define JZ4780_DMA_SMB4_RX     0x2d
+#define JZ4780_DMA_DES_TX      0x2e
+#define JZ4780_DMA_DES_RX      0x2f
+
+#endif /* __DT_BINDINGS_DMA_JZ4780_DMA_H__ */
index dd12127..e4da5e3 100644 (file)
@@ -79,6 +79,7 @@ enum acpi_irq_model_id {
        ACPI_IRQ_MODEL_IOAPIC,
        ACPI_IRQ_MODEL_IOSAPIC,
        ACPI_IRQ_MODEL_PLATFORM,
+       ACPI_IRQ_MODEL_GIC,
        ACPI_IRQ_MODEL_COUNT
 };
 
@@ -152,9 +153,14 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
 int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
 void acpi_numa_arch_fixup(void);
 
+#ifndef PHYS_CPUID_INVALID
+typedef u32 phys_cpuid_t;
+#define PHYS_CPUID_INVALID (phys_cpuid_t)(-1)
+#endif
+
 #ifdef CONFIG_ACPI_HOTPLUG_CPU
 /* Arch dependent functions for cpu hotplug support */
-int acpi_map_cpu(acpi_handle handle, int physid, int *pcpu);
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu);
 int acpi_unmap_cpu(int cpu);
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
diff --git a/include/linux/acpi_irq.h b/include/linux/acpi_irq.h
new file mode 100644 (file)
index 0000000..f10c872
--- /dev/null
@@ -0,0 +1,10 @@
+#ifndef _LINUX_ACPI_IRQ_H
+#define _LINUX_ACPI_IRQ_H
+
+#include <linux/irq.h>
+
+#ifndef acpi_irq_init
+static inline void acpi_irq_init(void) { }
+#endif
+
+#endif /* _LINUX_ACPI_IRQ_H */
diff --git a/include/linux/amba/xilinx_dma.h b/include/linux/amba/xilinx_dma.h
deleted file mode 100644 (file)
index 34b98f2..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Xilinx DMA Engine drivers support header file
- *
- * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __DMA_XILINX_DMA_H
-#define __DMA_XILINX_DMA_H
-
-#include <linux/dma-mapping.h>
-#include <linux/dmaengine.h>
-
-/**
- * struct xilinx_vdma_config - VDMA Configuration structure
- * @frm_dly: Frame delay
- * @gen_lock: Whether in gen-lock mode
- * @master: Master that it syncs to
- * @frm_cnt_en: Enable frame count enable
- * @park: Whether wants to park
- * @park_frm: Frame to park on
- * @coalesc: Interrupt coalescing threshold
- * @delay: Delay counter
- * @reset: Reset Channel
- * @ext_fsync: External Frame Sync source
- */
-struct xilinx_vdma_config {
-       int frm_dly;
-       int gen_lock;
-       int master;
-       int frm_cnt_en;
-       int park;
-       int park_frm;
-       int coalesc;
-       int delay;
-       int reset;
-       int ext_fsync;
-};
-
-int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
-                                       struct xilinx_vdma_config *cfg);
-
-#endif
index 179b38f..388574e 100644 (file)
@@ -60,12 +60,15 @@ struct dma_chan_ref {
  * dependency chain
  * @ASYNC_TX_FENCE: specify that the next operation in the dependency
  * chain uses this operation's result as an input
+ * @ASYNC_TX_PQ_XOR_DST: do not overwrite the syndrome but XOR it with the
+ * input data. Required for rmw case.
  */
 enum async_tx_flags {
        ASYNC_TX_XOR_ZERO_DST    = (1 << 0),
        ASYNC_TX_XOR_DROP_DST    = (1 << 1),
        ASYNC_TX_ACK             = (1 << 2),
        ASYNC_TX_FENCE           = (1 << 3),
+       ASYNC_TX_PQ_XOR_DST      = (1 << 4),
 };
 
 /**
index 71e05bb..4763ad6 100644 (file)
 #define CEPH_FEATURE_MDS_INLINE_DATA     (1ULL<<40)
 #define CEPH_FEATURE_CRUSH_TUNABLES3     (1ULL<<41)
 #define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41)  /* overlap w/ tunables3 */
+#define CEPH_FEATURE_MSGR_KEEPALIVE2   (1ULL<<42)
+#define CEPH_FEATURE_OSD_POOLRESEND    (1ULL<<43)
+#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
+#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
+#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
+#define CEPH_FEATURE_OSD_REPOP         (1ULL<<46)   /* overlap with fadvise */
+#define CEPH_FEATURE_OSD_OBJECT_DIGEST  (1ULL<<46)  /* overlap with fadvise */
+#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */
+#define CEPH_FEATURE_MDS_QUOTA      (1ULL<<47)
+#define CEPH_FEATURE_CRUSH_V4      (1ULL<<48)  /* straw2 buckets */
+#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
+// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
+#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
 
 /*
  * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -93,7 +106,8 @@ static inline u64 ceph_sanitize_features(u64 features)
         CEPH_FEATURE_EXPORT_PEER |             \
         CEPH_FEATURE_OSDMAP_ENC |              \
         CEPH_FEATURE_CRUSH_TUNABLES3 |         \
-        CEPH_FEATURE_OSD_PRIMARY_AFFINITY)
+        CEPH_FEATURE_OSD_PRIMARY_AFFINITY |    \
+        CEPH_FEATURE_CRUSH_V4)
 
 #define CEPH_FEATURES_REQUIRED_DEFAULT   \
        (CEPH_FEATURE_NOSRCADDR |        \
index 31eb03d..d7d072a 100644 (file)
@@ -323,6 +323,7 @@ enum {
        CEPH_MDS_OP_MKSNAP     = 0x01400,
        CEPH_MDS_OP_RMSNAP     = 0x01401,
        CEPH_MDS_OP_LSSNAP     = 0x00402,
+       CEPH_MDS_OP_RENAMESNAP = 0x01403,
 };
 
 extern const char *ceph_mds_op_name(int op);
index 1df086d..29cf897 100644 (file)
@@ -7,13 +7,7 @@
 #define CEPH_DEFINE_SHOW_FUNC(name)                                    \
 static int name##_open(struct inode *inode, struct file *file)         \
 {                                                                      \
-       struct seq_file *sf;                                            \
-       int ret;                                                        \
-                                                                       \
-       ret = single_open(file, name, NULL);                            \
-       sf = file->private_data;                                        \
-       sf->private = inode->i_private;                                 \
-       return ret;                                                     \
+       return single_open(file, name, inode->i_private);               \
 }                                                                      \
                                                                        \
 static const struct file_operations name##_fops = {                    \
index 16fff96..30f92ce 100644 (file)
@@ -135,6 +135,7 @@ struct ceph_client {
        struct dentry *debugfs_dir;
        struct dentry *debugfs_monmap;
        struct dentry *debugfs_osdmap;
+       struct dentry *debugfs_options;
 #endif
 };
 
@@ -191,6 +192,7 @@ extern struct ceph_options *ceph_parse_options(char *options,
                              const char *dev_name, const char *dev_name_end,
                              int (*parse_extra_token)(char *c, void *private),
                              void *private);
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client);
 extern void ceph_destroy_options(struct ceph_options *opt);
 extern int ceph_compare_options(struct ceph_options *new_opt,
                                struct ceph_client *client);
index 561ea89..e55c08b 100644 (file)
@@ -175,13 +175,12 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
        __u8 version;
 
        if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
-               pr_warning("incomplete pg encoding");
-
+               pr_warn("incomplete pg encoding\n");
                return -EINVAL;
        }
        version = ceph_decode_8(p);
        if (version > 1) {
-               pr_warning("do not understand pg encoding %d > 1",
+               pr_warn("do not understand pg encoding %d > 1\n",
                        (int)version);
                return -EINVAL;
        }
index 1355098..d27d015 100644 (file)
@@ -253,4 +253,10 @@ extern void clocksource_of_init(void);
 static inline void clocksource_of_init(void) {}
 #endif
 
+#ifdef CONFIG_ACPI
+void acpi_generic_timer_init(void);
+#else
+static inline void acpi_generic_timer_init(void) { }
+#endif
+
 #endif /* _LINUX_CLOCKSOURCE_H */
index 4fad5f8..48a1a7d 100644 (file)
@@ -96,13 +96,15 @@ struct crush_rule {
  *  uniform         O(1)       poor         poor
  *  list            O(n)       optimal      poor
  *  tree            O(log n)   good         good
- *  straw           O(n)       optimal      optimal
+ *  straw           O(n)       better       better
+ *  straw2          O(n)       optimal      optimal
  */
 enum {
        CRUSH_BUCKET_UNIFORM = 1,
        CRUSH_BUCKET_LIST = 2,
        CRUSH_BUCKET_TREE = 3,
-       CRUSH_BUCKET_STRAW = 4
+       CRUSH_BUCKET_STRAW = 4,
+       CRUSH_BUCKET_STRAW2 = 5,
 };
 extern const char *crush_bucket_alg_name(int alg);
 
@@ -149,6 +151,11 @@ struct crush_bucket_straw {
        __u32 *straws;         /* 16-bit fixed point */
 };
 
+struct crush_bucket_straw2 {
+       struct crush_bucket h;
+       __u32 *item_weights;   /* 16-bit fixed point */
+};
+
 
 
 /*
@@ -189,6 +196,7 @@ extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
 extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
 extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
 extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
+extern void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b);
 extern void crush_destroy_bucket(struct crush_bucket *b);
 extern void crush_destroy_rule(struct crush_rule *r);
 extern void crush_destroy(struct crush_map *map);
index 694e1fe..2f0b431 100644 (file)
@@ -162,6 +162,33 @@ struct dma_buf_attachment {
        void *priv;
 };
 
+/**
+ * struct dma_buf_export_info - holds information needed to export a dma_buf
+ * @exp_name:  name of the exporting module - useful for debugging.
+ * @ops:       Attach allocator-defined dma buf ops to the new buffer
+ * @size:      Size of the buffer
+ * @flags:     mode flags for the file
+ * @resv:      reservation-object, NULL to allocate default one
+ * @priv:      Attach private data of allocator to this buffer
+ *
+ * This structure holds the information required to export the buffer. Used
+ * with dma_buf_export() only.
+ */
+struct dma_buf_export_info {
+       const char *exp_name;
+       const struct dma_buf_ops *ops;
+       size_t size;
+       int flags;
+       struct reservation_object *resv;
+       void *priv;
+};
+
+/**
+ * helper macro for exporters; zeros and fills in most common values
+ */
+#define DEFINE_DMA_BUF_EXPORT_INFO(a)  \
+       struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME }
+
 /**
  * get_dma_buf - convenience wrapper for get_file.
  * @dmabuf:    [in]    pointer to dma_buf
@@ -181,12 +208,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
 void dma_buf_detach(struct dma_buf *dmabuf,
                                struct dma_buf_attachment *dmabuf_attach);
 
-struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
-                              size_t size, int flags, const char *,
-                              struct reservation_object *);
-
-#define dma_buf_export(priv, ops, size, flags, resv)   \
-       dma_buf_export_named(priv, ops, size, flags, KBUILD_MODNAME, resv)
+struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
 
 int dma_buf_fd(struct dma_buf *dmabuf, int flags);
 struct dma_buf *dma_buf_get(int fd);
diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h
new file mode 100644 (file)
index 0000000..34b98f2
--- /dev/null
@@ -0,0 +1,47 @@
+/*
+ * Xilinx DMA Engine drivers support header file
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __DMA_XILINX_DMA_H
+#define __DMA_XILINX_DMA_H
+
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+
+/**
+ * struct xilinx_vdma_config - VDMA Configuration structure
+ * @frm_dly: Frame delay
+ * @gen_lock: Whether in gen-lock mode
+ * @master: Master that it syncs to
+ * @frm_cnt_en: Enable frame count enable
+ * @park: Whether wants to park
+ * @park_frm: Frame to park on
+ * @coalesc: Interrupt coalescing threshold
+ * @delay: Delay counter
+ * @reset: Reset Channel
+ * @ext_fsync: External Frame Sync source
+ */
+struct xilinx_vdma_config {
+       int frm_dly;
+       int gen_lock;
+       int master;
+       int frm_cnt_en;
+       int park;
+       int park_frm;
+       int coalesc;
+       int delay;
+       int reset;
+       int ext_fsync;
+};
+
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+                                       struct xilinx_vdma_config *cfg);
+
+#endif
index b6997a0..ad41975 100644 (file)
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
  * The full GNU General Public License is included in this distribution in the
  * file called COPYING.
  */
@@ -574,7 +570,6 @@ struct dma_tx_state {
  * @copy_align: alignment shift for memcpy operations
  * @xor_align: alignment shift for xor operations
  * @pq_align: alignment shift for pq operations
- * @fill_align: alignment shift for memset operations
  * @dev_id: unique device ID
  * @dev: struct device reference for dma mapping api
  * @src_addr_widths: bit mask of src addr widths the device supports
@@ -625,7 +620,6 @@ struct dma_device {
        u8 copy_align;
        u8 xor_align;
        u8 pq_align;
-       u8 fill_align;
        #define DMA_HAS_PQ_CONTINUE (1 << 15)
 
        int dev_id;
@@ -826,12 +820,6 @@ static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
        return dmaengine_check_align(dev->pq_align, off1, off2, len);
 }
 
-static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
-                                      size_t off2, size_t len)
-{
-       return dmaengine_check_align(dev->fill_align, off1, off2, len);
-}
-
 static inline void
 dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
 {
@@ -1098,7 +1086,6 @@ void dma_async_device_unregister(struct dma_device *device);
 void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
 struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
 struct dma_chan *dma_get_any_slave_channel(struct dma_device *device);
-struct dma_chan *net_dma_find_channel(void);
 #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
 #define dma_request_slave_channel_compat(mask, x, y, dev, name) \
        __dma_request_slave_channel_compat(&(mask), x, y, dev, name)
@@ -1116,27 +1103,4 @@ static inline struct dma_chan
 
        return __dma_request_channel(mask, fn, fn_param);
 }
-
-/* --- Helper iov-locking functions --- */
-
-struct dma_page_list {
-       char __user *base_address;
-       int nr_pages;
-       struct page **pages;
-};
-
-struct dma_pinned_list {
-       int nr_iovecs;
-       struct dma_page_list page_list[0];
-};
-
-struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len);
-void dma_unpin_iovec_pages(struct dma_pinned_list* pinned_list);
-
-dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
-       struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len);
-dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
-       struct dma_pinned_list *pinned_list, struct page *page,
-       unsigned int offset, size_t len);
-
 #endif /* DMAENGINE_H */
index 3159168..9961110 100644 (file)
@@ -21,4 +21,10 @@ struct space_resv {
 #define FS_IOC_RESVSP          _IOW('X', 40, struct space_resv)
 #define FS_IOC_RESVSP64                _IOW('X', 42, struct space_resv)
 
+#define        FALLOC_FL_SUPPORTED_MASK        (FALLOC_FL_KEEP_SIZE |          \
+                                        FALLOC_FL_PUNCH_HOLE |         \
+                                        FALLOC_FL_COLLAPSE_RANGE |     \
+                                        FALLOC_FL_ZERO_RANGE |         \
+                                        FALLOC_FL_INSERT_RANGE)
+
 #endif /* _FALLOC_H_ */
index c7496f2..35ec87e 100644 (file)
@@ -1820,7 +1820,7 @@ struct super_operations {
 #define I_SYNC                 (1 << __I_SYNC)
 #define I_REFERENCED           (1 << 8)
 #define __I_DIO_WAKEUP         9
-#define I_DIO_WAKEUP           (1 << I_DIO_WAKEUP)
+#define I_DIO_WAKEUP           (1 << __I_DIO_WAKEUP)
 #define I_LINKABLE             (1 << 10)
 #define I_DIRTY_TIME           (1 << 11)
 #define __I_DIRTY_TIME_EXPIRED 12
@@ -2644,6 +2644,9 @@ enum {
 
        /* filesystem can handle aio writes beyond i_size */
        DIO_ASYNC_EXTEND = 0x04,
+
+       /* inode/fs/bdev does not need truncate protection */
+       DIO_SKIP_DIO_COUNT = 0x08,
 };
 
 void dio_end_io(struct bio *bio, int error);
@@ -2666,7 +2669,31 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
 #endif
 
 void inode_dio_wait(struct inode *inode);
-void inode_dio_done(struct inode *inode);
+
+/*
+ * inode_dio_begin - signal start of a direct I/O requests
+ * @inode: inode the direct I/O happens on
+ *
+ * This is called once we've finished processing a direct I/O request,
+ * and is used to wake up callers waiting for direct I/O to be quiesced.
+ */
+static inline void inode_dio_begin(struct inode *inode)
+{
+       atomic_inc(&inode->i_dio_count);
+}
+
+/*
+ * inode_dio_end - signal finish of a direct I/O requests
+ * @inode: inode the direct I/O happens on
+ *
+ * This is called once we've finished processing a direct I/O request,
+ * and is used to wake up callers waiting for direct I/O to be quiesced.
+ */
+static inline void inode_dio_end(struct inode *inode)
+{
+       if (atomic_dec_and_test(&inode->i_dio_count))
+               wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
+}
 
 extern void inode_set_flags(struct inode *inode, unsigned int flags,
                            unsigned int mask);
index a65208a..796ef96 100644 (file)
@@ -115,10 +115,19 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
  * Extended Capability Register
  */
 
-#define ecap_niotlb_iunits(e)  ((((e) >> 24) & 0xff) + 1)
+#define ecap_pss(e)            ((e >> 35) & 0x1f)
+#define ecap_eafs(e)           ((e >> 34) & 0x1)
+#define ecap_nwfs(e)           ((e >> 33) & 0x1)
+#define ecap_srs(e)            ((e >> 31) & 0x1)
+#define ecap_ers(e)            ((e >> 30) & 0x1)
+#define ecap_prs(e)            ((e >> 29) & 0x1)
+#define ecap_pasid(e)          ((e >> 28) & 0x1)
+#define ecap_dis(e)            ((e >> 27) & 0x1)
+#define ecap_nest(e)           ((e >> 26) & 0x1)
+#define ecap_mts(e)            ((e >> 25) & 0x1)
+#define ecap_ecs(e)            ((e >> 24) & 0x1)
 #define ecap_iotlb_offset(e)   ((((e) >> 8) & 0x3ff) * 16)
-#define ecap_max_iotlb_offset(e) \
-       (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
+#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
 #define ecap_coherent(e)       ((e) & 0x1)
 #define ecap_qis(e)            ((e) & 0x2)
 #define ecap_pass_through(e)   ((e >> 6) & 0x1)
@@ -180,6 +189,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_GSTS_IRES (((u32)1) << 25)
 #define DMA_GSTS_CFIS (((u32)1) << 23)
 
+/* DMA_RTADDR_REG */
+#define DMA_RTADDR_RTT (((u64)1) << 11)
+
 /* CCMD_REG */
 #define DMA_CCMD_ICC (((u64)1) << 63)
 #define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
diff --git a/include/linux/irqchip/arm-gic-acpi.h b/include/linux/irqchip/arm-gic-acpi.h
new file mode 100644 (file)
index 0000000..de3419e
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014, Linaro Ltd.
+ *     Author: Tomasz Nowicki <tomasz.nowicki@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ARM_GIC_ACPI_H_
+#define ARM_GIC_ACPI_H_
+
+#ifdef CONFIG_ACPI
+
+/*
+ * Hard code here, we can not get memory size from MADT (but FDT does),
+ * Actually no need to do that, because this size can be inferred
+ * from GIC spec.
+ */
+#define ACPI_GICV2_DIST_MEM_SIZE       (SZ_4K)
+#define ACPI_GIC_CPU_IF_MEM_SIZE       (SZ_8K)
+
+struct acpi_table_header;
+
+int gic_v2_acpi_init(struct acpi_table_header *table);
+void acpi_gic_init(void);
+#else
+static inline void acpi_gic_init(void) { }
+#endif
+
+#endif /* ARM_GIC_ACPI_H_ */
index 9962c6b..6db19f3 100644 (file)
@@ -61,8 +61,8 @@ struct lguest_data {
        u32 tsc_khz;
 
 /* Fields initialized by the Guest at boot: */
-       /* Instruction range to suppress interrupts even if enabled */
-       unsigned long noirq_start, noirq_end;
+       /* Instruction to suppress interrupts even if enabled */
+       unsigned long noirq_iret;
        /* Address above which page tables are all identical. */
        unsigned long kernel_address;
        /* The vector to try to use for system calls (0x40 or 0x80). */
index 0e166b9..324a346 100644 (file)
@@ -16,6 +16,7 @@
 #ifndef __LINUX_MFD_CROS_EC_H
 #define __LINUX_MFD_CROS_EC_H
 
+#include <linux/cdev.h>
 #include <linux/notifier.h>
 #include <linux/mfd/cros_ec_commands.h>
 #include <linux/mutex.h>
@@ -38,20 +39,20 @@ enum {
 /*
  * @version: Command version number (often 0)
  * @command: Command to send (EC_CMD_...)
- * @outdata: Outgoing data to EC
  * @outsize: Outgoing length in bytes
- * @indata: Where to put the incoming data from EC
  * @insize: Max number of bytes to accept from EC
  * @result: EC's response to the command (separate from communication failure)
+ * @outdata: Outgoing data to EC
+ * @indata: Where to put the incoming data from EC
  */
 struct cros_ec_command {
        uint32_t version;
        uint32_t command;
-       uint8_t *outdata;
        uint32_t outsize;
-       uint8_t *indata;
        uint32_t insize;
        uint32_t result;
+       uint8_t outdata[EC_PROTO2_MAX_PARAM_SIZE];
+       uint8_t indata[EC_PROTO2_MAX_PARAM_SIZE];
 };
 
 /**
@@ -59,9 +60,17 @@ struct cros_ec_command {
  *
  * @ec_name: name of EC device (e.g. 'chromeos-ec')
  * @phys_name: name of physical comms layer (e.g. 'i2c-4')
- * @dev: Device pointer
+ * @dev: Device pointer for physical comms device
+ * @vdev: Device pointer for virtual comms device
+ * @cdev: Character device structure for virtual comms device
  * @was_wake_device: true if this device was set to wake the system from
  * sleep at the last suspend
+ * @cmd_readmem: direct read of the EC memory-mapped region, if supported
+ *     @offset is within EC_LPC_ADDR_MEMMAP region.
+ *     @bytes: number of bytes to read. zero means "read a string" (including
+ *     the trailing '\0'). At most only EC_MEMMAP_SIZE bytes can be read.
+ *     Caller must ensure that the buffer is large enough for the result when
+ *     reading a string.
  *
  * @priv: Private data
  * @irq: Interrupt to use
@@ -90,8 +99,12 @@ struct cros_ec_device {
        const char *ec_name;
        const char *phys_name;
        struct device *dev;
+       struct device *vdev;
+       struct cdev cdev;
        bool was_wake_device;
        struct class *cros_class;
+       int (*cmd_readmem)(struct cros_ec_device *ec, unsigned int offset,
+                          unsigned int bytes, void *dest);
 
        /* These are used to implement the platform-specific interface */
        void *priv;
index 6058128..24b86d5 100644 (file)
@@ -111,6 +111,8 @@ struct dma_chan;
  * data for the MMC controller
  */
 struct tmio_mmc_data {
+       void                            *chan_priv_tx;
+       void                            *chan_priv_rx;
        unsigned int                    hclk;
        unsigned long                   capabilities;
        unsigned long                   capabilities2;
index f9ce34b..83e80ab 100644 (file)
@@ -1345,6 +1345,10 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
 
+void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry,
+                        int port);
+__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port);
+void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port);
 int mlx4_flow_attach(struct mlx4_dev *dev,
                     struct mlx4_net_trans_rule *rule, u64 *reg_id);
 int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
index 8b08607..0755b9f 100644 (file)
@@ -499,7 +499,7 @@ static inline int page_count(struct page *page)
 
 static inline bool __compound_tail_refcounted(struct page *page)
 {
-       return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page);
+       return !PageSlab(page) && !PageHeadHuge(page);
 }
 
 /*
index da77e5e..95d6f03 100644 (file)
@@ -7,14 +7,4 @@
 #define SH_MOBILE_SDHI_IRQ_SDCARD      "sdcard"
 #define SH_MOBILE_SDHI_IRQ_SDIO                "sdio"
 
-struct sh_mobile_sdhi_info {
-       int dma_slave_tx;
-       int dma_slave_rx;
-       unsigned long tmio_flags;
-       unsigned long tmio_caps;
-       unsigned long tmio_caps2;
-       u32 tmio_ocr_mask;      /* available MMC voltages */
-       unsigned int cd_gpio;
-};
-
 #endif /* LINUX_MMC_SH_MOBILE_SDHI_H */
index 5f487d7..29975c7 100644 (file)
@@ -77,7 +77,7 @@
 /* ensure we never evaluate anything shorted than an unsigned long
  * to zero, and ensure we'll never miss the end of an comparison (bjd) */
 
-#define map_calc_words(map) ((map_bankwidth(map) + (sizeof(unsigned long)-1))/ sizeof(unsigned long))
+#define map_calc_words(map) ((map_bankwidth(map) + (sizeof(unsigned long)-1)) / sizeof(unsigned long))
 
 #ifdef CONFIG_MTD_MAP_BANK_WIDTH_8
 # ifdef map_bankwidth
@@ -181,7 +181,7 @@ static inline int map_bankwidth_supported(int w)
        }
 }
 
-#define MAX_MAP_LONGS ( ((MAX_MAP_BANKWIDTH*8) + BITS_PER_LONG - 1) / BITS_PER_LONG )
+#define MAX_MAP_LONGS (((MAX_MAP_BANKWIDTH * 8) + BITS_PER_LONG - 1) / BITS_PER_LONG)
 
 typedef union {
        unsigned long x[MAX_MAP_LONGS];
@@ -264,20 +264,22 @@ void unregister_mtd_chip_driver(struct mtd_chip_driver *);
 struct mtd_info *do_map_probe(const char *name, struct map_info *map);
 void map_destroy(struct mtd_info *mtd);
 
-#define ENABLE_VPP(map) do { if(map->set_vpp) map->set_vpp(map, 1); } while(0)
-#define DISABLE_VPP(map) do { if(map->set_vpp) map->set_vpp(map, 0); } while(0)
+#define ENABLE_VPP(map) do { if (map->set_vpp) map->set_vpp(map, 1); } while (0)
+#define DISABLE_VPP(map) do { if (map->set_vpp) map->set_vpp(map, 0); } while (0)
 
 #define INVALIDATE_CACHED_RANGE(map, from, size) \
-       do { if(map->inval_cache) map->inval_cache(map, from, size); } while(0)
+       do { if (map->inval_cache) map->inval_cache(map, from, size); } while (0)
 
 
 static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2)
 {
        int i;
-       for (i=0; i<map_words(map); i++) {
+
+       for (i = 0; i < map_words(map); i++) {
                if (val1.x[i] != val2.x[i])
                        return 0;
        }
+
        return 1;
 }
 
@@ -286,9 +288,9 @@ static inline map_word map_word_and(struct map_info *map, map_word val1, map_wor
        map_word r;
        int i;
 
-       for (i=0; i<map_words(map); i++) {
+       for (i = 0; i < map_words(map); i++)
                r.x[i] = val1.x[i] & val2.x[i];
-       }
+
        return r;
 }
 
@@ -297,9 +299,9 @@ static inline map_word map_word_clr(struct map_info *map, map_word val1, map_wor
        map_word r;
        int i;
 
-       for (i=0; i<map_words(map); i++) {
+       for (i = 0; i < map_words(map); i++)
                r.x[i] = val1.x[i] & ~val2.x[i];
-       }
+
        return r;
 }
 
@@ -308,22 +310,33 @@ static inline map_word map_word_or(struct map_info *map, map_word val1, map_word
        map_word r;
        int i;
 
-       for (i=0; i<map_words(map); i++) {
+       for (i = 0; i < map_words(map); i++)
                r.x[i] = val1.x[i] | val2.x[i];
-       }
+
        return r;
 }
 
-#define map_word_andequal(m, a, b, z) map_word_equal(m, z, map_word_and(m, a, b))
+static inline int map_word_andequal(struct map_info *map, map_word val1, map_word val2, map_word val3)
+{
+       int i;
+
+       for (i = 0; i < map_words(map); i++) {
+               if ((val1.x[i] & val2.x[i]) != val3.x[i])
+                       return 0;
+       }
+
+       return 1;
+}
 
 static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2)
 {
        int i;
 
-       for (i=0; i<map_words(map); i++) {
+       for (i = 0; i < map_words(map); i++) {
                if (val1.x[i] & val2.x[i])
                        return 1;
        }
+
        return 0;
 }
 
@@ -355,14 +368,16 @@ static inline map_word map_word_load_partial(struct map_info *map, map_word orig
 
        if (map_bankwidth_is_large(map)) {
                char *dest = (char *)&orig;
+
                memcpy(dest+start, buf, len);
        } else {
-               for (i=start; i < start+len; i++) {
+               for (i = start; i < start+len; i++) {
                        int bitpos;
+
 #ifdef __LITTLE_ENDIAN
-                       bitpos = i*8;
+                       bitpos = i * 8;
 #else /* __BIG_ENDIAN */
-                       bitpos = (map_bankwidth(map)-1-i)*8;
+                       bitpos = (map_bankwidth(map) - 1 - i) * 8;
 #endif
                        orig.x[0] &= ~(0xff << bitpos);
                        orig.x[0] |= (unsigned long)buf[i-start] << bitpos;
@@ -384,9 +399,10 @@ static inline map_word map_word_ff(struct map_info *map)
 
        if (map_bankwidth(map) < MAP_FF_LIMIT) {
                int bw = 8 * map_bankwidth(map);
+
                r.x[0] = (1UL << bw) - 1;
        } else {
-               for (i=0; i<map_words(map); i++)
+               for (i = 0; i < map_words(map); i++)
                        r.x[i] = ~0UL;
        }
        return r;
@@ -407,7 +423,7 @@ static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
                r.x[0] = __raw_readq(map->virt + ofs);
 #endif
        else if (map_bankwidth_is_large(map))
-               memcpy_fromio(r.x, map->virt+ofs, map->bankwidth);
+               memcpy_fromio(r.x, map->virt + ofs, map->bankwidth);
        else
                BUG();
 
index 4720b86..e540952 100644 (file)
@@ -155,6 +155,8 @@ enum spi_nor_option_flags {
  * @write:             [DRIVER-SPECIFIC] write data to the SPI NOR
  * @erase:             [DRIVER-SPECIFIC] erase a sector of the SPI NOR
  *                     at the offset @offs
+ * @lock:              [FLASH-SPECIFIC] lock a region of the SPI NOR
+ * @unlock:            [FLASH-SPECIFIC] unlock a region of the SPI NOR
  * @priv:              the private data
  */
 struct spi_nor {
@@ -189,6 +191,9 @@ struct spi_nor {
                        size_t len, size_t *retlen, const u_char *write_buf);
        int (*erase)(struct spi_nor *nor, loff_t offs);
 
+       int (*flash_lock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+       int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+
        void *priv;
 };
 
index ed43cb7..32201c2 100644 (file)
 #include <linux/uidgid.h>
 #include <uapi/linux/nfs4.h>
 
+enum nfs4_acl_whotype {
+       NFS4_ACL_WHO_NAMED = 0,
+       NFS4_ACL_WHO_OWNER,
+       NFS4_ACL_WHO_GROUP,
+       NFS4_ACL_WHO_EVERYONE,
+};
+
 struct nfs4_ace {
        uint32_t        type;
        uint32_t        flag;
index 410abd1..b95f914 100644 (file)
@@ -511,6 +511,7 @@ extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
  * Try to write back everything synchronously (but check the
  * return value!)
  */
+extern int nfs_sync_inode(struct inode *inode);
 extern int nfs_wb_all(struct inode *inode);
 extern int nfs_wb_page(struct inode *inode, struct page* page);
 extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h
deleted file mode 100644 (file)
index 333844e..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * include/linux/nfs_idmap.h
- *
- *  UID and GID to name mapping for clients.
- *
- *  Copyright (c) 2002 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Marius Aamodt Eriksen <marius@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef NFS_IDMAP_H
-#define NFS_IDMAP_H
-
-#include <linux/uidgid.h>
-#include <uapi/linux/nfs_idmap.h>
-
-
-/* Forward declaration to make this header independent of others */
-struct nfs_client;
-struct nfs_server;
-struct nfs_fattr;
-struct nfs4_string;
-
-#if IS_ENABLED(CONFIG_NFS_V4)
-int nfs_idmap_init(void);
-void nfs_idmap_quit(void);
-#else
-static inline int nfs_idmap_init(void)
-{
-       return 0;
-}
-
-static inline void nfs_idmap_quit(void)
-{}
-#endif
-
-int nfs_idmap_new(struct nfs_client *);
-void nfs_idmap_delete(struct nfs_client *);
-
-void nfs_fattr_init_names(struct nfs_fattr *fattr,
-               struct nfs4_string *owner_name,
-               struct nfs4_string *group_name);
-void nfs_fattr_free_names(struct nfs_fattr *);
-void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *);
-
-int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *);
-int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *);
-int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t);
-int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t);
-
-int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res);
-
-extern unsigned int nfs_idmap_cache_timeout;
-#endif /* NFS_IDMAP_H */
index 4cb3eaa..93ab607 100644 (file)
@@ -255,11 +255,13 @@ struct nfs4_layoutget {
 struct nfs4_getdeviceinfo_args {
        struct nfs4_sequence_args seq_args;
        struct pnfs_device *pdev;
+       __u32 notify_types;
 };
 
 struct nfs4_getdeviceinfo_res {
        struct nfs4_sequence_res seq_res;
        struct pnfs_device *pdev;
+       __u32 notification;
 };
 
 struct nfs4_layoutcommit_args {
@@ -1271,11 +1273,15 @@ struct nfs42_falloc_args {
        nfs4_stateid                     falloc_stateid;
        u64                              falloc_offset;
        u64                              falloc_length;
+       const u32                       *falloc_bitmask;
 };
 
 struct nfs42_falloc_res {
        struct nfs4_sequence_res        seq_res;
        unsigned int                    status;
+
+       struct nfs_fattr                *falloc_fattr;
+       const struct nfs_server         *falloc_server;
 };
 
 struct nfs42_seek_args {
index 5f124f6..ddeaae6 100644 (file)
@@ -305,6 +305,7 @@ extern int of_property_read_string_helper(struct device_node *np,
 extern int of_device_is_compatible(const struct device_node *device,
                                   const char *);
 extern bool of_device_is_available(const struct device_node *device);
+extern bool of_device_is_big_endian(const struct device_node *device);
 extern const void *of_get_property(const struct device_node *node,
                                const char *name,
                                int *lenp);
@@ -467,6 +468,11 @@ static inline bool of_device_is_available(const struct device_node *device)
        return false;
 }
 
+static inline bool of_device_is_big_endian(const struct device_node *device)
+{
+       return false;
+}
+
 static inline struct property *of_find_property(const struct device_node *np,
                                                const char *name,
                                                int *lenp)
index 0ff360d..587ee50 100644 (file)
@@ -33,6 +33,8 @@ extern void *of_fdt_get_property(const void *blob,
 extern int of_fdt_is_compatible(const void *blob,
                                unsigned long node,
                                const char *compat);
+extern bool of_fdt_is_big_endian(const void *blob,
+                                unsigned long node);
 extern int of_fdt_match(const void *blob, unsigned long node,
                        const char *const *compat);
 extern void of_fdt_unflatten_tree(unsigned long *blob,
index bfec136..d884929 100644 (file)
@@ -37,8 +37,6 @@ extern int of_irq_parse_one(struct device_node *device, int index,
 extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
 extern int of_irq_to_resource(struct device_node *dev, int index,
                              struct resource *r);
-extern int of_irq_to_resource_table(struct device_node *dev,
-               struct resource *res, int nr_irqs);
 
 extern void of_irq_init(const struct of_device_id *matches);
 
@@ -46,6 +44,8 @@ extern void of_irq_init(const struct of_device_id *matches);
 extern int of_irq_count(struct device_node *dev);
 extern int of_irq_get(struct device_node *dev, int index);
 extern int of_irq_get_byname(struct device_node *dev, const char *name);
+extern int of_irq_to_resource_table(struct device_node *dev,
+               struct resource *res, int nr_irqs);
 #else
 static inline int of_irq_count(struct device_node *dev)
 {
@@ -59,6 +59,11 @@ static inline int of_irq_get_byname(struct device_node *dev, const char *name)
 {
        return 0;
 }
+static inline int of_irq_to_resource_table(struct device_node *dev,
+                                          struct resource *res, int nr_irqs)
+{
+       return 0;
+}
 #endif
 
 #if defined(CONFIG_OF)
index eabac4e..2d08816 100644 (file)
@@ -48,6 +48,9 @@ struct sdma_script_start_addrs {
        s32 ssish_2_mcu_addr;
        s32 hdmi_dma_addr;
        /* End of v2 array */
+       s32 zcanfd_2_mcu_addr;
+       s32 zqspi_2_mcu_addr;
+       /* End of v3 array */
 };
 
 /**
index 73069cb..a7a06d1 100644 (file)
@@ -72,6 +72,7 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
 /* Routine choices */
 struct raid6_calls {
        void (*gen_syndrome)(int, size_t, void **);
+       void (*xor_syndrome)(int, int, int, size_t, void **);
        int  (*valid)(void);    /* Returns 1 if this routine set is usable */
        const char *name;       /* Name of this routine set */
        int prefer;             /* Has special performance attribute */
index abdf1f2..dd0ba50 100644 (file)
@@ -69,6 +69,7 @@ struct shdma_chan {
        int id;                         /* Raw id of this channel */
        int irq;                        /* Channel IRQ */
        int slave_id;                   /* Client ID for slave DMA */
+       int real_slave_id;              /* argument passed to filter function */
        int hw_req;                     /* DMA request line for slave DMA - same
                                         * as MID/RID, used with DT */
        enum shdma_pm_state pm_state;
index aadc6a0..8073713 100644 (file)
@@ -142,12 +142,18 @@ typedef __be32    rpc_fraghdr;
        (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4))
 
 /*
- * RFC1833/RFC3530 rpcbind (v3+) well-known netid's.
+ * Well-known netids. See:
+ *
+ *   http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml
  */
 #define RPCBIND_NETID_UDP      "udp"
 #define RPCBIND_NETID_TCP      "tcp"
+#define RPCBIND_NETID_RDMA     "rdma"
+#define RPCBIND_NETID_SCTP     "sctp"
 #define RPCBIND_NETID_UDP6     "udp6"
 #define RPCBIND_NETID_TCP6     "tcp6"
+#define RPCBIND_NETID_RDMA6    "rdma6"
+#define RPCBIND_NETID_SCTP6    "sctp6"
 #define RPCBIND_NETID_LOCAL    "local"
 
 /*
index 64a0a0a..c984c85 100644 (file)
 #ifndef _LINUX_SUNRPC_XPRTRDMA_H
 #define _LINUX_SUNRPC_XPRTRDMA_H
 
-/*
- * rpcbind (v3+) RDMA netid.
- */
-#define RPCBIND_NETID_RDMA     "rdma"
-
 /*
  * Constants. Max RPC/NFS header is big enough to account for
  * additional marshaling buffers passed down by Linux client.
index 28f0e65..8f4d4bf 100644 (file)
@@ -108,8 +108,6 @@ struct virtio_device {
        void *priv;
 };
 
-bool virtio_device_is_legacy_only(struct virtio_device_id id);
-
 static inline struct virtio_device *dev_to_virtio(struct device *_dev)
 {
        return container_of(_dev, struct virtio_device, dev);
index ca3ed78..1e306f7 100644 (file)
@@ -298,13 +298,6 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
                }                                                       \
        } while(0)
 
-static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset)
-{
-       u8 ret;
-       vdev->config->get(vdev, offset, &ret, sizeof(ret));
-       return ret;
-}
-
 /* Read @count fields, @bytes each. */
 static inline void __virtio_cread_many(struct virtio_device *vdev,
                                       unsigned int offset,
@@ -326,7 +319,6 @@ static inline void __virtio_cread_many(struct virtio_device *vdev,
        } while (gen != old);
 }
 
-
 static inline void virtio_cread_bytes(struct virtio_device *vdev,
                                      unsigned int offset,
                                      void *buf, size_t len)
@@ -334,6 +326,13 @@ static inline void virtio_cread_bytes(struct virtio_device *vdev,
        __virtio_cread_many(vdev, offset, buf, len, 1);
 }
 
+static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset)
+{
+       u8 ret;
+       vdev->config->get(vdev, offset, &ret, sizeof(ret));
+       return ret;
+}
+
 static inline void virtio_cwrite8(struct virtio_device *vdev,
                                  unsigned int offset, u8 val)
 {
@@ -374,7 +373,6 @@ static inline u64 virtio_cread64(struct virtio_device *vdev,
                                 unsigned int offset)
 {
        u64 ret;
-       vdev->config->get(vdev, offset, &ret, sizeof(ret));
        __virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret));
        return virtio64_to_cpu(vdev, (__force __virtio64)ret);
 }
index 67e06fe..8e50888 100644 (file)
  * actually quite cheap.
  */
 
-#ifdef CONFIG_SMP
 static inline void virtio_mb(bool weak_barriers)
 {
+#ifdef CONFIG_SMP
        if (weak_barriers)
                smp_mb();
        else
+#endif
                mb();
 }
 
 static inline void virtio_rmb(bool weak_barriers)
 {
        if (weak_barriers)
-               smp_rmb();
+               dma_rmb();
        else
                rmb();
 }
@@ -41,26 +42,10 @@ static inline void virtio_rmb(bool weak_barriers)
 static inline void virtio_wmb(bool weak_barriers)
 {
        if (weak_barriers)
-               smp_wmb();
+               dma_wmb();
        else
                wmb();
 }
-#else
-static inline void virtio_mb(bool weak_barriers)
-{
-       mb();
-}
-
-static inline void virtio_rmb(bool weak_barriers)
-{
-       rmb();
-}
-
-static inline void virtio_wmb(bool weak_barriers)
-{
-       wmb();
-}
-#endif
 
 struct virtio_device;
 struct virtqueue;
index d3583d3..54e7af3 100644 (file)
@@ -20,6 +20,8 @@
 #define ISCSIT_MIN_TAGS                        16
 #define ISCSIT_EXTRA_TAGS              8
 #define ISCSIT_TCP_BACKLOG             256
+#define ISCSI_RX_THREAD_NAME           "iscsi_trx"
+#define ISCSI_TX_THREAD_NAME           "iscsi_ttx"
 
 /* struct iscsi_node_attrib sanity values */
 #define NA_DATAOUT_TIMEOUT             3
@@ -60,6 +62,7 @@
 #define TA_CACHE_CORE_NPS              0
 /* T10 protection information disabled by default */
 #define TA_DEFAULT_T10_PI              0
+#define TA_DEFAULT_FABRIC_PROT_TYPE    0
 
 #define ISCSI_IOV_DATA_BUFFER          5
 
@@ -600,8 +603,11 @@ struct iscsi_conn {
        struct iscsi_tpg_np     *tpg_np;
        /* Pointer to parent session */
        struct iscsi_session    *sess;
-       /* Pointer to thread_set in use for this conn's threads */
-       struct iscsi_thread_set *thread_set;
+       int                     bitmap_id;
+       int                     rx_thread_active;
+       struct task_struct      *rx_thread;
+       int                     tx_thread_active;
+       struct task_struct      *tx_thread;
        /* list_head for session connection list */
        struct list_head        conn_list;
 } ____cacheline_aligned;
@@ -767,6 +773,7 @@ struct iscsi_tpg_attrib {
        u32                     demo_mode_discovery;
        u32                     default_erl;
        u8                      t10_pi;
+       u32                     fabric_prot_type;
        struct iscsi_portal_group *tpg;
 };
 
@@ -871,10 +878,10 @@ struct iscsit_global {
        /* Unique identifier used for the authentication daemon */
        u32                     auth_id;
        u32                     inactive_ts;
-       /* Thread Set bitmap count */
-       int                     ts_bitmap_count;
+#define ISCSIT_BITMAP_BITS     262144
        /* Thread Set bitmap pointer */
        unsigned long           *ts_bitmap;
+       spinlock_t              ts_bitmap_lock;
        /* Used for iSCSI discovery session authentication */
        struct iscsi_node_acl   discovery_acl;
        struct iscsi_portal_group       *discovery_tpg;
index 672150b..480e9f8 100644 (file)
@@ -165,10 +165,8 @@ enum se_cmd_flags_table {
        SCF_SEND_DELAYED_TAS            = 0x00004000,
        SCF_ALUA_NON_OPTIMIZED          = 0x00008000,
        SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000,
-       SCF_ACK_KREF                    = 0x00040000,
        SCF_COMPARE_AND_WRITE           = 0x00080000,
        SCF_COMPARE_AND_WRITE_POST      = 0x00100000,
-       SCF_CMD_XCOPY_PASSTHROUGH       = 0x00200000,
 };
 
 /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
@@ -520,11 +518,11 @@ struct se_cmd {
        struct list_head        se_cmd_list;
        struct completion       cmd_wait_comp;
        struct kref             cmd_kref;
-       struct target_core_fabric_ops *se_tfo;
+       const struct target_core_fabric_ops *se_tfo;
        sense_reason_t          (*execute_cmd)(struct se_cmd *);
        sense_reason_t          (*execute_rw)(struct se_cmd *, struct scatterlist *,
                                              u32, enum dma_data_direction);
-       sense_reason_t (*transport_complete_callback)(struct se_cmd *);
+       sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool);
 
        unsigned char           *t_task_cdb;
        unsigned char           __t_task_cdb[TCM_MAX_COMMAND_SIZE];
@@ -591,6 +589,7 @@ struct se_node_acl {
        bool                    acl_stop:1;
        u32                     queue_depth;
        u32                     acl_index;
+       enum target_prot_type   saved_prot_type;
 #define MAX_ACL_TAG_SIZE 64
        char                    acl_tag[MAX_ACL_TAG_SIZE];
        /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
@@ -616,6 +615,7 @@ struct se_session {
        unsigned                sess_tearing_down:1;
        u64                     sess_bin_isid;
        enum target_prot_op     sup_prot_ops;
+       enum target_prot_type   sess_prot_type;
        struct se_node_acl      *se_node_acl;
        struct se_portal_group *se_tpg;
        void                    *fabric_sess_ptr;
@@ -890,7 +890,7 @@ struct se_portal_group {
        /* List of TCM sessions associated wth this TPG */
        struct list_head        tpg_sess_list;
        /* Pointer to $FABRIC_MOD dependent code */
-       struct target_core_fabric_ops *se_tpg_tfo;
+       const struct target_core_fabric_ops *se_tpg_tfo;
        struct se_wwn           *se_tpg_wwn;
        struct config_group     tpg_group;
        struct config_group     *tpg_default_groups[7];
index e080138..25bb04c 100644 (file)
@@ -5,12 +5,6 @@
 #define TARGET_CORE_NAME_MAX_LEN       64
 #define TARGET_FABRIC_NAME_SIZE                32
 
-extern struct target_fabric_configfs *target_fabric_configfs_init(
-                               struct module *, const char *);
-extern void target_fabric_configfs_free(struct target_fabric_configfs *);
-extern int target_fabric_configfs_register(struct target_fabric_configfs *);
-extern void target_fabric_configfs_deregister(struct target_fabric_configfs *);
-
 struct target_fabric_configfs_template {
        struct config_item_type tfc_discovery_cit;
        struct config_item_type tfc_wwn_cit;
index 22a4e98..17c7f5a 100644 (file)
@@ -2,6 +2,8 @@
 #define TARGET_CORE_FABRIC_H
 
 struct target_core_fabric_ops {
+       struct module *module;
+       const char *name;
        struct configfs_subsystem *tf_subsys;
        char *(*get_fabric_name)(void);
        u8 (*get_fabric_proto_ident)(struct se_portal_group *);
@@ -27,6 +29,14 @@ struct target_core_fabric_ops {
         * inquiry response
         */
        int (*tpg_check_demo_mode_login_only)(struct se_portal_group *);
+       /*
+        * Optionally used as a configfs tunable to determine when
+        * target-core should signal the PROTECT=1 feature bit for
+        * backends that don't support T10-PI, so that either fabric
+        * HW offload or target-core emulation performs the associated
+        * WRITE_STRIP and READ_INSERT operations.
+        */
+       int (*tpg_check_prot_fabric_only)(struct se_portal_group *);
        struct se_node_acl *(*tpg_alloc_fabric_acl)(
                                        struct se_portal_group *);
        void (*tpg_release_fabric_acl)(struct se_portal_group *,
@@ -82,8 +92,23 @@ struct target_core_fabric_ops {
        struct se_node_acl *(*fabric_make_nodeacl)(struct se_portal_group *,
                                struct config_group *, const char *);
        void (*fabric_drop_nodeacl)(struct se_node_acl *);
+
+       struct configfs_attribute **tfc_discovery_attrs;
+       struct configfs_attribute **tfc_wwn_attrs;
+       struct configfs_attribute **tfc_tpg_base_attrs;
+       struct configfs_attribute **tfc_tpg_np_base_attrs;
+       struct configfs_attribute **tfc_tpg_attrib_attrs;
+       struct configfs_attribute **tfc_tpg_auth_attrs;
+       struct configfs_attribute **tfc_tpg_param_attrs;
+       struct configfs_attribute **tfc_tpg_nacl_base_attrs;
+       struct configfs_attribute **tfc_tpg_nacl_attrib_attrs;
+       struct configfs_attribute **tfc_tpg_nacl_auth_attrs;
+       struct configfs_attribute **tfc_tpg_nacl_param_attrs;
 };
 
+int target_register_template(const struct target_core_fabric_ops *fo);
+void target_unregister_template(const struct target_core_fabric_ops *fo);
+
 struct se_session *transport_init_session(enum target_prot_op);
 int transport_alloc_session_tags(struct se_session *, unsigned int,
                unsigned int);
@@ -95,13 +120,15 @@ void       transport_register_session(struct se_portal_group *,
                struct se_node_acl *, struct se_session *, void *);
 void   target_get_session(struct se_session *);
 void   target_put_session(struct se_session *);
+ssize_t        target_show_dynamic_sessions(struct se_portal_group *, char *);
 void   transport_free_session(struct se_session *);
 void   target_put_nacl(struct se_node_acl *);
 void   transport_deregister_session_configfs(struct se_session *);
 void   transport_deregister_session(struct se_session *);
 
 
-void   transport_init_se_cmd(struct se_cmd *, struct target_core_fabric_ops *,
+void   transport_init_se_cmd(struct se_cmd *,
+               const struct target_core_fabric_ops *,
                struct se_session *, u32, int, int, unsigned char *);
 sense_reason_t transport_lookup_cmd_lun(struct se_cmd *, u32);
 sense_reason_t target_setup_cmd_from_cdb(struct se_cmd *, unsigned char *);
@@ -153,8 +180,8 @@ int core_tpg_set_initiator_node_queue_depth(struct se_portal_group *,
                unsigned char *, u32, int);
 int    core_tpg_set_initiator_node_tag(struct se_portal_group *,
                struct se_node_acl *, const char *);
-int    core_tpg_register(struct target_core_fabric_ops *, struct se_wwn *,
-               struct se_portal_group *, void *, int);
+int    core_tpg_register(const struct target_core_fabric_ops *,
+               struct se_wwn *, struct se_portal_group *, void *, int);
 int    core_tpg_deregister(struct se_portal_group *);
 
 /* SAS helpers */
index b32a149..7a0649c 100644 (file)
@@ -90,6 +90,11 @@ static struct target_fabric_tpg_attribute _fabric##_tpg_##_name =    \
        _fabric##_tpg_store_##_name);
 
 
+#define TF_TPG_BASE_ATTR_RO(_fabric, _name)                            \
+static struct target_fabric_tpg_attribute _fabric##_tpg_##_name =      \
+       __CONFIGFS_EATTR_RO(_name,                                      \
+       _fabric##_tpg_show_##_name);
+
 CONFIGFS_EATTR_STRUCT(target_fabric_wwn, target_fabric_configfs);
 #define TF_WWN_ATTR(_fabric, _name, _mode)                             \
 static struct target_fabric_wwn_attribute _fabric##_wwn_##_name =      \
index 572e650..7f79cf4 100644 (file)
@@ -407,10 +407,10 @@ TRACE_EVENT(btrfs_sync_file,
 
        TP_fast_assign(
                struct dentry *dentry = file->f_path.dentry;
-               struct inode *inode = dentry->d_inode;
+               struct inode *inode = d_inode(dentry);
 
                __entry->ino            = inode->i_ino;
-               __entry->parent         = dentry->d_parent->d_inode->i_ino;
+               __entry->parent         = d_inode(dentry->d_parent)->i_ino;
                __entry->datasync       = datasync;
                __entry->root_objectid  =
                                 BTRFS_I(inode)->root->root_key.objectid;
index 7f20707..fc733d2 100644 (file)
@@ -439,10 +439,10 @@ TRACE_EVENT(ext3_sync_file_enter,
        TP_fast_assign(
                struct dentry *dentry = file->f_path.dentry;
 
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
-               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
                __entry->datasync       = datasync;
-               __entry->parent         = dentry->d_parent->d_inode->i_ino;
+               __entry->parent         = d_inode(dentry->d_parent)->i_ino;
        ),
 
        TP_printk("dev %d,%d ino %lu parent %ld datasync %d ",
@@ -710,9 +710,9 @@ TRACE_EVENT(ext3_unlink_enter,
 
        TP_fast_assign(
                __entry->parent         = parent->i_ino;
-               __entry->ino            = dentry->d_inode->i_ino;
-               __entry->size           = dentry->d_inode->i_size;
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
+               __entry->size           = d_inode(dentry)->i_size;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
        ),
 
        TP_printk("dev %d,%d ino %lu size %lld parent %ld",
@@ -734,8 +734,8 @@ TRACE_EVENT(ext3_unlink_exit,
        ),
 
        TP_fast_assign(
-               __entry->ino            = dentry->d_inode->i_ino;
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
                __entry->ret            = ret;
        ),
 
index 47fca36..08ec3dd 100644 (file)
@@ -872,10 +872,10 @@ TRACE_EVENT(ext4_sync_file_enter,
        TP_fast_assign(
                struct dentry *dentry = file->f_path.dentry;
 
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
-               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
                __entry->datasync       = datasync;
-               __entry->parent         = dentry->d_parent->d_inode->i_ino;
+               __entry->parent         = d_inode(dentry->d_parent)->i_ino;
        ),
 
        TP_printk("dev %d,%d ino %lu parent %lu datasync %d ",
@@ -1453,10 +1453,10 @@ TRACE_EVENT(ext4_unlink_enter,
        ),
 
        TP_fast_assign(
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
-               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
                __entry->parent         = parent->i_ino;
-               __entry->size           = dentry->d_inode->i_size;
+               __entry->size           = d_inode(dentry)->i_size;
        ),
 
        TP_printk("dev %d,%d ino %lu size %lld parent %lu",
@@ -1477,8 +1477,8 @@ TRACE_EVENT(ext4_unlink_exit,
        ),
 
        TP_fast_assign(
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
-               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
                __entry->ret            = ret;
        ),
 
index 640954b..1a0006a 100644 (file)
@@ -431,6 +431,7 @@ header-y += virtio_blk.h
 header-y += virtio_config.h
 header-y += virtio_console.h
 header-y += virtio_ids.h
+header-y += virtio_input.h
 header-y += virtio_net.h
 header-y += virtio_pci.h
 header-y += virtio_ring.h
index d1197ae..3e445a7 100644 (file)
  */
 #define FALLOC_FL_ZERO_RANGE           0x10
 
+/*
+ * FALLOC_FL_INSERT_RANGE is use to insert space within the file size without
+ * overwriting any existing data. The contents of the file beyond offset are
+ * shifted towards right by len bytes to create a hole.  As such, this
+ * operation will increase the size of the file by len bytes.
+ *
+ * Different filesystems may implement different limitations on the granularity
+ * of the operation. Most will limit operations to filesystem block size
+ * boundaries, but this boundary may be larger or smaller depending on
+ * the filesystem and/or the configuration of the filesystem or file.
+ *
+ * Attempting to insert space using this flag at OR beyond the end of
+ * the file is considered an illegal operation - just use ftruncate(2) or
+ * fallocate(2) with mode 0 for such type of operations.
+ */
+#define FALLOC_FL_INSERT_RANGE         0x20
+
 #endif /* _UAPI_FALLOC_H_ */
index f574d7b..4b60056 100644 (file)
@@ -813,6 +813,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_MIPS_MSA 112
 #define KVM_CAP_S390_INJECT_IRQ 113
 #define KVM_CAP_S390_IRQ_STATE 114
+#define KVM_CAP_PPC_HWRNG 115
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
index 35f5f4c..adc0aff 100644 (file)
  */
 #define NFS4_MAX_BACK_CHANNEL_OPS 2
 
-enum nfs4_acl_whotype {
-       NFS4_ACL_WHO_NAMED = 0,
-       NFS4_ACL_WHO_OWNER,
-       NFS4_ACL_WHO_GROUP,
-       NFS4_ACL_WHO_EVERYONE,
-};
-
 #endif /* _UAPI_LINUX_NFS4_H */
 
 /*
index 8d4b1c7..038e36c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * include/linux/nfs_idmap.h
+ * include/uapi/linux/nfs_idmap.h
  *
  *  UID and GID to name mapping for clients.
  *
index 0bf130a..28ec6c9 100644 (file)
 
 #include <linux/sunrpc/debug.h>
 
-/*
- * Enable debugging for nfsd.
- * Requires RPC_DEBUG.
- */
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define NFSD_DEBUG            1
-#endif
-
 /*
  * knfsd debug flags
  */
index d3bd6ff..0df7bd5 100644 (file)
@@ -21,6 +21,9 @@
 
 /*
  * Export flags.
+ *
+ * Please update the expflags[] array in fs/nfsd/export.c when adding
+ * a new flag.
  */
 #define NFSEXP_READONLY                0x0001
 #define NFSEXP_INSECURE_PORT   0x0002
index 49f4210..2ae6131 100644 (file)
 #define MD_DISK_ACTIVE         1 /* disk is running or spare disk */
 #define MD_DISK_SYNC           2 /* disk is in sync with the raid set */
 #define MD_DISK_REMOVED                3 /* disk is in sync with the raid set */
+#define MD_DISK_CLUSTER_ADD     4 /* Initiate a disk add across the cluster
+                                  * For clustered enviroments only.
+                                  */
+#define MD_DISK_CANDIDATE      5 /* disk is added as spare (local) until confirmed
+                                  * For clustered enviroments only.
+                                  */
 
 #define        MD_DISK_WRITEMOSTLY     9 /* disk is "write-mostly" is RAID1 config.
                                   * read requests will only be sent here in
@@ -101,6 +107,7 @@ typedef struct mdp_device_descriptor_s {
 #define MD_SB_CLEAN            0
 #define MD_SB_ERRORS           1
 
+#define        MD_SB_CLUSTERED         5 /* MD is clustered */
 #define        MD_SB_BITMAP_PRESENT    8 /* bitmap may be present nearby */
 
 /*
index 74e7c60..1cb8aa6 100644 (file)
@@ -62,6 +62,7 @@
 #define STOP_ARRAY             _IO (MD_MAJOR, 0x32)
 #define STOP_ARRAY_RO          _IO (MD_MAJOR, 0x33)
 #define RESTART_ARRAY_RW       _IO (MD_MAJOR, 0x34)
+#define CLUSTERED_DISK_NACK    _IO (MD_MAJOR, 0x35)
 
 /* 63 partitions with the alternate major number (mdp) */
 #define MdpMinorShift 6
index b483d19..b67f99d 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/types.h>
 #include <linux/uio.h>
 
-#define TCMU_VERSION "1.0"
+#define TCMU_VERSION "2.0"
 
 /*
  * Ring Design
  * should process the next packet the same way, and so on.
  */
 
-#define TCMU_MAILBOX_VERSION 1
+#define TCMU_MAILBOX_VERSION 2
 #define ALIGN_SIZE 64 /* Should be enough for most CPUs */
 
+/* See https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */
+#define xstr(s) str(s)
+#define str(s) #s
+
 struct tcmu_mailbox {
        __u16 version;
        __u16 flags;
@@ -64,31 +68,36 @@ enum tcmu_opcode {
  * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode.
  */
 struct tcmu_cmd_entry_hdr {
-               __u32 len_op;
+       __u32 len_op;
+       __u16 cmd_id;
+       __u8 kflags;
+#define TCMU_UFLAG_UNKNOWN_OP 0x1
+       __u8 uflags;
+
 } __packed;
 
 #define TCMU_OP_MASK 0x7
 
-static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr)
+static inline enum tcmu_opcode tcmu_hdr_get_op(__u32 len_op)
 {
-       return hdr->len_op & TCMU_OP_MASK;
+       return len_op & TCMU_OP_MASK;
 }
 
-static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op)
+static inline void tcmu_hdr_set_op(__u32 *len_op, enum tcmu_opcode op)
 {
-       hdr->len_op &= ~TCMU_OP_MASK;
-       hdr->len_op |= (op & TCMU_OP_MASK);
+       *len_op &= ~TCMU_OP_MASK;
+       *len_op |= (op & TCMU_OP_MASK);
 }
 
-static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr)
+static inline __u32 tcmu_hdr_get_len(__u32 len_op)
 {
-       return hdr->len_op & ~TCMU_OP_MASK;
+       return len_op & ~TCMU_OP_MASK;
 }
 
-static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
+static inline void tcmu_hdr_set_len(__u32 *len_op, __u32 len)
 {
-       hdr->len_op &= TCMU_OP_MASK;
-       hdr->len_op |= len;
+       *len_op &= TCMU_OP_MASK;
+       *len_op |= len;
 }
 
 /* Currently the same as SCSI_SENSE_BUFFERSIZE */
@@ -97,13 +106,14 @@ static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
 struct tcmu_cmd_entry {
        struct tcmu_cmd_entry_hdr hdr;
 
-       uint16_t cmd_id;
-       uint16_t __pad1;
-
        union {
                struct {
+                       uint32_t iov_cnt;
+                       uint32_t iov_bidi_cnt;
+                       uint32_t iov_dif_cnt;
                        uint64_t cdb_off;
-                       uint64_t iov_cnt;
+                       uint64_t __pad1;
+                       uint64_t __pad2;
                        struct iovec iov[0];
                } req;
                struct {
index 4b0488f..984169a 100644 (file)
@@ -25,6 +25,7 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE. */
+#include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
@@ -38,9 +39,9 @@
 
 struct virtio_balloon_config {
        /* Number of pages host wants Guest to give up. */
-       __le32 num_pages;
+       __u32 num_pages;
        /* Number of pages we've actually got in balloon. */
-       __le32 actual;
+       __u32 actual;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
@@ -51,9 +52,32 @@ struct virtio_balloon_config {
 #define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
 #define VIRTIO_BALLOON_S_NR       6
 
+/*
+ * Memory statistics structure.
+ * Driver fills an array of these structures and passes to device.
+ *
+ * NOTE: fields are laid out in a way that would make compiler add padding
+ * between and after fields, so we have to use compiler-specific attributes to
+ * pack it, to disable this padding. This also often causes compiler to
+ * generate suboptimal code.
+ *
+ * We maintain this statistics structure format for backwards compatibility,
+ * but don't follow this example.
+ *
+ * If implementing a similar structure, do something like the below instead:
+ *     struct virtio_balloon_stat {
+ *         __virtio16 tag;
+ *         __u8 reserved[6];
+ *         __virtio64 val;
+ *     };
+ *
+ * In other words, add explicit reserved fields to align field and
+ * structure boundaries at field size, avoiding compiler padding
+ * without the packed attribute.
+ */
 struct virtio_balloon_stat {
-       __u16 tag;
-       __u64 val;
+       __virtio16 tag;
+       __virtio64 val;
 } __attribute__((packed));
 
 #endif /* _LINUX_VIRTIO_BALLOON_H */
index 284fc3a..5f60aa4 100644 (file)
@@ -39,5 +39,6 @@
 #define VIRTIO_ID_9P           9 /* 9p virtio console */
 #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
 #define VIRTIO_ID_CAIF        12 /* Virtio caif */
+#define VIRTIO_ID_INPUT        18 /* virtio input */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_input.h b/include/uapi/linux/virtio_input.h
new file mode 100644 (file)
index 0000000..a7fe5c8
--- /dev/null
@@ -0,0 +1,76 @@
+#ifndef _LINUX_VIRTIO_INPUT_H
+#define _LINUX_VIRTIO_INPUT_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+
+#include <linux/types.h>
+
+enum virtio_input_config_select {
+       VIRTIO_INPUT_CFG_UNSET      = 0x00,
+       VIRTIO_INPUT_CFG_ID_NAME    = 0x01,
+       VIRTIO_INPUT_CFG_ID_SERIAL  = 0x02,
+       VIRTIO_INPUT_CFG_ID_DEVIDS  = 0x03,
+       VIRTIO_INPUT_CFG_PROP_BITS  = 0x10,
+       VIRTIO_INPUT_CFG_EV_BITS    = 0x11,
+       VIRTIO_INPUT_CFG_ABS_INFO   = 0x12,
+};
+
+struct virtio_input_absinfo {
+       __u32 min;
+       __u32 max;
+       __u32 fuzz;
+       __u32 flat;
+       __u32 res;
+};
+
+struct virtio_input_devids {
+       __u16 bustype;
+       __u16 vendor;
+       __u16 product;
+       __u16 version;
+};
+
+struct virtio_input_config {
+       __u8    select;
+       __u8    subsel;
+       __u8    size;
+       __u8    reserved[5];
+       union {
+               char string[128];
+               __u8 bitmap[128];
+               struct virtio_input_absinfo abs;
+               struct virtio_input_devids ids;
+       } u;
+};
+
+struct virtio_input_event {
+       __le16 type;
+       __le16 code;
+       __le32 value;
+};
+
+#endif /* _LINUX_VIRTIO_INPUT_H */
index 46145a5..a45be6b 100644 (file)
@@ -864,7 +864,7 @@ struct snd_ctl_elem_id {
        snd_ctl_elem_iface_t iface;     /* interface identifier */
        unsigned int device;            /* device/client number */
        unsigned int subdevice;         /* subdevice (substream) number */
-       unsigned char name[44];         /* ASCII name of item */
+       unsigned char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];              /* ASCII name of item */
        unsigned int index;             /* index of item */
 };
 
index 7635a1c..3aaea7f 100644 (file)
@@ -466,7 +466,7 @@ out_unlock:
 
 static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
        dir->i_size -= DIRENT_SIZE;
@@ -770,7 +770,7 @@ static struct file *do_open(struct path *path, int oflag)
        if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
                return ERR_PTR(-EINVAL);
        acc = oflag2acc[oflag & O_ACCMODE];
-       if (inode_permission(path->dentry->d_inode, acc))
+       if (inode_permission(d_inode(path->dentry), acc))
                return ERR_PTR(-EACCES);
        return dentry_open(path, oflag, current_cred());
 }
@@ -802,7 +802,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
 
        ro = mnt_want_write(mnt);       /* we'll drop it in any case */
        error = 0;
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
        path.dentry = lookup_one_len(name->name, root, strlen(name->name));
        if (IS_ERR(path.dentry)) {
                error = PTR_ERR(path.dentry);
@@ -811,7 +811,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
        path.mnt = mntget(mnt);
 
        if (oflag & O_CREAT) {
-               if (path.dentry->d_inode) {     /* entry already exists */
+               if (d_really_is_positive(path.dentry)) {        /* entry already exists */
                        audit_inode(name, path.dentry, 0);
                        if (oflag & O_EXCL) {
                                error = -EEXIST;
@@ -824,12 +824,12 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
                                goto out;
                        }
                        audit_inode_parent_hidden(name, root);
-                       filp = do_create(ipc_ns, root->d_inode,
+                       filp = do_create(ipc_ns, d_inode(root),
                                                &path, oflag, mode,
                                                u_attr ? &attr : NULL);
                }
        } else {
-               if (!path.dentry->d_inode) {
+               if (d_really_is_negative(path.dentry)) {
                        error = -ENOENT;
                        goto out;
                }
@@ -848,7 +848,7 @@ out_putfd:
                put_unused_fd(fd);
                fd = error;
        }
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
        if (!ro)
                mnt_drop_write(mnt);
 out_putname:
@@ -873,7 +873,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
        err = mnt_want_write(mnt);
        if (err)
                goto out_name;
-       mutex_lock_nested(&mnt->mnt_root->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(mnt->mnt_root)->i_mutex, I_MUTEX_PARENT);
        dentry = lookup_one_len(name->name, mnt->mnt_root,
                                strlen(name->name));
        if (IS_ERR(dentry)) {
@@ -881,17 +881,17 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
                goto out_unlock;
        }
 
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
        if (!inode) {
                err = -ENOENT;
        } else {
                ihold(inode);
-               err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
+               err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL);
        }
        dput(dentry);
 
 out_unlock:
-       mutex_unlock(&mnt->mnt_root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(mnt->mnt_root)->i_mutex);
        if (inode)
                iput(inode);
        mnt_drop_write(mnt);
index d280a74..6d76707 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1132,7 +1132,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
        path = shp->shm_file->f_path;
        path_get(&path);
        shp->shm_nattch++;
-       size = i_size_read(path.dentry->d_inode);
+       size = i_size_read(d_inode(path.dentry));
        ipc_unlock_object(&shp->shm_perm);
        rcu_read_unlock();
 
index 72ab759..1c13e42 100644 (file)
@@ -43,6 +43,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/file.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/atomic.h>
@@ -107,6 +108,7 @@ static u32  audit_rate_limit;
  * When set to zero, this means unlimited. */
 static u32     audit_backlog_limit = 64;
 #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
+static u32     audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
 static u32     audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
 static u32     audit_backlog_wait_overflow = 0;
 
@@ -338,13 +340,13 @@ static int audit_set_backlog_limit(u32 limit)
 static int audit_set_backlog_wait_time(u32 timeout)
 {
        return audit_do_config_change("audit_backlog_wait_time",
-                                     &audit_backlog_wait_time, timeout);
+                                     &audit_backlog_wait_time_master, timeout);
 }
 
 static int audit_set_enabled(u32 state)
 {
        int rc;
-       if (state < AUDIT_OFF || state > AUDIT_LOCKED)
+       if (state > AUDIT_LOCKED)
                return -EINVAL;
 
        rc =  audit_do_config_change("audit_enabled", &audit_enabled, state);
@@ -663,7 +665,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
        case AUDIT_MAKE_EQUIV:
                /* Only support auditd and auditctl in initial pid namespace
                 * for now. */
-               if ((task_active_pid_ns(current) != &init_pid_ns))
+               if (task_active_pid_ns(current) != &init_pid_ns)
                        return -EPERM;
 
                if (!netlink_capable(skb, CAP_AUDIT_CONTROL))
@@ -834,7 +836,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                s.lost                  = atomic_read(&audit_lost);
                s.backlog               = skb_queue_len(&audit_skb_queue);
                s.feature_bitmap        = AUDIT_FEATURE_BITMAP_ALL;
-               s.backlog_wait_time     = audit_backlog_wait_time;
+               s.backlog_wait_time     = audit_backlog_wait_time_master;
                audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
                break;
        }
@@ -877,8 +879,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                if (s.mask & AUDIT_STATUS_BACKLOG_WAIT_TIME) {
                        if (sizeof(s) > (size_t)nlh->nlmsg_len)
                                return -EINVAL;
-                       if (s.backlog_wait_time < 0 ||
-                           s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME)
+                       if (s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME)
                                return -EINVAL;
                        err = audit_set_backlog_wait_time(s.backlog_wait_time);
                        if (err < 0)
@@ -1385,7 +1386,8 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
                return NULL;
        }
 
-       audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
+       if (!reserve)
+               audit_backlog_wait_time = audit_backlog_wait_time_master;
 
        ab = audit_buffer_alloc(ctx, gfp_mask, type);
        if (!ab) {
@@ -1759,7 +1761,7 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
        } else
                audit_log_format(ab, " name=(null)");
 
-       if (n->ino != (unsigned long)-1) {
+       if (n->ino != (unsigned long)-1)
                audit_log_format(ab, " inode=%lu"
                                 " dev=%02x:%02x mode=%#ho"
                                 " ouid=%u ogid=%u rdev=%02x:%02x",
@@ -1771,7 +1773,6 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
                                 from_kgid(&init_user_ns, n->gid),
                                 MAJOR(n->rdev),
                                 MINOR(n->rdev));
-       }
        if (n->osid != 0) {
                char *ctx = NULL;
                u32 len;
@@ -1838,11 +1839,29 @@ error_path:
 }
 EXPORT_SYMBOL(audit_log_task_context);
 
+void audit_log_d_path_exe(struct audit_buffer *ab,
+                         struct mm_struct *mm)
+{
+       struct file *exe_file;
+
+       if (!mm)
+               goto out_null;
+
+       exe_file = get_mm_exe_file(mm);
+       if (!exe_file)
+               goto out_null;
+
+       audit_log_d_path(ab, " exe=", &exe_file->f_path);
+       fput(exe_file);
+       return;
+out_null:
+       audit_log_format(ab, " exe=(null)");
+}
+
 void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
 {
        const struct cred *cred;
        char comm[sizeof(tsk->comm)];
-       struct mm_struct *mm = tsk->mm;
        char *tty;
 
        if (!ab)
@@ -1878,13 +1897,7 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
        audit_log_format(ab, " comm=");
        audit_log_untrustedstring(ab, get_task_comm(comm, tsk));
 
-       if (mm) {
-               down_read(&mm->mmap_sem);
-               if (mm->exe_file)
-                       audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
-               up_read(&mm->mmap_sem);
-       } else
-               audit_log_format(ab, " exe=(null)");
+       audit_log_d_path_exe(ab, tsk->mm);
        audit_log_task_context(ab);
 }
 EXPORT_SYMBOL(audit_log_task_info);
@@ -1915,7 +1928,7 @@ void audit_log_link_denied(const char *operation, struct path *link)
 
        /* Generate AUDIT_PATH record with object. */
        name->type = AUDIT_TYPE_NORMAL;
-       audit_copy_inode(name, link->dentry, link->dentry->d_inode);
+       audit_copy_inode(name, link->dentry, d_backing_inode(link->dentry));
        audit_log_name(current->audit_context, name, link, 0, NULL);
 out:
        kfree(name);
index 1caa0d3..d641f9b 100644 (file)
@@ -257,6 +257,9 @@ extern struct list_head audit_filter_list[];
 
 extern struct audit_entry *audit_dupe_rule(struct audit_krule *old);
 
+extern void audit_log_d_path_exe(struct audit_buffer *ab,
+                                struct mm_struct *mm);
+
 /* audit watch functions */
 #ifdef CONFIG_AUDIT_WATCH
 extern void audit_put_watch(struct audit_watch *watch);
index 2e0c974..b0f9877 100644 (file)
@@ -37,6 +37,7 @@ struct audit_chunk {
 
 static LIST_HEAD(tree_list);
 static LIST_HEAD(prune_list);
+static struct task_struct *prune_thread;
 
 /*
  * One struct chunk is attached to each inode of interest.
@@ -576,7 +577,7 @@ int audit_remove_tree_rule(struct audit_krule *rule)
 
 static int compare_root(struct vfsmount *mnt, void *arg)
 {
-       return mnt->mnt_root->d_inode == arg;
+       return d_backing_inode(mnt->mnt_root) == arg;
 }
 
 void audit_trim_trees(void)
@@ -648,7 +649,58 @@ void audit_put_tree(struct audit_tree *tree)
 
 static int tag_mount(struct vfsmount *mnt, void *arg)
 {
-       return tag_chunk(mnt->mnt_root->d_inode, arg);
+       return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
+}
+
+/*
+ * That gets run when evict_chunk() ends up needing to kill audit_tree.
+ * Runs from a separate thread.
+ */
+static int prune_tree_thread(void *unused)
+{
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (list_empty(&prune_list))
+                       schedule();
+               __set_current_state(TASK_RUNNING);
+
+               mutex_lock(&audit_cmd_mutex);
+               mutex_lock(&audit_filter_mutex);
+
+               while (!list_empty(&prune_list)) {
+                       struct audit_tree *victim;
+
+                       victim = list_entry(prune_list.next,
+                                       struct audit_tree, list);
+                       list_del_init(&victim->list);
+
+                       mutex_unlock(&audit_filter_mutex);
+
+                       prune_one(victim);
+
+                       mutex_lock(&audit_filter_mutex);
+               }
+
+               mutex_unlock(&audit_filter_mutex);
+               mutex_unlock(&audit_cmd_mutex);
+       }
+       return 0;
+}
+
+static int audit_launch_prune(void)
+{
+       if (prune_thread)
+               return 0;
+       prune_thread = kthread_create(prune_tree_thread, NULL,
+                               "audit_prune_tree");
+       if (IS_ERR(prune_thread)) {
+               pr_err("cannot start thread audit_prune_tree");
+               prune_thread = NULL;
+               return -ENOMEM;
+       } else {
+               wake_up_process(prune_thread);
+               return 0;
+       }
 }
 
 /* called with audit_filter_mutex */
@@ -674,6 +726,12 @@ int audit_add_tree_rule(struct audit_krule *rule)
        /* do not set rule->tree yet */
        mutex_unlock(&audit_filter_mutex);
 
+       if (unlikely(!prune_thread)) {
+               err = audit_launch_prune();
+               if (err)
+                       goto Err;
+       }
+
        err = kern_path(tree->pathname, 0, &path);
        if (err)
                goto Err;
@@ -811,36 +869,10 @@ int audit_tag_tree(char *old, char *new)
        return failed;
 }
 
-/*
- * That gets run when evict_chunk() ends up needing to kill audit_tree.
- * Runs from a separate thread.
- */
-static int prune_tree_thread(void *unused)
-{
-       mutex_lock(&audit_cmd_mutex);
-       mutex_lock(&audit_filter_mutex);
-
-       while (!list_empty(&prune_list)) {
-               struct audit_tree *victim;
-
-               victim = list_entry(prune_list.next, struct audit_tree, list);
-               list_del_init(&victim->list);
-
-               mutex_unlock(&audit_filter_mutex);
-
-               prune_one(victim);
-
-               mutex_lock(&audit_filter_mutex);
-       }
-
-       mutex_unlock(&audit_filter_mutex);
-       mutex_unlock(&audit_cmd_mutex);
-       return 0;
-}
 
 static void audit_schedule_prune(void)
 {
-       kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+       wake_up_process(prune_thread);
 }
 
 /*
@@ -907,9 +939,9 @@ static void evict_chunk(struct audit_chunk *chunk)
        for (n = 0; n < chunk->count; n++)
                list_del_init(&chunk->owners[n].list);
        spin_unlock(&hash_lock);
+       mutex_unlock(&audit_filter_mutex);
        if (need_prune)
                audit_schedule_prune();
-       mutex_unlock(&audit_filter_mutex);
 }
 
 static int audit_tree_handle_event(struct fsnotify_group *group,
index ad9c168..6e30024 100644 (file)
@@ -146,7 +146,7 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
 /* Initialize a parent watch entry. */
 static struct audit_parent *audit_init_parent(struct path *path)
 {
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
        struct audit_parent *parent;
        int ret;
 
@@ -361,11 +361,11 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
        struct dentry *d = kern_path_locked(watch->path, parent);
        if (IS_ERR(d))
                return PTR_ERR(d);
-       mutex_unlock(&parent->dentry->d_inode->i_mutex);
-       if (d->d_inode) {
+       mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex);
+       if (d_is_positive(d)) {
                /* update watch filter fields */
-               watch->dev = d->d_inode->i_sb->s_dev;
-               watch->ino = d->d_inode->i_ino;
+               watch->dev = d_backing_inode(d)->i_sb->s_dev;
+               watch->ino = d_backing_inode(d)->i_ino;
        }
        dput(d);
        return 0;
@@ -426,7 +426,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
                return ret;
 
        /* either find an old parent or attach a new one */
-       parent = audit_find_parent(parent_path.dentry->d_inode);
+       parent = audit_find_parent(d_backing_inode(parent_path.dentry));
        if (!parent) {
                parent = audit_init_parent(&parent_path);
                if (IS_ERR(parent)) {
@@ -482,7 +482,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
 
        switch (data_type) {
        case (FSNOTIFY_EVENT_PATH):
-               inode = ((struct path *)data)->dentry->d_inode;
+               inode = d_backing_inode(((struct path *)data)->dentry);
                break;
        case (FSNOTIFY_EVENT_INODE):
                inode = (struct inode *)data;
index dc4ae70..9fb9d1c 100644 (file)
@@ -1629,7 +1629,7 @@ retry:
        rcu_read_lock();
        seq = read_seqbegin(&rename_lock);
        for(;;) {
-               struct inode *inode = d->d_inode;
+               struct inode *inode = d_backing_inode(d);
                if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) {
                        struct audit_chunk *chunk;
                        chunk = audit_tree_lookup(inode);
@@ -1754,7 +1754,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
                   unsigned int flags)
 {
        struct audit_context *context = current->audit_context;
-       const struct inode *inode = dentry->d_inode;
+       const struct inode *inode = d_backing_inode(dentry);
        struct audit_names *n;
        bool parent = flags & AUDIT_INODE_PARENT;
 
@@ -1853,7 +1853,7 @@ void __audit_inode_child(const struct inode *parent,
                         const unsigned char type)
 {
        struct audit_context *context = current->audit_context;
-       const struct inode *inode = dentry->d_inode;
+       const struct inode *inode = d_backing_inode(dentry);
        const char *dname = dentry->d_name.name;
        struct audit_names *n, *found_parent = NULL, *found_child = NULL;
 
@@ -2361,7 +2361,6 @@ static void audit_log_task(struct audit_buffer *ab)
        kuid_t auid, uid;
        kgid_t gid;
        unsigned int sessionid;
-       struct mm_struct *mm = current->mm;
        char comm[sizeof(current->comm)];
 
        auid = audit_get_loginuid(current);
@@ -2376,13 +2375,7 @@ static void audit_log_task(struct audit_buffer *ab)
        audit_log_task_context(ab);
        audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
        audit_log_untrustedstring(ab, get_task_comm(comm, current));
-       if (mm) {
-               down_read(&mm->mmap_sem);
-               if (mm->exe_file)
-                       audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
-               up_read(&mm->mmap_sem);
-       } else
-               audit_log_format(ab, " exe=(null)");
+       audit_log_d_path_exe(ab, current->mm);
 }
 
 /**
index 650b038..42a1d2a 100644 (file)
@@ -387,9 +387,9 @@ static bool check_symbol(const struct symsearch *syms,
                pr_warn("Symbol %s is marked as UNUSED, however this module is "
                        "using it.\n", fsa->name);
                pr_warn("This symbol will go away in the future.\n");
-               pr_warn("Please evalute if this is the right api to use and if "
-                       "it really is, submit a report the linux kernel "
-                       "mailinglist together with submitting your code for "
+               pr_warn("Please evaluate if this is the right api to use and "
+                       "if it really is, submit a report to the linux kernel "
+                       "mailing list together with submitting your code for "
                        "inclusion.\n");
        }
 #endif
@@ -2511,7 +2511,8 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
                return err;
 
        /* Suck in entire file: we'll want most of it. */
-       info->hdr = vmalloc(info->len);
+       info->hdr = __vmalloc(info->len,
+                       GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, PAGE_KERNEL);
        if (!info->hdr)
                return -ENOMEM;
 
index 728e05b..a22d6a7 100644 (file)
@@ -173,9 +173,9 @@ static char *next_arg(char *args, char **param, char **val)
                        if (args[i-1] == '"')
                                args[i-1] = '\0';
                }
-               if (quoted && args[i-1] == '"')
-                       args[i-1] = '\0';
        }
+       if (quoted && args[i-1] == '"')
+               args[i-1] = '\0';
 
        if (args[i]) {
                args[i] = '\0';
index 5a56d3c..e9dbaeb 100644 (file)
@@ -407,7 +407,7 @@ static inline void relay_set_buf_dentry(struct rchan_buf *buf,
                                        struct dentry *dentry)
 {
        buf->dentry = dentry;
-       buf->dentry->d_inode->i_size = buf->early_bytes;
+       d_inode(buf->dentry)->i_size = buf->early_bytes;
 }
 
 static struct dentry *relay_create_buf_file(struct rchan *chan,
@@ -733,7 +733,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
                buf->padding[old_subbuf] = buf->prev_padding;
                buf->subbufs_produced++;
                if (buf->dentry)
-                       buf->dentry->d_inode->i_size +=
+                       d_inode(buf->dentry)->i_size +=
                                buf->chan->subbuf_size -
                                buf->padding[old_subbuf];
                else
index 91eecaa..0533049 100644 (file)
@@ -6079,7 +6079,7 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
        struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
 
        if (ret) /* See tracing_get_cpu() */
-               ret->d_inode->i_cdev = (void *)(cpu + 1);
+               d_inode(ret)->i_cdev = (void *)(cpu + 1);
        return ret;
 }
 
index 7da1dfe..c4de47f 100644 (file)
@@ -494,8 +494,8 @@ static void remove_event_file_dir(struct ftrace_event_file *file)
        if (dir) {
                spin_lock(&dir->d_lock);        /* probably unneeded */
                list_for_each_entry(child, &dir->d_subdirs, d_child) {
-                       if (child->d_inode)     /* probably unneeded */
-                               child->d_inode->i_private = NULL;
+                       if (d_really_is_positive(child))        /* probably unneeded */
+                               d_inode(child)->i_private = NULL;
                }
                spin_unlock(&dir->d_lock);
 
@@ -565,6 +565,7 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
 static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
 {
        char *event = NULL, *sub = NULL, *match;
+       int ret;
 
        /*
         * The buf format can be <subsystem>:<event-name>
@@ -590,7 +591,13 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
                        event = NULL;
        }
 
-       return __ftrace_set_clr_event(tr, match, sub, event, set);
+       ret = __ftrace_set_clr_event(tr, match, sub, event, set);
+
+       /* Put back the colon to allow this to be called again */
+       if (buf)
+               *(buf - 1) = ':';
+
+       return ret;
 }
 
 /**
@@ -1753,6 +1760,8 @@ static void update_event_printk(struct ftrace_event_call *call,
                                ptr++;
                                /* Check for alpha chars like ULL */
                        } while (isalnum(*ptr));
+                       if (!*ptr)
+                               break;
                        /*
                         * A number must have some kind of delimiter after
                         * it, and we can ignore that too.
@@ -1779,12 +1788,16 @@ static void update_event_printk(struct ftrace_event_call *call,
                        do {
                                ptr++;
                        } while (isalnum(*ptr) || *ptr == '_');
+                       if (!*ptr)
+                               break;
                        /*
                         * If what comes after this variable is a '.' or
                         * '->' then we can continue to ignore that string.
                         */
                        if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
                                ptr += *ptr == '.' ? 1 : 2;
+                               if (!*ptr)
+                                       break;
                                goto skip_more;
                        }
                        /*
index 9cfea4c..a51e796 100644 (file)
@@ -1308,15 +1308,19 @@ void graph_trace_open(struct trace_iterator *iter)
 {
        /* pid and depth on the last trace processed */
        struct fgraph_data *data;
+       gfp_t gfpflags;
        int cpu;
 
        iter->private = NULL;
 
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       /* We can be called in atomic context via ftrace_dump() */
+       gfpflags = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
+
+       data = kzalloc(sizeof(*data), gfpflags);
        if (!data)
                goto out_err;
 
-       data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+       data->cpu_data = alloc_percpu_gfp(struct fgraph_cpu_data, gfpflags);
        if (!data->cpu_data)
                goto out_err_free;
 
index d60fe62..6dd022c 100644 (file)
@@ -443,7 +443,7 @@ static int create_trace_uprobe(int argc, char **argv)
        if (ret)
                goto fail_address_parse;
 
-       inode = igrab(path.dentry->d_inode);
+       inode = igrab(d_inode(path.dentry));
        path_put(&path);
 
        if (!inode || !S_ISREG(inode->i_mode)) {
index dbef231..975c6e0 100644 (file)
@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
 static inline const struct raid6_calls *raid6_choose_gen(
        void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
 {
-       unsigned long perf, bestperf, j0, j1;
+       unsigned long perf, bestgenperf, bestxorperf, j0, j1;
+       int start = (disks>>1)-1, stop = disks-3;       /* work on the second half of the disks */
        const struct raid6_calls *const *algo;
        const struct raid6_calls *best;
 
-       for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+       for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
                if (!best || (*algo)->prefer >= best->prefer) {
                        if ((*algo)->valid && !(*algo)->valid())
                                continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
                        }
                        preempt_enable();
 
-                       if (perf > bestperf) {
-                               bestperf = perf;
+                       if (perf > bestgenperf) {
+                               bestgenperf = perf;
                                best = *algo;
                        }
-                       pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name,
+                       pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
                               (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+
+                       if (!(*algo)->xor_syndrome)
+                               continue;
+
+                       perf = 0;
+
+                       preempt_disable();
+                       j0 = jiffies;
+                       while ((j1 = jiffies) == j0)
+                               cpu_relax();
+                       while (time_before(jiffies,
+                                           j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+                               (*algo)->xor_syndrome(disks, start, stop,
+                                                     PAGE_SIZE, *dptrs);
+                               perf++;
+                       }
+                       preempt_enable();
+
+                       if (best == *algo)
+                               bestxorperf = perf;
+
+                       pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
+                               (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
                }
        }
 
        if (best) {
-               pr_info("raid6: using algorithm %s (%ld MB/s)\n",
+               pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
                       best->name,
-                      (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+                      (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+               if (best->xor_syndrome)
+                       pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
+                              (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
                raid6_call = *best;
        } else
                pr_err("raid6: Yikes!  No algorithm found!\n");
index 7cc12b5..bec27fc 100644 (file)
@@ -119,6 +119,7 @@ int raid6_have_altivec(void)
 
 const struct raid6_calls raid6_altivec$# = {
        raid6_altivec$#_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
        raid6_have_altivec,
        "altivecx$#",
        0
index bc3b1dd..7673400 100644 (file)
@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_avx2x1 = {
        raid6_avx21_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
        raid6_have_avx2,
        "avx2x1",
        1                       /* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_avx2x2 = {
        raid6_avx22_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
        raid6_have_avx2,
        "avx2x2",
        1                       /* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_avx2x4 = {
        raid6_avx24_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
        raid6_have_avx2,
        "avx2x4",
        1                       /* Has cache hints */
index 5b50f8d..558aeac 100644 (file)
@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
        }
 }
 
+static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
+                                    size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+               /* P/Q data pages */
+               wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+               for ( z = z0-1 ; z >= start ; z-- ) {
+                       wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+                       wp$$ ^= wd$$;
+                       w2$$ = MASK(wq$$);
+                       w1$$ = SHLBYTE(wq$$);
+                       w2$$ &= NBYTES(0x1d);
+                       w1$$ ^= w2$$;
+                       wq$$ = w1$$ ^ wd$$;
+               }
+               /* P/Q left side optimization */
+               for ( z = start-1 ; z >= 0 ; z-- ) {
+                       w2$$ = MASK(wq$$);
+                       w1$$ = SHLBYTE(wq$$);
+                       w2$$ &= NBYTES(0x1d);
+                       wq$$ = w1$$ ^ w2$$;
+               }
+               *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+               *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+       }
+
+}
+
 const struct raid6_calls raid6_intx$# = {
        raid6_int$#_gen_syndrome,
-       NULL,           /* always valid */
+       raid6_int$#_xor_syndrome,
+       NULL,                   /* always valid */
        "int" NSTRING "x$#",
        0
 };
index 590c71c..b3b0e1f 100644 (file)
@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_mmxx1 = {
        raid6_mmx1_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
        raid6_have_mmx,
        "mmxx1",
        0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_mmxx2 = {
        raid6_mmx2_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
        raid6_have_mmx,
        "mmxx2",
        0
index 36ad470..d9ad6ee 100644 (file)
@@ -42,6 +42,7 @@
        }                                                               \
        struct raid6_calls const raid6_neonx ## _n = {                  \
                raid6_neon ## _n ## _gen_syndrome,                      \
+               NULL,           /* XOR not yet implemented */           \
                raid6_have_neon,                                        \
                "neonx" #_n,                                            \
                0                                                       \
index f762971..9025b8c 100644 (file)
@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_sse1x1 = {
        raid6_sse11_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
        raid6_have_sse1_or_mmxext,
        "sse1x1",
        1                       /* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_sse1x2 = {
        raid6_sse12_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
        raid6_have_sse1_or_mmxext,
        "sse1x2",
        1                       /* Has cache hints */
index 85b82c8..1d2276b 100644 (file)
@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
        kernel_fpu_end();
 }
 
+
+static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
+                                    size_t bytes, void **ptrs)
+ {
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+       for ( d = 0 ; d < bytes ; d += 16 ) {
+               asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+               asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+               asm volatile("pxor %xmm4,%xmm2");
+               /* P/Q data pages */
+               for ( z = z0-1 ; z >= start ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+                       asm volatile("pxor %xmm5,%xmm2");
+                       asm volatile("pxor %xmm5,%xmm4");
+               }
+               /* P/Q left side optimization */
+               for ( z = start-1 ; z >= 0 ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pxor %xmm5,%xmm4");
+               }
+               asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+               /* Don't use movntdq for r/w memory area < cache line */
+               asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+               asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+       }
+
+       asm volatile("sfence" : : : "memory");
+       kernel_fpu_end();
+}
+
 const struct raid6_calls raid6_sse2x1 = {
        raid6_sse21_gen_syndrome,
+       raid6_sse21_xor_syndrome,
        raid6_have_sse2,
        "sse2x1",
        1                       /* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
        kernel_fpu_end();
 }
 
+ static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+                                    size_t bytes, void **ptrs)
+ {
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+       for ( d = 0 ; d < bytes ; d += 32 ) {
+               asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+               asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+               asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+               asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+               asm volatile("pxor %xmm4,%xmm2");
+               asm volatile("pxor %xmm6,%xmm3");
+               /* P/Q data pages */
+               for ( z = z0-1 ; z >= start ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pxor %xmm7,%xmm7");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("pcmpgtb %xmm6,%xmm7");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("paddb %xmm6,%xmm6");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pand %xmm0,%xmm7");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+                       asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+                       asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+                       asm volatile("pxor %xmm5,%xmm2");
+                       asm volatile("pxor %xmm7,%xmm3");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+               }
+               /* P/Q left side optimization */
+               for ( z = start-1 ; z >= 0 ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pxor %xmm7,%xmm7");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("pcmpgtb %xmm6,%xmm7");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("paddb %xmm6,%xmm6");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pand %xmm0,%xmm7");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+               }
+               asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+               asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+               /* Don't use movntdq for r/w memory area < cache line */
+               asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+               asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
+               asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+               asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
+       }
+
+       asm volatile("sfence" : : : "memory");
+       kernel_fpu_end();
+ }
+
 const struct raid6_calls raid6_sse2x2 = {
        raid6_sse22_gen_syndrome,
+       raid6_sse22_xor_syndrome,
        raid6_have_sse2,
        "sse2x2",
        1                       /* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
        kernel_fpu_end();
 }
 
+ static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+                                    size_t bytes, void **ptrs)
+ {
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+
+       for ( d = 0 ; d < bytes ; d += 64 ) {
+               asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+               asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+               asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
+               asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
+               asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+               asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+               asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
+               asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
+               asm volatile("pxor %xmm4,%xmm2");
+               asm volatile("pxor %xmm6,%xmm3");
+               asm volatile("pxor %xmm12,%xmm10");
+               asm volatile("pxor %xmm14,%xmm11");
+               /* P/Q data pages */
+               for ( z = z0-1 ; z >= start ; z-- ) {
+                       asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+                       asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pxor %xmm7,%xmm7");
+                       asm volatile("pxor %xmm13,%xmm13");
+                       asm volatile("pxor %xmm15,%xmm15");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("pcmpgtb %xmm6,%xmm7");
+                       asm volatile("pcmpgtb %xmm12,%xmm13");
+                       asm volatile("pcmpgtb %xmm14,%xmm15");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("paddb %xmm6,%xmm6");
+                       asm volatile("paddb %xmm12,%xmm12");
+                       asm volatile("paddb %xmm14,%xmm14");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pand %xmm0,%xmm7");
+                       asm volatile("pand %xmm0,%xmm13");
+                       asm volatile("pand %xmm0,%xmm15");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+                       asm volatile("pxor %xmm13,%xmm12");
+                       asm volatile("pxor %xmm15,%xmm14");
+                       asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+                       asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+                       asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+                       asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+                       asm volatile("pxor %xmm5,%xmm2");
+                       asm volatile("pxor %xmm7,%xmm3");
+                       asm volatile("pxor %xmm13,%xmm10");
+                       asm volatile("pxor %xmm15,%xmm11");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+                       asm volatile("pxor %xmm13,%xmm12");
+                       asm volatile("pxor %xmm15,%xmm14");
+               }
+               asm volatile("prefetchnta %0" :: "m" (q[d]));
+               asm volatile("prefetchnta %0" :: "m" (q[d+32]));
+               /* P/Q left side optimization */
+               for ( z = start-1 ; z >= 0 ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pxor %xmm7,%xmm7");
+                       asm volatile("pxor %xmm13,%xmm13");
+                       asm volatile("pxor %xmm15,%xmm15");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("pcmpgtb %xmm6,%xmm7");
+                       asm volatile("pcmpgtb %xmm12,%xmm13");
+                       asm volatile("pcmpgtb %xmm14,%xmm15");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("paddb %xmm6,%xmm6");
+                       asm volatile("paddb %xmm12,%xmm12");
+                       asm volatile("paddb %xmm14,%xmm14");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pand %xmm0,%xmm7");
+                       asm volatile("pand %xmm0,%xmm13");
+                       asm volatile("pand %xmm0,%xmm15");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+                       asm volatile("pxor %xmm13,%xmm12");
+                       asm volatile("pxor %xmm15,%xmm14");
+               }
+               asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+               asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+               asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+               asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+               asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+               asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+               asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
+               asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
+               asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+               asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+               asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+               asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+       }
+       asm volatile("sfence" : : : "memory");
+       kernel_fpu_end();
+ }
+
+
 const struct raid6_calls raid6_sse2x4 = {
        raid6_sse24_gen_syndrome,
+       raid6_sse24_xor_syndrome,
        raid6_have_sse2,
        "sse2x4",
        1                       /* Has cache hints */
index 5a485b7..3bebbab 100644 (file)
@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
 char data[NDISKS][PAGE_SIZE];
 char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
 
-static void makedata(void)
+static void makedata(int start, int stop)
 {
        int i, j;
 
-       for (i = 0; i < NDISKS; i++) {
+       for (i = start; i <= stop; i++) {
                for (j = 0; j < PAGE_SIZE; j++)
                        data[i][j] = rand();
 
@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
 {
        const struct raid6_calls *const *algo;
        const struct raid6_recov_calls *const *ra;
-       int i, j;
+       int i, j, p1, p2;
        int err = 0;
 
-       makedata();
+       makedata(0, NDISKS-1);
 
        for (ra = raid6_recov_algos; *ra; ra++) {
                if ((*ra)->valid  && !(*ra)->valid())
                        continue;
+
                raid6_2data_recov = (*ra)->data2;
                raid6_datap_recov = (*ra)->datap;
 
                printf("using recovery %s\n", (*ra)->name);
 
                for (algo = raid6_algos; *algo; algo++) {
-                       if (!(*algo)->valid || (*algo)->valid()) {
-                               raid6_call = **algo;
+                       if ((*algo)->valid && !(*algo)->valid())
+                               continue;
+
+                       raid6_call = **algo;
+
+                       /* Nuke syndromes */
+                       memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+                       /* Generate assumed good syndrome */
+                       raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+                                               (void **)&dataptrs);
+
+                       for (i = 0; i < NDISKS-1; i++)
+                               for (j = i+1; j < NDISKS; j++)
+                                       err += test_disks(i, j);
+
+                       if (!raid6_call.xor_syndrome)
+                               continue;
+
+                       for (p1 = 0; p1 < NDISKS-2; p1++)
+                               for (p2 = p1; p2 < NDISKS-2; p2++) {
 
-                               /* Nuke syndromes */
-                               memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+                                       /* Simulate rmw run */
+                                       raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+                                                               (void **)&dataptrs);
+                                       makedata(p1, p2);
+                                       raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+                                                                (void **)&dataptrs);
 
-                               /* Generate assumed good syndrome */
-                               raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
-                                                       (void **)&dataptrs);
+                                       for (i = 0; i < NDISKS-1; i++)
+                                               for (j = i+1; j < NDISKS; j++)
+                                                       err += test_disks(i, j);
+                               }
 
-                               for (i = 0; i < NDISKS-1; i++)
-                                       for (j = i+1; j < NDISKS; j++)
-                                               err += test_disks(i, j);
-                       }
                }
                printf("\n");
        }
index e7c2945..2dd291a 100644 (file)
@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 const struct raid6_calls raid6_tilegx$# = {
        raid6_tilegx$#_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
        NULL,
        "tilegx$#",
        0
index 1ea2400..de98137 100644 (file)
@@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
 static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
        struct shmem_inode_info *info = SHMEM_I(inode);
        int error;
 
@@ -2274,7 +2274,7 @@ static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
  */
 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        int ret;
 
        /*
@@ -2298,7 +2298,7 @@ out:
 
 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
                shmem_free_inode(inode->i_sb);
@@ -2315,7 +2315,7 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
        if (!simple_empty(dentry))
                return -ENOTEMPTY;
 
-       drop_nlink(dentry->d_inode);
+       drop_nlink(d_inode(dentry));
        drop_nlink(dir);
        return shmem_unlink(dir, dentry);
 }
@@ -2336,8 +2336,8 @@ static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, stru
        }
        old_dir->i_ctime = old_dir->i_mtime =
        new_dir->i_ctime = new_dir->i_mtime =
-       old_dentry->d_inode->i_ctime =
-       new_dentry->d_inode->i_ctime = CURRENT_TIME;
+       d_inode(old_dentry)->i_ctime =
+       d_inode(new_dentry)->i_ctime = CURRENT_TIME;
 
        return 0;
 }
@@ -2376,7 +2376,7 @@ static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
  */
 static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
 {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
        int they_are_dirs = S_ISDIR(inode->i_mode);
 
        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
@@ -2396,10 +2396,10 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
                        return error;
        }
 
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                (void) shmem_unlink(new_dir, new_dentry);
                if (they_are_dirs) {
-                       drop_nlink(new_dentry->d_inode);
+                       drop_nlink(d_inode(new_dentry));
                        drop_nlink(old_dir);
                }
        } else if (they_are_dirs) {
@@ -2476,14 +2476,14 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
 
 static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
 {
-       nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
+       nd_set_link(nd, SHMEM_I(d_inode(dentry))->symlink);
        return NULL;
 }
 
 static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
        struct page *page = NULL;
-       int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
+       int error = shmem_getpage(d_inode(dentry), 0, &page, SGP_READ, NULL);
        nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
        if (page)
                unlock_page(page);
@@ -2574,7 +2574,7 @@ static int shmem_xattr_validate(const char *name)
 static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
                              void *buffer, size_t size)
 {
-       struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
+       struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
        int err;
 
        /*
@@ -2595,7 +2595,7 @@ static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
 static int shmem_setxattr(struct dentry *dentry, const char *name,
                          const void *value, size_t size, int flags)
 {
-       struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
+       struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
        int err;
 
        /*
@@ -2615,7 +2615,7 @@ static int shmem_setxattr(struct dentry *dentry, const char *name,
 
 static int shmem_removexattr(struct dentry *dentry, const char *name)
 {
-       struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
+       struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
        int err;
 
        /*
@@ -2635,7 +2635,7 @@ static int shmem_removexattr(struct dentry *dentry, const char *name)
 
 static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
 {
-       struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
+       struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
        return simple_xattr_list(&info->xattrs, buffer, size);
 }
 #endif /* CONFIG_TMPFS_XATTR */
index ec56550..79e8f71 100644 (file)
@@ -490,6 +490,43 @@ out:
 }
 EXPORT_SYMBOL(ceph_parse_options);
 
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
+{
+       struct ceph_options *opt = client->options;
+       size_t pos = m->count;
+
+       if (opt->name)
+               seq_printf(m, "name=%s,", opt->name);
+       if (opt->key)
+               seq_puts(m, "secret=<hidden>,");
+
+       if (opt->flags & CEPH_OPT_FSID)
+               seq_printf(m, "fsid=%pU,", &opt->fsid);
+       if (opt->flags & CEPH_OPT_NOSHARE)
+               seq_puts(m, "noshare,");
+       if (opt->flags & CEPH_OPT_NOCRC)
+               seq_puts(m, "nocrc,");
+       if (opt->flags & CEPH_OPT_NOMSGAUTH)
+               seq_puts(m, "nocephx_require_signatures,");
+       if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
+               seq_puts(m, "notcp_nodelay,");
+
+       if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+               seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
+       if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+               seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
+       if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+               seq_printf(m, "osdkeepalivetimeout=%d,",
+                          opt->osd_keepalive_timeout);
+
+       /* drop redundant comma */
+       if (m->count != pos)
+               m->count--;
+
+       return 0;
+}
+EXPORT_SYMBOL(ceph_print_client_options);
+
 u64 ceph_client_id(struct ceph_client *client)
 {
        return client->monc.auth->global_id;
index 16bc199..9d84ce4 100644 (file)
@@ -17,6 +17,7 @@ const char *crush_bucket_alg_name(int alg)
        case CRUSH_BUCKET_LIST: return "list";
        case CRUSH_BUCKET_TREE: return "tree";
        case CRUSH_BUCKET_STRAW: return "straw";
+       case CRUSH_BUCKET_STRAW2: return "straw2";
        default: return "unknown";
        }
 }
@@ -40,6 +41,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
                return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
        case CRUSH_BUCKET_STRAW:
                return ((struct crush_bucket_straw *)b)->item_weights[p];
+       case CRUSH_BUCKET_STRAW2:
+               return ((struct crush_bucket_straw2 *)b)->item_weights[p];
        }
        return 0;
 }
@@ -77,6 +80,14 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
        kfree(b);
 }
 
+void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
+{
+       kfree(b->item_weights);
+       kfree(b->h.perm);
+       kfree(b->h.items);
+       kfree(b);
+}
+
 void crush_destroy_bucket(struct crush_bucket *b)
 {
        switch (b->alg) {
@@ -92,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
        case CRUSH_BUCKET_STRAW:
                crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
                break;
+       case CRUSH_BUCKET_STRAW2:
+               crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
+               break;
        }
 }
 
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h
new file mode 100644 (file)
index 0000000..6192c7f
--- /dev/null
@@ -0,0 +1,166 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Intel Corporation All Rights Reserved
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#if defined(__linux__)
+#include <linux/types.h>
+#elif defined(__FreeBSD__)
+#include <sys/types.h>
+#endif
+
+#ifndef CEPH_CRUSH_LN_H
+#define CEPH_CRUSH_LN_H
+
+
+// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
+// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
+
+static int64_t __RH_LH_tbl[128*2+2] = {
+  0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
+  0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
+  0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
+  0x0000f4898d5f85bcll, 0x000010eb389fa29fll, 0x0000f2b9d6480f2cll, 0x000013aa2fdd27f1ll,
+  0x0000f0f0f0f0f0f1ll, 0x00001663f6fac913ll, 0x0000ef2eb71fc435ll, 0x00001918a16e4633ll,
+  0x0000ed7303b5cc0fll, 0x00001bc84240adabll, 0x0000ebbdb2a5c162ll, 0x00001e72ec117fa5ll,
+  0x0000ea0ea0ea0ea1ll, 0x00002118b119b4f3ll, 0x0000e865ac7b7604ll, 0x000023b9a32eaa56ll,
+  0x0000e6c2b4481cd9ll, 0x00002655d3c4f15cll, 0x0000e525982af70dll, 0x000028ed53f307eell,
+  0x0000e38e38e38e39ll, 0x00002b803473f7adll, 0x0000e1fc780e1fc8ll, 0x00002e0e85a9de04ll,
+  0x0000e070381c0e08ll, 0x0000309857a05e07ll, 0x0000dee95c4ca038ll, 0x0000331dba0efce1ll,
+  0x0000dd67c8a60dd7ll, 0x0000359ebc5b69d9ll, 0x0000dbeb61eed19dll, 0x0000381b6d9bb29bll,
+  0x0000da740da740dbll, 0x00003a93dc9864b2ll, 0x0000d901b2036407ll, 0x00003d0817ce9cd4ll,
+  0x0000d79435e50d7all, 0x00003f782d7204d0ll, 0x0000d62b80d62b81ll, 0x000041e42b6ec0c0ll,
+  0x0000d4c77b03531ell, 0x0000444c1f6b4c2dll, 0x0000d3680d3680d4ll, 0x000046b016ca47c1ll,
+  0x0000d20d20d20d21ll, 0x000049101eac381cll, 0x0000d0b69fcbd259ll, 0x00004b6c43f1366all,
+  0x0000cf6474a8819fll, 0x00004dc4933a9337ll, 0x0000ce168a772509ll, 0x0000501918ec6c11ll,
+  0x0000cccccccccccdll, 0x00005269e12f346ell, 0x0000cb8727c065c4ll, 0x000054b6f7f1325all,
+  0x0000ca4587e6b750ll, 0x0000570068e7ef5all, 0x0000c907da4e8712ll, 0x000059463f919deell,
+  0x0000c7ce0c7ce0c8ll, 0x00005b8887367433ll, 0x0000c6980c6980c7ll, 0x00005dc74ae9fbecll,
+  0x0000c565c87b5f9ell, 0x00006002958c5871ll, 0x0000c4372f855d83ll, 0x0000623a71cb82c8ll,
+  0x0000c30c30c30c31ll, 0x0000646eea247c5cll, 0x0000c1e4bbd595f7ll, 0x000066a008e4788cll,
+  0x0000c0c0c0c0c0c1ll, 0x000068cdd829fd81ll, 0x0000bfa02fe80bfbll, 0x00006af861e5fc7dll,
+  0x0000be82fa0be830ll, 0x00006d1fafdce20all, 0x0000bd6910470767ll, 0x00006f43cba79e40ll,
+  0x0000bc52640bc527ll, 0x00007164beb4a56dll, 0x0000bb3ee721a54ell, 0x000073829248e961ll,
+  0x0000ba2e8ba2e8bbll, 0x0000759d4f80cba8ll, 0x0000b92143fa36f6ll, 0x000077b4ff5108d9ll,
+  0x0000b81702e05c0cll, 0x000079c9aa879d53ll, 0x0000b70fbb5a19bfll, 0x00007bdb59cca388ll,
+  0x0000b60b60b60b61ll, 0x00007dea15a32c1bll, 0x0000b509e68a9b95ll, 0x00007ff5e66a0ffell,
+  0x0000b40b40b40b41ll, 0x000081fed45cbccbll, 0x0000b30f63528918ll, 0x00008404e793fb81ll,
+  0x0000b21642c8590cll, 0x000086082806b1d5ll, 0x0000b11fd3b80b12ll, 0x000088089d8a9e47ll,
+  0x0000b02c0b02c0b1ll, 0x00008a064fd50f2all, 0x0000af3addc680b0ll, 0x00008c01467b94bbll,
+  0x0000ae4c415c9883ll, 0x00008df988f4ae80ll, 0x0000ad602b580ad7ll, 0x00008fef1e987409ll,
+  0x0000ac7691840ac8ll, 0x000091e20ea1393ell, 0x0000ab8f69e2835all, 0x000093d2602c2e5fll,
+  0x0000aaaaaaaaaaabll, 0x000095c01a39fbd6ll, 0x0000a9c84a47a080ll, 0x000097ab43af59f9ll,
+  0x0000a8e83f5717c1ll, 0x00009993e355a4e5ll, 0x0000a80a80a80a81ll, 0x00009b79ffdb6c8bll,
+  0x0000a72f0539782all, 0x00009d5d9fd5010bll, 0x0000a655c4392d7cll, 0x00009f3ec9bcfb80ll,
+  0x0000a57eb50295fbll, 0x0000a11d83f4c355ll, 0x0000a4a9cf1d9684ll, 0x0000a2f9d4c51039ll,
+  0x0000a3d70a3d70a4ll, 0x0000a4d3c25e68dcll, 0x0000a3065e3fae7dll, 0x0000a6ab52d99e76ll,
+  0x0000a237c32b16d0ll, 0x0000a8808c384547ll, 0x0000a16b312ea8fdll, 0x0000aa5374652a1cll,
+  0x0000a0a0a0a0a0a1ll, 0x0000ac241134c4e9ll, 0x00009fd809fd80a0ll, 0x0000adf26865a8a1ll,
+  0x00009f1165e72549ll, 0x0000afbe7fa0f04dll, 0x00009e4cad23dd60ll, 0x0000b1885c7aa982ll,
+  0x00009d89d89d89d9ll, 0x0000b35004723c46ll, 0x00009cc8e160c3fcll, 0x0000b5157cf2d078ll,
+  0x00009c09c09c09c1ll, 0x0000b6d8cb53b0call, 0x00009b4c6f9ef03bll, 0x0000b899f4d8ab63ll,
+  0x00009a90e7d95bc7ll, 0x0000ba58feb2703all, 0x000099d722dabde6ll, 0x0000bc15edfeed32ll,
+  0x0000991f1a515886ll, 0x0000bdd0c7c9a817ll, 0x00009868c809868dll, 0x0000bf89910c1678ll,
+  0x000097b425ed097cll, 0x0000c1404eadf383ll, 0x000097012e025c05ll, 0x0000c2f5058593d9ll,
+  0x0000964fda6c0965ll, 0x0000c4a7ba58377cll, 0x000095a02568095bll, 0x0000c65871da59ddll,
+  0x000094f2094f2095ll, 0x0000c80730b00016ll, 0x0000944580944581ll, 0x0000c9b3fb6d0559ll,
+  0x0000939a85c4093all, 0x0000cb5ed69565afll, 0x000092f113840498ll, 0x0000cd07c69d8702ll,
+  0x0000924924924925ll, 0x0000ceaecfea8085ll, 0x000091a2b3c4d5e7ll, 0x0000d053f6d26089ll,
+  0x000090fdbc090fdcll, 0x0000d1f73f9c70c0ll, 0x0000905a38633e07ll, 0x0000d398ae817906ll,
+  0x00008fb823ee08fcll, 0x0000d53847ac00a6ll, 0x00008f1779d9fdc4ll, 0x0000d6d60f388e41ll,
+  0x00008e78356d1409ll, 0x0000d8720935e643ll, 0x00008dda5202376all, 0x0000da0c39a54804ll,
+  0x00008d3dcb08d3ddll, 0x0000dba4a47aa996ll, 0x00008ca29c046515ll, 0x0000dd3b4d9cf24bll,
+  0x00008c08c08c08c1ll, 0x0000ded038e633f3ll, 0x00008b70344a139cll, 0x0000e0636a23e2eell,
+  0x00008ad8f2fba939ll, 0x0000e1f4e5170d02ll, 0x00008a42f870566all, 0x0000e384ad748f0ell,
+  0x000089ae4089ae41ll, 0x0000e512c6e54998ll, 0x0000891ac73ae982ll, 0x0000e69f35065448ll,
+  0x0000888888888889ll, 0x0000e829fb693044ll, 0x000087f78087f781ll, 0x0000e9b31d93f98ell,
+  0x00008767ab5f34e5ll, 0x0000eb3a9f019750ll, 0x000086d905447a35ll, 0x0000ecc08321eb30ll,
+  0x0000864b8a7de6d2ll, 0x0000ee44cd59ffabll, 0x000085bf37612cefll, 0x0000efc781043579ll,
+  0x0000853408534086ll, 0x0000f148a170700all, 0x000084a9f9c8084bll, 0x0000f2c831e44116ll,
+  0x0000842108421085ll, 0x0000f446359b1353ll, 0x0000839930523fbfll, 0x0000f5c2afc65447ll,
+  0x000083126e978d50ll, 0x0000f73da38d9d4all, 0x0000828cbfbeb9a1ll, 0x0000f8b7140edbb1ll,
+  0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
+  0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
+  0x0000800000000000ll, 0x0000ffff00000000ll,
+  };
+
+
+    // LL_tbl[k] = 2^48*log2(1.0+k/2^15);
+static int64_t __LL_tbl[256] = {
+  0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
+  0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
+  0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
+  0x00000023e5bbb2b2ull, 0x00000026c81c83e4ull, 0x00000029aa7790f0ull, 0x0000002c8cccd9edull,
+  0x0000002f6f1c5ef2ull, 0x0000003251662017ull, 0x0000003533aa1d71ull, 0x0000003815e8571aull,
+  0x0000003af820cd26ull, 0x0000003dda537faeull, 0x00000040bc806ec8ull, 0x000000439ea79a8cull,
+  0x0000004680c90310ull, 0x0000004962e4a86cull, 0x0000004c44fa8ab6ull, 0x0000004f270aaa06ull,
+  0x0000005209150672ull, 0x00000054eb19a013ull, 0x00000057cd1876fdull, 0x0000005aaf118b4aull,
+  0x0000005d9104dd0full, 0x0000006072f26c64ull, 0x0000006354da3960ull, 0x0000006636bc441aull,
+  0x0000006918988ca8ull, 0x0000006bfa6f1322ull, 0x0000006edc3fd79full, 0x00000071be0ada35ull,
+  0x000000749fd01afdull, 0x00000077818f9a0cull, 0x0000007a6349577aull, 0x0000007d44fd535eull,
+  0x0000008026ab8dceull, 0x00000083085406e3ull, 0x00000085e9f6beb2ull, 0x00000088cb93b552ull,
+  0x0000008bad2aeadcull, 0x0000008e8ebc5f65ull, 0x0000009170481305ull, 0x0000009451ce05d3ull,
+  0x00000097334e37e5ull, 0x0000009a14c8a953ull, 0x0000009cf63d5a33ull, 0x0000009fd7ac4a9dull,
+  0x000000a2b07f3458ull, 0x000000a59a78ea6aull, 0x000000a87bd699fbull, 0x000000ab5d2e8970ull,
+  0x000000ae3e80b8e3ull, 0x000000b11fcd2869ull, 0x000000b40113d818ull, 0x000000b6e254c80aull,
+  0x000000b9c38ff853ull, 0x000000bca4c5690cull, 0x000000bf85f51a4aull, 0x000000c2671f0c26ull,
+  0x000000c548433eb6ull, 0x000000c82961b211ull, 0x000000cb0a7a664dull, 0x000000cdeb8d5b82ull,
+  0x000000d0cc9a91c8ull, 0x000000d3ada20933ull, 0x000000d68ea3c1ddull, 0x000000d96f9fbbdbull,
+  0x000000dc5095f744ull, 0x000000df31867430ull, 0x000000e2127132b5ull, 0x000000e4f35632eaull,
+  0x000000e7d43574e6ull, 0x000000eab50ef8c1ull, 0x000000ed95e2be90ull, 0x000000f076b0c66cull,
+  0x000000f35779106aull, 0x000000f6383b9ca2ull, 0x000000f918f86b2aull, 0x000000fbf9af7c1aull,
+  0x000000feda60cf88ull, 0x00000101bb0c658cull, 0x000001049bb23e3cull, 0x000001077c5259afull,
+  0x0000010a5cecb7fcull, 0x0000010d3d81593aull, 0x000001101e103d7full, 0x00000112fe9964e4ull,
+  0x00000115df1ccf7eull, 0x00000118bf9a7d64ull, 0x0000011ba0126eadull, 0x0000011e8084a371ull,
+  0x0000012160f11bc6ull, 0x000001244157d7c3ull, 0x0000012721b8d77full, 0x0000012a02141b10ull,
+  0x0000012ce269a28eull, 0x0000012fc2b96e0full, 0x00000132a3037daaull, 0x000001358347d177ull,
+  0x000001386386698cull, 0x0000013b43bf45ffull, 0x0000013e23f266e9ull, 0x00000141041fcc5eull,
+  0x00000143e4477678ull, 0x00000146c469654bull, 0x00000149a48598f0ull, 0x0000014c849c117cull,
+  0x0000014f64accf08ull, 0x0000015244b7d1a9ull, 0x0000015524bd1976ull, 0x0000015804bca687ull,
+  0x0000015ae4b678f2ull, 0x0000015dc4aa90ceull, 0x00000160a498ee31ull, 0x0000016384819134ull,
+  0x00000166646479ecull, 0x000001694441a870ull, 0x0000016c24191cd7ull, 0x0000016df6ca19bdull,
+  0x00000171e3b6d7aaull, 0x00000174c37d1e44ull, 0x00000177a33dab1cull, 0x0000017a82f87e49ull,
+  0x0000017d62ad97e2ull, 0x00000180425cf7feull, 0x00000182b07f3458ull, 0x0000018601aa8c19ull,
+  0x00000188e148c046ull, 0x0000018bc0e13b52ull, 0x0000018ea073fd52ull, 0x000001918001065dull,
+  0x000001945f88568bull, 0x000001973f09edf2ull, 0x0000019a1e85ccaaull, 0x0000019cfdfbf2c8ull,
+  0x0000019fdd6c6063ull, 0x000001a2bcd71593ull, 0x000001a59c3c126eull, 0x000001a87b9b570bull,
+  0x000001ab5af4e380ull, 0x000001ae3a48b7e5ull, 0x000001b11996d450ull, 0x000001b3f8df38d9ull,
+  0x000001b6d821e595ull, 0x000001b9b75eda9bull, 0x000001bc96961803ull, 0x000001bf75c79de3ull,
+  0x000001c254f36c51ull, 0x000001c534198365ull, 0x000001c81339e336ull, 0x000001caf2548bd9ull,
+  0x000001cdd1697d67ull, 0x000001d0b078b7f5ull, 0x000001d38f823b9aull, 0x000001d66e86086dull,
+  0x000001d94d841e86ull, 0x000001dc2c7c7df9ull, 0x000001df0b6f26dfull, 0x000001e1ea5c194eull,
+  0x000001e4c943555dull, 0x000001e7a824db23ull, 0x000001ea8700aab5ull, 0x000001ed65d6c42bull,
+  0x000001f044a7279dull, 0x000001f32371d51full, 0x000001f60236cccaull, 0x000001f8e0f60eb3ull,
+  0x000001fbbfaf9af3ull, 0x000001fe9e63719eull, 0x000002017d1192ccull, 0x000002045bb9fe94ull,
+  0x000002073a5cb50dull, 0x00000209c06e6212ull, 0x0000020cf791026aull, 0x0000020fd622997cull,
+  0x00000212b07f3458ull, 0x000002159334a8d8ull, 0x0000021871b52150ull, 0x0000021b502fe517ull,
+  0x0000021d6a73a78full, 0x000002210d144eeeull, 0x00000223eb7df52cull, 0x00000226c9e1e713ull,
+  0x00000229a84024bbull, 0x0000022c23679b4eull, 0x0000022f64eb83a8ull, 0x000002324338a51bull,
+  0x00000235218012a9ull, 0x00000237ffc1cc69ull, 0x0000023a2c3b0ea4ull, 0x0000023d13ee805bull,
+  0x0000024035e9221full, 0x00000243788faf25ull, 0x0000024656b4e735ull, 0x00000247ed646bfeull,
+  0x0000024c12ee3d98ull, 0x0000024ef1025c1aull, 0x00000251cf10c799ull, 0x0000025492644d65ull,
+  0x000002578b1c85eeull, 0x0000025a6919d8f0ull, 0x0000025d13ee805bull, 0x0000026025036716ull,
+  0x0000026296453882ull, 0x00000265e0d62b53ull, 0x00000268beb701f3ull, 0x0000026b9c92265eull,
+  0x0000026d32f798a9ull, 0x00000271583758ebull, 0x000002743601673bull, 0x0000027713c5c3b0ull,
+  0x00000279f1846e5full, 0x0000027ccf3d6761ull, 0x0000027e6580aecbull, 0x000002828a9e44b3ull,
+  0x0000028568462932ull, 0x00000287bdbf5255ull, 0x0000028b2384de4aull, 0x0000028d13ee805bull,
+  0x0000029035e9221full, 0x0000029296453882ull, 0x0000029699bdfb61ull, 0x0000029902a37aabull,
+  0x0000029c54b864c9ull, 0x0000029deabd1083ull, 0x000002a20f9c0bb5ull, 0x000002a4c7605d61ull,
+  0x000002a7bdbf5255ull, 0x000002a96056dafcull, 0x000002ac3daf14efull, 0x000002af1b019ecaull,
+  0x000002b296453882ull, 0x000002b5d022d80full, 0x000002b8fa471cb3ull, 0x000002ba9012e713ull,
+  0x000002bd6d4901ccull, 0x000002c04a796cf6ull, 0x000002c327a428a6ull, 0x000002c61a5e8f4cull,
+  0x000002c8e1e891f6ull, 0x000002cbbf023fc2ull, 0x000002ce9c163e6eull, 0x000002d179248e13ull,
+  0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
+};
+
+
+
+
+#endif
index a1ef53c..5b47736 100644 (file)
@@ -20,7 +20,7 @@
 
 #include <linux/crush/crush.h>
 #include <linux/crush/hash.h>
-#include <linux/crush/mapper.h>
+#include "crush_ln_table.h"
 
 /*
  * Implement the core CRUSH mapping algorithm.
@@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
        return bucket->h.items[high];
 }
 
+// compute 2^44*log2(input+1)
+uint64_t crush_ln(unsigned xin)
+{
+    unsigned x=xin, x1;
+    int iexpon, index1, index2;
+    uint64_t RH, LH, LL, xl64, result;
+
+    x++;
+
+    // normalize input
+    iexpon = 15;
+    while(!(x&0x18000)) { x<<=1; iexpon--; }
+
+    index1 = (x>>8)<<1;
+    // RH ~ 2^56/index1
+    RH = __RH_LH_tbl[index1 - 256];
+    // LH ~ 2^48 * log2(index1/256)
+    LH = __RH_LH_tbl[index1 + 1 - 256];
+
+    // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
+    xl64 = (int64_t)x * RH;
+    xl64 >>= 48;
+    x1 = xl64;
+
+    result = iexpon;
+    result <<= (12 + 32);
+
+    index2 = x1 & 0xff;
+    // LL ~ 2^48*log2(1.0+index2/2^15)
+    LL = __LL_tbl[index2];
+
+    LH = LH + LL;
+
+    LH >>= (48-12 - 32);
+    result += LH;
+
+    return result;
+}
+
+
+/*
+ * straw2
+ *
+ * for reference, see:
+ *
+ * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
+ *
+ */
+
+static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+                               int x, int r)
+{
+       unsigned i, high = 0;
+       unsigned u;
+       unsigned w;
+       __s64 ln, draw, high_draw = 0;
+
+       for (i = 0; i < bucket->h.size; i++) {
+               w = bucket->item_weights[i];
+               if (w) {
+                       u = crush_hash32_3(bucket->h.hash, x,
+                                          bucket->h.items[i], r);
+                       u &= 0xffff;
+
+                       /*
+                        * for some reason slightly less than 0x10000 produces
+                        * a slightly more accurate distribution... probably a
+                        * rounding effect.
+                        *
+                        * the natural log lookup table maps [0,0xffff]
+                        * (corresponding to real numbers [1/0x10000, 1] to
+                        * [0, 0xffffffffffff] (corresponding to real numbers
+                        * [-11.090355,0]).
+                        */
+                       ln = crush_ln(u) - 0x1000000000000ll;
+
+                       /*
+                        * divide by 16.16 fixed-point weight.  note
+                        * that the ln value is negative, so a larger
+                        * weight means a larger (less negative) value
+                        * for draw.
+                        */
+                       draw = div64_s64(ln, w);
+               } else {
+                       draw = S64_MIN;
+               }
+
+               if (i == 0 || draw > high_draw) {
+                       high = i;
+                       high_draw = draw;
+               }
+       }
+       return bucket->h.items[high];
+}
+
+
 static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
 {
        dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
@@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
        case CRUSH_BUCKET_STRAW:
                return bucket_straw_choose((struct crush_bucket_straw *)in,
                                           x, r);
+       case CRUSH_BUCKET_STRAW2:
+               return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
+                                           x, r);
        default:
                dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
                return in->items[0];
        }
 }
 
+
 /*
  * true if device is marked "out" (failed, fully offloaded)
  * of the cluster
@@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map,
  * @type: the type of item to choose
  * @out: pointer to output vector
  * @outpos: our position in that vector
+ * @out_size: size of the out vector
  * @tries: number of attempts to make
  * @recurse_tries: number of attempts to have recursive chooseleaf make
  * @local_retries: localized retries
@@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map,
                               const __u32 *weight, int weight_max,
                               int x, int numrep, int type,
                               int *out, int outpos,
+                              int out_size,
                               unsigned int tries,
                               unsigned int recurse_tries,
                               unsigned int local_retries,
@@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map,
        int item = 0;
        int itemtype;
        int collide, reject;
+       int count = out_size;
 
        dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
                recurse_to_leaf ? "_LEAF" : "",
@@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map,
                tries, recurse_tries, local_retries, local_fallback_retries,
                parent_r);
 
-       for (rep = outpos; rep < numrep; rep++) {
+       for (rep = outpos; rep < numrep && count > 0 ; rep++) {
                /* keep trying until we get a non-out, non-colliding item */
                ftotal = 0;
                skip_rep = 0;
@@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map,
                                                         map->buckets[-1-item],
                                                         weight, weight_max,
                                                         x, outpos+1, 0,
-                                                        out2, outpos,
+                                                        out2, outpos, count,
                                                         recurse_tries, 0,
                                                         local_retries,
                                                         local_fallback_retries,
@@ -463,6 +566,7 @@ reject:
                dprintk("CHOOSE got %d\n", item);
                out[outpos] = item;
                outpos++;
+               count--;
        }
 
        dprintk("CHOOSE returns %d\n", outpos);
@@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map,
        __u32 step;
        int i, j;
        int numrep;
+       int out_size;
        /*
         * the original choose_total_tries value was off by one (it
         * counted "retries" and not "tries").  add one.
@@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map,
                                                x, numrep,
                                                curstep->arg2,
                                                o+osize, j,
+                                               result_max-osize,
                                                choose_tries,
                                                recurse_tries,
                                                choose_local_retries,
@@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map,
                                                c+osize,
                                                0);
                                } else {
+                                       out_size = ((numrep < (result_max-osize)) ?
+                                                    numrep : (result_max-osize));
                                        crush_choose_indep(
                                                map,
                                                map->buckets[-1-w[i]],
                                                weight, weight_max,
-                                               x, numrep, numrep,
+                                               x, out_size, numrep,
                                                curstep->arg2,
                                                o+osize, j,
                                                choose_tries,
@@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map,
                                                recurse_to_leaf,
                                                c+osize,
                                                0);
-                                       osize += numrep;
+                                       osize += out_size;
                                }
                        }
 
index 14d9995..593dc2e 100644 (file)
@@ -22,6 +22,7 @@
  *      .../monmap      - current monmap
  *      .../osdc        - active osd requests
  *      .../monc        - mon client state
+ *      .../client_options - libceph-only (i.e. not rbd or cephfs) options
  *      .../dentry_lru  - dump contents of dentry lru
  *      .../caps        - expose cap (reservation) stats
  *      .../bdi         - symlink to ../../bdi/something
@@ -177,10 +178,24 @@ static int osdc_show(struct seq_file *s, void *pp)
        return 0;
 }
 
+static int client_options_show(struct seq_file *s, void *p)
+{
+       struct ceph_client *client = s->private;
+       int ret;
+
+       ret = ceph_print_client_options(s, client);
+       if (ret)
+               return ret;
+
+       seq_putc(s, '\n');
+       return 0;
+}
+
 CEPH_DEFINE_SHOW_FUNC(monmap_show)
 CEPH_DEFINE_SHOW_FUNC(osdmap_show)
 CEPH_DEFINE_SHOW_FUNC(monc_show)
 CEPH_DEFINE_SHOW_FUNC(osdc_show)
+CEPH_DEFINE_SHOW_FUNC(client_options_show)
 
 int ceph_debugfs_init(void)
 {
@@ -242,6 +257,14 @@ int ceph_debugfs_client_init(struct ceph_client *client)
        if (!client->debugfs_osdmap)
                goto out;
 
+       client->debugfs_options = debugfs_create_file("client_options",
+                                       0600,
+                                       client->debugfs_dir,
+                                       client,
+                                       &client_options_show_fops);
+       if (!client->debugfs_options)
+               goto out;
+
        return 0;
 
 out:
@@ -252,6 +275,7 @@ out:
 void ceph_debugfs_client_cleanup(struct ceph_client *client)
 {
        dout("ceph_debugfs_client_cleanup %p\n", client);
+       debugfs_remove(client->debugfs_options);
        debugfs_remove(client->debugfs_osdmap);
        debugfs_remove(client->debugfs_monmap);
        debugfs_remove(client->osdc.debugfs_file);
index a9f4ae4..967080a 100644 (file)
@@ -505,8 +505,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
                pr_err("connect %s error %d\n",
                       ceph_pr_addr(&con->peer_addr.in_addr), ret);
                sock_release(sock);
-               con->error_msg = "connect error";
-
                return ret;
        }
 
@@ -2145,12 +2143,10 @@ static int process_connect(struct ceph_connection *con)
                 * to WAIT.  This shouldn't happen if we are the
                 * client.
                 */
-               pr_err("process_connect got WAIT as client\n");
                con->error_msg = "protocol error, got WAIT as client";
                return -1;
 
        default:
-               pr_err("connect protocol error, will retry\n");
                con->error_msg = "protocol error, garbage tag during connect";
                return -1;
        }
@@ -2282,8 +2278,7 @@ static int read_partial_message(struct ceph_connection *con)
 
        crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
        if (cpu_to_le32(crc) != con->in_hdr.crc) {
-               pr_err("read_partial_message bad hdr "
-                      " crc %u != expected %u\n",
+               pr_err("read_partial_message bad hdr crc %u != expected %u\n",
                       crc, con->in_hdr.crc);
                return -EBADMSG;
        }
@@ -2313,7 +2308,7 @@ static int read_partial_message(struct ceph_connection *con)
                pr_err("read_partial_message bad seq %lld expected %lld\n",
                       seq, con->in_seq + 1);
                con->error_msg = "bad message sequence # for incoming message";
-               return -EBADMSG;
+               return -EBADE;
        }
 
        /* allocate message? */
@@ -2660,6 +2655,8 @@ more:
                        switch (ret) {
                        case -EBADMSG:
                                con->error_msg = "bad crc";
+                               /* fall through */
+                       case -EBADE:
                                ret = -EIO;
                                break;
                        case -EIO:
@@ -2838,7 +2835,8 @@ static void con_work(struct work_struct *work)
                if (ret < 0) {
                        if (ret == -EAGAIN)
                                continue;
-                       con->error_msg = "socket error on read";
+                       if (!con->error_msg)
+                               con->error_msg = "socket error on read";
                        fault = true;
                        break;
                }
@@ -2847,7 +2845,8 @@ static void con_work(struct work_struct *work)
                if (ret < 0) {
                        if (ret == -EAGAIN)
                                continue;
-                       con->error_msg = "socket error on write";
+                       if (!con->error_msg)
+                               con->error_msg = "socket error on write";
                        fault = true;
                }
 
@@ -2869,11 +2868,13 @@ static void con_work(struct work_struct *work)
  */
 static void con_fault(struct ceph_connection *con)
 {
-       pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
-               ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
        dout("fault %p state %lu to peer %s\n",
             con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
 
+       pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+               ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+       con->error_msg = NULL;
+
        WARN_ON(con->state != CON_STATE_CONNECTING &&
               con->state != CON_STATE_NEGOTIATING &&
               con->state != CON_STATE_OPEN);
@@ -3295,8 +3296,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
                 */
                if (*skip)
                        return 0;
-               con->error_msg = "error allocating memory for incoming message";
 
+               con->error_msg = "error allocating memory for incoming message";
                return -ENOMEM;
        }
        memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
index b8c3fde..1579669 100644 (file)
@@ -122,6 +122,22 @@ bad:
        return -EINVAL;
 }
 
+static int crush_decode_straw2_bucket(void **p, void *end,
+                                     struct crush_bucket_straw2 *b)
+{
+       int j;
+       dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
+       b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
+       if (b->item_weights == NULL)
+               return -ENOMEM;
+       ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
+       for (j = 0; j < b->h.size; j++)
+               b->item_weights[j] = ceph_decode_32(p);
+       return 0;
+bad:
+       return -EINVAL;
+}
+
 static int skip_name_map(void **p, void *end)
 {
         int len;
@@ -204,6 +220,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
                case CRUSH_BUCKET_STRAW:
                        size = sizeof(struct crush_bucket_straw);
                        break;
+               case CRUSH_BUCKET_STRAW2:
+                       size = sizeof(struct crush_bucket_straw2);
+                       break;
                default:
                        err = -EINVAL;
                        goto bad;
@@ -261,6 +280,12 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
                        if (err < 0)
                                goto bad;
                        break;
+               case CRUSH_BUCKET_STRAW2:
+                       err = crush_decode_straw2_bucket(p, end,
+                               (struct crush_bucket_straw2 *)b);
+                       if (err < 0)
+                               goto bad;
+                       break;
                }
        }
 
index 3e33959..884e329 100644 (file)
@@ -312,7 +312,7 @@ static const struct super_operations sockfs_ops = {
 static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 {
        return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
-                               dentry->d_inode->i_ino);
+                               d_inode(dentry)->i_ino);
 }
 
 static const struct dentry_operations sockfs_dentry_operations = {
@@ -375,7 +375,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
                  &socket_file_ops);
        if (unlikely(IS_ERR(file))) {
                /* drop dentry, keep inode */
-               ihold(path.dentry->d_inode);
+               ihold(d_inode(path.dentry));
                path_put(&path);
                return file;
        }
@@ -497,7 +497,7 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
        ssize_t len;
        ssize_t used = 0;
 
-       len = security_inode_listsecurity(dentry->d_inode, buffer, size);
+       len = security_inode_listsecurity(d_inode(dentry), buffer, size);
        if (len < 0)
                return len;
        used += len;
index 2d12b76..d81186d 100644 (file)
@@ -94,7 +94,7 @@ rpc_timeout_upcall_queue(struct work_struct *work)
        }
        dentry = dget(pipe->dentry);
        spin_unlock(&pipe->lock);
-       rpc_purge_list(dentry ? &RPC_I(dentry->d_inode)->waitq : NULL,
+       rpc_purge_list(dentry ? &RPC_I(d_inode(dentry))->waitq : NULL,
                        &free_list, destroy_msg, -ETIMEDOUT);
        dput(dentry);
 }
@@ -152,7 +152,7 @@ rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg)
        dentry = dget(pipe->dentry);
        spin_unlock(&pipe->lock);
        if (dentry) {
-               wake_up(&RPC_I(dentry->d_inode)->waitq);
+               wake_up(&RPC_I(d_inode(dentry))->waitq);
                dput(dentry);
        }
        return res;
@@ -591,7 +591,7 @@ static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry,
        err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private);
        if (err)
                return err;
-       rpci = RPC_I(dentry->d_inode);
+       rpci = RPC_I(d_inode(dentry));
        rpci->private = private;
        rpci->pipe = pipe;
        fsnotify_create(dir, dentry);
@@ -616,7 +616,7 @@ int rpc_rmdir(struct dentry *dentry)
        int error;
 
        parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
        error = __rpc_rmdir(dir, dentry);
        mutex_unlock(&dir->i_mutex);
@@ -638,7 +638,7 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
 
 static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
 
        rpc_close_pipes(inode);
        return __rpc_unlink(dir, dentry);
@@ -654,7 +654,7 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
                if (!dentry)
                        return ERR_PTR(-ENOMEM);
        }
-       if (dentry->d_inode == NULL)
+       if (d_really_is_negative(dentry))
                return dentry;
        dput(dentry);
        return ERR_PTR(-EEXIST);
@@ -667,7 +667,7 @@ static void __rpc_depopulate(struct dentry *parent,
                             const struct rpc_filelist *files,
                             int start, int eof)
 {
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        struct dentry *dentry;
        struct qstr name;
        int i;
@@ -679,9 +679,9 @@ static void __rpc_depopulate(struct dentry *parent,
 
                if (dentry == NULL)
                        continue;
-               if (dentry->d_inode == NULL)
+               if (d_really_is_negative(dentry))
                        goto next;
-               switch (dentry->d_inode->i_mode & S_IFMT) {
+               switch (d_inode(dentry)->i_mode & S_IFMT) {
                        default:
                                BUG();
                        case S_IFREG:
@@ -699,7 +699,7 @@ static void rpc_depopulate(struct dentry *parent,
                           const struct rpc_filelist *files,
                           int start, int eof)
 {
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
 
        mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
        __rpc_depopulate(parent, files, start, eof);
@@ -711,7 +711,7 @@ static int rpc_populate(struct dentry *parent,
                        int start, int eof,
                        void *private)
 {
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        struct dentry *dentry;
        int i, err;
 
@@ -754,7 +754,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
                int (*populate)(struct dentry *, void *), void *args_populate)
 {
        struct dentry *dentry;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        int error;
 
        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
@@ -787,7 +787,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
        int error;
 
        parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
        if (depopulate != NULL)
                depopulate(dentry);
@@ -819,7 +819,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
                                 void *private, struct rpc_pipe *pipe)
 {
        struct dentry *dentry;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
        umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR;
        int err;
 
@@ -864,7 +864,7 @@ rpc_unlink(struct dentry *dentry)
        int error = 0;
 
        parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
        mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
        error = __rpc_rmpipe(dir, dentry);
        mutex_unlock(&dir->i_mutex);
@@ -1375,7 +1375,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
        struct dentry *clnt_dir = pipe_dentry->d_parent;
        struct dentry *gssd_dir = clnt_dir->d_parent;
 
-       __rpc_rmpipe(clnt_dir->d_inode, pipe_dentry);
+       __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry);
        __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1);
        __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1);
        dput(pipe_dentry);
index b91fd9c..337ca85 100644 (file)
@@ -89,8 +89,8 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
        if (!task->tk_timeout)
                return;
 
-       dprintk("RPC: %5u setting alarm for %lu ms\n",
-                       task->tk_pid, task->tk_timeout * 1000 / HZ);
+       dprintk("RPC: %5u setting alarm for %u ms\n",
+               task->tk_pid, jiffies_to_msecs(task->tk_timeout));
 
        task->u.tk_wait.expires = jiffies + task->tk_timeout;
        if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires))
index 9949722..1d4fe24 100644 (file)
@@ -326,6 +326,15 @@ out_unlock:
        xprt_clear_locked(xprt);
 }
 
+static void xprt_task_clear_bytes_sent(struct rpc_task *task)
+{
+       if (task != NULL) {
+               struct rpc_rqst *req = task->tk_rqstp;
+               if (req != NULL)
+                       req->rq_bytes_sent = 0;
+       }
+}
+
 /**
  * xprt_release_xprt - allow other requests to use a transport
  * @xprt: transport with other tasks potentially waiting
@@ -336,11 +345,7 @@ out_unlock:
 void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
        if (xprt->snd_task == task) {
-               if (task != NULL) {
-                       struct rpc_rqst *req = task->tk_rqstp;
-                       if (req != NULL)
-                               req->rq_bytes_sent = 0;
-               }
+               xprt_task_clear_bytes_sent(task);
                xprt_clear_locked(xprt);
                __xprt_lock_write_next(xprt);
        }
@@ -358,11 +363,7 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt);
 void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
        if (xprt->snd_task == task) {
-               if (task != NULL) {
-                       struct rpc_rqst *req = task->tk_rqstp;
-                       if (req != NULL)
-                               req->rq_bytes_sent = 0;
-               }
+               xprt_task_clear_bytes_sent(task);
                xprt_clear_locked(xprt);
                __xprt_lock_write_next_cong(xprt);
        }
@@ -700,6 +701,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
                goto out;
        if (xprt->snd_task != task)
                goto out;
+       xprt_task_clear_bytes_sent(task);
        xprt->snd_task = cookie;
        ret = true;
 out:
index da5136f..579f72b 100644 (file)
@@ -1,6 +1,7 @@
 obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
 
-xprtrdma-y := transport.o rpc_rdma.o verbs.o
+xprtrdma-y := transport.o rpc_rdma.o verbs.o \
+       fmr_ops.o frwr_ops.o physical_ops.o
 
 obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
 
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
new file mode 100644 (file)
index 0000000..302d4eb
--- /dev/null
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ */
+
+/* Lightweight memory registration using Fast Memory Regions (FMR).
+ * Referred to sometimes as MTHCAFMR mode.
+ *
+ * FMR uses synchronous memory registration and deregistration.
+ * FMR registration is known to be fast, but FMR deregistration
+ * can take tens of usecs to complete.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY       RPCDBG_TRANS
+#endif
+
+/* Maximum scatter/gather per FMR */
+#define RPCRDMA_MAX_FMR_SGES   (64)
+
+static int
+fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+           struct rpcrdma_create_data_internal *cdata)
+{
+       return 0;
+}
+
+/* FMR mode conveys up to 64 pages of payload per chunk segment.
+ */
+static size_t
+fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+       return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+                    rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
+}
+
+static int
+fmr_op_init(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
+       struct ib_fmr_attr fmr_attr = {
+               .max_pages      = RPCRDMA_MAX_FMR_SGES,
+               .max_maps       = 1,
+               .page_shift     = PAGE_SHIFT
+       };
+       struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+       struct rpcrdma_mw *r;
+       int i, rc;
+
+       INIT_LIST_HEAD(&buf->rb_mws);
+       INIT_LIST_HEAD(&buf->rb_all);
+
+       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+       dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);
+
+       while (i--) {
+               r = kzalloc(sizeof(*r), GFP_KERNEL);
+               if (!r)
+                       return -ENOMEM;
+
+               r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
+               if (IS_ERR(r->r.fmr))
+                       goto out_fmr_err;
+
+               list_add(&r->mw_list, &buf->rb_mws);
+               list_add(&r->mw_all, &buf->rb_all);
+       }
+       return 0;
+
+out_fmr_err:
+       rc = PTR_ERR(r->r.fmr);
+       dprintk("RPC:       %s: ib_alloc_fmr status %i\n", __func__, rc);
+       kfree(r);
+       return rc;
+}
+
+/* Use the ib_map_phys_fmr() verb to register a memory region
+ * for remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+          int nsegs, bool writing)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct ib_device *device = ia->ri_id->device;
+       enum dma_data_direction direction = rpcrdma_data_dir(writing);
+       struct rpcrdma_mr_seg *seg1 = seg;
+       struct rpcrdma_mw *mw = seg1->rl_mw;
+       u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+       int len, pageoff, i, rc;
+
+       pageoff = offset_in_page(seg1->mr_offset);
+       seg1->mr_offset -= pageoff;     /* start of page */
+       seg1->mr_len += pageoff;
+       len = -pageoff;
+       if (nsegs > RPCRDMA_MAX_FMR_SGES)
+               nsegs = RPCRDMA_MAX_FMR_SGES;
+       for (i = 0; i < nsegs;) {
+               rpcrdma_map_one(device, seg, direction);
+               physaddrs[i] = seg->mr_dma;
+               len += seg->mr_len;
+               ++seg;
+               ++i;
+               /* Check for holes */
+               if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+                   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+                       break;
+       }
+
+       rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
+       if (rc)
+               goto out_maperr;
+
+       seg1->mr_rkey = mw->r.fmr->rkey;
+       seg1->mr_base = seg1->mr_dma + pageoff;
+       seg1->mr_nsegs = i;
+       seg1->mr_len = len;
+       return i;
+
+out_maperr:
+       dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
+               __func__, len, (unsigned long long)seg1->mr_dma,
+               pageoff, i, rc);
+       while (i--)
+               rpcrdma_unmap_one(device, --seg);
+       return rc;
+}
+
+/* Use the ib_unmap_fmr() verb to prevent further remote
+ * access via RDMA READ or RDMA WRITE.
+ */
+static int
+fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct rpcrdma_mr_seg *seg1 = seg;
+       struct ib_device *device;
+       int rc, nsegs = seg->mr_nsegs;
+       LIST_HEAD(l);
+
+       list_add(&seg1->rl_mw->r.fmr->list, &l);
+       rc = ib_unmap_fmr(&l);
+       read_lock(&ia->ri_qplock);
+       device = ia->ri_id->device;
+       while (seg1->mr_nsegs--)
+               rpcrdma_unmap_one(device, seg++);
+       read_unlock(&ia->ri_qplock);
+       if (rc)
+               goto out_err;
+       return nsegs;
+
+out_err:
+       dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
+       return nsegs;
+}
+
+/* After a disconnect, unmap all FMRs.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_fmr_external().
+ */
+static void
+fmr_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       struct rpcrdma_mw *r;
+       LIST_HEAD(list);
+       int rc;
+
+       list_for_each_entry(r, &buf->rb_all, mw_all)
+               list_add(&r->r.fmr->list, &list);
+
+       rc = ib_unmap_fmr(&list);
+       if (rc)
+               dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
+                       __func__, rc);
+}
+
+static void
+fmr_op_destroy(struct rpcrdma_buffer *buf)
+{
+       struct rpcrdma_mw *r;
+       int rc;
+
+       while (!list_empty(&buf->rb_all)) {
+               r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+               list_del(&r->mw_all);
+               rc = ib_dealloc_fmr(r->r.fmr);
+               if (rc)
+                       dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
+                               __func__, rc);
+               kfree(r);
+       }
+}
+
+const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
+       .ro_map                         = fmr_op_map,
+       .ro_unmap                       = fmr_op_unmap,
+       .ro_open                        = fmr_op_open,
+       .ro_maxpages                    = fmr_op_maxpages,
+       .ro_init                        = fmr_op_init,
+       .ro_reset                       = fmr_op_reset,
+       .ro_destroy                     = fmr_op_destroy,
+       .ro_displayname                 = "fmr",
+};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
new file mode 100644 (file)
index 0000000..dff0481
--- /dev/null
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ */
+
+/* Lightweight memory registration using Fast Registration Work
+ * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ *
+ * FRWR features ordered asynchronous registration and deregistration
+ * of arbitrarily sized memory regions. This is the fastest and safest
+ * but most complex memory registration mode.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY       RPCDBG_TRANS
+#endif
+
+static int
+__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
+           unsigned int depth)
+{
+       struct rpcrdma_frmr *f = &r->r.frmr;
+       int rc;
+
+       f->fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+       if (IS_ERR(f->fr_mr))
+               goto out_mr_err;
+       f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
+       if (IS_ERR(f->fr_pgl))
+               goto out_list_err;
+       return 0;
+
+out_mr_err:
+       rc = PTR_ERR(f->fr_mr);
+       dprintk("RPC:       %s: ib_alloc_fast_reg_mr status %i\n",
+               __func__, rc);
+       return rc;
+
+out_list_err:
+       rc = PTR_ERR(f->fr_pgl);
+       dprintk("RPC:       %s: ib_alloc_fast_reg_page_list status %i\n",
+               __func__, rc);
+       ib_dereg_mr(f->fr_mr);
+       return rc;
+}
+
+static void
+__frwr_release(struct rpcrdma_mw *r)
+{
+       int rc;
+
+       rc = ib_dereg_mr(r->r.frmr.fr_mr);
+       if (rc)
+               dprintk("RPC:       %s: ib_dereg_mr status %i\n",
+                       __func__, rc);
+       ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+}
+
+static int
+frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+            struct rpcrdma_create_data_internal *cdata)
+{
+       struct ib_device_attr *devattr = &ia->ri_devattr;
+       int depth, delta;
+
+       ia->ri_max_frmr_depth =
+                       min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+                             devattr->max_fast_reg_page_list_len);
+       dprintk("RPC:       %s: device's max FR page list len = %u\n",
+               __func__, ia->ri_max_frmr_depth);
+
+       /* Add room for frmr register and invalidate WRs.
+        * 1. FRMR reg WR for head
+        * 2. FRMR invalidate WR for head
+        * 3. N FRMR reg WRs for pagelist
+        * 4. N FRMR invalidate WRs for pagelist
+        * 5. FRMR reg WR for tail
+        * 6. FRMR invalidate WR for tail
+        * 7. The RDMA_SEND WR
+        */
+       depth = 7;
+
+       /* Calculate N if the device max FRMR depth is smaller than
+        * RPCRDMA_MAX_DATA_SEGS.
+        */
+       if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+               delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+               do {
+                       depth += 2; /* FRMR reg + invalidate */
+                       delta -= ia->ri_max_frmr_depth;
+               } while (delta > 0);
+       }
+
+       ep->rep_attr.cap.max_send_wr *= depth;
+       if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
+               cdata->max_requests = devattr->max_qp_wr / depth;
+               if (!cdata->max_requests)
+                       return -EINVAL;
+               ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+                                              depth;
+       }
+
+       return 0;
+}
+
+/* FRWR mode conveys a list of pages per chunk segment. The
+ * maximum length of that list is the FRWR page list depth.
+ */
+static size_t
+frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+       return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+                    rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
+}
+
+/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */
+static void
+frwr_sendcompletion(struct ib_wc *wc)
+{
+       struct rpcrdma_mw *r;
+
+       if (likely(wc->status == IB_WC_SUCCESS))
+               return;
+
+       /* WARNING: Only wr_id and status are reliable at this point */
+       r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+       dprintk("RPC:       %s: frmr %p (stale), status %d\n",
+               __func__, r, wc->status);
+       r->r.frmr.fr_state = FRMR_IS_STALE;
+}
+
+static int
+frwr_op_init(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+       unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+       struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+       int i;
+
+       INIT_LIST_HEAD(&buf->rb_mws);
+       INIT_LIST_HEAD(&buf->rb_all);
+
+       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+       dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);
+
+       while (i--) {
+               struct rpcrdma_mw *r;
+               int rc;
+
+               r = kzalloc(sizeof(*r), GFP_KERNEL);
+               if (!r)
+                       return -ENOMEM;
+
+               rc = __frwr_init(r, pd, device, depth);
+               if (rc) {
+                       kfree(r);
+                       return rc;
+               }
+
+               list_add(&r->mw_list, &buf->rb_mws);
+               list_add(&r->mw_all, &buf->rb_all);
+               r->mw_sendcompletion = frwr_sendcompletion;
+       }
+
+       return 0;
+}
+
+/* Post a FAST_REG Work Request to register a memory region
+ * for remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+           int nsegs, bool writing)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct ib_device *device = ia->ri_id->device;
+       enum dma_data_direction direction = rpcrdma_data_dir(writing);
+       struct rpcrdma_mr_seg *seg1 = seg;
+       struct rpcrdma_mw *mw = seg1->rl_mw;
+       struct rpcrdma_frmr *frmr = &mw->r.frmr;
+       struct ib_mr *mr = frmr->fr_mr;
+       struct ib_send_wr fastreg_wr, *bad_wr;
+       u8 key;
+       int len, pageoff;
+       int i, rc;
+       int seg_len;
+       u64 pa;
+       int page_no;
+
+       pageoff = offset_in_page(seg1->mr_offset);
+       seg1->mr_offset -= pageoff;     /* start of page */
+       seg1->mr_len += pageoff;
+       len = -pageoff;
+       if (nsegs > ia->ri_max_frmr_depth)
+               nsegs = ia->ri_max_frmr_depth;
+       for (page_no = i = 0; i < nsegs;) {
+               rpcrdma_map_one(device, seg, direction);
+               pa = seg->mr_dma;
+               for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
+                       frmr->fr_pgl->page_list[page_no++] = pa;
+                       pa += PAGE_SIZE;
+               }
+               len += seg->mr_len;
+               ++seg;
+               ++i;
+               /* Check for holes */
+               if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+                   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+                       break;
+       }
+       dprintk("RPC:       %s: Using frmr %p to map %d segments (%d bytes)\n",
+               __func__, mw, i, len);
+
+       frmr->fr_state = FRMR_IS_VALID;
+
+       memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+       fastreg_wr.wr_id = (unsigned long)(void *)mw;
+       fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+       fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
+       fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
+       fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+       fastreg_wr.wr.fast_reg.page_list_len = page_no;
+       fastreg_wr.wr.fast_reg.length = len;
+       fastreg_wr.wr.fast_reg.access_flags = writing ?
+                               IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+                               IB_ACCESS_REMOTE_READ;
+       key = (u8)(mr->rkey & 0x000000FF);
+       ib_update_fast_reg_key(mr, ++key);
+       fastreg_wr.wr.fast_reg.rkey = mr->rkey;
+
+       DECR_CQCOUNT(&r_xprt->rx_ep);
+       rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
+       if (rc)
+               goto out_senderr;
+
+       seg1->mr_rkey = mr->rkey;
+       seg1->mr_base = seg1->mr_dma + pageoff;
+       seg1->mr_nsegs = i;
+       seg1->mr_len = len;
+       return i;
+
+out_senderr:
+       dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
+       ib_update_fast_reg_key(mr, --key);
+       frmr->fr_state = FRMR_IS_INVALID;
+       while (i--)
+               rpcrdma_unmap_one(device, --seg);
+       return rc;
+}
+
+/* Post a LOCAL_INV Work Request to prevent further remote access
+ * via RDMA READ or RDMA WRITE.
+ */
+static int
+frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+       struct rpcrdma_mr_seg *seg1 = seg;
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct ib_send_wr invalidate_wr, *bad_wr;
+       int rc, nsegs = seg->mr_nsegs;
+       struct ib_device *device;
+
+       seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
+
+       memset(&invalidate_wr, 0, sizeof(invalidate_wr));
+       invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
+       invalidate_wr.opcode = IB_WR_LOCAL_INV;
+       invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
+       DECR_CQCOUNT(&r_xprt->rx_ep);
+
+       read_lock(&ia->ri_qplock);
+       device = ia->ri_id->device;
+       while (seg1->mr_nsegs--)
+               rpcrdma_unmap_one(device, seg++);
+       rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+       read_unlock(&ia->ri_qplock);
+       if (rc)
+               goto out_err;
+       return nsegs;
+
+out_err:
+       /* Force rpcrdma_buffer_get() to retry */
+       seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
+       dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
+       return nsegs;
+}
+
+/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
+ * an unusable state. Find FRMRs in this state and dereg / reg
+ * each.  FRMRs that are VALID and attached to an rpcrdma_req are
+ * also torn down.
+ *
+ * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_frmr_external().
+ */
+static void
+frwr_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+       unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+       struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+       struct rpcrdma_mw *r;
+       int rc;
+
+       list_for_each_entry(r, &buf->rb_all, mw_all) {
+               if (r->r.frmr.fr_state == FRMR_IS_INVALID)
+                       continue;
+
+               __frwr_release(r);
+               rc = __frwr_init(r, pd, device, depth);
+               if (rc) {
+                       dprintk("RPC:       %s: mw %p left %s\n",
+                               __func__, r,
+                               (r->r.frmr.fr_state == FRMR_IS_STALE ?
+                                       "stale" : "valid"));
+                       continue;
+               }
+
+               r->r.frmr.fr_state = FRMR_IS_INVALID;
+       }
+}
+
+static void
+frwr_op_destroy(struct rpcrdma_buffer *buf)
+{
+       struct rpcrdma_mw *r;
+
+       while (!list_empty(&buf->rb_all)) {
+               r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+               list_del(&r->mw_all);
+               __frwr_release(r);
+               kfree(r);
+       }
+}
+
+const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
+       .ro_map                         = frwr_op_map,
+       .ro_unmap                       = frwr_op_unmap,
+       .ro_open                        = frwr_op_open,
+       .ro_maxpages                    = frwr_op_maxpages,
+       .ro_init                        = frwr_op_init,
+       .ro_reset                       = frwr_op_reset,
+       .ro_destroy                     = frwr_op_destroy,
+       .ro_displayname                 = "frwr",
+};
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c
new file mode 100644 (file)
index 0000000..ba518af
--- /dev/null
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ */
+
+/* No-op chunk preparation. All client memory is pre-registered.
+ * Sometimes referred to as ALLPHYSICAL mode.
+ *
+ * Physical registration is simple because all client memory is
+ * pre-registered and never deregistered. This mode is good for
+ * adapter bring up, but is considered not safe: the server is
+ * trusted not to abuse its access to client memory not involved
+ * in RDMA I/O.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY       RPCDBG_TRANS
+#endif
+
+static int
+physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+                struct rpcrdma_create_data_internal *cdata)
+{
+       return 0;
+}
+
+/* PHYSICAL memory registration conveys one page per chunk segment.
+ */
+static size_t
+physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+       return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+                    rpcrdma_max_segments(r_xprt));
+}
+
+static int
+physical_op_init(struct rpcrdma_xprt *r_xprt)
+{
+       return 0;
+}
+
+/* The client's physical memory is already exposed for
+ * remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+               int nsegs, bool writing)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+       rpcrdma_map_one(ia->ri_id->device, seg,
+                       rpcrdma_data_dir(writing));
+       seg->mr_rkey = ia->ri_bind_mem->rkey;
+       seg->mr_base = seg->mr_dma;
+       seg->mr_nsegs = 1;
+       return 1;
+}
+
+/* Unmap a memory region, but leave it registered.
+ */
+static int
+physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+       read_lock(&ia->ri_qplock);
+       rpcrdma_unmap_one(ia->ri_id->device, seg);
+       read_unlock(&ia->ri_qplock);
+
+       return 1;
+}
+
+static void
+physical_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+}
+
+static void
+physical_op_destroy(struct rpcrdma_buffer *buf)
+{
+}
+
+const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
+       .ro_map                         = physical_op_map,
+       .ro_unmap                       = physical_op_unmap,
+       .ro_open                        = physical_op_open,
+       .ro_maxpages                    = physical_op_maxpages,
+       .ro_init                        = physical_op_init,
+       .ro_reset                       = physical_op_reset,
+       .ro_destroy                     = physical_op_destroy,
+       .ro_displayname                 = "physical",
+};
index 91ffde8..2c53ea9 100644 (file)
 # define RPCDBG_FACILITY       RPCDBG_TRANS
 #endif
 
+enum rpcrdma_chunktype {
+       rpcrdma_noch = 0,
+       rpcrdma_readch,
+       rpcrdma_areadch,
+       rpcrdma_writech,
+       rpcrdma_replych
+};
+
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 static const char transfertypes[][12] = {
        "pure inline",  /* no chunks */
@@ -179,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
        struct rpcrdma_write_array *warray = NULL;
        struct rpcrdma_write_chunk *cur_wchunk = NULL;
        __be32 *iptr = headerp->rm_body.rm_chunks;
+       int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool);
 
        if (type == rpcrdma_readch || type == rpcrdma_areadch) {
                /* a read chunk - server will RDMA Read our memory */
@@ -201,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
        if (nsegs < 0)
                return nsegs;
 
+       map = r_xprt->rx_ia.ri_ops->ro_map;
        do {
-               n = rpcrdma_register_external(seg, nsegs,
-                                               cur_wchunk != NULL, r_xprt);
+               n = map(r_xprt, seg, nsegs, cur_wchunk != NULL);
                if (n <= 0)
                        goto out;
                if (cur_rchunk) {       /* read */
@@ -275,34 +284,13 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
        return (unsigned char *)iptr - (unsigned char *)headerp;
 
 out:
-       if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
-               for (pos = 0; nchunks--;)
-                       pos += rpcrdma_deregister_external(
-                                       &req->rl_segments[pos], r_xprt);
-       }
-       return n;
-}
+       if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+               return n;
 
-/*
- * Marshal chunks. This routine returns the header length
- * consumed by marshaling.
- *
- * Returns positive RPC/RDMA header size, or negative errno.
- */
-
-ssize_t
-rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
-{
-       struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
-       struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf);
-
-       if (req->rl_rtype != rpcrdma_noch)
-               result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
-                                              headerp, req->rl_rtype);
-       else if (req->rl_wtype != rpcrdma_noch)
-               result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
-                                              headerp, req->rl_wtype);
-       return result;
+       for (pos = 0; nchunks--;)
+               pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
+                                                     &req->rl_segments[pos]);
+       return n;
 }
 
 /*
@@ -397,6 +385,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
        char *base;
        size_t rpclen, padlen;
        ssize_t hdrlen;
+       enum rpcrdma_chunktype rtype, wtype;
        struct rpcrdma_msg *headerp;
 
        /*
@@ -433,13 +422,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
         * into pages; otherwise use reply chunks.
         */
        if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
-               req->rl_wtype = rpcrdma_noch;
+               wtype = rpcrdma_noch;
        else if (rqst->rq_rcv_buf.page_len == 0)
-               req->rl_wtype = rpcrdma_replych;
+               wtype = rpcrdma_replych;
        else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
-               req->rl_wtype = rpcrdma_writech;
+               wtype = rpcrdma_writech;
        else
-               req->rl_wtype = rpcrdma_replych;
+               wtype = rpcrdma_replych;
 
        /*
         * Chunks needed for arguments?
@@ -456,16 +445,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
         * TBD check NFSv4 setacl
         */
        if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
-               req->rl_rtype = rpcrdma_noch;
+               rtype = rpcrdma_noch;
        else if (rqst->rq_snd_buf.page_len == 0)
-               req->rl_rtype = rpcrdma_areadch;
+               rtype = rpcrdma_areadch;
        else
-               req->rl_rtype = rpcrdma_readch;
+               rtype = rpcrdma_readch;
 
        /* The following simplification is not true forever */
-       if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
-               req->rl_wtype = rpcrdma_noch;
-       if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
+       if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
+               wtype = rpcrdma_noch;
+       if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
                dprintk("RPC:       %s: cannot marshal multiple chunk lists\n",
                        __func__);
                return -EIO;
@@ -479,7 +468,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
         * When padding is in use and applies to the transfer, insert
         * it and change the message type.
         */
-       if (req->rl_rtype == rpcrdma_noch) {
+       if (rtype == rpcrdma_noch) {
 
                padlen = rpcrdma_inline_pullup(rqst,
                                                RPCRDMA_INLINE_PAD_VALUE(rqst));
@@ -494,7 +483,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
                        headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
                        headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
                        hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
-                       if (req->rl_wtype != rpcrdma_noch) {
+                       if (wtype != rpcrdma_noch) {
                                dprintk("RPC:       %s: invalid chunk list\n",
                                        __func__);
                                return -EIO;
@@ -515,18 +504,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
                         * on receive. Therefore, we request a reply chunk
                         * for non-writes wherever feasible and efficient.
                         */
-                       if (req->rl_wtype == rpcrdma_noch)
-                               req->rl_wtype = rpcrdma_replych;
+                       if (wtype == rpcrdma_noch)
+                               wtype = rpcrdma_replych;
                }
        }
 
-       hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
+       if (rtype != rpcrdma_noch) {
+               hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
+                                              headerp, rtype);
+               wtype = rtype;  /* simplify dprintk */
+
+       } else if (wtype != rpcrdma_noch) {
+               hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
+                                              headerp, wtype);
+       }
        if (hdrlen < 0)
                return hdrlen;
 
        dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd"
                " headerp 0x%p base 0x%p lkey 0x%x\n",
-               __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
+               __func__, transfertypes[wtype], hdrlen, rpclen, padlen,
                headerp, base, rdmab_lkey(req->rl_rdmabuf));
 
        /*
index 2e192ba..54f23b1 100644 (file)
@@ -156,13 +156,48 @@ static struct ctl_table sunrpc_table[] = {
 
 static struct rpc_xprt_ops xprt_rdma_procs;    /* forward reference */
 
+static void
+xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
+{
+       struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+       char buf[20];
+
+       snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
+       xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
+
+       xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA;
+}
+
+static void
+xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
+{
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+       char buf[40];
+
+       snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
+       xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
+
+       xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
+}
+
 static void
 xprt_rdma_format_addresses(struct rpc_xprt *xprt)
 {
        struct sockaddr *sap = (struct sockaddr *)
                                        &rpcx_to_rdmad(xprt).addr;
-       struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-       char buf[64];
+       char buf[128];
+
+       switch (sap->sa_family) {
+       case AF_INET:
+               xprt_rdma_format_addresses4(xprt, sap);
+               break;
+       case AF_INET6:
+               xprt_rdma_format_addresses6(xprt, sap);
+               break;
+       default:
+               pr_err("rpcrdma: Unrecognized address family\n");
+               return;
+       }
 
        (void)rpc_ntop(sap, buf, sizeof(buf));
        xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
@@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
        snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
        xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
 
-       xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
-
-       snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
-       xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
-
        snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
        xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
 
-       /* netid */
-       xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
+       xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
 }
 
 static void
@@ -377,7 +406,10 @@ xprt_setup_rdma(struct xprt_create *args)
                          xprt_rdma_connect_worker);
 
        xprt_rdma_format_addresses(xprt);
-       xprt->max_payload = rpcrdma_max_payload(new_xprt);
+       xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
+       if (xprt->max_payload == 0)
+               goto out4;
+       xprt->max_payload <<= PAGE_SHIFT;
        dprintk("RPC:       %s: transport data payload maximum: %zu bytes\n",
                __func__, xprt->max_payload);
 
@@ -552,8 +584,8 @@ xprt_rdma_free(void *buffer)
 
        for (i = 0; req->rl_nchunks;) {
                --req->rl_nchunks;
-               i += rpcrdma_deregister_external(
-                       &req->rl_segments[i], r_xprt);
+               i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
+                                                   &req->rl_segments[i]);
        }
 
        rpcrdma_buffer_put(req);
@@ -579,10 +611,7 @@ xprt_rdma_send_request(struct rpc_task *task)
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
        int rc = 0;
 
-       if (req->rl_niovs == 0)
-               rc = rpcrdma_marshal_req(rqst);
-       else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
-               rc = rpcrdma_marshal_chunks(rqst, 0);
+       rc = rpcrdma_marshal_req(rqst);
        if (rc < 0)
                goto failed_marshal;
 
index e28909f..4870d27 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/prefetch.h>
+#include <linux/sunrpc/addr.h>
 #include <asm/bitops.h>
 
 #include "xprt_rdma.h"
@@ -62,9 +63,6 @@
 # define RPCDBG_FACILITY       RPCDBG_TRANS
 #endif
 
-static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
-static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
-
 /*
  * internal functions
  */
@@ -188,7 +186,7 @@ static const char * const wc_status[] = {
        "remote access error",
        "remote operation error",
        "transport retry counter exceeded",
-       "RNR retrycounter exceeded",
+       "RNR retry counter exceeded",
        "local RDD violation error",
        "remove invalid RD request",
        "operation aborted",
@@ -206,21 +204,17 @@ static const char * const wc_status[] = {
 static void
 rpcrdma_sendcq_process_wc(struct ib_wc *wc)
 {
-       if (likely(wc->status == IB_WC_SUCCESS))
-               return;
-
        /* WARNING: Only wr_id and status are reliable at this point */
-       if (wc->wr_id == 0ULL) {
-               if (wc->status != IB_WC_WR_FLUSH_ERR)
+       if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) {
+               if (wc->status != IB_WC_SUCCESS &&
+                   wc->status != IB_WC_WR_FLUSH_ERR)
                        pr_err("RPC:       %s: SEND: %s\n",
                               __func__, COMPLETION_MSG(wc->status));
        } else {
                struct rpcrdma_mw *r;
 
                r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
-               r->r.frmr.fr_state = FRMR_IS_STALE;
-               pr_err("RPC:       %s: frmr %p (stale): %s\n",
-                      __func__, r, COMPLETION_MSG(wc->status));
+               r->mw_sendcompletion(wc);
        }
 }
 
@@ -424,7 +418,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
        struct rpcrdma_ia *ia = &xprt->rx_ia;
        struct rpcrdma_ep *ep = &xprt->rx_ep;
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-       struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
+       struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
 #endif
        struct ib_qp_attr *attr = &ia->ri_qp_attr;
        struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
@@ -480,9 +474,8 @@ connected:
                wake_up_all(&ep->rep_connect_wait);
                /*FALLTHROUGH*/
        default:
-               dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
-                       __func__, &addr->sin_addr.s_addr,
-                       ntohs(addr->sin_port), ep,
+               dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
+                       __func__, sap, rpc_get_port(sap), ep,
                        CONNECTION_MSG(event->event));
                break;
        }
@@ -491,19 +484,16 @@ connected:
        if (connstate == 1) {
                int ird = attr->max_dest_rd_atomic;
                int tird = ep->rep_remote_cma.responder_resources;
-               printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
-                       "on %s, memreg %d slots %d ird %d%s\n",
-                       &addr->sin_addr.s_addr,
-                       ntohs(addr->sin_port),
+
+               pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
+                       sap, rpc_get_port(sap),
                        ia->ri_id->device->name,
-                       ia->ri_memreg_strategy,
+                       ia->ri_ops->ro_displayname,
                        xprt->rx_buf.rb_max_requests,
                        ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
        } else if (connstate < 0) {
-               printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
-                       &addr->sin_addr.s_addr,
-                       ntohs(addr->sin_port),
-                       connstate);
+               pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
+                       sap, rpc_get_port(sap), connstate);
        }
 #endif
 
@@ -621,17 +611,13 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
 
        if (memreg == RPCRDMA_FRMR) {
                /* Requires both frmr reg and local dma lkey */
-               if ((devattr->device_cap_flags &
+               if (((devattr->device_cap_flags &
                     (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
-                   (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
+                   (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
+                     (devattr->max_fast_reg_page_list_len == 0)) {
                        dprintk("RPC:       %s: FRMR registration "
                                "not supported by HCA\n", __func__);
                        memreg = RPCRDMA_MTHCAFMR;
-               } else {
-                       /* Mind the ia limit on FRMR page list depth */
-                       ia->ri_max_frmr_depth = min_t(unsigned int,
-                               RPCRDMA_MAX_DATA_SEGS,
-                               devattr->max_fast_reg_page_list_len);
                }
        }
        if (memreg == RPCRDMA_MTHCAFMR) {
@@ -652,13 +638,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
         */
        switch (memreg) {
        case RPCRDMA_FRMR:
+               ia->ri_ops = &rpcrdma_frwr_memreg_ops;
                break;
        case RPCRDMA_ALLPHYSICAL:
+               ia->ri_ops = &rpcrdma_physical_memreg_ops;
                mem_priv = IB_ACCESS_LOCAL_WRITE |
                                IB_ACCESS_REMOTE_WRITE |
                                IB_ACCESS_REMOTE_READ;
                goto register_setup;
        case RPCRDMA_MTHCAFMR:
+               ia->ri_ops = &rpcrdma_fmr_memreg_ops;
                if (ia->ri_have_dma_lkey)
                        break;
                mem_priv = IB_ACCESS_LOCAL_WRITE;
@@ -678,8 +667,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                rc = -ENOMEM;
                goto out3;
        }
-       dprintk("RPC:       %s: memory registration strategy is %d\n",
-               __func__, memreg);
+       dprintk("RPC:       %s: memory registration strategy is '%s'\n",
+               __func__, ia->ri_ops->ro_displayname);
 
        /* Else will do memory reg/dereg for each chunk */
        ia->ri_memreg_strategy = memreg;
@@ -743,49 +732,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
        ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
        ep->rep_attr.qp_context = ep;
-       /* send_cq and recv_cq initialized below */
        ep->rep_attr.srq = NULL;
        ep->rep_attr.cap.max_send_wr = cdata->max_requests;
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR: {
-               int depth = 7;
-
-               /* Add room for frmr register and invalidate WRs.
-                * 1. FRMR reg WR for head
-                * 2. FRMR invalidate WR for head
-                * 3. N FRMR reg WRs for pagelist
-                * 4. N FRMR invalidate WRs for pagelist
-                * 5. FRMR reg WR for tail
-                * 6. FRMR invalidate WR for tail
-                * 7. The RDMA_SEND WR
-                */
-
-               /* Calculate N if the device max FRMR depth is smaller than
-                * RPCRDMA_MAX_DATA_SEGS.
-                */
-               if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
-                       int delta = RPCRDMA_MAX_DATA_SEGS -
-                                   ia->ri_max_frmr_depth;
-
-                       do {
-                               depth += 2; /* FRMR reg + invalidate */
-                               delta -= ia->ri_max_frmr_depth;
-                       } while (delta > 0);
-
-               }
-               ep->rep_attr.cap.max_send_wr *= depth;
-               if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
-                       cdata->max_requests = devattr->max_qp_wr / depth;
-                       if (!cdata->max_requests)
-                               return -EINVAL;
-                       ep->rep_attr.cap.max_send_wr = cdata->max_requests *
-                                                      depth;
-               }
-               break;
-       }
-       default:
-               break;
-       }
+       rc = ia->ri_ops->ro_open(ia, ep, cdata);
+       if (rc)
+               return rc;
        ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
        ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
        ep->rep_attr.cap.max_recv_sge = 1;
@@ -944,21 +895,9 @@ retry:
                rpcrdma_ep_disconnect(ep, ia);
                rpcrdma_flush_cqs(ep);
 
-               switch (ia->ri_memreg_strategy) {
-               case RPCRDMA_FRMR:
-                       rpcrdma_reset_frmrs(ia);
-                       break;
-               case RPCRDMA_MTHCAFMR:
-                       rpcrdma_reset_fmrs(ia);
-                       break;
-               case RPCRDMA_ALLPHYSICAL:
-                       break;
-               default:
-                       rc = -EIO;
-                       goto out;
-               }
-
                xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
+               ia->ri_ops->ro_reset(xprt);
+
                id = rpcrdma_create_id(xprt, ia,
                                (struct sockaddr *)&xprt->rx_data.addr);
                if (IS_ERR(id)) {
@@ -1123,91 +1062,6 @@ out:
        return ERR_PTR(rc);
 }
 
-static int
-rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
-{
-       int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
-       struct ib_fmr_attr fmr_attr = {
-               .max_pages      = RPCRDMA_MAX_DATA_SEGS,
-               .max_maps       = 1,
-               .page_shift     = PAGE_SHIFT
-       };
-       struct rpcrdma_mw *r;
-       int i, rc;
-
-       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-       dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);
-
-       while (i--) {
-               r = kzalloc(sizeof(*r), GFP_KERNEL);
-               if (r == NULL)
-                       return -ENOMEM;
-
-               r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
-               if (IS_ERR(r->r.fmr)) {
-                       rc = PTR_ERR(r->r.fmr);
-                       dprintk("RPC:       %s: ib_alloc_fmr failed %i\n",
-                               __func__, rc);
-                       goto out_free;
-               }
-
-               list_add(&r->mw_list, &buf->rb_mws);
-               list_add(&r->mw_all, &buf->rb_all);
-       }
-       return 0;
-
-out_free:
-       kfree(r);
-       return rc;
-}
-
-static int
-rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_frmr *f;
-       struct rpcrdma_mw *r;
-       int i, rc;
-
-       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-       dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);
-
-       while (i--) {
-               r = kzalloc(sizeof(*r), GFP_KERNEL);
-               if (r == NULL)
-                       return -ENOMEM;
-               f = &r->r.frmr;
-
-               f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
-                                               ia->ri_max_frmr_depth);
-               if (IS_ERR(f->fr_mr)) {
-                       rc = PTR_ERR(f->fr_mr);
-                       dprintk("RPC:       %s: ib_alloc_fast_reg_mr "
-                               "failed %i\n", __func__, rc);
-                       goto out_free;
-               }
-
-               f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
-                                                       ia->ri_max_frmr_depth);
-               if (IS_ERR(f->fr_pgl)) {
-                       rc = PTR_ERR(f->fr_pgl);
-                       dprintk("RPC:       %s: ib_alloc_fast_reg_page_list "
-                               "failed %i\n", __func__, rc);
-
-                       ib_dereg_mr(f->fr_mr);
-                       goto out_free;
-               }
-
-               list_add(&r->mw_list, &buf->rb_mws);
-               list_add(&r->mw_all, &buf->rb_all);
-       }
-
-       return 0;
-
-out_free:
-       kfree(r);
-       return rc;
-}
-
 int
 rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
 {
@@ -1244,22 +1098,9 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
        buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
        p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
 
-       INIT_LIST_HEAD(&buf->rb_mws);
-       INIT_LIST_HEAD(&buf->rb_all);
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR:
-               rc = rpcrdma_init_frmrs(ia, buf);
-               if (rc)
-                       goto out;
-               break;
-       case RPCRDMA_MTHCAFMR:
-               rc = rpcrdma_init_fmrs(ia, buf);
-               if (rc)
-                       goto out;
-               break;
-       default:
-               break;
-       }
+       rc = ia->ri_ops->ro_init(r_xprt);
+       if (rc)
+               goto out;
 
        for (i = 0; i < buf->rb_max_requests; i++) {
                struct rpcrdma_req *req;
@@ -1311,47 +1152,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
        kfree(req);
 }
 
-static void
-rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_mw *r;
-       int rc;
-
-       while (!list_empty(&buf->rb_all)) {
-               r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
-               list_del(&r->mw_all);
-               list_del(&r->mw_list);
-
-               rc = ib_dealloc_fmr(r->r.fmr);
-               if (rc)
-                       dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
-                               __func__, rc);
-
-               kfree(r);
-       }
-}
-
-static void
-rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_mw *r;
-       int rc;
-
-       while (!list_empty(&buf->rb_all)) {
-               r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
-               list_del(&r->mw_all);
-               list_del(&r->mw_list);
-
-               rc = ib_dereg_mr(r->r.frmr.fr_mr);
-               if (rc)
-                       dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
-                               __func__, rc);
-               ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
-
-               kfree(r);
-       }
-}
-
 void
 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 {
@@ -1372,104 +1172,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
                        rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
        }
 
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR:
-               rpcrdma_destroy_frmrs(buf);
-               break;
-       case RPCRDMA_MTHCAFMR:
-               rpcrdma_destroy_fmrs(buf);
-               break;
-       default:
-               break;
-       }
+       ia->ri_ops->ro_destroy(buf);
 
        kfree(buf->rb_pool);
 }
 
-/* After a disconnect, unmap all FMRs.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_fmr_external().
- */
-static void
-rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_xprt *r_xprt =
-                               container_of(ia, struct rpcrdma_xprt, rx_ia);
-       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-       struct list_head *pos;
-       struct rpcrdma_mw *r;
-       LIST_HEAD(l);
-       int rc;
-
-       list_for_each(pos, &buf->rb_all) {
-               r = list_entry(pos, struct rpcrdma_mw, mw_all);
-
-               INIT_LIST_HEAD(&l);
-               list_add(&r->r.fmr->list, &l);
-               rc = ib_unmap_fmr(&l);
-               if (rc)
-                       dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
-                               __func__, rc);
-       }
-}
-
-/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
- * an unusable state. Find FRMRs in this state and dereg / reg
- * each.  FRMRs that are VALID and attached to an rpcrdma_req are
- * also torn down.
- *
- * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_frmr_external().
- */
-static void
-rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_xprt *r_xprt =
-                               container_of(ia, struct rpcrdma_xprt, rx_ia);
-       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-       struct list_head *pos;
-       struct rpcrdma_mw *r;
-       int rc;
-
-       list_for_each(pos, &buf->rb_all) {
-               r = list_entry(pos, struct rpcrdma_mw, mw_all);
-
-               if (r->r.frmr.fr_state == FRMR_IS_INVALID)
-                       continue;
-
-               rc = ib_dereg_mr(r->r.frmr.fr_mr);
-               if (rc)
-                       dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
-                               __func__, rc);
-               ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
-
-               r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
-                                       ia->ri_max_frmr_depth);
-               if (IS_ERR(r->r.frmr.fr_mr)) {
-                       rc = PTR_ERR(r->r.frmr.fr_mr);
-                       dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
-                               " failed %i\n", __func__, rc);
-                       continue;
-               }
-               r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
-                                       ia->ri_id->device,
-                                       ia->ri_max_frmr_depth);
-               if (IS_ERR(r->r.frmr.fr_pgl)) {
-                       rc = PTR_ERR(r->r.frmr.fr_pgl);
-                       dprintk("RPC:       %s: "
-                               "ib_alloc_fast_reg_page_list "
-                               "failed %i\n", __func__, rc);
-
-                       ib_dereg_mr(r->r.frmr.fr_mr);
-                       continue;
-               }
-               r->r.frmr.fr_state = FRMR_IS_INVALID;
-       }
-}
-
 /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
  * some req segments uninitialized.
  */
@@ -1509,7 +1216,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
        }
 }
 
-/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
+/* rpcrdma_unmap_one() was already done during deregistration.
  * Redo only the ib_post_send().
  */
 static void
@@ -1729,6 +1436,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
  * Wrappers for internal-use kmalloc memory registration, used by buffer code.
  */
 
+void
+rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg)
+{
+       dprintk("RPC:       map_one: offset %p iova %llx len %zu\n",
+               seg->mr_offset,
+               (unsigned long long)seg->mr_dma, seg->mr_dmalen);
+}
+
 static int
 rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
                                struct ib_mr **mrp, struct ib_sge *iov)
@@ -1853,287 +1568,6 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
        }
 }
 
-/*
- * Wrappers for chunk registration, shared by read/write chunk code.
- */
-
-static void
-rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
-{
-       seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-       seg->mr_dmalen = seg->mr_len;
-       if (seg->mr_page)
-               seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
-                               seg->mr_page, offset_in_page(seg->mr_offset),
-                               seg->mr_dmalen, seg->mr_dir);
-       else
-               seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
-                               seg->mr_offset,
-                               seg->mr_dmalen, seg->mr_dir);
-       if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
-               dprintk("RPC:       %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
-                       __func__,
-                       (unsigned long long)seg->mr_dma,
-                       seg->mr_offset, seg->mr_dmalen);
-       }
-}
-
-static void
-rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
-{
-       if (seg->mr_page)
-               ib_dma_unmap_page(ia->ri_id->device,
-                               seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
-       else
-               ib_dma_unmap_single(ia->ri_id->device,
-                               seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
-}
-
-static int
-rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
-                       int *nsegs, int writing, struct rpcrdma_ia *ia,
-                       struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_mr_seg *seg1 = seg;
-       struct rpcrdma_mw *mw = seg1->rl_mw;
-       struct rpcrdma_frmr *frmr = &mw->r.frmr;
-       struct ib_mr *mr = frmr->fr_mr;
-       struct ib_send_wr fastreg_wr, *bad_wr;
-       u8 key;
-       int len, pageoff;
-       int i, rc;
-       int seg_len;
-       u64 pa;
-       int page_no;
-
-       pageoff = offset_in_page(seg1->mr_offset);
-       seg1->mr_offset -= pageoff;     /* start of page */
-       seg1->mr_len += pageoff;
-       len = -pageoff;
-       if (*nsegs > ia->ri_max_frmr_depth)
-               *nsegs = ia->ri_max_frmr_depth;
-       for (page_no = i = 0; i < *nsegs;) {
-               rpcrdma_map_one(ia, seg, writing);
-               pa = seg->mr_dma;
-               for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
-                       frmr->fr_pgl->page_list[page_no++] = pa;
-                       pa += PAGE_SIZE;
-               }
-               len += seg->mr_len;
-               ++seg;
-               ++i;
-               /* Check for holes */
-               if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
-                   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
-                       break;
-       }
-       dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
-               __func__, mw, i);
-
-       frmr->fr_state = FRMR_IS_VALID;
-
-       memset(&fastreg_wr, 0, sizeof(fastreg_wr));
-       fastreg_wr.wr_id = (unsigned long)(void *)mw;
-       fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-       fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
-       fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
-       fastreg_wr.wr.fast_reg.page_list_len = page_no;
-       fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
-       if (fastreg_wr.wr.fast_reg.length < len) {
-               rc = -EIO;
-               goto out_err;
-       }
-
-       /* Bump the key */
-       key = (u8)(mr->rkey & 0x000000FF);
-       ib_update_fast_reg_key(mr, ++key);
-
-       fastreg_wr.wr.fast_reg.access_flags = (writing ?
-                               IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
-                               IB_ACCESS_REMOTE_READ);
-       fastreg_wr.wr.fast_reg.rkey = mr->rkey;
-       DECR_CQCOUNT(&r_xprt->rx_ep);
-
-       rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
-       if (rc) {
-               dprintk("RPC:       %s: failed ib_post_send for register,"
-                       " status %i\n", __func__, rc);
-               ib_update_fast_reg_key(mr, --key);
-               goto out_err;
-       } else {
-               seg1->mr_rkey = mr->rkey;
-               seg1->mr_base = seg1->mr_dma + pageoff;
-               seg1->mr_nsegs = i;
-               seg1->mr_len = len;
-       }
-       *nsegs = i;
-       return 0;
-out_err:
-       frmr->fr_state = FRMR_IS_INVALID;
-       while (i--)
-               rpcrdma_unmap_one(ia, --seg);
-       return rc;
-}
-
-static int
-rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
-                       struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_mr_seg *seg1 = seg;
-       struct ib_send_wr invalidate_wr, *bad_wr;
-       int rc;
-
-       seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
-
-       memset(&invalidate_wr, 0, sizeof invalidate_wr);
-       invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
-       invalidate_wr.opcode = IB_WR_LOCAL_INV;
-       invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
-       DECR_CQCOUNT(&r_xprt->rx_ep);
-
-       read_lock(&ia->ri_qplock);
-       while (seg1->mr_nsegs--)
-               rpcrdma_unmap_one(ia, seg++);
-       rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
-       read_unlock(&ia->ri_qplock);
-       if (rc) {
-               /* Force rpcrdma_buffer_get() to retry */
-               seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
-               dprintk("RPC:       %s: failed ib_post_send for invalidate,"
-                       " status %i\n", __func__, rc);
-       }
-       return rc;
-}
-
-static int
-rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
-                       int *nsegs, int writing, struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_mr_seg *seg1 = seg;
-       u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
-       int len, pageoff, i, rc;
-
-       pageoff = offset_in_page(seg1->mr_offset);
-       seg1->mr_offset -= pageoff;     /* start of page */
-       seg1->mr_len += pageoff;
-       len = -pageoff;
-       if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
-               *nsegs = RPCRDMA_MAX_DATA_SEGS;
-       for (i = 0; i < *nsegs;) {
-               rpcrdma_map_one(ia, seg, writing);
-               physaddrs[i] = seg->mr_dma;
-               len += seg->mr_len;
-               ++seg;
-               ++i;
-               /* Check for holes */
-               if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
-                   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
-                       break;
-       }
-       rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
-       if (rc) {
-               dprintk("RPC:       %s: failed ib_map_phys_fmr "
-                       "%u@0x%llx+%i (%d)... status %i\n", __func__,
-                       len, (unsigned long long)seg1->mr_dma,
-                       pageoff, i, rc);
-               while (i--)
-                       rpcrdma_unmap_one(ia, --seg);
-       } else {
-               seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
-               seg1->mr_base = seg1->mr_dma + pageoff;
-               seg1->mr_nsegs = i;
-               seg1->mr_len = len;
-       }
-       *nsegs = i;
-       return rc;
-}
-
-static int
-rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
-                       struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_mr_seg *seg1 = seg;
-       LIST_HEAD(l);
-       int rc;
-
-       list_add(&seg1->rl_mw->r.fmr->list, &l);
-       rc = ib_unmap_fmr(&l);
-       read_lock(&ia->ri_qplock);
-       while (seg1->mr_nsegs--)
-               rpcrdma_unmap_one(ia, seg++);
-       read_unlock(&ia->ri_qplock);
-       if (rc)
-               dprintk("RPC:       %s: failed ib_unmap_fmr,"
-                       " status %i\n", __func__, rc);
-       return rc;
-}
-
-int
-rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
-                       int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       int rc = 0;
-
-       switch (ia->ri_memreg_strategy) {
-
-       case RPCRDMA_ALLPHYSICAL:
-               rpcrdma_map_one(ia, seg, writing);
-               seg->mr_rkey = ia->ri_bind_mem->rkey;
-               seg->mr_base = seg->mr_dma;
-               seg->mr_nsegs = 1;
-               nsegs = 1;
-               break;
-
-       /* Registration using frmr registration */
-       case RPCRDMA_FRMR:
-               rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
-               break;
-
-       /* Registration using fmr memory registration */
-       case RPCRDMA_MTHCAFMR:
-               rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
-               break;
-
-       default:
-               return -EIO;
-       }
-       if (rc)
-               return rc;
-
-       return nsegs;
-}
-
-int
-rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
-               struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       int nsegs = seg->mr_nsegs, rc;
-
-       switch (ia->ri_memreg_strategy) {
-
-       case RPCRDMA_ALLPHYSICAL:
-               read_lock(&ia->ri_qplock);
-               rpcrdma_unmap_one(ia, seg);
-               read_unlock(&ia->ri_qplock);
-               break;
-
-       case RPCRDMA_FRMR:
-               rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
-               break;
-
-       case RPCRDMA_MTHCAFMR:
-               rc = rpcrdma_deregister_fmr_external(seg, ia);
-               break;
-
-       default:
-               break;
-       }
-       return nsegs;
-}
-
 /*
  * Prepost any receive buffer, then post send.
  *
@@ -2156,7 +1590,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
        }
 
        send_wr.next = NULL;
-       send_wr.wr_id = 0ULL;   /* no send cookie */
+       send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
        send_wr.sg_list = req->rl_send_iov;
        send_wr.num_sge = req->rl_niovs;
        send_wr.opcode = IB_WR_SEND;
@@ -2215,43 +1649,24 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
        return rc;
 }
 
-/* Physical mapping means one Read/Write list entry per-page.
- * All list entries must fit within an inline buffer
- *
- * NB: The server must return a Write list for NFS READ,
- *     which has the same constraint. Factor in the inline
- *     rsize as well.
+/* How many chunk list items fit within our inline buffers?
  */
-static size_t
-rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
+unsigned int
+rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
 {
        struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-       unsigned int inline_size, pages;
+       int bytes, segments;
 
-       inline_size = min_t(unsigned int,
-                           cdata->inline_wsize, cdata->inline_rsize);
-       inline_size -= RPCRDMA_HDRLEN_MIN;
-       pages = inline_size / sizeof(struct rpcrdma_segment);
-       return pages << PAGE_SHIFT;
-}
-
-static size_t
-rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
-{
-       return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
-}
-
-size_t
-rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
-{
-       size_t result;
-
-       switch (r_xprt->rx_ia.ri_memreg_strategy) {
-       case RPCRDMA_ALLPHYSICAL:
-               result = rpcrdma_physical_max_payload(r_xprt);
-               break;
-       default:
-               result = rpcrdma_mr_max_payload(r_xprt);
+       bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
+       bytes -= RPCRDMA_HDRLEN_MIN;
+       if (bytes < sizeof(struct rpcrdma_segment) * 2) {
+               pr_warn("RPC:       %s: inline threshold too small\n",
+                       __func__);
+               return 0;
        }
-       return result;
+
+       segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
+       dprintk("RPC:       %s: max chunk list size = %d segments\n",
+               __func__, segments);
+       return segments;
 }
index 0a16fb6..78e0b8b 100644 (file)
@@ -60,6 +60,7 @@
  * Interface Adapter -- one per transport instance
  */
 struct rpcrdma_ia {
+       const struct rpcrdma_memreg_ops *ri_ops;
        rwlock_t                ri_qplock;
        struct rdma_cm_id       *ri_id;
        struct ib_pd            *ri_pd;
@@ -105,6 +106,10 @@ struct rpcrdma_ep {
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
 
+/* Force completion handler to ignore the signal
+ */
+#define RPCRDMA_IGNORE_COMPLETION      (0ULL)
+
 /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
  *
  * The below structure appears at the front of a large region of kmalloc'd
@@ -143,14 +148,6 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
        return (struct rpcrdma_msg *)rb->rg_base;
 }
 
-enum rpcrdma_chunktype {
-       rpcrdma_noch = 0,
-       rpcrdma_readch,
-       rpcrdma_areadch,
-       rpcrdma_writech,
-       rpcrdma_replych
-};
-
 /*
  * struct rpcrdma_rep -- this structure encapsulates state required to recv
  * and complete a reply, asychronously. It needs several pieces of
@@ -213,6 +210,7 @@ struct rpcrdma_mw {
                struct ib_fmr           *fmr;
                struct rpcrdma_frmr     frmr;
        } r;
+       void                    (*mw_sendcompletion)(struct ib_wc *);
        struct list_head        mw_list;
        struct list_head        mw_all;
 };
@@ -258,7 +256,6 @@ struct rpcrdma_req {
        unsigned int    rl_niovs;       /* 0, 2 or 4 */
        unsigned int    rl_nchunks;     /* non-zero if chunks */
        unsigned int    rl_connect_cookie;      /* retry detection */
-       enum rpcrdma_chunktype  rl_rtype, rl_wtype;
        struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
        struct rpcrdma_rep      *rl_reply;/* holder for reply buffer */
        struct ib_sge   rl_send_iov[4]; /* for active requests */
@@ -339,6 +336,29 @@ struct rpcrdma_stats {
        unsigned long           bad_reply_count;
 };
 
+/*
+ * Per-registration mode operations
+ */
+struct rpcrdma_xprt;
+struct rpcrdma_memreg_ops {
+       int             (*ro_map)(struct rpcrdma_xprt *,
+                                 struct rpcrdma_mr_seg *, int, bool);
+       int             (*ro_unmap)(struct rpcrdma_xprt *,
+                                   struct rpcrdma_mr_seg *);
+       int             (*ro_open)(struct rpcrdma_ia *,
+                                  struct rpcrdma_ep *,
+                                  struct rpcrdma_create_data_internal *);
+       size_t          (*ro_maxpages)(struct rpcrdma_xprt *);
+       int             (*ro_init)(struct rpcrdma_xprt *);
+       void            (*ro_reset)(struct rpcrdma_xprt *);
+       void            (*ro_destroy)(struct rpcrdma_buffer *);
+       const char      *ro_displayname;
+};
+
+extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
+extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
+extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops;
+
 /*
  * RPCRDMA transport -- encapsulates the structures above for
  * integration with RPC.
@@ -398,16 +418,56 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
 void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
 
-int rpcrdma_register_external(struct rpcrdma_mr_seg *,
-                               int, int, struct rpcrdma_xprt *);
-int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
-                               struct rpcrdma_xprt *);
-
 struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
                                            size_t, gfp_t);
 void rpcrdma_free_regbuf(struct rpcrdma_ia *,
                         struct rpcrdma_regbuf *);
 
+unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
+
+/*
+ * Wrappers for chunk registration, shared by read/write chunk code.
+ */
+
+void rpcrdma_mapping_error(struct rpcrdma_mr_seg *);
+
+static inline enum dma_data_direction
+rpcrdma_data_dir(bool writing)
+{
+       return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+}
+
+static inline void
+rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg,
+               enum dma_data_direction direction)
+{
+       seg->mr_dir = direction;
+       seg->mr_dmalen = seg->mr_len;
+
+       if (seg->mr_page)
+               seg->mr_dma = ib_dma_map_page(device,
+                               seg->mr_page, offset_in_page(seg->mr_offset),
+                               seg->mr_dmalen, seg->mr_dir);
+       else
+               seg->mr_dma = ib_dma_map_single(device,
+                               seg->mr_offset,
+                               seg->mr_dmalen, seg->mr_dir);
+
+       if (ib_dma_mapping_error(device, seg->mr_dma))
+               rpcrdma_mapping_error(seg);
+}
+
+static inline void
+rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg)
+{
+       if (seg->mr_page)
+               ib_dma_unmap_page(device,
+                                 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+       else
+               ib_dma_unmap_single(device,
+                                   seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+}
+
 /*
  * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
  */
@@ -418,9 +478,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
 /*
  * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
  */
-ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
 int rpcrdma_marshal_req(struct rpc_rqst *);
-size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
 
 /* Temporary NFS request map cache. Created in svc_rdma.c  */
 extern struct kmem_cache *svc_rdma_map_cachep;
index 433f287..5266ea7 100644 (file)
@@ -305,7 +305,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
                    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
                struct dentry *dentry = unix_sk(s)->path.dentry;
 
-               if (dentry && dentry->d_inode == i) {
+               if (dentry && d_backing_inode(dentry) == i) {
                        sock_hold(s);
                        goto found;
                }
@@ -778,7 +778,7 @@ static struct sock *unix_find_other(struct net *net,
                err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
                if (err)
                        goto fail;
-               inode = path.dentry->d_inode;
+               inode = d_backing_inode(path.dentry);
                err = inode_permission(inode, MAY_WRITE);
                if (err)
                        goto put_fail;
@@ -839,7 +839,7 @@ static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
         */
        err = security_path_mknod(&path, dentry, mode, 0);
        if (!err) {
-               err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
+               err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
                if (!err) {
                        res->mnt = mntget(path.mnt);
                        res->dentry = dget(dentry);
@@ -905,7 +905,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                        goto out_up;
                }
                addr->hash = UNIX_HASH_SIZE;
-               hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
+               hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
                spin_lock(&unix_table_lock);
                u->path = path;
                list = &unix_socket_table[hash];
index ef542fb..c512f64 100644 (file)
@@ -25,7 +25,7 @@ static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb)
 
        if (dentry) {
                struct unix_diag_vfs uv = {
-                       .udiag_vfs_ino = dentry->d_inode->i_ino,
+                       .udiag_vfs_ino = d_backing_inode(dentry)->i_ino,
                        .udiag_vfs_dev = dentry->d_sb->s_dev,
                };
 
diff --git a/scripts/check_extable.sh b/scripts/check_extable.sh
new file mode 100755 (executable)
index 0000000..0fb6b1c
--- /dev/null
@@ -0,0 +1,146 @@
+#! /bin/bash
+# (c) 2015, Quentin Casasnovas <quentin.casasnovas@oracle.com>
+
+obj=$1
+
+file ${obj} | grep -q ELF || (echo "${obj} is not and ELF file." 1>&2 ; exit 0)
+
+# Bail out early if there isn't an __ex_table section in this object file.
+objdump -hj __ex_table ${obj} 2> /dev/null > /dev/null
+[ $? -ne 0 ] && exit 0
+
+white_list=.text,.fixup
+
+suspicious_relocs=$(objdump -rj __ex_table ${obj}  | tail -n +6 |
+                       grep -v $(eval echo -e{${white_list}}) | awk '{print $3}')
+
+# No suspicious relocs in __ex_table, jobs a good'un
+[ -z "${suspicious_relocs}" ] && exit 0
+
+
+# After this point, something is seriously wrong since we just found out we
+# have some relocations in __ex_table which point to sections which aren't
+# white listed.  If you're adding a new section in the Linux kernel, and
+# you're expecting this section to contain code which can fault (i.e. the
+# __ex_table relocation to your new section is expected), simply add your
+# new section to the white_list variable above.  If not, you're probably
+# doing something wrong and the rest of this code is just trying to print
+# you more information about it.
+
+function find_section_offset_from_symbol()
+{
+    eval $(objdump -t ${obj} | grep ${1} | sed 's/\([0-9a-f]\+\) .\{7\} \([^ \t]\+\).*/section="\2"; section_offset="0x\1" /')
+
+    # addr2line takes addresses in hexadecimal...
+    section_offset=$(printf "0x%016x" $(( ${section_offset} + $2 )) )
+}
+
+function find_symbol_and_offset_from_reloc()
+{
+    # Extract symbol and offset from the objdump output
+    eval $(echo $reloc | sed 's/\([^+]\+\)+\?\(0x[0-9a-f]\+\)\?/symbol="\1"; symbol_offset="\2"/')
+
+    # When the relocation points to the begining of a symbol or section, it
+    # won't print the offset since it is zero.
+    if [ -z "${symbol_offset}" ]; then
+       symbol_offset=0x0
+    fi
+}
+
+function find_alt_replacement_target()
+{
+    # The target of the .altinstr_replacement is the relocation just before
+    # the .altinstr_replacement one.
+    eval $(objdump -rj .altinstructions ${obj} | grep -B1 "${section}+${section_offset}" | head -n1 | awk '{print $3}' |
+          sed 's/\([^+]\+\)+\(0x[0-9a-f]\+\)/alt_target_section="\1"; alt_target_offset="\2"/')
+}
+
+function handle_alt_replacement_reloc()
+{
+    # This will define alt_target_section and alt_target_section_offset
+    find_alt_replacement_target ${section} ${section_offset}
+
+    echo "Error: found a reference to .altinstr_replacement in __ex_table:"
+    addr2line -fip -j ${alt_target_section} -e ${obj} ${alt_target_offset} | awk '{print "\t" $0}'
+
+    error=true
+}
+
+function is_executable_section()
+{
+    objdump -hwj ${section} ${obj} | grep -q CODE
+    return $?
+}
+
+function handle_suspicious_generic_reloc()
+{
+    if is_executable_section ${section}; then
+       # We've got a relocation to a non white listed _executable_
+       # section, print a warning so the developper adds the section to
+       # the white list or fix his code.  We try to pretty-print the file
+       # and line number where that relocation was added.
+       echo "Warning: found a reference to section \"${section}\" in __ex_table:"
+       addr2line -fip -j ${section} -e ${obj} ${section_offset} | awk '{print "\t" $0}'
+    else
+       # Something is definitively wrong here since we've got a relocation
+       # to a non-executable section, there's no way this would ever be
+       # running in the kernel.
+       echo "Error: found a reference to non-executable section \"${section}\" in __ex_table at offset ${section_offset}"
+       error=true
+    fi
+}
+
+function handle_suspicious_reloc()
+{
+    case "${section}" in
+       ".altinstr_replacement")
+           handle_alt_replacement_reloc ${section} ${section_offset}
+           ;;
+       *)
+           handle_suspicious_generic_reloc ${section} ${section_offset}
+           ;;
+    esac
+}
+
+function diagnose()
+{
+
+    for reloc in ${suspicious_relocs}; do
+       # Let's find out where the target of the relocation in __ex_table
+       # is, this will define ${symbol} and ${symbol_offset}
+       find_symbol_and_offset_from_reloc ${reloc}
+
+       # When there's a global symbol at the place of the relocation,
+       # objdump will use it instead of giving us a section+offset, so
+       # let's find out which section is this symbol in and the total
+       # offset withing that section.
+       find_section_offset_from_symbol ${symbol} ${symbol_offset}
+
+       # In this case objdump was presenting us with a reloc to a symbol
+       # rather than a section. Now that we've got the actual section,
+       # we can skip it if it's in the white_list.
+       if [ -z "$( echo $section | grep -v $(eval echo -e{${white_list}}))" ]; then
+           continue;
+       fi
+
+       # Will either print a warning if the relocation happens to be in a
+       # section we do not know but has executable bit set, or error out.
+       handle_suspicious_reloc
+    done
+}
+
+function check_debug_info() {
+    objdump -hj .debug_info ${obj} 2> /dev/null > /dev/null ||
+       echo -e "${obj} does not contain debug information, the addr2line output will be limited.\n" \
+            "Recompile ${obj} with CONFIG_DEBUG_INFO to get a more useful output."
+}
+
+check_debug_info
+
+diagnose
+
+if [ "${error}" ]; then
+    exit 1
+fi
+
+exit 0
index d439856..91ee1b2 100644 (file)
@@ -776,6 +776,7 @@ static const char *sech_name(struct elf_info *elf, Elf_Shdr *sechdr)
  * "foo" will match an exact string equal to "foo"
  * "*foo" will match a string that ends with "foo"
  * "foo*" will match a string that begins with "foo"
+ * "*foo*" will match a string that contains "foo"
  */
 static int match(const char *sym, const char * const pat[])
 {
@@ -784,8 +785,17 @@ static int match(const char *sym, const char * const pat[])
                p = *pat++;
                const char *endp = p + strlen(p) - 1;
 
+               /* "*foo*" */
+               if (*p == '*' && *endp == '*') {
+                       char *here, *bare = strndup(p + 1, strlen(p) - 2);
+
+                       here = strstr(sym, bare);
+                       free(bare);
+                       if (here != NULL)
+                               return 1;
+               }
                /* "*foo" */
-               if (*p == '*') {
+               else if (*p == '*') {
                        if (strrcmp(sym, p + 1) == 0)
                                return 1;
                }
@@ -873,7 +883,10 @@ static void check_section(const char *modname, struct elf_info *elf,
 #define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS
 
 #define DATA_SECTIONS ".data", ".data.rel"
-#define TEXT_SECTIONS ".text", ".text.unlikely"
+#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \
+               ".kprobes.text"
+#define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
+               ".fixup", ".entry.text", ".exception.text", ".text.*"
 
 #define INIT_SECTIONS      ".init.*"
 #define MEM_INIT_SECTIONS  ".meminit.*"
@@ -881,6 +894,9 @@ static void check_section(const char *modname, struct elf_info *elf,
 #define EXIT_SECTIONS      ".exit.*"
 #define MEM_EXIT_SECTIONS  ".memexit.*"
 
+#define ALL_TEXT_SECTIONS  ALL_INIT_TEXT_SECTIONS, ALL_EXIT_TEXT_SECTIONS, \
+               TEXT_SECTIONS, OTHER_TEXT_SECTIONS
+
 /* init data sections */
 static const char *const init_data_sections[] =
        { ALL_INIT_DATA_SECTIONS, NULL };
@@ -892,6 +908,9 @@ static const char *const init_sections[] = { ALL_INIT_SECTIONS, NULL };
 static const char *const init_exit_sections[] =
        {ALL_INIT_SECTIONS, ALL_EXIT_SECTIONS, NULL };
 
+/* all text sections */
+static const char *const text_sections[] = { ALL_TEXT_SECTIONS, NULL };
+
 /* data section */
 static const char *const data_sections[] = { DATA_SECTIONS, NULL };
 
@@ -910,6 +929,7 @@ static const char *const data_sections[] = { DATA_SECTIONS, NULL };
 static const char *const head_sections[] = { ".head.text*", NULL };
 static const char *const linker_symbols[] =
        { "__init_begin", "_sinittext", "_einittext", NULL };
+static const char *const optim_symbols[] = { "*.constprop.*", NULL };
 
 enum mismatch {
        TEXT_TO_ANY_INIT,
@@ -921,34 +941,65 @@ enum mismatch {
        ANY_INIT_TO_ANY_EXIT,
        ANY_EXIT_TO_ANY_INIT,
        EXPORT_TO_INIT_EXIT,
+       EXTABLE_TO_NON_TEXT,
 };
 
+/**
+ * Describe how to match sections on different criterias:
+ *
+ * @fromsec: Array of sections to be matched.
+ *
+ * @bad_tosec: Relocations applied to a section in @fromsec to a section in
+ * this array is forbidden (black-list).  Can be empty.
+ *
+ * @good_tosec: Relocations applied to a section in @fromsec must be
+ * targetting sections in this array (white-list).  Can be empty.
+ *
+ * @mismatch: Type of mismatch.
+ *
+ * @symbol_white_list: Do not match a relocation to a symbol in this list
+ * even if it is targetting a section in @bad_to_sec.
+ *
+ * @handler: Specific handler to call when a match is found.  If NULL,
+ * default_mismatch_handler() will be called.
+ *
+ */
 struct sectioncheck {
        const char *fromsec[20];
-       const char *tosec[20];
+       const char *bad_tosec[20];
+       const char *good_tosec[20];
        enum mismatch mismatch;
        const char *symbol_white_list[20];
+       void (*handler)(const char *modname, struct elf_info *elf,
+                       const struct sectioncheck* const mismatch,
+                       Elf_Rela *r, Elf_Sym *sym, const char *fromsec);
+
 };
 
+static void extable_mismatch_handler(const char *modname, struct elf_info *elf,
+                                    const struct sectioncheck* const mismatch,
+                                    Elf_Rela *r, Elf_Sym *sym,
+                                    const char *fromsec);
+
 static const struct sectioncheck sectioncheck[] = {
 /* Do not reference init/exit code/data from
  * normal code and data
  */
 {
        .fromsec = { TEXT_SECTIONS, NULL },
-       .tosec   = { ALL_INIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_INIT_SECTIONS, NULL },
        .mismatch = TEXT_TO_ANY_INIT,
        .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
 },
 {
        .fromsec = { DATA_SECTIONS, NULL },
-       .tosec   = { ALL_XXXINIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_XXXINIT_SECTIONS, NULL },
        .mismatch = DATA_TO_ANY_INIT,
        .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
 },
 {
        .fromsec = { DATA_SECTIONS, NULL },
-       .tosec   = { INIT_SECTIONS, NULL },
+       .bad_tosec = { INIT_SECTIONS, NULL },
        .mismatch = DATA_TO_ANY_INIT,
        .symbol_white_list = {
                "*_template", "*_timer", "*_sht", "*_ops",
@@ -957,56 +1008,66 @@ static const struct sectioncheck sectioncheck[] = {
 },
 {
        .fromsec = { TEXT_SECTIONS, NULL },
-       .tosec   = { ALL_EXIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_EXIT_SECTIONS, NULL },
        .mismatch = TEXT_TO_ANY_EXIT,
        .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
 },
 {
        .fromsec = { DATA_SECTIONS, NULL },
-       .tosec   = { ALL_EXIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_EXIT_SECTIONS, NULL },
        .mismatch = DATA_TO_ANY_EXIT,
        .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
 },
 /* Do not reference init code/data from meminit code/data */
 {
        .fromsec = { ALL_XXXINIT_SECTIONS, NULL },
-       .tosec   = { INIT_SECTIONS, NULL },
+       .bad_tosec = { INIT_SECTIONS, NULL },
        .mismatch = XXXINIT_TO_SOME_INIT,
        .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
 },
 /* Do not reference exit code/data from memexit code/data */
 {
        .fromsec = { ALL_XXXEXIT_SECTIONS, NULL },
-       .tosec   = { EXIT_SECTIONS, NULL },
+       .bad_tosec = { EXIT_SECTIONS, NULL },
        .mismatch = XXXEXIT_TO_SOME_EXIT,
        .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
 },
 /* Do not use exit code/data from init code */
 {
        .fromsec = { ALL_INIT_SECTIONS, NULL },
-       .tosec   = { ALL_EXIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_EXIT_SECTIONS, NULL },
        .mismatch = ANY_INIT_TO_ANY_EXIT,
        .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
 },
 /* Do not use init code/data from exit code */
 {
        .fromsec = { ALL_EXIT_SECTIONS, NULL },
-       .tosec   = { ALL_INIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_INIT_SECTIONS, NULL },
        .mismatch = ANY_EXIT_TO_ANY_INIT,
        .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
 },
 {
        .fromsec = { ALL_PCI_INIT_SECTIONS, NULL },
-       .tosec   = { INIT_SECTIONS, NULL },
+       .bad_tosec = { INIT_SECTIONS, NULL },
        .mismatch = ANY_INIT_TO_ANY_EXIT,
        .symbol_white_list = { NULL },
 },
 /* Do not export init/exit functions or data */
 {
        .fromsec = { "__ksymtab*", NULL },
-       .tosec   = { INIT_SECTIONS, EXIT_SECTIONS, NULL },
+       .bad_tosec = { INIT_SECTIONS, EXIT_SECTIONS, NULL },
        .mismatch = EXPORT_TO_INIT_EXIT,
        .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
+},
+{
+       .fromsec = { "__ex_table", NULL },
+       /* If you're adding any new black-listed sections in here, consider
+        * adding a special 'printer' for them in scripts/check_extable.
+        */
+       .bad_tosec = { ".altinstr_replacement", NULL },
+       .good_tosec = {ALL_TEXT_SECTIONS , NULL},
+       .mismatch = EXTABLE_TO_NON_TEXT,
+       .handler = extable_mismatch_handler,
 }
 };
 
@@ -1017,10 +1078,22 @@ static const struct sectioncheck *section_mismatch(
        int elems = sizeof(sectioncheck) / sizeof(struct sectioncheck);
        const struct sectioncheck *check = &sectioncheck[0];
 
+       /*
+        * The target section could be the SHT_NUL section when we're
+        * handling relocations to un-resolved symbols, trying to match it
+        * doesn't make much sense and causes build failures on parisc and
+        * mn10300 architectures.
+        */
+       if (*tosec == '\0')
+               return NULL;
+
        for (i = 0; i < elems; i++) {
-               if (match(fromsec, check->fromsec) &&
-                   match(tosec, check->tosec))
-                       return check;
+               if (match(fromsec, check->fromsec)) {
+                       if (check->bad_tosec[0] && match(tosec, check->bad_tosec))
+                               return check;
+                       if (check->good_tosec[0] && !match(tosec, check->good_tosec))
+                               return check;
+               }
                check++;
        }
        return NULL;
@@ -1067,6 +1140,17 @@ static const struct sectioncheck *section_mismatch(
  *   This pattern is identified by
  *   refsymname = __init_begin, _sinittext, _einittext
  *
+ * Pattern 5:
+ *   GCC may optimize static inlines when fed constant arg(s) resulting
+ *   in functions like cpumask_empty() -- generating an associated symbol
+ *   cpumask_empty.constprop.3 that appears in the audit.  If the const that
+ *   is passed in comes from __init, like say nmi_ipi_mask, we get a
+ *   meaningless section warning.  May need to add isra symbols too...
+ *   This pattern is identified by
+ *   tosec   = init section
+ *   fromsec = text section
+ *   refsymname = *.constprop.*
+ *
  **/
 static int secref_whitelist(const struct sectioncheck *mismatch,
                            const char *fromsec, const char *fromsym,
@@ -1099,6 +1183,12 @@ static int secref_whitelist(const struct sectioncheck *mismatch,
        if (match(tosym, linker_symbols))
                return 0;
 
+       /* Check for pattern 5 */
+       if (match(fromsec, text_sections) &&
+           match(tosec, init_sections) &&
+           match(fromsym, optim_symbols))
+               return 0;
+
        return 1;
 }
 
@@ -1261,6 +1351,15 @@ static void print_section_list(const char * const list[20])
        fprintf(stderr, "\n");
 }
 
+static inline void get_pretty_name(int is_func, const char** name, const char** name_p)
+{
+       switch (is_func) {
+       case 0: *name = "variable"; *name_p = ""; break;
+       case 1: *name = "function"; *name_p = "()"; break;
+       default: *name = "(unknown reference)"; *name_p = ""; break;
+       }
+}
+
 /*
  * Print a warning about a section mismatch.
  * Try to find symbols near it so user can find it.
@@ -1280,21 +1379,13 @@ static void report_sec_mismatch(const char *modname,
        char *prl_from;
        char *prl_to;
 
-       switch (from_is_func) {
-       case 0: from = "variable"; from_p = "";   break;
-       case 1: from = "function"; from_p = "()"; break;
-       default: from = "(unknown reference)"; from_p = ""; break;
-       }
-       switch (to_is_func) {
-       case 0: to = "variable"; to_p = "";   break;
-       case 1: to = "function"; to_p = "()"; break;
-       default: to = "(unknown reference)"; to_p = ""; break;
-       }
-
        sec_mismatch_count++;
        if (!sec_mismatch_verbose)
                return;
 
+       get_pretty_name(from_is_func, &from, &from_p);
+       get_pretty_name(to_is_func, &to, &to_p);
+
        warn("%s(%s+0x%llx): Section mismatch in reference from the %s %s%s "
             "to the %s %s:%s%s\n",
             modname, fromsec, fromaddr, from, fromsym, from_p, to, tosec,
@@ -1408,41 +1499,179 @@ static void report_sec_mismatch(const char *modname,
                tosym, prl_to, prl_to, tosym);
                free(prl_to);
                break;
+       case EXTABLE_TO_NON_TEXT:
+               fatal("There's a special handler for this mismatch type, "
+                     "we should never get here.");
+               break;
        }
        fprintf(stderr, "\n");
 }
 
-static void check_section_mismatch(const char *modname, struct elf_info *elf,
-                                  Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
+static void default_mismatch_handler(const char *modname, struct elf_info *elf,
+                                    const struct sectioncheck* const mismatch,
+                                    Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
 {
        const char *tosec;
-       const struct sectioncheck *mismatch;
+       Elf_Sym *to;
+       Elf_Sym *from;
+       const char *tosym;
+       const char *fromsym;
+
+       from = find_elf_symbol2(elf, r->r_offset, fromsec);
+       fromsym = sym_name(elf, from);
+
+       if (!strncmp(fromsym, "reference___initcall",
+                    sizeof("reference___initcall")-1))
+               return;
 
        tosec = sec_name(elf, get_secindex(elf, sym));
-       mismatch = section_mismatch(fromsec, tosec);
+       to = find_elf_symbol(elf, r->r_addend, sym);
+       tosym = sym_name(elf, to);
+
+       /* check whitelist - we may ignore it */
+       if (secref_whitelist(mismatch,
+                            fromsec, fromsym, tosec, tosym)) {
+               report_sec_mismatch(modname, mismatch,
+                                   fromsec, r->r_offset, fromsym,
+                                   is_function(from), tosec, tosym,
+                                   is_function(to));
+       }
+}
+
+static int is_executable_section(struct elf_info* elf, unsigned int section_index)
+{
+       if (section_index > elf->num_sections)
+               fatal("section_index is outside elf->num_sections!\n");
+
+       return ((elf->sechdrs[section_index].sh_flags & SHF_EXECINSTR) == SHF_EXECINSTR);
+}
+
+/*
+ * We rely on a gross hack in section_rel[a]() calling find_extable_entry_size()
+ * to know the sizeof(struct exception_table_entry) for the target architecture.
+ */
+static unsigned int extable_entry_size = 0;
+static void find_extable_entry_size(const char* const sec, const Elf_Rela* r)
+{
+       /*
+        * If we're currently checking the second relocation within __ex_table,
+        * that relocation offset tells us the offsetof(struct
+        * exception_table_entry, fixup) which is equal to sizeof(struct
+        * exception_table_entry) divided by two.  We use that to our advantage
+        * since there's no portable way to get that size as every architecture
+        * seems to go with different sized types.  Not pretty but better than
+        * hard-coding the size for every architecture..
+        */
+       if (!extable_entry_size)
+               extable_entry_size = r->r_offset * 2;
+}
+
+static inline bool is_extable_fault_address(Elf_Rela *r)
+{
+       /*
+        * extable_entry_size is only discovered after we've handled the
+        * _second_ relocation in __ex_table, so only abort when we're not
+        * handling the first reloc and extable_entry_size is zero.
+        */
+       if (r->r_offset && extable_entry_size == 0)
+               fatal("extable_entry size hasn't been discovered!\n");
+
+       return ((r->r_offset == 0) ||
+               (r->r_offset % extable_entry_size == 0));
+}
+
+#define is_second_extable_reloc(Start, Cur, Sec)                       \
+       (((Cur) == (Start) + 1) && (strcmp("__ex_table", (Sec)) == 0))
+
+static void report_extable_warnings(const char* modname, struct elf_info* elf,
+                                   const struct sectioncheck* const mismatch,
+                                   Elf_Rela* r, Elf_Sym* sym,
+                                   const char* fromsec, const char* tosec)
+{
+       Elf_Sym* fromsym = find_elf_symbol2(elf, r->r_offset, fromsec);
+       const char* fromsym_name = sym_name(elf, fromsym);
+       Elf_Sym* tosym = find_elf_symbol(elf, r->r_addend, sym);
+       const char* tosym_name = sym_name(elf, tosym);
+       const char* from_pretty_name;
+       const char* from_pretty_name_p;
+       const char* to_pretty_name;
+       const char* to_pretty_name_p;
+
+       get_pretty_name(is_function(fromsym),
+                       &from_pretty_name, &from_pretty_name_p);
+       get_pretty_name(is_function(tosym),
+                       &to_pretty_name, &to_pretty_name_p);
+
+       warn("%s(%s+0x%lx): Section mismatch in reference"
+            " from the %s %s%s to the %s %s:%s%s\n",
+            modname, fromsec, (long)r->r_offset, from_pretty_name,
+            fromsym_name, from_pretty_name_p,
+            to_pretty_name, tosec, tosym_name, to_pretty_name_p);
+
+       if (!match(tosec, mismatch->bad_tosec) &&
+           is_executable_section(elf, get_secindex(elf, sym)))
+               fprintf(stderr,
+                       "The relocation at %s+0x%lx references\n"
+                       "section \"%s\" which is not in the list of\n"
+                       "authorized sections.  If you're adding a new section\n"
+                       "and/or if this reference is valid, add \"%s\" to the\n"
+                       "list of authorized sections to jump to on fault.\n"
+                       "This can be achieved by adding \"%s\" to \n"
+                       "OTHER_TEXT_SECTIONS in scripts/mod/modpost.c.\n",
+                       fromsec, (long)r->r_offset, tosec, tosec, tosec);
+}
+
+static void extable_mismatch_handler(const char* modname, struct elf_info *elf,
+                                    const struct sectioncheck* const mismatch,
+                                    Elf_Rela* r, Elf_Sym* sym,
+                                    const char *fromsec)
+{
+       const char* tosec = sec_name(elf, get_secindex(elf, sym));
+
+       sec_mismatch_count++;
+
+       if (sec_mismatch_verbose)
+               report_extable_warnings(modname, elf, mismatch, r, sym,
+                                       fromsec, tosec);
+
+       if (match(tosec, mismatch->bad_tosec))
+               fatal("The relocation at %s+0x%lx references\n"
+                     "section \"%s\" which is black-listed.\n"
+                     "Something is seriously wrong and should be fixed.\n"
+                     "You might get more information about where this is\n"
+                     "coming from by using scripts/check_extable.sh %s\n",
+                     fromsec, (long)r->r_offset, tosec, modname);
+       else if (!is_executable_section(elf, get_secindex(elf, sym))) {
+               if (is_extable_fault_address(r))
+                       fatal("The relocation at %s+0x%lx references\n"
+                             "section \"%s\" which is not executable, IOW\n"
+                             "it is not possible for the kernel to fault\n"
+                             "at that address.  Something is seriously wrong\n"
+                             "and should be fixed.\n",
+                             fromsec, (long)r->r_offset, tosec);
+               else
+                       fatal("The relocation at %s+0x%lx references\n"
+                             "section \"%s\" which is not executable, IOW\n"
+                             "the kernel will fault if it ever tries to\n"
+                             "jump to it.  Something is seriously wrong\n"
+                             "and should be fixed.\n",
+                             fromsec, (long)r->r_offset, tosec);
+       }
+}
+
+static void check_section_mismatch(const char *modname, struct elf_info *elf,
+                                  Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
+{
+       const char *tosec = sec_name(elf, get_secindex(elf, sym));;
+       const struct sectioncheck *mismatch = section_mismatch(fromsec, tosec);
+
        if (mismatch) {
-               Elf_Sym *to;
-               Elf_Sym *from;
-               const char *tosym;
-               const char *fromsym;
-
-               from = find_elf_symbol2(elf, r->r_offset, fromsec);
-               fromsym = sym_name(elf, from);
-               to = find_elf_symbol(elf, r->r_addend, sym);
-               tosym = sym_name(elf, to);
-
-               if (!strncmp(fromsym, "reference___initcall",
-                               sizeof("reference___initcall")-1))
-                       return;
-
-               /* check whitelist - we may ignore it */
-               if (secref_whitelist(mismatch,
-                                       fromsec, fromsym, tosec, tosym)) {
-                       report_sec_mismatch(modname, mismatch,
-                          fromsec, r->r_offset, fromsym,
-                          is_function(from), tosec, tosym,
-                          is_function(to));
-               }
+               if (mismatch->handler)
+                       mismatch->handler(modname, elf,  mismatch,
+                                         r, sym, fromsec);
+               else
+                       default_mismatch_handler(modname, elf, mismatch,
+                                                r, sym, fromsec);
        }
 }
 
@@ -1582,6 +1811,8 @@ static void section_rela(const char *modname, struct elf_info *elf,
                /* Skip special sections */
                if (is_shndx_special(sym->st_shndx))
                        continue;
+               if (is_second_extable_reloc(start, rela, fromsec))
+                       find_extable_entry_size(fromsec, &r);
                check_section_mismatch(modname, elf, &r, sym, fromsec);
        }
 }
@@ -1640,6 +1871,8 @@ static void section_rel(const char *modname, struct elf_info *elf,
                /* Skip special sections */
                if (is_shndx_special(sym->st_shndx))
                        continue;
+               if (is_second_extable_reloc(start, rel, fromsec))
+                       find_extable_entry_size(fromsec, &r);
                check_section_mismatch(modname, elf, &r, sym, fromsec);
        }
 }
index 7db9954..ad4fa49 100644 (file)
@@ -365,7 +365,7 @@ void __aa_fs_profile_rmdir(struct aa_profile *profile)
                if (!profile->dents[i])
                        continue;
 
-               r = profile->dents[i]->d_inode->i_private;
+               r = d_inode(profile->dents[i])->i_private;
                securityfs_remove(profile->dents[i]);
                aa_put_replacedby(r);
                profile->dents[i] = NULL;
index fdaa50c..913f377 100644 (file)
@@ -259,7 +259,7 @@ unsigned int aa_str_perms(struct aa_dfa *dfa, unsigned int start,
  */
 static inline bool is_deleted(struct dentry *dentry)
 {
-       if (d_unlinked(dentry) && dentry->d_inode->i_nlink == 0)
+       if (d_unlinked(dentry) && d_backing_inode(dentry)->i_nlink == 0)
                return 1;
        return 0;
 }
@@ -351,8 +351,8 @@ int aa_path_link(struct aa_profile *profile, struct dentry *old_dentry,
        struct path link = { new_dir->mnt, new_dentry };
        struct path target = { new_dir->mnt, old_dentry };
        struct path_cond cond = {
-               old_dentry->d_inode->i_uid,
-               old_dentry->d_inode->i_mode
+               d_backing_inode(old_dentry)->i_uid,
+               d_backing_inode(old_dentry)->i_mode
        };
        char *buffer = NULL, *buffer2 = NULL;
        const char *lname, *tname = NULL, *info = NULL;
index dd56bff..e5f1561 100644 (file)
@@ -204,8 +204,8 @@ static int common_perm_mnt_dentry(int op, struct vfsmount *mnt,
                                  struct dentry *dentry, u32 mask)
 {
        struct path path = { mnt, dentry };
-       struct path_cond cond = { dentry->d_inode->i_uid,
-                                 dentry->d_inode->i_mode
+       struct path_cond cond = { d_backing_inode(dentry)->i_uid,
+                                 d_backing_inode(dentry)->i_mode
        };
 
        return common_perm(op, &path, mask, &cond);
@@ -223,7 +223,7 @@ static int common_perm_mnt_dentry(int op, struct vfsmount *mnt,
 static int common_perm_rm(int op, struct path *dir,
                          struct dentry *dentry, u32 mask)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        struct path_cond cond = { };
 
        if (!inode || !dir->mnt || !mediated_filesystem(dentry))
@@ -281,8 +281,8 @@ static int apparmor_path_mknod(struct path *dir, struct dentry *dentry,
 
 static int apparmor_path_truncate(struct path *path)
 {
-       struct path_cond cond = { path->dentry->d_inode->i_uid,
-                                 path->dentry->d_inode->i_mode
+       struct path_cond cond = { d_backing_inode(path->dentry)->i_uid,
+                                 d_backing_inode(path->dentry)->i_mode
        };
 
        if (!path->mnt || !mediated_filesystem(path->dentry))
@@ -327,8 +327,8 @@ static int apparmor_path_rename(struct path *old_dir, struct dentry *old_dentry,
        if (!unconfined(profile)) {
                struct path old_path = { old_dir->mnt, old_dentry };
                struct path new_path = { new_dir->mnt, new_dentry };
-               struct path_cond cond = { old_dentry->d_inode->i_uid,
-                                         old_dentry->d_inode->i_mode
+               struct path_cond cond = { d_backing_inode(old_dentry)->i_uid,
+                                         d_backing_inode(old_dentry)->i_mode
                };
 
                error = aa_path_perm(OP_RENAME_SRC, profile, &old_path, 0,
@@ -354,8 +354,8 @@ static int apparmor_path_chmod(struct path *path, umode_t mode)
 
 static int apparmor_path_chown(struct path *path, kuid_t uid, kgid_t gid)
 {
-       struct path_cond cond =  { path->dentry->d_inode->i_uid,
-                                  path->dentry->d_inode->i_mode
+       struct path_cond cond =  { d_backing_inode(path->dentry)->i_uid,
+                                  d_backing_inode(path->dentry)->i_mode
        };
 
        if (!mediated_filesystem(path->dentry))
index f66713b..f2875cd 100644 (file)
@@ -297,7 +297,7 @@ static inline void bprm_clear_caps(struct linux_binprm *bprm)
  */
 int cap_inode_need_killpriv(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        int error;
 
        if (!inode->i_op->getxattr)
@@ -319,7 +319,7 @@ int cap_inode_need_killpriv(struct dentry *dentry)
  */
 int cap_inode_killpriv(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
 
        if (!inode->i_op->removexattr)
               return 0;
@@ -375,7 +375,7 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
  */
 int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        __u32 magic_etc;
        unsigned tocopy, i;
        int size;
index 131a3c4..91503b7 100644 (file)
@@ -27,7 +27,7 @@ static int mount_count;
 
 static inline int positive(struct dentry *dentry)
 {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
 }
 
 static int fill_super(struct super_block *sb, void *data, int silent)
@@ -102,14 +102,14 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
        if (!parent)
                parent = mount->mnt_root;
 
-       dir = parent->d_inode;
+       dir = d_inode(parent);
 
        mutex_lock(&dir->i_mutex);
        dentry = lookup_one_len(name, parent, strlen(name));
        if (IS_ERR(dentry))
                goto out;
 
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                error = -EEXIST;
                goto out1;
        }
@@ -197,20 +197,20 @@ void securityfs_remove(struct dentry *dentry)
                return;
 
        parent = dentry->d_parent;
-       if (!parent || !parent->d_inode)
+       if (!parent || d_really_is_negative(parent))
                return;
 
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
        if (positive(dentry)) {
-               if (dentry->d_inode) {
+               if (d_really_is_positive(dentry)) {
                        if (d_is_dir(dentry))
-                               simple_rmdir(parent->d_inode, dentry);
+                               simple_rmdir(d_inode(parent), dentry);
                        else
-                               simple_unlink(parent->d_inode, dentry);
+                               simple_unlink(d_inode(parent), dentry);
                        dput(dentry);
                }
        }
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
        simple_release_fs(&mount, &mount_count);
 }
 EXPORT_SYMBOL_GPL(securityfs_remove);
index 5e9687f..159ef3e 100644 (file)
@@ -131,7 +131,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
                                size_t req_xattr_value_len,
                                char type, char *digest)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        struct shash_desc *desc;
        char **xattrname;
        size_t xattr_size = 0;
@@ -199,7 +199,7 @@ int evm_calc_hash(struct dentry *dentry, const char *req_xattr_name,
 int evm_update_evmxattr(struct dentry *dentry, const char *xattr_name,
                        const char *xattr_value, size_t xattr_value_len)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        struct evm_ima_xattr_data xattr_data;
        int rc = 0;
 
index f589c9a..10f9943 100644 (file)
@@ -72,7 +72,7 @@ static void __init evm_init_config(void)
 
 static int evm_find_protected_xattrs(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        char **xattr;
        int error;
        int count = 0;
@@ -165,8 +165,8 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
                        /* Replace RSA with HMAC if not mounted readonly and
                         * not immutable
                         */
-                       if (!IS_RDONLY(dentry->d_inode) &&
-                           !IS_IMMUTABLE(dentry->d_inode))
+                       if (!IS_RDONLY(d_backing_inode(dentry)) &&
+                           !IS_IMMUTABLE(d_backing_inode(dentry)))
                                evm_update_evmxattr(dentry, xattr_name,
                                                    xattr_value,
                                                    xattr_value_len);
@@ -235,7 +235,7 @@ enum integrity_status evm_verifyxattr(struct dentry *dentry,
                return INTEGRITY_UNKNOWN;
 
        if (!iint) {
-               iint = integrity_iint_find(dentry->d_inode);
+               iint = integrity_iint_find(d_backing_inode(dentry));
                if (!iint)
                        return INTEGRITY_UNKNOWN;
        }
@@ -253,7 +253,7 @@ EXPORT_SYMBOL_GPL(evm_verifyxattr);
  */
 static enum integrity_status evm_verify_current_integrity(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
 
        if (!evm_initialized || !S_ISREG(inode->i_mode) || evm_fixmode)
                return 0;
@@ -293,13 +293,13 @@ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name,
        if (evm_status == INTEGRITY_NOXATTRS) {
                struct integrity_iint_cache *iint;
 
-               iint = integrity_iint_find(dentry->d_inode);
+               iint = integrity_iint_find(d_backing_inode(dentry));
                if (iint && (iint->flags & IMA_NEW_FILE))
                        return 0;
        }
 out:
        if (evm_status != INTEGRITY_PASS)
-               integrity_audit_msg(AUDIT_INTEGRITY_METADATA, dentry->d_inode,
+               integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
                                    dentry->d_name.name, "appraise_metadata",
                                    integrity_status_msg[evm_status],
                                    -EPERM, 0);
@@ -379,7 +379,7 @@ void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name,
  */
 void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
 
        if (!evm_initialized || !evm_protected_xattr(xattr_name))
                return;
@@ -404,7 +404,7 @@ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
        if ((evm_status == INTEGRITY_PASS) ||
            (evm_status == INTEGRITY_NOXATTRS))
                return 0;
-       integrity_audit_msg(AUDIT_INTEGRITY_METADATA, dentry->d_inode,
+       integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
                            dentry->d_name.name, "appraise_metadata",
                            integrity_status_msg[evm_status], -EPERM, 0);
        return -EPERM;
index fffcdb0..4df493e 100644 (file)
@@ -165,7 +165,7 @@ void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len,
 int ima_read_xattr(struct dentry *dentry,
                   struct evm_ima_xattr_data **xattr_value)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
 
        if (!inode->i_op->getxattr)
                return 0;
@@ -190,7 +190,7 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
        static const char op[] = "appraise_data";
        char *cause = "unknown";
        struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        enum integrity_status status = INTEGRITY_UNKNOWN;
        int rc = xattr_len, hash_start = 0;
 
@@ -314,7 +314,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
  */
 void ima_inode_post_setattr(struct dentry *dentry)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        struct integrity_iint_cache *iint;
        int must_appraise, rc;
 
@@ -380,7 +380,7 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
        if (result == 1) {
                if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST))
                        return -EINVAL;
-               ima_reset_appraise_flags(dentry->d_inode,
+               ima_reset_appraise_flags(d_backing_inode(dentry),
                         (xvalue->type == EVM_IMA_XATTR_DIGSIG) ? 1 : 0);
                result = 0;
        }
@@ -393,7 +393,7 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name)
 
        result = ima_protect_xattr(dentry, xattr_name, NULL, 0);
        if (result == 1) {
-               ima_reset_appraise_flags(dentry->d_inode, 0);
+               ima_reset_appraise_flags(d_backing_inode(dentry), 0);
                result = 0;
        }
        return result;
index b526ddc..1d34277 100644 (file)
@@ -237,7 +237,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 
                audit_log_d_path(ab, " path=", &a->u.path);
 
-               inode = a->u.path.dentry->d_inode;
+               inode = d_backing_inode(a->u.path.dentry);
                if (inode) {
                        audit_log_format(ab, " dev=");
                        audit_log_untrustedstring(ab, inode->i_sb->s_id);
@@ -251,7 +251,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
                audit_log_format(ab, " name=");
                audit_log_untrustedstring(ab, a->u.dentry->d_name.name);
 
-               inode = a->u.dentry->d_inode;
+               inode = d_backing_inode(a->u.dentry);
                if (inode) {
                        audit_log_format(ab, " dev=");
                        audit_log_untrustedstring(ab, inode->i_sb->s_id);
index 730ac65..8e9b1f4 100644 (file)
@@ -410,7 +410,7 @@ EXPORT_SYMBOL(security_old_inode_init_security);
 int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode,
                        unsigned int dev)
 {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                return 0;
        return security_ops->path_mknod(dir, dentry, mode, dev);
 }
@@ -418,7 +418,7 @@ EXPORT_SYMBOL(security_path_mknod);
 
 int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode)
 {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                return 0;
        return security_ops->path_mkdir(dir, dentry, mode);
 }
@@ -426,14 +426,14 @@ EXPORT_SYMBOL(security_path_mkdir);
 
 int security_path_rmdir(struct path *dir, struct dentry *dentry)
 {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                return 0;
        return security_ops->path_rmdir(dir, dentry);
 }
 
 int security_path_unlink(struct path *dir, struct dentry *dentry)
 {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                return 0;
        return security_ops->path_unlink(dir, dentry);
 }
@@ -442,7 +442,7 @@ EXPORT_SYMBOL(security_path_unlink);
 int security_path_symlink(struct path *dir, struct dentry *dentry,
                          const char *old_name)
 {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                return 0;
        return security_ops->path_symlink(dir, dentry, old_name);
 }
@@ -450,7 +450,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
                       struct dentry *new_dentry)
 {
-       if (unlikely(IS_PRIVATE(old_dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry))))
                return 0;
        return security_ops->path_link(old_dentry, new_dir, new_dentry);
 }
@@ -459,8 +459,8 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
                         struct path *new_dir, struct dentry *new_dentry,
                         unsigned int flags)
 {
-       if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
-                    (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
+       if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)) ||
+                    (d_is_positive(new_dentry) && IS_PRIVATE(d_backing_inode(new_dentry)))))
                return 0;
 
        if (flags & RENAME_EXCHANGE) {
@@ -477,21 +477,21 @@ EXPORT_SYMBOL(security_path_rename);
 
 int security_path_truncate(struct path *path)
 {
-       if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
                return 0;
        return security_ops->path_truncate(path);
 }
 
 int security_path_chmod(struct path *path, umode_t mode)
 {
-       if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
                return 0;
        return security_ops->path_chmod(path, mode);
 }
 
 int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
 {
-       if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
                return 0;
        return security_ops->path_chown(path, uid, gid);
 }
@@ -513,14 +513,14 @@ EXPORT_SYMBOL_GPL(security_inode_create);
 int security_inode_link(struct dentry *old_dentry, struct inode *dir,
                         struct dentry *new_dentry)
 {
-       if (unlikely(IS_PRIVATE(old_dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry))))
                return 0;
        return security_ops->inode_link(old_dentry, dir, new_dentry);
 }
 
 int security_inode_unlink(struct inode *dir, struct dentry *dentry)
 {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return 0;
        return security_ops->inode_unlink(dir, dentry);
 }
@@ -543,7 +543,7 @@ EXPORT_SYMBOL_GPL(security_inode_mkdir);
 
 int security_inode_rmdir(struct inode *dir, struct dentry *dentry)
 {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return 0;
        return security_ops->inode_rmdir(dir, dentry);
 }
@@ -559,8 +559,8 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
                           struct inode *new_dir, struct dentry *new_dentry,
                           unsigned int flags)
 {
-        if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
-            (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
+        if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)) ||
+            (d_is_positive(new_dentry) && IS_PRIVATE(d_backing_inode(new_dentry)))))
                return 0;
 
        if (flags & RENAME_EXCHANGE) {
@@ -576,14 +576,14 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 int security_inode_readlink(struct dentry *dentry)
 {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return 0;
        return security_ops->inode_readlink(dentry);
 }
 
 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
 {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return 0;
        return security_ops->inode_follow_link(dentry, nd);
 }
@@ -599,7 +599,7 @@ int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
 {
        int ret;
 
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return 0;
        ret = security_ops->inode_setattr(dentry, attr);
        if (ret)
@@ -610,7 +610,7 @@ EXPORT_SYMBOL_GPL(security_inode_setattr);
 
 int security_inode_getattr(const struct path *path)
 {
-       if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
                return 0;
        return security_ops->inode_getattr(path);
 }
@@ -620,7 +620,7 @@ int security_inode_setxattr(struct dentry *dentry, const char *name,
 {
        int ret;
 
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return 0;
        ret = security_ops->inode_setxattr(dentry, name, value, size, flags);
        if (ret)
@@ -634,7 +634,7 @@ int security_inode_setxattr(struct dentry *dentry, const char *name,
 void security_inode_post_setxattr(struct dentry *dentry, const char *name,
                                  const void *value, size_t size, int flags)
 {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return;
        security_ops->inode_post_setxattr(dentry, name, value, size, flags);
        evm_inode_post_setxattr(dentry, name, value, size);
@@ -642,14 +642,14 @@ void security_inode_post_setxattr(struct dentry *dentry, const char *name,
 
 int security_inode_getxattr(struct dentry *dentry, const char *name)
 {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return 0;
        return security_ops->inode_getxattr(dentry, name);
 }
 
 int security_inode_listxattr(struct dentry *dentry)
 {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return 0;
        return security_ops->inode_listxattr(dentry);
 }
@@ -658,7 +658,7 @@ int security_inode_removexattr(struct dentry *dentry, const char *name)
 {
        int ret;
 
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                return 0;
        ret = security_ops->inode_removexattr(dentry, name);
        if (ret)
index c318b30..7dade28 100644 (file)
@@ -414,7 +414,7 @@ static int sb_finish_set_opts(struct super_block *sb)
 {
        struct superblock_security_struct *sbsec = sb->s_security;
        struct dentry *root = sb->s_root;
-       struct inode *root_inode = root->d_inode;
+       struct inode *root_inode = d_backing_inode(root);
        int rc = 0;
 
        if (sbsec->behavior == SECURITY_FS_USE_XATTR) {
@@ -552,7 +552,7 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
                opts->mnt_opts_flags[i++] = DEFCONTEXT_MNT;
        }
        if (sbsec->flags & ROOTCONTEXT_MNT) {
-               struct inode *root = sbsec->sb->s_root->d_inode;
+               struct inode *root = d_backing_inode(sbsec->sb->s_root);
                struct inode_security_struct *isec = root->i_security;
 
                rc = security_sid_to_context(isec->sid, &context, &len);
@@ -608,7 +608,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
        int rc = 0, i;
        struct superblock_security_struct *sbsec = sb->s_security;
        const char *name = sb->s_type->name;
-       struct inode *inode = sbsec->sb->s_root->d_inode;
+       struct inode *inode = d_backing_inode(sbsec->sb->s_root);
        struct inode_security_struct *root_isec = inode->i_security;
        u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
        u32 defcontext_sid = 0;
@@ -835,8 +835,8 @@ static int selinux_cmp_sb_context(const struct super_block *oldsb,
        if ((oldflags & DEFCONTEXT_MNT) && old->def_sid != new->def_sid)
                goto mismatch;
        if (oldflags & ROOTCONTEXT_MNT) {
-               struct inode_security_struct *oldroot = oldsb->s_root->d_inode->i_security;
-               struct inode_security_struct *newroot = newsb->s_root->d_inode->i_security;
+               struct inode_security_struct *oldroot = d_backing_inode(oldsb->s_root)->i_security;
+               struct inode_security_struct *newroot = d_backing_inode(newsb->s_root)->i_security;
                if (oldroot->sid != newroot->sid)
                        goto mismatch;
        }
@@ -886,16 +886,16 @@ static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb,
                if (!set_fscontext)
                        newsbsec->sid = sid;
                if (!set_rootcontext) {
-                       struct inode *newinode = newsb->s_root->d_inode;
+                       struct inode *newinode = d_backing_inode(newsb->s_root);
                        struct inode_security_struct *newisec = newinode->i_security;
                        newisec->sid = sid;
                }
                newsbsec->mntpoint_sid = sid;
        }
        if (set_rootcontext) {
-               const struct inode *oldinode = oldsb->s_root->d_inode;
+               const struct inode *oldinode = d_backing_inode(oldsb->s_root);
                const struct inode_security_struct *oldisec = oldinode->i_security;
-               struct inode *newinode = newsb->s_root->d_inode;
+               struct inode *newinode = d_backing_inode(newsb->s_root);
                struct inode_security_struct *newisec = newinode->i_security;
 
                newisec->sid = oldisec->sid;
@@ -1610,7 +1610,7 @@ static inline int dentry_has_perm(const struct cred *cred,
                                  struct dentry *dentry,
                                  u32 av)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        struct common_audit_data ad;
 
        ad.type = LSM_AUDIT_DATA_DENTRY;
@@ -1625,7 +1625,7 @@ static inline int path_has_perm(const struct cred *cred,
                                const struct path *path,
                                u32 av)
 {
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
        struct common_audit_data ad;
 
        ad.type = LSM_AUDIT_DATA_PATH;
@@ -1753,7 +1753,7 @@ static int may_link(struct inode *dir,
        int rc;
 
        dsec = dir->i_security;
-       isec = dentry->d_inode->i_security;
+       isec = d_backing_inode(dentry)->i_security;
 
        ad.type = LSM_AUDIT_DATA_DENTRY;
        ad.u.dentry = dentry;
@@ -1797,7 +1797,7 @@ static inline int may_rename(struct inode *old_dir,
        int rc;
 
        old_dsec = old_dir->i_security;
-       old_isec = old_dentry->d_inode->i_security;
+       old_isec = d_backing_inode(old_dentry)->i_security;
        old_is_dir = d_is_dir(old_dentry);
        new_dsec = new_dir->i_security;
 
@@ -1827,7 +1827,7 @@ static inline int may_rename(struct inode *old_dir,
        if (rc)
                return rc;
        if (d_is_positive(new_dentry)) {
-               new_isec = new_dentry->d_inode->i_security;
+               new_isec = d_backing_inode(new_dentry)->i_security;
                new_is_dir = d_is_dir(new_dentry);
                rc = avc_has_perm(sid, new_isec->sid,
                                  new_isec->sclass,
@@ -1963,7 +1963,7 @@ static int selinux_binder_transfer_file(struct task_struct *from,
 {
        u32 sid = task_sid(to);
        struct file_security_struct *fsec = file->f_security;
-       struct inode *inode = file->f_path.dentry->d_inode;
+       struct inode *inode = d_backing_inode(file->f_path.dentry);
        struct inode_security_struct *isec = inode->i_security;
        struct common_audit_data ad;
        int rc;
@@ -2627,7 +2627,7 @@ static int selinux_sb_remount(struct super_block *sb, void *data)
                        break;
                case ROOTCONTEXT_MNT: {
                        struct inode_security_struct *root_isec;
-                       root_isec = sb->s_root->d_inode->i_security;
+                       root_isec = d_backing_inode(sb->s_root)->i_security;
 
                        if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
                                goto out_bad_option;
@@ -2727,7 +2727,7 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode,
        struct task_security_struct *tsec;
        struct inode_security_struct *dsec;
        struct superblock_security_struct *sbsec;
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_backing_inode(dentry->d_parent);
        u32 newsid;
        int rc;
 
@@ -2982,7 +2982,7 @@ static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name)
 static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
                                  const void *value, size_t size, int flags)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        struct inode_security_struct *isec = inode->i_security;
        struct superblock_security_struct *sbsec;
        struct common_audit_data ad;
@@ -3059,7 +3059,7 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name,
                                        const void *value, size_t size,
                                        int flags)
 {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
        struct inode_security_struct *isec = inode->i_security;
        u32 newsid;
        int rc;
index 5fde343..d2787cc 100644 (file)
@@ -1737,7 +1737,7 @@ static struct dentry *sel_make_dir(struct dentry *dir, const char *name,
        inc_nlink(inode);
        d_add(dentry, inode);
        /* bump link count on parent directory, too */
-       inc_nlink(dir->d_inode);
+       inc_nlink(d_inode(dir));
 
        return dentry;
 }
index 69fdc38..b644757 100644 (file)
@@ -593,7 +593,7 @@ static int smack_sb_copy_data(char *orig, char *smackopts)
 static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data)
 {
        struct dentry *root = sb->s_root;
-       struct inode *inode = root->d_inode;
+       struct inode *inode = d_backing_inode(root);
        struct superblock_smack *sp = sb->s_security;
        struct inode_smack *isp;
        struct smack_known *skp;
@@ -889,15 +889,15 @@ static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
        smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
        smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
 
-       isp = smk_of_inode(old_dentry->d_inode);
+       isp = smk_of_inode(d_backing_inode(old_dentry));
        rc = smk_curacc(isp, MAY_WRITE, &ad);
-       rc = smk_bu_inode(old_dentry->d_inode, MAY_WRITE, rc);
+       rc = smk_bu_inode(d_backing_inode(old_dentry), MAY_WRITE, rc);
 
        if (rc == 0 && d_is_positive(new_dentry)) {
-               isp = smk_of_inode(new_dentry->d_inode);
+               isp = smk_of_inode(d_backing_inode(new_dentry));
                smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
                rc = smk_curacc(isp, MAY_WRITE, &ad);
-               rc = smk_bu_inode(new_dentry->d_inode, MAY_WRITE, rc);
+               rc = smk_bu_inode(d_backing_inode(new_dentry), MAY_WRITE, rc);
        }
 
        return rc;
@@ -913,7 +913,7 @@ static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
  */
 static int smack_inode_unlink(struct inode *dir, struct dentry *dentry)
 {
-       struct inode *ip = dentry->d_inode;
+       struct inode *ip = d_backing_inode(dentry);
        struct smk_audit_info ad;
        int rc;
 
@@ -956,8 +956,8 @@ static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry)
        /*
         * You need write access to the thing you're removing
         */
-       rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
-       rc = smk_bu_inode(dentry->d_inode, MAY_WRITE, rc);
+       rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_WRITE, &ad);
+       rc = smk_bu_inode(d_backing_inode(dentry), MAY_WRITE, rc);
        if (rc == 0) {
                /*
                 * You also need write access to the containing directory
@@ -995,15 +995,15 @@ static int smack_inode_rename(struct inode *old_inode,
        smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
        smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
 
-       isp = smk_of_inode(old_dentry->d_inode);
+       isp = smk_of_inode(d_backing_inode(old_dentry));
        rc = smk_curacc(isp, MAY_READWRITE, &ad);
-       rc = smk_bu_inode(old_dentry->d_inode, MAY_READWRITE, rc);
+       rc = smk_bu_inode(d_backing_inode(old_dentry), MAY_READWRITE, rc);
 
        if (rc == 0 && d_is_positive(new_dentry)) {
-               isp = smk_of_inode(new_dentry->d_inode);
+               isp = smk_of_inode(d_backing_inode(new_dentry));
                smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
                rc = smk_curacc(isp, MAY_READWRITE, &ad);
-               rc = smk_bu_inode(new_dentry->d_inode, MAY_READWRITE, rc);
+               rc = smk_bu_inode(d_backing_inode(new_dentry), MAY_READWRITE, rc);
        }
        return rc;
 }
@@ -1060,8 +1060,8 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
        smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
        smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
 
-       rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
-       rc = smk_bu_inode(dentry->d_inode, MAY_WRITE, rc);
+       rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_WRITE, &ad);
+       rc = smk_bu_inode(d_backing_inode(dentry), MAY_WRITE, rc);
        return rc;
 }
 
@@ -1075,7 +1075,7 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 static int smack_inode_getattr(const struct path *path)
 {
        struct smk_audit_info ad;
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
        int rc;
 
        smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
@@ -1142,8 +1142,8 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name,
        smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
 
        if (rc == 0) {
-               rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
-               rc = smk_bu_inode(dentry->d_inode, MAY_WRITE, rc);
+               rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_WRITE, &ad);
+               rc = smk_bu_inode(d_backing_inode(dentry), MAY_WRITE, rc);
        }
 
        return rc;
@@ -1164,7 +1164,7 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name,
                                      const void *value, size_t size, int flags)
 {
        struct smack_known *skp;
-       struct inode_smack *isp = dentry->d_inode->i_security;
+       struct inode_smack *isp = d_backing_inode(dentry)->i_security;
 
        if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) {
                isp->smk_flags |= SMK_INODE_TRANSMUTE;
@@ -1209,8 +1209,8 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name)
        smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
        smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
 
-       rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
-       rc = smk_bu_inode(dentry->d_inode, MAY_READ, rc);
+       rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_READ, &ad);
+       rc = smk_bu_inode(d_backing_inode(dentry), MAY_READ, rc);
        return rc;
 }
 
@@ -1246,12 +1246,12 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
        smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
        smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
 
-       rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
-       rc = smk_bu_inode(dentry->d_inode, MAY_WRITE, rc);
+       rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_WRITE, &ad);
+       rc = smk_bu_inode(d_backing_inode(dentry), MAY_WRITE, rc);
        if (rc != 0)
                return rc;
 
-       isp = dentry->d_inode->i_security;
+       isp = d_backing_inode(dentry)->i_security;
        /*
         * Don't do anything special for these.
         *      XATTR_NAME_SMACKIPIN
index 06f719e..d968298 100644 (file)
@@ -2490,7 +2490,7 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent)
                return rc;
        }
 
-       root_inode = sb->s_root->d_inode;
+       root_inode = d_inode(sb->s_root);
 
        return 0;
 }
index 63681e8..6c4528d 100644 (file)
@@ -714,7 +714,7 @@ void tomoyo_get_attributes(struct tomoyo_obj_info *obj)
                        dentry = dget_parent(dentry);
                        break;
                }
-               inode = dentry->d_inode;
+               inode = d_backing_inode(dentry);
                if (inode) {
                        struct tomoyo_mini_stat *stat = &obj->stat[i];
                        stat->uid  = inode->i_uid;
index 1e0d480..5077f19 100644 (file)
@@ -97,7 +97,7 @@ static char *tomoyo_get_absolute_path(const struct path *path, char * const buff
                /* go to whatever namespace root we are under */
                pos = d_absolute_path(path, buffer, buflen - 1);
                if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
-                       struct inode *inode = path->dentry->d_inode;
+                       struct inode *inode = d_backing_inode(path->dentry);
                        if (inode && S_ISDIR(inode->i_mode)) {
                                buffer[buflen - 2] = '/';
                                buffer[buflen - 1] = '\0';
@@ -125,7 +125,7 @@ static char *tomoyo_get_dentry_path(struct dentry *dentry, char * const buffer,
        if (buflen >= 256) {
                pos = dentry_path_raw(dentry, buffer, buflen - 1);
                if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
-                       struct inode *inode = dentry->d_inode;
+                       struct inode *inode = d_backing_inode(dentry);
                        if (inode && S_ISDIR(inode->i_mode)) {
                                buffer[buflen - 2] = '/';
                                buffer[buflen - 1] = '\0';
@@ -168,7 +168,7 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer,
        if (!MAJOR(sb->s_dev))
                goto prepend_filesystem_name;
        {
-               struct inode *inode = sb->s_root->d_inode;
+               struct inode *inode = d_backing_inode(sb->s_root);
                /*
                 * Use filesystem name if filesystem does not support rename()
                 * operation.
@@ -219,7 +219,7 @@ out:
 static char *tomoyo_get_socket_name(const struct path *path, char * const buffer,
                                    const int buflen)
 {
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
        struct socket *sock = inode ? SOCKET_I(inode) : NULL;
        struct sock *sk = sock ? sock->sk : NULL;
        if (sk) {
@@ -277,7 +277,7 @@ char *tomoyo_realpath_from_path(const struct path *path)
                        pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1);
                        goto encode;
                }
-               inode = sb->s_root->d_inode;
+               inode = d_backing_inode(sb->s_root);
                /*
                 * Get local name for filesystems without rename() operation
                 * or dentry without vfsmount.
index c0eea1d..f19da4b 100644 (file)
@@ -681,13 +681,8 @@ static int seq_timing_event(unsigned char *event_rec)
                        break;
 
                case TMR_ECHO:
-                       if (seq_mode == SEQ_2)
-                               seq_copy_to_input(event_rec, 8);
-                       else
-                       {
-                               parm = (parm << 8 | SEQ_ECHO);
-                               seq_copy_to_input((unsigned char *) &parm, 4);
-                       }
+                       parm = (parm << 8 | SEQ_ECHO);
+                       seq_copy_to_input((unsigned char *) &parm, 4);
                        break;
 
                default:;
@@ -1324,7 +1319,6 @@ int sequencer_ioctl(int dev, struct file *file, unsigned int cmd, void __user *a
        int mode = translate_mode(file);
        struct synth_info inf;
        struct seq_event_rec event_rec;
-       unsigned long flags;
        int __user *p = arg;
 
        orig_dev = dev = dev >> 4;
@@ -1479,9 +1473,7 @@ int sequencer_ioctl(int dev, struct file *file, unsigned int cmd, void __user *a
                case SNDCTL_SEQ_OUTOFBAND:
                        if (copy_from_user(&event_rec, arg, sizeof(event_rec)))
                                return -EFAULT;
-                       spin_lock_irqsave(&lock,flags);
                        play_event(event_rec.arr);
-                       spin_unlock_irqrestore(&lock,flags);
                        return 0;
 
                case SNDCTL_MIDI_INFO:
index e70a7fb..873ed1b 100644 (file)
@@ -2529,7 +2529,7 @@ static void set_dig_out(struct hda_codec *codec, hda_nid_t nid,
        if (!d)
                return;
        for (; *d; d++)
-               snd_hdac_regmap_update(&codec->core, nid,
+               snd_hdac_regmap_update(&codec->core, *d,
                                       AC_VERB_SET_DIGI_CONVERT_1, mask, val);
 }
 
index be1b7de..0efdb09 100644 (file)
@@ -404,7 +404,7 @@ struct azx {
        ((chip)->ops->reg_readb((dev)->sd_addr + AZX_REG_##reg))
 
 #define azx_has_pm_runtime(chip) \
-       (!AZX_DCAPS_PM_RUNTIME || ((chip)->driver_caps & AZX_DCAPS_PM_RUNTIME))
+       ((chip)->driver_caps & AZX_DCAPS_PM_RUNTIME)
 
 /* PCM setup */
 static inline struct azx_dev *get_azx_dev(struct snd_pcm_substream *substream)
index 52a85d8..3052a2b 100644 (file)
@@ -55,6 +55,12 @@ void haswell_set_bclk(struct hda_intel *hda)
        int cdclk_freq;
        unsigned int bclk_m, bclk_n;
        struct i915_audio_component *acomp = &hda->audio_component;
+       struct pci_dev *pci = hda->chip.pci;
+
+       /* Only Haswell/Broadwell need set BCLK */
+       if (pci->device != 0x0a0c && pci->device != 0x0c0c
+          && pci->device != 0x0d0c && pci->device != 0x160c)
+               return;
 
        if (!acomp->ops)
                return;
index e1c2105..34040d2 100644 (file)
@@ -297,6 +297,9 @@ enum {
         AZX_DCAPS_PM_RUNTIME | AZX_DCAPS_I915_POWERWELL |\
         AZX_DCAPS_SNOOP_TYPE(SCH))
 
+#define AZX_DCAPS_INTEL_BAYTRAIL \
+       (AZX_DCAPS_INTEL_PCH_NOPM | AZX_DCAPS_I915_POWERWELL)
+
 #define AZX_DCAPS_INTEL_BRASWELL \
        (AZX_DCAPS_INTEL_PCH | AZX_DCAPS_I915_POWERWELL)
 
@@ -1992,7 +1995,7 @@ static const struct pci_device_id azx_ids[] = {
          .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_NOPM },
        /* BayTrail */
        { PCI_DEVICE(0x8086, 0x0f04),
-         .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH_NOPM },
+         .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BAYTRAIL },
        /* Braswell */
        { PCI_DEVICE(0x8086, 0x2284),
          .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BRASWELL },
index ee62307..baaf7ed 100644 (file)
@@ -582,8 +582,8 @@ static void print_conn_list(struct snd_info_buffer *buffer,
 
        /* Get Cache connections info */
        cache_len = snd_hda_get_conn_list(codec, nid, &list);
-       if (cache_len != conn_len
-                       || memcmp(list, conn, conn_len)) {
+       if (cache_len >= 0 && (cache_len != conn_len ||
+                             memcmp(list, conn, conn_len) != 0)) {
                snd_iprintf(buffer, "  In-driver Connection: %d\n", cache_len);
                if (cache_len > 0) {
                        snd_iprintf(buffer, "    ");
index b18b9c6..06199e4 100644 (file)
@@ -4176,17 +4176,15 @@ static void alc_fixup_disable_aamix(struct hda_codec *codec,
        }
 }
 
-static unsigned int alc_power_filter_xps13(struct hda_codec *codec,
-                               hda_nid_t nid,
-                               unsigned int power_state)
+static void alc_shutup_dell_xps13(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
+       int hp_pin = spec->gen.autocfg.hp_pins[0];
 
-       /* Avoid pop noises when headphones are plugged in */
-       if (spec->gen.hp_jack_present)
-               if (nid == codec->core.afg || nid == 0x02 || nid == 0x15)
-                       return AC_PWRST_D0;
-       return snd_hda_gen_path_power_filter(codec, nid, power_state);
+       /* Prevent pop noises when headphones are plugged in */
+       snd_hda_codec_write(codec, hp_pin, 0,
+                           AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
+       msleep(20);
 }
 
 static void alc_fixup_dell_xps13(struct hda_codec *codec,
@@ -4197,8 +4195,7 @@ static void alc_fixup_dell_xps13(struct hda_codec *codec,
                struct hda_input_mux *imux = &spec->gen.input_mux;
                int i;
 
-               spec->shutup = alc_no_shutup;
-               codec->power_filter = alc_power_filter_xps13;
+               spec->shutup = alc_shutup_dell_xps13;
 
                /* Make the internal mic the default input source. */
                for (i = 0; i < imux->num_items; i++) {
@@ -5231,6 +5228,16 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {0x1b, 0x411111f0}, \
        {0x1e, 0x411111f0}
 
+#define ALC256_STANDARD_PINS \
+       {0x12, 0x90a60140}, \
+       {0x14, 0x90170110}, \
+       {0x19, 0x411111f0}, \
+       {0x1a, 0x411111f0}, \
+       {0x1b, 0x411111f0}, \
+       {0x1d, 0x40700001}, \
+       {0x1e, 0x411111f0}, \
+       {0x21, 0x02211020}
+
 #define ALC282_STANDARD_PINS \
        {0x14, 0x90170110}, \
        {0x18, 0x411111f0}, \
@@ -5331,15 +5338,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x1d, 0x40700001},
                {0x21, 0x02211050}),
        SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
-               {0x12, 0x90a60140},
-               {0x13, 0x40000000},
-               {0x14, 0x90170110},
-               {0x19, 0x411111f0},
-               {0x1a, 0x411111f0},
-               {0x1b, 0x411111f0},
-               {0x1d, 0x40700001},
-               {0x1e, 0x411111f0},
-               {0x21, 0x02211020}),
+               ALC256_STANDARD_PINS,
+               {0x13, 0x40000000}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC256_STANDARD_PINS,
+               {0x13, 0x411111f0}),
        SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
                {0x12, 0x90a60130},
                {0x13, 0x40000000},
@@ -5667,6 +5670,8 @@ static int patch_alc269(struct hda_codec *codec)
                break;
        case 0x10ec0256:
                spec->codec_variant = ALC269_TYPE_ALC256;
+               spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
+               alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
                break;
        }
 
@@ -5680,8 +5685,8 @@ static int patch_alc269(struct hda_codec *codec)
        if (err < 0)
                goto error;
 
-       if (!spec->gen.no_analog && spec->gen.beep_nid)
-               set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT);
+       if (!spec->gen.no_analog && spec->gen.beep_nid && spec->gen.mixer_nid)
+               set_beep_amp(spec, spec->gen.mixer_nid, 0x04, HDA_INPUT);
 
        codec->patch_ops = alc_patch_ops;
        codec->patch_ops.stream_pm = snd_hda_gen_stream_pm;
index 749069a..b120925 100644 (file)
@@ -3101,13 +3101,13 @@ static int snd_intel8x0_create(struct snd_card *card,
                chip->bmaddr = pci_iomap(pci, 3, 0);
        else
                chip->bmaddr = pci_iomap(pci, 1, 0);
+
+ port_inited:
        if (!chip->bmaddr) {
                dev_err(card->dev, "Controller space ioremap problem\n");
                snd_intel8x0_free(chip);
                return -EIO;
        }
-
- port_inited:
        chip->bdbars_count = bdbars[device_type];
 
        /* initialize offsets */
index 0c2af21..142c066 100644 (file)
@@ -250,6 +250,7 @@ struct fsi_clk {
 
 struct fsi_priv {
        void __iomem *base;
+       phys_addr_t phys;
        struct fsi_master *master;
 
        struct fsi_stream playback;
@@ -1371,13 +1372,18 @@ static int fsi_dma_probe(struct fsi_priv *fsi, struct fsi_stream *io, struct dev
                                shdma_chan_filter, (void *)io->dma_id,
                                dev, is_play ? "tx" : "rx");
        if (io->chan) {
-               struct dma_slave_config cfg;
+               struct dma_slave_config cfg = {};
                int ret;
 
-               cfg.slave_id    = io->dma_id;
-               cfg.dst_addr    = 0; /* use default addr */
-               cfg.src_addr    = 0; /* use default addr */
-               cfg.direction   = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
+               if (is_play) {
+                       cfg.dst_addr            = fsi->phys + REG_DODT;
+                       cfg.dst_addr_width      = DMA_SLAVE_BUSWIDTH_4_BYTES;
+                       cfg.direction           = DMA_MEM_TO_DEV;
+               } else {
+                       cfg.src_addr            = fsi->phys + REG_DIDT;
+                       cfg.src_addr_width      = DMA_SLAVE_BUSWIDTH_4_BYTES;
+                       cfg.direction           = DMA_DEV_TO_MEM;
+               }
 
                ret = dmaengine_slave_config(io->chan, &cfg);
                if (ret < 0) {
@@ -1974,6 +1980,7 @@ static int fsi_probe(struct platform_device *pdev)
        /* FSI A setting */
        fsi             = &master->fsia;
        fsi->base       = master->base;
+       fsi->phys       = res->start;
        fsi->master     = master;
        fsi_port_info_init(fsi, &info.port_a);
        fsi_handler_init(fsi, &info.port_a);
@@ -1986,6 +1993,7 @@ static int fsi_probe(struct platform_device *pdev)
        /* FSI B setting */
        fsi             = &master->fsib;
        fsi->base       = master->base + 0x40;
+       fsi->phys       = res->start + 0x40;
        fsi->master     = master;
        fsi_port_info_init(fsi, &info.port_b);
        fsi_handler_init(fsi, &info.port_b);
index 8bcc87c..789d19e 100644 (file)
@@ -79,7 +79,10 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip,
                format = 1 << UAC_FORMAT_TYPE_I_PCM;
        }
        if (format & (1 << UAC_FORMAT_TYPE_I_PCM)) {
-               if (chip->usb_id == USB_ID(0x0582, 0x0016) /* Edirol SD-90 */ &&
+               if (((chip->usb_id == USB_ID(0x0582, 0x0016)) ||
+                    /* Edirol SD-90 */
+                    (chip->usb_id == USB_ID(0x0582, 0x000c))) &&
+                    /* Roland SC-D70 */
                    sample_width == 24 && sample_bytes == 2)
                        sample_bytes = 3;
                else if (sample_width > sample_bytes * 8) {
index 07f984d..2f6d3e9 100644 (file)
@@ -816,37 +816,11 @@ YAMAHA_DEVICE(0x7010, "UB99"),
                .data = (const struct snd_usb_audio_quirk[]) {
                        {
                                .ifnum = 0,
-                               .type = QUIRK_AUDIO_FIXED_ENDPOINT,
-                               .data = & (const struct audioformat) {
-                                       .formats = SNDRV_PCM_FMTBIT_S24_3LE,
-                                       .channels = 2,
-                                       .iface = 0,
-                                       .altsetting = 1,
-                                       .altset_idx = 1,
-                                       .attributes = 0,
-                                       .endpoint = 0x01,
-                                       .ep_attr = 0x01,
-                                       .rates = SNDRV_PCM_RATE_CONTINUOUS,
-                                       .rate_min = 44100,
-                                       .rate_max = 44100,
-                               }
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE
                        },
                        {
                                .ifnum = 1,
-                               .type = QUIRK_AUDIO_FIXED_ENDPOINT,
-                               .data = & (const struct audioformat) {
-                                       .formats = SNDRV_PCM_FMTBIT_S24_3LE,
-                                       .channels = 2,
-                                       .iface = 1,
-                                       .altsetting = 1,
-                                       .altset_idx = 1,
-                                       .attributes = 0,
-                                       .endpoint = 0x81,
-                                       .ep_attr = 0x01,
-                                       .rates = SNDRV_PCM_RATE_CONTINUOUS,
-                                       .rate_min = 44100,
-                                       .rate_max = 44100,
-                               }
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE
                        },
                        {
                                .ifnum = 2,
index 9690798..8b27898 100644 (file)
 struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, int bus,
                             int slot, int func, int vendor, int dev)
 {
-       struct pci_filter filter_nb_link = { domain, bus, slot, func,
-                                            vendor, dev };
+       struct pci_filter filter_nb_link;
        struct pci_dev *device;
 
        *pacc = pci_alloc();
        if (*pacc == NULL)
                return NULL;
 
+       pci_filter_init(*pacc, &filter_nb_link);
+       filter_nb_link.domain   = domain;
+       filter_nb_link.bus      = bus;
+       filter_nb_link.slot     = slot;
+       filter_nb_link.func     = func;
+       filter_nb_link.vendor   = vendor;
+       filter_nb_link.device   = dev;
+
        pci_init(*pacc);
        pci_scan_bus(*pacc);
 
index 8d550ff..78fb820 100644 (file)
@@ -1561,6 +1561,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
                        goto out;
        }
 
+       if (irq_num >= kvm->arch.vgic.nr_irqs)
+               return -EINVAL;
+
        vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
        if (vcpu_id >= 0) {
                /* kick the specified vcpu */
@@ -2141,7 +2144,7 @@ int kvm_irq_map_gsi(struct kvm *kvm,
                    struct kvm_kernel_irq_routing_entry *entries,
                    int gsi)
 {
-       return gsi;
+       return 0;
 }
 
 int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
index d3fc939..9097741 100644 (file)
@@ -89,6 +89,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
 static __read_mostly struct preempt_ops kvm_preempt_ops;
 
 struct dentry *kvm_debugfs_dir;
+EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                           unsigned long arg);