Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

author Linus Torvalds <torvalds@linux-foundation.org>

Mon, 27 Apr 2015 21:05:19 +0000 (14:05 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Mon, 27 Apr 2015 21:05:19 +0000 (14:05 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 27 Apr 2015 21:05:19 +0000 (14:05 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 27 Apr 2015 21:05:19 +0000 (14:05 -0700)
diff --git a/CREDITS b/CREDITS

index 2ef5dce..40cc4bf 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -2049,6 +2049,10 @@ D: pirq addr, CS5535 alsa audio driver
  S: Gurgaon, India
  S: Kuala Lumpur, Malaysia
  
+N: Mohit Kumar
+D: ST Microelectronics SPEAr13xx PCI host bridge driver
+D: Synopsys Designware PCI host bridge driver
+
  N: Gabor Kuti
  M: seasons@falcon.sch.bme.hu
  M: seasons@makosteszta.sote.hu
diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd

index 76ee192..3b5c3bc 100644 (file)
--- a/Documentation/ABI/testing/sysfs-class-mtd
+++ b/Documentation/ABI/testing/sysfs-class-mtd
@@ -222,3 +222,13 @@ Description:
                 The number of blocks that are marked as reserved, if any, in
                 this partition. These are typically used to store the in-flash
                 bad block table (BBT).
+
+What:          /sys/class/mtd/mtdX/offset
+Date:          March 2015
+KernelVersion: 4.1
+Contact:       linux-mtd@lists.infradead.org
+Description:
+               For a partition, the offset of that partition from the start
+               of the master device in bytes. This attribute is absent on
+               main devices, so it can be used to distinguish between
+               partitions and devices that aren't partitions.
diff --git a/Documentation/ABI/testing/sysfs-driver-toshiba_acpi b/Documentation/ABI/testing/sysfs-driver-toshiba_acpi

index ca9c71a..eed922e 100644 (file)
--- a/Documentation/ABI/testing/sysfs-driver-toshiba_acpi
+++ b/Documentation/ABI/testing/sysfs-driver-toshiba_acpi
@@ -8,9 +8,11 @@ Description:   This file controls the keyboard backlight operation mode, valid
                         * 0x2  -> AUTO (also called TIMER)
                         * 0x8  -> ON
                         * 0x10 -> OFF
-               Note that the kernel 3.16 onwards this file accepts all listed
+               Note that from kernel 3.16 onwards this file accepts all listed
                 parameters, kernel 3.15 only accepts the first two (FN-Z and
                 AUTO).
+               Also note that toggling this value on type 1 devices, requires
+               a reboot for changes to take effect.
  Users:         KToshiba
  
  What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/kbd_backlight_timeout
@@ -67,15 +69,72 @@ Description:        This file shows the current keyboard backlight type,
                         * 2 -> Type 2, supporting modes TIMER, ON and OFF
  Users:         KToshiba
  
+What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_sleep_charge
+Date:          January 23, 2015
+KernelVersion: 4.0
+Contact:       Azael Avalos <coproscefalo@gmail.com>
+Description:   This file controls the USB Sleep & Charge charging mode, which
+               can be:
+                       * 0 -> Disabled         (0x00)
+                       * 1 -> Alternate        (0x09)
+                       * 2 -> Auto             (0x21)
+                       * 3 -> Typical          (0x11)
+               Note that from kernel 4.1 onwards this file accepts all listed
+               values, kernel 4.0 only supports the first three.
+               Note that this feature only works when connected to power, if
+               you want to use it under battery, see the entry named
+               "sleep_functions_on_battery"
+Users:         KToshiba
+
+What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/sleep_functions_on_battery
+Date:          January 23, 2015
+KernelVersion: 4.0
+Contact:       Azael Avalos <coproscefalo@gmail.com>
+Description:   This file controls the USB Sleep Functions under battery, and
+               set the level at which point they will be disabled, accepted
+               values can be:
+                       * 0     -> Disabled
+                       * 1-100 -> Battery level to disable sleep functions
+               Currently it prints two values, the first one indicates if the
+               feature is enabled or disabled, while the second one shows the
+               current battery level set.
+               Note that when the value is set to disabled, the sleep function
+               will only work when connected to power.
+Users:         KToshiba
+
+What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_rapid_charge
+Date:          January 23, 2015
+KernelVersion: 4.0
+Contact:       Azael Avalos <coproscefalo@gmail.com>
+Description:   This file controls the USB Rapid Charge state, which can be:
+                       * 0 -> Disabled
+                       * 1 -> Enabled
+               Note that toggling this value requires a reboot for changes to
+               take effect.
+Users:         KToshiba
+
+What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_sleep_music
+Date:          January 23, 2015
+KernelVersion: 4.0
+Contact:       Azael Avalos <coproscefalo@gmail.com>
+Description:   This file controls the Sleep & Music state, which values can be:
+                       * 0 -> Disabled
+                       * 1 -> Enabled
+               Note that this feature only works when connected to power, if
+               you want to use it under battery, see the entry named
+               "sleep_functions_on_battery"
+Users:         KToshiba
+
  What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/version
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
  Contact:       Azael Avalos <coproscefalo@gmail.com>
  Description:   This file shows the current version of the driver
+Users:         KToshiba
  
  What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/fan
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
  Contact:       Azael Avalos <coproscefalo@gmail.com>
  Description:   This file controls the state of the internal fan, valid
                 values are:
@@ -83,8 +142,8 @@ Description: This file controls the state of the internal fan, valid
                         * 1 -> ON
  
  What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/kbd_function_keys
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
  Contact:       Azael Avalos <coproscefalo@gmail.com>
  Description:   This file controls the Special Functions (hotkeys) operation
                 mode, valid values are:
@@ -94,21 +153,29 @@ Description:       This file controls the Special Functions (hotkeys) operation
                 and the hotkeys are accessed via FN-F{1-12}.
                 In the "Special Functions" mode, the F{1-12} keys trigger the
                 hotkey and the F{1-12} keys are accessed via FN-F{1-12}.
+               Note that toggling this value requires a reboot for changes to
+               take effect.
+Users:         KToshiba
  
  What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/panel_power_on
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
  Contact:       Azael Avalos <coproscefalo@gmail.com>
  Description:   This file controls whether the laptop should turn ON whenever
                 the LID is opened, valid values are:
                         * 0 -> Disabled
                         * 1 -> Enabled
+               Note that toggling this value requires a reboot for changes to
+               take effect.
+Users:         KToshiba
  
  What:          /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_three
-Date:          February, 2015
-KernelVersion: 3.20
+Date:          February 12, 2015
+KernelVersion: 4.0
  Contact:       Azael Avalos <coproscefalo@gmail.com>
-Description:   This file controls whether the USB 3 functionality, valid
-               values are:
+Description:   This file controls the USB 3 functionality, valid values are:
                         * 0 -> Disabled (Acts as a regular USB 2)
                         * 1 -> Enabled (Full USB 3 functionality)
+               Note that toggling this value requires a reboot for changes to
+               take effect.
+Users:         KToshiba
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop

new file mode 100644 (file)

index 0000000..8c6a0b8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop
@@ -0,0 +1,69 @@
+What:          /sys/class/leds/dell::kbd_backlight/als_enabled
+Date:          December 2014
+KernelVersion: 3.19
+Contact:       Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+               Pali Rohár <pali.rohar@gmail.com>
+Description:
+               This file allows to control the automatic keyboard
+               illumination mode on some systems that have an ambient
+               light sensor. Write 1 to this file to enable the auto
+               mode, 0 to disable it.
+
+What:          /sys/class/leds/dell::kbd_backlight/als_setting
+Date:          December 2014
+KernelVersion: 3.19
+Contact:       Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+               Pali Rohár <pali.rohar@gmail.com>
+Description:
+               This file allows to specifiy the on/off threshold value,
+               as reported by the ambient light sensor.
+
+What:          /sys/class/leds/dell::kbd_backlight/start_triggers
+Date:          December 2014
+KernelVersion: 3.19
+Contact:       Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+               Pali Rohár <pali.rohar@gmail.com>
+Description:
+               This file allows to control the input triggers that
+               turn on the keyboard backlight illumination that is
+               disabled because of inactivity.
+               Read the file to see the triggers available. The ones
+               enabled are preceded by '+', those disabled by '-'.
+
+               To enable a trigger, write its name preceded by '+' to
+               this file. To disable a trigger, write its name preceded
+               by '-' instead.
+
+               For example, to enable the keyboard as trigger run:
+                   echo +keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+               To disable it:
+                   echo -keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+
+               Note that not all the available triggers can be configured.
+
+What:          /sys/class/leds/dell::kbd_backlight/stop_timeout
+Date:          December 2014
+KernelVersion: 3.19
+Contact:       Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+               Pali Rohár <pali.rohar@gmail.com>
+Description:
+               This file allows to specify the interval after which the
+               keyboard illumination is disabled because of inactivity.
+               The timeouts are expressed in seconds, minutes, hours and
+               days, for which the symbols are 's', 'm', 'h' and 'd'
+               respectively.
+
+               To configure the timeout, write to this file a value along
+               with any the above units. If no unit is specified, the value
+               is assumed to be expressed in seconds.
+
+               For example, to set the timeout to 10 minutes run:
+                   echo 10m > /sys/class/leds/dell::kbd_backlight/stop_timeout
+
+               Note that when this file is read, the returned value might be
+               expressed in a different unit than the one used when the timeout
+               was set.
+
+               Also note that only some timeouts are supported and that
+               some systems might fall back to a specific timeout in case
+               an invalid timeout is written to this file.
diff --git a/Documentation/arm64/acpi_object_usage.txt b/Documentation/arm64/acpi_object_usage.txt

new file mode 100644 (file)

index 0000000..a6e1a18
--- /dev/null
+++ b/Documentation/arm64/acpi_object_usage.txt
@@ -0,0 +1,593 @@
+ACPI Tables
+-----------
+The expectations of individual ACPI tables are discussed in the list that
+follows.
+
+If a section number is used, it refers to a section number in the ACPI
+specification where the object is defined.  If "Signature Reserved" is used,
+the table signature (the first four bytes of the table) is the only portion
+of the table recognized by the specification, and the actual table is defined
+outside of the UEFI Forum (see Section 5.2.6 of the specification).
+
+For ACPI on arm64, tables also fall into the following categories:
+
+       -- Required: DSDT, FADT, GTDT, MADT, MCFG, RSDP, SPCR, XSDT
+
+       -- Recommended: BERT, EINJ, ERST, HEST, SSDT
+
+       -- Optional: BGRT, CPEP, CSRT, DRTM, ECDT, FACS, FPDT, MCHI, MPST,
+          MSCT, RASF, SBST, SLIT, SPMI, SRAT, TCPA, TPM2, UEFI
+
+       -- Not supported: BOOT, DBG2, DBGP, DMAR, ETDT, HPET, IBFT, IVRS,
+          LPIT, MSDM, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
+
+
+Table  Usage for ARMv8 Linux
+-----  ----------------------------------------------------------------
+BERT   Section 18.3 (signature == "BERT")
+       == Boot Error Record Table ==
+       Must be supplied if RAS support is provided by the platform.  It
+       is recommended this table be supplied.
+
+BOOT   Signature Reserved (signature == "BOOT")
+       == simple BOOT flag table ==
+       Microsoft only table, will not be supported.
+
+BGRT   Section 5.2.22 (signature == "BGRT")
+       == Boot Graphics Resource Table ==
+       Optional, not currently supported, with no real use-case for an
+       ARM server.
+
+CPEP   Section 5.2.18 (signature == "CPEP")
+       == Corrected Platform Error Polling table ==
+       Optional, not currently supported, and not recommended until such
+       time as ARM-compatible hardware is available, and the specification
+       suitably modified.
+
+CSRT   Signature Reserved (signature == "CSRT")
+       == Core System Resources Table ==
+       Optional, not currently supported.
+
+DBG2   Signature Reserved (signature == "DBG2")
+       == DeBuG port table 2 ==
+       Microsoft only table, will not be supported.
+
+DBGP   Signature Reserved (signature == "DBGP")
+       == DeBuG Port table ==
+       Microsoft only table, will not be supported.
+
+DSDT   Section 5.2.11.1 (signature == "DSDT")
+       == Differentiated System Description Table ==
+       A DSDT is required; see also SSDT.
+
+       ACPI tables contain only one DSDT but can contain one or more SSDTs,
+       which are optional.  Each SSDT can only add to the ACPI namespace,
+       but cannot modify or replace anything in the DSDT.
+
+DMAR   Signature Reserved (signature == "DMAR")
+       == DMA Remapping table ==
+       x86 only table, will not be supported.
+
+DRTM   Signature Reserved (signature == "DRTM")
+       == Dynamic Root of Trust for Measurement table ==
+       Optional, not currently supported.
+
+ECDT   Section 5.2.16 (signature == "ECDT")
+       == Embedded Controller Description Table ==
+       Optional, not currently supported, but could be used on ARM if and
+       only if one uses the GPE_BIT field to represent an IRQ number, since
+       there are no GPE blocks defined in hardware reduced mode.  This would
+       need to be modified in the ACPI specification.
+
+EINJ   Section 18.6 (signature == "EINJ")
+       == Error Injection table ==
+       This table is very useful for testing platform response to error
+       conditions; it allows one to inject an error into the system as
+       if it had actually occurred.  However, this table should not be
+       shipped with a production system; it should be dynamically loaded
+       and executed with the ACPICA tools only during testing.
+
+ERST   Section 18.5 (signature == "ERST")
+       == Error Record Serialization Table ==
+       On a platform supports RAS, this table must be supplied if it is not
+       UEFI-based; if it is UEFI-based, this table may be supplied. When this
+       table is not present, UEFI run time service will be utilized to save
+       and retrieve hardware error information to and from a persistent store.
+
+ETDT   Signature Reserved (signature == "ETDT")
+       == Event Timer Description Table ==
+       Obsolete table, will not be supported.
+
+FACS   Section 5.2.10 (signature == "FACS")
+       == Firmware ACPI Control Structure ==
+       It is unlikely that this table will be terribly useful.  If it is
+       provided, the Global Lock will NOT be used since it is not part of
+       the hardware reduced profile, and only 64-bit address fields will
+       be considered valid.
+
+FADT   Section 5.2.9 (signature == "FACP")
+       == Fixed ACPI Description Table ==
+       Required for arm64.
+
+       The HW_REDUCED_ACPI flag must be set.  All of the fields that are
+       to be ignored when HW_REDUCED_ACPI is set are expected to be set to
+       zero.
+
+       If an FACS table is provided, the X_FIRMWARE_CTRL field is to be
+       used, not FIRMWARE_CTRL.
+
+       If PSCI is used (as is recommended), make sure that ARM_BOOT_ARCH is
+       filled in properly -- that the PSCI_COMPLIANT flag is set and that
+       PSCI_USE_HVC is set or unset as needed (see table 5-37).
+
+       For the DSDT that is also required, the X_DSDT field is to be used,
+       not the DSDT field.
+
+FPDT   Section 5.2.23 (signature == "FPDT")
+       == Firmware Performance Data Table ==
+       Optional, not currently supported.
+
+GTDT   Section 5.2.24 (signature == "GTDT")
+       == Generic Timer Description Table ==
+       Required for arm64.
+
+HEST   Section 18.3.2 (signature == "HEST")
+       == Hardware Error Source Table ==
+       Until further error source types are defined, use only types 6 (AER
+       Root Port), 7 (AER Endpoint), 8 (AER Bridge), or 9 (Generic Hardware
+       Error Source).  Firmware first error handling is possible if and only
+       if Trusted Firmware is being used on arm64.
+
+       Must be supplied if RAS support is provided by the platform.  It
+       is recommended this table be supplied.
+
+HPET   Signature Reserved (signature == "HPET")
+       == High Precision Event timer Table ==
+       x86 only table, will not be supported.
+
+IBFT   Signature Reserved (signature == "IBFT")
+       == iSCSI Boot Firmware Table ==
+       Microsoft defined table, support TBD.
+
+IVRS   Signature Reserved (signature == "IVRS")
+       == I/O Virtualization Reporting Structure ==
+       x86_64 (AMD) only table, will not be supported.
+
+LPIT   Signature Reserved (signature == "LPIT")
+       == Low Power Idle Table ==
+       x86 only table as of ACPI 5.1; future versions have been adapted for
+       use with ARM and will be recommended in order to support ACPI power
+       management.
+
+MADT   Section 5.2.12 (signature == "APIC")
+       == Multiple APIC Description Table ==
+       Required for arm64.  Only the GIC interrupt controller structures
+       should be used (types 0xA - 0xE).
+
+MCFG   Signature Reserved (signature == "MCFG")
+       == Memory-mapped ConFiGuration space ==
+       If the platform supports PCI/PCIe, an MCFG table is required.
+
+MCHI   Signature Reserved (signature == "MCHI")
+       == Management Controller Host Interface table ==
+       Optional, not currently supported.
+
+MPST   Section 5.2.21 (signature == "MPST")
+       == Memory Power State Table ==
+       Optional, not currently supported.
+
+MSDM   Signature Reserved (signature == "MSDM")
+       == Microsoft Data Management table ==
+       Microsoft only table, will not be supported.
+
+MSCT   Section 5.2.19 (signature == "MSCT")
+       == Maximum System Characteristic Table ==
+       Optional, not currently supported.
+
+RASF   Section 5.2.20 (signature == "RASF")
+       == RAS Feature table ==
+       Optional, not currently supported.
+
+RSDP   Section 5.2.5 (signature == "RSD PTR")
+       == Root System Description PoinTeR ==
+       Required for arm64.
+
+RSDT   Section 5.2.7 (signature == "RSDT")
+       == Root System Description Table ==
+       Since this table can only provide 32-bit addresses, it is deprecated
+       on arm64, and will not be used.
+
+SBST   Section 5.2.14 (signature == "SBST")
+       == Smart Battery Subsystem Table ==
+       Optional, not currently supported.
+
+SLIC   Signature Reserved (signature == "SLIC")
+       == Software LIcensing table ==
+       Microsoft only table, will not be supported.
+
+SLIT   Section 5.2.17 (signature == "SLIT")
+       == System Locality distance Information Table ==
+       Optional in general, but required for NUMA systems.
+
+SPCR   Signature Reserved (signature == "SPCR")
+       == Serial Port Console Redirection table ==
+       Required for arm64.
+
+SPMI   Signature Reserved (signature == "SPMI")
+       == Server Platform Management Interface table ==
+       Optional, not currently supported.
+
+SRAT   Section 5.2.16 (signature == "SRAT")
+       == System Resource Affinity Table ==
+       Optional, but if used, only the GICC Affinity structures are read.
+       To support NUMA, this table is required.
+
+SSDT   Section 5.2.11.2 (signature == "SSDT")
+       == Secondary System Description Table ==
+       These tables are a continuation of the DSDT; these are recommended
+       for use with devices that can be added to a running system, but can
+       also serve the purpose of dividing up device descriptions into more
+       manageable pieces.
+
+       An SSDT can only ADD to the ACPI namespace.  It cannot modify or
+       replace existing device descriptions already in the namespace.
+
+       These tables are optional, however.  ACPI tables should contain only
+       one DSDT but can contain many SSDTs.
+
+TCPA   Signature Reserved (signature == "TCPA")
+       == Trusted Computing Platform Alliance table ==
+       Optional, not currently supported, and may need changes to fully
+       interoperate with arm64.
+
+TPM2   Signature Reserved (signature == "TPM2")
+       == Trusted Platform Module 2 table ==
+       Optional, not currently supported, and may need changes to fully
+       interoperate with arm64.
+
+UEFI   Signature Reserved (signature == "UEFI")
+       == UEFI ACPI data table ==
+       Optional, not currently supported.  No known use case for arm64,
+       at present.
+
+WAET   Signature Reserved (signature == "WAET")
+       == Windows ACPI Emulated devices Table ==
+       Microsoft only table, will not be supported.
+
+WDAT   Signature Reserved (signature == "WDAT")
+       == Watch Dog Action Table ==
+       Microsoft only table, will not be supported.
+
+WDRT   Signature Reserved (signature == "WDRT")
+       == Watch Dog Resource Table ==
+       Microsoft only table, will not be supported.
+
+WPBT   Signature Reserved (signature == "WPBT")
+       == Windows Platform Binary Table ==
+       Microsoft only table, will not be supported.
+
+XSDT   Section 5.2.8 (signature == "XSDT")
+       == eXtended System Description Table ==
+       Required for arm64.
+
+
+ACPI Objects
+------------
+The expectations on individual ACPI objects are discussed in the list that
+follows:
+
+Name   Section         Usage for ARMv8 Linux
+----   ------------    -------------------------------------------------
+_ADR   6.1.1           Use as needed.
+
+_BBN   6.5.5           Use as needed; PCI-specific.
+
+_BDN   6.5.3           Optional; not likely to be used on arm64.
+
+_CCA   6.2.17          This method should be defined for all bus masters
+                       on arm64.  While cache coherency is assumed, making
+                       it explicit ensures the kernel will set up DMA as
+                       it should.
+
+_CDM   6.2.1           Optional, to be used only for processor devices.
+
+_CID   6.1.2           Use as needed.
+
+_CLS   6.1.3           Use as needed.
+
+_CRS   6.2.2           Required on arm64.
+
+_DCK   6.5.2           Optional; not likely to be used on arm64.
+
+_DDN   6.1.4           This field can be used for a device name.  However,
+                       it is meant for DOS device names (e.g., COM1), so be
+                       careful of its use across OSes.
+
+_DEP   6.5.8           Use as needed.
+
+_DIS   6.2.3           Optional, for power management use.
+
+_DLM   5.7.5           Optional.
+
+_DMA   6.2.4           Optional.
+
+_DSD   6.2.5           To be used with caution.  If this object is used, try
+                       to use it within the constraints already defined by the
+                       Device Properties UUID.  Only in rare circumstances
+                       should it be necessary to create a new _DSD UUID.
+
+                       In either case, submit the _DSD definition along with
+                       any driver patches for discussion, especially when
+                       device properties are used.  A driver will not be
+                       considered complete without a corresponding _DSD
+                       description.  Once approved by kernel maintainers,
+                       the UUID or device properties must then be registered
+                       with the UEFI Forum; this may cause some iteration as
+                       more than one OS will be registering entries.
+
+_DSM                   Do not use this method.  It is not standardized, the
+                       return values are not well documented, and it is
+                       currently a frequent source of error.
+
+_DSW   7.2.1           Use as needed; power management specific.
+
+_EDL   6.3.1           Optional.
+
+_EJD   6.3.2           Optional.
+
+_EJx   6.3.3           Optional.
+
+_FIX   6.2.7           x86 specific, not used on arm64.
+
+\_GL   5.7.1           This object is not to be used in hardware reduced
+                       mode, and therefore should not be used on arm64.
+
+_GLK   6.5.7           This object requires a global lock be defined; there
+                       is no global lock on arm64 since it runs in hardware
+                       reduced mode.  Hence, do not use this object on arm64.
+
+\_GPE  5.3.1           This namespace is for x86 use only.  Do not use it
+                       on arm64.
+
+_GSB   6.2.7           Optional.
+
+_HID   6.1.5           Use as needed.  This is the primary object to use in
+                       device probing, though _CID and _CLS may also be used.
+
+_HPP   6.2.8           Optional, PCI specific.
+
+_HPX   6.2.9           Optional, PCI specific.
+
+_HRV   6.1.6           Optional, use as needed to clarify device behavior; in
+                       some cases, this may be easier to use than _DSD.
+
+_INI   6.5.1           Not required, but can be useful in setting up devices
+                       when UEFI leaves them in a state that may not be what
+                       the driver expects before it starts probing.
+
+_IRC   7.2.15          Use as needed; power management specific.
+
+_LCK   6.3.4           Optional.
+
+_MAT   6.2.10          Optional; see also the MADT.
+
+_MLS   6.1.7           Optional, but highly recommended for use in
+                       internationalization.
+
+_OFF   7.1.2           It is recommended to define this method for any device
+                       that can be turned on or off.
+
+_ON    7.1.3           It is recommended to define this method for any device
+                       that can be turned on or off.
+
+\_OS   5.7.3           This method will return "Linux" by default (this is
+                       the value of the macro ACPI_OS_NAME on Linux).  The
+                       command line parameter acpi_os=<string> can be used
+                       to set it to some other value.
+
+_OSC   6.2.11          This method can be a global method in ACPI (i.e.,
+                       \_SB._OSC), or it may be associated with a specific
+                       device (e.g., \_SB.DEV0._OSC), or both.  When used
+                       as a global method, only capabilities published in
+                       the ACPI specification are allowed.  When used as
+                       a device-specific method, the process described for
+                       using _DSD MUST be used to create an _OSC definition;
+                       out-of-process use of _OSC is not allowed.  That is,
+                       submit the device-specific _OSC usage description as
+                       part of the kernel driver submission, get it approved
+                       by the kernel community, then register it with the
+                       UEFI Forum.
+
+\_OSI  5.7.2           Deprecated on ARM64.  Any invocation of this method
+                       will print a warning on the console and return false.
+                       That is, as far as ACPI firmware is concerned, _OSI
+                       cannot be used to determine what sort of system is
+                       being used or what functionality is provided.  The
+                       _OSC method is to be used instead.
+
+_OST   6.3.5           Optional.
+
+_PDC   8.4.1           Deprecated, do not use on arm64.
+
+\_PIC  5.8.1           The method should not be used.  On arm64, the only
+                       interrupt model available is GIC.
+
+_PLD   6.1.8           Optional.
+
+\_PR   5.3.1           This namespace is for x86 use only on legacy systems.
+                       Do not use it on arm64.
+
+_PRS   6.2.12          Optional.
+
+_PRT   6.2.13          Required as part of the definition of all PCI root
+                       devices.
+
+_PRW   7.2.13          Use as needed; power management specific.
+
+_PRx   7.2.8-11        Use as needed; power management specific.  If _PR0 is
+                       defined, _PR3 must also be defined.
+
+_PSC   7.2.6           Use as needed; power management specific.
+
+_PSE   7.2.7           Use as needed; power management specific.
+
+_PSW   7.2.14          Use as needed; power management specific.
+
+_PSx   7.2.2-5         Use as needed; power management specific.  If _PS0 is
+                       defined, _PS3 must also be defined.  If clocks or
+                       regulators need adjusting to be consistent with power
+                       usage, change them in these methods.
+
+\_PTS  7.3.1           Use as needed; power management specific.
+
+_PXM   6.2.14          Optional.
+
+_REG   6.5.4           Use as needed.
+
+\_REV  5.7.4           Always returns the latest version of ACPI supported.
+
+_RMV   6.3.6           Optional.
+
+\_SB   5.3.1           Required on arm64; all devices must be defined in this
+                       namespace.
+
+_SEG   6.5.6           Use as needed; PCI-specific.
+
+\_SI   5.3.1,          Optional.
+       9.1
+
+_SLI   6.2.15          Optional; recommended when SLIT table is in use.
+
+_STA   6.3.7,          It is recommended to define this method for any device
+       7.1.4           that can be turned on or off.
+
+_SRS   6.2.16          Optional; see also _PRS.
+
+_STR   6.1.10          Recommended for conveying device names to end users;
+                       this is preferred over using _DDN.
+
+_SUB   6.1.9           Use as needed; _HID or _CID are preferred.
+
+_SUN   6.1.11          Optional.
+
+\_Sx   7.3.2           Use as needed; power management specific.
+
+_SxD   7.2.16-19       Use as needed; power management specific.
+
+_SxW   7.2.20-24       Use as needed; power management specific.
+
+_SWS   7.3.3           Use as needed; power management specific; this may
+                       require specification changes for use on arm64.
+
+\_TTS  7.3.4           Use as needed; power management specific.
+
+\_TZ   5.3.1           Optional.
+
+_UID   6.1.12          Recommended for distinguishing devices of the same
+                       class; define it if at all possible.
+
+\_WAK  7.3.5           Use as needed; power management specific.
+
+
+ACPI Event Model
+----------------
+Do not use GPE block devices; these are not supported in the hardware reduced
+profile used by arm64.  Since there are no GPE blocks defined for use on ARM
+platforms, GPIO-signaled interrupts should be used for creating system events.
+
+
+ACPI Processor Control
+----------------------
+Section 8 of the ACPI specification is currently undergoing change that
+should be completed in the 6.0 version of the specification.  Processor
+performance control will be handled differently for arm64 at that point
+in time.  Processor aggregator devices (section 8.5) will not be used,
+for example, but another similar mechanism instead.
+
+While UEFI constrains what we can say until the release of 6.0, it is
+recommended that CPPC (8.4.5) be used as the primary model.  This will
+still be useful into the future.  C-states and P-states will still be
+provided, but most of the current design work appears to favor CPPC.
+
+Further, it is essential that the ARMv8 SoC provide a fully functional
+implementation of PSCI; this will be the only mechanism supported by ACPI
+to control CPU power state (including secondary CPU booting).
+
+More details will be provided on the release of the ACPI 6.0 specification.
+
+
+ACPI System Address Map Interfaces
+----------------------------------
+In Section 15 of the ACPI specification, several methods are mentioned as
+possible mechanisms for conveying memory resource information to the kernel.
+For arm64, we will only support UEFI for booting with ACPI, hence the UEFI
+GetMemoryMap() boot service is the only mechanism that will be used.
+
+
+ACPI Platform Error Interfaces (APEI)
+-------------------------------------
+The APEI tables supported are described above.
+
+APEI requires the equivalent of an SCI and an NMI on ARMv8.  The SCI is used
+to notify the OSPM of errors that have occurred but can be corrected and the
+system can continue correct operation, even if possibly degraded.  The NMI is
+used to indicate fatal errors that cannot be corrected, and require immediate
+attention.
+
+Since there is no direct equivalent of the x86 SCI or NMI, arm64 handles
+these slightly differently.  The SCI is handled as a normal GPIO-signaled
+interrupt; given that these are corrected (or correctable) errors being
+reported, this is sufficient.  The NMI is emulated as the highest priority
+GPIO-signaled interrupt possible.  This implies some caution must be used
+since there could be interrupts at higher privilege levels or even interrupts
+at the same priority as the emulated NMI.  In Linux, this should not be the
+case but one should be aware it could happen.
+
+
+ACPI Objects Not Supported on ARM64
+-----------------------------------
+While this may change in the future, there are several classes of objects
+that can be defined, but are not currently of general interest to ARM servers.
+
+These are not supported:
+
+       -- Section 9.2: ambient light sensor devices
+
+       -- Section 9.3: battery devices
+
+       -- Section 9.4: lids (e.g., laptop lids)
+
+       -- Section 9.8.2: IDE controllers
+
+       -- Section 9.9: floppy controllers
+
+       -- Section 9.10: GPE block devices
+
+       -- Section 9.15: PC/AT RTC/CMOS devices
+
+       -- Section 9.16: user presence detection devices
+
+       -- Section 9.17: I/O APIC devices; all GICs must be enumerable via MADT
+
+       -- Section 9.18: time and alarm devices (see 9.15)
+
+
+ACPI Objects Not Yet Implemented
+--------------------------------
+While these objects have x86 equivalents, and they do make some sense in ARM
+servers, there is either no hardware available at present, or in some cases
+there may not yet be a non-ARM implementation.  Hence, they are currently not
+implemented though that may change in the future.
+
+Not yet implemented are:
+
+       -- Section 10: power source and power meter devices
+
+       -- Section 11: thermal management
+
+       -- Section 12: embedded controllers interface
+
+       -- Section 13: SMBus interfaces
+
+       -- Section 17: NUMA support (prototypes have been submitted for
+          review)
diff --git a/Documentation/arm64/arm-acpi.txt b/Documentation/arm64/arm-acpi.txt

new file mode 100644 (file)

index 0000000..570a4f8
--- /dev/null
+++ b/Documentation/arm64/arm-acpi.txt
@@ -0,0 +1,505 @@
+ACPI on ARMv8 Servers
+---------------------
+ACPI can be used for ARMv8 general purpose servers designed to follow
+the ARM SBSA (Server Base System Architecture) [0] and SBBR (Server
+Base Boot Requirements) [1] specifications.  Please note that the SBBR
+can be retrieved simply by visiting [1], but the SBSA is currently only
+available to those with an ARM login due to ARM IP licensing concerns.
+
+The ARMv8 kernel implements the reduced hardware model of ACPI version
+5.1 or later.  Links to the specification and all external documents
+it refers to are managed by the UEFI Forum.  The specification is
+available at http://www.uefi.org/specifications and documents referenced
+by the specification can be found via http://www.uefi.org/acpi.
+
+If an ARMv8 system does not meet the requirements of the SBSA and SBBR,
+or cannot be described using the mechanisms defined in the required ACPI
+specifications, then ACPI may not be a good fit for the hardware.
+
+While the documents mentioned above set out the requirements for building
+industry-standard ARMv8 servers, they also apply to more than one operating
+system.  The purpose of this document is to describe the interaction between
+ACPI and Linux only, on an ARMv8 system -- that is, what Linux expects of
+ACPI and what ACPI can expect of Linux.
+
+
+Why ACPI on ARM?
+----------------
+Before examining the details of the interface between ACPI and Linux, it is
+useful to understand why ACPI is being used.  Several technologies already
+exist in Linux for describing non-enumerable hardware, after all.  In this
+section we summarize a blog post [2] from Grant Likely that outlines the
+reasoning behind ACPI on ARMv8 servers.  Actually, we snitch a good portion
+of the summary text almost directly, to be honest.
+
+The short form of the rationale for ACPI on ARM is:
+
+-- ACPI’s bytecode (AML) allows the platform to encode hardware behavior,
+   while DT explicitly does not support this.  For hardware vendors, being
+   able to encode behavior is a key tool used in supporting operating
+   system releases on new hardware.
+
+-- ACPI’s OSPM defines a power management model that constrains what the
+   platform is allowed to do into a specific model, while still providing
+   flexibility in hardware design.
+
+-- In the enterprise server environment, ACPI has established bindings (such
+   as for RAS) which are currently used in production systems.  DT does not.
+   Such bindings could be defined in DT at some point, but doing so means ARM
+   and x86 would end up using completely different code paths in both firmware
+   and the kernel.
+
+-- Choosing a single interface to describe the abstraction between a platform
+   and an OS is important.  Hardware vendors would not be required to implement
+   both DT and ACPI if they want to support multiple operating systems.  And,
+   agreeing on a single interface instead of being fragmented into per OS
+   interfaces makes for better interoperability overall.
+
+-- The new ACPI governance process works well and Linux is now at the same
+   table as hardware vendors and other OS vendors.  In fact, there is no
+   longer any reason to feel that ACPI is only belongs to Windows or that
+   Linux is in any way secondary to Microsoft in this arena.  The move of
+   ACPI governance into the UEFI forum has significantly opened up the
+   specification development process, and currently, a large portion of the
+   changes being made to ACPI is being driven by Linux.
+
+Key to the use of ACPI is the support model.  For servers in general, the
+responsibility for hardware behaviour cannot solely be the domain of the
+kernel, but rather must be split between the platform and the kernel, in
+order to allow for orderly change over time.  ACPI frees the OS from needing
+to understand all the minute details of the hardware so that the OS doesn’t
+need to be ported to each and every device individually.  It allows the
+hardware vendors to take responsibility for power management behaviour without
+depending on an OS release cycle which is not under their control.
+
+ACPI is also important because hardware and OS vendors have already worked
+out the mechanisms for supporting a general purpose computing ecosystem.  The
+infrastructure is in place, the bindings are in place, and the processes are
+in place.  DT does exactly what Linux needs it to when working with vertically
+integrated devices, but there are no good processes for supporting what the
+server vendors need.  Linux could potentially get there with DT, but doing so
+really just duplicates something that already works.  ACPI already does what
+the hardware vendors need, Microsoft won’t collaborate on DT, and hardware
+vendors would still end up providing two completely separate firmware
+interfaces -- one for Linux and one for Windows.
+
+
+Kernel Compatibility
+--------------------
+One of the primary motivations for ACPI is standardization, and using that
+to provide backward compatibility for Linux kernels.  In the server market,
+software and hardware are often used for long periods.  ACPI allows the
+kernel and firmware to agree on a consistent abstraction that can be
+maintained over time, even as hardware or software change.  As long as the
+abstraction is supported, systems can be updated without necessarily having
+to replace the kernel.
+
+When a Linux driver or subsystem is first implemented using ACPI, it by
+definition ends up requiring a specific version of the ACPI specification
+-- it's baseline.  ACPI firmware must continue to work, even though it may
+not be optimal, with the earliest kernel version that first provides support
+for that baseline version of ACPI.  There may be a need for additional drivers,
+but adding new functionality (e.g., CPU power management) should not break
+older kernel versions.  Further, ACPI firmware must also work with the most
+recent version of the kernel.
+
+
+Relationship with Device Tree
+-----------------------------
+ACPI support in drivers and subsystems for ARMv8 should never be mutually
+exclusive with DT support at compile time.
+
+At boot time the kernel will only use one description method depending on
+parameters passed from the bootloader (including kernel bootargs).
+
+Regardless of whether DT or ACPI is used, the kernel must always be capable
+of booting with either scheme (in kernels with both schemes enabled at compile
+time).
+
+
+Booting using ACPI tables
+-------------------------
+The only defined method for passing ACPI tables to the kernel on ARMv8
+is via the UEFI system configuration table.  Just so it is explicit, this
+means that ACPI is only supported on platforms that boot via UEFI.
+
+When an ARMv8 system boots, it can either have DT information, ACPI tables,
+or in some very unusual cases, both.  If no command line parameters are used,
+the kernel will try to use DT for device enumeration; if there is no DT
+present, the kernel will try to use ACPI tables, but only if they are present.
+In neither is available, the kernel will not boot.  If acpi=force is used
+on the command line, the kernel will attempt to use ACPI tables first, but
+fall back to DT if there are no ACPI tables present.  The basic idea is that
+the kernel will not fail to boot unless it absolutely has no other choice.
+
+Processing of ACPI tables may be disabled by passing acpi=off on the kernel
+command line; this is the default behavior.
+
+In order for the kernel to load and use ACPI tables, the UEFI implementation
+MUST set the ACPI_20_TABLE_GUID to point to the RSDP table (the table with
+the ACPI signature "RSD PTR ").  If this pointer is incorrect and acpi=force
+is used, the kernel will disable ACPI and try to use DT to boot instead; the
+kernel has, in effect, determined that ACPI tables are not present at that
+point.
+
+If the pointer to the RSDP table is correct, the table will be mapped into
+the kernel by the ACPI core, using the address provided by UEFI.
+
+The ACPI core will then locate and map in all other ACPI tables provided by
+using the addresses in the RSDP table to find the XSDT (eXtended System
+Description Table).  The XSDT in turn provides the addresses to all other
+ACPI tables provided by the system firmware; the ACPI core will then traverse
+this table and map in the tables listed.
+
+The ACPI core will ignore any provided RSDT (Root System Description Table).
+RSDTs have been deprecated and are ignored on arm64 since they only allow
+for 32-bit addresses.
+
+Further, the ACPI core will only use the 64-bit address fields in the FADT
+(Fixed ACPI Description Table).  Any 32-bit address fields in the FADT will
+be ignored on arm64.
+
+Hardware reduced mode (see Section 4.1 of the ACPI 5.1 specification) will
+be enforced by the ACPI core on arm64.  Doing so allows the ACPI core to
+run less complex code since it no longer has to provide support for legacy
+hardware from other architectures.  Any fields that are not to be used for
+hardware reduced mode must be set to zero.
+
+For the ACPI core to operate properly, and in turn provide the information
+the kernel needs to configure devices, it expects to find the following
+tables (all section numbers refer to the ACPI 5.1 specfication):
+
+    -- RSDP (Root System Description Pointer), section 5.2.5
+
+    -- XSDT (eXtended System Description Table), section 5.2.8
+
+    -- FADT (Fixed ACPI Description Table), section 5.2.9
+
+    -- DSDT (Differentiated System Description Table), section
+       5.2.11.1
+
+    -- MADT (Multiple APIC Description Table), section 5.2.12
+
+    -- GTDT (Generic Timer Description Table), section 5.2.24
+
+    -- If PCI is supported, the MCFG (Memory mapped ConFiGuration
+       Table), section 5.2.6, specifically Table 5-31.
+
+If the above tables are not all present, the kernel may or may not be
+able to boot properly since it may not be able to configure all of the
+devices available.
+
+
+ACPI Detection
+--------------
+Drivers should determine their probe() type by checking for a null
+value for ACPI_HANDLE, or checking .of_node, or other information in
+the device structure.  This is detailed further in the "Driver
+Recommendations" section.
+
+In non-driver code, if the presence of ACPI needs to be detected at
+runtime, then check the value of acpi_disabled. If CONFIG_ACPI is not
+set, acpi_disabled will always be 1.
+
+
+Device Enumeration
+------------------
+Device descriptions in ACPI should use standard recognized ACPI interfaces.
+These may contain less information than is typically provided via a Device
+Tree description for the same device.  This is also one of the reasons that
+ACPI can be useful -- the driver takes into account that it may have less
+detailed information about the device and uses sensible defaults instead.
+If done properly in the driver, the hardware can change and improve over
+time without the driver having to change at all.
+
+Clocks provide an excellent example.  In DT, clocks need to be specified
+and the drivers need to take them into account.  In ACPI, the assumption
+is that UEFI will leave the device in a reasonable default state, including
+any clock settings.  If for some reason the driver needs to change a clock
+value, this can be done in an ACPI method; all the driver needs to do is
+invoke the method and not concern itself with what the method needs to do
+to change the clock.  Changing the hardware can then take place over time
+by changing what the ACPI method does, and not the driver.
+
+In DT, the parameters needed by the driver to set up clocks as in the example
+above are known as "bindings"; in ACPI, these are known as "Device Properties"
+and provided to a driver via the _DSD object.
+
+ACPI tables are described with a formal language called ASL, the ACPI
+Source Language (section 19 of the specification).  This means that there
+are always multiple ways to describe the same thing -- including device
+properties.  For example, device properties could use an ASL construct
+that looks like this: Name(KEY0, "value0").  An ACPI device driver would
+then retrieve the value of the property by evaluating the KEY0 object.
+However, using Name() this way has multiple problems: (1) ACPI limits
+names ("KEY0") to four characters unlike DT; (2) there is no industry
+wide registry that maintains a list of names, minimzing re-use; (3)
+there is also no registry for the definition of property values ("value0"),
+again making re-use difficult; and (4) how does one maintain backward
+compatibility as new hardware comes out?  The _DSD method was created
+to solve precisely these sorts of problems; Linux drivers should ALWAYS
+use the _DSD method for device properties and nothing else.
+
+The _DSM object (ACPI Section 9.14.1) could also be used for conveying
+device properties to a driver.  Linux drivers should only expect it to
+be used if _DSD cannot represent the data required, and there is no way
+to create a new UUID for the _DSD object.  Note that there is even less
+regulation of the use of _DSM than there is of _DSD.  Drivers that depend
+on the contents of _DSM objects will be more difficult to maintain over
+time because of this; as of this writing, the use of _DSM is the cause
+of quite a few firmware problems and is not recommended.
+
+Drivers should look for device properties in the _DSD object ONLY; the _DSD
+object is described in the ACPI specification section 6.2.5, but this only
+describes how to define the structure of an object returned via _DSD, and
+how specific data structures are defined by specific UUIDs.  Linux should
+only use the _DSD Device Properties UUID [5]:
+
+   -- UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301
+
+   -- http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
+
+The UEFI Forum provides a mechanism for registering device properties [4]
+so that they may be used across all operating systems supporting ACPI.
+Device properties that have not been registered with the UEFI Forum should
+not be used.
+
+Before creating new device properties, check to be sure that they have not
+been defined before and either registered in the Linux kernel documentation
+as DT bindings, or the UEFI Forum as device properties.  While we do not want
+to simply move all DT bindings into ACPI device properties, we can learn from
+what has been previously defined.
+
+If it is necessary to define a new device property, or if it makes sense to
+synthesize the definition of a binding so it can be used in any firmware,
+both DT bindings and ACPI device properties for device drivers have review
+processes.  Use them both.  When the driver itself is submitted for review
+to the Linux mailing lists, the device property definitions needed must be
+submitted at the same time.  A driver that supports ACPI and uses device
+properties will not be considered complete without their definitions.  Once
+the device property has been accepted by the Linux community, it must be
+registered with the UEFI Forum [4], which will review it again for consistency
+within the registry.  This may require iteration.  The UEFI Forum, though,
+will always be the canonical site for device property definitions.
+
+It may make sense to provide notice to the UEFI Forum that there is the
+intent to register a previously unused device property name as a means of
+reserving the name for later use.  Other operating system vendors will
+also be submitting registration requests and this may help smooth the
+process.
+
+Once registration and review have been completed, the kernel provides an
+interface for looking up device properties in a manner independent of
+whether DT or ACPI is being used.  This API should be used [6]; it can
+eliminate some duplication of code paths in driver probing functions and
+discourage divergence between DT bindings and ACPI device properties.
+
+
+Programmable Power Control Resources
+------------------------------------
+Programmable power control resources include such resources as voltage/current
+providers (regulators) and clock sources.
+
+With ACPI, the kernel clock and regulator framework is not expected to be used
+at all.
+
+The kernel assumes that power control of these resources is represented with
+Power Resource Objects (ACPI section 7.1).  The ACPI core will then handle
+correctly enabling and disabling resources as they are needed.  In order to
+get that to work, ACPI assumes each device has defined D-states and that these
+can be controlled through the optional ACPI methods _PS0, _PS1, _PS2, and _PS3;
+in ACPI, _PS0 is the method to invoke to turn a device full on, and _PS3 is for
+turning a device full off.
+
+There are two options for using those Power Resources.  They can:
+
+   -- be managed in a _PSx method which gets called on entry to power
+      state Dx.
+
+   -- be declared separately as power resources with their own _ON and _OFF
+      methods.  They are then tied back to D-states for a particular device
+      via _PRx which specifies which power resources a device needs to be on
+      while in Dx.  Kernel then tracks number of devices using a power resource
+      and calls _ON/_OFF as needed.
+
+The kernel ACPI code will also assume that the _PSx methods follow the normal
+ACPI rules for such methods:
+
+   -- If either _PS0 or _PS3 is implemented, then the other method must also
+      be implemented.
+
+   -- If a device requires usage or setup of a power resource when on, the ASL
+      should organize that it is allocated/enabled using the _PS0 method.
+
+   -- Resources allocated or enabled in the _PS0 method should be disabled
+      or de-allocated in the _PS3 method.
+
+   -- Firmware will leave the resources in a reasonable state before handing
+      over control to the kernel.
+
+Such code in _PSx methods will of course be very platform specific.  But,
+this allows the driver to abstract out the interface for operating the device
+and avoid having to read special non-standard values from ACPI tables. Further,
+abstracting the use of these resources allows the hardware to change over time
+without requiring updates to the driver.
+
+
+Clocks
+------
+ACPI makes the assumption that clocks are initialized by the firmware --
+UEFI, in this case -- to some working value before control is handed over
+to the kernel.  This has implications for devices such as UARTs, or SoC-driven
+LCD displays, for example.
+
+When the kernel boots, the clocks are assumed to be set to reasonable
+working values.  If for some reason the frequency needs to change -- e.g.,
+throttling for power management -- the device driver should expect that
+process to be abstracted out into some ACPI method that can be invoked
+(please see the ACPI specification for further recommendations on standard
+methods to be expected).  The only exceptions to this are CPU clocks where
+CPPC provides a much richer interface than ACPI methods.  If the clocks
+are not set, there is no direct way for Linux to control them.
+
+If an SoC vendor wants to provide fine-grained control of the system clocks,
+they could do so by providing ACPI methods that could be invoked by Linux
+drivers.  However, this is NOT recommended and Linux drivers should NOT use
+such methods, even if they are provided.  Such methods are not currently
+standardized in the ACPI specification, and using them could tie a kernel
+to a very specific SoC, or tie an SoC to a very specific version of the
+kernel, both of which we are trying to avoid.
+
+
+Driver Recommendations
+----------------------
+DO NOT remove any DT handling when adding ACPI support for a driver.  The
+same device may be used on many different systems.
+
+DO try to structure the driver so that it is data-driven.  That is, set up
+a struct containing internal per-device state based on defaults and whatever
+else must be discovered by the driver probe function.  Then, have the rest
+of the driver operate off of the contents of that struct.  Doing so should
+allow most divergence between ACPI and DT functionality to be kept local to
+the probe function instead of being scattered throughout the driver.  For
+example:
+
+static int device_probe_dt(struct platform_device *pdev)
+{
+       /* DT specific functionality */
+       ...
+}
+
+static int device_probe_acpi(struct platform_device *pdev)
+{
+       /* ACPI specific functionality */
+       ...
+}
+
+static int device_probe(struct platform_device *pdev)
+{
+       ...
+       struct device_node node = pdev->dev.of_node;
+       ...
+
+       if (node)
+               ret = device_probe_dt(pdev);
+       else if (ACPI_HANDLE(&pdev->dev))
+               ret = device_probe_acpi(pdev);
+       else
+               /* other initialization */
+               ...
+       /* Continue with any generic probe operations */
+       ...
+}
+
+DO keep the MODULE_DEVICE_TABLE entries together in the driver to make it
+clear the different names the driver is probed for, both from DT and from
+ACPI:
+
+static struct of_device_id virtio_mmio_match[] = {
+        { .compatible = "virtio,mmio", },
+        { }
+};
+MODULE_DEVICE_TABLE(of, virtio_mmio_match);
+
+static const struct acpi_device_id virtio_mmio_acpi_match[] = {
+        { "LNRO0005", },
+        { }
+};
+MODULE_DEVICE_TABLE(acpi, virtio_mmio_acpi_match);
+
+
+ASWG
+----
+The ACPI specification changes regularly.  During the year 2014, for instance,
+version 5.1 was released and version 6.0 substantially completed, with most of
+the changes being driven by ARM-specific requirements.  Proposed changes are
+presented and discussed in the ASWG (ACPI Specification Working Group) which
+is a part of the UEFI Forum.
+
+Participation in this group is open to all UEFI members.  Please see
+http://www.uefi.org/workinggroup for details on group membership.
+
+It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification
+as closely as possible, and to only implement functionality that complies with
+the released standards from UEFI ASWG.  As a practical matter, there will be
+vendors that provide bad ACPI tables or violate the standards in some way.
+If this is because of errors, quirks and fixups may be necessary, but will
+be avoided if possible.  If there are features missing from ACPI that preclude
+it from being used on a platform, ECRs (Engineering Change Requests) should be
+submitted to ASWG and go through the normal approval process; for those that
+are not UEFI members, many other members of the Linux community are and would
+likely be willing to assist in submitting ECRs.
+
+
+Linux Code
+----------
+Individual items specific to Linux on ARM, contained in the the Linux
+source code, are in the list that follows:
+
+ACPI_OS_NAME           This macro defines the string to be returned when
+                       an ACPI method invokes the _OS method.  On ARM64
+                       systems, this macro will be "Linux" by default.
+                       The command line parameter acpi_os=<string>
+                       can be used to set it to some other value.  The
+                       default value for other architectures is "Microsoft
+                       Windows NT", for example.
+
+ACPI Objects
+------------
+Detailed expectations for ACPI tables and object are listed in the file
+Documentation/arm64/acpi_object_usage.txt.
+
+
+References
+----------
+[0] http://silver.arm.com -- document ARM-DEN-0029, or newer
+    "Server Base System Architecture", version 2.3, dated 27 Mar 2014
+
+[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf
+    Document ARM-DEN-0044A, or newer: "Server Base Boot Requirements, System
+    Software on ARM Platforms", dated 16 Aug 2014
+
+[2] http://www.secretlab.ca/archives/151, 10 Jan 2015, Copyright (c) 2015,
+    Linaro Ltd., written by Grant Likely.  A copy of the verbatim text (apart
+    from formatting) is also in Documentation/arm64/why_use_acpi.txt.
+
+[3] AMD ACPI for Seattle platform documentation:
+    http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf
+
+[4] http://www.uefi.org/acpi -- please see the link for the "ACPI _DSD Device
+    Property Registry Instructions"
+
+[5] http://www.uefi.org/acpi -- please see the link for the "_DSD (Device
+    Specific Data) Implementation Guide"
+
+[6] Kernel code for the unified device property interface can be found in
+    include/linux/property.h and drivers/base/property.c.
+
+
+Authors
+-------
+Al Stone <al.stone@linaro.org>
+Graeme Gregory <graeme.gregory@linaro.org>
+Hanjun Guo <hanjun.guo@linaro.org>
+
+Grant Likely <grant.likely@linaro.org>, for the "Why ACPI on ARM?" section
diff --git a/Documentation/devicetree/bindings/arc/pct.txt b/Documentation/devicetree/bindings/arc/pct.txt

new file mode 100644 (file)

index 0000000..7b95884
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/pct.txt
@@ -0,0 +1,20 @@
+* ARC Performance Counters
+
+The ARC700 can be configured with a pipeline performance monitor for counting
+CPU and cache events like cache misses and hits. Like conventional PCT there
+are 100+ hardware conditions dynamically mapped to upto 32 counters
+
+Note that:
+ * The ARC 700 PCT does not support interrupts; although HW events may be
+   counted, the HW events themselves cannot serve as a trigger for a sample.
+
+Required properties:
+
+- compatible : should contain
+       "snps,arc700-pct"
+
+Example:
+
+pmu {
+        compatible = "snps,arc700-pct";
+};
diff --git a/Documentation/devicetree/bindings/arc/pmu.txt b/Documentation/devicetree/bindings/arc/pmu.txt

deleted file mode 100644 (file)

index 49d5173..0000000
--- a/Documentation/devicetree/bindings/arc/pmu.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-* ARC Performance Monitor Unit
-
-The ARC 700 can be configured with a pipeline performance monitor for counting
-CPU and cache events like cache misses and hits.
-
-Note that:
- * ARC 700 refers to a family of ARC processor cores;
-   - There is only one type of PMU available for the whole family;
-   - The PMU may support different sets of events; supported events are probed
-     at boot time, as required by the reference manual.
-
- * The ARC 700 PMU does not support interrupts; although HW events may be
-   counted, the HW events themselves cannot serve as a trigger for a sample.
-
-Required properties:
-
-- compatible : should contain
-       "snps,arc700-pmu"
-
-Example:
-
-pmu {
-        compatible = "snps,arc700-pmu";
-};
diff --git a/Documentation/devicetree/bindings/arm/altera.txt b/Documentation/devicetree/bindings/arm/altera.txt

new file mode 100644 (file)

index 0000000..558735a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/altera.txt
@@ -0,0 +1,14 @@
+Altera's SoCFPGA platform device tree bindings
+---------------------------------------------
+
+Boards with Cyclone 5 SoC:
+Required root node properties:
+compatible = "altr,socfpga-cyclone5", "altr,socfpga";
+
+Boards with Arria 5 SoC:
+Required root node properties:
+compatible = "altr,socfpga-arria5", "altr,socfpga";
+
+Boards with Arria 10 SoC:
+Required root node properties:
+compatible = "altr,socfpga-arria10", "altr,socfpga";
diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt

index 256b4d8..e774128 100644 (file)
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -17,7 +17,10 @@ to deliver its interrupts via SPIs.
  - interrupts : Interrupt list for secure, non-secure, virtual and
    hypervisor timers, in that order.
  
-- clock-frequency : The frequency of the main counter, in Hz. Optional.
+- clock-frequency : The frequency of the main counter, in Hz. Should be present
+  only where necessary to work around broken firmware which does not configure
+  CNTFRQ on all CPUs to a uniform correct value. Use of this property is
+  strongly discouraged; fix your firmware unless absolutely impossible.
  
  - always-on : a boolean property. If present, the timer is powered through an
    always-on power domain, therefore it never loses context.
@@ -46,7 +49,8 @@ Example:
  
  - compatible : Should at least contain "arm,armv7-timer-mem".
  
-- clock-frequency : The frequency of the main counter, in Hz. Optional.
+- clock-frequency : The frequency of the main counter, in Hz. Should be present
+  only when firmware has not configured the MMIO CNTFRQ registers.
  
  - reg : The control frame base address.
  
diff --git a/Documentation/devicetree/bindings/arm/msm/timer.txt b/Documentation/devicetree/bindings/arm/msm/timer.txt

index 74607b6..5e10c34 100644 (file)
--- a/Documentation/devicetree/bindings/arm/msm/timer.txt
+++ b/Documentation/devicetree/bindings/arm/msm/timer.txt
@@ -9,11 +9,17 @@ Properties:
                 "qcom,scss-timer" - scorpion subsystem
  
  - interrupts : Interrupts for the debug timer, the first general purpose
-               timer, and optionally a second general purpose timer in that
-               order.
+               timer, and optionally a second general purpose timer, and
+               optionally as well, 2 watchdog interrupts, in that order.
  
  - reg : Specifies the base address of the timer registers.
  
+- clocks: Reference to the parent clocks, one per output clock. The parents
+          must appear in the same order as the clock names.
+
+- clock-names: The name of the clocks as free-form strings. They should be in
+               the same order as the clocks.
+
  - clock-frequency : The frequency of the debug timer and the general purpose
                      timer(s) in Hz in that order.
  
@@ -29,9 +35,13 @@ Example:
                 compatible = "qcom,scss-timer", "qcom,msm-timer";
                 interrupts = <1 1 0x301>,
                              <1 2 0x301>,
-                            <1 3 0x301>;
+                            <1 3 0x301>,
+                            <1 4 0x301>,
+                            <1 5 0x301>;
                 reg = <0x0200a000 0x100>;
                 clock-frequency = <19200000>,
                                   <32768>;
+               clocks = <&sleep_clk>;
+               clock-names = "sleep";
                 cpu-offset = <0x40000>;
         };
diff --git a/Documentation/devicetree/bindings/common-properties.txt b/Documentation/devicetree/bindings/common-properties.txt

new file mode 100644 (file)

index 0000000..3193979
--- /dev/null
+++ b/Documentation/devicetree/bindings/common-properties.txt
@@ -0,0 +1,60 @@
+Common properties
+
+The ePAPR specification does not define any properties related to hardware
+byteswapping, but endianness issues show up frequently in porting Linux to
+different machine types.  This document attempts to provide a consistent
+way of handling byteswapping across drivers.
+
+Optional properties:
+ - big-endian: Boolean; force big endian register accesses
+   unconditionally (e.g. ioread32be/iowrite32be).  Use this if you
+   know the peripheral always needs to be accessed in BE mode.
+ - little-endian: Boolean; force little endian register accesses
+   unconditionally (e.g. readl/writel).  Use this if you know the
+   peripheral always needs to be accessed in LE mode.
+ - native-endian: Boolean; always use register accesses matched to the
+   endianness of the kernel binary (e.g. LE vmlinux -> readl/writel,
+   BE vmlinux -> ioread32be/iowrite32be).  In this case no byteswaps
+   will ever be performed.  Use this if the hardware "self-adjusts"
+   register endianness based on the CPU's configured endianness.
+
+If a binding supports these properties, then the binding should also
+specify the default behavior if none of these properties are present.
+In such cases, little-endian is the preferred default, but it is not
+a requirement.  The of_device_is_big_endian() and of_fdt_is_big_endian()
+helper functions do assume that little-endian is the default, because
+most existing (PCI-based) drivers implicitly default to LE by using
+readl/writel for MMIO accesses.
+
+Examples:
+Scenario 1 : CPU in LE mode & device in LE mode.
+dev: dev@40031000 {
+             compatible = "name";
+             reg = <0x40031000 0x1000>;
+             ...
+             native-endian;
+};
+
+Scenario 2 : CPU in LE mode & device in BE mode.
+dev: dev@40031000 {
+             compatible = "name";
+             reg = <0x40031000 0x1000>;
+             ...
+             big-endian;
+};
+
+Scenario 3 : CPU in BE mode & device in BE mode.
+dev: dev@40031000 {
+             compatible = "name";
+             reg = <0x40031000 0x1000>;
+             ...
+             native-endian;
+};
+
+Scenario 4 : CPU in BE mode & device in LE mode.
+dev: dev@40031000 {
+             compatible = "name";
+             reg = <0x40031000 0x1000>;
+             ...
+             little-endian;
+};
diff --git a/Documentation/devicetree/bindings/cris/axis.txt b/Documentation/devicetree/bindings/cris/axis.txt

new file mode 100644 (file)

index 0000000..d209ca2
--- /dev/null
+++ b/Documentation/devicetree/bindings/cris/axis.txt
@@ -0,0 +1,9 @@
+Axis Communications AB
+ARTPEC series SoC Device Tree Bindings
+
+
+CRISv32 based SoCs are ETRAX FS and ARTPEC-3:
+
+    - compatible = "axis,crisv32";
+
+
diff --git a/Documentation/devicetree/bindings/cris/boards.txt b/Documentation/devicetree/bindings/cris/boards.txt

new file mode 100644 (file)

index 0000000..533dd27
--- /dev/null
+++ b/Documentation/devicetree/bindings/cris/boards.txt
@@ -0,0 +1,8 @@
+Boards based on the CRIS SoCs:
+
+Required root node properties:
+    - compatible = should be one or more of the following:
+       - "axis,dev88"  - for Axis devboard 88 with ETRAX FS
+
+Optional:
+
diff --git a/Documentation/devicetree/bindings/cris/interrupts.txt b/Documentation/devicetree/bindings/cris/interrupts.txt

new file mode 100644 (file)

index 0000000..e8b123b
--- /dev/null
+++ b/Documentation/devicetree/bindings/cris/interrupts.txt
@@ -0,0 +1,23 @@
+* CRISv32 Interrupt Controller
+
+Interrupt controller for the CRISv32 SoCs.
+
+Main node required properties:
+
+- compatible : should be:
+       "axis,crisv32-intc"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+  interrupt source. The type shall be a <u32> and the value shall be 1.
+- reg: physical base address and size of the intc registers map.
+
+Example:
+
+       intc: interrupt-controller {
+               compatible = "axis,crisv32-intc";
+               reg = <0xb001c000 0x1000>;
+               interrupt-controller;
+               #interrupt-cells = <1>;
+       };
+
+
diff --git a/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt

new file mode 100644 (file)

index 0000000..d305876
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt
@@ -0,0 +1,47 @@
+Applied Micro X-Gene SoC DMA nodes
+
+DMA nodes are defined to describe on-chip DMA interfaces in
+APM X-Gene SoC.
+
+Required properties for DMA interfaces:
+- compatible: Should be "apm,xgene-dma".
+- device_type: set to "dma".
+- reg: Address and length of the register set for the device.
+  It contains the information of registers in the following order:
+  1st - DMA control and status register address space.
+  2nd - Descriptor ring control and status register address space.
+  3rd - Descriptor ring command register address space.
+  4th - Soc efuse register address space.
+- interrupts: DMA has 5 interrupts sources. 1st interrupt is
+  DMA error reporting interrupt. 2nd, 3rd, 4th and 5th interrupts
+  are completion interrupts for each DMA channels.
+- clocks: Reference to the clock entry.
+
+Optional properties:
+- dma-coherent : Present if dma operations are coherent
+
+Example:
+       dmaclk: dmaclk@1f27c000 {
+               compatible = "apm,xgene-device-clock";
+               #clock-cells = <1>;
+               clocks = <&socplldiv2 0>;
+               reg = <0x0 0x1f27c000 0x0 0x1000>;
+               reg-names = "csr-reg";
+               clock-output-names = "dmaclk";
+       };
+
+       dma: dma@1f270000 {
+                       compatible = "apm,xgene-storm-dma";
+                       device_type = "dma";
+                       reg = <0x0 0x1f270000 0x0 0x10000>,
+                             <0x0 0x1f200000 0x0 0x10000>,
+                             <0x0 0x1b008000 0x0 0x2000>,
+                             <0x0 0x1054a000 0x0 0x100>;
+                       interrupts = <0x0 0x82 0x4>,
+                                    <0x0 0xb8 0x4>,
+                                    <0x0 0xb9 0x4>,
+                                    <0x0 0xba 0x4>,
+                                    <0x0 0xbb 0x4>;
+                       dma-coherent;
+                       clocks = <&dmaclk 0>;
+       };
diff --git a/Documentation/devicetree/bindings/dma/jz4780-dma.txt b/Documentation/devicetree/bindings/dma/jz4780-dma.txt

new file mode 100644 (file)

index 0000000..f25feee
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/jz4780-dma.txt
@@ -0,0 +1,56 @@
+* Ingenic JZ4780 DMA Controller
+
+Required properties:
+
+- compatible: Should be "ingenic,jz4780-dma"
+- reg: Should contain the DMA controller registers location and length.
+- interrupts: Should contain the interrupt specifier of the DMA controller.
+- interrupt-parent: Should be the phandle of the interrupt controller that
+- clocks: Should contain a clock specifier for the JZ4780 PDMA clock.
+- #dma-cells: Must be <2>. Number of integer cells in the dmas property of
+  DMA clients (see below).
+
+Optional properties:
+
+- ingenic,reserved-channels: Bitmask of channels to reserve for devices that
+  need a specific channel. These channels will only be assigned when explicitly
+  requested by a client. The primary use for this is channels 0 and 1, which
+  can be configured to have special behaviour for NAND/BCH when using
+  programmable firmware.
+
+Example:
+
+dma: dma@13420000 {
+       compatible = "ingenic,jz4780-dma";
+       reg = <0x13420000 0x10000>;
+
+       interrupt-parent = <&intc>;
+       interrupts = <10>;
+
+       clocks = <&cgu JZ4780_CLK_PDMA>;
+
+       #dma-cells = <2>;
+
+       ingenic,reserved-channels = <0x3>;
+};
+
+DMA clients must use the format described in dma.txt, giving a phandle to the
+DMA controller plus the following 2 integer cells:
+
+1. Request type: The DMA request type for transfers to/from the device on
+   the allocated channel, as defined in the SoC documentation.
+
+2. Channel: If set to 0xffffffff, any available channel will be allocated for
+   the client. Otherwise, the exact channel specified will be used. The channel
+   should be reserved on the DMA controller using the ingenic,reserved-channels
+   property.
+
+Example:
+
+uart0: serial@10030000 {
+       ...
+       dmas = <&dma 0x14 0xffffffff
+               &dma 0x15 0xffffffff>;
+       dma-names = "tx", "rx";
+       ...
+};
diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt

index f8c3311..1c9d48e 100644 (file)
--- a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
+++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
@@ -4,6 +4,7 @@ Required properties:
  - compatible: must be one of the following:
   * "qcom,bam-v1.4.0" for MSM8974, APQ8074 and APQ8084
   * "qcom,bam-v1.3.0" for APQ8064, IPQ8064 and MSM8960
+ * "qcom,bam-v1.7.0" for MSM8916
  - reg: Address range for DMA registers
  - interrupts: Should contain the one interrupt shared by all channels
  - #dma-cells: must be <1>, the cell in the dmas property of the client device
diff --git a/Documentation/devicetree/bindings/dma/rcar-audmapp.txt b/Documentation/devicetree/bindings/dma/rcar-audmapp.txt

deleted file mode 100644 (file)

index 61bca50..0000000
--- a/Documentation/devicetree/bindings/dma/rcar-audmapp.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-* R-Car Audio DMAC peri peri Device Tree bindings
-
-Required properties:
-- compatible:  should be "renesas,rcar-audmapp"
-- #dma-cells:  should be <1>, see "dmas" property below
-
-Example:
-       audmapp: audio-dma-pp@0xec740000 {
-               compatible = "renesas,rcar-audmapp";
-               #dma-cells = <1>;
-
-               reg = <0 0xec740000 0 0x200>;
-       };
-
-
-* DMA client
-
-Required properties:
-- dmas:                a list of <[DMA multiplexer phandle] [SRS << 8 | DRS]> pairs.
-               where SRS/DRS are specified in the SoC manual.
-               It will be written into PDMACHCR as high 16-bit parts.
-- dma-names:   a list of DMA channel names, one per "dmas" entry
-
-Example:
-
-       dmas = <&audmapp 0x2d00
-               &audmapp 0x3700>;
-       dma-names =  "src0_ssiu0",
-                    "dvc0_ssiu0";
diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt

new file mode 100644 (file)

index 0000000..040f365
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
@@ -0,0 +1,37 @@
+* Renesas USB DMA Controller Device Tree bindings
+
+Required Properties:
+- compatible: must contain "renesas,usb-dmac"
+- reg: base address and length of the registers block for the DMAC
+- interrupts: interrupt specifiers for the DMAC, one for each entry in
+  interrupt-names.
+- interrupt-names: one entry per channel, named "ch%u", where %u is the
+  channel number ranging from zero to the number of channels minus one.
+- clocks: a list of phandle + clock-specifier pairs.
+- #dma-cells: must be <1>, the cell specifies the channel number of the DMAC
+  port connected to the DMA client.
+- dma-channels: number of DMA channels
+
+Example: R8A7790 (R-Car H2) USB-DMACs
+
+       usb_dmac0: dma-controller@e65a0000 {
+               compatible = "renesas,usb-dmac";
+               reg = <0 0xe65a0000 0 0x100>;
+               interrupts = <0 109 IRQ_TYPE_LEVEL_HIGH
+                             0 109 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "ch0", "ch1";
+               clocks = <&mstp3_clks R8A7790_CLK_USBDMAC0>;
+               #dma-cells = <1>;
+               dma-channels = <2>;
+       };
+
+       usb_dmac1: dma-controller@e65b0000 {
+               compatible = "renesas,usb-dmac";
+               reg = <0 0xe65b0000 0 0x100>;
+               interrupts = <0 110 IRQ_TYPE_LEVEL_HIGH
+                             0 110 IRQ_TYPE_LEVEL_HIGH>;
+               interrupt-names = "ch0", "ch1";
+               clocks = <&mstp3_clks R8A7790_CLK_USBDMAC1>;
+               #dma-cells = <1>;
+               dma-channels = <2>;
+       };
diff --git a/Documentation/devicetree/bindings/mtd/m25p80.txt b/Documentation/devicetree/bindings/mtd/m25p80.txt

index 4611aa8..f20b111 100644 (file)
--- a/Documentation/devicetree/bindings/mtd/m25p80.txt
+++ b/Documentation/devicetree/bindings/mtd/m25p80.txt
@@ -3,10 +3,13 @@
  Required properties:
  - #address-cells, #size-cells : Must be present if the device has sub-nodes
    representing partitions.
-- compatible : Should be the manufacturer and the name of the chip. Bear in mind
-               the DT binding is not Linux-only, but in case of Linux, see the
-               "spi_nor_ids" table in drivers/mtd/spi-nor/spi-nor.c for the list
-               of supported chips.
+- compatible : May include a device-specific string consisting of the
+               manufacturer and name of the chip. Bear in mind the DT binding
+               is not Linux-only, but in case of Linux, see the "m25p_ids"
+               table in drivers/mtd/devices/m25p80.c for the list of supported
+               chips.
+               Must also include "nor-jedec" for any SPI NOR flash that can be
+               identified by the JEDEC READ ID opcode (0x9F).
  - reg : Chip-Select number
  - spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
  
@@ -22,7 +25,7 @@ Example:
         flash: m25p80@0 {
                 #address-cells = <1>;
                 #size-cells = <1>;
-               compatible = "spansion,m25p80";
+               compatible = "spansion,m25p80", "nor-jedec";
                 reg = <0>;
                 spi-max-frequency = <40000000>;
                 m25p,fast-read;
diff --git a/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt b/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt

index de8b517..4f833e3 100644 (file)
--- a/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt
@@ -14,7 +14,7 @@ Optional properties:
   - marvell,nand-enable-arbiter:        Set to enable the bus arbiter
   - marvell,nand-keep-config:   Set to keep the NAND controller config as set
                                 by the bootloader
- - num-cs:                     Number of chipselect lines to usw
+ - num-cs:                     Number of chipselect lines to use
   - nand-on-flash-bbt:          boolean to enable on flash bbt option if
                                 not present false
   - nand-ecc-strength:           number of bits to correct per ECC step
diff --git a/Documentation/devicetree/bindings/mtd/sunxi-nand.txt b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt

index 0273adb..086d6f4 100644 (file)
--- a/Documentation/devicetree/bindings/mtd/sunxi-nand.txt
+++ b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt
@@ -21,7 +21,7 @@ Optional properties:
  - nand-ecc-mode : one of the supported ECC modes ("hw", "hw_syndrome", "soft",
    "soft_bch" or "none")
  
-see Documentation/devicetree/mtd/nand.txt for generic bindings.
+see Documentation/devicetree/bindings/mtd/nand.txt for generic bindings.
  
  
  Examples:
diff --git a/Documentation/devicetree/bindings/pwm/imx-pwm.txt b/Documentation/devicetree/bindings/pwm/imx-pwm.txt

index b50d7a6..e00c2e9 100644 (file)
--- a/Documentation/devicetree/bindings/pwm/imx-pwm.txt
+++ b/Documentation/devicetree/bindings/pwm/imx-pwm.txt
@@ -1,10 +1,17 @@
  Freescale i.MX PWM controller
  
  Required properties:
-- compatible: should be "fsl,<soc>-pwm"
+- compatible : should be "fsl,<soc>-pwm" and one of the following
+   compatible strings:
+  - "fsl,imx1-pwm" for PWM compatible with the one integrated on i.MX1
+  - "fsl,imx27-pwm" for PWM compatible with the one integrated on i.MX27
  - reg: physical base address and length of the controller's registers
  - #pwm-cells: should be 2. See pwm.txt in this directory for a description of
    the cells format.
+- clocks : Clock specifiers for both ipg and per clocks.
+- clock-names : Clock names should include both "ipg" and "per"
+See the clock consumer binding,
+       Documentation/devicetree/bindings/clock/clock-bindings.txt
  - interrupts: The interrupt for the pwm controller
  
  Example:
@@ -13,5 +20,8 @@ pwm1: pwm@53fb4000 {
         #pwm-cells = <2>;
         compatible = "fsl,imx53-pwm", "fsl,imx27-pwm";
         reg = <0x53fb4000 0x4000>;
+       clocks = <&clks IMX5_CLK_PWM1_IPG_GATE>,
+                <&clks IMX5_CLK_PWM1_HF_GATE>;
+       clock-names = "ipg", "per";
         interrupts = <61>;
  };
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt

index 83737a3..8033919 100644 (file)
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -26,6 +26,7 @@ aptina        Aptina Imaging
  arasan Arasan Chip Systems
  arm    ARM Ltd.
  armadeus       ARMadeus Systems SARL
+artesyn        Artesyn Embedded Technologies Inc.
  asahi-kasei    Asahi Kasei Corp.
  atmel  Atmel Corporation
  auo    AU Optronics Corporation
diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt

index bb9753b..480c8de 100644 (file)
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -49,25 +49,26 @@ The dma_buf buffer sharing API usage contains the following steps:
     The buffer exporter announces its wish to export a buffer. In this, it
     connects its own private buffer data, provides implementation for operations
     that can be performed on the exported dma_buf, and flags for the file
-   associated with this buffer.
+   associated with this buffer. All these fields are filled in struct
+   dma_buf_export_info, defined via the DEFINE_DMA_BUF_EXPORT_INFO macro.
  
     Interface:
-      struct dma_buf *dma_buf_export_named(void *priv, struct dma_buf_ops *ops,
-                                    size_t size, int flags,
-                                    const char *exp_name)
+      DEFINE_DMA_BUF_EXPORT_INFO(exp_info)
+      struct dma_buf *dma_buf_export(struct dma_buf_export_info *exp_info)
  
-   If this succeeds, dma_buf_export_named allocates a dma_buf structure, and
+   If this succeeds, dma_buf_export allocates a dma_buf structure, and
     returns a pointer to the same. It also associates an anonymous file with this
     buffer, so it can be exported. On failure to allocate the dma_buf object,
     it returns NULL.
  
-   'exp_name' is the name of exporter - to facilitate information while
-   debugging.
+   'exp_name' in struct dma_buf_export_info is the name of exporter - to
+   facilitate information while debugging. It is set to KBUILD_MODNAME by
+   default, so exporters don't have to provide a specific name, if they don't
+   wish to.
+
+   DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct dma_buf_export_info,
+   zeroes it out and pre-populates exp_name in it.
  
-   Exporting modules which do not wish to provide any specific name may use the
-   helper define 'dma_buf_export()', with the same arguments as above, but
-   without the last argument; a KBUILD_MODNAME pre-processor directive will be
-   inserted in place of 'exp_name' instead.
  
  2. Userspace gets a handle to pass around to potential buffer-users
  
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt

index 7240438..95c13aa 100644 (file)
--- a/Documentation/filesystems/nfs/nfs-rdma.txt
+++ b/Documentation/filesystems/nfs/nfs-rdma.txt
@@ -187,8 +187,10 @@ Check RDMA and NFS Setup
      To further test the InfiniBand software stack, use IPoIB (this
      assumes you have two IB hosts named host1 and host2):
  
-    host1$ ifconfig ib0 a.b.c.x
-    host2$ ifconfig ib0 a.b.c.y
+    host1$ ip link set dev ib0 up
+    host1$ ip address add dev ib0 a.b.c.x
+    host2$ ip link set dev ib0 up
+    host2$ ip address add dev ib0 a.b.c.y
      host1$ ping a.b.c.y
      host2$ ping a.b.c.x
  
@@ -229,7 +231,8 @@ NFS/RDMA Setup
  
      $ modprobe ib_mthca
      $ modprobe ib_ipoib
-    $ ifconfig ib0 a.b.c.d
+    $ ip li set dev ib0 up
+    $ ip addr add dev ib0 a.b.c.d
  
      NOTE: use unique addresses for the client and server
  
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt

index 0bfafe1..5a5a055 100644 (file)
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -228,30 +228,19 @@ default behaviour.
  Deprecated Mount Options
  ========================
  
-  delaylog/nodelaylog
-       Delayed logging is the only logging method that XFS supports
-       now, so these mount options are now ignored.
-
-       Due for removal in 3.12.
-
-  ihashsize=value
-       In memory inode hashes have been removed, so this option has
-       no function as of August 2007. Option is deprecated.
-
-       Due for removal in 3.12.
+None at present.
  
-  irixsgid
-       This behaviour is now controlled by a sysctl, so the mount
-       option is ignored.
  
-       Due for removal in 3.12.
+Removed Mount Options
+=====================
  
-  osyncisdsync
-  osyncisosync
-       O_SYNC and O_DSYNC are fully supported, so there is no need
-       for these options any more.
+  Name                         Removed
+  ----                         -------
+  delaylog/nodelaylog          v3.20
+  ihashsize                    v3.20
+  irixsgid                     v3.20
+  osyncisdsync/osyncisosync    v3.20
  
-       Due for removal in 3.12.
  
  sysctls
  =======
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt

index 8136e1f..51f4221 100644 (file)
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -321,6 +321,7 @@ Code  Seq#(hex)     Include File            Comments
  0xDB   00-0F   drivers/char/mwave/mwavepub.h
  0xDD   00-3F   ZFCP device driver      see drivers/s390/scsi/
                                         <mailto:aherrman@de.ibm.com>
+0xEC   00-01   drivers/platform/chrome/cros_ec_dev.h   ChromeOS EC driver
  0xF3   00-3F   drivers/usb/misc/sisusbvga/sisusb.h     sisfb (in development)
                                         <mailto:thomas@winischhofer.net>
  0xF4   00-1F   video/mbxfb.h           mbxfb
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt

index 84960c6..f6befa9 100644 (file)
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -165,7 +165,7 @@ multipliers 'Kilo', 'Mega', and 'Giga', equalling 2^10, 2^20, and 2^30
  bytes respectively. Such letter suffixes can also be entirely omitted.
  
  
-       acpi=           [HW,ACPI,X86]
+       acpi=           [HW,ACPI,X86,ARM64]
                         Advanced Configuration and Power Interface
                         Format: { force | off | strict | noirq | rsdt }
                         force -- enable ACPI if default was off
@@ -175,6 +175,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                 strictly ACPI specification compliant.
                         rsdt -- prefer RSDT over (default) XSDT
                         copy_dsdt -- copy DSDT to memory
+                       For ARM64, ONLY "acpi=off" or "acpi=force" are available
  
                         See also Documentation/power/runtime_pm.txt, pci=noacpi
  
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt

index fc04c14..72a150d 100644 (file)
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -1355,6 +1355,24 @@ Sysfs notes:
         rfkill controller switch "tpacpi_uwb_sw": refer to
         Documentation/rfkill.txt for details.
  
+Adaptive keyboard
+-----------------
+
+sysfs device attribute: adaptive_kbd_mode
+
+This sysfs attribute controls the keyboard "face" that will be shown on the
+Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
+and set.
+
+1 = Home mode
+2 = Web-browser mode
+3 = Web-conference mode
+4 = Function mode
+5 = Layflat mode
+
+For more details about which buttons will appear depending on the mode, please
+review the laptop's user guide:
+http://www.lenovo.com/shop/americas/content/user_guides/x1carbon_2_ug_en.pdf
  
  Multiple Commands, Module Parameters
  ------------------------------------
diff --git a/Documentation/md-cluster.txt b/Documentation/md-cluster.txt

new file mode 100644 (file)

index 0000000..de1af7d
--- /dev/null
+++ b/Documentation/md-cluster.txt
@@ -0,0 +1,176 @@
+The cluster MD is a shared-device RAID for a cluster.
+
+
+1. On-disk format
+
+Separate write-intent-bitmap are used for each cluster node.
+The bitmaps record all writes that may have been started on that node,
+and may not yet have finished. The on-disk layout is:
+
+0                    4k                     8k                    12k
+-------------------------------------------------------------------
+| idle                | md super            | bm super [0] + bits |
+| bm bits[0, contd]   | bm super[1] + bits  | bm bits[1, contd]   |
+| bm super[2] + bits  | bm bits [2, contd]  | bm super[3] + bits  |
+| bm bits [3, contd]  |                     |                     |
+
+During "normal" functioning we assume the filesystem ensures that only one
+node writes to any given block at a time, so a write
+request will
+ - set the appropriate bit (if not already set)
+ - commit the write to all mirrors
+ - schedule the bit to be cleared after a timeout.
+
+Reads are just handled normally.  It is up to the filesystem to
+ensure one node doesn't read from a location where another node (or the same
+node) is writing.
+
+
+2. DLM Locks for management
+
+There are two locks for managing the device:
+
+2.1 Bitmap lock resource (bm_lockres)
+
+ The bm_lockres protects individual node bitmaps. They are named in the
+ form bitmap001 for node 1, bitmap002 for node and so on. When a node
+ joins the cluster, it acquires the lock in PW mode and it stays so
+ during the lifetime the node is part of the cluster. The lock resource
+ number is based on the slot number returned by the DLM subsystem. Since
+ DLM starts node count from one and bitmap slots start from zero, one is
+ subtracted from the DLM slot number to arrive at the bitmap slot number.
+
+3. Communication
+
+Each node has to communicate with other nodes when starting or ending
+resync, and metadata superblock updates.
+
+3.1 Message Types
+
+ There are 3 types, of messages which are passed
+
+ 3.1.1 METADATA_UPDATED: informs other nodes that the metadata has been
+   updated, and the node must re-read the md superblock. This is performed
+   synchronously.
+
+ 3.1.2 RESYNC: informs other nodes that a resync is initiated or ended
+   so that each node may suspend or resume the region.
+
+3.2 Communication mechanism
+
+ The DLM LVB is used to communicate within nodes of the cluster. There
+ are three resources used for the purpose:
+
+  3.2.1 Token: The resource which protects the entire communication
+   system. The node having the token resource is allowed to
+   communicate.
+
+  3.2.2 Message: The lock resource which carries the data to
+   communicate.
+
+  3.2.3 Ack: The resource, acquiring which means the message has been
+   acknowledged by all nodes in the cluster. The BAST of the resource
+   is used to inform the receive node that a node wants to communicate.
+
+The algorithm is:
+
+ 1. receive status
+
+   sender                         receiver                   receiver
+   ACK:CR                          ACK:CR                     ACK:CR
+
+ 2. sender get EX of TOKEN
+    sender get EX of MESSAGE
+    sender                        receiver                 receiver
+    TOKEN:EX                       ACK:CR                   ACK:CR
+    MESSAGE:EX
+    ACK:CR
+
+    Sender checks that it still needs to send a message. Messages received
+    or other events that happened while waiting for the TOKEN may have made
+    this message inappropriate or redundant.
+
+ 3. sender write LVB.
+    sender down-convert MESSAGE from EX to CR
+    sender try to get EX of ACK
+    [ wait until all receiver has *processed* the MESSAGE ]
+
+                                     [ triggered by bast of ACK ]
+                                     receiver get CR of MESSAGE
+                                     receiver read LVB
+                                     receiver processes the message
+                                     [ wait finish ]
+                                     receiver release ACK
+
+   sender                         receiver                   receiver
+   TOKEN:EX                       MESSAGE:CR                 MESSAGE:CR
+   MESSAGE:CR
+   ACK:EX
+
+ 4. triggered by grant of EX on ACK (indicating all receivers have processed
+    message)
+    sender down-convert ACK from EX to CR
+    sender release MESSAGE
+    sender release TOKEN
+                               receiver upconvert to EX of MESSAGE
+                               receiver get CR of ACK
+                               receiver release MESSAGE
+
+   sender                      receiver                   receiver
+   ACK:CR                       ACK:CR                     ACK:CR
+
+
+4. Handling Failures
+
+4.1 Node Failure
+ When a node fails, the DLM informs the cluster with the slot. The node
+ starts a cluster recovery thread. The cluster recovery thread:
+       - acquires the bitmap<number> lock of the failed node
+       - opens the bitmap
+       - reads the bitmap of the failed node
+       - copies the set bitmap to local node
+       - cleans the bitmap of the failed node
+       - releases bitmap<number> lock of the failed node
+       - initiates resync of the bitmap on the current node
+
+ The resync process, is the regular md resync. However, in a clustered
+ environment when a resync is performed, it needs to tell other nodes
+ of the areas which are suspended. Before a resync starts, the node
+ send out RESYNC_START with the (lo,hi) range of the area which needs
+ to be suspended. Each node maintains a suspend_list, which contains
+ the list  of ranges which are currently suspended. On receiving
+ RESYNC_START, the node adds the range to the suspend_list. Similarly,
+ when the node performing resync finishes, it send RESYNC_FINISHED
+ to other nodes and other nodes remove the corresponding entry from
+ the suspend_list.
+
+ A helper function, should_suspend() can be used to check if a particular
+ I/O range should be suspended or not.
+
+4.2 Device Failure
+ Device failures are handled and communicated with the metadata update
+ routine.
+
+5. Adding a new Device
+For adding a new device, it is necessary that all nodes "see" the new device
+to be added. For this, the following algorithm is used:
+
+    1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
+       ioctl(ADD_NEW_DISC with disc.state set to MD_DISK_CLUSTER_ADD)
+    2. Node 1 sends NEWDISK with uuid and slot number
+    3. Other nodes issue kobject_uevent_env with uuid and slot number
+       (Steps 4,5 could be a udev rule)
+    4. In userspace, the node searches for the disk, perhaps
+       using blkid -t SUB_UUID=""
+    5. Other nodes issue either of the following depending on whether the disk
+       was found:
+       ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
+                disc.number set to slot number)
+       ioctl(CLUSTERED_DISK_NACK)
+    6. Other nodes drop lock on no-new-devs (CR) if device is found
+    7. Node 1 attempts EX lock on no-new-devs
+    8. If node 1 gets the lock, it sends METADATA_UPDATED after unmarking the disk
+       as SpareLocal
+    9. If not (get no-new-dev lock), it fails the operation and sends METADATA_UPDATED
+    10. Other nodes get the information whether a disk is added or not
+       by the following METADATA_UPDATED.
diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py

index 2b47704..2ba71ce 100755 (executable)
--- a/Documentation/target/tcm_mod_builder.py
+++ b/Documentation/target/tcm_mod_builder.py
@@ -237,8 +237,7 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
         buf += "#include \"" + fabric_mod_name + "_base.h\"\n"
         buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n"
  
-       buf += "/* Local pointer to allocated TCM configfs fabric module */\n"
-       buf += "struct target_fabric_configfs *" + fabric_mod_name + "_fabric_configfs;\n\n"
+       buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops;\n\n"
  
         buf += "static struct se_node_acl *" + fabric_mod_name + "_make_nodeacl(\n"
         buf += "        struct se_portal_group *se_tpg,\n"
@@ -309,8 +308,8 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
         buf += "        }\n"
         buf += "        tpg->" + fabric_mod_port + " = " + fabric_mod_port + ";\n"
         buf += "        tpg->" + fabric_mod_port + "_tpgt = tpgt;\n\n"
-       buf += "        ret = core_tpg_register(&" + fabric_mod_name + "_fabric_configfs->tf_ops, wwn,\n"
-       buf += "                                &tpg->se_tpg, (void *)tpg,\n"
+       buf += "        ret = core_tpg_register(&" + fabric_mod_name + "_ops, wwn,\n"
+       buf += "                                &tpg->se_tpg, tpg,\n"
         buf += "                                TRANSPORT_TPG_TYPE_NORMAL);\n"
         buf += "        if (ret < 0) {\n"
         buf += "                kfree(tpg);\n"
@@ -370,7 +369,10 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
         buf += "        NULL,\n"
         buf += "};\n\n"
  
-       buf += "static struct target_core_fabric_ops " + fabric_mod_name + "_ops = {\n"
+       buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops = {\n"
+       buf += "        .module                         = THIS_MODULE,\n"
+       buf += "        .name                           = " + fabric_mod_name + ",\n"
+       buf += "        .get_fabric_proto_ident         = " + fabric_mod_name + "_get_fabric_proto_ident,\n"
         buf += "        .get_fabric_name                = " + fabric_mod_name + "_get_fabric_name,\n"
         buf += "        .get_fabric_proto_ident         = " + fabric_mod_name + "_get_fabric_proto_ident,\n"
         buf += "        .tpg_get_wwn                    = " + fabric_mod_name + "_get_fabric_wwn,\n"
@@ -413,75 +415,18 @@ def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
         buf += "        .fabric_drop_np                 = NULL,\n"
         buf += "        .fabric_make_nodeacl            = " + fabric_mod_name + "_make_nodeacl,\n"
         buf += "        .fabric_drop_nodeacl            = " + fabric_mod_name + "_drop_nodeacl,\n"
-       buf += "};\n\n"
-
-       buf += "static int " + fabric_mod_name + "_register_configfs(void)\n"
-       buf += "{\n"
-       buf += "        struct target_fabric_configfs *fabric;\n"
-       buf += "        int ret;\n\n"
-       buf += "        printk(KERN_INFO \"" + fabric_mod_name.upper() + " fabric module %s on %s/%s\"\n"
-       buf += "                \" on \"UTS_RELEASE\"\\n\"," + fabric_mod_name.upper() + "_VERSION, utsname()->sysname,\n"
-       buf += "                utsname()->machine);\n"
-       buf += "        /*\n"
-       buf += "         * Register the top level struct config_item_type with TCM core\n"
-       buf += "         */\n"
-       buf += "        fabric = target_fabric_configfs_init(THIS_MODULE, \"" + fabric_mod_name + "\");\n"
-       buf += "        if (IS_ERR(fabric)) {\n"
-       buf += "                printk(KERN_ERR \"target_fabric_configfs_init() failed\\n\");\n"
-       buf += "                return PTR_ERR(fabric);\n"
-       buf += "        }\n"
-       buf += "        /*\n"
-       buf += "         * Setup fabric->tf_ops from our local " + fabric_mod_name + "_ops\n"
-       buf += "         */\n"
-       buf += "        fabric->tf_ops = " + fabric_mod_name + "_ops;\n"
-       buf += "        /*\n"
-       buf += "         * Setup default attribute lists for various fabric->tf_cit_tmpl\n"
-       buf += "         */\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = " + fabric_mod_name + "_wwn_attrs;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;\n"
-       buf += "        fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;\n"
-       buf += "        /*\n"
-       buf += "         * Register the fabric for use within TCM\n"
-       buf += "         */\n"
-       buf += "        ret = target_fabric_configfs_register(fabric);\n"
-       buf += "        if (ret < 0) {\n"
-       buf += "                printk(KERN_ERR \"target_fabric_configfs_register() failed\"\n"
-       buf += "                                \" for " + fabric_mod_name.upper() + "\\n\");\n"
-       buf += "                return ret;\n"
-       buf += "        }\n"
-       buf += "        /*\n"
-       buf += "         * Setup our local pointer to *fabric\n"
-       buf += "         */\n"
-       buf += "        " + fabric_mod_name + "_fabric_configfs = fabric;\n"
-       buf += "        printk(KERN_INFO \"" +  fabric_mod_name.upper() + "[0] - Set fabric -> " + fabric_mod_name + "_fabric_configfs\\n\");\n"
-       buf += "        return 0;\n"
-       buf += "};\n\n"
-       buf += "static void __exit " + fabric_mod_name + "_deregister_configfs(void)\n"
-       buf += "{\n"
-       buf += "        if (!" + fabric_mod_name + "_fabric_configfs)\n"
-       buf += "                return;\n\n"
-       buf += "        target_fabric_configfs_deregister(" + fabric_mod_name + "_fabric_configfs);\n"
-       buf += "        " + fabric_mod_name + "_fabric_configfs = NULL;\n"
-       buf += "        printk(KERN_INFO \"" +  fabric_mod_name.upper() + "[0] - Cleared " + fabric_mod_name + "_fabric_configfs\\n\");\n"
+       buf += "\n"
+       buf += "        .tfc_wwn_attrs                  = " + fabric_mod_name + "_wwn_attrs;\n"
         buf += "};\n\n"
  
         buf += "static int __init " + fabric_mod_name + "_init(void)\n"
         buf += "{\n"
-       buf += "        int ret;\n\n"
-       buf += "        ret = " + fabric_mod_name + "_register_configfs();\n"
-       buf += "        if (ret < 0)\n"
-       buf += "                return ret;\n\n"
-       buf += "        return 0;\n"
+       buf += "        return target_register_template(" + fabric_mod_name + "_ops);\n"
         buf += "};\n\n"
+
         buf += "static void __exit " + fabric_mod_name + "_exit(void)\n"
         buf += "{\n"
-       buf += "        " + fabric_mod_name + "_deregister_configfs();\n"
+       buf += "        target_unregister_template(" + fabric_mod_name + "_ops);\n"
         buf += "};\n\n"
  
         buf += "MODULE_DESCRIPTION(\"" + fabric_mod_name.upper() + " series fabric driver\");\n"
diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt

index 5518465..43e94ea 100644 (file)
--- a/Documentation/target/tcmu-design.txt
+++ b/Documentation/target/tcmu-design.txt
@@ -138,27 +138,40 @@ signals the kernel via a 4-byte write(). When cmd_head equals
  cmd_tail, the ring is empty -- no commands are currently waiting to be
  processed by userspace.
  
-TCMU commands start with a common header containing "len_op", a 32-bit
-value that stores the length, as well as the opcode in the lowest
-unused bits. Currently only two opcodes are defined, TCMU_OP_PAD and
-TCMU_OP_CMD. When userspace encounters a command with PAD opcode, it
-should skip ahead by the bytes in "length". (The kernel inserts PAD
-entries to ensure each CMD entry fits contigously into the circular
-buffer.)
-
-When userspace handles a CMD, it finds the SCSI CDB (Command Data
-Block) via tcmu_cmd_entry.req.cdb_off. This is an offset from the
-start of the overall shared memory region, not the entry. The data
-in/out buffers are accessible via tht req.iov[] array. Note that
-each iov.iov_base is also an offset from the start of the region.
-
-TCMU currently does not support BIDI operations.
+TCMU commands are 8-byte aligned. They start with a common header
+containing "len_op", a 32-bit value that stores the length, as well as
+the opcode in the lowest unused bits. It also contains cmd_id and
+flags fields for setting by the kernel (kflags) and userspace
+(uflags).
+
+Currently only two opcodes are defined, TCMU_OP_CMD and TCMU_OP_PAD.
+
+When the opcode is CMD, the entry in the command ring is a struct
+tcmu_cmd_entry. Userspace finds the SCSI CDB (Command Data Block) via
+tcmu_cmd_entry.req.cdb_off. This is an offset from the start of the
+overall shared memory region, not the entry. The data in/out buffers
+are accessible via tht req.iov[] array. iov_cnt contains the number of
+entries in iov[] needed to describe either the Data-In or Data-Out
+buffers. For bidirectional commands, iov_cnt specifies how many iovec
+entries cover the Data-Out area, and iov_bidi_count specifies how many
+iovec entries immediately after that in iov[] cover the Data-In
+area. Just like other fields, iov.iov_base is an offset from the start
+of the region.
  
  When completing a command, userspace sets rsp.scsi_status, and
  rsp.sense_buffer if necessary. Userspace then increments
  mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the
  kernel via the UIO method, a 4-byte write to the file descriptor.
  
+When the opcode is PAD, userspace only updates cmd_tail as above --
+it's a no-op. (The kernel inserts PAD entries to ensure each CMD entry
+is contiguous within the command ring.)
+
+More opcodes may be added in the future. If userspace encounters an
+opcode it does not handle, it must set UNKNOWN_OP bit (bit 0) in
+hdr.uflags, update cmd_tail, and proceed with processing additional
+commands, if any.
+
  The Data Area:
  
  This is shared-memory space after the command ring. The organization
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt

index bc9f6fe..9fa2bf8 100644 (file)
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3573,3 +3573,20 @@ struct {
  @ar   - access register number
  
  KVM handlers should exit to userspace with rc = -EREMOTE.
+
+
+8. Other capabilities.
+----------------------
+
+This section lists capabilities that give information about other
+features of the KVM implementation.
+
+8.1 KVM_CAP_PPC_HWRNG
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel has an implementation of the
+H_RANDOM hypercall backed by a hardware random-number generator.
+If present, the kernel H_RANDOM handler can be enabled for guest use
+with the KVM_CAP_PPC_ENABLE_HCALL capability.
diff --git a/MAINTAINERS b/MAINTAINERS

index f6f5950..2e5bbc0 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3066,10 +3066,16 @@ F:      drivers/net/fddi/defxx.*
  
  DELL LAPTOP DRIVER
  M:     Matthew Garrett <mjg59@srcf.ucam.org>
+M:     Pali Rohár <pali.rohar@gmail.com>
  L:     platform-driver-x86@vger.kernel.org
  S:     Maintained
  F:     drivers/platform/x86/dell-laptop.c
  
+DELL LAPTOP FREEFALL DRIVER
+M:     Pali Rohár <pali.rohar@gmail.com>
+S:     Maintained
+F:     drivers/platform/x86/dell-smo8800.c
+
  DELL LAPTOP SMM DRIVER
  M:     Guenter Roeck <linux@roeck-us.net>
  S:     Maintained
@@ -3084,6 +3090,7 @@ F:        drivers/firmware/dcdbas.*
  
  DELL WMI EXTRAS DRIVER
  M:     Matthew Garrett <mjg59@srcf.ucam.org>
+M:     Pali Rohár <pali.rohar@gmail.com>
  S:     Maintained
  F:     drivers/platform/x86/dell-wmi.c
  
@@ -3271,12 +3278,6 @@ F:       drivers/firmware/dmi-id.c
  F:     drivers/firmware/dmi_scan.c
  F:     include/linux/dmi.h
  
-DOCKING STATION DRIVER
-M:     Shaohua Li <shaohua.li@intel.com>
-L:     linux-acpi@vger.kernel.org
-S:     Supported
-F:     drivers/acpi/dock.c
-
  DOCUMENTATION
  M:     Jonathan Corbet <corbet@lwn.net>
  L:     linux-doc@vger.kernel.org
@@ -5009,6 +5010,11 @@ W:       http://industrypack.sourceforge.net
  S:     Maintained
  F:     drivers/ipack/
  
+INGENIC JZ4780 DMA Driver
+M:     Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>
+S:     Maintained
+F:     drivers/dma/dma-jz4780.c
+
  INTEGRITY MEASUREMENT ARCHITECTURE (IMA)
  M:     Mimi Zohar <zohar@linux.vnet.ibm.com>
  M:     Dmitry Kasatkin <dmitry.kasatkin@gmail.com>
@@ -7533,7 +7539,6 @@ S:        Maintained
  F:     drivers/pci/host/pci-exynos.c
  
  PCI DRIVER FOR SYNOPSIS DESIGNWARE
-M:     Mohit Kumar <mohit.kumar@st.com>
  M:     Jingoo Han <jg1.han@samsung.com>
  L:     linux-pci@vger.kernel.org
  S:     Maintained
@@ -7548,9 +7553,8 @@ F:        Documentation/devicetree/bindings/pci/host-generic-pci.txt
  F:     drivers/pci/host/pci-host-generic.c
  
  PCIE DRIVER FOR ST SPEAR13XX
-M:     Mohit Kumar <mohit.kumar@st.com>
  L:     linux-pci@vger.kernel.org
-S:     Maintained
+S:     Orphan
  F:     drivers/pci/host/*spear*
  
  PCMCIA SUBSYSTEM
@@ -8805,6 +8809,15 @@ W:       http://www.emulex.com
  S:     Supported
  F:     drivers/net/ethernet/emulex/benet/
  
+EMULEX ONECONNECT ROCE DRIVER
+M:     Selvin Xavier <selvin.xavier@emulex.com>
+M:     Devesh Sharma <devesh.sharma@emulex.com>
+M:     Mitesh Ahuja <mitesh.ahuja@emulex.com>
+L:     linux-rdma@vger.kernel.org
+W:     http://www.emulex.com
+S:     Supported
+F:     drivers/infiniband/hw/ocrdma/
+
  SFC NETWORK DRIVER
  M:     Solarflare linux maintainers <linux-net-drivers@solarflare.com>
  M:     Shradha Shah <sshah@solarflare.com>
@@ -9937,10 +9950,23 @@ S:      Maintained
  F:     drivers/platform/x86/topstar-laptop.c
  
  TOSHIBA ACPI EXTRAS DRIVER
+M:     Azael Avalos <coproscefalo@gmail.com>
  L:     platform-driver-x86@vger.kernel.org
-S:     Orphan
+S:     Maintained
  F:     drivers/platform/x86/toshiba_acpi.c
  
+TOSHIBA BLUETOOTH DRIVER
+M:     Azael Avalos <coproscefalo@gmail.com>
+L:     platform-driver-x86@vger.kernel.org
+S:     Maintained
+F:     drivers/platform/x86/toshiba_bluetooth.c
+
+TOSHIBA HDD ACTIVE PROTECTION SENSOR DRIVER
+M:     Azael Avalos <coproscefalo@gmail.com>
+L:     platform-driver-x86@vger.kernel.org
+S:     Maintained
+F:     drivers/platform/x86/toshiba_haps.c
+
  TOSHIBA SMM DRIVER
  M:     Jonathan Buzzard <jonathan@buzzard.org.uk>
  L:     tlinux-users@tce.toshiba-dme.co.jp
@@ -10517,6 +10543,12 @@ S:     Maintained
  F:     drivers/vhost/
  F:     include/uapi/linux/vhost.h
  
+VIRTIO INPUT DRIVER
+M:     Gerd Hoffmann <kraxel@redhat.com>
+S:     Maintained
+F:     drivers/virtio/virtio_input.c
+F:     include/uapi/linux/virtio_input.h
+
  VIA RHINE NETWORK DRIVER
  M:     Roger Luethi <rl@hellgate.ch>
  S:     Maintained
diff --git a/Makefile b/Makefile

index 6cc5b24..7ff1239 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
  VERSION = 4
-PATCHLEVEL = 0
+PATCHLEVEL = 1
  SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
  NAME = Hurr durr I'ma sheep
  
  # *DOCUMENTATION*
diff --git a/arch/arc/boot/dts/angel4.dts b/arch/arc/boot/dts/angel4.dts

index 757e0c6..3b076fb 100644 (file)
--- a/arch/arc/boot/dts/angel4.dts
+++ b/arch/arc/boot/dts/angel4.dts
@@ -64,7 +64,7 @@
                 };
  
                 arcpmu0: pmu {
-                       compatible = "snps,arc700-pmu";
+                       compatible = "snps,arc700-pct";
                 };
         };
  };
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig

index 278dacf..d2ac4e5 100644 (file)
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -2,6 +2,9 @@ CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
  # CONFIG_LOCALVERSION_AUTO is not set
  CONFIG_DEFAULT_HOSTNAME="ARCLinux"
  # CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
  CONFIG_HIGH_RES_TIMERS=y
  CONFIG_IKCONFIG=y
  CONFIG_IKCONFIG_PROC=y
@@ -9,7 +12,7 @@ CONFIG_NAMESPACES=y
  # CONFIG_UTS_NS is not set
  # CONFIG_PID_NS is not set
  CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE="../arc_initramfs"
+CONFIG_INITRAMFS_SOURCE="../arc_initramfs/"
  CONFIG_KALLSYMS_ALL=y
  CONFIG_EMBEDDED=y
  # CONFIG_SLUB_DEBUG is not set
@@ -21,12 +24,9 @@ CONFIG_MODULES=y
  # CONFIG_IOSCHED_DEADLINE is not set
  # CONFIG_IOSCHED_CFQ is not set
  CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_IDE is not set
-# CONFIG_ARCTANGENT_EMAC is not set
  # CONFIG_ARC_HAS_RTSC is not set
  CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci"
  # CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
  CONFIG_NET=y
  CONFIG_PACKET=y
  CONFIG_UNIX=y
@@ -39,23 +39,23 @@ CONFIG_INET=y
  # CONFIG_FIRMWARE_IN_KERNEL is not set
  # CONFIG_BLK_DEV is not set
  CONFIG_NETDEVICES=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_EVDEV=y
  # CONFIG_MOUSE_PS2_ALPS is not set
  # CONFIG_MOUSE_PS2_LOGIPS2PP is not set
  # CONFIG_MOUSE_PS2_SYNAPTICS is not set
+# CONFIG_MOUSE_PS2_CYPRESS is not set
  # CONFIG_MOUSE_PS2_TRACKPOINT is not set
  CONFIG_MOUSE_PS2_TOUCHKIT=y
-# CONFIG_SERIO_I8042 is not set
  # CONFIG_SERIO_SERPORT is not set
  CONFIG_SERIO_ARC_PS2=y
  # CONFIG_LEGACY_PTYS is not set
  # CONFIG_DEVKMEM is not set
  CONFIG_SERIAL_8250=y
  CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_DW=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
  CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
  # CONFIG_HW_RANDOM is not set
  # CONFIG_HWMON is not set
  CONFIG_FB=y
@@ -72,4 +72,3 @@ CONFIG_TMPFS=y
  CONFIG_NFS_FS=y
  # CONFIG_ENABLE_WARN_DEPRECATED is not set
  # CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h

index be33db8..e2b1b12 100644 (file)
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -30,6 +30,7 @@
  #define ARC_REG_D_UNCACH_BCR   0x6A
  #define ARC_REG_BPU_BCR                0xc0
  #define ARC_REG_ISA_CFG_BCR    0xc1
+#define ARC_REG_RTT_BCR                0xF2
  #define ARC_REG_SMART_BCR      0xFF
  
  /* status32 Bits Positions */
@@ -50,11 +51,7 @@
   * [15: 8] = Exception Cause Code
   * [ 7: 0] = Exception Parameters (for certain types only)
   */
-#define ECR_VEC_MASK                   0xff0000
-#define ECR_CODE_MASK                  0x00ff00
-#define ECR_PARAM_MASK                 0x0000ff
-
-/* Exception Cause Vector Values */
+#define ECR_V_MEM_ERR                  0x01
  #define ECR_V_INSN_ERR                 0x02
  #define ECR_V_MACH_CHK                 0x20
  #define ECR_V_ITLB_MISS                        0x21
@@ -62,7 +59,8 @@
  #define ECR_V_PROTV                    0x23
  #define ECR_V_TRAP                     0x25
  
-/* Protection Violation Exception Cause Code Values */
+/* DTLB Miss and Protection Violation Cause Codes */
+
  #define ECR_C_PROTV_INST_FETCH         0x00
  #define ECR_C_PROTV_LOAD               0x01
  #define ECR_C_PROTV_STORE              0x02
@@ -173,11 +171,11 @@
         }                                               \
  }
  
-#define WRITE_BCR(reg, into)                           \
+#define WRITE_AUX(reg, into)                           \
  {                                                      \
         unsigned int tmp;                               \
         if (sizeof(tmp) == sizeof(into)) {              \
-               tmp = (*(unsigned int *)(into));        \
+               tmp = (*(unsigned int *)&(into));       \
                 write_aux_reg(reg, tmp);                \
         } else  {                                       \
                 extern void bogus_undefined(void);      \
diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h

index 1a5bf07..4051e95 100644 (file)
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -32,6 +32,20 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *m)
  
         m += nr >> 5;
  
+       /*
+        * ARC ISA micro-optimization:
+        *
+        * Instructions dealing with bitpos only consider lower 5 bits (0-31)
+        * e.g (x << 33) is handled like (x << 1) by ASL instruction
+        *  (mem pointer still needs adjustment to point to next word)
+        *
+        * Hence the masking to clamp @nr arg can be elided in general.
+        *
+        * However if @nr is a constant (above assumed it in a register),
+        * and greater than 31, gcc can optimize away (x << 33) to 0,
+        * as overflow, given the 32-bit ISA. Thus masking needs to be done
+        * for constant @nr, but no code is generated due to const prop.
+        */
         if (__builtin_constant_p(nr))
                 nr &= 0x1f;
  
@@ -374,29 +388,20 @@ __test_and_change_bit(unsigned long nr, volatile unsigned long *m)
   * This routine doesn't need to be atomic.
   */
  static inline int
-__constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
-{
-       return ((1UL << (nr & 31)) &
-               (((const volatile unsigned int *)addr)[nr >> 5])) != 0;
-}
-
-static inline int
-__test_bit(unsigned int nr, const volatile unsigned long *addr)
+test_bit(unsigned int nr, const volatile unsigned long *addr)
  {
         unsigned long mask;
  
         addr += nr >> 5;
  
-       /* ARC700 only considers 5 bits in bit-fiddling insn */
+       if (__builtin_constant_p(nr))
+               nr &= 0x1f;
+
         mask = 1 << nr;
  
         return ((mask & *addr) != 0);
  }
  
-#define test_bit(nr, addr)     (__builtin_constant_p(nr) ? \
-                                       __constant_test_bit((nr), (addr)) : \
-                                       __test_bit((nr), (addr)))
-
  /*
   * Count the number of zeros, starting from MSB
   * Helper for fls( ) friends
diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h

index cbf755e..2b8880e 100644 (file)
--- a/arch/arc/include/asm/perf_event.h
+++ b/arch/arc/include/asm/perf_event.h
@@ -54,29 +54,13 @@ struct arc_reg_cc_build {
  #define PERF_COUNT_ARC_BPOK    (PERF_COUNT_HW_MAX + 3)
  #define PERF_COUNT_ARC_EDTLB   (PERF_COUNT_HW_MAX + 4)
  #define PERF_COUNT_ARC_EITLB   (PERF_COUNT_HW_MAX + 5)
-#define PERF_COUNT_ARC_HW_MAX  (PERF_COUNT_HW_MAX + 6)
+#define PERF_COUNT_ARC_LDC     (PERF_COUNT_HW_MAX + 6)
+#define PERF_COUNT_ARC_STC     (PERF_COUNT_HW_MAX + 7)
+
+#define PERF_COUNT_ARC_HW_MAX  (PERF_COUNT_HW_MAX + 8)
  
  /*
- * The "generalized" performance events seem to really be a copy
- * of the available events on x86 processors; the mapping to ARC
- * events is not always possible 1-to-1. Fortunately, there doesn't
- * seem to be an exact definition for these events, so we can cheat
- * a bit where necessary.
- *
- * In particular, the following PERF events may behave a bit differently
- * compared to other architectures:
- *
- * PERF_COUNT_HW_CPU_CYCLES
- *     Cycles not in halted state
- *
- * PERF_COUNT_HW_REF_CPU_CYCLES
- *     Reference cycles not in halted state, same as PERF_COUNT_HW_CPU_CYCLES
- *     for now as we don't do Dynamic Voltage/Frequency Scaling (yet)
- *
- * PERF_COUNT_HW_BUS_CYCLES
- *     Unclear what this means, Intel uses 0x013c, which according to
- *     their datasheet means "unhalted reference cycles". It sounds similar
- *     to PERF_COUNT_HW_REF_CPU_CYCLES, and we use the same counter for it.
+ * Some ARC pct quirks:
   *
   * PERF_COUNT_HW_STALLED_CYCLES_BACKEND
   * PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
@@ -91,21 +75,38 @@ struct arc_reg_cc_build {
   *     Note that I$ cache misses aren't counted by either of the two!
   */
  
+/*
+ * ARC PCT has hardware conditions with fixed "names" but variable "indexes"
+ * (based on a specific RTL build)
+ * Below is the static map between perf generic/arc specific event_id and
+ * h/w condition names.
+ * At the time of probe, we loop thru each index and find it's name to
+ * complete the mapping of perf event_id to h/w index as latter is needed
+ * to program the counter really
+ */
  static const char * const arc_pmu_ev_hw_map[] = {
+       /* count cycles */
         [PERF_COUNT_HW_CPU_CYCLES] = "crun",
         [PERF_COUNT_HW_REF_CPU_CYCLES] = "crun",
         [PERF_COUNT_HW_BUS_CYCLES] = "crun",
-       [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
-       [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail",
-       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+
         [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = "bflush",
         [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = "bstall",
-       [PERF_COUNT_ARC_DCLM] = "dclm",
-       [PERF_COUNT_ARC_DCSM] = "dcsm",
-       [PERF_COUNT_ARC_ICM] = "icm",
-       [PERF_COUNT_ARC_BPOK] = "bpok",
-       [PERF_COUNT_ARC_EDTLB] = "edtlb",
-       [PERF_COUNT_ARC_EITLB] = "eitlb",
+
+       /* counts condition */
+       [PERF_COUNT_HW_INSTRUCTIONS] = "iall",
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmp",
+       [PERF_COUNT_ARC_BPOK]         = "bpok",   /* NP-NT, PT-T, PNT-NT */
+       [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */
+
+       [PERF_COUNT_ARC_LDC] = "imemrdc",       /* Instr: mem read cached */
+       [PERF_COUNT_ARC_STC] = "imemwrc",       /* Instr: mem write cached */
+
+       [PERF_COUNT_ARC_DCLM] = "dclm",         /* D-cache Load Miss */
+       [PERF_COUNT_ARC_DCSM] = "dcsm",         /* D-cache Store Miss */
+       [PERF_COUNT_ARC_ICM] = "icm",           /* I-cache Miss */
+       [PERF_COUNT_ARC_EDTLB] = "edtlb",       /* D-TLB Miss */
+       [PERF_COUNT_ARC_EITLB] = "eitlb",       /* I-TLB Miss */
  };
  
  #define C(_x)                  PERF_COUNT_HW_CACHE_##_x
@@ -114,11 +115,11 @@ static const char * const arc_pmu_ev_hw_map[] = {
  static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
         [C(L1D)] = {
                 [C(OP_READ)] = {
-                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_ACCESS)]      = PERF_COUNT_ARC_LDC,
                         [C(RESULT_MISS)]        = PERF_COUNT_ARC_DCLM,
                 },
                 [C(OP_WRITE)] = {
-                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_ACCESS)]      = PERF_COUNT_ARC_STC,
                         [C(RESULT_MISS)]        = PERF_COUNT_ARC_DCSM,
                 },
                 [C(OP_PREFETCH)] = {
@@ -128,7 +129,7 @@ static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
         },
         [C(L1I)] = {
                 [C(OP_READ)] = {
-                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_ACCESS)]      = PERF_COUNT_HW_INSTRUCTIONS,
                         [C(RESULT_MISS)]        = PERF_COUNT_ARC_ICM,
                 },
                 [C(OP_WRITE)] = {
@@ -156,9 +157,10 @@ static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
         },
         [C(DTLB)] = {
                 [C(OP_READ)] = {
-                       [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_ACCESS)]      = PERF_COUNT_ARC_LDC,
                         [C(RESULT_MISS)]        = PERF_COUNT_ARC_EDTLB,
                 },
+                       /* DTLB LD/ST Miss not segregated by h/w*/
                 [C(OP_WRITE)] = {
                         [C(RESULT_ACCESS)]      = CACHE_OP_UNSUPPORTED,
                         [C(RESULT_MISS)]        = CACHE_OP_UNSUPPORTED,
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c

index ae1c485..fd2ec50 100644 (file)
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -16,6 +16,7 @@
  #include <linux/perf_event.h>
  #include <linux/platform_device.h>
  #include <asm/arcregs.h>
+#include <asm/stacktrace.h>
  
  struct arc_pmu {
         struct pmu      pmu;
@@ -25,6 +26,46 @@ struct arc_pmu {
         int             ev_hw_idx[PERF_COUNT_ARC_HW_MAX];
  };
  
+struct arc_callchain_trace {
+       int depth;
+       void *perf_stuff;
+};
+
+static int callchain_trace(unsigned int addr, void *data)
+{
+       struct arc_callchain_trace *ctrl = data;
+       struct perf_callchain_entry *entry = ctrl->perf_stuff;
+       perf_callchain_store(entry, addr);
+
+       if (ctrl->depth++ < 3)
+               return 0;
+
+       return -1;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       struct arc_callchain_trace ctrl = {
+               .depth = 0,
+               .perf_stuff = entry,
+       };
+
+       arc_unwind_core(NULL, regs, callchain_trace, &ctrl);
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       /*
+        * User stack can't be unwound trivially with kernel dwarf unwinder
+        * So for now just record the user PC
+        */
+       perf_callchain_store(entry, instruction_pointer(regs));
+}
+
+static struct arc_pmu *arc_pmu;
+
  /* read counter #idx; note that counter# != event# on ARC! */
  static uint64_t arc_pmu_read_counter(int idx)
  {
@@ -47,7 +88,6 @@ static uint64_t arc_pmu_read_counter(int idx)
  static void arc_perf_event_update(struct perf_event *event,
                                   struct hw_perf_event *hwc, int idx)
  {
-       struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
         uint64_t prev_raw_count, new_raw_count;
         int64_t delta;
  
@@ -89,13 +129,16 @@ static int arc_pmu_cache_event(u64 config)
         if (ret == CACHE_OP_UNSUPPORTED)
                 return -ENOENT;
  
+       pr_debug("init cache event: type/op/result %d/%d/%d with h/w %d \'%s\'\n",
+                cache_type, cache_op, cache_result, ret,
+                arc_pmu_ev_hw_map[ret]);
+
         return ret;
  }
  
  /* initializes hw_perf_event structure if event is supported */
  static int arc_pmu_event_init(struct perf_event *event)
  {
-       struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
         struct hw_perf_event *hwc = &event->hw;
         int ret;
  
@@ -106,8 +149,9 @@ static int arc_pmu_event_init(struct perf_event *event)
                 if (arc_pmu->ev_hw_idx[event->attr.config] < 0)
                         return -ENOENT;
                 hwc->config = arc_pmu->ev_hw_idx[event->attr.config];
-               pr_debug("initializing event %d with cfg %d\n",
-                        (int) event->attr.config, (int) hwc->config);
+               pr_debug("init event %d with h/w %d \'%s\'\n",
+                        (int) event->attr.config, (int) hwc->config,
+                        arc_pmu_ev_hw_map[event->attr.config]);
                 return 0;
         case PERF_TYPE_HW_CACHE:
                 ret = arc_pmu_cache_event(event->attr.config);
@@ -183,8 +227,6 @@ static void arc_pmu_stop(struct perf_event *event, int flags)
  
  static void arc_pmu_del(struct perf_event *event, int flags)
  {
-       struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
-
         arc_pmu_stop(event, PERF_EF_UPDATE);
         __clear_bit(event->hw.idx, arc_pmu->used_mask);
  
@@ -194,7 +236,6 @@ static void arc_pmu_del(struct perf_event *event, int flags)
  /* allocate hardware counter and optionally start counting */
  static int arc_pmu_add(struct perf_event *event, int flags)
  {
-       struct arc_pmu *arc_pmu = container_of(event->pmu, struct arc_pmu, pmu);
         struct hw_perf_event *hwc = &event->hw;
         int idx = hwc->idx;
  
@@ -247,10 +288,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
         BUG_ON(pct_bcr.c > ARC_PMU_MAX_HWEVENTS);
  
         READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
-       if (!cc_bcr.v) {
-               pr_err("Performance counters exist, but no countable conditions?\n");
-               return -ENODEV;
-       }
+       BUG_ON(!cc_bcr.v); /* Counters exist but No countable conditions ? */
  
         arc_pmu = devm_kzalloc(&pdev->dev, sizeof(struct arc_pmu), GFP_KERNEL);
         if (!arc_pmu)
@@ -263,19 +301,22 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
                 arc_pmu->n_counters, arc_pmu->counter_size, cc_bcr.c);
  
         cc_name.str[8] = 0;
-       for (i = 0; i < PERF_COUNT_HW_MAX; i++)
+       for (i = 0; i < PERF_COUNT_ARC_HW_MAX; i++)
                 arc_pmu->ev_hw_idx[i] = -1;
  
+       /* loop thru all available h/w condition indexes */
         for (j = 0; j < cc_bcr.c; j++) {
                 write_aux_reg(ARC_REG_CC_INDEX, j);
                 cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0);
                 cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1);
+
+               /* See if it has been mapped to a perf event_id */
                 for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) {
                         if (arc_pmu_ev_hw_map[i] &&
                             !strcmp(arc_pmu_ev_hw_map[i], cc_name.str) &&
                             strlen(arc_pmu_ev_hw_map[i])) {
-                               pr_debug("mapping %d to idx %d with name %s\n",
-                                        i, j, cc_name.str);
+                               pr_debug("mapping perf event %2d to h/w event \'%8s\' (idx %d)\n",
+                                        i, cc_name.str, j);
                                 arc_pmu->ev_hw_idx[i] = j;
                         }
                 }
@@ -302,7 +343,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
  
  #ifdef CONFIG_OF
  static const struct of_device_id arc_pmu_match[] = {
-       { .compatible = "snps,arc700-pmu" },
+       { .compatible = "snps,arc700-pct" },
         {},
  };
  MODULE_DEVICE_TABLE(of, arc_pmu_match);
@@ -310,7 +351,7 @@ MODULE_DEVICE_TABLE(of, arc_pmu_match);
  
  static struct platform_driver arc_pmu_driver = {
         .driver = {
-               .name           = "arc700-pmu",
+               .name           = "arc700-pct",
                 .of_match_table = of_match_ptr(arc_pmu_match),
         },
         .probe          = arc_pmu_device_probe,
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c

index f46efd1..e095c55 100644 (file)
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -49,7 +49,10 @@ void arch_cpu_idle(void)
  
  asmlinkage void ret_from_fork(void);
  
-/* Layout of Child kernel mode stack as setup at the end of this function is
+/*
+ * Copy architecture-specific thread state
+ *
+ * Layout of Child kernel mode stack as setup at the end of this function is
   *
   * |     ...        |
   * |     ...        |
@@ -81,7 +84,7 @@ asmlinkage void ret_from_fork(void);
   * ------------------  <===== END of PAGE
   */
  int copy_thread(unsigned long clone_flags,
-               unsigned long usp, unsigned long arg,
+               unsigned long usp, unsigned long kthread_arg,
                 struct task_struct *p)
  {
         struct pt_regs *c_regs;        /* child's pt_regs */
@@ -112,7 +115,7 @@ int copy_thread(unsigned long clone_flags,
         if (unlikely(p->flags & PF_KTHREAD)) {
                 memset(c_regs, 0, sizeof(struct pt_regs));
  
-               c_callee->r13 = arg; /* argument to kernel thread */
+               c_callee->r13 = kthread_arg;
                 c_callee->r14 = usp;  /* function */
  
                 return 0;
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c

index 900f68a..1d167c6 100644 (file)
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -120,7 +120,10 @@ static void read_arc_build_cfg_regs(void)
         READ_BCR(ARC_REG_SMART_BCR, bcr);
         cpu->extn.smart = bcr.ver ? 1 : 0;
  
-       cpu->extn.debug = cpu->extn.ap | cpu->extn.smart;
+       READ_BCR(ARC_REG_RTT_BCR, bcr);
+       cpu->extn.rtt = bcr.ver ? 1 : 0;
+
+       cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt;
  }
  
  static const struct cpuinfo_data arc_cpu_tbl[] = {
diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c

index 3eadfda..c927aa8 100644 (file)
--- a/arch/arc/kernel/traps.c
+++ b/arch/arc/kernel/traps.c
@@ -42,7 +42,7 @@ void die(const char *str, struct pt_regs *regs, unsigned long address)
   *  -for kernel, chk if due to copy_(to|from)_user, otherwise die()
   */
  static noinline int
-handle_exception(const char *str, struct pt_regs *regs, siginfo_t *info)
+unhandled_exception(const char *str, struct pt_regs *regs, siginfo_t *info)
  {
         if (user_mode(regs)) {
                 struct task_struct *tsk = current;
@@ -71,7 +71,7 @@ int name(unsigned long address, struct pt_regs *regs) \
                 .si_code  = sicode,             \
                 .si_addr = (void __user *)address,      \
         };                                      \
-       return handle_exception(str, regs, &info);\
+       return unhandled_exception(str, regs, &info);\
  }
  
  /*
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c

index 5234123..d44eedd 100644 (file)
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -71,7 +71,7 @@ early_param("initrd", early_initrd);
   */
  void __init setup_arch_memory(void)
  {
-       unsigned long zones_size[MAX_NR_ZONES] = { 0, 0 };
+       unsigned long zones_size[MAX_NR_ZONES];
         unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz;
  
         init_mm.start_code = (unsigned long)_text;
@@ -90,7 +90,7 @@ void __init setup_arch_memory(void)
         /*------------- externs in mm need setting up ---------------*/
  
         /* first page of system - kernel .vector starts here */
-       min_low_pfn = PFN_DOWN(CONFIG_LINUX_LINK_BASE);
+       min_low_pfn = ARCH_PFN_OFFSET;
  
         /* Last usable page of low mem (no HIGHMEM yet for ARC port) */
         max_low_pfn = max_pfn = PFN_DOWN(end_mem);
@@ -111,7 +111,7 @@ void __init setup_arch_memory(void)
  
         /*-------------- node setup --------------------------------*/
         memset(zones_size, 0, sizeof(zones_size));
-       zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn;
+       zones_size[ZONE_NORMAL] = max_mapnr;
  
         /*
          * We can't use the helper free_area_init(zones[]) because it uses
@@ -123,6 +123,8 @@ void __init setup_arch_memory(void)
                             zones_size,         /* num pages per zone */
                             min_low_pfn,        /* first pfn of node */
                             NULL);              /* NO holes */
+
+       high_memory = (void *)end_mem;
  }
  
  /*
@@ -133,7 +135,6 @@ void __init setup_arch_memory(void)
   */
  void __init mem_init(void)
  {
-       high_memory = (void *)(CONFIG_LINUX_LINK_BASE + arc_mem_sz);
         free_all_bootmem();
         mem_init_print_info(NULL);
  }
diff --git a/arch/arm/boot/dts/qcom-ipq8064.dtsi b/arch/arm/boot/dts/qcom-ipq8064.dtsi

index 1bc5fdd..9f727d8 100644 (file)
--- a/arch/arm/boot/dts/qcom-ipq8064.dtsi
+++ b/arch/arm/boot/dts/qcom-ipq8064.dtsi
@@ -61,6 +61,14 @@
                 };
         };
  
+       clocks {
+               sleep_clk: sleep_clk {
+                       compatible = "fixed-clock";
+                       clock-frequency = <32768>;
+                       #clock-cells = <0>;
+               };
+       };
+
         soc: soc {
                 #address-cells = <1>;
                 #size-cells = <1>;
@@ -105,10 +113,14 @@
                         compatible = "qcom,kpss-timer", "qcom,msm-timer";
                         interrupts = <1 1 0x301>,
                                      <1 2 0x301>,
-                                    <1 3 0x301>;
+                                    <1 3 0x301>,
+                                    <1 4 0x301>,
+                                    <1 5 0x301>;
                         reg = <0x0200a000 0x100>;
                         clock-frequency = <25000000>,
                                           <32768>;
+                       clocks = <&sleep_clk>;
+                       clock-names = "sleep";
                         cpu-offset = <0x80000>;
                 };
  
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h

index 2499867..df3f60c 100644 (file)
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -195,8 +195,14 @@ struct kvm_arch_memory_slot {
  #define KVM_ARM_IRQ_CPU_IRQ            0
  #define KVM_ARM_IRQ_CPU_FIQ            1
  
-/* Highest supported SPI, from VGIC_NR_IRQS */
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
  #define KVM_ARM_IRQ_GIC_MAX            127
+#endif
  
  /* One single KVM irqchip, ie. the VGIC */
  #define KVM_NR_IRQCHIPS          1
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S

index cc176b6..aebfbf7 100644 (file)
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -80,9 +80,9 @@ ENTRY(stext)
         ldr     r13, =__mmap_switched           @ address to jump to after
                                                 @ initialising sctlr
         adr     lr, BSYM(1f)                    @ return (PIC) address
- ARM(  add     pc, r10, #PROCINFO_INITFUNC     )
- THUMB(        add     r12, r10, #PROCINFO_INITFUNC    )
- THUMB(        ret     r12                             )
+       ldr     r12, [r10, #PROCINFO_INITFUNC]
+       add     r12, r12, r10
+       ret     r12
   1:    b       __after_proc_init
  ENDPROC(stext)
  
@@ -117,9 +117,9 @@ ENTRY(secondary_startup)
  
         adr     lr, BSYM(__after_proc_init)     @ return address
         mov     r13, r12                        @ __secondary_switched address
- ARM(  add     pc, r10, #PROCINFO_INITFUNC     )
- THUMB(        add     r12, r10, #PROCINFO_INITFUNC    )
- THUMB(        ret     r12                             )
+       ldr     r12, [r10, #PROCINFO_INITFUNC]
+       add     r12, r12, r10
+       ret     r12
  ENDPROC(secondary_startup)
  
  ENTRY(__secondary_switched)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c

index 6f53645..d9631ec 100644 (file)
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -671,8 +671,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
                 if (!irqchip_in_kernel(kvm))
                         return -ENXIO;
  
-               if (irq_num < VGIC_NR_PRIVATE_IRQS ||
-                   irq_num > KVM_ARM_IRQ_GIC_MAX)
+               if (irq_num < VGIC_NR_PRIVATE_IRQS)
                         return -EINVAL;
  
                 return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c

index 36aaeb1..bf37e3c 100644 (file)
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -754,12 +754,12 @@ static struct platform_device vcc_sdhi1 = {
  };
  
  /* SDHI0 */
-static struct sh_mobile_sdhi_info sdhi0_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
+static struct tmio_mmc_data sdhi0_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
                           MMC_CAP_POWER_OFF_CARD,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
         .cd_gpio        = 167,
  };
  
@@ -796,12 +796,12 @@ static struct platform_device sdhi0_device = {
  };
  
  /* SDHI1 */
-static struct sh_mobile_sdhi_info sdhi1_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI1_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI1_RX,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
+static struct tmio_mmc_data sdhi1_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI1_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI1_RX,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
                           MMC_CAP_POWER_OFF_CARD,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_USE_GPIO_CD,
         /* Port72 cannot generate IRQs, will be used in polling mode. */
         .cd_gpio        = 72,
  };
diff --git a/arch/arm/mach-shmobile/board-bockw.c b/arch/arm/mach-shmobile/board-bockw.c

index f27b5a8..25558d1 100644 (file)
--- a/arch/arm/mach-shmobile/board-bockw.c
+++ b/arch/arm/mach-shmobile/board-bockw.c
@@ -201,12 +201,12 @@ static struct rcar_phy_platform_data usb_phy_platform_data __initdata =
  
  
  /* SDHI */
-static struct sh_mobile_sdhi_info sdhi0_info __initdata = {
-       .dma_slave_tx   = HPBDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = HPBDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED,
-       .tmio_ocr_mask  = MMC_VDD_165_195 | MMC_VDD_32_33 | MMC_VDD_33_34,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT,
+static struct tmio_mmc_data sdhi0_info __initdata = {
+       .chan_priv_tx   = (void *)HPBDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)HPBDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED,
+       .ocr_mask       = MMC_VDD_165_195 | MMC_VDD_32_33 | MMC_VDD_33_34,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT,
  };
  
  static struct resource sdhi0_resources[] __initdata = {
@@ -683,7 +683,7 @@ static void __init bockw_init(void)
                 platform_device_register_resndata(
                         NULL, "sh_mobile_sdhi", 0,
                         sdhi0_resources, ARRAY_SIZE(sdhi0_resources),
-                       &sdhi0_info, sizeof(struct sh_mobile_sdhi_info));
+                       &sdhi0_info, sizeof(struct tmio_mmc_data));
         }
  
         /* for Audio */
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c

index 7c9b63b..260d831 100644 (file)
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -442,11 +442,11 @@ static struct platform_device vcc_sdhi2 = {
  };
  
  /* SDHI */
-static struct sh_mobile_sdhi_info sdhi0_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
+static struct tmio_mmc_data sdhi0_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ |
                           MMC_CAP_POWER_OFF_CARD,
  };
  
@@ -484,13 +484,13 @@ static struct platform_device sdhi0_device = {
  };
  
  /* Micro SD */
-static struct sh_mobile_sdhi_info sdhi2_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI2_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI2_RX,
-       .tmio_flags     = TMIO_MMC_HAS_IDLE_WAIT |
+static struct tmio_mmc_data sdhi2_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI2_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI2_RX,
+       .flags          = TMIO_MMC_HAS_IDLE_WAIT |
                           TMIO_MMC_USE_GPIO_CD |
                           TMIO_MMC_WRPROTECT_DISABLE,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED | MMC_CAP_POWER_OFF_CARD,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED | MMC_CAP_POWER_OFF_CARD,
         .cd_gpio        = 13,
  };
  
diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c

index 598f704..51db288 100644 (file)
--- a/arch/arm/mach-shmobile/board-marzen.c
+++ b/arch/arm/mach-shmobile/board-marzen.c
@@ -122,11 +122,11 @@ static struct resource sdhi0_resources[] = {
         },
  };
  
-static struct sh_mobile_sdhi_info sdhi0_platform_data = {
-       .dma_slave_tx = HPBDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx = HPBDMA_SLAVE_SDHI0_RX,
-       .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
-       .tmio_caps = MMC_CAP_SD_HIGHSPEED,
+static struct tmio_mmc_data sdhi0_platform_data = {
+       .chan_priv_tx = (void *)HPBDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx = (void *)HPBDMA_SLAVE_SDHI0_RX,
+       .flags        = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT,
+       .capabilities = MMC_CAP_SD_HIGHSPEED,
  };
  
  static struct platform_device sdhi0_device = {
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig

index b764431..b4f92b9 100644 (file)
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -827,7 +827,7 @@ config KUSER_HELPERS
  
  config VDSO
         bool "Enable VDSO for acceleration of some system calls"
-       depends on AEABI && MMU
+       depends on AEABI && MMU && CPU_V7
         default y if ARM_ARCH_TIMER
         select GENERIC_TIME_VSYSCALL
         help
diff --git a/arch/arm/vdso/.gitignore b/arch/arm/vdso/.gitignore

index f8b69d8..6b47f6e 100644 (file)
--- a/arch/arm/vdso/.gitignore
+++ b/arch/arm/vdso/.gitignore
@@ -1 +1,3 @@
  vdso.lds
+vdso.so.raw
+vdsomunge
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile

index bab0a8b..8aa7910 100644 (file)
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -10,8 +10,8 @@ ccflags-y := -shared -fPIC -fno-common -fno-builtin -fno-stack-protector
  ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 -DDISABLE_BRANCH_PROFILING
  ccflags-y += -Wl,--no-undefined $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
  
-obj-y += vdso.o
-extra-y += vdso.lds
+obj-$(CONFIG_VDSO) += vdso.o
+extra-$(CONFIG_VDSO) += vdso.lds
  CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
  
  CFLAGS_REMOVE_vdso.o = -pg
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig

index da5f20e..4269dba 100644 (file)
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1,5 +1,7 @@
  config ARM64
         def_bool y
+       select ACPI_GENERIC_GSI if ACPI
+       select ACPI_REDUCED_HARDWARE_ONLY if ACPI
         select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
         select ARCH_HAS_ELF_RANDOMIZE
         select ARCH_HAS_GCOV_PROFILE_ALL
@@ -758,6 +760,8 @@ source "drivers/Kconfig"
  
  source "drivers/firmware/Kconfig"
  
+source "drivers/acpi/Kconfig"
+
  source "fs/Kconfig"
  
  source "arch/arm64/kvm/Kconfig"
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi

index e74f6e0..c8d3e0e 100644 (file)
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -102,6 +102,7 @@
                 #address-cells = <2>;
                 #size-cells = <2>;
                 ranges;
+               dma-ranges = <0x0 0x0 0x0 0x0 0x400 0x0>;
  
                 clocks {
                         #address-cells = <2>;
@@ -362,6 +363,15 @@
                                 reg-names = "csr-reg";
                                 clock-output-names = "pcie4clk";
                         };
+
+                       dmaclk: dmaclk@1f27c000 {
+                               compatible = "apm,xgene-device-clock";
+                               #clock-cells = <1>;
+                               clocks = <&socplldiv2 0>;
+                               reg = <0x0 0x1f27c000 0x0 0x1000>;
+                               reg-names = "csr-reg";
+                               clock-output-names = "dmaclk";
+                       };
                 };
  
                 pcie0: pcie@1f2b0000 {
@@ -684,5 +694,21 @@
                         interrupts = <0x0 0x41 0x4>;
                         clocks = <&rngpkaclk 0>;
                 };
+
+               dma: dma@1f270000 {
+                       compatible = "apm,xgene-storm-dma";
+                       device_type = "dma";
+                       reg = <0x0 0x1f270000 0x0 0x10000>,
+                             <0x0 0x1f200000 0x0 0x10000>,
+                             <0x0 0x1b008000 0x0 0x2000>,
+                             <0x0 0x1054a000 0x0 0x100>;
+                       interrupts = <0x0 0x82 0x4>,
+                                    <0x0 0xb8 0x4>,
+                                    <0x0 0xb9 0x4>,
+                                    <0x0 0xba 0x4>,
+                                    <0x0 0xbb 0x4>;
+                       dma-coherent;
+                       clocks = <&dmaclk 0>;
+               };
         };
  };
diff --git a/arch/arm64/include/asm/acenv.h b/arch/arm64/include/asm/acenv.h

new file mode 100644 (file)

index 0000000..b49166f
--- /dev/null
+++ b/arch/arm64/include/asm/acenv.h
@@ -0,0 +1,18 @@
+/*
+ * ARM64 specific ACPICA environments and implementation
+ *
+ * Copyright (C) 2014, Linaro Ltd.
+ *   Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *   Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _ASM_ACENV_H
+#define _ASM_ACENV_H
+
+/* It is required unconditionally by ACPI core, update it when needed. */
+
+#endif /* _ASM_ACENV_H */
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h

new file mode 100644 (file)

index 0000000..59c05d8
--- /dev/null
+++ b/arch/arm64/include/asm/acpi.h
@@ -0,0 +1,96 @@
+/*
+ *  Copyright (C) 2013-2014, Linaro Ltd.
+ *     Author: Al Stone <al.stone@linaro.org>
+ *     Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *     Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation;
+ */
+
+#ifndef _ASM_ACPI_H
+#define _ASM_ACPI_H
+
+#include <linux/mm.h>
+#include <linux/irqchip/arm-gic-acpi.h>
+
+#include <asm/cputype.h>
+#include <asm/smp_plat.h>
+
+/* Basic configuration for ACPI */
+#ifdef CONFIG_ACPI
+/* ACPI table mapping after acpi_gbl_permanent_mmap is set */
+static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
+                                           acpi_size size)
+{
+       if (!page_is_ram(phys >> PAGE_SHIFT))
+               return ioremap(phys, size);
+
+       return ioremap_cache(phys, size);
+}
+#define acpi_os_ioremap acpi_os_ioremap
+
+typedef u64 phys_cpuid_t;
+#define PHYS_CPUID_INVALID INVALID_HWID
+
+#define acpi_strict 1  /* No out-of-spec workarounds on ARM64 */
+extern int acpi_disabled;
+extern int acpi_noirq;
+extern int acpi_pci_disabled;
+
+/* 1 to indicate PSCI 0.2+ is implemented */
+static inline bool acpi_psci_present(void)
+{
+       return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_COMPLIANT;
+}
+
+/* 1 to indicate HVC must be used instead of SMC as the PSCI conduit */
+static inline bool acpi_psci_use_hvc(void)
+{
+       return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC;
+}
+
+static inline void disable_acpi(void)
+{
+       acpi_disabled = 1;
+       acpi_pci_disabled = 1;
+       acpi_noirq = 1;
+}
+
+static inline void enable_acpi(void)
+{
+       acpi_disabled = 0;
+       acpi_pci_disabled = 0;
+       acpi_noirq = 0;
+}
+
+/*
+ * The ACPI processor driver for ACPI core code needs this macro
+ * to find out this cpu was already mapped (mapping from CPU hardware
+ * ID to CPU logical ID) or not.
+ */
+#define cpu_physical_id(cpu) cpu_logical_map(cpu)
+
+/*
+ * It's used from ACPI core in kdump to boot UP system with SMP kernel,
+ * with this check the ACPI core will not override the CPU index
+ * obtained from GICC with 0 and not print some error message as well.
+ * Since MADT must provide at least one GICC structure for GIC
+ * initialization, CPU will be always available in MADT on ARM64.
+ */
+static inline bool acpi_has_cpu_in_madt(void)
+{
+       return true;
+}
+
+static inline void arch_fix_phys_package_id(int num, u32 slot) { }
+void __init acpi_init_cpus(void);
+
+#else
+static inline bool acpi_psci_present(void) { return false; }
+static inline bool acpi_psci_use_hvc(void) { return false; }
+static inline void acpi_init_cpus(void) { }
+#endif /* CONFIG_ACPI */
+
+#endif /*_ASM_ACPI_H*/
diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h

index da301ee..5a31d67 100644 (file)
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -66,5 +66,6 @@ struct cpu_operations {
  extern const struct cpu_operations *cpu_ops[NR_CPUS];
  int __init cpu_read_ops(struct device_node *dn, int cpu);
  void __init cpu_read_bootcpu_ops(void);
+const struct cpu_operations *cpu_get_ops(const char *name);
  
  #endif /* ifndef __ASM_CPU_OPS_H */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h

index 9264956..95e6b6d 100644 (file)
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -62,6 +62,9 @@ void __init early_fixmap_init(void);
  
  #define __early_set_fixmap __set_fixmap
  
+#define __late_set_fixmap __set_fixmap
+#define __late_clear_fixmap(idx) __set_fixmap((idx), 0, FIXMAP_PAGE_CLEAR)
+
  extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot);
  
  #include <asm-generic/fixmap.h>
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h

index 94c5367..bbb251b 100644 (file)
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -1,6 +1,8 @@
  #ifndef __ASM_IRQ_H
  #define __ASM_IRQ_H
  
+#include <linux/irqchip/arm-gic-acpi.h>
+
  #include <asm-generic/irq.h>
  
  struct pt_regs;
@@ -8,4 +10,15 @@ struct pt_regs;
  extern void migrate_irqs(void);
  extern void set_handle_irq(void (*handle_irq)(struct pt_regs *));
  
+static inline void acpi_irq_init(void)
+{
+       /*
+        * Hardcode ACPI IRQ chip initialization to GICv2 for now.
+        * Proper irqchip infrastructure will be implemented along with
+        * incoming  GICv2m|GICv3|ITS bits.
+        */
+       acpi_gic_init();
+}
+#define acpi_irq_init acpi_irq_init
+
  #endif
diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h

index 872ba93..b008a72 100644 (file)
--- a/arch/arm64/include/asm/pci.h
+++ b/arch/arm64/include/asm/pci.h
@@ -27,6 +27,12 @@
  extern int isa_dma_bridge_buggy;
  
  #ifdef CONFIG_PCI
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+       /* no legacy IRQ on arm64 */
+       return -ENODEV;
+}
+
  static inline int pci_proc_domain(struct pci_bus *bus)
  {
         return 1;
diff --git a/arch/arm64/include/asm/psci.h b/arch/arm64/include/asm/psci.h

index e5312ea..2454bc5 100644 (file)
--- a/arch/arm64/include/asm/psci.h
+++ b/arch/arm64/include/asm/psci.h
@@ -14,6 +14,7 @@
  #ifndef __ASM_PSCI_H
  #define __ASM_PSCI_H
  
-int psci_init(void);
+int psci_dt_init(void);
+int psci_acpi_init(void);
  
  #endif /* __ASM_PSCI_H */
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h

index 780f82c..bf22650 100644 (file)
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -39,9 +39,10 @@ extern void show_ipi_list(struct seq_file *p, int prec);
  extern void handle_IPI(int ipinr, struct pt_regs *regs);
  
  /*
- * Setup the set of possible CPUs (via set_cpu_possible)
+ * Discover the set of possible CPUs and determine their
+ * SMP operations.
   */
-extern void smp_init_cpus(void);
+extern void of_smp_init_cpus(void);
  
  /*
   * Provide a function to raise an IPI cross call on CPUs in callmap.
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h

index c154c0b..d268320 100644 (file)
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -188,8 +188,14 @@ struct kvm_arch_memory_slot {
  #define KVM_ARM_IRQ_CPU_IRQ            0
  #define KVM_ARM_IRQ_CPU_FIQ            1
  
-/* Highest supported SPI, from VGIC_NR_IRQS */
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
  #define KVM_ARM_IRQ_GIC_MAX            127
+#endif
  
  /* One single KVM irqchip, ie. the VGIC */
  #define KVM_NR_IRQCHIPS          1
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile

index b12e15b..426d076 100644 (file)
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -35,6 +35,7 @@ arm64-obj-$(CONFIG_KGDB)              += kgdb.o
  arm64-obj-$(CONFIG_EFI)                        += efi.o efi-stub.o efi-entry.o
  arm64-obj-$(CONFIG_PCI)                        += pci.o
  arm64-obj-$(CONFIG_ARMV8_DEPRECATED)   += armv8_deprecated.o
+arm64-obj-$(CONFIG_ACPI)               += acpi.o
  
  obj-y                                  += $(arm64-obj-y) vdso/
  obj-m                                  += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c

new file mode 100644 (file)

index 0000000..8b83955
--- /dev/null
+++ b/arch/arm64/kernel/acpi.c
@@ -0,0 +1,345 @@
+/*
+ *  ARM64 Specific Low-Level ACPI Boot Support
+ *
+ *  Copyright (C) 2013-2014, Linaro Ltd.
+ *     Author: Al Stone <al.stone@linaro.org>
+ *     Author: Graeme Gregory <graeme.gregory@linaro.org>
+ *     Author: Hanjun Guo <hanjun.guo@linaro.org>
+ *     Author: Tomasz Nowicki <tomasz.nowicki@linaro.org>
+ *     Author: Naresh Bhat <naresh.bhat@linaro.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "ACPI: " fmt
+
+#include <linux/acpi.h>
+#include <linux/bootmem.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/smp.h>
+
+#include <asm/cputype.h>
+#include <asm/cpu_ops.h>
+#include <asm/smp_plat.h>
+
+int acpi_noirq = 1;            /* skip ACPI IRQ initialization */
+int acpi_disabled = 1;
+EXPORT_SYMBOL(acpi_disabled);
+
+int acpi_pci_disabled = 1;     /* skip ACPI PCI scan and IRQ initialization */
+EXPORT_SYMBOL(acpi_pci_disabled);
+
+/* Processors with enabled flag and sane MPIDR */
+static int enabled_cpus;
+
+/* Boot CPU is valid or not in MADT */
+static bool bootcpu_valid  __initdata;
+
+static bool param_acpi_off __initdata;
+static bool param_acpi_force __initdata;
+
+static int __init parse_acpi(char *arg)
+{
+       if (!arg)
+               return -EINVAL;
+
+       /* "acpi=off" disables both ACPI table parsing and interpreter */
+       if (strcmp(arg, "off") == 0)
+               param_acpi_off = true;
+       else if (strcmp(arg, "force") == 0) /* force ACPI to be enabled */
+               param_acpi_force = true;
+       else
+               return -EINVAL; /* Core will print when we return error */
+
+       return 0;
+}
+early_param("acpi", parse_acpi);
+
+static int __init dt_scan_depth1_nodes(unsigned long node,
+                                      const char *uname, int depth,
+                                      void *data)
+{
+       /*
+        * Return 1 as soon as we encounter a node at depth 1 that is
+        * not the /chosen node.
+        */
+       if (depth == 1 && (strcmp(uname, "chosen") != 0))
+               return 1;
+       return 0;
+}
+
+/*
+ * __acpi_map_table() will be called before page_init(), so early_ioremap()
+ * or early_memremap() should be called here to for ACPI table mapping.
+ */
+char *__init __acpi_map_table(unsigned long phys, unsigned long size)
+{
+       if (!size)
+               return NULL;
+
+       return early_memremap(phys, size);
+}
+
+void __init __acpi_unmap_table(char *map, unsigned long size)
+{
+       if (!map || !size)
+               return;
+
+       early_memunmap(map, size);
+}
+
+/**
+ * acpi_map_gic_cpu_interface - generates a logical cpu number
+ * and map to MPIDR represented by GICC structure
+ */
+static void __init
+acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
+{
+       int i;
+       u64 mpidr = processor->arm_mpidr & MPIDR_HWID_BITMASK;
+       bool enabled = !!(processor->flags & ACPI_MADT_ENABLED);
+
+       if (mpidr == INVALID_HWID) {
+               pr_info("Skip MADT cpu entry with invalid MPIDR\n");
+               return;
+       }
+
+       total_cpus++;
+       if (!enabled)
+               return;
+
+       if (enabled_cpus >=  NR_CPUS) {
+               pr_warn("NR_CPUS limit of %d reached, Processor %d/0x%llx ignored.\n",
+                       NR_CPUS, total_cpus, mpidr);
+               return;
+       }
+
+       /* Check if GICC structure of boot CPU is available in the MADT */
+       if (cpu_logical_map(0) == mpidr) {
+               if (bootcpu_valid) {
+                       pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n",
+                              mpidr);
+                       return;
+               }
+
+               bootcpu_valid = true;
+       }
+
+       /*
+        * Duplicate MPIDRs are a recipe for disaster. Scan
+        * all initialized entries and check for
+        * duplicates. If any is found just ignore the CPU.
+        */
+       for (i = 1; i < enabled_cpus; i++) {
+               if (cpu_logical_map(i) == mpidr) {
+                       pr_err("Firmware bug, duplicate CPU MPIDR: 0x%llx in MADT\n",
+                              mpidr);
+                       return;
+               }
+       }
+
+       if (!acpi_psci_present())
+               return;
+
+       cpu_ops[enabled_cpus] = cpu_get_ops("psci");
+       /* CPU 0 was already initialized */
+       if (enabled_cpus) {
+               if (!cpu_ops[enabled_cpus])
+                       return;
+
+               if (cpu_ops[enabled_cpus]->cpu_init(NULL, enabled_cpus))
+                       return;
+
+               /* map the logical cpu id to cpu MPIDR */
+               cpu_logical_map(enabled_cpus) = mpidr;
+       }
+
+       enabled_cpus++;
+}
+
+static int __init
+acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
+                               const unsigned long end)
+{
+       struct acpi_madt_generic_interrupt *processor;
+
+       processor = (struct acpi_madt_generic_interrupt *)header;
+
+       if (BAD_MADT_ENTRY(processor, end))
+               return -EINVAL;
+
+       acpi_table_print_madt_entry(header);
+       acpi_map_gic_cpu_interface(processor);
+       return 0;
+}
+
+/* Parse GIC cpu interface entries in MADT for SMP init */
+void __init acpi_init_cpus(void)
+{
+       int count, i;
+
+       /*
+        * do a partial walk of MADT to determine how many CPUs
+        * we have including disabled CPUs, and get information
+        * we need for SMP init
+        */
+       count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
+                       acpi_parse_gic_cpu_interface, 0);
+
+       if (!count) {
+               pr_err("No GIC CPU interface entries present\n");
+               return;
+       } else if (count < 0) {
+               pr_err("Error parsing GIC CPU interface entry\n");
+               return;
+       }
+
+       if (!bootcpu_valid) {
+               pr_err("MADT missing boot CPU MPIDR, not enabling secondaries\n");
+               return;
+       }
+
+       for (i = 0; i < enabled_cpus; i++)
+               set_cpu_possible(i, true);
+
+       /* Make boot-up look pretty */
+       pr_info("%d CPUs enabled, %d CPUs total\n", enabled_cpus, total_cpus);
+}
+
+/*
+ * acpi_fadt_sanity_check() - Check FADT presence and carry out sanity
+ *                           checks on it
+ *
+ * Return 0 on success,  <0 on failure
+ */
+static int __init acpi_fadt_sanity_check(void)
+{
+       struct acpi_table_header *table;
+       struct acpi_table_fadt *fadt;
+       acpi_status status;
+       acpi_size tbl_size;
+       int ret = 0;
+
+       /*
+        * FADT is required on arm64; retrieve it to check its presence
+        * and carry out revision and ACPI HW reduced compliancy tests
+        */
+       status = acpi_get_table_with_size(ACPI_SIG_FADT, 0, &table, &tbl_size);
+       if (ACPI_FAILURE(status)) {
+               const char *msg = acpi_format_exception(status);
+
+               pr_err("Failed to get FADT table, %s\n", msg);
+               return -ENODEV;
+       }
+
+       fadt = (struct acpi_table_fadt *)table;
+
+       /*
+        * Revision in table header is the FADT Major revision, and there
+        * is a minor revision of FADT which was introduced by ACPI 5.1,
+        * we only deal with ACPI 5.1 or newer revision to get GIC and SMP
+        * boot protocol configuration data.
+        */
+       if (table->revision < 5 ||
+          (table->revision == 5 && fadt->minor_revision < 1)) {
+               pr_err("Unsupported FADT revision %d.%d, should be 5.1+\n",
+                      table->revision, fadt->minor_revision);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (!(fadt->flags & ACPI_FADT_HW_REDUCED)) {
+               pr_err("FADT not ACPI hardware reduced compliant\n");
+               ret = -EINVAL;
+       }
+
+out:
+       /*
+        * acpi_get_table_with_size() creates FADT table mapping that
+        * should be released after parsing and before resuming boot
+        */
+       early_acpi_os_unmap_memory(table, tbl_size);
+       return ret;
+}
+
+/*
+ * acpi_boot_table_init() called from setup_arch(), always.
+ *     1. find RSDP and get its address, and then find XSDT
+ *     2. extract all tables and checksums them all
+ *     3. check ACPI FADT revision
+ *     4. check ACPI FADT HW reduced flag
+ *
+ * We can parse ACPI boot-time tables such as MADT after
+ * this function is called.
+ *
+ * On return ACPI is enabled if either:
+ *
+ * - ACPI tables are initialized and sanity checks passed
+ * - acpi=force was passed in the command line and ACPI was not disabled
+ *   explicitly through acpi=off command line parameter
+ *
+ * ACPI is disabled on function return otherwise
+ */
+void __init acpi_boot_table_init(void)
+{
+       /*
+        * Enable ACPI instead of device tree unless
+        * - ACPI has been disabled explicitly (acpi=off), or
+        * - the device tree is not empty (it has more than just a /chosen node)
+        *   and ACPI has not been force enabled (acpi=force)
+        */
+       if (param_acpi_off ||
+           (!param_acpi_force && of_scan_flat_dt(dt_scan_depth1_nodes, NULL)))
+               return;
+
+       /*
+        * ACPI is disabled at this point. Enable it in order to parse
+        * the ACPI tables and carry out sanity checks
+        */
+       enable_acpi();
+
+       /*
+        * If ACPI tables are initialized and FADT sanity checks passed,
+        * leave ACPI enabled and carry on booting; otherwise disable ACPI
+        * on initialization error.
+        * If acpi=force was passed on the command line it forces ACPI
+        * to be enabled even if its initialization failed.
+        */
+       if (acpi_table_init() || acpi_fadt_sanity_check()) {
+               pr_err("Failed to init ACPI tables\n");
+               if (!param_acpi_force)
+                       disable_acpi();
+       }
+}
+
+void __init acpi_gic_init(void)
+{
+       struct acpi_table_header *table;
+       acpi_status status;
+       acpi_size tbl_size;
+       int err;
+
+       if (acpi_disabled)
+               return;
+
+       status = acpi_get_table_with_size(ACPI_SIG_MADT, 0, &table, &tbl_size);
+       if (ACPI_FAILURE(status)) {
+               const char *msg = acpi_format_exception(status);
+
+               pr_err("Failed to get MADT table, %s\n", msg);
+               return;
+       }
+
+       err = gic_v2_acpi_init(table);
+       if (err)
+               pr_err("Failed to initialize GIC IRQ controller");
+
+       early_acpi_os_unmap_memory((char *)table, tbl_size);
+}
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c

index cce9524..fb8ff9b 100644 (file)
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -35,7 +35,7 @@ static const struct cpu_operations *supported_cpu_ops[] __initconst = {
         NULL,
  };
  
-static const struct cpu_operations * __init cpu_get_ops(const char *name)
+const struct cpu_operations * __init cpu_get_ops(const char *name)
  {
         const struct cpu_operations **ops = supported_cpu_ops;
  
diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c

index 6f93c24..4095379 100644 (file)
--- a/arch/arm64/kernel/pci.c
+++ b/arch/arm64/kernel/pci.c
@@ -10,6 +10,7 @@
   *
   */
  
+#include <linux/acpi.h>
  #include <linux/init.h>
  #include <linux/io.h>
  #include <linux/kernel.h>
@@ -46,3 +47,27 @@ int pcibios_add_device(struct pci_dev *dev)
  
         return 0;
  }
+
+/*
+ * raw_pci_read/write - Platform-specific PCI config space access.
+ */
+int raw_pci_read(unsigned int domain, unsigned int bus,
+                 unsigned int devfn, int reg, int len, u32 *val)
+{
+       return -ENXIO;
+}
+
+int raw_pci_write(unsigned int domain, unsigned int bus,
+               unsigned int devfn, int reg, int len, u32 val)
+{
+       return -ENXIO;
+}
+
+#ifdef CONFIG_ACPI
+/* Root bridge scanning */
+struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
+{
+       /* TODO: Should be revisited when implementing PCI on ACPI */
+       return NULL;
+}
+#endif
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c

index 9b8a70a..ea18cb5 100644 (file)
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -15,6 +15,7 @@
  
  #define pr_fmt(fmt) "psci: " fmt
  
+#include <linux/acpi.h>
  #include <linux/init.h>
  #include <linux/of.h>
  #include <linux/smp.h>
@@ -24,6 +25,7 @@
  #include <linux/slab.h>
  #include <uapi/linux/psci.h>
  
+#include <asm/acpi.h>
  #include <asm/compiler.h>
  #include <asm/cpu_ops.h>
  #include <asm/errno.h>
@@ -273,39 +275,8 @@ static void psci_sys_poweroff(void)
         invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
  }
  
-/*
- * PSCI Function IDs for v0.2+ are well defined so use
- * standard values.
- */
-static int __init psci_0_2_init(struct device_node *np)
+static void __init psci_0_2_set_functions(void)
  {
-       int err, ver;
-
-       err = get_set_conduit_method(np);
-
-       if (err)
-               goto out_put_node;
-
-       ver = psci_get_version();
-
-       if (ver == PSCI_RET_NOT_SUPPORTED) {
-               /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */
-               pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
-               err = -EOPNOTSUPP;
-               goto out_put_node;
-       } else {
-               pr_info("PSCIv%d.%d detected in firmware.\n",
-                               PSCI_VERSION_MAJOR(ver),
-                               PSCI_VERSION_MINOR(ver));
-
-               if (PSCI_VERSION_MAJOR(ver) == 0 &&
-                               PSCI_VERSION_MINOR(ver) < 2) {
-                       err = -EINVAL;
-                       pr_err("Conflicting PSCI version detected.\n");
-                       goto out_put_node;
-               }
-       }
-
         pr_info("Using standard PSCI v0.2 function IDs\n");
         psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND;
         psci_ops.cpu_suspend = psci_cpu_suspend;
@@ -329,6 +300,60 @@ static int __init psci_0_2_init(struct device_node *np)
         arm_pm_restart = psci_sys_reset;
  
         pm_power_off = psci_sys_poweroff;
+}
+
+/*
+ * Probe function for PSCI firmware versions >= 0.2
+ */
+static int __init psci_probe(void)
+{
+       int ver = psci_get_version();
+
+       if (ver == PSCI_RET_NOT_SUPPORTED) {
+               /*
+                * PSCI versions >=0.2 mandates implementation of
+                * PSCI_VERSION.
+                */
+               pr_err("PSCI firmware does not comply with the v0.2 spec.\n");
+               return -EOPNOTSUPP;
+       } else {
+               pr_info("PSCIv%d.%d detected in firmware.\n",
+                               PSCI_VERSION_MAJOR(ver),
+                               PSCI_VERSION_MINOR(ver));
+
+               if (PSCI_VERSION_MAJOR(ver) == 0 &&
+                               PSCI_VERSION_MINOR(ver) < 2) {
+                       pr_err("Conflicting PSCI version detected.\n");
+                       return -EINVAL;
+               }
+       }
+
+       psci_0_2_set_functions();
+
+       return 0;
+}
+
+/*
+ * PSCI init function for PSCI versions >=0.2
+ *
+ * Probe based on PSCI PSCI_VERSION function
+ */
+static int __init psci_0_2_init(struct device_node *np)
+{
+       int err;
+
+       err = get_set_conduit_method(np);
+
+       if (err)
+               goto out_put_node;
+       /*
+        * Starting with v0.2, the PSCI specification introduced a call
+        * (PSCI_VERSION) that allows probing the firmware version, so
+        * that PSCI function IDs and version specific initialization
+        * can be carried out according to the specific version reported
+        * by firmware
+        */
+       err = psci_probe();
  
  out_put_node:
         of_node_put(np);
@@ -381,7 +406,7 @@ static const struct of_device_id psci_of_match[] __initconst = {
         {},
  };
  
-int __init psci_init(void)
+int __init psci_dt_init(void)
  {
         struct device_node *np;
         const struct of_device_id *matched_np;
@@ -396,6 +421,27 @@ int __init psci_init(void)
         return init_fn(np);
  }
  
+/*
+ * We use PSCI 0.2+ when ACPI is deployed on ARM64 and it's
+ * explicitly clarified in SBBR
+ */
+int __init psci_acpi_init(void)
+{
+       if (!acpi_psci_present()) {
+               pr_info("is not implemented in ACPI.\n");
+               return -EOPNOTSUPP;
+       }
+
+       pr_info("probing for conduit method from ACPI.\n");
+
+       if (acpi_psci_use_hvc())
+               invoke_psci_fn = __invoke_psci_fn_hvc;
+       else
+               invoke_psci_fn = __invoke_psci_fn_smc;
+
+       return psci_probe();
+}
+
  #ifdef CONFIG_SMP
  
  static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c

index 51ef972..7475313 100644 (file)
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -17,6 +17,7 @@
   * along with this program.  If not, see <http://www.gnu.org/licenses/>.
   */
  
+#include <linux/acpi.h>
  #include <linux/export.h>
  #include <linux/kernel.h>
  #include <linux/stddef.h>
@@ -46,6 +47,7 @@
  #include <linux/efi.h>
  #include <linux/personality.h>
  
+#include <asm/acpi.h>
  #include <asm/fixmap.h>
  #include <asm/cpu.h>
  #include <asm/cputype.h>
@@ -395,18 +397,27 @@ void __init setup_arch(char **cmdline_p)
         efi_init();
         arm64_memblock_init();
  
+       /* Parse the ACPI tables for possible boot-time configuration */
+       acpi_boot_table_init();
+
         paging_init();
         request_standard_resources();
  
         early_ioremap_reset();
  
-       unflatten_device_tree();
-
-       psci_init();
+       if (acpi_disabled) {
+               unflatten_device_tree();
+               psci_dt_init();
+               cpu_read_bootcpu_ops();
+#ifdef CONFIG_SMP
+               of_smp_init_cpus();
+#endif
+       } else {
+               psci_acpi_init();
+               acpi_init_cpus();
+       }
  
-       cpu_read_bootcpu_ops();
  #ifdef CONFIG_SMP
-       smp_init_cpus();
         smp_build_mpidr_hash();
  #endif
  
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c

index 714411f..2cb0081 100644 (file)
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -323,7 +323,7 @@ void __init smp_prepare_boot_cpu(void)
   * cpu logical map array containing MPIDR values related to logical
   * cpus. Assumes that cpu_logical_map(0) has already been initialized.
   */
-void __init smp_init_cpus(void)
+void __init of_smp_init_cpus(void)
  {
         struct device_node *dn = NULL;
         unsigned int i, cpu = 1;
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c

index 1a7125c..42f9195 100644 (file)
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -35,6 +35,7 @@
  #include <linux/delay.h>
  #include <linux/clocksource.h>
  #include <linux/clk-provider.h>
+#include <linux/acpi.h>
  
  #include <clocksource/arm_arch_timer.h>
  
@@ -72,6 +73,12 @@ void __init time_init(void)
  
         tick_setup_hrtimer_broadcast();
  
+       /*
+        * Since ACPI or FDT will only one be available in the system,
+        * we can use acpi_generic_timer_init() here safely
+        */
+       acpi_generic_timer_init();
+
         arch_timer_rate = arch_timer_get_rate();
         if (!arch_timer_rate)
                 panic("Unable to initialise architected timer.\n");
diff --git a/arch/blackfin/configs/BF518F-EZBRD_defconfig b/arch/blackfin/configs/BF518F-EZBRD_defconfig

index 3830078..99c00d8 100644 (file)
--- a/arch/blackfin/configs/BF518F-EZBRD_defconfig
+++ b/arch/blackfin/configs/BF518F-EZBRD_defconfig
@@ -48,7 +48,6 @@ CONFIG_IP_PNP=y
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_JEDECPROBE=m
  CONFIG_MTD_RAM=y
diff --git a/arch/blackfin/configs/BF527-TLL6527M_defconfig b/arch/blackfin/configs/BF527-TLL6527M_defconfig

index cd0636b..cdeb518 100644 (file)
--- a/arch/blackfin/configs/BF527-TLL6527M_defconfig
+++ b/arch/blackfin/configs/BF527-TLL6527M_defconfig
@@ -67,7 +67,6 @@ CONFIG_BFIN_SIR0=y
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/BF533-EZKIT_defconfig b/arch/blackfin/configs/BF533-EZKIT_defconfig

index 16273a9..ed7d2c0 100644 (file)
--- a/arch/blackfin/configs/BF533-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF533-EZKIT_defconfig
@@ -50,7 +50,6 @@ CONFIG_IRTTY_SIR=m
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_JEDECPROBE=y
  CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/blackfin/configs/BF533-STAMP_defconfig b/arch/blackfin/configs/BF533-STAMP_defconfig

index 0df2f92..0c241f4 100644 (file)
--- a/arch/blackfin/configs/BF533-STAMP_defconfig
+++ b/arch/blackfin/configs/BF533-STAMP_defconfig
@@ -50,7 +50,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=m
  CONFIG_MTD_CFI_AMDSTD=m
diff --git a/arch/blackfin/configs/BF537-STAMP_defconfig b/arch/blackfin/configs/BF537-STAMP_defconfig

index 91d3eda..e5360b3 100644 (file)
--- a/arch/blackfin/configs/BF537-STAMP_defconfig
+++ b/arch/blackfin/configs/BF537-STAMP_defconfig
@@ -55,13 +55,14 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=m
  CONFIG_MTD_CFI_AMDSTD=m
  CONFIG_MTD_RAM=y
  CONFIG_MTD_ROM=m
  CONFIG_MTD_PHYSMAP=m
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_SPI_NOR=y
  CONFIG_BLK_DEV_RAM=y
  CONFIG_NETDEVICES=y
  CONFIG_NET_BFIN=y
diff --git a/arch/blackfin/configs/BF538-EZKIT_defconfig b/arch/blackfin/configs/BF538-EZKIT_defconfig

index be03be6..60f6fb8 100644 (file)
--- a/arch/blackfin/configs/BF538-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF538-EZKIT_defconfig
@@ -60,7 +60,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=m
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=m
  CONFIG_MTD_CFI_AMDSTD=m
diff --git a/arch/blackfin/configs/BF561-ACVILON_defconfig b/arch/blackfin/configs/BF561-ACVILON_defconfig

index 802f9c4..78f6bc7 100644 (file)
--- a/arch/blackfin/configs/BF561-ACVILON_defconfig
+++ b/arch/blackfin/configs/BF561-ACVILON_defconfig
@@ -50,7 +50,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_PLATRAM=y
  CONFIG_MTD_PHRAM=y
diff --git a/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig

index e2a2fa5..fac8bb5 100644 (file)
--- a/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig
+++ b/arch/blackfin/configs/BF561-EZKIT-SMP_defconfig
@@ -52,7 +52,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/blackfin/configs/BF561-EZKIT_defconfig b/arch/blackfin/configs/BF561-EZKIT_defconfig

index 680730e..2a2e4d0 100644 (file)
--- a/arch/blackfin/configs/BF561-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF561-EZKIT_defconfig
@@ -54,7 +54,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/blackfin/configs/BF609-EZKIT_defconfig b/arch/blackfin/configs/BF609-EZKIT_defconfig

index fcec5ce..ba4267f 100644 (file)
--- a/arch/blackfin/configs/BF609-EZKIT_defconfig
+++ b/arch/blackfin/configs/BF609-EZKIT_defconfig
@@ -105,6 +105,7 @@ CONFIG_SPI=y
  CONFIG_SPI_ADI_V3=y
  CONFIG_GPIOLIB=y
  CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_MCP23S08=y
  # CONFIG_HWMON is not set
  CONFIG_WATCHDOG=y
  CONFIG_BFIN_WDT=y
diff --git a/arch/blackfin/configs/CM-BF527_defconfig b/arch/blackfin/configs/CM-BF527_defconfig

index 05108b8..1902bb0 100644 (file)
--- a/arch/blackfin/configs/CM-BF527_defconfig
+++ b/arch/blackfin/configs/CM-BF527_defconfig
@@ -55,7 +55,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF533_defconfig b/arch/blackfin/configs/CM-BF533_defconfig

index 5e0db82..9a5716d 100644 (file)
--- a/arch/blackfin/configs/CM-BF533_defconfig
+++ b/arch/blackfin/configs/CM-BF533_defconfig
@@ -37,7 +37,6 @@ CONFIG_UNIX=y
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF537E_defconfig b/arch/blackfin/configs/CM-BF537E_defconfig

index 2e47df7..6845928 100644 (file)
--- a/arch/blackfin/configs/CM-BF537E_defconfig
+++ b/arch/blackfin/configs/CM-BF537E_defconfig
@@ -52,7 +52,6 @@ CONFIG_IP_PNP=y
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF537U_defconfig b/arch/blackfin/configs/CM-BF537U_defconfig

index 6da629f..d9915e9 100644 (file)
--- a/arch/blackfin/configs/CM-BF537U_defconfig
+++ b/arch/blackfin/configs/CM-BF537U_defconfig
@@ -48,7 +48,6 @@ CONFIG_INET=y
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF548_defconfig b/arch/blackfin/configs/CM-BF548_defconfig

index 9ff79df..92d8130 100644 (file)
--- a/arch/blackfin/configs/CM-BF548_defconfig
+++ b/arch/blackfin/configs/CM-BF548_defconfig
@@ -54,7 +54,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/CM-BF561_defconfig b/arch/blackfin/configs/CM-BF561_defconfig

index d6dd98e..fa8d911 100644 (file)
--- a/arch/blackfin/configs/CM-BF561_defconfig
+++ b/arch/blackfin/configs/CM-BF561_defconfig
@@ -52,7 +52,6 @@ CONFIG_INET=y
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/configs/DNP5370_defconfig b/arch/blackfin/configs/DNP5370_defconfig

index 2b58cb2..8860059 100644 (file)
--- a/arch/blackfin/configs/DNP5370_defconfig
+++ b/arch/blackfin/configs/DNP5370_defconfig
@@ -36,7 +36,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  CONFIG_MTD=y
  CONFIG_MTD_DEBUG=y
  CONFIG_MTD_DEBUG_VERBOSE=1
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_NFTL=y
  CONFIG_NFTL_RW=y
diff --git a/arch/blackfin/configs/IP0X_defconfig b/arch/blackfin/configs/IP0X_defconfig

index 5adf0da..9e3ae4b 100644 (file)
--- a/arch/blackfin/configs/IP0X_defconfig
+++ b/arch/blackfin/configs/IP0X_defconfig
@@ -43,7 +43,6 @@ CONFIG_IP_NF_TARGET_REJECT=y
  CONFIG_IP_NF_MANGLE=y
  # CONFIG_WIRELESS is not set
  CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/blackfin/configs/PNAV-10_defconfig b/arch/blackfin/configs/PNAV-10_defconfig

index a6a7298..c792681 100644 (file)
--- a/arch/blackfin/configs/PNAV-10_defconfig
+++ b/arch/blackfin/configs/PNAV-10_defconfig
@@ -46,7 +46,6 @@ CONFIG_IP_PNP=y
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_RAM=y
  CONFIG_MTD_COMPLEX_MAPPINGS=y
diff --git a/arch/blackfin/configs/SRV1_defconfig b/arch/blackfin/configs/SRV1_defconfig

index bc21664..23fdc57 100644 (file)
--- a/arch/blackfin/configs/SRV1_defconfig
+++ b/arch/blackfin/configs/SRV1_defconfig
@@ -38,7 +38,6 @@ CONFIG_IRTTY_SIR=m
  # CONFIG_WIRELESS is not set
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
-CONFIG_MTD_CHAR=m
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_JEDECPROBE=m
  CONFIG_MTD_RAM=y
diff --git a/arch/blackfin/configs/TCM-BF518_defconfig b/arch/blackfin/configs/TCM-BF518_defconfig

index ea88158..e289594 100644 (file)
--- a/arch/blackfin/configs/TCM-BF518_defconfig
+++ b/arch/blackfin/configs/TCM-BF518_defconfig
@@ -55,7 +55,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  # CONFIG_FW_LOADER is not set
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_ADV_OPTIONS=y
diff --git a/arch/blackfin/configs/TCM-BF537_defconfig b/arch/blackfin/configs/TCM-BF537_defconfig

index c1f45f1..39e85cc 100644 (file)
--- a/arch/blackfin/configs/TCM-BF537_defconfig
+++ b/arch/blackfin/configs/TCM-BF537_defconfig
@@ -44,7 +44,6 @@ CONFIG_INET=y
  CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
  CONFIG_MTD=y
  CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
  CONFIG_MTD_BLOCK=y
  CONFIG_MTD_CFI=y
  CONFIG_MTD_CFI_INTELEXT=y
diff --git a/arch/blackfin/include/asm/io.h b/arch/blackfin/include/asm/io.h

index dccae26..4e8ad05 100644 (file)
--- a/arch/blackfin/include/asm/io.h
+++ b/arch/blackfin/include/asm/io.h
@@ -11,27 +11,12 @@
  #include <linux/types.h>
  #include <asm/byteorder.h>
  
-#define DECLARE_BFIN_RAW_READX(size, type, asm, asm_sign) \
-static inline type __raw_read##size(const volatile void __iomem *addr) \
-{ \
-       unsigned int val; \
-       int tmp; \
-       __asm__ __volatile__ ( \
-               "cli %1;" \
-               "NOP; NOP; SSYNC;" \
-               "%0 = "#asm" [%2] "#asm_sign";" \
-               "sti %1;" \
-               : "=d"(val), "=d"(tmp) \
-               : "a"(addr) \
-       ); \
-       return (type) val; \
-}
-DECLARE_BFIN_RAW_READX(b, u8, b, (z))
-#define __raw_readb __raw_readb
-DECLARE_BFIN_RAW_READX(w, u16, w, (z))
-#define __raw_readw __raw_readw
-DECLARE_BFIN_RAW_READX(l, u32, , )
-#define __raw_readl __raw_readl
+#define __raw_readb bfin_read8
+#define __raw_readw bfin_read16
+#define __raw_readl bfin_read32
+#define __raw_writeb(val, addr) bfin_write8(addr, val)
+#define __raw_writew(val, addr) bfin_write16(addr, val)
+#define __raw_writel(val, addr) bfin_write32(addr, val)
  
  extern void outsb(unsigned long port, const void *addr, unsigned long count);
  extern void outsw(unsigned long port, const void *addr, unsigned long count);
@@ -50,14 +35,6 @@ extern void insl_16(unsigned long port, void *addr, unsigned long count);
  #define insw insw
  #define insl insl
  
-extern void dma_outsb(unsigned long port, const void *addr, unsigned short count);
-extern void dma_outsw(unsigned long port, const void *addr, unsigned short count);
-extern void dma_outsl(unsigned long port, const void *addr, unsigned short count);
-
-extern void dma_insb(unsigned long port, void *addr, unsigned short count);
-extern void dma_insw(unsigned long port, void *addr, unsigned short count);
-extern void dma_insl(unsigned long port, void *addr, unsigned short count);
-
  /**
   * I/O write barrier
   *
diff --git a/arch/blackfin/include/uapi/asm/unistd.h b/arch/blackfin/include/uapi/asm/unistd.h

index a451164..0cb9078 100644 (file)
--- a/arch/blackfin/include/uapi/asm/unistd.h
+++ b/arch/blackfin/include/uapi/asm/unistd.h
@@ -401,8 +401,18 @@
  #define __NR_sendmmsg          380
  #define __NR_process_vm_readv  381
  #define __NR_process_vm_writev 382
+#define __NR_kcmp              383
+#define __NR_finit_module      384
+#define __NR_sched_setattr     385
+#define __NR_sched_getattr     386
+#define __NR_renameat2         387
+#define __NR_seccomp           388
+#define __NR_getrandom         389
+#define __NR_memfd_create      390
+#define __NR_bpf               391
+#define __NR_execveat          392
  
-#define __NR_syscall           383
+#define __NR_syscall           393  /* For internal using, not implemented */
  #define NR_syscalls            __NR_syscall
  
  /* Old optional stuff no one actually uses */
diff --git a/arch/blackfin/kernel/debug-mmrs.c b/arch/blackfin/kernel/debug-mmrs.c

index 947ad08..86b1cd3 100644 (file)
--- a/arch/blackfin/kernel/debug-mmrs.c
+++ b/arch/blackfin/kernel/debug-mmrs.c
@@ -1620,7 +1620,6 @@ static int __init bfin_debug_mmrs_init(void)
         D16(USB_APHY_CNTRL);
         D16(USB_APHY_CALIB);
         D16(USB_APHY_CNTRL2);
-       D16(USB_PHY_TEST);
         D16(USB_PLLOSC_CTRL);
         D16(USB_SRP_CLKDIV);
         D16(USB_EP_NI0_TXMAXP);
diff --git a/arch/blackfin/kernel/kgdb.c b/arch/blackfin/kernel/kgdb.c

index fa53fae..cf773f0 100644 (file)
--- a/arch/blackfin/kernel/kgdb.c
+++ b/arch/blackfin/kernel/kgdb.c
@@ -330,9 +330,6 @@ static void bfin_disable_hw_debug(struct pt_regs *regs)
  }
  
  #ifdef CONFIG_SMP
-extern void generic_exec_single(int cpu, struct call_single_data *data, int wait);
-static struct call_single_data kgdb_smp_ipi_data[NR_CPUS];
-
  void kgdb_passive_cpu_callback(void *info)
  {
         kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs());
@@ -343,15 +340,14 @@ void kgdb_roundup_cpus(unsigned long flags)
         unsigned int cpu;
  
         for (cpu = cpumask_first(cpu_online_mask); cpu < nr_cpu_ids;
-               cpu = cpumask_next(cpu, cpu_online_mask)) {
-               kgdb_smp_ipi_data[cpu].func = kgdb_passive_cpu_callback;
-               generic_exec_single(cpu, &kgdb_smp_ipi_data[cpu], 0);
-       }
+               cpu = cpumask_next(cpu, cpu_online_mask))
+               smp_call_function_single(cpu, kgdb_passive_cpu_callback,
+                                        NULL, 0);
  }
  
  void kgdb_roundup_cpu(int cpu, unsigned long flags)
  {
-       generic_exec_single(cpu, &kgdb_smp_ipi_data[cpu], 0);
+       smp_call_function_single(cpu, kgdb_passive_cpu_callback, NULL, 0);
  }
  #endif
  
@@ -359,19 +355,6 @@ void kgdb_roundup_cpu(int cpu, unsigned long flags)
  static unsigned long kgdb_arch_imask;
  #endif
  
-void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
-{
-       if (kgdb_single_step)
-               preempt_enable();
-
-#ifdef CONFIG_IPIPE
-       if (kgdb_arch_imask) {
-               cpu_pda[raw_smp_processor_id()].ex_imask = kgdb_arch_imask;
-               kgdb_arch_imask = 0;
-       }
-#endif
-}
-
  int kgdb_arch_handle_exception(int vector, int signo,
                                int err_code, char *remcom_in_buffer,
                                char *remcom_out_buffer,
diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c

index 4f424ae..ad82468 100644 (file)
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -1464,5 +1464,5 @@ void __init cmdline_init(const char *r0)
  {
         early_shadow_stamp();
         if (r0)
-               strncpy(command_line, r0, COMMAND_LINE_SIZE);
+               strlcpy(command_line, r0, COMMAND_LINE_SIZE);
  }
diff --git a/arch/blackfin/mach-bf527/include/mach/cdefBF525.h b/arch/blackfin/mach-bf527/include/mach/cdefBF525.h

index d90a85b..bd04531 100644 (file)
--- a/arch/blackfin/mach-bf527/include/mach/cdefBF525.h
+++ b/arch/blackfin/mach-bf527/include/mach/cdefBF525.h
@@ -122,11 +122,6 @@
  #define bfin_read_USB_APHY_CNTRL2()            bfin_read16(USB_APHY_CNTRL2)
  #define bfin_write_USB_APHY_CNTRL2(val)                bfin_write16(USB_APHY_CNTRL2, val)
  
-/* (PHY_TEST is for ADI usage only) */
-
-#define bfin_read_USB_PHY_TEST()               bfin_read16(USB_PHY_TEST)
-#define bfin_write_USB_PHY_TEST(val)           bfin_write16(USB_PHY_TEST, val)
-
  #define bfin_read_USB_PLLOSC_CTRL()            bfin_read16(USB_PLLOSC_CTRL)
  #define bfin_write_USB_PLLOSC_CTRL(val)                bfin_write16(USB_PLLOSC_CTRL, val)
  #define bfin_read_USB_SRP_CLKDIV()             bfin_read16(USB_SRP_CLKDIV)
diff --git a/arch/blackfin/mach-bf527/include/mach/defBF525.h b/arch/blackfin/mach-bf527/include/mach/defBF525.h

index 71578d9..591e00f 100644 (file)
--- a/arch/blackfin/mach-bf527/include/mach/defBF525.h
+++ b/arch/blackfin/mach-bf527/include/mach/defBF525.h
@@ -77,10 +77,6 @@
  
  #define                  USB_APHY_CNTRL2  0xffc039e8   /* Register used to prevent re-enumeration once Moab goes into hibernate mode */
  
-/* (PHY_TEST is for ADI usage only) */
-
-#define                     USB_PHY_TEST  0xffc039ec   /* Used for reducing simulation time and simplifies FIFO testability */
-
  #define                  USB_PLLOSC_CTRL  0xffc039f0   /* Used to program different parameters for USB PLL and Oscillator */
  #define                   USB_SRP_CLKDIV  0xffc039f4   /* Used to program clock divide value for the clock fed to the SRP detection logic */
  
diff --git a/arch/blackfin/mach-bf548/include/mach/cdefBF542.h b/arch/blackfin/mach-bf548/include/mach/cdefBF542.h

index d09c19c..9163479 100644 (file)
--- a/arch/blackfin/mach-bf548/include/mach/cdefBF542.h
+++ b/arch/blackfin/mach-bf548/include/mach/cdefBF542.h
@@ -241,10 +241,6 @@
  #define bfin_read_USB_APHY_CNTRL2()            bfin_read16(USB_APHY_CNTRL2)
  #define bfin_write_USB_APHY_CNTRL2(val)                bfin_write16(USB_APHY_CNTRL2, val)
  
-/* (PHY_TEST is for ADI usage only) */
-
-#define bfin_read_USB_PHY_TEST()               bfin_read16(USB_PHY_TEST)
-#define bfin_write_USB_PHY_TEST(val)           bfin_write16(USB_PHY_TEST, val)
  #define bfin_read_USB_PLLOSC_CTRL()            bfin_read16(USB_PLLOSC_CTRL)
  #define bfin_write_USB_PLLOSC_CTRL(val)                bfin_write16(USB_PLLOSC_CTRL, val)
  #define bfin_read_USB_SRP_CLKDIV()             bfin_read16(USB_SRP_CLKDIV)
diff --git a/arch/blackfin/mach-bf548/include/mach/cdefBF547.h b/arch/blackfin/mach-bf548/include/mach/cdefBF547.h

index bcb9726..be83f64 100644 (file)
--- a/arch/blackfin/mach-bf548/include/mach/cdefBF547.h
+++ b/arch/blackfin/mach-bf548/include/mach/cdefBF547.h
@@ -408,10 +408,6 @@
  #define bfin_read_USB_APHY_CNTRL2()            bfin_read16(USB_APHY_CNTRL2)
  #define bfin_write_USB_APHY_CNTRL2(val)                bfin_write16(USB_APHY_CNTRL2, val)
  
-/* (PHY_TEST is for ADI usage only) */
-
-#define bfin_read_USB_PHY_TEST()               bfin_read16(USB_PHY_TEST)
-#define bfin_write_USB_PHY_TEST(val)           bfin_write16(USB_PHY_TEST, val)
  #define bfin_read_USB_PLLOSC_CTRL()            bfin_read16(USB_PLLOSC_CTRL)
  #define bfin_write_USB_PLLOSC_CTRL(val)                bfin_write16(USB_PLLOSC_CTRL, val)
  #define bfin_read_USB_SRP_CLKDIV()             bfin_read16(USB_SRP_CLKDIV)
diff --git a/arch/blackfin/mach-bf548/include/mach/defBF542.h b/arch/blackfin/mach-bf548/include/mach/defBF542.h

index 5116157..ae4b889 100644 (file)
--- a/arch/blackfin/mach-bf548/include/mach/defBF542.h
+++ b/arch/blackfin/mach-bf548/include/mach/defBF542.h
@@ -140,9 +140,6 @@
  #define                   USB_APHY_CALIB  0xffc03de4   /* Register used to set some calibration values */
  #define                  USB_APHY_CNTRL2  0xffc03de8   /* Register used to prevent re-enumeration once Moab goes into hibernate mode */
  
-/* (PHY_TEST is for ADI usage only) */
-
-#define                     USB_PHY_TEST  0xffc03dec   /* Used for reducing simulation time and simplifies FIFO testability */
  #define                  USB_PLLOSC_CTRL  0xffc03df0   /* Used to program different parameters for USB PLL and Oscillator */
  #define                   USB_SRP_CLKDIV  0xffc03df4   /* Used to program clock divide value for the clock fed to the SRP detection logic */
  
diff --git a/arch/blackfin/mach-bf548/include/mach/defBF547.h b/arch/blackfin/mach-bf548/include/mach/defBF547.h

index d55dcc0..7cc7928 100644 (file)
--- a/arch/blackfin/mach-bf548/include/mach/defBF547.h
+++ b/arch/blackfin/mach-bf548/include/mach/defBF547.h
@@ -254,9 +254,6 @@
  #define                   USB_APHY_CALIB  0xffc03de4   /* Register used to set some calibration values */
  #define                  USB_APHY_CNTRL2  0xffc03de8   /* Register used to prevent re-enumeration once Moab goes into hibernate mode */
  
-/* (PHY_TEST is for ADI usage only) */
-
-#define                     USB_PHY_TEST  0xffc03dec   /* Used for reducing simulation time and simplifies FIFO testability */
  #define                  USB_PLLOSC_CTRL  0xffc03df0   /* Used to program different parameters for USB PLL and Oscillator */
  #define                   USB_SRP_CLKDIV  0xffc03df4   /* Used to program clock divide value for the clock fed to the SRP detection logic */
  
diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c

index 7f9fc27..2c61fc0 100644 (file)
--- a/arch/blackfin/mach-bf609/boards/ezkit.c
+++ b/arch/blackfin/mach-bf609/boards/ezkit.c
@@ -780,8 +780,8 @@ static struct adi_spi3_chip spidev_chip_info = {
  };
  #endif
  
-#if IS_ENABLED(CONFIG_SND_BF5XX_I2S)
-static struct platform_device bfin_i2s_pcm = {
+#if IS_ENABLED(CONFIG_SND_BF6XX_PCM)
+static struct platform_device bfin_pcm = {
         .name = "bfin-i2s-pcm-audio",
         .id = -1,
  };
@@ -1034,7 +1034,6 @@ static struct adv7842_platform_data adv7842_data = {
         .i2c_infoframe = 0x48,
         .i2c_cec = 0x49,
         .i2c_avlink = 0x4a,
-       .i2c_ex = 0x26,
  };
  
  static struct bfin_capture_config bfin_capture_data = {
@@ -1104,7 +1103,6 @@ static struct disp_route adv7511_routes[] = {
  
  static struct adv7511_platform_data adv7511_data = {
         .edid_addr = 0x7e,
-       .i2c_ex = 0x25,
  };
  
  static struct bfin_display_config bfin_display_data = {
@@ -1209,6 +1207,35 @@ static struct platform_device bfin_display_device = {
  };
  #endif
  
+#if defined(CONFIG_FB_BF609_NL8048) \
+       || defined(CONFIG_FB_BF609_NL8048_MODULE)
+static struct resource nl8048_resources[] = {
+       {
+               .start = EPPI2_STAT,
+               .end = EPPI2_STAT,
+               .flags = IORESOURCE_MEM,
+       },
+       {
+               .start = CH_EPPI2_CH0,
+               .end = CH_EPPI2_CH0,
+               .flags = IORESOURCE_DMA,
+       },
+       {
+               .start = IRQ_EPPI2_STAT,
+               .end = IRQ_EPPI2_STAT,
+               .flags = IORESOURCE_IRQ,
+       },
+};
+static struct platform_device bfin_fb_device = {
+       .name = "bf609_nl8048",
+       .num_resources = ARRAY_SIZE(nl8048_resources),
+       .resource = nl8048_resources,
+       .dev = {
+               .platform_data = (void *)GPIO_PC15,
+       },
+};
+#endif
+
  #if defined(CONFIG_BFIN_CRC)
  #define BFIN_CRC_NAME "bfin-crc"
  
@@ -1862,6 +1889,29 @@ static struct platform_device i2c_bfin_twi1_device = {
  };
  #endif
  
+#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+#include <linux/spi/mcp23s08.h>
+static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch0 = {
+       .base = 120,
+};
+static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch1 = {
+       .base = 130,
+};
+static const struct mcp23s08_platform_data bfin_mcp23s08_soft_switch2 = {
+       .base = 140,
+};
+# if IS_ENABLED(CONFIG_VIDEO_ADV7842)
+static const struct mcp23s08_platform_data bfin_adv7842_soft_switch = {
+       .base = 150,
+};
+# endif
+# if IS_ENABLED(CONFIG_VIDEO_ADV7511) || IS_ENABLED(CONFIG_VIDEO_ADV7343)
+static const struct mcp23s08_platform_data bfin_adv7511_soft_switch = {
+       .base = 160,
+};
+# endif
+#endif
+
  static struct i2c_board_info __initdata bfin_i2c_board_info0[] = {
  #if IS_ENABLED(CONFIG_INPUT_ADXL34X_I2C)
         {
@@ -1881,6 +1931,32 @@ static struct i2c_board_info __initdata bfin_i2c_board_info0[] = {
                 I2C_BOARD_INFO("ssm2602", 0x1b),
         },
  #endif
+#if IS_ENABLED(CONFIG_GPIO_MCP23S08)
+       {
+               I2C_BOARD_INFO("mcp23017", 0x21),
+               .platform_data = (void *)&bfin_mcp23s08_soft_switch0
+       },
+       {
+               I2C_BOARD_INFO("mcp23017", 0x22),
+               .platform_data = (void *)&bfin_mcp23s08_soft_switch1
+       },
+       {
+               I2C_BOARD_INFO("mcp23017", 0x23),
+               .platform_data = (void *)&bfin_mcp23s08_soft_switch2
+       },
+# if IS_ENABLED(CONFIG_VIDEO_ADV7842)
+       {
+               I2C_BOARD_INFO("mcp23017", 0x26),
+               .platform_data = (void *)&bfin_adv7842_soft_switch
+       },
+# endif
+# if IS_ENABLED(CONFIG_VIDEO_ADV7511) || IS_ENABLED(CONFIG_VIDEO_ADV7343)
+       {
+               I2C_BOARD_INFO("mcp23017", 0x25),
+               .platform_data = (void *)&bfin_adv7511_soft_switch
+       },
+# endif
+#endif
  };
  
  static struct i2c_board_info __initdata bfin_i2c_board_info1[] = {
@@ -2023,8 +2099,8 @@ static struct platform_device *ezkit_devices[] __initdata = {
  #if IS_ENABLED(CONFIG_MTD_PHYSMAP)
         &ezkit_flash_device,
  #endif
-#if IS_ENABLED(CONFIG_SND_BF5XX_I2S)
-       &bfin_i2s_pcm,
+#if IS_ENABLED(CONFIG_SND_BF6XX_PCM)
+       &bfin_pcm,
  #endif
  #if IS_ENABLED(CONFIG_SND_BF6XX_SOC_I2S)
         &bfin_i2s,
@@ -2060,7 +2136,7 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = {
         PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary",  "pinctrl-adi2.0", NULL, "rotary"),
         PIN_MAP_MUX_GROUP_DEFAULT("bfin_can.0",  "pinctrl-adi2.0", NULL, "can0"),
         PIN_MAP_MUX_GROUP_DEFAULT("physmap-flash.0",  "pinctrl-adi2.0", NULL, "smc0"),
-       PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.2",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
+       PIN_MAP_MUX_GROUP_DEFAULT("bf609_nl8048.0",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
         PIN_MAP_MUX_GROUP("bfin_display.0", "8bit",  "pinctrl-adi2.0", "ppi2_8bgrp", "ppi2"),
         PIN_MAP_MUX_GROUP_DEFAULT("bfin_display.0",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
         PIN_MAP_MUX_GROUP("bfin_display.0", "16bit",  "pinctrl-adi2.0", "ppi2_16bgrp", "ppi2"),
diff --git a/arch/blackfin/mach-bf609/clock.c b/arch/blackfin/mach-bf609/clock.c

index 244fa4a..3783058 100644 (file)
--- a/arch/blackfin/mach-bf609/clock.c
+++ b/arch/blackfin/mach-bf609/clock.c
@@ -363,6 +363,12 @@ static struct clk ethclk = {
         .ops        = &dummy_clk_ops,
  };
  
+static struct clk ethpclk = {
+       .name       = "pclk",
+       .parent     = &sclk0,
+       .ops        = &dummy_clk_ops,
+};
+
  static struct clk spiclk = {
         .name       = "spi",
         .parent     = &sclk1,
@@ -381,6 +387,7 @@ static struct clk_lookup bf609_clks[] = {
         CLK(dclk, NULL, "DCLK"),
         CLK(oclk, NULL, "OCLK"),
         CLK(ethclk, NULL, "stmmaceth"),
+       CLK(ethpclk, NULL, "pclk"),
         CLK(spiclk, NULL, "spi"),
  };
  
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S

index 86b5a09..8d9431e 100644 (file)
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1694,6 +1694,16 @@ ENTRY(_sys_call_table)
         .long _sys_sendmmsg             /* 380 */
         .long _sys_process_vm_readv
         .long _sys_process_vm_writev
+       .long _sys_kcmp
+       .long _sys_finit_module
+       .long _sys_sched_setattr        /* 385 */
+       .long _sys_sched_getattr
+       .long _sys_renameat2
+       .long _sys_seccomp
+       .long _sys_getrandom
+       .long _sys_memfd_create         /* 390 */
+       .long _sys_bpf
+       .long _sys_execveat
  
         .rept NR_syscalls-(.-_sys_call_table)/4
         .long _sys_ni_syscall
diff --git a/arch/blackfin/mach-common/pm.c b/arch/blackfin/mach-common/pm.c

index 1387a94..a66d979 100644 (file)
--- a/arch/blackfin/mach-common/pm.c
+++ b/arch/blackfin/mach-common/pm.c
@@ -14,6 +14,7 @@
  #include <linux/slab.h>
  #include <linux/io.h>
  #include <linux/irq.h>
+#include <linux/delay.h>
  
  #include <asm/cplb.h>
  #include <asm/gpio.h>
@@ -180,6 +181,7 @@ int bfin_pm_suspend_mem_enter(void)
  
  #if defined(CONFIG_BFIN_EXTMEM_WRITEBACK) || defined(CONFIG_BFIN_L2_WRITEBACK)
         flushinv_all_dcache();
+       udelay(1);
  #endif
         _disable_dcplb();
         _disable_icplb();
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig

index 4a03911..0314e32 100644 (file)
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -46,12 +46,18 @@ config CRIS
         select ARCH_WANT_IPC_PARSE_VERSION
         select GENERIC_IRQ_SHOW
         select GENERIC_IOMAP
-       select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32
         select GENERIC_CMOS_UPDATE
         select MODULES_USE_ELF_RELA
         select CLONE_BACKWARDS2
         select OLD_SIGSUSPEND
         select OLD_SIGACTION
+       select ARCH_REQUIRE_GPIOLIB
+       select IRQ_DOMAIN if ETRAX_ARCH_V32
+       select OF if ETRAX_ARCH_V32
+       select OF_EARLY_FLATTREE if ETRAX_ARCH_V32
+       select CLKSRC_MMIO if ETRAX_ARCH_V32
+       select GENERIC_CLOCKEVENTS if ETRAX_ARCH_V32
+       select GENERIC_SCHED_CLOCK if ETRAX_ARCH_V32
  
  config HZ
         int
@@ -61,6 +67,10 @@ config NR_CPUS
         int
         default "1"
  
+config BUILTIN_DTB
+       string "DTB to build into the kernel image"
+       depends on OF
+
  source "init/Kconfig"
  
  source "kernel/Kconfig.freezer"
diff --git a/arch/cris/Makefile b/arch/cris/Makefile

index 39dc7d0..4a5404b 100644 (file)
--- a/arch/cris/Makefile
+++ b/arch/cris/Makefile
@@ -40,6 +40,10 @@ else
  MACH :=
  endif
  
+ifneq ($(CONFIG_BUILTIN_DTB),"")
+core-$(CONFIG_OF) += arch/cris/boot/dts/
+endif
+
  LD = $(CROSS_COMPILE)ld -mcrislinux
  
  OBJCOPYFLAGS := -O binary -R .note -R .comment -S
diff --git a/arch/cris/arch-v32/kernel/Makefile b/arch/cris/arch-v32/kernel/Makefile

index 4035835..d9fc617 100644 (file)
--- a/arch/cris/arch-v32/kernel/Makefile
+++ b/arch/cris/arch-v32/kernel/Makefile
@@ -9,7 +9,6 @@ obj-y   := entry.o traps.o irq.o debugport.o \
            process.o ptrace.o setup.o signal.o traps.o time.o \
            cache.o cacheflush.o
  
-obj-$(CONFIG_SMP) += smp.o
  obj-$(CONFIG_ETRAX_KGDB) += kgdb.o kgdb_asm.o
  obj-$(CONFIG_ETRAX_FAST_TIMER) += fasttimer.o
  obj-$(CONFIG_MODULES)    += crisksyms.o
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S

index 2f19ac6..026a0b2 100644 (file)
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -99,6 +99,8 @@ ret_from_kernel_thread:
  
         .type   ret_from_intr,@function
  ret_from_intr:
+       moveq   0, $r9                  ; not a syscall
+
         ;; Check for resched if preemptive kernel, or if we're going back to
         ;; user-mode. This test matches the user_regs(regs) macro. Don't simply
         ;; test CCS since that doesn't necessarily reflect what mode we'll
@@ -145,7 +147,7 @@ system_call:
         ;; Stack-frame similar to the irq heads, which is reversed in
         ;; ret_from_sys_call.
  
-       sub.d   92, $sp         ; Skip EXS and EDA.
+       sub.d   92, $sp         ; Skip EDA.
         movem   $r13, [$sp]
         move.d  $sp, $r8
         addq    14*4, $r8
@@ -156,8 +158,9 @@ system_call:
         move    $ccs, $r4
         move    $srp, $r5
         move    $erp, $r6
+       move.d  $r9, $r7        ; Store syscall number in EXS
         subq    4, $sp
-       movem   $r6, [$r8]
+       movem   $r7, [$r8]
         ei                      ; Enable interrupts while processing syscalls.
         move.d  $r10, [$sp]
  
@@ -277,44 +280,15 @@ _syscall_exit_work:
  
         .type   _work_pending,@function
  _work_pending:
-       addoq   +TI_flags, $r0, $acr
-       move.d  [$acr], $r10
-       btstq   TIF_NEED_RESCHED, $r10  ; Need resched?
-       bpl     _work_notifysig         ; No, must be signal/notify.
-       nop
-       .size   _work_pending, . - _work_pending
-
-       .type   _work_resched,@function
-_work_resched:
-       move.d  $r9, $r1                ; Preserve R9.
-       jsr     schedule
-       nop
-       move.d  $r1, $r9
-       di
-
-       addoq   +TI_flags, $r0, $acr
-       move.d  [$acr], $r1
-       and.d   _TIF_WORK_MASK, $r1     ; Ignore sycall trace counter.
-       beq     _Rexit
-       nop
-       btstq   TIF_NEED_RESCHED, $r1
-       bmi     _work_resched           ; current->work.need_resched.
-       nop
-       .size   _work_resched, . - _work_resched
-
-       .type   _work_notifysig,@function
-_work_notifysig:
-       ;; Deal with pending signals and notify-resume requests.
-
         addoq   +TI_flags, $r0, $acr
         move.d  [$acr], $r12            ; The thread_info_flags parameter.
         move.d  $sp, $r11               ; The regs param.
-       jsr     do_notify_resume
-       move.d  $r9, $r10               ; do_notify_resume syscall/irq param.
+       jsr     do_work_pending
+       move.d  $r9, $r10               ; The syscall/irq param.
  
         ba _Rexit
         nop
-       .size   _work_notifysig, . - _work_notifysig
+       .size   _work_pending, . - _work_pending
  
         ;; We get here as a sidetrack when we've entered a syscall with the
         ;; trace-bit set. We need to call do_syscall_trace and then continue
diff --git a/arch/cris/arch-v32/kernel/head.S b/arch/cris/arch-v32/kernel/head.S

index 51e3416..74a66e0 100644 (file)
--- a/arch/cris/arch-v32/kernel/head.S
+++ b/arch/cris/arch-v32/kernel/head.S
@@ -52,11 +52,6 @@ tstart:
  
         GIO_INIT
  
-#ifdef CONFIG_SMP
-secondary_cpu_entry: /* Entry point for secondary CPUs */
-       di
-#endif
-
         ;; Setup and enable the MMU. Use same configuration for both the data
         ;; and the instruction MMU.
         ;;
@@ -164,33 +159,6 @@ secondary_cpu_entry: /* Entry point for secondary CPUs */
         nop
         nop
  
-#ifdef CONFIG_SMP
-       ;; Read CPU ID
-       move    0, $srs
-       nop
-       nop
-       nop
-       move    $s12, $r0
-       cmpq    0, $r0
-       beq     master_cpu
-       nop
-slave_cpu:
-       ; Time to boot-up. Get stack location provided by master CPU.
-       move.d  smp_init_current_idle_thread, $r1
-       move.d  [$r1], $sp
-       add.d   8192, $sp
-       move.d  ebp_start, $r0  ; Defined in linker-script.
-       move    $r0, $ebp
-       jsr     smp_callin
-       nop
-master_cpu:
-       /* Set up entry point for secondary CPUs. The boot ROM has set up
-        * EBP at start of internal memory. The CPU will get there
-        * later when we issue an IPI to them... */
-       move.d MEM_INTMEM_START + IPI_INTR_VECT * 4, $r0
-       move.d secondary_cpu_entry, $r1
-       move.d $r1, [$r0]
-#endif
         ; Check if starting from DRAM (network->RAM boot or unpacked
         ; compressed kernel), or directly from flash.
         lapcq   ., $r0
diff --git a/arch/cris/arch-v32/kernel/irq.c b/arch/cris/arch-v32/kernel/irq.c

index 25437ae..6a881e0 100644 (file)
--- a/arch/cris/arch-v32/kernel/irq.c
+++ b/arch/cris/arch-v32/kernel/irq.c
@@ -10,6 +10,8 @@
  #include <linux/errno.h>
  #include <linux/init.h>
  #include <linux/profile.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
  #include <linux/proc_fs.h>
  #include <linux/seq_file.h>
  #include <linux/threads.h>
@@ -56,9 +58,6 @@ struct cris_irq_allocation irq_allocations[NR_REAL_IRQS] =
  static unsigned long irq_regs[NR_CPUS] =
  {
    regi_irq,
-#ifdef CONFIG_SMP
-  regi_irq2,
-#endif
  };
  
  #if NR_REAL_IRQS > 32
@@ -431,6 +430,19 @@ crisv32_do_multiple(struct pt_regs* regs)
         irq_exit();
  }
  
+static int crisv32_irq_map(struct irq_domain *h, unsigned int virq,
+                          irq_hw_number_t hw_irq_num)
+{
+       irq_set_chip_and_handler(virq, &crisv32_irq_type, handle_simple_irq);
+
+       return 0;
+}
+
+static struct irq_domain_ops crisv32_irq_ops = {
+       .map    = crisv32_irq_map,
+       .xlate  = irq_domain_xlate_onecell,
+};
+
  /*
   * This is called by start_kernel. It fixes the IRQ masks and setup the
   * interrupt vector table to point to bad_interrupt pointers.
@@ -441,6 +453,8 @@ init_IRQ(void)
         int i;
         int j;
         reg_intr_vect_rw_mask vect_mask = {0};
+       struct device_node *np;
+       struct irq_domain *domain;
  
         /* Clear all interrupts masks. */
         for (i = 0; i < NBR_REGS; i++)
@@ -449,10 +463,15 @@ init_IRQ(void)
         for (i = 0; i < 256; i++)
                 etrax_irv->v[i] = weird_irq;
  
-       /* Point all IRQ's to bad handlers. */
+       np = of_find_compatible_node(NULL, NULL, "axis,crisv32-intc");
+       domain = irq_domain_add_legacy(np, NR_IRQS - FIRST_IRQ,
+                                      FIRST_IRQ, FIRST_IRQ,
+                                      &crisv32_irq_ops, NULL);
+       BUG_ON(!domain);
+       irq_set_default_host(domain);
+       of_node_put(np);
+
         for (i = FIRST_IRQ, j = 0; j < NR_IRQS; i++, j++) {
-               irq_set_chip_and_handler(j, &crisv32_irq_type,
-                                        handle_simple_irq);
                 set_exception_vector(i, interrupt[j]);
         }
  
diff --git a/arch/cris/arch-v32/kernel/setup.c b/arch/cris/arch-v32/kernel/setup.c

index 81715c6..cd1865d 100644 (file)
--- a/arch/cris/arch-v32/kernel/setup.c
+++ b/arch/cris/arch-v32/kernel/setup.c
@@ -63,11 +63,6 @@ int show_cpuinfo(struct seq_file *m, void *v)
  
         info = &cpinfo[ARRAY_SIZE(cpinfo) - 1];
  
-#ifdef CONFIG_SMP
-       if (!cpu_online(cpu))
-               return 0;
-#endif
-
         revision = rdvr();
  
         for (i = 0; i < ARRAY_SIZE(cpinfo); i++) {
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c

index 0c9ce9e..3a36ae6 100644 (file)
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -72,6 +72,9 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
         /* Make that the user-mode flag is set. */
         regs->ccs |= (1 << (U_CCS_BITNR + CCS_SHIFT));
  
+       /* Don't perform syscall restarting */
+       regs->exs = -1;
+
         /* Restore the old USP. */
         err |= __get_user(old_usp, &sc->usp);
         wrusp(old_usp);
@@ -425,6 +428,8 @@ do_signal(int canrestart, struct pt_regs *regs)
  {
         struct ksignal ksig;
  
+       canrestart = canrestart && ((int)regs->exs >= 0);
+
         /*
          * The common case should go fast, which is why this point is
          * reached from kernel-mode. If that's the case, just return
diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c

deleted file mode 100644 (file)

index 0698582..0000000
--- a/arch/cris/arch-v32/kernel/smp.c
+++ /dev/null
@@ -1,358 +0,0 @@
-#include <linux/types.h>
-#include <asm/delay.h>
-#include <irq.h>
-#include <hwregs/intr_vect.h>
-#include <hwregs/intr_vect_defs.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <hwregs/asm/mmu_defs_asm.h>
-#include <hwregs/supp_reg.h>
-#include <linux/atomic.h>
-
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/timex.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/cpumask.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-
-#define IPI_SCHEDULE 1
-#define IPI_CALL 2
-#define IPI_FLUSH_TLB 4
-#define IPI_BOOT 8
-
-#define FLUSH_ALL (void*)0xffffffff
-
-/* Vector of locks used for various atomic operations */
-spinlock_t cris_atomic_locks[] = {
-       [0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(cris_atomic_locks)
-};
-
-/* CPU masks */
-cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
-EXPORT_SYMBOL(phys_cpu_present_map);
-
-/* Variables used during SMP boot */
-volatile int cpu_now_booting = 0;
-volatile struct thread_info *smp_init_current_idle_thread;
-
-/* Variables used during IPI */
-static DEFINE_SPINLOCK(call_lock);
-static DEFINE_SPINLOCK(tlbstate_lock);
-
-struct call_data_struct {
-       void (*func) (void *info);
-       void *info;
-       int wait;
-};
-
-static struct call_data_struct * call_data;
-
-static struct mm_struct* flush_mm;
-static struct vm_area_struct* flush_vma;
-static unsigned long flush_addr;
-
-/* Mode registers */
-static unsigned long irq_regs[NR_CPUS] = {
-  regi_irq,
-  regi_irq2
-};
-
-static irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id);
-static int send_ipi(int vector, int wait, cpumask_t cpu_mask);
-static struct irqaction irq_ipi  = {
-       .handler = crisv32_ipi_interrupt,
-       .flags = 0,
-       .name = "ipi",
-};
-
-extern void cris_mmu_init(void);
-extern void cris_timer_init(void);
-
-/* SMP initialization */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
-       int i;
-
-       /* From now on we can expect IPIs so set them up */
-       setup_irq(IPI_INTR_VECT, &irq_ipi);
-
-       /* Mark all possible CPUs as present */
-       for (i = 0; i < max_cpus; i++)
-               cpumask_set_cpu(i, &phys_cpu_present_map);
-}
-
-void smp_prepare_boot_cpu(void)
-{
-       /* PGD pointer has moved after per_cpu initialization so
-        * update the MMU.
-        */
-       pgd_t **pgd;
-       pgd = (pgd_t**)&per_cpu(current_pgd, smp_processor_id());
-
-       SUPP_BANK_SEL(1);
-       SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-       SUPP_BANK_SEL(2);
-       SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-
-       set_cpu_online(0, true);
-       cpumask_set_cpu(0, &phys_cpu_present_map);
-       set_cpu_possible(0, true);
-}
-
-void __init smp_cpus_done(unsigned int max_cpus)
-{
-}
-
-/* Bring one cpu online.*/
-static int __init
-smp_boot_one_cpu(int cpuid, struct task_struct idle)
-{
-       unsigned timeout;
-       cpumask_t cpu_mask;
-
-       cpumask_clear(&cpu_mask);
-       task_thread_info(idle)->cpu = cpuid;
-
-       /* Information to the CPU that is about to boot */
-       smp_init_current_idle_thread = task_thread_info(idle);
-       cpu_now_booting = cpuid;
-
-       /* Kick it */
-       set_cpu_online(cpuid, true);
-       cpumask_set_cpu(cpuid, &cpu_mask);
-       send_ipi(IPI_BOOT, 0, cpu_mask);
-       set_cpu_online(cpuid, false);
-
-       /* Wait for CPU to come online */
-       for (timeout = 0; timeout < 10000; timeout++) {
-               if(cpu_online(cpuid)) {
-                       cpu_now_booting = 0;
-                       smp_init_current_idle_thread = NULL;
-                       return 0; /* CPU online */
-               }
-               udelay(100);
-               barrier();
-       }
-
-       printk(KERN_CRIT "SMP: CPU:%d is stuck.\n", cpuid);
-       return -1;
-}
-
-/* Secondary CPUs starts using C here. Here we need to setup CPU
- * specific stuff such as the local timer and the MMU. */
-void __init smp_callin(void)
-{
-       int cpu = cpu_now_booting;
-       reg_intr_vect_rw_mask vect_mask = {0};
-
-       /* Initialise the idle task for this CPU */
-       atomic_inc(&init_mm.mm_count);
-       current->active_mm = &init_mm;
-
-       /* Set up MMU */
-       cris_mmu_init();
-       __flush_tlb_all();
-
-       /* Setup local timer. */
-       cris_timer_init();
-
-       /* Enable IRQ and idle */
-       REG_WR(intr_vect, irq_regs[cpu], rw_mask, vect_mask);
-       crisv32_unmask_irq(IPI_INTR_VECT);
-       crisv32_unmask_irq(TIMER0_INTR_VECT);
-       preempt_disable();
-       notify_cpu_starting(cpu);
-       local_irq_enable();
-
-       set_cpu_online(cpu, true);
-       cpu_startup_entry(CPUHP_ONLINE);
-}
-
-/* Stop execution on this CPU.*/
-void stop_this_cpu(void* dummy)
-{
-       local_irq_disable();
-       asm volatile("halt");
-}
-
-/* Other calls */
-void smp_send_stop(void)
-{
-       smp_call_function(stop_this_cpu, NULL, 0);
-}
-
-int setup_profiling_timer(unsigned int multiplier)
-{
-       return -EINVAL;
-}
-
-
-/* cache_decay_ticks is used by the scheduler to decide if a process
- * is "hot" on one CPU. A higher value means a higher penalty to move
- * a process to another CPU. Our cache is rather small so we report
- * 1 tick.
- */
-unsigned long cache_decay_ticks = 1;
-
-int __cpu_up(unsigned int cpu, struct task_struct *tidle)
-{
-       smp_boot_one_cpu(cpu, tidle);
-       return cpu_online(cpu) ? 0 : -ENOSYS;
-}
-
-void smp_send_reschedule(int cpu)
-{
-       cpumask_t cpu_mask;
-       cpumask_clear(&cpu_mask);
-       cpumask_set_cpu(cpu, &cpu_mask);
-       send_ipi(IPI_SCHEDULE, 0, cpu_mask);
-}
-
-/* TLB flushing
- *
- * Flush needs to be done on the local CPU and on any other CPU that
- * may have the same mapping. The mm->cpu_vm_mask is used to keep track
- * of which CPUs that a specific process has been executed on.
- */
-void flush_tlb_common(struct mm_struct* mm, struct vm_area_struct* vma, unsigned long addr)
-{
-       unsigned long flags;
-       cpumask_t cpu_mask;
-
-       spin_lock_irqsave(&tlbstate_lock, flags);
-       cpu_mask = (mm == FLUSH_ALL ? cpu_all_mask : *mm_cpumask(mm));
-       cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-       flush_mm = mm;
-       flush_vma = vma;
-       flush_addr = addr;
-       send_ipi(IPI_FLUSH_TLB, 1, cpu_mask);
-       spin_unlock_irqrestore(&tlbstate_lock, flags);
-}
-
-void flush_tlb_all(void)
-{
-       __flush_tlb_all();
-       flush_tlb_common(FLUSH_ALL, FLUSH_ALL, 0);
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-       __flush_tlb_mm(mm);
-       flush_tlb_common(mm, FLUSH_ALL, 0);
-       /* No more mappings in other CPUs */
-       cpumask_clear(mm_cpumask(mm));
-       cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
-}
-
-void flush_tlb_page(struct vm_area_struct *vma,
-                          unsigned long addr)
-{
-       __flush_tlb_page(vma, addr);
-       flush_tlb_common(vma->vm_mm, vma, addr);
-}
-
-/* Inter processor interrupts
- *
- * The IPIs are used for:
- *   * Force a schedule on a CPU
- *   * FLush TLB on other CPUs
- *   * Call a function on other CPUs
- */
-
-int send_ipi(int vector, int wait, cpumask_t cpu_mask)
-{
-       int i = 0;
-       reg_intr_vect_rw_ipi ipi = REG_RD(intr_vect, irq_regs[i], rw_ipi);
-       int ret = 0;
-
-       /* Calculate CPUs to send to. */
-       cpumask_and(&cpu_mask, &cpu_mask, cpu_online_mask);
-
-       /* Send the IPI. */
-       for_each_cpu(i, &cpu_mask)
-       {
-               ipi.vector |= vector;
-               REG_WR(intr_vect, irq_regs[i], rw_ipi, ipi);
-       }
-
-       /* Wait for IPI to finish on other CPUS */
-       if (wait) {
-               for_each_cpu(i, &cpu_mask) {
-                        int j;
-                        for (j = 0 ; j < 1000; j++) {
-                               ipi = REG_RD(intr_vect, irq_regs[i], rw_ipi);
-                               if (!ipi.vector)
-                                       break;
-                               udelay(100);
-                       }
-
-                       /* Timeout? */
-                       if (ipi.vector) {
-                               printk("SMP call timeout from %d to %d\n", smp_processor_id(), i);
-                               ret = -ETIMEDOUT;
-                               dump_stack();
-                       }
-               }
-       }
-       return ret;
-}
-
-/*
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func)(void *info), void *info, int wait)
-{
-       cpumask_t cpu_mask;
-       struct call_data_struct data;
-       int ret;
-
-       cpumask_setall(&cpu_mask);
-       cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
-
-       WARN_ON(irqs_disabled());
-
-       data.func = func;
-       data.info = info;
-       data.wait = wait;
-
-       spin_lock(&call_lock);
-       call_data = &data;
-       ret = send_ipi(IPI_CALL, wait, cpu_mask);
-       spin_unlock(&call_lock);
-
-       return ret;
-}
-
-irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id)
-{
-       void (*func) (void *info) = call_data->func;
-       void *info = call_data->info;
-       reg_intr_vect_rw_ipi ipi;
-
-       ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi);
-
-       if (ipi.vector & IPI_SCHEDULE) {
-               scheduler_ipi();
-       }
-       if (ipi.vector & IPI_CALL) {
-               func(info);
-       }
-       if (ipi.vector & IPI_FLUSH_TLB) {
-               if (flush_mm == FLUSH_ALL)
-                       __flush_tlb_all();
-               else if (flush_vma == FLUSH_ALL)
-                       __flush_tlb_mm(flush_mm);
-               else
-                       __flush_tlb_page(flush_vma, flush_addr);
-       }
-
-       ipi.vector = 0;
-       REG_WR(intr_vect, irq_regs[smp_processor_id()], rw_ipi, ipi);
-
-       return IRQ_HANDLED;
-}
-
diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c

index c17b01a..4fce9f1 100644 (file)
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c
@@ -8,12 +8,14 @@
  #include <linux/timex.h>
  #include <linux/time.h>
  #include <linux/clocksource.h>
+#include <linux/clockchips.h>
  #include <linux/interrupt.h>
  #include <linux/swap.h>
  #include <linux/sched.h>
  #include <linux/init.h>
  #include <linux/threads.h>
  #include <linux/cpufreq.h>
+#include <linux/sched_clock.h>
  #include <linux/mm.h>
  #include <asm/types.h>
  #include <asm/signal.h>
@@ -36,33 +38,11 @@
  /* Number of 763 counts before watchdog bites */
  #define ETRAX_WD_CNT           ((2*ETRAX_WD_HZ)/HZ + 1)
  
-/* Register the continuos readonly timer available in FS and ARTPEC-3.  */
-static cycle_t read_cont_rotime(struct clocksource *cs)
-{
-       return (u32)REG_RD(timer, regi_timer0, r_time);
-}
-
-static struct clocksource cont_rotime = {
-       .name   = "crisv32_rotime",
-       .rating = 300,
-       .read   = read_cont_rotime,
-       .mask   = CLOCKSOURCE_MASK(32),
-       .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
-};
-
-static int __init etrax_init_cont_rotime(void)
-{
-       clocksource_register_khz(&cont_rotime, 100000);
-       return 0;
-}
-arch_initcall(etrax_init_cont_rotime);
+#define CRISV32_TIMER_FREQ     (100000000lu)
  
  unsigned long timer_regs[NR_CPUS] =
  {
         regi_timer0,
-#ifdef CONFIG_SMP
-       regi_timer2
-#endif
  };
  
  extern int set_rtc_mmss(unsigned long nowtime);
@@ -189,81 +169,104 @@ void handle_watchdog_bite(struct pt_regs *regs)
  #endif
  }
  
-/*
- * timer_interrupt() needs to keep up the real-time clock,
- * as well as call the "xtime_update()" routine every clocktick.
- */
-extern void cris_do_profile(struct pt_regs *regs);
+extern void cris_profile_sample(struct pt_regs *regs);
+static void __iomem *timer_base;
  
-static inline irqreturn_t timer_interrupt(int irq, void *dev_id)
+static void crisv32_clkevt_mode(enum clock_event_mode mode,
+                               struct clock_event_device *dev)
  {
-       struct pt_regs *regs = get_irq_regs();
-       int cpu = smp_processor_id();
-       reg_timer_r_masked_intr masked_intr;
-       reg_timer_rw_ack_intr ack_intr = { 0 };
-
-       /* Check if the timer interrupt is for us (a tmr0 int) */
-       masked_intr = REG_RD(timer, timer_regs[cpu], r_masked_intr);
-       if (!masked_intr.tmr0)
-               return IRQ_NONE;
+       reg_timer_rw_tmr0_ctrl ctrl = {
+               .op = regk_timer_hold,
+               .freq = regk_timer_f100,
+       };
  
-       /* Acknowledge the timer irq. */
-       ack_intr.tmr0 = 1;
-       REG_WR(timer, timer_regs[cpu], rw_ack_intr, ack_intr);
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+}
  
-       /* Reset watchdog otherwise it resets us! */
-       reset_watchdog();
+static int crisv32_clkevt_next_event(unsigned long evt,
+                                    struct clock_event_device *dev)
+{
+       reg_timer_rw_tmr0_ctrl ctrl = {
+               .op = regk_timer_ld,
+               .freq = regk_timer_f100,
+       };
+
+       REG_WR(timer, timer_base, rw_tmr0_div, evt);
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+
+       ctrl.op = regk_timer_run;
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+
+       return 0;
+}
+
+static irqreturn_t crisv32_timer_interrupt(int irq, void *dev_id)
+{
+       struct clock_event_device *evt = dev_id;
+       reg_timer_rw_tmr0_ctrl ctrl = {
+               .op = regk_timer_hold,
+               .freq = regk_timer_f100,
+       };
+       reg_timer_rw_ack_intr ack = { .tmr0 = 1 };
+       reg_timer_r_masked_intr intr;
+
+       intr = REG_RD(timer, timer_base, r_masked_intr);
+       if (!intr.tmr0)
+               return IRQ_NONE;
  
-       /* Update statistics. */
-       update_process_times(user_mode(regs));
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
+       REG_WR(timer, timer_base, rw_ack_intr, ack);
  
-       cris_do_profile(regs); /* Save profiling information */
+       reset_watchdog();
+#ifdef CONFIG_SYSTEM_PROFILER
+       cris_profile_sample(get_irq_regs());
+#endif
  
-       /* The master CPU is responsible for the time keeping. */
-       if (cpu != 0)
-               return IRQ_HANDLED;
+       evt->event_handler(evt);
  
-       /* Call the real timer interrupt handler */
-       xtime_update(1);
         return IRQ_HANDLED;
  }
  
+static struct clock_event_device crisv32_clockevent = {
+       .name = "crisv32-timer",
+       .rating = 300,
+       .features = CLOCK_EVT_FEAT_ONESHOT,
+       .set_mode = crisv32_clkevt_mode,
+       .set_next_event = crisv32_clkevt_next_event,
+};
+
  /* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */
  static struct irqaction irq_timer = {
-       .handler = timer_interrupt,
-       .flags = IRQF_SHARED,
-       .name = "timer"
+       .handler = crisv32_timer_interrupt,
+       .flags = IRQF_TIMER | IRQF_SHARED,
+       .name = "crisv32-timer",
+       .dev_id = &crisv32_clockevent,
  };
  
-void __init cris_timer_init(void)
+static u64 notrace crisv32_timer_sched_clock(void)
  {
-       int cpu = smp_processor_id();
-       reg_timer_rw_tmr0_ctrl tmr0_ctrl = { 0 };
-       reg_timer_rw_tmr0_div tmr0_div = TIMER0_DIV;
-       reg_timer_rw_intr_mask timer_intr_mask;
+       return REG_RD(timer, timer_base, r_time);
+}
  
-       /* Setup the etrax timers.
-        * Base frequency is 100MHz, divider 1000000 -> 100 HZ
-        * We use timer0, so timer1 is free.
-        * The trig timer is used by the fasttimer API if enabled.
-        */
+static void __init crisv32_timer_init(void)
+{
+       reg_timer_rw_intr_mask timer_intr_mask;
+       reg_timer_rw_tmr0_ctrl ctrl = {
+               .op = regk_timer_hold,
+               .freq = regk_timer_f100,
+       };
  
-       tmr0_ctrl.op = regk_timer_ld;
-       tmr0_ctrl.freq = regk_timer_f100;
-       REG_WR(timer, timer_regs[cpu], rw_tmr0_div, tmr0_div);
-       REG_WR(timer, timer_regs[cpu], rw_tmr0_ctrl, tmr0_ctrl); /* Load */
-       tmr0_ctrl.op = regk_timer_run;
-       REG_WR(timer, timer_regs[cpu], rw_tmr0_ctrl, tmr0_ctrl); /* Start */
+       REG_WR(timer, timer_base, rw_tmr0_ctrl, ctrl);
  
-       /* Enable the timer irq. */
-       timer_intr_mask = REG_RD(timer, timer_regs[cpu], rw_intr_mask);
+       timer_intr_mask = REG_RD(timer, timer_base, rw_intr_mask);
         timer_intr_mask.tmr0 = 1;
-       REG_WR(timer, timer_regs[cpu], rw_intr_mask, timer_intr_mask);
+       REG_WR(timer, timer_base, rw_intr_mask, timer_intr_mask);
  }
  
  void __init time_init(void)
  {
-       reg_intr_vect_rw_mask intr_mask;
+       int irq;
+       int ret;
  
         /* Probe for the RTC and read it if it exists.
          * Before the RTC can be probed the loops_per_usec variable needs
@@ -273,17 +276,28 @@ void __init time_init(void)
          */
         loops_per_usec = 50;
  
-       /* Start CPU local timer. */
-       cris_timer_init();
+       irq = TIMER0_INTR_VECT;
+       timer_base = (void __iomem *) regi_timer0;
+
+       crisv32_timer_init();
+
+       sched_clock_register(crisv32_timer_sched_clock, 32,
+                            CRISV32_TIMER_FREQ);
+
+       clocksource_mmio_init(timer_base + REG_RD_ADDR_timer_r_time,
+                             "crisv32-timer", CRISV32_TIMER_FREQ,
+                             300, 32, clocksource_mmio_readl_up);
+
+       crisv32_clockevent.cpumask = cpu_possible_mask;
+       crisv32_clockevent.irq = irq;
  
-       /* Enable the timer irq in global config. */
-       intr_mask = REG_RD_VECT(intr_vect, regi_irq, rw_mask, 1);
-       intr_mask.timer0 = 1;
-       REG_WR_VECT(intr_vect, regi_irq, rw_mask, 1, intr_mask);
+       ret = setup_irq(irq, &irq_timer);
+       if (ret)
+               pr_warn("failed to setup irq %d\n", irq);
  
-       /* Now actually register the timer irq handler that calls
-        * timer_interrupt(). */
-       setup_irq(TIMER0_INTR_VECT, &irq_timer);
+       clockevents_config_and_register(&crisv32_clockevent,
+                                       CRISV32_TIMER_FREQ,
+                                       2, 0xffffffff);
  
         /* Enable watchdog if we should use one. */
  
diff --git a/arch/cris/arch-v32/lib/Makefile b/arch/cris/arch-v32/lib/Makefile

index dd296b9..e91cf02 100644 (file)
--- a/arch/cris/arch-v32/lib/Makefile
+++ b/arch/cris/arch-v32/lib/Makefile
@@ -3,5 +3,5 @@
  #
  
  lib-y  = checksum.o checksumcopy.o string.o usercopy.o memset.o \
-       csumcpfruser.o spinlock.o delay.o strcmp.o
+       csumcpfruser.o delay.o strcmp.o
  
diff --git a/arch/cris/arch-v32/lib/spinlock.S b/arch/cris/arch-v32/lib/spinlock.S

deleted file mode 100644 (file)

index fe610b9..0000000
--- a/arch/cris/arch-v32/lib/spinlock.S
+++ /dev/null
@@ -1,40 +0,0 @@
-;; Core of the spinlock implementation
-;;
-;; Copyright (C) 2004 Axis Communications AB.
-;;
-;; Author: Mikael Starvik
-
-
-       .global cris_spin_lock
-       .type   cris_spin_lock,@function
-       .global cris_spin_trylock
-       .type   cris_spin_trylock,@function
-
-       .text
-
-cris_spin_lock:
-       clearf  p
-1:     test.b  [$r10]
-       beq     1b
-       clearf  p
-       ax
-       clear.b [$r10]
-       bcs     1b
-       clearf  p
-       ret
-       nop
-
-       .size   cris_spin_lock, . - cris_spin_lock
-
-cris_spin_trylock:
-       clearf  p
-1:     move.b  [$r10], $r11
-       ax
-       clear.b [$r10]
-        bcs    1b
-        clearf p
-       ret
-       movu.b  $r11,$r10
-
-       .size   cris_spin_trylock, . - cris_spin_trylock
-
diff --git a/arch/cris/arch-v32/mm/init.c b/arch/cris/arch-v32/mm/init.c

index 3deca52..f5438ca 100644 (file)
--- a/arch/cris/arch-v32/mm/init.c
+++ b/arch/cris/arch-v32/mm/init.c
@@ -40,17 +40,6 @@ void __init cris_mmu_init(void)
          */
         per_cpu(current_pgd, smp_processor_id()) = init_mm.pgd;
  
-#ifdef CONFIG_SMP
-       {
-               pgd_t **pgd;
-               pgd = (pgd_t**)&per_cpu(current_pgd, smp_processor_id());
-               SUPP_BANK_SEL(1);
-               SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-               SUPP_BANK_SEL(2);
-               SUPP_REG_WR(RW_MM_TLB_PGD, pgd);
-       }
-#endif
-
         /* Initialise the TLB. Function found in tlb.c. */
         tlb_init();
  
diff --git a/arch/cris/arch-v32/mm/mmu.S b/arch/cris/arch-v32/mm/mmu.S

index 72727c1..c098104 100644 (file)
--- a/arch/cris/arch-v32/mm/mmu.S
+++ b/arch/cris/arch-v32/mm/mmu.S
@@ -115,11 +115,7 @@
         move.d  $r0, [$r1]      ; last_refill_cause = rw_mm_cause
  
  3:     ; Probably not in a loop, continue normal processing
-#ifdef CONFIG_SMP
-       move    $s7, $acr       ; PGD
-#else
         move.d  current_pgd, $acr ; PGD
-#endif
         ; Look up PMD in PGD
         lsrq    24, $r0 ; Get PMD index into PGD (bit 24-31)
         move.d  [$acr], $acr    ; PGD for the current process
diff --git a/arch/cris/boot/dts/Makefile b/arch/cris/boot/dts/Makefile

new file mode 100644 (file)

index 0000000..faf69fb
--- /dev/null
+++ b/arch/cris/boot/dts/Makefile
@@ -0,0 +1,6 @@
+BUILTIN_DTB := $(patsubst "%",%,$(CONFIG_BUILTIN_DTB)).dtb.o
+ifneq ($(CONFIG_BUILTIN_DTB),"")
+obj-$(CONFIG_OF) += $(BUILTIN_DTB)
+endif
+
+clean-files := *.dtb.S
diff --git a/arch/cris/boot/dts/dev88.dts b/arch/cris/boot/dts/dev88.dts

new file mode 100644 (file)

index 0000000..4fa5a3f
--- /dev/null
+++ b/arch/cris/boot/dts/dev88.dts
@@ -0,0 +1,18 @@
+/dts-v1/;
+
+/include/ "etraxfs.dtsi"
+
+/ {
+       model = "Axis 88 Developer Board";
+       compatible = "axis,dev88";
+
+       aliases {
+               serial0 = &uart0;
+       };
+
+       soc {
+               uart0: serial@b00260000 {
+                       status = "okay";
+               };
+       };
+};
diff --git a/arch/cris/boot/dts/etraxfs.dtsi b/arch/cris/boot/dts/etraxfs.dtsi

new file mode 100644 (file)

index 0000000..909bced
--- /dev/null
+++ b/arch/cris/boot/dts/etraxfs.dtsi
@@ -0,0 +1,38 @@
+/ {
+       #address-cells = <1>;
+       #size-cells = <1>;
+       interrupt-parent = <&intc>;
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               cpu@0 {
+                       device_type = "cpu";
+                       model = "axis,crisv32";
+                       reg = <0>;
+               };
+       };
+
+       soc {
+               compatible = "simple-bus";
+               model = "etraxfs";
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges;
+
+               intc: interrupt-controller {
+                       compatible = "axis,crisv32-intc";
+                       reg = <0xb001c000 0x1000>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
+               };
+
+               serial@b00260000 {
+                       compatible = "axis,etraxfs-uart";
+                       reg = <0xb0026000 0x1000>;
+                       interrupts = <68>;
+                       status = "disabled";
+               };
+       };
+};
diff --git a/arch/cris/include/arch-v10/arch/atomic.h b/arch/cris/include/arch-v10/arch/atomic.h

deleted file mode 100644 (file)

index 6ef5e7d..0000000
--- a/arch/cris/include/arch-v10/arch/atomic.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_CRIS_ARCH_ATOMIC__
-#define __ASM_CRIS_ARCH_ATOMIC__
-
-#define cris_atomic_save(addr, flags) local_irq_save(flags);
-#define cris_atomic_restore(addr, flags) local_irq_restore(flags);
-
-#endif
diff --git a/arch/cris/include/arch-v10/arch/system.h b/arch/cris/include/arch-v10/arch/system.h

index 935fde3..9b5580f 100644 (file)
--- a/arch/cris/include/arch-v10/arch/system.h
+++ b/arch/cris/include/arch-v10/arch/system.h
@@ -36,12 +36,4 @@ static inline unsigned long _get_base(char * addr)
    return 0;
  }
  
-#define nop() __asm__ __volatile__ ("nop");
-
-#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
-#define tas(ptr) (xchg((ptr),1))
-
-struct __xchg_dummy { unsigned long a[100]; };
-#define __xg(x) ((struct __xchg_dummy *)(x))
-
  #endif
diff --git a/arch/cris/include/arch-v32/arch/atomic.h b/arch/cris/include/arch-v32/arch/atomic.h

deleted file mode 100644 (file)

index 852ceff..0000000
--- a/arch/cris/include/arch-v32/arch/atomic.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __ASM_CRIS_ARCH_ATOMIC__
-#define __ASM_CRIS_ARCH_ATOMIC__
-
-#include <linux/spinlock_types.h>
-
-extern void cris_spin_unlock(void *l, int val);
-extern void cris_spin_lock(void *l);
-extern int cris_spin_trylock(void* l);
-
-#ifndef CONFIG_SMP
-#define cris_atomic_save(addr, flags) local_irq_save(flags);
-#define cris_atomic_restore(addr, flags) local_irq_restore(flags);
-#else
-
-extern spinlock_t cris_atomic_locks[];
-#define LOCK_COUNT 128
-#define HASH_ADDR(a) (((int)a) & 127)
-
-#define cris_atomic_save(addr, flags) \
-  local_irq_save(flags); \
-  cris_spin_lock((void *)&cris_atomic_locks[HASH_ADDR(addr)].raw_lock.slock);
-
-#define cris_atomic_restore(addr, flags) \
-  { \
-    spinlock_t *lock = (void*)&cris_atomic_locks[HASH_ADDR(addr)]; \
-    __asm__ volatile ("move.d %1,%0" \
-                       : "=m" (lock->raw_lock.slock) \
-                       : "r" (1) \
-                       : "memory"); \
-    local_irq_restore(flags); \
-  }
-
-#endif
-
-#endif
-
diff --git a/arch/cris/include/arch-v32/arch/processor.h b/arch/cris/include/arch-v32/arch/processor.h

index a024b7d..5687592 100644 (file)
--- a/arch/cris/include/arch-v32/arch/processor.h
+++ b/arch/cris/include/arch-v32/arch/processor.h
@@ -25,8 +25,7 @@ struct thread_struct {
   */
  #define TASK_SIZE      (0xB0000000UL)
  
-/* CCS I=1, enable interrupts. */
-#define INIT_THREAD { 0, 0, (1 << I_CCS_BITNR) }
+#define INIT_THREAD { }
  
  #define KSTK_EIP(tsk)          \
  ({                             \
diff --git a/arch/cris/include/arch-v32/arch/spinlock.h b/arch/cris/include/arch-v32/arch/spinlock.h

deleted file mode 100644 (file)

index f132755..0000000
--- a/arch/cris/include/arch-v32/arch/spinlock.h
+++ /dev/null
@@ -1,131 +0,0 @@
-#ifndef __ASM_ARCH_SPINLOCK_H
-#define __ASM_ARCH_SPINLOCK_H
-
-#include <linux/spinlock_types.h>
-
-#define RW_LOCK_BIAS 0x01000000
-
-extern void cris_spin_unlock(void *l, int val);
-extern void cris_spin_lock(void *l);
-extern int cris_spin_trylock(void *l);
-
-static inline int arch_spin_is_locked(arch_spinlock_t *x)
-{
-       return *(volatile signed char *)(&(x)->slock) <= 0;
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-       __asm__ volatile ("move.d %1,%0" \
-                         : "=m" (lock->slock) \
-                         : "r" (1) \
-                         : "memory");
-}
-
-static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
-       while (arch_spin_is_locked(lock))
-               cpu_relax();
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-       return cris_spin_trylock((void *)&lock->slock);
-}
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-       cris_spin_lock((void *)&lock->slock);
-}
-
-static inline void
-arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-       arch_spin_lock(lock);
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- *
- */
-
-static inline int arch_read_can_lock(arch_rwlock_t *x)
-{
-       return (int)(x)->lock > 0;
-}
-
-static inline int arch_write_can_lock(arch_rwlock_t *x)
-{
-       return (x)->lock == RW_LOCK_BIAS;
-}
-
-static  inline void arch_read_lock(arch_rwlock_t *rw)
-{
-       arch_spin_lock(&rw->slock);
-       while (rw->lock == 0);
-       rw->lock--;
-       arch_spin_unlock(&rw->slock);
-}
-
-static  inline void arch_write_lock(arch_rwlock_t *rw)
-{
-       arch_spin_lock(&rw->slock);
-       while (rw->lock != RW_LOCK_BIAS);
-       rw->lock = 0;
-       arch_spin_unlock(&rw->slock);
-}
-
-static  inline void arch_read_unlock(arch_rwlock_t *rw)
-{
-       arch_spin_lock(&rw->slock);
-       rw->lock++;
-       arch_spin_unlock(&rw->slock);
-}
-
-static  inline void arch_write_unlock(arch_rwlock_t *rw)
-{
-       arch_spin_lock(&rw->slock);
-       while (rw->lock != RW_LOCK_BIAS);
-       rw->lock = RW_LOCK_BIAS;
-       arch_spin_unlock(&rw->slock);
-}
-
-static  inline int arch_read_trylock(arch_rwlock_t *rw)
-{
-       int ret = 0;
-       arch_spin_lock(&rw->slock);
-       if (rw->lock != 0) {
-               rw->lock--;
-               ret = 1;
-       }
-       arch_spin_unlock(&rw->slock);
-       return ret;
-}
-
-static  inline int arch_write_trylock(arch_rwlock_t *rw)
-{
-       int ret = 0;
-       arch_spin_lock(&rw->slock);
-       if (rw->lock == RW_LOCK_BIAS) {
-               rw->lock = 0;
-               ret = 1;
-       }
-       arch_spin_unlock(&rw->slock);
-       return ret;
-}
-
-#define _raw_read_lock_flags(lock, flags) _raw_read_lock(lock)
-#define _raw_write_lock_flags(lock, flags) _raw_write_lock(lock)
-
-#define arch_spin_relax(lock)  cpu_relax()
-#define arch_read_relax(lock)  cpu_relax()
-#define arch_write_relax(lock) cpu_relax()
-
-#endif /* __ASM_ARCH_SPINLOCK_H */
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild

index 889f2de..057e518 100644 (file)
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -1,16 +1,29 @@
-
+generic-y += atomic.h
  generic-y += barrier.h
  generic-y += clkdev.h
+generic-y += cmpxchg.h
  generic-y += cputime.h
+generic-y += device.h
+generic-y += div64.h
  generic-y += exec.h
+generic-y += emergency-restart.h
+generic-y += futex.h
+generic-y += hardirq.h
+generic-y += irq_regs.h
  generic-y += irq_work.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
  generic-y += kvm_para.h
  generic-y += linkage.h
+generic-y += local.h
+generic-y += local64.h
  generic-y += mcs_spinlock.h
  generic-y += module.h
+generic-y += percpu.h
  generic-y += preempt.h
  generic-y += scatterlist.h
  generic-y += sections.h
+generic-y += topology.h
  generic-y += trace_clock.h
  generic-y += vga.h
  generic-y += xor.h
diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h

deleted file mode 100644 (file)

index 279766a..0000000
--- a/arch/cris/include/asm/atomic.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/* $Id: atomic.h,v 1.3 2001/07/25 16:15:19 bjornw Exp $ */
-
-#ifndef __ASM_CRIS_ATOMIC__
-#define __ASM_CRIS_ATOMIC__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/cmpxchg.h>
-#include <arch/atomic.h>
-#include <arch/system.h>
-#include <asm/barrier.h>
-
-/*
- * Atomic operations that C can't guarantee us.  Useful for
- * resource counting etc..
- */
-
-#define ATOMIC_INIT(i)  { (i) }
-
-#define atomic_read(v) ACCESS_ONCE((v)->counter)
-#define atomic_set(v,i) (((v)->counter) = (i))
-
-/* These should be written in asm but we do it in C for now. */
-
-#define ATOMIC_OP(op, c_op)                                            \
-static inline void atomic_##op(int i, volatile atomic_t *v)            \
-{                                                                      \
-       unsigned long flags;                                            \
-       cris_atomic_save(v, flags);                                     \
-       v->counter c_op i;                                              \
-       cris_atomic_restore(v, flags);                                  \
-}                                                                      \
-
-#define ATOMIC_OP_RETURN(op, c_op)                                     \
-static inline int atomic_##op##_return(int i, volatile atomic_t *v)    \
-{                                                                      \
-       unsigned long flags;                                            \
-       int retval;                                                     \
-       cris_atomic_save(v, flags);                                     \
-       retval = (v->counter c_op i);                                   \
-       cris_atomic_restore(v, flags);                                  \
-       return retval;                                                  \
-}
-
-#define ATOMIC_OPS(op, c_op) ATOMIC_OP(op, c_op) ATOMIC_OP_RETURN(op, c_op)
-
-ATOMIC_OPS(add, +=)
-ATOMIC_OPS(sub, -=)
-
-#undef ATOMIC_OPS
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
-#define atomic_add_negative(a, v)      (atomic_add_return((a), (v)) < 0)
-
-static inline int atomic_sub_and_test(int i, volatile atomic_t *v)
-{
-       int retval;
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       retval = (v->counter -= i) == 0;
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-
-static inline void atomic_inc(volatile atomic_t *v)
-{
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       (v->counter)++;
-       cris_atomic_restore(v, flags);
-}
-
-static inline void atomic_dec(volatile atomic_t *v)
-{
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       (v->counter)--;
-       cris_atomic_restore(v, flags);
-}
-
-static inline int atomic_inc_return(volatile atomic_t *v)
-{
-       unsigned long flags;
-       int retval;
-       cris_atomic_save(v, flags);
-       retval = ++(v->counter);
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-
-static inline int atomic_dec_return(volatile atomic_t *v)
-{
-       unsigned long flags;
-       int retval;
-       cris_atomic_save(v, flags);
-       retval = --(v->counter);
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-static inline int atomic_dec_and_test(volatile atomic_t *v)
-{
-       int retval;
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       retval = --(v->counter) == 0;
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-
-static inline int atomic_inc_and_test(volatile atomic_t *v)
-{
-       int retval;
-       unsigned long flags;
-       cris_atomic_save(v, flags);
-       retval = ++(v->counter) == 0;
-       cris_atomic_restore(v, flags);
-       return retval;
-}
-
-static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
-{
-       int ret;
-       unsigned long flags;
-
-       cris_atomic_save(v, flags);
-       ret = v->counter;
-       if (likely(ret == old))
-               v->counter = new;
-       cris_atomic_restore(v, flags);
-       return ret;
-}
-
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
-       int ret;
-       unsigned long flags;
-
-       cris_atomic_save(v, flags);
-       ret = v->counter;
-       if (ret != u)
-               v->counter += a;
-       cris_atomic_restore(v, flags);
-       return ret;
-}
-
-#endif
diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h

index bd49a54..8062cb5 100644 (file)
--- a/arch/cris/include/asm/bitops.h
+++ b/arch/cris/include/asm/bitops.h
@@ -19,119 +19,10 @@
  #endif
  
  #include <arch/bitops.h>
-#include <linux/atomic.h>
  #include <linux/compiler.h>
  #include <asm/barrier.h>
  
-/*
- * set_bit - Atomically set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * This function is atomic and may not be reordered.  See __set_bit()
- * if you do not require the atomic guarantees.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-
-#define set_bit(nr, addr)    (void)test_and_set_bit(nr, addr)
-
-/*
- * clear_bit - Clears a bit in memory
- * @nr: Bit to clear
- * @addr: Address to start counting from
- *
- * clear_bit() is atomic and may not be reordered.  However, it does
- * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
- * in order to ensure changes are visible on other processors.
- */
-
-#define clear_bit(nr, addr)  (void)test_and_clear_bit(nr, addr)
-
-/*
- * change_bit - Toggle a bit in memory
- * @nr: Bit to change
- * @addr: Address to start counting from
- *
- * change_bit() is atomic and may not be reordered.
- * Note that @nr may be almost arbitrarily large; this function is not
- * restricted to acting on a single-word quantity.
- */
-
-#define change_bit(nr, addr) (void)test_and_change_bit(nr, addr)
-
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned int mask, retval;
-       unsigned long flags;
-       unsigned int *adr = (unsigned int *)addr;
-       
-       adr += nr >> 5;
-       mask = 1 << (nr & 0x1f);
-       cris_atomic_save(addr, flags);
-       retval = (mask & *adr) != 0;
-       *adr |= mask;
-       cris_atomic_restore(addr, flags);
-       return retval;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned int mask, retval;
-       unsigned long flags;
-       unsigned int *adr = (unsigned int *)addr;
-       
-       adr += nr >> 5;
-       mask = 1 << (nr & 0x1f);
-       cris_atomic_save(addr, flags);
-       retval = (mask & *adr) != 0;
-       *adr &= ~mask;
-       cris_atomic_restore(addr, flags);
-       return retval;
-}
-
-/**
- * test_and_change_bit - Change a bit and return its old value
- * @nr: Bit to change
- * @addr: Address to count from
- *
- * This operation is atomic and cannot be reordered.  
- * It also implies a memory barrier.
- */
-
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
-{
-       unsigned int mask, retval;
-       unsigned long flags;
-       unsigned int *adr = (unsigned int *)addr;
-       adr += nr >> 5;
-       mask = 1 << (nr & 0x1f);
-       cris_atomic_save(addr, flags);
-       retval = (mask & *adr) != 0;
-       *adr ^= mask;
-       cris_atomic_restore(addr, flags);
-       return retval;
-}
-
+#include <asm-generic/bitops/atomic.h>
  #include <asm-generic/bitops/non-atomic.h>
  
  /*
diff --git a/arch/cris/include/asm/cmpxchg.h b/arch/cris/include/asm/cmpxchg.h

deleted file mode 100644 (file)

index b756dac..0000000
--- a/arch/cris/include/asm/cmpxchg.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef __ASM_CRIS_CMPXCHG__
-#define __ASM_CRIS_CMPXCHG__
-
-#include <linux/irqflags.h>
-
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
-{
-  /* since Etrax doesn't have any atomic xchg instructions, we need to disable
-     irq's (if enabled) and do it with move.d's */
-  unsigned long flags,temp;
-  local_irq_save(flags); /* save flags, including irq enable bit and shut off irqs */
-  switch (size) {
-  case 1:
-    *((unsigned char *)&temp) = x;
-    x = *(unsigned char *)ptr;
-    *(unsigned char *)ptr = *((unsigned char *)&temp);
-    break;
-  case 2:
-    *((unsigned short *)&temp) = x;
-    x = *(unsigned short *)ptr;
-    *(unsigned short *)ptr = *((unsigned short *)&temp);
-    break;
-  case 4:
-    temp = x;
-    x = *(unsigned long *)ptr;
-    *(unsigned long *)ptr = temp;
-    break;
-  }
-  local_irq_restore(flags); /* restore irq enable bit */
-  return x;
-}
-
-#define xchg(ptr,x) \
-       ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
-
-#define tas(ptr) (xchg((ptr),1))
-
-#include <asm-generic/cmpxchg-local.h>
-
-/*
- * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make
- * them available.
- */
-#define cmpxchg_local(ptr, o, n)                                              \
-       ((__typeof__(*(ptr)))__cmpxchg_local_generic((ptr), (unsigned long)(o),\
-                       (unsigned long)(n), sizeof(*(ptr))))
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
-
-#ifndef CONFIG_SMP
-#include <asm-generic/cmpxchg.h>
-#endif
-
-#endif /* __ASM_CRIS_CMPXCHG__ */
diff --git a/arch/cris/include/asm/device.h b/arch/cris/include/asm/device.h

deleted file mode 100644 (file)

index d8f9872..0000000
--- a/arch/cris/include/asm/device.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#include <asm-generic/device.h>
-
diff --git a/arch/cris/include/asm/div64.h b/arch/cris/include/asm/div64.h

deleted file mode 100644 (file)

index 6cd978c..0000000
--- a/arch/cris/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/cris/include/asm/elf.h b/arch/cris/include/asm/elf.h

index 30ded8f..c2a394f 100644 (file)
--- a/arch/cris/include/asm/elf.h
+++ b/arch/cris/include/asm/elf.h
@@ -71,7 +71,7 @@ typedef unsigned long elf_fpregset_t;
     the loader.  We need to make sure that it is out of the way of the program
     that it will "exec", and that there is sufficient room for the brk.  */
  
-#define ELF_ET_DYN_BASE         (2 * TASK_SIZE / 3)
+#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
  
  /* This yields a mask that user programs can use to figure out what
     instruction set this CPU supports.  This could be done in user space,
diff --git a/arch/cris/include/asm/emergency-restart.h b/arch/cris/include/asm/emergency-restart.h

deleted file mode 100644 (file)

index 108d8c4..0000000
--- a/arch/cris/include/asm/emergency-restart.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/cris/include/asm/futex.h b/arch/cris/include/asm/futex.h

deleted file mode 100644 (file)

index 6a332a9..0000000
--- a/arch/cris/include/asm/futex.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
-
-#include <asm-generic/futex.h>
-
-#endif
diff --git a/arch/cris/include/asm/hardirq.h b/arch/cris/include/asm/hardirq.h

deleted file mode 100644 (file)

index 04126f7..0000000
--- a/arch/cris/include/asm/hardirq.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __ASM_HARDIRQ_H
-#define __ASM_HARDIRQ_H
-
-#include <asm/irq.h>
-#include <asm-generic/hardirq.h>
-
-#endif /* __ASM_HARDIRQ_H */
diff --git a/arch/cris/include/asm/irq_regs.h b/arch/cris/include/asm/irq_regs.h

deleted file mode 100644 (file)

index 3dd9c0b..0000000
--- a/arch/cris/include/asm/irq_regs.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/cris/include/asm/kdebug.h b/arch/cris/include/asm/kdebug.h

deleted file mode 100644 (file)

index 6ece1b0..0000000
--- a/arch/cris/include/asm/kdebug.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/kdebug.h>
diff --git a/arch/cris/include/asm/kmap_types.h b/arch/cris/include/asm/kmap_types.h

deleted file mode 100644 (file)

index d2d643c..0000000
--- a/arch/cris/include/asm/kmap_types.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_KMAP_TYPES_H
-#define _ASM_KMAP_TYPES_H
-
-/* Dummy header just to define km_type.  None of this
- * is actually used on cris. 
- */
-
-#include <asm-generic/kmap_types.h>
-
-#endif
diff --git a/arch/cris/include/asm/local.h b/arch/cris/include/asm/local.h

deleted file mode 100644 (file)

index c11c530..0000000
--- a/arch/cris/include/asm/local.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h

deleted file mode 100644 (file)

index 36c93b5..0000000
--- a/arch/cris/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/percpu.h b/arch/cris/include/asm/percpu.h

deleted file mode 100644 (file)

index 6db9b43..0000000
--- a/arch/cris/include/asm/percpu.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _CRIS_PERCPU_H
-#define _CRIS_PERCPU_H
-
-#include <asm-generic/percpu.h>
-
-#endif /* _CRIS_PERCPU_H */
diff --git a/arch/cris/include/asm/smp.h b/arch/cris/include/asm/smp.h

deleted file mode 100644 (file)

index c615a06..0000000
--- a/arch/cris/include/asm/smp.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __ASM_SMP_H
-#define __ASM_SMP_H
-
-#include <linux/cpumask.h>
-
-extern cpumask_t phys_cpu_present_map;
-
-#define raw_smp_processor_id() (current_thread_info()->cpu)
-
-#endif
diff --git a/arch/cris/include/asm/spinlock.h b/arch/cris/include/asm/spinlock.h

deleted file mode 100644 (file)

index ed816b5..0000000
--- a/arch/cris/include/asm/spinlock.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <arch/spinlock.h>
diff --git a/arch/cris/include/asm/tlbflush.h b/arch/cris/include/asm/tlbflush.h

index 20697e7..b424f43 100644 (file)
--- a/arch/cris/include/asm/tlbflush.h
+++ b/arch/cris/include/asm/tlbflush.h
@@ -22,16 +22,9 @@ extern void __flush_tlb_mm(struct mm_struct *mm);
  extern void __flush_tlb_page(struct vm_area_struct *vma,
                            unsigned long addr);
  
-#ifdef CONFIG_SMP
-extern void flush_tlb_all(void);
-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, 
-                          unsigned long addr);
-#else
  #define flush_tlb_all __flush_tlb_all
  #define flush_tlb_mm __flush_tlb_mm
  #define flush_tlb_page __flush_tlb_page
-#endif
  
  static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
  {
diff --git a/arch/cris/include/asm/topology.h b/arch/cris/include/asm/topology.h

deleted file mode 100644 (file)

index 2ac613d..0000000
--- a/arch/cris/include/asm/topology.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_CRIS_TOPOLOGY_H
-#define _ASM_CRIS_TOPOLOGY_H
-
-#include <asm-generic/topology.h>
-
-#endif /* _ASM_CRIS_TOPOLOGY_H */
diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile

index b45640b..edef71f 100644 (file)
--- a/arch/cris/kernel/Makefile
+++ b/arch/cris/kernel/Makefile
@@ -7,6 +7,7 @@ CPPFLAGS_vmlinux.lds := -DDRAM_VIRTUAL_BASE=0x$(CONFIG_ETRAX_DRAM_VIRTUAL_BASE)
  extra-y        := vmlinux.lds
  
  obj-y   := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
+obj-y += devicetree.o
  
  obj-$(CONFIG_MODULES)    += crisksyms.o
  obj-$(CONFIG_MODULES)   += module.o
diff --git a/arch/cris/kernel/devicetree.c b/arch/cris/kernel/devicetree.c

new file mode 100644 (file)

index 0000000..53ff8d7
--- /dev/null
+++ b/arch/cris/kernel/devicetree.c
@@ -0,0 +1,14 @@
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/printk.h>
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+       pr_err("%s(%llx, %llx)\n",
+              __func__, base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+       return alloc_bootmem_align(size, align);
+}
diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c

index 58d44ee..fd3427e 100644 (file)
--- a/arch/cris/kernel/ptrace.c
+++ b/arch/cris/kernel/ptrace.c
@@ -42,3 +42,26 @@ void do_notify_resume(int canrestart, struct pt_regs *regs,
                 tracehook_notify_resume(regs);
         }
  }
+
+void do_work_pending(int syscall, struct pt_regs *regs,
+                    unsigned int thread_flags)
+{
+       do {
+               if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+                       schedule();
+               } else {
+                       if (unlikely(!user_mode(regs)))
+                               return;
+                       local_irq_enable();
+                       if (thread_flags & _TIF_SIGPENDING) {
+                               do_signal(syscall, regs);
+                               syscall = 0;
+                       } else {
+                               clear_thread_flag(TIF_NOTIFY_RESUME);
+                               tracehook_notify_resume(regs);
+                       }
+               }
+               local_irq_disable();
+               thread_flags = current_thread_info()->flags;
+       } while (thread_flags & _TIF_WORK_MASK);
+}
diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c

index 905b70e..bb12aa9 100644 (file)
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -19,6 +19,9 @@
  #include <linux/utsname.h>
  #include <linux/pfn.h>
  #include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
  #include <asm/setup.h>
  #include <arch/system.h>
  
@@ -64,6 +67,10 @@ void __init setup_arch(char **cmdline_p)
         unsigned long start_pfn, max_pfn;
         unsigned long memory_start;
  
+#ifdef CONFIG_OF
+       early_init_dt_scan(__dtb_start);
+#endif
+
         /* register an initial console printing routine for printk's */
  
         init_etrax_debug();
@@ -141,6 +148,8 @@ void __init setup_arch(char **cmdline_p)
  
         reserve_bootmem(PFN_PHYS(start_pfn), bootmap_size, BOOTMEM_DEFAULT);
  
+       unflatten_and_copy_device_tree();
+
         /* paging_init() sets up the MMU and marks all pages as reserved */
  
         paging_init();
@@ -204,3 +213,9 @@ static int __init topology_init(void)
  
  subsys_initcall(topology_init);
  
+static int __init cris_of_init(void)
+{
+       of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+       return 0;
+}
+core_initcall(cris_of_init);
diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c

index fe6acda..7780d37 100644 (file)
--- a/arch/cris/kernel/time.c
+++ b/arch/cris/kernel/time.c
@@ -79,11 +79,13 @@ cris_do_profile(struct pt_regs* regs)
  #endif
  }
  
+#ifndef CONFIG_GENERIC_SCHED_CLOCK
  unsigned long long sched_clock(void)
  {
         return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ) +
                 get_ns_in_jiffie();
  }
+#endif
  
  static int
  __init init_udelay(void)
diff --git a/arch/frv/include/asm/io.h b/arch/frv/include/asm/io.h

index 99bb7ef..0b78bc8 100644 (file)
--- a/arch/frv/include/asm/io.h
+++ b/arch/frv/include/asm/io.h
@@ -342,6 +342,11 @@ static inline void iowrite32(u32 val, void __iomem *p)
                 __flush_PCI_writes();
  }
  
+#define ioread16be(addr)       be16_to_cpu(ioread16(addr))
+#define ioread32be(addr)       be32_to_cpu(ioread32(addr))
+#define iowrite16be(v, addr)   iowrite16(cpu_to_be16(v), (addr))
+#define iowrite32be(v, addr)   iowrite32(cpu_to_be32(v), (addr))
+
  static inline void ioread8_rep(void __iomem *p, void *dst, unsigned long count)
  {
         io_insb((unsigned long) p, dst, count);
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig

index 4f9a666..76d25b2 100644 (file)
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -15,6 +15,7 @@ config IA64
         select ARCH_MIGHT_HAVE_PC_SERIO
         select PCI if (!IA64_HP_SIM)
         select ACPI if (!IA64_HP_SIM)
+       select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
         select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
         select HAVE_UNSTABLE_SCHED_CLOCK
         select HAVE_IDE
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c

index 35bf22c..b1698bc 100644 (file)
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -887,7 +887,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
  }
  
  /* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
  {
         return _acpi_map_lsapic(handle, physid, pcpu);
  }
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c

index 5f4243f..60e02f7 100644 (file)
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2159,7 +2159,7 @@ static const struct file_operations pfm_file_ops = {
  static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen)
  {
         return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]",
-                            dentry->d_inode->i_ino);
+                            d_inode(dentry)->i_ino);
  }
  
  static const struct dentry_operations pfmfs_dentry_operations = {
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c

index 48cc657..d4e162d 100644 (file)
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -240,15 +240,12 @@ static acpi_status resource_to_window(struct acpi_resource *resource,
          * We're only interested in _CRS descriptors that are
          *      - address space descriptors for memory or I/O space
          *      - non-zero size
-        *      - producers, i.e., the address space is routed downstream,
-        *        not consumed by the bridge itself
          */
         status = acpi_resource_to_address64(resource, addr);
         if (ACPI_SUCCESS(status) &&
             (addr->resource_type == ACPI_MEMORY_RANGE ||
              addr->resource_type == ACPI_IO_RANGE) &&
-           addr->address.address_length &&
-           addr->producer_consumer == ACPI_PRODUCER)
+           addr->address.address_length)
                 return AE_OK;
  
         return AE_ERROR;
diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c

index 483dff9..7f54618 100644 (file)
--- a/arch/metag/kernel/process.c
+++ b/arch/metag/kernel/process.c
@@ -174,8 +174,11 @@ void show_regs(struct pt_regs *regs)
         show_trace(NULL, (unsigned long *)regs->ctx.AX[0].U0, regs);
  }
  
+/*
+ * Copy architecture-specific thread state
+ */
  int copy_thread(unsigned long clone_flags, unsigned long usp,
-               unsigned long arg, struct task_struct *tsk)
+               unsigned long kthread_arg, struct task_struct *tsk)
  {
         struct pt_regs *childregs = task_pt_regs(tsk);
         void *kernel_context = ((void *) childregs +
@@ -202,12 +205,13 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
                 global_base = __core_reg_get(A1GbP);
                 childregs->ctx.AX[0].U1 = (unsigned long) global_base;
                 childregs->ctx.AX[0].U0 = (unsigned long) kernel_context;
-               /* Set D1Ar1=arg and D1RtP=usp (fn) */
+               /* Set D1Ar1=kthread_arg and D1RtP=usp (fn) */
                 childregs->ctx.DX[4].U1 = usp;
-               childregs->ctx.DX[3].U1 = arg;
+               childregs->ctx.DX[3].U1 = kthread_arg;
                 tsk->thread.int_depth = 2;
                 return 0;
         }
+
         /*
          * Get a pointer to where the new child's register block should have
          * been pushed.
diff --git a/arch/mn10300/include/asm/io.h b/arch/mn10300/include/asm/io.h

index 897ba3c..cc4a2ba 100644 (file)
--- a/arch/mn10300/include/asm/io.h
+++ b/arch/mn10300/include/asm/io.h
@@ -197,6 +197,11 @@ static inline void outsl(unsigned long addr, const void *buffer, int count)
  #define iowrite16(v, addr)     writew((v), (addr))
  #define iowrite32(v, addr)     writel((v), (addr))
  
+#define ioread16be(addr)       be16_to_cpu(readw(addr))
+#define ioread32be(addr)       be32_to_cpu(readl(addr))
+#define iowrite16be(v, addr)   writew(cpu_to_be16(v), (addr))
+#define iowrite32be(v, addr)   writel(cpu_to_be32(v), (addr))
+
  #define ioread8_rep(p, dst, count) \
         insb((unsigned long) (p), (dst), (count))
  #define ioread16_rep(p, dst, count) \
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild

index 01c75f3..24b3d89 100644 (file)
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -46,7 +46,6 @@ generic-y += segment.h
  generic-y += sembuf.h
  generic-y += serial.h
  generic-y += shmbuf.h
-generic-y += shmparam.h
  generic-y += siginfo.h
  generic-y += signal.h
  generic-y += socket.h
diff --git a/arch/nios2/include/asm/shmparam.h b/arch/nios2/include/asm/shmparam.h

new file mode 100644 (file)

index 0000000..6078429
--- /dev/null
+++ b/arch/nios2/include/asm/shmparam.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright Altera Corporation (C) <2015>. All rights reserved
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_NIOS2_SHMPARAM_H
+#define _ASM_NIOS2_SHMPARAM_H
+
+#define        SHMLBA  CONFIG_NIOS2_DCACHE_SIZE
+
+#endif /* _ASM_NIOS2_SHMPARAM_H */
diff --git a/arch/nios2/include/uapi/asm/ptrace.h b/arch/nios2/include/uapi/asm/ptrace.h

index eff00e6..1d35de9 100644 (file)
--- a/arch/nios2/include/uapi/asm/ptrace.h
+++ b/arch/nios2/include/uapi/asm/ptrace.h
@@ -14,6 +14,8 @@
  
  #ifndef __ASSEMBLY__
  
+#include <linux/types.h>
+
  /*
   * Register numbers used by 'ptrace' system call interface.
   */
diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S

index 27b006c..1e515cc 100644 (file)
--- a/arch/nios2/kernel/entry.S
+++ b/arch/nios2/kernel/entry.S
@@ -92,35 +92,35 @@ exception_table:
  
  trap_table:
         .word   handle_system_call      /* 0  */
-       .word   instruction_trap        /* 1  */
-       .word   instruction_trap        /* 2  */
-       .word   instruction_trap        /* 3  */
-       .word   instruction_trap        /* 4  */
-       .word   instruction_trap        /* 5  */
-       .word   instruction_trap        /* 6  */
-       .word   instruction_trap        /* 7  */
-       .word   instruction_trap        /* 8  */
-       .word   instruction_trap        /* 9  */
-       .word   instruction_trap        /* 10 */
-       .word   instruction_trap        /* 11 */
-       .word   instruction_trap        /* 12 */
-       .word   instruction_trap        /* 13 */
-       .word   instruction_trap        /* 14 */
-       .word   instruction_trap        /* 15 */
-       .word   instruction_trap        /* 16 */
-       .word   instruction_trap        /* 17 */
-       .word   instruction_trap        /* 18 */
-       .word   instruction_trap        /* 19 */
-       .word   instruction_trap        /* 20 */
-       .word   instruction_trap        /* 21 */
-       .word   instruction_trap        /* 22 */
-       .word   instruction_trap        /* 23 */
-       .word   instruction_trap        /* 24 */
-       .word   instruction_trap        /* 25 */
-       .word   instruction_trap        /* 26 */
-       .word   instruction_trap        /* 27 */
-       .word   instruction_trap        /* 28 */
-       .word   instruction_trap        /* 29 */
+       .word   handle_trap_1           /* 1  */
+       .word   handle_trap_2           /* 2  */
+       .word   handle_trap_3           /* 3  */
+       .word   handle_trap_reserved    /* 4  */
+       .word   handle_trap_reserved    /* 5  */
+       .word   handle_trap_reserved    /* 6  */
+       .word   handle_trap_reserved    /* 7  */
+       .word   handle_trap_reserved    /* 8  */
+       .word   handle_trap_reserved    /* 9  */
+       .word   handle_trap_reserved    /* 10 */
+       .word   handle_trap_reserved    /* 11 */
+       .word   handle_trap_reserved    /* 12 */
+       .word   handle_trap_reserved    /* 13 */
+       .word   handle_trap_reserved    /* 14 */
+       .word   handle_trap_reserved    /* 15 */
+       .word   handle_trap_reserved    /* 16 */
+       .word   handle_trap_reserved    /* 17 */
+       .word   handle_trap_reserved    /* 18 */
+       .word   handle_trap_reserved    /* 19 */
+       .word   handle_trap_reserved    /* 20 */
+       .word   handle_trap_reserved    /* 21 */
+       .word   handle_trap_reserved    /* 22 */
+       .word   handle_trap_reserved    /* 23 */
+       .word   handle_trap_reserved    /* 24 */
+       .word   handle_trap_reserved    /* 25 */
+       .word   handle_trap_reserved    /* 26 */
+       .word   handle_trap_reserved    /* 27 */
+       .word   handle_trap_reserved    /* 28 */
+       .word   handle_trap_reserved    /* 29 */
  #ifdef CONFIG_KGDB
         .word   handle_kgdb_breakpoint  /* 30 KGDB breakpoint */
  #else
@@ -455,6 +455,19 @@ handle_kgdb_breakpoint:
         br      ret_from_exception
  #endif
  
+handle_trap_1:
+       call    handle_trap_1_c
+       br      ret_from_exception
+
+handle_trap_2:
+       call    handle_trap_2_c
+       br      ret_from_exception
+
+handle_trap_3:
+handle_trap_reserved:
+       call    handle_trap_3_c
+       br      ret_from_exception
+
  /*
   * Beware - when entering resume, prev (the current task) is
   * in r4, next (the new task) is in r5, don't change these
diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c

index b7b9764..81f7da7 100644 (file)
--- a/arch/nios2/kernel/traps.c
+++ b/arch/nios2/kernel/traps.c
@@ -23,6 +23,17 @@
  
  static DEFINE_SPINLOCK(die_lock);
  
+static void _send_sig(int signo, int code, unsigned long addr)
+{
+       siginfo_t info;
+
+       info.si_signo = signo;
+       info.si_errno = 0;
+       info.si_code = code;
+       info.si_addr = (void __user *) addr;
+       force_sig_info(signo, &info, current);
+}
+
  void die(const char *str, struct pt_regs *regs, long err)
  {
         console_verbose();
@@ -39,16 +50,10 @@ void die(const char *str, struct pt_regs *regs, long err)
  
  void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr)
  {
-       siginfo_t info;
-
         if (!user_mode(regs))
                 die("Exception in kernel mode", regs, signo);
  
-       info.si_signo = signo;
-       info.si_errno = 0;
-       info.si_code = code;
-       info.si_addr = (void __user *) addr;
-       force_sig_info(signo, &info, current);
+       _send_sig(signo, code, addr);
  }
  
  /*
@@ -183,3 +188,18 @@ asmlinkage void unhandled_exception(struct pt_regs *regs, int cause)
  
         pr_emerg("opcode: 0x%08lx\n", *(unsigned long *)(regs->ea));
  }
+
+asmlinkage void handle_trap_1_c(struct pt_regs *fp)
+{
+       _send_sig(SIGUSR1, 0, fp->ea);
+}
+
+asmlinkage void handle_trap_2_c(struct pt_regs *fp)
+{
+       _send_sig(SIGUSR2, 0, fp->ea);
+}
+
+asmlinkage void handle_trap_3_c(struct pt_regs *fp)
+{
+       _send_sig(SIGILL, ILL_ILLTRP, fp->ea);
+}
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c

index 7966429..223cdcc 100644 (file)
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -58,9 +58,6 @@ static void __invalidate_dcache(unsigned long start, unsigned long end)
         end += (cpuinfo.dcache_line_size - 1);
         end &= ~(cpuinfo.dcache_line_size - 1);
  
-       if (end > start + cpuinfo.dcache_size)
-               end = start + cpuinfo.dcache_size;
-
         for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
                 __asm__ __volatile__ ("   initda 0(%0)\n"
                                         : /* Outputs */
@@ -131,12 +128,14 @@ void flush_cache_dup_mm(struct mm_struct *mm)
  
  void flush_icache_range(unsigned long start, unsigned long end)
  {
+       __flush_dcache(start, end);
         __flush_icache(start, end);
  }
  
  void flush_dcache_range(unsigned long start, unsigned long end)
  {
         __flush_dcache(start, end);
+       __flush_icache(start, end);
  }
  EXPORT_SYMBOL(flush_dcache_range);
  
@@ -159,6 +158,7 @@ void flush_icache_page(struct vm_area_struct *vma, struct page *page)
         unsigned long start = (unsigned long) page_address(page);
         unsigned long end = start + PAGE_SIZE;
  
+       __flush_dcache(start, end);
         __flush_icache(start, end);
  }
  
@@ -173,6 +173,18 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
                 __flush_icache(start, end);
  }
  
+void __flush_dcache_page(struct address_space *mapping, struct page *page)
+{
+       /*
+        * Writeback any data associated with the kernel mapping of this
+        * page.  This ensures that data in the physical page is mutually
+        * coherent with the kernels mapping.
+        */
+       unsigned long start = (unsigned long)page_address(page);
+
+       __flush_dcache_all(start, start + PAGE_SIZE);
+}
+
  void flush_dcache_page(struct page *page)
  {
         struct address_space *mapping;
@@ -190,11 +202,12 @@ void flush_dcache_page(struct page *page)
         if (mapping && !mapping_mapped(mapping)) {
                 clear_bit(PG_dcache_clean, &page->flags);
         } else {
-               unsigned long start = (unsigned long)page_address(page);
-
-               __flush_dcache_all(start, start + PAGE_SIZE);
-               if (mapping)
+               __flush_dcache_page(mapping, page);
+               if (mapping) {
+                       unsigned long start = (unsigned long)page_address(page);
                         flush_aliases(mapping,  page);
+                       flush_icache_range(start, start + PAGE_SIZE);
+               }
                 set_bit(PG_dcache_clean, &page->flags);
         }
  }
@@ -205,6 +218,7 @@ void update_mmu_cache(struct vm_area_struct *vma,
  {
         unsigned long pfn = pte_pfn(*pte);
         struct page *page;
+       struct address_space *mapping;
  
         if (!pfn_valid(pfn))
                 return;
@@ -217,16 +231,15 @@ void update_mmu_cache(struct vm_area_struct *vma,
         if (page == ZERO_PAGE(0))
                 return;
  
-       if (!PageReserved(page) &&
-            !test_and_set_bit(PG_dcache_clean, &page->flags)) {
-               unsigned long start = page_to_virt(page);
-               struct address_space *mapping;
-
-               __flush_dcache(start, start + PAGE_SIZE);
-
-               mapping = page_mapping(page);
-               if (mapping)
-                       flush_aliases(mapping, page);
+       mapping = page_mapping(page);
+       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+               __flush_dcache_page(mapping, page);
+
+       if(mapping)
+       {
+               flush_aliases(mapping, page);
+               if (vma->vm_flags & VM_EXEC)
+                       flush_icache_page(vma, page);
         }
  }
  
@@ -234,15 +247,19 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
                     struct page *to)
  {
         __flush_dcache(vaddr, vaddr + PAGE_SIZE);
+       __flush_icache(vaddr, vaddr + PAGE_SIZE);
         copy_page(vto, vfrom);
         __flush_dcache((unsigned long)vto, (unsigned long)vto + PAGE_SIZE);
+       __flush_icache((unsigned long)vto, (unsigned long)vto + PAGE_SIZE);
  }
  
  void clear_user_page(void *addr, unsigned long vaddr, struct page *page)
  {
         __flush_dcache(vaddr, vaddr + PAGE_SIZE);
+       __flush_icache(vaddr, vaddr + PAGE_SIZE);
         clear_page(addr);
         __flush_dcache((unsigned long)addr, (unsigned long)addr + PAGE_SIZE);
+       __flush_icache((unsigned long)addr, (unsigned long)addr + PAGE_SIZE);
  }
  
  void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
@@ -251,7 +268,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
  {
         flush_cache_page(vma, user_vaddr, page_to_pfn(page));
         memcpy(dst, src, len);
-       __flush_dcache((unsigned long)src, (unsigned long)src + len);
+       __flush_dcache_all((unsigned long)src, (unsigned long)src + len);
         if (vma->vm_flags & VM_EXEC)
                 __flush_icache((unsigned long)src, (unsigned long)src + len);
  }
@@ -262,7 +279,7 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
  {
         flush_cache_page(vma, user_vaddr, page_to_pfn(page));
         memcpy(dst, src, len);
-       __flush_dcache((unsigned long)dst, (unsigned long)dst + len);
+       __flush_dcache_all((unsigned long)dst, (unsigned long)dst + len);
         if (vma->vm_flags & VM_EXEC)
                 __flush_icache((unsigned long)dst, (unsigned long)dst + len);
  }
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h

index bde5311..0cc6eed 100644 (file)
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -30,8 +30,6 @@ static inline int arch_has_random(void)
         return !!ppc_md.get_random_long;
  }
  
-int powernv_get_random_long(unsigned long *v);
-
  static inline int arch_get_random_seed_long(unsigned long *v)
  {
         return 0;
@@ -47,4 +45,13 @@ static inline int arch_has_random_seed(void)
  
  #endif /* CONFIG_ARCH_RANDOM */
  
+#ifdef CONFIG_PPC_POWERNV
+int powernv_hwrng_present(void);
+int powernv_get_random_long(unsigned long *v);
+int powernv_get_random_real_mode(unsigned long *v);
+#else
+static inline int powernv_hwrng_present(void) { return 0; }
+static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
+#endif
+
  #endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h

index 9930904..b91e74a 100644 (file)
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -288,6 +288,9 @@ static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
         return !is_kvmppc_hv_enabled(vcpu->kvm);
  }
  
+extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
+extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
+
  /* Magic register values loaded into r3 and r4 before the 'sc' assembly
   * instruction for the OSI hypercalls */
  #define OSI_SC_MAGIC_R3                        0x113724FA
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h

index 14619a5..3536d12 100644 (file)
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -85,6 +85,20 @@ static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
         return old == 0;
  }
  
+static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
+{
+       hpte_v &= ~HPTE_V_HVLOCK;
+       asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+       hpte[0] = cpu_to_be64(hpte_v);
+}
+
+/* Without barrier */
+static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
+{
+       hpte_v &= ~HPTE_V_HVLOCK;
+       hpte[0] = cpu_to_be64(hpte_v);
+}
+
  static inline int __hpte_actual_psize(unsigned int lp, int psize)
  {
         int i, shift;
@@ -281,16 +295,17 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
  
  /*
   * If it's present and writable, atomically set dirty and referenced bits and
- * return the PTE, otherwise return 0. If we find a transparent hugepage
- * and if it is marked splitting we return 0;
+ * return the PTE, otherwise return 0.
   */
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
-                                                unsigned int hugepage)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
  {
         pte_t old_pte, new_pte = __pte(0);
  
         while (1) {
-               old_pte = *ptep;
+               /*
+                * Make sure we don't reload from ptep
+                */
+               old_pte = READ_ONCE(*ptep);
                 /*
                  * wait until _PAGE_BUSY is clear then set it atomically
                  */
@@ -298,12 +313,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
                         cpu_relax();
                         continue;
                 }
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               /* If hugepage and is trans splitting return None */
-               if (unlikely(hugepage &&
-                            pmd_trans_splitting(pte_pmd(old_pte))))
-                       return __pte(0);
-#endif
                 /* If pte is not present return None */
                 if (unlikely(!(pte_val(old_pte) & _PAGE_PRESENT)))
                         return __pte(0);
@@ -424,6 +433,10 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
         return rcu_dereference_raw_notrace(kvm->memslots);
  }
  
+extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
+
+extern void kvmhv_rm_send_ipi(int cpu);
+
  #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
  
  #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h

index c610961..a193a13 100644 (file)
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -227,10 +227,8 @@ struct kvm_arch {
         unsigned long host_sdr1;
         int tlbie_lock;
         unsigned long lpcr;
-       unsigned long rmor;
-       struct kvm_rma_info *rma;
         unsigned long vrma_slb_v;
-       int rma_setup_done;
+       int hpte_setup_done;
         u32 hpt_order;
         atomic_t vcpus_running;
         u32 online_vcores;
@@ -239,6 +237,8 @@ struct kvm_arch {
         atomic_t hpte_mod_interest;
         cpumask_t need_tlb_flush;
         int hpt_cma_alloc;
+       struct dentry *debugfs_dir;
+       struct dentry *htab_dentry;
  #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
  #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
         struct mutex hpt_mutex;
@@ -263,18 +263,15 @@ struct kvm_arch {
  
  /*
   * Struct for a virtual core.
- * Note: entry_exit_count combines an entry count in the bottom 8 bits
- * and an exit count in the next 8 bits.  This is so that we can
- * atomically increment the entry count iff the exit count is 0
- * without taking the lock.
+ * Note: entry_exit_map combines a bitmap of threads that have entered
+ * in the bottom 8 bits and a bitmap of threads that have exited in the
+ * next 8 bits.  This is so that we can atomically set the entry bit
+ * iff the exit map is 0 without taking a lock.
   */
  struct kvmppc_vcore {
         int n_runnable;
-       int n_busy;
         int num_threads;
-       int entry_exit_count;
-       int n_woken;
-       int nap_count;
+       int entry_exit_map;
         int napping_threads;
         int first_vcpuid;
         u16 pcpu;
@@ -299,13 +296,14 @@ struct kvmppc_vcore {
         ulong conferring_threads;
  };
  
-#define VCORE_ENTRY_COUNT(vc)  ((vc)->entry_exit_count & 0xff)
-#define VCORE_EXIT_COUNT(vc)   ((vc)->entry_exit_count >> 8)
+#define VCORE_ENTRY_MAP(vc)    ((vc)->entry_exit_map & 0xff)
+#define VCORE_EXIT_MAP(vc)     ((vc)->entry_exit_map >> 8)
+#define VCORE_IS_EXITING(vc)   (VCORE_EXIT_MAP(vc) != 0)
  
  /* Values for vcore_state */
  #define VCORE_INACTIVE 0
  #define VCORE_SLEEPING 1
-#define VCORE_STARTING 2
+#define VCORE_PREEMPT  2
  #define VCORE_RUNNING  3
  #define VCORE_EXITING  4
  
@@ -368,6 +366,14 @@ struct kvmppc_slb {
         u8 base_page_size;      /* MMU_PAGE_xxx */
  };
  
+/* Struct used to accumulate timing information in HV real mode code */
+struct kvmhv_tb_accumulator {
+       u64     seqcount;       /* used to synchronize access, also count * 2 */
+       u64     tb_total;       /* total time in timebase ticks */
+       u64     tb_min;         /* min time */
+       u64     tb_max;         /* max time */
+};
+
  # ifdef CONFIG_PPC_FSL_BOOK3E
  #define KVMPPC_BOOKE_IAC_NUM   2
  #define KVMPPC_BOOKE_DAC_NUM   2
@@ -656,6 +662,19 @@ struct kvm_vcpu_arch {
  
         u32 emul_inst;
  #endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       struct kvmhv_tb_accumulator *cur_activity;      /* What we're timing */
+       u64     cur_tb_start;                   /* when it started */
+       struct kvmhv_tb_accumulator rm_entry;   /* real-mode entry code */
+       struct kvmhv_tb_accumulator rm_intr;    /* real-mode intr handling */
+       struct kvmhv_tb_accumulator rm_exit;    /* real-mode exit code */
+       struct kvmhv_tb_accumulator guest_time; /* guest execution */
+       struct kvmhv_tb_accumulator cede_time;  /* time napping inside guest */
+
+       struct dentry *debugfs_dir;
+       struct dentry *debugfs_timings;
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
  };
  
  #define VCPU_FPR(vcpu, i)      (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h

index 46bf652..b8475da 100644 (file)
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -302,6 +302,8 @@ static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
         return kvm->arch.kvm_ops == kvmppc_hv_ops;
  }
  
+extern int kvmppc_hwrng_present(void);
+
  /*
   * Cuts out inst bits with ordering according to spec.
   * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h

index 9835ac4..11a3863 100644 (file)
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -247,28 +247,16 @@ extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
  #define pmd_large(pmd)         0
  #define has_transparent_hugepage() 0
  #endif
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
                                  unsigned *shift);
-
-static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva,
-                                    unsigned long *pte_sizep)
+static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+                                              unsigned *shift)
  {
-       pte_t *ptep;
-       unsigned long ps = *pte_sizep;
-       unsigned int shift;
-
-       ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
-       if (!ptep)
-               return NULL;
-       if (shift)
-               *pte_sizep = 1ul << shift;
-       else
-               *pte_sizep = PAGE_SIZE;
-
-       if (ps > *pte_sizep)
-               return NULL;
-
-       return ptep;
+       if (!arch_irqs_disabled()) {
+               pr_info("%s called with irq enabled\n", __func__);
+               dump_stack();
+       }
+       return __find_linux_pte_or_hugepte(pgdir, ea, shift);
  }
  #endif /* __ASSEMBLY__ */
  
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h

index 03cbada..10fc784 100644 (file)
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -211,5 +211,8 @@ extern void secondary_cpu_time_init(void);
  
  DECLARE_PER_CPU(u64, decrementers_next_tb);
  
+/* Convert timebase ticks to nanoseconds */
+unsigned long long tb_to_ns(unsigned long long tb_ticks);
+
  #endif /* __KERNEL__ */
  #endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c

index 4717859..0034b6b 100644 (file)
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -37,6 +37,7 @@
  #include <asm/thread_info.h>
  #include <asm/rtas.h>
  #include <asm/vdso_datapage.h>
+#include <asm/dbell.h>
  #ifdef CONFIG_PPC64
  #include <asm/paca.h>
  #include <asm/lppaca.h>
@@ -458,6 +459,19 @@ int main(void)
         DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
         DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
         DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
+#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry));
+       DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr));
+       DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit));
+       DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time));
+       DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time));
+       DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity));
+       DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start));
+       DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount));
+       DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total));
+       DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min));
+       DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max));
  #endif
         DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
         DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
@@ -492,7 +506,6 @@ int main(void)
         DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
         DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
         DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
-       DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
         DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
         DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
         DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
@@ -550,8 +563,7 @@ int main(void)
         DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
         DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
         DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
-       DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
-       DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
+       DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map));
         DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
         DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
         DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
@@ -748,5 +760,7 @@ int main(void)
                         offsetof(struct paca_struct, subcore_sibling_mask));
  #endif
  
+       DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
+
         return 0;
  }
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c

index a4c62eb..44b480e 100644 (file)
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -334,9 +334,11 @@ static inline unsigned long eeh_token_to_phys(unsigned long token)
         int hugepage_shift;
  
         /*
-        * We won't find hugepages here, iomem
+        * We won't find hugepages here(this is iomem). Hence we are not
+        * worried about _PAGE_SPLITTING/collapse. Also we will not hit
+        * page table free, because of init_mm.
          */
-       ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
+       ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
         if (!ptep)
                 return token;
         WARN_ON(hugepage_shift);
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c

index 24b968f..63d9cc4 100644 (file)
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -71,15 +71,15 @@ struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
                 vaddr = (unsigned long)PCI_FIX_ADDR(addr);
                 if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
                         return NULL;
-
-               ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
+               /*
+                * We won't find huge pages here (iomem). Also can't hit
+                * a page table free due to init_mm
+                */
+               ptep = __find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
                                                  &hugepage_shift);
                 if (ptep == NULL)
                         paddr = 0;
                 else {
-                       /*
-                        * we don't have hugepages backing iomem
-                        */
                         WARN_ON(hugepage_shift);
                         paddr = pte_pfn(*ptep) << PAGE_SHIFT;
                 }
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c

index 2d7b33f..56f4484 100644 (file)
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -608,6 +608,12 @@ void arch_suspend_enable_irqs(void)
  }
  #endif
  
+unsigned long long tb_to_ns(unsigned long long ticks)
+{
+       return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
+}
+EXPORT_SYMBOL_GPL(tb_to_ns);
+
  /*
   * Scheduler clock - returns current time in nanosec units.
   *
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig

index 11850f3..3caec2c 100644 (file)
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -75,7 +75,7 @@ config KVM_BOOK3S_64
  
  config KVM_BOOK3S_64_HV
         tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
-       depends on KVM_BOOK3S_64
+       depends on KVM_BOOK3S_64 && PPC_POWERNV
         select KVM_BOOK3S_HV_POSSIBLE
         select MMU_NOTIFIER
         select CMA
@@ -110,6 +110,20 @@ config KVM_BOOK3S_64_PR
           processor, including emulating 32-bit processors on a 64-bit
           host.
  
+config KVM_BOOK3S_HV_EXIT_TIMING
+       bool "Detailed timing for hypervisor real-mode code"
+       depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
+       ---help---
+         Calculate time taken for each vcpu in the real-mode guest entry,
+         exit, and interrupt handling code, plus time spent in the guest
+         and in nap mode due to idle (cede) while other threads are still
+         in the guest.  The total, minimum and maximum times in nanoseconds
+         together with the number of executions are reported in debugfs in
+         kvm/vm#/vcpu#/timings.  The overhead is of the order of 30 - 40
+         ns per exit on POWER8.
+
+         If unsure, say N.
+
  config KVM_BOOKE_HV
         bool
  
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c

index cfbcdc6..453a8a4 100644 (file)
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -821,6 +821,82 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
  #endif
  }
  
+int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+       unsigned long size = kvmppc_get_gpr(vcpu, 4);
+       unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+       u64 buf;
+       int ret;
+
+       if (!is_power_of_2(size) || (size > sizeof(buf)))
+               return H_TOO_HARD;
+
+       ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+       if (ret != 0)
+               return H_TOO_HARD;
+
+       switch (size) {
+       case 1:
+               kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
+               break;
+
+       case 2:
+               kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(__be16 *)&buf));
+               break;
+
+       case 4:
+               kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(__be32 *)&buf));
+               break;
+
+       case 8:
+               kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(__be64 *)&buf));
+               break;
+
+       default:
+               BUG();
+       }
+
+       return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load);
+
+int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+       unsigned long size = kvmppc_get_gpr(vcpu, 4);
+       unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+       unsigned long val = kvmppc_get_gpr(vcpu, 6);
+       u64 buf;
+       int ret;
+
+       switch (size) {
+       case 1:
+               *(u8 *)&buf = val;
+               break;
+
+       case 2:
+               *(__be16 *)&buf = cpu_to_be16(val);
+               break;
+
+       case 4:
+               *(__be32 *)&buf = cpu_to_be32(val);
+               break;
+
+       case 8:
+               *(__be64 *)&buf = cpu_to_be64(val);
+               break;
+
+       default:
+               return H_TOO_HARD;
+       }
+
+       ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+       if (ret != 0)
+               return H_TOO_HARD;
+
+       return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_store);
+
  int kvmppc_core_check_processor_compat(void)
  {
         /*
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c

index 534acb3..1a4acf8 100644 (file)
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -27,6 +27,7 @@
  #include <linux/srcu.h>
  #include <linux/anon_inodes.h>
  #include <linux/file.h>
+#include <linux/debugfs.h>
  
  #include <asm/tlbflush.h>
  #include <asm/kvm_ppc.h>
@@ -116,12 +117,12 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
         long order;
  
         mutex_lock(&kvm->lock);
-       if (kvm->arch.rma_setup_done) {
-               kvm->arch.rma_setup_done = 0;
-               /* order rma_setup_done vs. vcpus_running */
+       if (kvm->arch.hpte_setup_done) {
+               kvm->arch.hpte_setup_done = 0;
+               /* order hpte_setup_done vs. vcpus_running */
                 smp_mb();
                 if (atomic_read(&kvm->arch.vcpus_running)) {
-                       kvm->arch.rma_setup_done = 1;
+                       kvm->arch.hpte_setup_done = 1;
                         goto out;
                 }
         }
@@ -338,9 +339,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
         v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
         gr = kvm->arch.revmap[index].guest_rpte;
  
-       /* Unlock the HPTE */
-       asm volatile("lwsync" : : : "memory");
-       hptep[0] = cpu_to_be64(v);
+       unlock_hpte(hptep, v);
         preempt_enable();
  
         gpte->eaddr = eaddr;
@@ -469,8 +468,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
         hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
         hpte[1] = be64_to_cpu(hptep[1]);
         hpte[2] = r = rev->guest_rpte;
-       asm volatile("lwsync" : : : "memory");
-       hptep[0] = cpu_to_be64(hpte[0]);
+       unlock_hpte(hptep, hpte[0]);
         preempt_enable();
  
         if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
@@ -537,23 +535,21 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
                 }
                 /* if the guest wants write access, see if that is OK */
                 if (!writing && hpte_is_writable(r)) {
-                       unsigned int hugepage_shift;
                         pte_t *ptep, pte;
-
+                       unsigned long flags;
                         /*
                          * We need to protect against page table destruction
-                        * while looking up and updating the pte.
+                        * hugepage split and collapse.
                          */
-                       rcu_read_lock_sched();
+                       local_irq_save(flags);
                         ptep = find_linux_pte_or_hugepte(current->mm->pgd,
-                                                        hva, &hugepage_shift);
+                                                        hva, NULL);
                         if (ptep) {
-                               pte = kvmppc_read_update_linux_pte(ptep, 1,
-                                                          hugepage_shift);
+                               pte = kvmppc_read_update_linux_pte(ptep, 1);
                                 if (pte_write(pte))
                                         write_ok = 1;
                         }
-                       rcu_read_unlock_sched();
+                       local_irq_restore(flags);
                 }
         }
  
@@ -621,7 +617,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
  
         hptep[1] = cpu_to_be64(r);
         eieio();
-       hptep[0] = cpu_to_be64(hpte[0]);
+       __unlock_hpte(hptep, hpte[0]);
         asm volatile("ptesync" : : : "memory");
         preempt_enable();
         if (page && hpte_is_writable(r))
@@ -642,7 +638,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
         return ret;
  
   out_unlock:
-       hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+       __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
         preempt_enable();
         goto out_put;
  }
@@ -771,7 +767,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
                         }
                 }
                 unlock_rmap(rmapp);
-               hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
         }
         return 0;
  }
@@ -857,7 +853,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
                         }
                         ret = 1;
                 }
-               hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
         } while ((i = j) != head);
  
         unlock_rmap(rmapp);
@@ -974,8 +970,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
  
                 /* Now check and modify the HPTE */
                 if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
-                       /* unlock and continue */
-                       hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+                       __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
                         continue;
                 }
  
@@ -996,9 +991,9 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
                                 npages_dirty = n;
                         eieio();
                 }
-               v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
+               v &= ~HPTE_V_ABSENT;
                 v |= HPTE_V_VALID;
-               hptep[0] = cpu_to_be64(v);
+               __unlock_hpte(hptep, v);
         } while ((i = j) != head);
  
         unlock_rmap(rmapp);
@@ -1218,8 +1213,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
                         r &= ~HPTE_GR_MODIFIED;
                         revp->guest_rpte = r;
                 }
-               asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
-               hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               unlock_hpte(hptp, be64_to_cpu(hptp[0]));
                 preempt_enable();
                 if (!(valid == want_valid && (first_pass || dirty)))
                         ok = 0;
@@ -1339,20 +1333,20 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
         unsigned long tmp[2];
         ssize_t nb;
         long int err, ret;
-       int rma_setup;
+       int hpte_setup;
  
         if (!access_ok(VERIFY_READ, buf, count))
                 return -EFAULT;
  
         /* lock out vcpus from running while we're doing this */
         mutex_lock(&kvm->lock);
-       rma_setup = kvm->arch.rma_setup_done;
-       if (rma_setup) {
-               kvm->arch.rma_setup_done = 0;   /* temporarily */
-               /* order rma_setup_done vs. vcpus_running */
+       hpte_setup = kvm->arch.hpte_setup_done;
+       if (hpte_setup) {
+               kvm->arch.hpte_setup_done = 0;  /* temporarily */
+               /* order hpte_setup_done vs. vcpus_running */
                 smp_mb();
                 if (atomic_read(&kvm->arch.vcpus_running)) {
-                       kvm->arch.rma_setup_done = 1;
+                       kvm->arch.hpte_setup_done = 1;
                         mutex_unlock(&kvm->lock);
                         return -EBUSY;
                 }
@@ -1405,7 +1399,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
                                        "r=%lx\n", ret, i, v, r);
                                 goto out;
                         }
-                       if (!rma_setup && is_vrma_hpte(v)) {
+                       if (!hpte_setup && is_vrma_hpte(v)) {
                                 unsigned long psize = hpte_base_page_size(v, r);
                                 unsigned long senc = slb_pgsize_encoding(psize);
                                 unsigned long lpcr;
@@ -1414,7 +1408,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
                                         (VRMA_VSID << SLB_VSID_SHIFT_1T);
                                 lpcr = senc << (LPCR_VRMASD_SH - 4);
                                 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
-                               rma_setup = 1;
+                               hpte_setup = 1;
                         }
                         ++i;
                         hptp += 2;
@@ -1430,9 +1424,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
         }
  
   out:
-       /* Order HPTE updates vs. rma_setup_done */
+       /* Order HPTE updates vs. hpte_setup_done */
         smp_wmb();
-       kvm->arch.rma_setup_done = rma_setup;
+       kvm->arch.hpte_setup_done = hpte_setup;
         mutex_unlock(&kvm->lock);
  
         if (err)
@@ -1495,6 +1489,141 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
         return ret;
  }
  
+struct debugfs_htab_state {
+       struct kvm      *kvm;
+       struct mutex    mutex;
+       unsigned long   hpt_index;
+       int             chars_left;
+       int             buf_index;
+       char            buf[64];
+};
+
+static int debugfs_htab_open(struct inode *inode, struct file *file)
+{
+       struct kvm *kvm = inode->i_private;
+       struct debugfs_htab_state *p;
+
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
+
+       kvm_get_kvm(kvm);
+       p->kvm = kvm;
+       mutex_init(&p->mutex);
+       file->private_data = p;
+
+       return nonseekable_open(inode, file);
+}
+
+static int debugfs_htab_release(struct inode *inode, struct file *file)
+{
+       struct debugfs_htab_state *p = file->private_data;
+
+       kvm_put_kvm(p->kvm);
+       kfree(p);
+       return 0;
+}
+
+static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
+                                size_t len, loff_t *ppos)
+{
+       struct debugfs_htab_state *p = file->private_data;
+       ssize_t ret, r;
+       unsigned long i, n;
+       unsigned long v, hr, gr;
+       struct kvm *kvm;
+       __be64 *hptp;
+
+       ret = mutex_lock_interruptible(&p->mutex);
+       if (ret)
+               return ret;
+
+       if (p->chars_left) {
+               n = p->chars_left;
+               if (n > len)
+                       n = len;
+               r = copy_to_user(buf, p->buf + p->buf_index, n);
+               n -= r;
+               p->chars_left -= n;
+               p->buf_index += n;
+               buf += n;
+               len -= n;
+               ret = n;
+               if (r) {
+                       if (!n)
+                               ret = -EFAULT;
+                       goto out;
+               }
+       }
+
+       kvm = p->kvm;
+       i = p->hpt_index;
+       hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+       for (; len != 0 && i < kvm->arch.hpt_npte; ++i, hptp += 2) {
+               if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
+                       continue;
+
+               /* lock the HPTE so it's stable and read it */
+               preempt_disable();
+               while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+                       cpu_relax();
+               v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
+               hr = be64_to_cpu(hptp[1]);
+               gr = kvm->arch.revmap[i].guest_rpte;
+               unlock_hpte(hptp, v);
+               preempt_enable();
+
+               if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+                       continue;
+
+               n = scnprintf(p->buf, sizeof(p->buf),
+                             "%6lx %.16lx %.16lx %.16lx\n",
+                             i, v, hr, gr);
+               p->chars_left = n;
+               if (n > len)
+                       n = len;
+               r = copy_to_user(buf, p->buf, n);
+               n -= r;
+               p->chars_left -= n;
+               p->buf_index = n;
+               buf += n;
+               len -= n;
+               ret += n;
+               if (r) {
+                       if (!ret)
+                               ret = -EFAULT;
+                       goto out;
+               }
+       }
+       p->hpt_index = i;
+
+ out:
+       mutex_unlock(&p->mutex);
+       return ret;
+}
+
+ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
+                          size_t len, loff_t *ppos)
+{
+       return -EACCES;
+}
+
+static const struct file_operations debugfs_htab_fops = {
+       .owner   = THIS_MODULE,
+       .open    = debugfs_htab_open,
+       .release = debugfs_htab_release,
+       .read    = debugfs_htab_read,
+       .write   = debugfs_htab_write,
+       .llseek  = generic_file_llseek,
+};
+
+void kvmppc_mmu_debugfs_init(struct kvm *kvm)
+{
+       kvm->arch.htab_dentry = debugfs_create_file("htab", 0400,
+                                                   kvm->arch.debugfs_dir, kvm,
+                                                   &debugfs_htab_fops);
+}
+
  void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
  {
         struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c

index de74756..48d3c5d 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -32,6 +32,7 @@
  #include <linux/page-flags.h>
  #include <linux/srcu.h>
  #include <linux/miscdevice.h>
+#include <linux/debugfs.h>
  
  #include <asm/reg.h>
  #include <asm/cputable.h>
@@ -50,6 +51,7 @@
  #include <asm/hvcall.h>
  #include <asm/switch_to.h>
  #include <asm/smp.h>
+#include <asm/dbell.h>
  #include <linux/gfp.h>
  #include <linux/vmalloc.h>
  #include <linux/highmem.h>
@@ -83,9 +85,35 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
  static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
  static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
  
+static bool kvmppc_ipi_thread(int cpu)
+{
+       /* On POWER8 for IPIs to threads in the same core, use msgsnd */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+               preempt_disable();
+               if (cpu_first_thread_sibling(cpu) ==
+                   cpu_first_thread_sibling(smp_processor_id())) {
+                       unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+                       msg |= cpu_thread_in_core(cpu);
+                       smp_mb();
+                       __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+                       preempt_enable();
+                       return true;
+               }
+               preempt_enable();
+       }
+
+#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
+       if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) {
+               xics_wake_cpu(cpu);
+               return true;
+       }
+#endif
+
+       return false;
+}
+
  static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
  {
-       int me;
         int cpu = vcpu->cpu;
         wait_queue_head_t *wqp;
  
@@ -95,20 +123,12 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
                 ++vcpu->stat.halt_wakeup;
         }
  
-       me = get_cpu();
+       if (kvmppc_ipi_thread(cpu + vcpu->arch.ptid))
+               return;
  
         /* CPU points to the first thread of the core */
-       if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
-#ifdef CONFIG_PPC_ICP_NATIVE
-               int real_cpu = cpu + vcpu->arch.ptid;
-               if (paca[real_cpu].kvm_hstate.xics_phys)
-                       xics_wake_cpu(real_cpu);
-               else
-#endif
-               if (cpu_online(cpu))
-                       smp_send_reschedule(cpu);
-       }
-       put_cpu();
+       if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
+               smp_send_reschedule(cpu);
  }
  
  /*
@@ -706,6 +726,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
  
                 /* Send the error out to userspace via KVM_RUN */
                 return rc;
+       case H_LOGICAL_CI_LOAD:
+               ret = kvmppc_h_logical_ci_load(vcpu);
+               if (ret == H_TOO_HARD)
+                       return RESUME_HOST;
+               break;
+       case H_LOGICAL_CI_STORE:
+               ret = kvmppc_h_logical_ci_store(vcpu);
+               if (ret == H_TOO_HARD)
+                       return RESUME_HOST;
+               break;
         case H_SET_MODE:
                 ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
                                         kvmppc_get_gpr(vcpu, 5),
@@ -740,6 +770,8 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
         case H_CONFER:
         case H_REGISTER_VPA:
         case H_SET_MODE:
+       case H_LOGICAL_CI_LOAD:
+       case H_LOGICAL_CI_STORE:
  #ifdef CONFIG_KVM_XICS
         case H_XIRR:
         case H_CPPR:
@@ -1410,6 +1442,154 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
         return vcore;
  }
  
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static struct debugfs_timings_element {
+       const char *name;
+       size_t offset;
+} timings[] = {
+       {"rm_entry",    offsetof(struct kvm_vcpu, arch.rm_entry)},
+       {"rm_intr",     offsetof(struct kvm_vcpu, arch.rm_intr)},
+       {"rm_exit",     offsetof(struct kvm_vcpu, arch.rm_exit)},
+       {"guest",       offsetof(struct kvm_vcpu, arch.guest_time)},
+       {"cede",        offsetof(struct kvm_vcpu, arch.cede_time)},
+};
+
+#define N_TIMINGS      (sizeof(timings) / sizeof(timings[0]))
+
+struct debugfs_timings_state {
+       struct kvm_vcpu *vcpu;
+       unsigned int    buflen;
+       char            buf[N_TIMINGS * 100];
+};
+
+static int debugfs_timings_open(struct inode *inode, struct file *file)
+{
+       struct kvm_vcpu *vcpu = inode->i_private;
+       struct debugfs_timings_state *p;
+
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
+
+       kvm_get_kvm(vcpu->kvm);
+       p->vcpu = vcpu;
+       file->private_data = p;
+
+       return nonseekable_open(inode, file);
+}
+
+static int debugfs_timings_release(struct inode *inode, struct file *file)
+{
+       struct debugfs_timings_state *p = file->private_data;
+
+       kvm_put_kvm(p->vcpu->kvm);
+       kfree(p);
+       return 0;
+}
+
+static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
+                                   size_t len, loff_t *ppos)
+{
+       struct debugfs_timings_state *p = file->private_data;
+       struct kvm_vcpu *vcpu = p->vcpu;
+       char *s, *buf_end;
+       struct kvmhv_tb_accumulator tb;
+       u64 count;
+       loff_t pos;
+       ssize_t n;
+       int i, loops;
+       bool ok;
+
+       if (!p->buflen) {
+               s = p->buf;
+               buf_end = s + sizeof(p->buf);
+               for (i = 0; i < N_TIMINGS; ++i) {
+                       struct kvmhv_tb_accumulator *acc;
+
+                       acc = (struct kvmhv_tb_accumulator *)
+                               ((unsigned long)vcpu + timings[i].offset);
+                       ok = false;
+                       for (loops = 0; loops < 1000; ++loops) {
+                               count = acc->seqcount;
+                               if (!(count & 1)) {
+                                       smp_rmb();
+                                       tb = *acc;
+                                       smp_rmb();
+                                       if (count == acc->seqcount) {
+                                               ok = true;
+                                               break;
+                                       }
+                               }
+                               udelay(1);
+                       }
+                       if (!ok)
+                               snprintf(s, buf_end - s, "%s: stuck\n",
+                                       timings[i].name);
+                       else
+                               snprintf(s, buf_end - s,
+                                       "%s: %llu %llu %llu %llu\n",
+                                       timings[i].name, count / 2,
+                                       tb_to_ns(tb.tb_total),
+                                       tb_to_ns(tb.tb_min),
+                                       tb_to_ns(tb.tb_max));
+                       s += strlen(s);
+               }
+               p->buflen = s - p->buf;
+       }
+
+       pos = *ppos;
+       if (pos >= p->buflen)
+               return 0;
+       if (len > p->buflen - pos)
+               len = p->buflen - pos;
+       n = copy_to_user(buf, p->buf + pos, len);
+       if (n) {
+               if (n == len)
+                       return -EFAULT;
+               len -= n;
+       }
+       *ppos = pos + len;
+       return len;
+}
+
+static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
+                                    size_t len, loff_t *ppos)
+{
+       return -EACCES;
+}
+
+static const struct file_operations debugfs_timings_ops = {
+       .owner   = THIS_MODULE,
+       .open    = debugfs_timings_open,
+       .release = debugfs_timings_release,
+       .read    = debugfs_timings_read,
+       .write   = debugfs_timings_write,
+       .llseek  = generic_file_llseek,
+};
+
+/* Create a debugfs directory for the vcpu */
+static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+{
+       char buf[16];
+       struct kvm *kvm = vcpu->kvm;
+
+       snprintf(buf, sizeof(buf), "vcpu%u", id);
+       if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
+               return;
+       vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
+       if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
+               return;
+       vcpu->arch.debugfs_timings =
+               debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
+                                   vcpu, &debugfs_timings_ops);
+}
+
+#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
+{
+}
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+
  static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
                                                    unsigned int id)
  {
@@ -1479,6 +1659,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
         vcpu->arch.cpu_type = KVM_CPU_3S_64;
         kvmppc_sanity_check(vcpu);
  
+       debugfs_vcpu_init(vcpu, id);
+
         return vcpu;
  
  free_vcpu:
@@ -1566,8 +1748,10 @@ static int kvmppc_grab_hwthread(int cpu)
         tpaca = &paca[cpu];
  
         /* Ensure the thread won't go into the kernel if it wakes */
-       tpaca->kvm_hstate.hwthread_req = 1;
         tpaca->kvm_hstate.kvm_vcpu = NULL;
+       tpaca->kvm_hstate.napping = 0;
+       smp_wmb();
+       tpaca->kvm_hstate.hwthread_req = 1;
  
         /*
          * If the thread is already executing in the kernel (e.g. handling
@@ -1610,35 +1794,41 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
         }
         cpu = vc->pcpu + vcpu->arch.ptid;
         tpaca = &paca[cpu];
-       tpaca->kvm_hstate.kvm_vcpu = vcpu;
         tpaca->kvm_hstate.kvm_vcore = vc;
         tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
         vcpu->cpu = vc->pcpu;
+       /* Order stores to hstate.kvm_vcore etc. before store to kvm_vcpu */
         smp_wmb();
-#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
-       if (cpu != smp_processor_id()) {
-               xics_wake_cpu(cpu);
-               if (vcpu->arch.ptid)
-                       ++vc->n_woken;
-       }
-#endif
+       tpaca->kvm_hstate.kvm_vcpu = vcpu;
+       if (cpu != smp_processor_id())
+               kvmppc_ipi_thread(cpu);
  }
  
-static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+static void kvmppc_wait_for_nap(void)
  {
-       int i;
+       int cpu = smp_processor_id();
+       int i, loops;
  
-       HMT_low();
-       i = 0;
-       while (vc->nap_count < vc->n_woken) {
-               if (++i >= 1000000) {
-                       pr_err("kvmppc_wait_for_nap timeout %d %d\n",
-                              vc->nap_count, vc->n_woken);
-                       break;
+       for (loops = 0; loops < 1000000; ++loops) {
+               /*
+                * Check if all threads are finished.
+                * We set the vcpu pointer when starting a thread
+                * and the thread clears it when finished, so we look
+                * for any threads that still have a non-NULL vcpu ptr.
+                */
+               for (i = 1; i < threads_per_subcore; ++i)
+                       if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+                               break;
+               if (i == threads_per_subcore) {
+                       HMT_medium();
+                       return;
                 }
-               cpu_relax();
+               HMT_low();
         }
         HMT_medium();
+       for (i = 1; i < threads_per_subcore; ++i)
+               if (paca[cpu + i].kvm_hstate.kvm_vcpu)
+                       pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
  }
  
  /*
@@ -1700,54 +1890,91 @@ static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
         mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
  }
  
+static void prepare_threads(struct kvmppc_vcore *vc)
+{
+       struct kvm_vcpu *vcpu, *vnext;
+
+       list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+                                arch.run_list) {
+               if (signal_pending(vcpu->arch.run_task))
+                       vcpu->arch.ret = -EINTR;
+               else if (vcpu->arch.vpa.update_pending ||
+                        vcpu->arch.slb_shadow.update_pending ||
+                        vcpu->arch.dtl.update_pending)
+                       vcpu->arch.ret = RESUME_GUEST;
+               else
+                       continue;
+               kvmppc_remove_runnable(vc, vcpu);
+               wake_up(&vcpu->arch.cpu_run);
+       }
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc)
+{
+       u64 now;
+       long ret;
+       struct kvm_vcpu *vcpu, *vnext;
+
+       now = get_tb();
+       list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
+                                arch.run_list) {
+               /* cancel pending dec exception if dec is positive */
+               if (now < vcpu->arch.dec_expires &&
+                   kvmppc_core_pending_dec(vcpu))
+                       kvmppc_core_dequeue_dec(vcpu);
+
+               trace_kvm_guest_exit(vcpu);
+
+               ret = RESUME_GUEST;
+               if (vcpu->arch.trap)
+                       ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
+                                                   vcpu->arch.run_task);
+
+               vcpu->arch.ret = ret;
+               vcpu->arch.trap = 0;
+
+               if (vcpu->arch.ceded) {
+                       if (!is_kvmppc_resume_guest(ret))
+                               kvmppc_end_cede(vcpu);
+                       else
+                               kvmppc_set_timer(vcpu);
+               }
+               if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
+                       kvmppc_remove_runnable(vc, vcpu);
+                       wake_up(&vcpu->arch.cpu_run);
+               }
+       }
+}
+
  /*
   * Run a set of guest threads on a physical core.
   * Called with vc->lock held.
   */
-static void kvmppc_run_core(struct kvmppc_vcore *vc)
+static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
  {
-       struct kvm_vcpu *vcpu, *vnext;
-       long ret;
-       u64 now;
-       int i, need_vpa_update;
+       struct kvm_vcpu *vcpu;
+       int i;
         int srcu_idx;
-       struct kvm_vcpu *vcpus_to_update[threads_per_core];
  
-       /* don't start if any threads have a signal pending */
-       need_vpa_update = 0;
-       list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-               if (signal_pending(vcpu->arch.run_task))
-                       return;
-               if (vcpu->arch.vpa.update_pending ||
-                   vcpu->arch.slb_shadow.update_pending ||
-                   vcpu->arch.dtl.update_pending)
-                       vcpus_to_update[need_vpa_update++] = vcpu;
-       }
+       /*
+        * Remove from the list any threads that have a signal pending
+        * or need a VPA update done
+        */
+       prepare_threads(vc);
+
+       /* if the runner is no longer runnable, let the caller pick a new one */
+       if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
+               return;
  
         /*
-        * Initialize *vc, in particular vc->vcore_state, so we can
-        * drop the vcore lock if necessary.
+        * Initialize *vc.
          */
-       vc->n_woken = 0;
-       vc->nap_count = 0;
-       vc->entry_exit_count = 0;
+       vc->entry_exit_map = 0;
         vc->preempt_tb = TB_NIL;
-       vc->vcore_state = VCORE_STARTING;
         vc->in_guest = 0;
         vc->napping_threads = 0;
         vc->conferring_threads = 0;
  
-       /*
-        * Updating any of the vpas requires calling kvmppc_pin_guest_page,
-        * which can't be called with any spinlocks held.
-        */
-       if (need_vpa_update) {
-               spin_unlock(&vc->lock);
-               for (i = 0; i < need_vpa_update; ++i)
-                       kvmppc_update_vpas(vcpus_to_update[i]);
-               spin_lock(&vc->lock);
-       }
-
         /*
          * Make sure we are running on primary threads, and that secondary
          * threads are offline.  Also check if the number of threads in this
@@ -1755,8 +1982,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
          */
         if ((threads_per_core > 1) &&
             ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
-               list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+               list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
                         vcpu->arch.ret = -EBUSY;
+                       kvmppc_remove_runnable(vc, vcpu);
+                       wake_up(&vcpu->arch.cpu_run);
+               }
                 goto out;
         }
  
@@ -1797,8 +2027,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
         list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
                 vcpu->cpu = -1;
         /* wait for secondary threads to finish writing their state to memory */
-       if (vc->nap_count < vc->n_woken)
-               kvmppc_wait_for_nap(vc);
+       kvmppc_wait_for_nap();
         for (i = 0; i < threads_per_subcore; ++i)
                 kvmppc_release_hwthread(vc->pcpu + i);
         /* prevent other vcpu threads from doing kvmppc_start_thread() now */
@@ -1812,44 +2041,12 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
         kvm_guest_exit();
  
         preempt_enable();
-       cond_resched();
  
         spin_lock(&vc->lock);
-       now = get_tb();
-       list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
-               /* cancel pending dec exception if dec is positive */
-               if (now < vcpu->arch.dec_expires &&
-                   kvmppc_core_pending_dec(vcpu))
-                       kvmppc_core_dequeue_dec(vcpu);
-
-               trace_kvm_guest_exit(vcpu);
-
-               ret = RESUME_GUEST;
-               if (vcpu->arch.trap)
-                       ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
-                                                   vcpu->arch.run_task);
-
-               vcpu->arch.ret = ret;
-               vcpu->arch.trap = 0;
-
-               if (vcpu->arch.ceded) {
-                       if (!is_kvmppc_resume_guest(ret))
-                               kvmppc_end_cede(vcpu);
-                       else
-                               kvmppc_set_timer(vcpu);
-               }
-       }
+       post_guest_process(vc);
  
   out:
         vc->vcore_state = VCORE_INACTIVE;
-       list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
-                                arch.run_list) {
-               if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
-                       kvmppc_remove_runnable(vc, vcpu);
-                       wake_up(&vcpu->arch.cpu_run);
-               }
-       }
-
         trace_kvmppc_run_core(vc, 1);
  }
  
@@ -1939,8 +2136,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
          * this thread straight away and have it join in.
          */
         if (!signal_pending(current)) {
-               if (vc->vcore_state == VCORE_RUNNING &&
-                   VCORE_EXIT_COUNT(vc) == 0) {
+               if (vc->vcore_state == VCORE_RUNNING && !VCORE_IS_EXITING(vc)) {
                         kvmppc_create_dtl_entry(vcpu, vc);
                         kvmppc_start_thread(vcpu);
                         trace_kvm_guest_enter(vcpu);
@@ -1971,7 +2167,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                 }
                 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
                         break;
-               vc->runner = vcpu;
                 n_ceded = 0;
                 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
                         if (!v->arch.pending_exceptions)
@@ -1979,10 +2174,17 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                         else
                                 v->arch.ceded = 0;
                 }
-               if (n_ceded == vc->n_runnable)
+               vc->runner = vcpu;
+               if (n_ceded == vc->n_runnable) {
                         kvmppc_vcore_blocked(vc);
-               else
+               } else if (should_resched()) {
+                       vc->vcore_state = VCORE_PREEMPT;
+                       /* Let something else run */
+                       cond_resched_lock(&vc->lock);
+                       vc->vcore_state = VCORE_INACTIVE;
+               } else {
                         kvmppc_run_core(vc);
+               }
                 vc->runner = NULL;
         }
  
@@ -2032,11 +2234,11 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
         }
  
         atomic_inc(&vcpu->kvm->arch.vcpus_running);
-       /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
+       /* Order vcpus_running vs. hpte_setup_done, see kvmppc_alloc_reset_hpt */
         smp_mb();
  
         /* On the first time here, set up HTAB and VRMA */
-       if (!vcpu->kvm->arch.rma_setup_done) {
+       if (!vcpu->kvm->arch.hpte_setup_done) {
                 r = kvmppc_hv_setup_htab_rma(vcpu);
                 if (r)
                         goto out;
@@ -2238,7 +2440,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
         int srcu_idx;
  
         mutex_lock(&kvm->lock);
-       if (kvm->arch.rma_setup_done)
+       if (kvm->arch.hpte_setup_done)
                 goto out;       /* another vcpu beat us to it */
  
         /* Allocate hashed page table (if not done already) and reset it */
@@ -2289,9 +2491,9 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
  
         kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
  
-       /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
+       /* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */
         smp_wmb();
-       kvm->arch.rma_setup_done = 1;
+       kvm->arch.hpte_setup_done = 1;
         err = 0;
   out_srcu:
         srcu_read_unlock(&kvm->srcu, srcu_idx);
@@ -2307,6 +2509,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
  static int kvmppc_core_init_vm_hv(struct kvm *kvm)
  {
         unsigned long lpcr, lpid;
+       char buf[32];
  
         /* Allocate the guest's logical partition ID */
  
@@ -2347,6 +2550,14 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
          */
         kvm_hv_vm_activated();
  
+       /*
+        * Create a debugfs directory for the VM
+        */
+       snprintf(buf, sizeof(buf), "vm%d", current->pid);
+       kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
+       if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
+               kvmppc_mmu_debugfs_init(kvm);
+
         return 0;
  }
  
@@ -2367,6 +2578,8 @@ static void kvmppc_free_vcores(struct kvm *kvm)
  
  static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
  {
+       debugfs_remove_recursive(kvm->arch.debugfs_dir);
+
         kvm_hv_vm_deactivated();
  
         kvmppc_free_vcores(kvm);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c

index 1f083ff..ed2589d 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -21,6 +21,10 @@
  #include <asm/cputable.h>
  #include <asm/kvm_ppc.h>
  #include <asm/kvm_book3s.h>
+#include <asm/archrandom.h>
+#include <asm/xics.h>
+#include <asm/dbell.h>
+#include <asm/cputhreads.h>
  
  #define KVM_CMA_CHUNK_ORDER    18
  
@@ -114,11 +118,11 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
         int rv = H_SUCCESS; /* => don't yield */
  
         set_bit(vcpu->arch.ptid, &vc->conferring_threads);
-       while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
-               threads_running = VCORE_ENTRY_COUNT(vc);
-               threads_ceded = hweight32(vc->napping_threads);
-               threads_conferring = hweight32(vc->conferring_threads);
-               if (threads_ceded + threads_conferring >= threads_running) {
+       while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
+               threads_running = VCORE_ENTRY_MAP(vc);
+               threads_ceded = vc->napping_threads;
+               threads_conferring = vc->conferring_threads;
+               if ((threads_ceded | threads_conferring) == threads_running) {
                         rv = H_TOO_HARD; /* => do yield */
                         break;
                 }
@@ -169,3 +173,89 @@ int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
         return 0;
  }
  EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
+
+int kvmppc_hwrng_present(void)
+{
+       return powernv_hwrng_present();
+}
+EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
+
+long kvmppc_h_random(struct kvm_vcpu *vcpu)
+{
+       if (powernv_get_random_real_mode(&vcpu->arch.gpr[4]))
+               return H_SUCCESS;
+
+       return H_HARDWARE;
+}
+
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+       __asm__ __volatile__("stbcix %0,0,%1"
+               : : "r" (val), "r" (paddr) : "memory");
+}
+
+/*
+ * Send an interrupt or message to another CPU.
+ * This can only be called in real mode.
+ * The caller needs to include any barrier needed to order writes
+ * to memory vs. the IPI/message.
+ */
+void kvmhv_rm_send_ipi(int cpu)
+{
+       unsigned long xics_phys;
+
+       /* On POWER8 for IPIs to threads in the same core, use msgsnd */
+       if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+           cpu_first_thread_sibling(cpu) ==
+           cpu_first_thread_sibling(raw_smp_processor_id())) {
+               unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+               msg |= cpu_thread_in_core(cpu);
+               __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+               return;
+       }
+
+       /* Else poke the target with an IPI */
+       xics_phys = paca[cpu].kvm_hstate.xics_phys;
+       rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+}
+
+/*
+ * The following functions are called from the assembly code
+ * in book3s_hv_rmhandlers.S.
+ */
+static void kvmhv_interrupt_vcore(struct kvmppc_vcore *vc, int active)
+{
+       int cpu = vc->pcpu;
+
+       /* Order setting of exit map vs. msgsnd/IPI */
+       smp_mb();
+       for (; active; active >>= 1, ++cpu)
+               if (active & 1)
+                       kvmhv_rm_send_ipi(cpu);
+}
+
+void kvmhv_commence_exit(int trap)
+{
+       struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+       int ptid = local_paca->kvm_hstate.ptid;
+       int me, ee;
+
+       /* Set our bit in the threads-exiting-guest map in the 0xff00
+          bits of vcore->entry_exit_map */
+       me = 0x100 << ptid;
+       do {
+               ee = vc->entry_exit_map;
+       } while (cmpxchg(&vc->entry_exit_map, ee, ee | me) != ee);
+
+       /* Are we the first here? */
+       if ((ee >> 8) != 0)
+               return;
+
+       /*
+        * Trigger the other threads in this vcore to exit the guest.
+        * If this is a hypervisor decrementer interrupt then they
+        * will be already on their way out of the guest.
+        */
+       if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
+               kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c

index 625407e..b027a89 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -26,11 +26,14 @@ static void *real_vmalloc_addr(void *x)
  {
         unsigned long addr = (unsigned long) x;
         pte_t *p;
-
-       p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
+       /*
+        * assume we don't have huge pages in vmalloc space...
+        * So don't worry about THP collapse/split. Called
+        * Only in realmode, hence won't need irq_save/restore.
+        */
+       p = __find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
         if (!p || !pte_present(*p))
                 return NULL;
-       /* assume we don't have huge pages in vmalloc space... */
         addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
         return __va(addr);
  }
@@ -131,31 +134,6 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
         unlock_rmap(rmap);
  }
  
-static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
-                             int writing, unsigned long *pte_sizep)
-{
-       pte_t *ptep;
-       unsigned long ps = *pte_sizep;
-       unsigned int hugepage_shift;
-
-       ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
-       if (!ptep)
-               return __pte(0);
-       if (hugepage_shift)
-               *pte_sizep = 1ul << hugepage_shift;
-       else
-               *pte_sizep = PAGE_SIZE;
-       if (ps > *pte_sizep)
-               return __pte(0);
-       return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
-}
-
-static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
-{
-       asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
-       hpte[0] = cpu_to_be64(hpte_v);
-}
-
  long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                        long pte_index, unsigned long pteh, unsigned long ptel,
                        pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
@@ -166,13 +144,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
         struct revmap_entry *rev;
         unsigned long g_ptel;
         struct kvm_memory_slot *memslot;
-       unsigned long pte_size;
+       unsigned hpage_shift;
         unsigned long is_io;
         unsigned long *rmap;
-       pte_t pte;
+       pte_t *ptep;
         unsigned int writing;
         unsigned long mmu_seq;
-       unsigned long rcbits;
+       unsigned long rcbits, irq_flags = 0;
  
         psize = hpte_page_size(pteh, ptel);
         if (!psize)
@@ -208,22 +186,46 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
  
         /* Translate to host virtual address */
         hva = __gfn_to_hva_memslot(memslot, gfn);
-
-       /* Look up the Linux PTE for the backing page */
-       pte_size = psize;
-       pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
-       if (pte_present(pte) && !pte_protnone(pte)) {
-               if (writing && !pte_write(pte))
-                       /* make the actual HPTE be read-only */
-                       ptel = hpte_make_readonly(ptel);
-               is_io = hpte_cache_bits(pte_val(pte));
-               pa = pte_pfn(pte) << PAGE_SHIFT;
-               pa |= hva & (pte_size - 1);
-               pa |= gpa & ~PAGE_MASK;
+       /*
+        * If we had a page table table change after lookup, we would
+        * retry via mmu_notifier_retry.
+        */
+       if (realmode)
+               ptep = __find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
+       else {
+               local_irq_save(irq_flags);
+               ptep = find_linux_pte_or_hugepte(pgdir, hva, &hpage_shift);
         }
+       if (ptep) {
+               pte_t pte;
+               unsigned int host_pte_size;
  
-       if (pte_size < psize)
-               return H_PARAMETER;
+               if (hpage_shift)
+                       host_pte_size = 1ul << hpage_shift;
+               else
+                       host_pte_size = PAGE_SIZE;
+               /*
+                * We should always find the guest page size
+                * to <= host page size, if host is using hugepage
+                */
+               if (host_pte_size < psize) {
+                       if (!realmode)
+                               local_irq_restore(flags);
+                       return H_PARAMETER;
+               }
+               pte = kvmppc_read_update_linux_pte(ptep, writing);
+               if (pte_present(pte) && !pte_protnone(pte)) {
+                       if (writing && !pte_write(pte))
+                               /* make the actual HPTE be read-only */
+                               ptel = hpte_make_readonly(ptel);
+                       is_io = hpte_cache_bits(pte_val(pte));
+                       pa = pte_pfn(pte) << PAGE_SHIFT;
+                       pa |= hva & (host_pte_size - 1);
+                       pa |= gpa & ~PAGE_MASK;
+               }
+       }
+       if (!realmode)
+               local_irq_restore(irq_flags);
  
         ptel &= ~(HPTE_R_PP0 - psize);
         ptel |= pa;
@@ -271,10 +273,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
                                 u64 pte;
                                 while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                         cpu_relax();
-                               pte = be64_to_cpu(*hpte);
+                               pte = be64_to_cpu(hpte[0]);
                                 if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
                                         break;
-                               *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+                               __unlock_hpte(hpte, pte);
                                 hpte += 2;
                         }
                         if (i == 8)
@@ -290,9 +292,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
  
                         while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
                                 cpu_relax();
-                       pte = be64_to_cpu(*hpte);
+                       pte = be64_to_cpu(hpte[0]);
                         if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
-                               *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+                               __unlock_hpte(hpte, pte);
                                 return H_PTEG_FULL;
                         }
                 }
@@ -331,7 +333,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
  
         /* Write the first HPTE dword, unlocking the HPTE and making it valid */
         eieio();
-       hpte[0] = cpu_to_be64(pteh);
+       __unlock_hpte(hpte, pteh);
         asm volatile("ptesync" : : : "memory");
  
         *pte_idx_ret = pte_index;
@@ -412,7 +414,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
         if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
             ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
             ((flags & H_ANDCOND) && (pte & avpn) != 0)) {
-               hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hpte, pte);
                 return H_NOT_FOUND;
         }
  
@@ -548,7 +550,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
                                 be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
                         rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
                         args[j] |= rcbits << (56 - 5);
-                       hp[0] = 0;
+                       __unlock_hpte(hp, 0);
                 }
         }
  
@@ -574,7 +576,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
         pte = be64_to_cpu(hpte[0]);
         if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
             ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
-               hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+               __unlock_hpte(hpte, pte);
                 return H_NOT_FOUND;
         }
  
@@ -755,8 +757,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
                                 /* Return with the HPTE still locked */
                                 return (hash << 3) + (i >> 1);
  
-                       /* Unlock and move on */
-                       hpte[i] = cpu_to_be64(v);
+                       __unlock_hpte(&hpte[i], v);
                 }
  
                 if (val & HPTE_V_SECONDARY)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c

index 7c22997..00e45b6 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -23,17 +23,37 @@
  
  #define DEBUG_PASSUP
  
-static inline void rm_writeb(unsigned long paddr, u8 val)
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+                           u32 new_irq);
+
+/* -- ICS routines -- */
+static void ics_rm_check_resend(struct kvmppc_xics *xics,
+                               struct kvmppc_ics *ics, struct kvmppc_icp *icp)
  {
-       __asm__ __volatile__("sync; stbcix %0,0,%1"
-               : : "r" (val), "r" (paddr) : "memory");
+       int i;
+
+       arch_spin_lock(&ics->lock);
+
+       for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+               struct ics_irq_state *state = &ics->irq_state[i];
+
+               if (!state->resend)
+                       continue;
+
+               arch_spin_unlock(&ics->lock);
+               icp_rm_deliver_irq(xics, icp, state->number);
+               arch_spin_lock(&ics->lock);
+       }
+
+       arch_spin_unlock(&ics->lock);
  }
  
+/* -- ICP routines -- */
+
  static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
                                 struct kvm_vcpu *this_vcpu)
  {
         struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
-       unsigned long xics_phys;
         int cpu;
  
         /* Mark the target VCPU as having an interrupt pending */
@@ -56,9 +76,8 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
         /* In SMT cpu will always point to thread 0, we adjust it */
         cpu += vcpu->arch.ptid;
  
-       /* Not too hard, then poke the target */
-       xics_phys = paca[cpu].kvm_hstate.xics_phys;
-       rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+       smp_mb();
+       kvmhv_rm_send_ipi(cpu);
  }
  
  static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
@@ -116,6 +135,180 @@ static inline int check_too_hard(struct kvmppc_xics *xics,
         return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
  }
  
+static void icp_rm_check_resend(struct kvmppc_xics *xics,
+                            struct kvmppc_icp *icp)
+{
+       u32 icsid;
+
+       /* Order this load with the test for need_resend in the caller */
+       smp_rmb();
+       for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+               struct kvmppc_ics *ics = xics->ics[icsid];
+
+               if (!test_and_clear_bit(icsid, icp->resend_map))
+                       continue;
+               if (!ics)
+                       continue;
+               ics_rm_check_resend(xics, ics, icp);
+       }
+}
+
+static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+                              u32 *reject)
+{
+       union kvmppc_icp_state old_state, new_state;
+       bool success;
+
+       do {
+               old_state = new_state = READ_ONCE(icp->state);
+
+               *reject = 0;
+
+               /* See if we can deliver */
+               success = new_state.cppr > priority &&
+                       new_state.mfrr > priority &&
+                       new_state.pending_pri > priority;
+
+               /*
+                * If we can, check for a rejection and perform the
+                * delivery
+                */
+               if (success) {
+                       *reject = new_state.xisr;
+                       new_state.xisr = irq;
+                       new_state.pending_pri = priority;
+               } else {
+                       /*
+                        * If we failed to deliver we set need_resend
+                        * so a subsequent CPPR state change causes us
+                        * to try a new delivery.
+                        */
+                       new_state.need_resend = true;
+               }
+
+       } while (!icp_rm_try_update(icp, old_state, new_state));
+
+       return success;
+}
+
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+                           u32 new_irq)
+{
+       struct ics_irq_state *state;
+       struct kvmppc_ics *ics;
+       u32 reject;
+       u16 src;
+
+       /*
+        * This is used both for initial delivery of an interrupt and
+        * for subsequent rejection.
+        *
+        * Rejection can be racy vs. resends. We have evaluated the
+        * rejection in an atomic ICP transaction which is now complete,
+        * so potentially the ICP can already accept the interrupt again.
+        *
+        * So we need to retry the delivery. Essentially the reject path
+        * boils down to a failed delivery. Always.
+        *
+        * Now the interrupt could also have moved to a different target,
+        * thus we may need to re-do the ICP lookup as well
+        */
+
+ again:
+       /* Get the ICS state and lock it */
+       ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+       if (!ics) {
+               /* Unsafe increment, but this does not need to be accurate */
+               xics->err_noics++;
+               return;
+       }
+       state = &ics->irq_state[src];
+
+       /* Get a lock on the ICS */
+       arch_spin_lock(&ics->lock);
+
+       /* Get our server */
+       if (!icp || state->server != icp->server_num) {
+               icp = kvmppc_xics_find_server(xics->kvm, state->server);
+               if (!icp) {
+                       /* Unsafe increment again*/
+                       xics->err_noicp++;
+                       goto out;
+               }
+       }
+
+       /* Clear the resend bit of that interrupt */
+       state->resend = 0;
+
+       /*
+        * If masked, bail out
+        *
+        * Note: PAPR doesn't mention anything about masked pending
+        * when doing a resend, only when doing a delivery.
+        *
+        * However that would have the effect of losing a masked
+        * interrupt that was rejected and isn't consistent with
+        * the whole masked_pending business which is about not
+        * losing interrupts that occur while masked.
+        *
+        * I don't differentiate normal deliveries and resends, this
+        * implementation will differ from PAPR and not lose such
+        * interrupts.
+        */
+       if (state->priority == MASKED) {
+               state->masked_pending = 1;
+               goto out;
+       }
+
+       /*
+        * Try the delivery, this will set the need_resend flag
+        * in the ICP as part of the atomic transaction if the
+        * delivery is not possible.
+        *
+        * Note that if successful, the new delivery might have itself
+        * rejected an interrupt that was "delivered" before we took the
+        * ics spin lock.
+        *
+        * In this case we do the whole sequence all over again for the
+        * new guy. We cannot assume that the rejected interrupt is less
+        * favored than the new one, and thus doesn't need to be delivered,
+        * because by the time we exit icp_rm_try_to_deliver() the target
+        * processor may well have already consumed & completed it, and thus
+        * the rejected interrupt might actually be already acceptable.
+        */
+       if (icp_rm_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+               /*
+                * Delivery was successful, did we reject somebody else ?
+                */
+               if (reject && reject != XICS_IPI) {
+                       arch_spin_unlock(&ics->lock);
+                       new_irq = reject;
+                       goto again;
+               }
+       } else {
+               /*
+                * We failed to deliver the interrupt we need to set the
+                * resend map bit and mark the ICS state as needing a resend
+                */
+               set_bit(ics->icsid, icp->resend_map);
+               state->resend = 1;
+
+               /*
+                * If the need_resend flag got cleared in the ICP some time
+                * between icp_rm_try_to_deliver() atomic update and now, then
+                * we know it might have missed the resend_map bit. So we
+                * retry
+                */
+               smp_mb();
+               if (!icp->state.need_resend) {
+                       arch_spin_unlock(&ics->lock);
+                       goto again;
+               }
+       }
+ out:
+       arch_spin_unlock(&ics->lock);
+}
+
  static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
                              u8 new_cppr)
  {
@@ -184,8 +377,8 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
          * separately here as well.
          */
         if (resend) {
-               icp->rm_action |= XICS_RM_CHECK_RESEND;
-               icp->rm_resend_icp = icp;
+               icp->n_check_resend++;
+               icp_rm_check_resend(xics, icp);
         }
  }
  
@@ -300,16 +493,16 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
                 }
         } while (!icp_rm_try_update(icp, old_state, new_state));
  
-       /* Pass rejects to virtual mode */
+       /* Handle reject in real mode */
         if (reject && reject != XICS_IPI) {
-               this_icp->rm_action |= XICS_RM_REJECT;
-               this_icp->rm_reject = reject;
+               this_icp->n_reject++;
+               icp_rm_deliver_irq(xics, icp, reject);
         }
  
-       /* Pass resends to virtual mode */
+       /* Handle resends in real mode */
         if (resend) {
-               this_icp->rm_action |= XICS_RM_CHECK_RESEND;
-               this_icp->rm_resend_icp = icp;
+               this_icp->n_check_resend++;
+               icp_rm_check_resend(xics, icp);
         }
  
         return check_too_hard(xics, this_icp);
@@ -365,10 +558,13 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
  
         } while (!icp_rm_try_update(icp, old_state, new_state));
  
-       /* Pass rejects to virtual mode */
+       /*
+        * Check for rejects. They are handled by doing a new delivery
+        * attempt (see comments in icp_rm_deliver_irq).
+        */
         if (reject && reject != XICS_IPI) {
-               icp->rm_action |= XICS_RM_REJECT;
-               icp->rm_reject = reject;
+               icp->n_reject++;
+               icp_rm_deliver_irq(xics, icp, reject);
         }
   bail:
         return check_too_hard(xics, icp);
@@ -416,10 +612,10 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
                 goto bail;
         state = &ics->irq_state[src];
  
-       /* Still asserted, resend it, we make it look like a reject */
+       /* Still asserted, resend it */
         if (state->asserted) {
-               icp->rm_action |= XICS_RM_REJECT;
-               icp->rm_reject = irq;
+               icp->n_reject++;
+               icp_rm_deliver_irq(xics, icp, irq);
         }
  
         if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 6cbf163..4d70df2 100644 (file)
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -172,6 +172,22 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  
  kvmppc_primary_no_guest:
         /* We handle this much like a ceded vcpu */
+       /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+       mfspr   r3, SPRN_HDEC
+       mtspr   SPRN_DEC, r3
+       /*
+        * Make sure the primary has finished the MMU switch.
+        * We should never get here on a secondary thread, but
+        * check it for robustness' sake.
+        */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+65:    lbz     r0, VCORE_IN_GUEST(r5)
+       cmpwi   r0, 0
+       beq     65b
+       /* Set LPCR. */
+       ld      r8,VCORE_LPCR(r5)
+       mtspr   SPRN_LPCR,r8
+       isync
         /* set our bit in napping_threads */
         ld      r5, HSTATE_KVM_VCORE(r13)
         lbz     r7, HSTATE_PTID(r13)
@@ -182,7 +198,7 @@ kvmppc_primary_no_guest:
         or      r3, r3, r0
         stwcx.  r3, 0, r6
         bne     1b
-       /* order napping_threads update vs testing entry_exit_count */
+       /* order napping_threads update vs testing entry_exit_map */
         isync
         li      r12, 0
         lwz     r7, VCORE_ENTRY_EXIT(r5)
@@ -191,6 +207,7 @@ kvmppc_primary_no_guest:
         li      r3, NAPPING_NOVCPU
         stb     r3, HSTATE_NAPPING(r13)
  
+       li      r3, 0           /* Don't wake on privileged (OS) doorbell */
         b       kvm_do_nap
  
  kvm_novcpu_wakeup:
@@ -202,7 +219,7 @@ kvm_novcpu_wakeup:
  
         /* check the wake reason */
         bl      kvmppc_check_wake_reason
-       
+
         /* see if any other thread is already exiting */
         lwz     r0, VCORE_ENTRY_EXIT(r5)
         cmpwi   r0, 0x100
@@ -222,13 +239,37 @@ kvm_novcpu_wakeup:
         cmpdi   r3, 0
         bge     kvm_novcpu_exit
  
+       /* See if our timeslice has expired (HDEC is negative) */
+       mfspr   r0, SPRN_HDEC
+       li      r12, BOOK3S_INTERRUPT_HV_DECREMENTER
+       cmpwi   r0, 0
+       blt     kvm_novcpu_exit
+
         /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
         ld      r4, HSTATE_KVM_VCPU(r13)
         cmpdi   r4, 0
-       bne     kvmppc_got_guest
+       beq     kvmppc_primary_no_guest
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r4, VCPU_TB_RMENTRY
+       bl      kvmhv_start_timing
+#endif
+       b       kvmppc_got_guest
  
  kvm_novcpu_exit:
-       b       hdec_soon
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       cmpdi   r4, 0
+       beq     13f
+       addi    r3, r4, VCPU_TB_RMEXIT
+       bl      kvmhv_accumulate_time
+#endif
+13:    mr      r3, r12
+       stw     r12, 112-4(r1)
+       bl      kvmhv_commence_exit
+       nop
+       lwz     r12, 112-4(r1)
+       b       kvmhv_switch_to_host
  
  /*
   * We come in here when wakened from nap mode.
@@ -239,9 +280,9 @@ kvm_novcpu_exit:
  kvm_start_guest:
  
         /* Set runlatch bit the minute you wake up from nap */
-       mfspr   r1, SPRN_CTRLF
-       ori     r1, r1, 1
-       mtspr   SPRN_CTRLT, r1
+       mfspr   r0, SPRN_CTRLF
+       ori     r0, r0, 1
+       mtspr   SPRN_CTRLT, r0
  
         ld      r2,PACATOC(r13)
  
@@ -286,26 +327,21 @@ kvm_secondary_got_guest:
         ld      r6, PACA_DSCR(r13)
         std     r6, HSTATE_DSCR(r13)
  
+       /* Order load of vcore, ptid etc. after load of vcpu */
+       lwsync
         bl      kvmppc_hv_entry
  
         /* Back from the guest, go back to nap */
         /* Clear our vcpu pointer so we don't come back in early */
         li      r0, 0
-       std     r0, HSTATE_KVM_VCPU(r13)
         /*
-        * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing
-        * the nap_count, because once the increment to nap_count is
-        * visible we could be given another vcpu.
+        * Once we clear HSTATE_KVM_VCPU(r13), the code in
+        * kvmppc_run_core() is going to assume that all our vcpu
+        * state is visible in memory.  This lwsync makes sure
+        * that that is true.
          */
         lwsync
-
-       /* increment the nap count and then go to nap mode */
-       ld      r4, HSTATE_KVM_VCORE(r13)
-       addi    r4, r4, VCORE_NAP_COUNT
-51:    lwarx   r3, 0, r4
-       addi    r3, r3, 1
-       stwcx.  r3, 0, r4
-       bne     51b
+       std     r0, HSTATE_KVM_VCPU(r13)
  
  /*
   * At this point we have finished executing in the guest.
@@ -376,6 +412,14 @@ kvmppc_hv_entry:
         li      r6, KVM_GUEST_MODE_HOST_HV
         stb     r6, HSTATE_IN_GUEST(r13)
  
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       /* Store initial timestamp */
+       cmpdi   r4, 0
+       beq     1f
+       addi    r3, r4, VCPU_TB_RMENTRY
+       bl      kvmhv_start_timing
+1:
+#endif
         /* Clear out SLB */
         li      r6,0
         slbmte  r6,r6
@@ -387,21 +431,23 @@ kvmppc_hv_entry:
          * We don't have to lock against concurrent tlbies,
          * but we do have to coordinate across hardware threads.
          */
-       /* Increment entry count iff exit count is zero. */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       addi    r9,r5,VCORE_ENTRY_EXIT
-21:    lwarx   r3,0,r9
-       cmpwi   r3,0x100                /* any threads starting to exit? */
+       /* Set bit in entry map iff exit map is zero. */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       li      r7, 1
+       lbz     r6, HSTATE_PTID(r13)
+       sld     r7, r7, r6
+       addi    r9, r5, VCORE_ENTRY_EXIT
+21:    lwarx   r3, 0, r9
+       cmpwi   r3, 0x100               /* any threads starting to exit? */
         bge     secondary_too_late      /* if so we're too late to the party */
-       addi    r3,r3,1
-       stwcx.  r3,0,r9
+       or      r3, r3, r7
+       stwcx.  r3, 0, r9
         bne     21b
  
         /* Primary thread switches to guest partition. */
         ld      r9,VCORE_KVM(r5)        /* pointer to struct kvm */
-       lbz     r6,HSTATE_PTID(r13)
         cmpwi   r6,0
-       bne     20f
+       bne     10f
         ld      r6,KVM_SDR1(r9)
         lwz     r7,KVM_LPID(r9)
         li      r0,LPID_RSVD            /* switch to reserved LPID */
@@ -472,28 +518,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  
         li      r0,1
         stb     r0,VCORE_IN_GUEST(r5)   /* signal secondaries to continue */
-       b       10f
-
-       /* Secondary threads wait for primary to have done partition switch */
-20:    lbz     r0,VCORE_IN_GUEST(r5)
-       cmpwi   r0,0
-       beq     20b
-
-       /* Set LPCR and RMOR. */
-10:    ld      r8,VCORE_LPCR(r5)
-       mtspr   SPRN_LPCR,r8
-       ld      r8,KVM_RMOR(r9)
-       mtspr   SPRN_RMOR,r8
-       isync
-
-       /* Check if HDEC expires soon */
-       mfspr   r3,SPRN_HDEC
-       cmpwi   r3,512          /* 1 microsecond */
-       li      r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-       blt     hdec_soon
  
         /* Do we have a guest vcpu to run? */
-       cmpdi   r4, 0
+10:    cmpdi   r4, 0
         beq     kvmppc_primary_no_guest
  kvmppc_got_guest:
  
@@ -818,6 +845,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
         clrrdi  r6,r6,1
         mtspr   SPRN_CTRLT,r6
  4:
+       /* Secondary threads wait for primary to have done partition switch */
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       lbz     r6, HSTATE_PTID(r13)
+       cmpwi   r6, 0
+       beq     21f
+       lbz     r0, VCORE_IN_GUEST(r5)
+       cmpwi   r0, 0
+       bne     21f
+       HMT_LOW
+20:    lbz     r0, VCORE_IN_GUEST(r5)
+       cmpwi   r0, 0
+       beq     20b
+       HMT_MEDIUM
+21:
+       /* Set LPCR. */
+       ld      r8,VCORE_LPCR(r5)
+       mtspr   SPRN_LPCR,r8
+       isync
+
+       /* Check if HDEC expires soon */
+       mfspr   r3, SPRN_HDEC
+       cmpwi   r3, 512         /* 1 microsecond */
+       blt     hdec_soon
+
         ld      r6, VCPU_CTR(r4)
         lwz     r7, VCPU_XER(r4)
  
@@ -880,6 +931,12 @@ fast_guest_return:
         li      r9, KVM_GUEST_MODE_GUEST_HV
         stb     r9, HSTATE_IN_GUEST(r13)
  
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       /* Accumulate timing */
+       addi    r3, r4, VCPU_TB_GUEST
+       bl      kvmhv_accumulate_time
+#endif
+
         /* Enter guest */
  
  BEGIN_FTR_SECTION
@@ -917,6 +974,27 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
         hrfid
         b       .
  
+secondary_too_late:
+       li      r12, 0
+       cmpdi   r4, 0
+       beq     11f
+       stw     r12, VCPU_TRAP(r4)
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r4, VCPU_TB_RMEXIT
+       bl      kvmhv_accumulate_time
+#endif
+11:    b       kvmhv_switch_to_host
+
+hdec_soon:
+       li      r12, BOOK3S_INTERRUPT_HV_DECREMENTER
+       stw     r12, VCPU_TRAP(r4)
+       mr      r9, r4
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r4, VCPU_TB_RMEXIT
+       bl      kvmhv_accumulate_time
+#endif
+       b       guest_exit_cont
+
  /******************************************************************************
   *                                                                            *
   *                               Exit code                                    *
@@ -1002,6 +1080,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
  
         stw     r12,VCPU_TRAP(r9)
  
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r9, VCPU_TB_RMINTR
+       mr      r4, r9
+       bl      kvmhv_accumulate_time
+       ld      r5, VCPU_GPR(R5)(r9)
+       ld      r6, VCPU_GPR(R6)(r9)
+       ld      r7, VCPU_GPR(R7)(r9)
+       ld      r8, VCPU_GPR(R8)(r9)
+#endif
+
         /* Save HEIR (HV emulation assist reg) in emul_inst
            if this is an HEI (HV emulation interrupt, e40) */
         li      r3,KVM_INST_FETCH_FAILED
@@ -1028,34 +1116,37 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
         bne     2f
         mfspr   r3,SPRN_HDEC
         cmpwi   r3,0
-       bge     ignore_hdec
+       mr      r4,r9
+       bge     fast_guest_return
  2:
         /* See if this is an hcall we can handle in real mode */
         cmpwi   r12,BOOK3S_INTERRUPT_SYSCALL
         beq     hcall_try_real_mode
  
+       /* Hypervisor doorbell - exit only if host IPI flag set */
+       cmpwi   r12, BOOK3S_INTERRUPT_H_DOORBELL
+       bne     3f
+       lbz     r0, HSTATE_HOST_IPI(r13)
+       beq     4f
+       b       guest_exit_cont
+3:
         /* External interrupt ? */
         cmpwi   r12, BOOK3S_INTERRUPT_EXTERNAL
-       bne+    ext_interrupt_to_host
+       bne+    guest_exit_cont
  
         /* External interrupt, first check for host_ipi. If this is
          * set, we know the host wants us out so let's do it now
          */
         bl      kvmppc_read_intr
         cmpdi   r3, 0
-       bgt     ext_interrupt_to_host
+       bgt     guest_exit_cont
  
         /* Check if any CPU is heading out to the host, if so head out too */
-       ld      r5, HSTATE_KVM_VCORE(r13)
+4:     ld      r5, HSTATE_KVM_VCORE(r13)
         lwz     r0, VCORE_ENTRY_EXIT(r5)
         cmpwi   r0, 0x100
-       bge     ext_interrupt_to_host
-
-       /* Return to guest after delivering any pending interrupt */
         mr      r4, r9
-       b       deliver_guest_interrupt
-
-ext_interrupt_to_host:
+       blt     deliver_guest_interrupt
  
  guest_exit_cont:               /* r9 = vcpu, r12 = trap, r13 = paca */
         /* Save more register state  */
@@ -1065,7 +1156,7 @@ guest_exit_cont:          /* r9 = vcpu, r12 = trap, r13 = paca */
         stw     r7, VCPU_DSISR(r9)
         /* don't overwrite fault_dar/fault_dsisr if HDSI */
         cmpwi   r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
-       beq     6f
+       beq     mc_cont
         std     r6, VCPU_FAULT_DAR(r9)
         stw     r7, VCPU_FAULT_DSISR(r9)
  
@@ -1073,9 +1164,20 @@ guest_exit_cont:         /* r9 = vcpu, r12 = trap, r13 = paca */
         cmpwi   r12, BOOK3S_INTERRUPT_MACHINE_CHECK
         beq     machine_check_realmode
  mc_cont:
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r9, VCPU_TB_RMEXIT
+       mr      r4, r9
+       bl      kvmhv_accumulate_time
+#endif
+
+       /* Increment exit count, poke other threads to exit */
+       bl      kvmhv_commence_exit
+       nop
+       ld      r9, HSTATE_KVM_VCPU(r13)
+       lwz     r12, VCPU_TRAP(r9)
  
         /* Save guest CTRL register, set runlatch to 1 */
-6:     mfspr   r6,SPRN_CTRLF
+       mfspr   r6,SPRN_CTRLF
         stw     r6,VCPU_CTRL(r9)
         andi.   r0,r6,1
         bne     4f
@@ -1417,68 +1519,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         slbia
         ptesync
  
-hdec_soon:                     /* r12 = trap, r13 = paca */
         /*
          * POWER7/POWER8 guest -> host partition switch code.
          * We don't have to lock against tlbies but we do
          * have to coordinate the hardware threads.
          */
-       /* Increment the threads-exiting-guest count in the 0xff00
-          bits of vcore->entry_exit_count */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       addi    r6,r5,VCORE_ENTRY_EXIT
-41:    lwarx   r3,0,r6
-       addi    r0,r3,0x100
-       stwcx.  r0,0,r6
-       bne     41b
-       isync           /* order stwcx. vs. reading napping_threads */
-
-       /*
-        * At this point we have an interrupt that we have to pass
-        * up to the kernel or qemu; we can't handle it in real mode.
-        * Thus we have to do a partition switch, so we have to
-        * collect the other threads, if we are the first thread
-        * to take an interrupt.  To do this, we set the HDEC to 0,
-        * which causes an HDEC interrupt in all threads within 2ns
-        * because the HDEC register is shared between all 4 threads.
-        * However, we don't need to bother if this is an HDEC
-        * interrupt, since the other threads will already be on their
-        * way here in that case.
-        */
-       cmpwi   r3,0x100        /* Are we the first here? */
-       bge     43f
-       cmpwi   r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-       beq     40f
-       li      r0,0
-       mtspr   SPRN_HDEC,r0
-40:
-       /*
-        * Send an IPI to any napping threads, since an HDEC interrupt
-        * doesn't wake CPUs up from nap.
-        */
-       lwz     r3,VCORE_NAPPING_THREADS(r5)
-       lbz     r4,HSTATE_PTID(r13)
-       li      r0,1
-       sld     r0,r0,r4
-       andc.   r3,r3,r0                /* no sense IPI'ing ourselves */
-       beq     43f
-       /* Order entry/exit update vs. IPIs */
-       sync
-       mulli   r4,r4,PACA_SIZE         /* get paca for thread 0 */
-       subf    r6,r4,r13
-42:    andi.   r0,r3,1
-       beq     44f
-       ld      r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
-       li      r0,IPI_PRIORITY
-       li      r7,XICS_MFRR
-       stbcix  r0,r7,r8                /* trigger the IPI */
-44:    srdi.   r3,r3,1
-       addi    r6,r6,PACA_SIZE
-       bne     42b
-
-secondary_too_late:
+kvmhv_switch_to_host:
         /* Secondary threads wait for primary to do partition switch */
-43:    ld      r5,HSTATE_KVM_VCORE(r13)
+       ld      r5,HSTATE_KVM_VCORE(r13)
         ld      r4,VCORE_KVM(r5)        /* pointer to struct kvm */
         lbz     r3,HSTATE_PTID(r13)
         cmpwi   r3,0
@@ -1562,6 +1610,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  1:     addi    r8,r8,16
         .endr
  
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       /* Finish timing, if we have a vcpu */
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       cmpdi   r4, 0
+       li      r3, 0
+       beq     2f
+       bl      kvmhv_accumulate_time
+2:
+#endif
         /* Unset guest mode */
         li      r0, KVM_GUEST_MODE_NONE
         stb     r0, HSTATE_IN_GUEST(r13)
@@ -1696,8 +1753,10 @@ kvmppc_hisi:
   * Returns to the guest if we handle it, or continues on up to
   * the kernel if we can't (i.e. if we don't have a handler for
   * it, or if the handler returns H_TOO_HARD).
+ *
+ * r5 - r8 contain hcall args,
+ * r9 = vcpu, r10 = pc, r11 = msr, r12 = trap, r13 = paca
   */
-       .globl  hcall_try_real_mode
  hcall_try_real_mode:
         ld      r3,VCPU_GPR(R3)(r9)
         andi.   r0,r11,MSR_PR
@@ -1839,13 +1898,124 @@ hcall_real_table:
         .long   0               /* 0x12c */
         .long   0               /* 0x130 */
         .long   DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
+       .long   0               /* 0x138 */
+       .long   0               /* 0x13c */
+       .long   0               /* 0x140 */
+       .long   0               /* 0x144 */
+       .long   0               /* 0x148 */
+       .long   0               /* 0x14c */
+       .long   0               /* 0x150 */
+       .long   0               /* 0x154 */
+       .long   0               /* 0x158 */
+       .long   0               /* 0x15c */
+       .long   0               /* 0x160 */
+       .long   0               /* 0x164 */
+       .long   0               /* 0x168 */
+       .long   0               /* 0x16c */
+       .long   0               /* 0x170 */
+       .long   0               /* 0x174 */
+       .long   0               /* 0x178 */
+       .long   0               /* 0x17c */
+       .long   0               /* 0x180 */
+       .long   0               /* 0x184 */
+       .long   0               /* 0x188 */
+       .long   0               /* 0x18c */
+       .long   0               /* 0x190 */
+       .long   0               /* 0x194 */
+       .long   0               /* 0x198 */
+       .long   0               /* 0x19c */
+       .long   0               /* 0x1a0 */
+       .long   0               /* 0x1a4 */
+       .long   0               /* 0x1a8 */
+       .long   0               /* 0x1ac */
+       .long   0               /* 0x1b0 */
+       .long   0               /* 0x1b4 */
+       .long   0               /* 0x1b8 */
+       .long   0               /* 0x1bc */
+       .long   0               /* 0x1c0 */
+       .long   0               /* 0x1c4 */
+       .long   0               /* 0x1c8 */
+       .long   0               /* 0x1cc */
+       .long   0               /* 0x1d0 */
+       .long   0               /* 0x1d4 */
+       .long   0               /* 0x1d8 */
+       .long   0               /* 0x1dc */
+       .long   0               /* 0x1e0 */
+       .long   0               /* 0x1e4 */
+       .long   0               /* 0x1e8 */
+       .long   0               /* 0x1ec */
+       .long   0               /* 0x1f0 */
+       .long   0               /* 0x1f4 */
+       .long   0               /* 0x1f8 */
+       .long   0               /* 0x1fc */
+       .long   0               /* 0x200 */
+       .long   0               /* 0x204 */
+       .long   0               /* 0x208 */
+       .long   0               /* 0x20c */
+       .long   0               /* 0x210 */
+       .long   0               /* 0x214 */
+       .long   0               /* 0x218 */
+       .long   0               /* 0x21c */
+       .long   0               /* 0x220 */
+       .long   0               /* 0x224 */
+       .long   0               /* 0x228 */
+       .long   0               /* 0x22c */
+       .long   0               /* 0x230 */
+       .long   0               /* 0x234 */
+       .long   0               /* 0x238 */
+       .long   0               /* 0x23c */
+       .long   0               /* 0x240 */
+       .long   0               /* 0x244 */
+       .long   0               /* 0x248 */
+       .long   0               /* 0x24c */
+       .long   0               /* 0x250 */
+       .long   0               /* 0x254 */
+       .long   0               /* 0x258 */
+       .long   0               /* 0x25c */
+       .long   0               /* 0x260 */
+       .long   0               /* 0x264 */
+       .long   0               /* 0x268 */
+       .long   0               /* 0x26c */
+       .long   0               /* 0x270 */
+       .long   0               /* 0x274 */
+       .long   0               /* 0x278 */
+       .long   0               /* 0x27c */
+       .long   0               /* 0x280 */
+       .long   0               /* 0x284 */
+       .long   0               /* 0x288 */
+       .long   0               /* 0x28c */
+       .long   0               /* 0x290 */
+       .long   0               /* 0x294 */
+       .long   0               /* 0x298 */
+       .long   0               /* 0x29c */
+       .long   0               /* 0x2a0 */
+       .long   0               /* 0x2a4 */
+       .long   0               /* 0x2a8 */
+       .long   0               /* 0x2ac */
+       .long   0               /* 0x2b0 */
+       .long   0               /* 0x2b4 */
+       .long   0               /* 0x2b8 */
+       .long   0               /* 0x2bc */
+       .long   0               /* 0x2c0 */
+       .long   0               /* 0x2c4 */
+       .long   0               /* 0x2c8 */
+       .long   0               /* 0x2cc */
+       .long   0               /* 0x2d0 */
+       .long   0               /* 0x2d4 */
+       .long   0               /* 0x2d8 */
+       .long   0               /* 0x2dc */
+       .long   0               /* 0x2e0 */
+       .long   0               /* 0x2e4 */
+       .long   0               /* 0x2e8 */
+       .long   0               /* 0x2ec */
+       .long   0               /* 0x2f0 */
+       .long   0               /* 0x2f4 */
+       .long   0               /* 0x2f8 */
+       .long   0               /* 0x2fc */
+       .long   DOTSYM(kvmppc_h_random) - hcall_real_table
         .globl  hcall_real_table_end
  hcall_real_table_end:
  
-ignore_hdec:
-       mr      r4,r9
-       b       fast_guest_return
-
  _GLOBAL(kvmppc_h_set_xdabr)
         andi.   r0, r5, DABRX_USER | DABRX_KERNEL
         beq     6f
@@ -1884,7 +2054,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         li      r3, 0
         blr
  
-_GLOBAL(kvmppc_h_cede)
+_GLOBAL(kvmppc_h_cede)         /* r3 = vcpu pointer, r11 = msr, r13 = paca */
         ori     r11,r11,MSR_EE
         std     r11,VCPU_MSR(r3)
         li      r0,1
@@ -1893,8 +2063,8 @@ _GLOBAL(kvmppc_h_cede)
         lbz     r5,VCPU_PRODDED(r3)
         cmpwi   r5,0
         bne     kvm_cede_prodded
-       li      r0,0            /* set trap to 0 to say hcall is handled */
-       stw     r0,VCPU_TRAP(r3)
+       li      r12,0           /* set trap to 0 to say hcall is handled */
+       stw     r12,VCPU_TRAP(r3)
         li      r0,H_SUCCESS
         std     r0,VCPU_GPR(R3)(r3)
  
@@ -1912,12 +2082,11 @@ _GLOBAL(kvmppc_h_cede)
         addi    r6,r5,VCORE_NAPPING_THREADS
  31:    lwarx   r4,0,r6
         or      r4,r4,r0
-       PPC_POPCNTW(R7,R4)
-       cmpw    r7,r8
-       bge     kvm_cede_exit
+       cmpw    r4,r8
+       beq     kvm_cede_exit
         stwcx.  r4,0,r6
         bne     31b
-       /* order napping_threads update vs testing entry_exit_count */
+       /* order napping_threads update vs testing entry_exit_map */
         isync
         li      r0,NAPPING_CEDE
         stb     r0,HSTATE_NAPPING(r13)
@@ -1954,22 +2123,53 @@ _GLOBAL(kvmppc_h_cede)
         /* save FP state */
         bl      kvmppc_save_fp
  
+       /*
+        * Set DEC to the smaller of DEC and HDEC, so that we wake
+        * no later than the end of our timeslice (HDEC interrupts
+        * don't wake us from nap).
+        */
+       mfspr   r3, SPRN_DEC
+       mfspr   r4, SPRN_HDEC
+       mftb    r5
+       cmpw    r3, r4
+       ble     67f
+       mtspr   SPRN_DEC, r4
+67:
+       /* save expiry time of guest decrementer */
+       extsw   r3, r3
+       add     r3, r3, r5
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       ld      r6, VCORE_TB_OFFSET(r5)
+       subf    r3, r6, r3      /* convert to host TB value */
+       std     r3, VCPU_DEC_EXPIRES(r4)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       ld      r4, HSTATE_KVM_VCPU(r13)
+       addi    r3, r4, VCPU_TB_CEDE
+       bl      kvmhv_accumulate_time
+#endif
+
+       lis     r3, LPCR_PECEDP@h       /* Do wake on privileged doorbell */
+
         /*
          * Take a nap until a decrementer or external or doobell interrupt
-        * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
-        * runlatch bit before napping.
+        * occurs, with PECE1 and PECE0 set in LPCR.
+        * On POWER8, set PECEDH, and if we are ceding, also set PECEDP.
+        * Also clear the runlatch bit before napping.
          */
  kvm_do_nap:
-       mfspr   r2, SPRN_CTRLF
-       clrrdi  r2, r2, 1
-       mtspr   SPRN_CTRLT, r2
+       mfspr   r0, SPRN_CTRLF
+       clrrdi  r0, r0, 1
+       mtspr   SPRN_CTRLT, r0
  
         li      r0,1
         stb     r0,HSTATE_HWTHREAD_REQ(r13)
         mfspr   r5,SPRN_LPCR
         ori     r5,r5,LPCR_PECE0 | LPCR_PECE1
  BEGIN_FTR_SECTION
-       oris    r5,r5,LPCR_PECEDP@h
+       ori     r5, r5, LPCR_PECEDH
+       rlwimi  r5, r3, 0, LPCR_PECEDP
  END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
         mtspr   SPRN_LPCR,r5
         isync
@@ -1994,9 +2194,23 @@ kvm_end_cede:
         /* Woken by external or decrementer interrupt */
         ld      r1, HSTATE_HOST_R1(r13)
  
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+       addi    r3, r4, VCPU_TB_RMINTR
+       bl      kvmhv_accumulate_time
+#endif
+
         /* load up FP state */
         bl      kvmppc_load_fp
  
+       /* Restore guest decrementer */
+       ld      r3, VCPU_DEC_EXPIRES(r4)
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       ld      r6, VCORE_TB_OFFSET(r5)
+       add     r3, r3, r6      /* convert host TB to guest TB value */
+       mftb    r7
+       subf    r3, r7, r3
+       mtspr   SPRN_DEC, r3
+
         /* Load NV GPRS */
         ld      r14, VCPU_GPR(R14)(r4)
         ld      r15, VCPU_GPR(R15)(r4)
@@ -2057,7 +2271,8 @@ kvm_cede_prodded:
  
         /* we've ceded but we want to give control to the host */
  kvm_cede_exit:
-       b       hcall_real_fallback
+       ld      r9, HSTATE_KVM_VCPU(r13)
+       b       guest_exit_cont
  
         /* Try to handle a machine check in real mode */
  machine_check_realmode:
@@ -2089,13 +2304,14 @@ machine_check_realmode:
  
  /*
   * Check the reason we woke from nap, and take appropriate action.
- * Returns:
+ * Returns (in r3):
   *     0 if nothing needs to be done
   *     1 if something happened that needs to be handled by the host
- *     -1 if there was a guest wakeup (IPI)
+ *     -1 if there was a guest wakeup (IPI or msgsnd)
   *
   * Also sets r12 to the interrupt vector for any interrupt that needs
   * to be handled now by the host (0x500 for external interrupt), or zero.
+ * Modifies r0, r6, r7, r8.
   */
  kvmppc_check_wake_reason:
         mfspr   r6, SPRN_SRR1
@@ -2122,7 +2338,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  
         /* hypervisor doorbell */
  3:     li      r12, BOOK3S_INTERRUPT_H_DOORBELL
+       /* see if it's a host IPI */
         li      r3, 1
+       lbz     r0, HSTATE_HOST_IPI(r13)
+       cmpwi   r0, 0
+       bnelr
+       /* if not, clear it and return -1 */
+       lis     r6, (PPC_DBELL_SERVER << (63-36))@h
+       PPC_MSGCLR(6)
+       li      r3, -1
         blr
  
  /*
@@ -2131,6 +2355,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
   *     0 if no interrupt is pending
   *     1 if an interrupt is pending that needs to be handled by the host
   *     -1 if there was a guest wakeup IPI (which has now been cleared)
+ * Modifies r0, r6, r7, r8, returns value in r3.
   */
  kvmppc_read_intr:
         /* see if a host IPI is pending */
@@ -2185,6 +2410,7 @@ kvmppc_read_intr:
         bne-    43f
  
         /* OK, it's an IPI for us */
+       li      r12, 0
         li      r3, -1
  1:     blr
  
@@ -2314,3 +2540,62 @@ kvmppc_fix_pmao:
         mtspr   SPRN_PMC6, r3
         isync
         blr
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+/*
+ * Start timing an activity
+ * r3 = pointer to time accumulation struct, r4 = vcpu
+ */
+kvmhv_start_timing:
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       lbz     r6, VCORE_IN_GUEST(r5)
+       cmpwi   r6, 0
+       beq     5f                              /* if in guest, need to */
+       ld      r6, VCORE_TB_OFFSET(r5)         /* subtract timebase offset */
+5:     mftb    r5
+       subf    r5, r6, r5
+       std     r3, VCPU_CUR_ACTIVITY(r4)
+       std     r5, VCPU_ACTIVITY_START(r4)
+       blr
+
+/*
+ * Accumulate time to one activity and start another.
+ * r3 = pointer to new time accumulation struct, r4 = vcpu
+ */
+kvmhv_accumulate_time:
+       ld      r5, HSTATE_KVM_VCORE(r13)
+       lbz     r8, VCORE_IN_GUEST(r5)
+       cmpwi   r8, 0
+       beq     4f                              /* if in guest, need to */
+       ld      r8, VCORE_TB_OFFSET(r5)         /* subtract timebase offset */
+4:     ld      r5, VCPU_CUR_ACTIVITY(r4)
+       ld      r6, VCPU_ACTIVITY_START(r4)
+       std     r3, VCPU_CUR_ACTIVITY(r4)
+       mftb    r7
+       subf    r7, r8, r7
+       std     r7, VCPU_ACTIVITY_START(r4)
+       cmpdi   r5, 0
+       beqlr
+       subf    r3, r6, r7
+       ld      r8, TAS_SEQCOUNT(r5)
+       cmpdi   r8, 0
+       addi    r8, r8, 1
+       std     r8, TAS_SEQCOUNT(r5)
+       lwsync
+       ld      r7, TAS_TOTAL(r5)
+       add     r7, r7, r3
+       std     r7, TAS_TOTAL(r5)
+       ld      r6, TAS_MIN(r5)
+       ld      r7, TAS_MAX(r5)
+       beq     3f
+       cmpd    r3, r6
+       bge     1f
+3:     std     r3, TAS_MIN(r5)
+1:     cmpd    r3, r7
+       ble     2f
+       std     r3, TAS_MAX(r5)
+2:     lwsync
+       addi    r8, r8, 1
+       std     r8, TAS_SEQCOUNT(r5)
+       blr
+#endif
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c

index ce3c893..f2c75a1 100644 (file)
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -258,6 +258,28 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
         return EMULATE_DONE;
  }
  
+static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+       long rc;
+
+       rc = kvmppc_h_logical_ci_load(vcpu);
+       if (rc == H_TOO_HARD)
+               return EMULATE_FAIL;
+       kvmppc_set_gpr(vcpu, 3, rc);
+       return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+       long rc;
+
+       rc = kvmppc_h_logical_ci_store(vcpu);
+       if (rc == H_TOO_HARD)
+               return EMULATE_FAIL;
+       kvmppc_set_gpr(vcpu, 3, rc);
+       return EMULATE_DONE;
+}
+
  static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
  {
         long rc = kvmppc_xics_hcall(vcpu, cmd);
@@ -290,6 +312,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
                 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
                 vcpu->stat.halt_wakeup++;
                 return EMULATE_DONE;
+       case H_LOGICAL_CI_LOAD:
+               return kvmppc_h_pr_logical_ci_load(vcpu);
+       case H_LOGICAL_CI_STORE:
+               return kvmppc_h_pr_logical_ci_store(vcpu);
         case H_XIRR:
         case H_CPPR:
         case H_EOI:
@@ -323,6 +349,8 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
         case H_BULK_REMOVE:
         case H_PUT_TCE:
         case H_CEDE:
+       case H_LOGICAL_CI_LOAD:
+       case H_LOGICAL_CI_STORE:
  #ifdef CONFIG_KVM_XICS
         case H_XIRR:
         case H_CPPR:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c

index a4a8d9f..8f3e6cc 100644 (file)
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -20,6 +20,7 @@
  #include <asm/xics.h>
  #include <asm/debug.h>
  #include <asm/time.h>
+#include <asm/spinlock.h>
  
  #include <linux/debugfs.h>
  #include <linux/seq_file.h>
@@ -39,7 +40,7 @@
   * LOCKING
   * =======
   *
- * Each ICS has a mutex protecting the information about the IRQ
+ * Each ICS has a spin lock protecting the information about the IRQ
   * sources and avoiding simultaneous deliveries if the same interrupt.
   *
   * ICP operations are done via a single compare & swap transaction
@@ -109,7 +110,10 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
  {
         int i;
  
-       mutex_lock(&ics->lock);
+       unsigned long flags;
+
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
  
         for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
                 struct ics_irq_state *state = &ics->irq_state[i];
@@ -120,12 +124,15 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
                 XICS_DBG("resend %#x prio %#x\n", state->number,
                               state->priority);
  
-               mutex_unlock(&ics->lock);
+               arch_spin_unlock(&ics->lock);
+               local_irq_restore(flags);
                 icp_deliver_irq(xics, icp, state->number);
-               mutex_lock(&ics->lock);
+               local_irq_save(flags);
+               arch_spin_lock(&ics->lock);
         }
  
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
  }
  
  static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
@@ -133,8 +140,10 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
                        u32 server, u32 priority, u32 saved_priority)
  {
         bool deliver;
+       unsigned long flags;
  
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
  
         state->server = server;
         state->priority = priority;
@@ -145,7 +154,8 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
                 deliver = true;
         }
  
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
  
         return deliver;
  }
@@ -186,6 +196,7 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
         struct kvmppc_ics *ics;
         struct ics_irq_state *state;
         u16 src;
+       unsigned long flags;
  
         if (!xics)
                 return -ENODEV;
@@ -195,10 +206,12 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
                 return -EINVAL;
         state = &ics->irq_state[src];
  
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
         *server = state->server;
         *priority = state->priority;
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
  
         return 0;
  }
@@ -365,6 +378,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
         struct kvmppc_ics *ics;
         u32 reject;
         u16 src;
+       unsigned long flags;
  
         /*
          * This is used both for initial delivery of an interrupt and
@@ -391,7 +405,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
         state = &ics->irq_state[src];
  
         /* Get a lock on the ICS */
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
  
         /* Get our server */
         if (!icp || state->server != icp->server_num) {
@@ -434,7 +449,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
          *
          * Note that if successful, the new delivery might have itself
          * rejected an interrupt that was "delivered" before we took the
-        * icp mutex.
+        * ics spin lock.
          *
          * In this case we do the whole sequence all over again for the
          * new guy. We cannot assume that the rejected interrupt is less
@@ -448,7 +463,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
                  * Delivery was successful, did we reject somebody else ?
                  */
                 if (reject && reject != XICS_IPI) {
-                       mutex_unlock(&ics->lock);
+                       arch_spin_unlock(&ics->lock);
+                       local_irq_restore(flags);
                         new_irq = reject;
                         goto again;
                 }
@@ -468,12 +484,14 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
                  */
                 smp_mb();
                 if (!icp->state.need_resend) {
-                       mutex_unlock(&ics->lock);
+                       arch_spin_unlock(&ics->lock);
+                       local_irq_restore(flags);
                         goto again;
                 }
         }
   out:
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
  }
  
  static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
@@ -802,14 +820,22 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
         XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
                  hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
  
-       if (icp->rm_action & XICS_RM_KICK_VCPU)
+       if (icp->rm_action & XICS_RM_KICK_VCPU) {
+               icp->n_rm_kick_vcpu++;
                 kvmppc_fast_vcpu_kick(icp->rm_kick_target);
-       if (icp->rm_action & XICS_RM_CHECK_RESEND)
+       }
+       if (icp->rm_action & XICS_RM_CHECK_RESEND) {
+               icp->n_rm_check_resend++;
                 icp_check_resend(xics, icp->rm_resend_icp);
-       if (icp->rm_action & XICS_RM_REJECT)
+       }
+       if (icp->rm_action & XICS_RM_REJECT) {
+               icp->n_rm_reject++;
                 icp_deliver_irq(xics, icp, icp->rm_reject);
-       if (icp->rm_action & XICS_RM_NOTIFY_EOI)
+       }
+       if (icp->rm_action & XICS_RM_NOTIFY_EOI) {
+               icp->n_rm_notify_eoi++;
                 kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
+       }
  
         icp->rm_action = 0;
  
@@ -872,10 +898,21 @@ static int xics_debug_show(struct seq_file *m, void *private)
         struct kvm *kvm = xics->kvm;
         struct kvm_vcpu *vcpu;
         int icsid, i;
+       unsigned long flags;
+       unsigned long t_rm_kick_vcpu, t_rm_check_resend;
+       unsigned long t_rm_reject, t_rm_notify_eoi;
+       unsigned long t_reject, t_check_resend;
  
         if (!kvm)
                 return 0;
  
+       t_rm_kick_vcpu = 0;
+       t_rm_notify_eoi = 0;
+       t_rm_check_resend = 0;
+       t_rm_reject = 0;
+       t_check_resend = 0;
+       t_reject = 0;
+
         seq_printf(m, "=========\nICP state\n=========\n");
  
         kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -890,8 +927,19 @@ static int xics_debug_show(struct seq_file *m, void *private)
                            icp->server_num, state.xisr,
                            state.pending_pri, state.cppr, state.mfrr,
                            state.out_ee, state.need_resend);
+               t_rm_kick_vcpu += icp->n_rm_kick_vcpu;
+               t_rm_notify_eoi += icp->n_rm_notify_eoi;
+               t_rm_check_resend += icp->n_rm_check_resend;
+               t_rm_reject += icp->n_rm_reject;
+               t_check_resend += icp->n_check_resend;
+               t_reject += icp->n_reject;
         }
  
+       seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu reject=%lu notify_eoi=%lu\n",
+                       t_rm_kick_vcpu, t_rm_check_resend,
+                       t_rm_reject, t_rm_notify_eoi);
+       seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n",
+                       t_check_resend, t_reject);
         for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
                 struct kvmppc_ics *ics = xics->ics[icsid];
  
@@ -901,7 +949,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
                 seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
                            icsid);
  
-               mutex_lock(&ics->lock);
+               local_irq_save(flags);
+               arch_spin_lock(&ics->lock);
  
                 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
                         struct ics_irq_state *irq = &ics->irq_state[i];
@@ -912,7 +961,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
                                    irq->resend, irq->masked_pending);
  
                 }
-               mutex_unlock(&ics->lock);
+               arch_spin_unlock(&ics->lock);
+               local_irq_restore(flags);
         }
         return 0;
  }
@@ -965,7 +1015,6 @@ static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
         if (!ics)
                 goto out;
  
-       mutex_init(&ics->lock);
         ics->icsid = icsid;
  
         for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
@@ -1107,13 +1156,15 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
         u64 __user *ubufp = (u64 __user *) addr;
         u16 idx;
         u64 val, prio;
+       unsigned long flags;
  
         ics = kvmppc_xics_find_ics(xics, irq, &idx);
         if (!ics)
                 return -ENOENT;
  
         irqp = &ics->irq_state[idx];
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
         ret = -ENOENT;
         if (irqp->exists) {
                 val = irqp->server;
@@ -1129,7 +1180,8 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
                         val |= KVM_XICS_PENDING;
                 ret = 0;
         }
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
  
         if (!ret && put_user(val, ubufp))
                 ret = -EFAULT;
@@ -1146,6 +1198,7 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
         u64 val;
         u8 prio;
         u32 server;
+       unsigned long flags;
  
         if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
                 return -ENOENT;
@@ -1166,7 +1219,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
             kvmppc_xics_find_server(xics->kvm, server) == NULL)
                 return -EINVAL;
  
-       mutex_lock(&ics->lock);
+       local_irq_save(flags);
+       arch_spin_lock(&ics->lock);
         irqp->server = server;
         irqp->saved_priority = prio;
         if (val & KVM_XICS_MASKED)
@@ -1178,7 +1232,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
         if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
                 irqp->asserted = 1;
         irqp->exists = 1;
-       mutex_unlock(&ics->lock);
+       arch_spin_unlock(&ics->lock);
+       local_irq_restore(flags);
  
         if (val & KVM_XICS_PENDING)
                 icp_deliver_irq(xics, NULL, irqp->number);
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h

index 73f0f27..56ea44f 100644 (file)
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -78,13 +78,22 @@ struct kvmppc_icp {
         u32  rm_reject;
         u32  rm_eoied_irq;
  
+       /* Counters for each reason we exited real mode */
+       unsigned long n_rm_kick_vcpu;
+       unsigned long n_rm_check_resend;
+       unsigned long n_rm_reject;
+       unsigned long n_rm_notify_eoi;
+       /* Counters for handling ICP processing in real mode */
+       unsigned long n_check_resend;
+       unsigned long n_reject;
+
         /* Debug stuff for real mode */
         union kvmppc_icp_state rm_dbgstate;
         struct kvm_vcpu *rm_dbgtgt;
  };
  
  struct kvmppc_ics {
-       struct mutex lock;
+       arch_spinlock_t lock;
         u16 icsid;
         struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
  };
@@ -96,6 +105,8 @@ struct kvmppc_xics {
         u32 max_icsid;
         bool real_mode;
         bool real_mode_dbg;
+       u32 err_noics;
+       u32 err_noicp;
         struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
  };
  
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c

index cc536d4..4d33e19 100644 (file)
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -338,6 +338,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
         pte_t *ptep;
         unsigned int wimg = 0;
         pgd_t *pgdir;
+       unsigned long flags;
  
         /* used to check for invalidations in progress */
         mmu_seq = kvm->mmu_notifier_seq;
@@ -468,15 +469,28 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
  
  
         pgdir = vcpu_e500->vcpu.arch.pgdir;
-       ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages);
-       if (pte_present(*ptep))
-               wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
-       else {
-               if (printk_ratelimit())
-                       pr_err("%s: pte not present: gfn %lx, pfn %lx\n",
-                               __func__, (long)gfn, pfn);
-               ret = -EINVAL;
-               goto out;
+       /*
+        * We are just looking at the wimg bits, so we don't
+        * care much about the trans splitting bit.
+        * We are holding kvm->mmu_lock so a notifier invalidate
+        * can't run hence pfn won't change.
+        */
+       local_irq_save(flags);
+       ptep = find_linux_pte_or_hugepte(pgdir, hva, NULL);
+       if (ptep) {
+               pte_t pte = READ_ONCE(*ptep);
+
+               if (pte_present(pte)) {
+                       wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
+                               MAS2_WIMGE_MASK;
+                       local_irq_restore(flags);
+               } else {
+                       local_irq_restore(flags);
+                       pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
+                                          __func__, (long)gfn, pfn);
+                       ret = -EINVAL;
+                       goto out;
+               }
         }
         kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
  
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c

index 91bbc84..ac3ddf1 100644 (file)
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -529,6 +529,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
         case KVM_CAP_PPC_RMA:
                 r = 0;
                 break;
+       case KVM_CAP_PPC_HWRNG:
+               r = kvmppc_hwrng_present();
+               break;
  #endif
         case KVM_CAP_SYNC_MMU:
  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c

index 2c2022d..fda236f 100644 (file)
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1066,7 +1066,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
  #endif /* CONFIG_PPC_64K_PAGES */
  
         /* Get PTE and page size from page tables */
-       ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
+       ptep = __find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
         if (ptep == NULL || !pte_present(*ptep)) {
                 DBG_LOW(" no PTE !\n");
                 rc = 1;
@@ -1394,6 +1394,7 @@ tm_abort:
                 tm_abort(TM_CAUSE_TLBI);
         }
  #endif
+       return;
  }
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c

index fa9d5c2..0ce968b 100644 (file)
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -109,7 +109,7 @@ int pgd_huge(pgd_t pgd)
  pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
  {
         /* Only called for hugetlbfs pages, hence can ignore THP */
-       return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
+       return __find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
  }
  
  static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
@@ -581,6 +581,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
         pmd = pmd_offset(pud, start);
         pud_clear(pud);
         pmd_free_tlb(tlb, pmd, start);
+       mm_dec_nr_pmds(tlb->mm);
  }
  
  static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
@@ -681,28 +682,35 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
         } while (addr = next, addr != end);
  }
  
+/*
+ * We are holding mmap_sem, so a parallel huge page collapse cannot run.
+ * To prevent hugepage split, disable irq.
+ */
  struct page *
  follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
  {
         pte_t *ptep;
         struct page *page;
         unsigned shift;
-       unsigned long mask;
+       unsigned long mask, flags;
         /*
          * Transparent hugepages are handled by generic code. We can skip them
          * here.
          */
+       local_irq_save(flags);
         ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
  
         /* Verify it is a huge page else bail. */
-       if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep))
+       if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) {
+               local_irq_restore(flags);
                 return ERR_PTR(-EINVAL);
-
+       }
         mask = (1UL << shift) - 1;
         page = pte_page(*ptep);
         if (page)
                 page += (address & mask) / PAGE_SIZE;
  
+       local_irq_restore(flags);
         return page;
  }
  
@@ -949,9 +957,12 @@ void flush_dcache_icache_hugepage(struct page *page)
   *
   * So long as we atomically load page table pointers we are safe against teardown,
   * we can follow the address down to the the page and take a ref on it.
+ * This function need to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but the paca->soft_enabled = 1
   */
  
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+pte_t *__find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
+                                  unsigned *shift)
  {
         pgd_t pgd, *pgdp;
         pud_t pud, *pudp;
@@ -1003,12 +1014,11 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
                          * A hugepage collapse is captured by pmd_none, because
                          * it mark the pmd none and do a hpte invalidate.
                          *
-                        * A hugepage split is captured by pmd_trans_splitting
-                        * because we mark the pmd trans splitting and do a
-                        * hpte invalidate
-                        *
+                        * We don't worry about pmd_trans_splitting here, The
+                        * caller if it needs to handle the splitting case
+                        * should check for that.
                          */
-                       if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+                       if (pmd_none(pmd))
                                 return NULL;
  
                         if (pmd_huge(pmd) || pmd_large(pmd)) {
@@ -1030,7 +1040,7 @@ out:
                 *shift = pdshift;
         return ret_pte;
  }
-EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
+EXPORT_SYMBOL_GPL(__find_linux_pte_or_hugepte);
  
  int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
                 unsigned long end, int write, struct page **pages, int *nr)
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c

index ead5535..ff09cde 100644 (file)
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -111,41 +111,45 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
   * interrupt context, so if the access faults, we read the page tables
   * to find which page (if any) is mapped and access it directly.
   */
-static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
+static int read_user_stack_slow(void __user *ptr, void *buf, int nb)
  {
+       int ret = -EFAULT;
         pgd_t *pgdir;
         pte_t *ptep, pte;
         unsigned shift;
         unsigned long addr = (unsigned long) ptr;
         unsigned long offset;
-       unsigned long pfn;
+       unsigned long pfn, flags;
         void *kaddr;
  
         pgdir = current->mm->pgd;
         if (!pgdir)
                 return -EFAULT;
  
+       local_irq_save(flags);
         ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
+       if (!ptep)
+               goto err_out;
         if (!shift)
                 shift = PAGE_SHIFT;
  
         /* align address to page boundary */
         offset = addr & ((1UL << shift) - 1);
-       addr -= offset;
  
-       if (ptep == NULL)
-               return -EFAULT;
-       pte = *ptep;
+       pte = READ_ONCE(*ptep);
         if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
-               return -EFAULT;
+               goto err_out;
         pfn = pte_pfn(pte);
         if (!page_is_ram(pfn))
-               return -EFAULT;
+               goto err_out;
  
         /* no highmem to worry about here */
         kaddr = pfn_to_kaddr(pfn);
-       memcpy(ret, kaddr + offset, nb);
-       return 0;
+       memcpy(buf, kaddr + offset, nb);
+       ret = 0;
+err_out:
+       local_irq_restore(flags);
+       return ret;
  }
  
  static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c

index 1a3429e..1ba6307 100644 (file)
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -111,7 +111,7 @@ out:
  static int
  spufs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if ((attr->ia_valid & ATTR_SIZE) &&
             (attr->ia_size != inode->i_size))
@@ -163,14 +163,14 @@ static void spufs_prune_dir(struct dentry *dir)
  {
         struct dentry *dentry, *tmp;
  
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
         list_for_each_entry_safe(dentry, tmp, &dir->d_subdirs, d_child) {
                 spin_lock(&dentry->d_lock);
-               if (!(d_unhashed(dentry)) && dentry->d_inode) {
+               if (!(d_unhashed(dentry)) && d_really_is_positive(dentry)) {
                         dget_dlock(dentry);
                         __d_drop(dentry);
                         spin_unlock(&dentry->d_lock);
-                       simple_unlink(dir->d_inode, dentry);
+                       simple_unlink(d_inode(dir), dentry);
                         /* XXX: what was dcache_lock protecting here? Other
                          * filesystems (IB, configfs) release dcache_lock
                          * before unlink */
@@ -180,7 +180,7 @@ static void spufs_prune_dir(struct dentry *dir)
                 }
         }
         shrink_dcache_parent(dir);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
  }
  
  /* Caller must hold parent->i_mutex */
@@ -192,7 +192,7 @@ static int spufs_rmdir(struct inode *parent, struct dentry *dir)
         d_drop(dir);
         res = simple_rmdir(parent, dir);
         /* We have to give up the mm_struct */
-       spu_forget(SPUFS_I(dir->d_inode)->i_ctx);
+       spu_forget(SPUFS_I(d_inode(dir))->i_ctx);
         return res;
  }
  
@@ -222,8 +222,8 @@ static int spufs_dir_close(struct inode *inode, struct file *file)
         int ret;
  
         dir = file->f_path.dentry;
-       parent = dir->d_parent->d_inode;
-       ctx = SPUFS_I(dir->d_inode)->i_ctx;
+       parent = d_inode(dir->d_parent);
+       ctx = SPUFS_I(d_inode(dir))->i_ctx;
  
         mutex_lock_nested(&parent->i_mutex, I_MUTEX_PARENT);
         ret = spufs_rmdir(parent, dir);
@@ -460,7 +460,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry,
                 goto out_aff_unlock;
  
         if (affinity) {
-               spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
+               spufs_set_affinity(flags, SPUFS_I(d_inode(dentry))->i_ctx,
                                                                 neighbor);
                 if (neighbor)
                         put_spu_context(neighbor);
@@ -504,7 +504,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode)
  
         d_instantiate(dentry, inode);
         inc_nlink(dir);
-       inc_nlink(dentry->d_inode);
+       inc_nlink(d_inode(dentry));
         return ret;
  
  out_iput:
@@ -561,7 +561,7 @@ static struct file_system_type spufs_type;
  long spufs_create(struct path *path, struct dentry *dentry,
                 unsigned int flags, umode_t mode, struct file *filp)
  {
-       struct inode *dir = path->dentry->d_inode;
+       struct inode *dir = d_inode(path->dentry);
         int ret;
  
         /* check if we are on spufs */
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c

index 80db439..6eb808f 100644 (file)
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -24,12 +24,22 @@
  
  struct powernv_rng {
         void __iomem *regs;
+       void __iomem *regs_real;
         unsigned long mask;
  };
  
  static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
  
  
+int powernv_hwrng_present(void)
+{
+       struct powernv_rng *rng;
+
+       rng = get_cpu_var(powernv_rng);
+       put_cpu_var(rng);
+       return rng != NULL;
+}
+
  static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
  {
         unsigned long parity;
@@ -46,6 +56,17 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
         return val;
  }
  
+int powernv_get_random_real_mode(unsigned long *v)
+{
+       struct powernv_rng *rng;
+
+       rng = raw_cpu_read(powernv_rng);
+
+       *v = rng_whiten(rng, in_rm64(rng->regs_real));
+
+       return 1;
+}
+
  int powernv_get_random_long(unsigned long *v)
  {
         struct powernv_rng *rng;
@@ -80,12 +101,20 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng,
  static __init int rng_create(struct device_node *dn)
  {
         struct powernv_rng *rng;
+       struct resource res;
         unsigned long val;
  
         rng = kzalloc(sizeof(*rng), GFP_KERNEL);
         if (!rng)
                 return -ENOMEM;
  
+       if (of_address_to_resource(dn, 0, &res)) {
+               kfree(rng);
+               return -ENXIO;
+       }
+
+       rng->regs_real = (void __iomem *)res.start;
+
         rng->regs = of_iomap(dn, 0);
         if (!rng->regs) {
                 kfree(rng);
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c

index 3f5c799..d3f896a 100644 (file)
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -48,7 +48,7 @@ static struct dentry *hypfs_last_dentry;
  static void hypfs_update_update(struct super_block *sb)
  {
         struct hypfs_sb_info *sb_info = sb->s_fs_info;
-       struct inode *inode = sb_info->update_file->d_inode;
+       struct inode *inode = d_inode(sb_info->update_file);
  
         sb_info->last_update = get_seconds();
         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -64,7 +64,7 @@ static void hypfs_add_dentry(struct dentry *dentry)
  
  static inline int hypfs_positive(struct dentry *dentry)
  {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
  }
  
  static void hypfs_remove(struct dentry *dentry)
@@ -72,16 +72,16 @@ static void hypfs_remove(struct dentry *dentry)
         struct dentry *parent;
  
         parent = dentry->d_parent;
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         if (hypfs_positive(dentry)) {
                 if (d_is_dir(dentry))
-                       simple_rmdir(parent->d_inode, dentry);
+                       simple_rmdir(d_inode(parent), dentry);
                 else
-                       simple_unlink(parent->d_inode, dentry);
+                       simple_unlink(d_inode(parent), dentry);
         }
         d_delete(dentry);
         dput(dentry);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
  }
  
  static void hypfs_delete_tree(struct dentry *root)
@@ -336,7 +336,7 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
         struct dentry *dentry;
         struct inode *inode;
  
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         dentry = lookup_one_len(name, parent, strlen(name));
         if (IS_ERR(dentry)) {
                 dentry = ERR_PTR(-ENOMEM);
@@ -357,14 +357,14 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name,
         } else if (S_ISDIR(mode)) {
                 inode->i_op = &simple_dir_inode_operations;
                 inode->i_fop = &simple_dir_operations;
-               inc_nlink(parent->d_inode);
+               inc_nlink(d_inode(parent));
         } else
                 BUG();
         inode->i_private = data;
         d_instantiate(dentry, inode);
         dget(dentry);
  fail:
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
         return dentry;
  }
  
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c

index afa2bd7..8cd8e7b 100644 (file)
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -110,7 +110,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
  /* upper facilities limit for kvm */
  unsigned long kvm_s390_fac_list_mask[] = {
         0xffe6fffbfcfdfc40UL,
-       0x205c800000000000UL,
+       0x005c800000000000UL,
  };
  
  unsigned long kvm_s390_fac_list_mask_size(void)
diff --git a/arch/sh/boards/board-sh7757lcr.c b/arch/sh/boards/board-sh7757lcr.c

index 669df51..324599b 100644 (file)
--- a/arch/sh/boards/board-sh7757lcr.c
+++ b/arch/sh/boards/board-sh7757lcr.c
@@ -17,6 +17,7 @@
  #include <linux/spi/spi.h>
  #include <linux/spi/flash.h>
  #include <linux/io.h>
+#include <linux/mfd/tmio.h>
  #include <linux/mmc/host.h>
  #include <linux/mmc/sh_mmcif.h>
  #include <linux/mmc/sh_mobile_sdhi.h>
@@ -243,10 +244,10 @@ static struct platform_device sh_mmcif_device = {
  };
  
  /* SDHI0 */
-static struct sh_mobile_sdhi_info sdhi_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI_RX,
-       .tmio_caps      = MMC_CAP_SD_HIGHSPEED,
+static struct tmio_mmc_data sdhi_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI_RX,
+       .capabilities   = MMC_CAP_SD_HIGHSPEED,
  };
  
  static struct resource sdhi_resources[] = {
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c

index d4b01d4..cbd2a9f 100644 (file)
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -18,6 +18,7 @@
  #include <linux/mmc/sh_mobile_sdhi.h>
  #include <linux/mtd/physmap.h>
  #include <linux/mtd/sh_flctl.h>
+#include <linux/mfd/tmio.h>
  #include <linux/delay.h>
  #include <linux/i2c.h>
  #include <linux/regulator/fixed.h>
@@ -447,8 +448,8 @@ static struct resource sdhi0_cn3_resources[] = {
         },
  };
  
-static struct sh_mobile_sdhi_info sdhi0_cn3_data = {
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sdhi0_cn3_data = {
+       .capabilities   = MMC_CAP_SDIO_IRQ,
  };
  
  static struct platform_device sdhi0_cn3_device = {
@@ -474,8 +475,8 @@ static struct resource sdhi1_cn7_resources[] = {
         },
  };
  
-static struct sh_mobile_sdhi_info sdhi1_cn7_data = {
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sdhi1_cn7_data = {
+       .capabilities   = MMC_CAP_SDIO_IRQ,
  };
  
  static struct platform_device sdhi1_cn7_device = {
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c

index 0d30492..d531791 100644 (file)
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -601,12 +601,12 @@ static struct platform_device sdhi0_power = {
         },
  };
  
-static struct sh_mobile_sdhi_info sdhi0_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
+static struct tmio_mmc_data sdhi0_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
                           MMC_CAP_NEEDS_POLL,
-       .tmio_flags     = TMIO_MMC_USE_GPIO_CD,
+       .flags          = TMIO_MMC_USE_GPIO_CD,
         .cd_gpio        = GPIO_PTY7,
  };
  
@@ -635,12 +635,12 @@ static struct platform_device sdhi0_device = {
  
  #if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE)
  /* SDHI1 */
-static struct sh_mobile_sdhi_info sdhi1_info = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI1_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI1_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
+static struct tmio_mmc_data sdhi1_info = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI1_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI1_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ | MMC_CAP_POWER_OFF_CARD |
                           MMC_CAP_NEEDS_POLL,
-       .tmio_flags     = TMIO_MMC_USE_GPIO_CD,
+       .flags          = TMIO_MMC_USE_GPIO_CD,
         .cd_gpio        = GPIO_PTW7,
  };
  
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c

index 1df4398..7d997ce 100644 (file)
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -373,11 +373,11 @@ static struct resource kfr2r09_sh_sdhi0_resources[] = {
         },
  };
  
-static struct sh_mobile_sdhi_info sh7724_sdhi0_data = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_flags     = TMIO_MMC_WRPROTECT_DISABLE,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi0_data = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .flags          = TMIO_MMC_WRPROTECT_DISABLE,
+       .capabilities   = MMC_CAP_SDIO_IRQ,
  };
  
  static struct platform_device kfr2r09_sh_sdhi0_device = {
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c

index 8b73194..29b7c0d 100644 (file)
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -15,6 +15,7 @@
  #include <linux/mmc/host.h>
  #include <linux/mmc/sh_mobile_sdhi.h>
  #include <linux/mtd/physmap.h>
+#include <linux/mfd/tmio.h>
  #include <linux/mtd/nand.h>
  #include <linux/i2c.h>
  #include <linux/regulator/fixed.h>
@@ -408,10 +409,10 @@ static struct resource sdhi_cn9_resources[] = {
         },
  };
  
-static struct sh_mobile_sdhi_info sh7724_sdhi_data = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi_data = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ,
  };
  
  static struct platform_device sdhi_cn9_device = {
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c

index 1162bc6..4f6635a 100644 (file)
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -16,6 +16,7 @@
  #include <linux/platform_device.h>
  #include <linux/mmc/host.h>
  #include <linux/mmc/sh_mobile_sdhi.h>
+#include <linux/mfd/tmio.h>
  #include <linux/mtd/physmap.h>
  #include <linux/delay.h>
  #include <linux/regulator/fixed.h>
@@ -468,10 +469,10 @@ static struct resource sdhi0_cn7_resources[] = {
         },
  };
  
-static struct sh_mobile_sdhi_info sh7724_sdhi0_data = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI0_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI0_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi0_data = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI0_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI0_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ,
  };
  
  static struct platform_device sdhi0_cn7_device = {
@@ -497,10 +498,10 @@ static struct resource sdhi1_cn8_resources[] = {
         },
  };
  
-static struct sh_mobile_sdhi_info sh7724_sdhi1_data = {
-       .dma_slave_tx   = SHDMA_SLAVE_SDHI1_TX,
-       .dma_slave_rx   = SHDMA_SLAVE_SDHI1_RX,
-       .tmio_caps      = MMC_CAP_SDIO_IRQ,
+static struct tmio_mmc_data sh7724_sdhi1_data = {
+       .chan_priv_tx   = (void *)SHDMA_SLAVE_SDHI1_TX,
+       .chan_priv_rx   = (void *)SHDMA_SLAVE_SDHI1_RX,
+       .capabilities   = MMC_CAP_SDIO_IRQ,
  };
  
  static struct platform_device sdhi1_cn8_device = {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index 6049d58..226d569 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,6 +22,7 @@ config X86_64
  ### Arch settings
  config X86
         def_bool y
+       select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
         select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
         select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
         select ARCH_HAS_FAST_MULTIPLIER
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S

index a4771dc..1f20b35 100644 (file)
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -79,7 +79,7 @@ NUM_BLKS    = %rdx
  c           = %rcx
  d           = %r8
  e           = %rdx
-y3          = %rdi
+y3          = %rsi
  
  TBL   = %rbp
  
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S

index a821b1c..72bf268 100644 (file)
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -427,6 +427,13 @@ sysretl_from_sys_call:
          * cs and ss are loaded from MSRs.
          * (Note: 32bit->32bit SYSRET is different: since r11
          * does not exist, it merely sets eflags.IF=1).
+        *
+        * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+        * descriptor is not reinitialized.  This means that we must
+        * avoid SYSRET with SS == NULL, which could happen if we schedule,
+        * exit the kernel, and re-enter using an interrupt vector.  (All
+        * interrupt entries on x86_64 set SS to NULL.)  We prevent that
+        * from happening by reloading SS in __switch_to.
          */
         USERGS_SYSRET32
  
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h

index 7ee9b94..3d6606f 100644 (file)
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -265,6 +265,7 @@
  #define X86_BUG_11AP           X86_BUG(5) /* Bad local APIC aka 11AP */
  #define X86_BUG_FXSAVE_LEAK    X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
  #define X86_BUG_CLFLUSH_MONITOR        X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS        X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
  
  #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
  
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h

index e2d4a4a..3bbc07a 100644 (file)
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -20,13 +20,10 @@ extern unsigned long switcher_addr;
  /* Found in switcher.S */
  extern unsigned long default_idt_entries[];
  
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
+/* Declarations for definitions in arch/x86/lguest/head_32.S */
+extern char lguest_noirq_iret[];
  extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
  extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
  
  extern void lguest_iret(void);
  extern void lguest_init(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c

index 803b684..dbe76a1 100644 (file)
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -757,7 +757,7 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu)
  }
  
  /* wrapper to silence section mismatch warning */
-int __ref acpi_map_cpu(acpi_handle handle, int physid, int *pcpu)
+int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu)
  {
         return _acpi_map_lsapic(handle, physid, pcpu);
  }
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c

index fd470eb..e4cf633 100644 (file)
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -720,6 +720,9 @@ static void init_amd(struct cpuinfo_x86 *c)
         if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
                 if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
                         set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
+
+       /* AMD CPUs don't reset SS attributes on SYSRET */
+       set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
  }
  
  #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S

index c7b2384..02c2eff 100644 (file)
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -295,6 +295,15 @@ system_call_fastpath:
          * rflags from r11 (but RF and VM bits are forced to 0),
          * cs and ss are loaded from MSRs.
          * Restoration of rflags re-enables interrupts.
+        *
+        * NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
+        * descriptor is not reinitialized.  This means that we should
+        * avoid SYSRET with SS == NULL, which could happen if we schedule,
+        * exit the kernel, and re-enter using an interrupt vector.  (All
+        * interrupt entries on x86_64 set SS to NULL.)  We prevent that
+        * from happening by reloading SS in __switch_to.  (Actually
+        * detecting the failure in 64-bit userspace is tricky but can be
+        * done.)
          */
         USERGS_SYSRET64
  
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c

index 4baaa97..ddfdbf7 100644 (file)
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -419,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
                      task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
                 __switch_to_xtra(prev_p, next_p, tss);
  
+       if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
+               /*
+                * AMD CPUs have a misfeature: SYSRET sets the SS selector but
+                * does not update the cached descriptor.  As a result, if we
+                * do SYSRET while SS is NULL, we'll end up in user mode with
+                * SS apparently equal to __USER_DS but actually unusable.
+                *
+                * The straightforward workaround would be to fix it up just
+                * before SYSRET, but that would slow down the system call
+                * fast paths.  Instead, we ensure that SS is never NULL in
+                * system call context.  We do this by replacing NULL SS
+                * selectors at every context switch.  SYSCALL sets up a valid
+                * SS, so the only way to get NULL is to re-enter the kernel
+                * from CPL 3 through an interrupt.  Since that can't happen
+                * in the same task as a running syscall, we are guaranteed to
+                * context switch between every interrupt vector entry and a
+                * subsequent SYSRET.
+                *
+                * We read SS first because SS reads are much faster than
+                * writes.  Out of caution, we force SS to __KERNEL_DS even if
+                * it previously had a different non-NULL value.
+                */
+               unsigned short ss_sel;
+               savesegment(ss, ss_sel);
+               if (ss_sel != __KERNEL_DS)
+                       loadsegment(ss, __KERNEL_DS);
+       }
+
         return prev_p;
  }
  
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c

index 6eb5c20..d090ecf 100644 (file)
--- a/arch/x86/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c
@@ -666,7 +666,7 @@ static int probe_sysfs_permissions(struct pci_dev *dev)
                 if (r)
                         return r;
  
-               inode = path.dentry->d_inode;
+               inode = d_backing_inode(path.dentry);
  
                 r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
                 path_put(&path);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c

index d67206a..629af0f 100644 (file)
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -683,8 +683,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
         unsigned long bitmap = 1;
         struct kvm_lapic **dst;
         int i;
-       bool ret = false;
-       bool x2apic_ipi = src && apic_x2apic_mode(src);
+       bool ret, x2apic_ipi;
  
         *r = -1;
  
@@ -696,16 +695,18 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
         if (irq->shorthand)
                 return false;
  
+       x2apic_ipi = src && apic_x2apic_mode(src);
         if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
                 return false;
  
+       ret = true;
         rcu_read_lock();
         map = rcu_dereference(kvm->arch.apic_map);
  
-       if (!map)
+       if (!map) {
+               ret = false;
                 goto out;
-
-       ret = true;
+       }
  
         if (irq->dest_mode == APIC_DEST_PHYSICAL) {
                 if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c

index 146f295..d43867c 100644 (file)
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4481,9 +4481,11 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
                 pfn = spte_to_pfn(*sptep);
  
                 /*
-                * Only EPT supported for now; otherwise, one would need to
-                * find out efficiently whether the guest page tables are
-                * also using huge pages.
+                * We cannot do huge page mapping for indirect shadow pages,
+                * which are found on the last rmap (level = 1) when not using
+                * tdp; such shadow pages are synced with the page table in
+                * the guest, and the guest page table is using 4K page size
+                * mapping if the indirect sp has level = 1.
                  */
                 if (sp->role.direct &&
                         !kvm_is_reserved_pfn(pfn) &&
@@ -4504,19 +4506,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
         bool flush = false;
         unsigned long *rmapp;
         unsigned long last_index, index;
-       gfn_t gfn_start, gfn_end;
  
         spin_lock(&kvm->mmu_lock);
  
-       gfn_start = memslot->base_gfn;
-       gfn_end = memslot->base_gfn + memslot->npages - 1;
-
-       if (gfn_start >= gfn_end)
-               goto out;
-
         rmapp = memslot->arch.rmap[0];
-       last_index = gfn_to_index(gfn_end, memslot->base_gfn,
-                                       PT_PAGE_TABLE_LEVEL);
+       last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1,
+                               memslot->base_gfn, PT_PAGE_TABLE_LEVEL);
  
         for (index = 0; index <= last_index; ++index, ++rmapp) {
                 if (*rmapp)
@@ -4534,7 +4529,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
         if (flush)
                 kvm_flush_remote_tlbs(kvm);
  
-out:
         spin_unlock(&kvm->mmu_lock);
  }
  
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c

index f5e8dce..f7b6168 100644 (file)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3622,8 +3622,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
  
  static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
  {
-       unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
-                   KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
+       /*
+        * Pass through host's Machine Check Enable value to hw_cr4, which
+        * is in force while we are in guest mode.  Do not let guests control
+        * this bit, even if host CR4.MCE == 0.
+        */
+       unsigned long hw_cr4 =
+               (cr4_read_shadow() & X86_CR4_MCE) |
+               (cr4 & ~X86_CR4_MCE) |
+               (to_vmx(vcpu)->rmode.vm86_active ?
+                KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
  
         if (cr4 & X86_CR4_VMXE) {
                 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index e1a8126..ed31c31 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5799,7 +5799,6 @@ int kvm_arch_init(void *opaque)
         kvm_set_mmio_spte_mask();
  
         kvm_x86_ops = ops;
-       kvm_init_msr_list();
  
         kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
                         PT_DIRTY_MASK, PT64_NX_MASK, 0);
@@ -7253,7 +7252,14 @@ void kvm_arch_hardware_disable(void)
  
  int kvm_arch_hardware_setup(void)
  {
-       return kvm_x86_ops->hardware_setup();
+       int r;
+
+       r = kvm_x86_ops->hardware_setup();
+       if (r != 0)
+               return r;
+
+       kvm_init_msr_list();
+       return 0;
  }
  
  void kvm_arch_hardware_unsetup(void)
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c

index 717908b..8f9a133 100644 (file)
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -87,8 +87,7 @@
  
  struct lguest_data lguest_data = {
         .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
-       .noirq_start = (u32)lguest_noirq_start,
-       .noirq_end = (u32)lguest_noirq_end,
+       .noirq_iret = (u32)lguest_noirq_iret,
         .kernel_address = PAGE_OFFSET,
         .blocked_interrupts = { 1 }, /* Block timer interrupts */
         .syscall_vec = SYSCALL_VECTOR,
@@ -262,7 +261,7 @@ PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl);
  PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable);
  /*:*/
  
-/* These are in i386_head.S */
+/* These are in head_32.S */
  extern void lg_irq_enable(void);
  extern void lg_restore_fl(unsigned long flags);
  
@@ -1368,7 +1367,7 @@ static void lguest_restart(char *reason)
   * fit comfortably.
   *
   * First we need assembly templates of each of the patchable Guest operations,
- * and these are in i386_head.S.
+ * and these are in head_32.S.
   */
  
  /*G:060 We construct a table from the assembler templates: */
diff --git a/arch/x86/lguest/head_32.S b/arch/x86/lguest/head_32.S

index 6ddfe4f..d5ae63f 100644 (file)
--- a/arch/x86/lguest/head_32.S
+++ b/arch/x86/lguest/head_32.S
@@ -84,7 +84,7 @@ ENTRY(lg_irq_enable)
          * set lguest_data.irq_pending to X86_EFLAGS_IF.  If it's not zero, we
          * jump to send_interrupts, otherwise we're done.
          */
-       testl $0, lguest_data+LGUEST_DATA_irq_pending
+       cmpl $0, lguest_data+LGUEST_DATA_irq_pending
         jnz send_interrupts
         /*
          * One cool thing about x86 is that you can do many things without using
@@ -133,9 +133,8 @@ ENTRY(lg_restore_fl)
         ret
  /*:*/
  
-/* These demark the EIP range where host should never deliver interrupts. */
-.global lguest_noirq_start
-.global lguest_noirq_end
+/* These demark the EIP where host should never deliver interrupts. */
+.global lguest_noirq_iret
  
  /*M:004
   * When the Host reflects a trap or injects an interrupt into the Guest, it
@@ -168,29 +167,26 @@ ENTRY(lg_restore_fl)
   * So we have to copy eflags from the stack to lguest_data.irq_enabled before
   * we do the "iret".
   *
- * There are two problems with this: firstly, we need to use a register to do
- * the copy and secondly, the whole thing needs to be atomic.  The first
- * problem is easy to solve: push %eax on the stack so we can use it, and then
- * restore it at the end just before the real "iret".
+ * There are two problems with this: firstly, we can't clobber any registers
+ * and secondly, the whole thing needs to be atomic.  The first problem
+ * is solved by using "push memory"/"pop memory" instruction pair for copying.
   *
   * The second is harder: copying eflags to lguest_data.irq_enabled will turn
   * interrupts on before we're finished, so we could be interrupted before we
- * return to userspace or wherever.  Our solution to this is to surround the
- * code with lguest_noirq_start: and lguest_noirq_end: labels.  We tell the
+ * return to userspace or wherever.  Our solution to this is to tell the
   * Host that it is *never* to interrupt us there, even if interrupts seem to be
- * enabled.
+ * enabled. (It's not necessary to protect pop instruction, since
+ * data gets updated only after it completes, so we only need to protect
+ * one instruction, iret).
   */
  ENTRY(lguest_iret)
-       pushl   %eax
-       movl    12(%esp), %eax
-lguest_noirq_start:
+       pushl   2*4(%esp)
         /*
          * Note the %ss: segment prefix here.  Normal data accesses use the
          * "ds" segment, but that will have already been restored for whatever
          * we're returning to (such as userspace): we can't trust it.  The %ss:
          * prefix makes sure we use the stack segment, which is still valid.
          */
-       movl    %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled
-       popl    %eax
+       popl    %ss:lguest_data+LGUEST_DATA_irq_enabled
+lguest_noirq_iret:
         iret
-lguest_noirq_end:
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c

index 1f33b3d..0a42327 100644 (file)
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -82,7 +82,7 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
         clac();
  
         /* If the destination is a kernel buffer, we always clear the end */
-       if ((unsigned long)to >= TASK_SIZE_MAX)
+       if (!__addr_ok(to))
                 memset(to, 0, len);
         return len;
  }
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c

index d05327c..5d355e0 100644 (file)
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -124,6 +124,7 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
  {
         void **srcs;
         int i;
+       int start = -1, stop = disks - 3;
  
         if (submit->scribble)
                 srcs = submit->scribble;
@@ -134,10 +135,21 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
                 if (blocks[i] == NULL) {
                         BUG_ON(i > disks - 3); /* P or Q can't be zero */
                         srcs[i] = (void*)raid6_empty_zero_page;
-               } else
+               } else {
                         srcs[i] = page_address(blocks[i]) + offset;
+                       if (i < disks - 2) {
+                               stop = i;
+                               if (start == -1)
+                                       start = i;
+                       }
+               }
         }
-       raid6_call.gen_syndrome(disks, len, srcs);
+       if (submit->flags & ASYNC_TX_PQ_XOR_DST) {
+               BUG_ON(!raid6_call.xor_syndrome);
+               if (start >= 0)
+                       raid6_call.xor_syndrome(disks, start, stop, len, srcs);
+       } else
+               raid6_call.gen_syndrome(disks, len, srcs);
         async_tx_sync_epilog(submit);
  }
  
@@ -178,7 +190,8 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks,
         if (device)
                 unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO);
  
-       if (unmap &&
+       /* XORing P/Q is only implemented in software */
+       if (unmap && !(submit->flags & ASYNC_TX_PQ_XOR_DST) &&
             (src_cnt <= dma_maxpq(device, 0) ||
              dma_maxpq(device, DMA_PREP_CONTINUE) > 0) &&
             is_dma_pq_aligned(device, offset, 0, len)) {
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig

index e6c3ddd..ab2cbb5 100644 (file)
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -5,7 +5,7 @@
  menuconfig ACPI
         bool "ACPI (Advanced Configuration and Power Interface) Support"
         depends on !IA64_HP_SIM
-       depends on IA64 || X86
+       depends on IA64 || X86 || (ARM64 && EXPERT)
         depends on PCI
         select PNP
         default y
@@ -48,9 +48,16 @@ config ACPI_LEGACY_TABLES_LOOKUP
  config ARCH_MIGHT_HAVE_ACPI_PDC
         bool
  
+config ACPI_GENERIC_GSI
+       bool
+
+config ACPI_SYSTEM_POWER_STATES_SUPPORT
+       bool
+
  config ACPI_SLEEP
         bool
         depends on SUSPEND || HIBERNATION
+       depends on ACPI_SYSTEM_POWER_STATES_SUPPORT
         default y
  
  config ACPI_PROCFS_POWER
@@ -163,6 +170,7 @@ config ACPI_PROCESSOR
         tristate "Processor"
         select THERMAL
         select CPU_IDLE
+       depends on X86 || IA64
         default y
         help
           This driver installs ACPI as the idle handler for Linux and uses
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile

index 623b117..8a063e2 100644 (file)
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -23,7 +23,7 @@ acpi-y                                += nvs.o
  
  # Power management related files
  acpi-y                         += wakeup.o
-acpi-y                         += sleep.o
+acpi-$(CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT) += sleep.o
  acpi-y                         += device_pm.o
  acpi-$(CONFIG_ACPI_SLEEP)      += proc.o
  
@@ -56,6 +56,7 @@ ifdef CONFIG_ACPI_VIDEO
  acpi-y                         += video_detect.o
  endif
  acpi-y                         += acpi_lpat.o
+acpi-$(CONFIG_ACPI_GENERIC_GSI) += gsi.o
  
  # These are (potentially) separate modules
  
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c

index 1020b1b..58f335c 100644 (file)
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -170,7 +170,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr)
         acpi_status status;
         int ret;
  
-       if (pr->phys_id == -1)
+       if (pr->phys_id == PHYS_CPUID_INVALID)
                 return -ENODEV;
  
         status = acpi_evaluate_integer(pr->handle, "_STA", NULL, &sta);
@@ -215,7 +215,8 @@ static int acpi_processor_get_info(struct acpi_device *device)
         union acpi_object object = { 0 };
         struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
         struct acpi_processor *pr = acpi_driver_data(device);
-       int phys_id, cpu_index, device_declaration = 0;
+       phys_cpuid_t phys_id;
+       int cpu_index, device_declaration = 0;
         acpi_status status = AE_OK;
         static int cpu0_initialized;
         unsigned long long value;
@@ -263,7 +264,7 @@ static int acpi_processor_get_info(struct acpi_device *device)
         }
  
         phys_id = acpi_get_phys_id(pr->handle, device_declaration, pr->acpi_id);
-       if (phys_id < 0)
+       if (phys_id == PHYS_CPUID_INVALID)
                 acpi_handle_debug(pr->handle, "failed to get CPU physical ID.\n");
         pr->phys_id = phys_id;
  
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c

index 8b67bd0..c412fdb 100644 (file)
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -448,6 +448,9 @@ static int __init acpi_bus_init_irq(void)
         case ACPI_IRQ_MODEL_IOSAPIC:
                 message = "IOSAPIC";
                 break;
+       case ACPI_IRQ_MODEL_GIC:
+               message = "GIC";
+               break;
         case ACPI_IRQ_MODEL_PLATFORM:
                 message = "platform specific model";
                 break;
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c

index 220d640..5e8fed4 100644 (file)
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -861,7 +861,7 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
                 }
         }
         mutex_unlock(&ec->mutex);
-       list_for_each_entry(handler, &free_list, node)
+       list_for_each_entry_safe(handler, tmp, &free_list, node)
                 acpi_ec_put_query_handler(handler);
  }
  EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler);
diff --git a/drivers/acpi/gsi.c b/drivers/acpi/gsi.c

new file mode 100644 (file)

index 0000000..38208f2
--- /dev/null
+++ b/drivers/acpi/gsi.c
@@ -0,0 +1,105 @@
+/*
+ * ACPI GSI IRQ layer
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/acpi.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+
+enum acpi_irq_model_id acpi_irq_model;
+
+static unsigned int acpi_gsi_get_irq_type(int trigger, int polarity)
+{
+       switch (polarity) {
+       case ACPI_ACTIVE_LOW:
+               return trigger == ACPI_EDGE_SENSITIVE ?
+                      IRQ_TYPE_EDGE_FALLING :
+                      IRQ_TYPE_LEVEL_LOW;
+       case ACPI_ACTIVE_HIGH:
+               return trigger == ACPI_EDGE_SENSITIVE ?
+                      IRQ_TYPE_EDGE_RISING :
+                      IRQ_TYPE_LEVEL_HIGH;
+       case ACPI_ACTIVE_BOTH:
+               if (trigger == ACPI_EDGE_SENSITIVE)
+                       return IRQ_TYPE_EDGE_BOTH;
+       default:
+               return IRQ_TYPE_NONE;
+       }
+}
+
+/**
+ * acpi_gsi_to_irq() - Retrieve the linux irq number for a given GSI
+ * @gsi: GSI IRQ number to map
+ * @irq: pointer where linux IRQ number is stored
+ *
+ * irq location updated with irq value [>0 on success, 0 on failure]
+ *
+ * Returns: linux IRQ number on success (>0)
+ *          -EINVAL on failure
+ */
+int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
+{
+       /*
+        * Only default domain is supported at present, always find
+        * the mapping corresponding to default domain by passing NULL
+        * as irq_domain parameter
+        */
+       *irq = irq_find_mapping(NULL, gsi);
+       /*
+        * *irq == 0 means no mapping, that should
+        * be reported as a failure
+        */
+       return (*irq > 0) ? *irq : -EINVAL;
+}
+EXPORT_SYMBOL_GPL(acpi_gsi_to_irq);
+
+/**
+ * acpi_register_gsi() - Map a GSI to a linux IRQ number
+ * @dev: device for which IRQ has to be mapped
+ * @gsi: GSI IRQ number
+ * @trigger: trigger type of the GSI number to be mapped
+ * @polarity: polarity of the GSI to be mapped
+ *
+ * Returns: a valid linux IRQ number on success
+ *          -EINVAL on failure
+ */
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger,
+                     int polarity)
+{
+       unsigned int irq;
+       unsigned int irq_type = acpi_gsi_get_irq_type(trigger, polarity);
+
+       /*
+        * There is no way at present to look-up the IRQ domain on ACPI,
+        * hence always create mapping referring to the default domain
+        * by passing NULL as irq_domain parameter
+        */
+       irq = irq_create_mapping(NULL, gsi);
+       if (!irq)
+               return -EINVAL;
+
+       /* Set irq type if specified and different than the current one */
+       if (irq_type != IRQ_TYPE_NONE &&
+               irq_type != irq_get_trigger_type(irq))
+               irq_set_irq_type(irq, irq_type);
+       return irq;
+}
+EXPORT_SYMBOL_GPL(acpi_register_gsi);
+
+/**
+ * acpi_unregister_gsi() - Free a GSI<->linux IRQ number mapping
+ * @gsi: GSI IRQ number
+ */
+void acpi_unregister_gsi(u32 gsi)
+{
+       int irq = irq_find_mapping(NULL, gsi);
+
+       irq_dispose_mapping(irq);
+}
+EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h

index 56b321a..ba4a61e 100644 (file)
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -161,7 +161,11 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit);
  /*--------------------------------------------------------------------------
                                    Suspend/Resume
    -------------------------------------------------------------------------- */
+#ifdef CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT
  extern int acpi_sleep_init(void);
+#else
+static inline int acpi_sleep_init(void) { return -ENXIO; }
+#endif
  
  #ifdef CONFIG_ACPI_SLEEP
  int acpi_sleep_proc_init(void);
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c

index f9eeae8..39748bb 100644 (file)
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -336,11 +336,11 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size)
         return NULL;
  }
  
-#ifndef CONFIG_IA64
-#define should_use_kmap(pfn)   page_is_ram(pfn)
-#else
+#if defined(CONFIG_IA64) || defined(CONFIG_ARM64)
  /* ioremap will take care of cache attributes */
  #define should_use_kmap(pfn)   0
+#else
+#define should_use_kmap(pfn)   page_is_ram(pfn)
  #endif
  
  static void __iomem *acpi_map(acpi_physical_address pg_off, unsigned long pg_sz)
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c

index 7962651..b1ec78b 100644 (file)
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -32,7 +32,7 @@ static struct acpi_table_madt *get_madt_table(void)
  }
  
  static int map_lapic_id(struct acpi_subtable_header *entry,
-                u32 acpi_id, int *apic_id)
+                u32 acpi_id, phys_cpuid_t *apic_id)
  {
         struct acpi_madt_local_apic *lapic =
                 container_of(entry, struct acpi_madt_local_apic, header);
@@ -48,7 +48,7 @@ static int map_lapic_id(struct acpi_subtable_header *entry,
  }
  
  static int map_x2apic_id(struct acpi_subtable_header *entry,
-                        int device_declaration, u32 acpi_id, int *apic_id)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
  {
         struct acpi_madt_local_x2apic *apic =
                 container_of(entry, struct acpi_madt_local_x2apic, header);
@@ -65,7 +65,7 @@ static int map_x2apic_id(struct acpi_subtable_header *entry,
  }
  
  static int map_lsapic_id(struct acpi_subtable_header *entry,
-               int device_declaration, u32 acpi_id, int *apic_id)
+               int device_declaration, u32 acpi_id, phys_cpuid_t *apic_id)
  {
         struct acpi_madt_local_sapic *lsapic =
                 container_of(entry, struct acpi_madt_local_sapic, header);
@@ -83,10 +83,35 @@ static int map_lsapic_id(struct acpi_subtable_header *entry,
         return 0;
  }
  
-static int map_madt_entry(int type, u32 acpi_id)
+/*
+ * Retrieve the ARM CPU physical identifier (MPIDR)
+ */
+static int map_gicc_mpidr(struct acpi_subtable_header *entry,
+               int device_declaration, u32 acpi_id, phys_cpuid_t *mpidr)
+{
+       struct acpi_madt_generic_interrupt *gicc =
+           container_of(entry, struct acpi_madt_generic_interrupt, header);
+
+       if (!(gicc->flags & ACPI_MADT_ENABLED))
+               return -ENODEV;
+
+       /* device_declaration means Device object in DSDT, in the
+        * GIC interrupt model, logical processors are required to
+        * have a Processor Device object in the DSDT, so we should
+        * check device_declaration here
+        */
+       if (device_declaration && (gicc->uid == acpi_id)) {
+               *mpidr = gicc->arm_mpidr;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static phys_cpuid_t map_madt_entry(int type, u32 acpi_id)
  {
         unsigned long madt_end, entry;
-       int phys_id = -1;       /* CPU hardware ID */
+       phys_cpuid_t phys_id = PHYS_CPUID_INVALID;      /* CPU hardware ID */
         struct acpi_table_madt *madt;
  
         madt = get_madt_table();
@@ -111,18 +136,21 @@ static int map_madt_entry(int type, u32 acpi_id)
                 } else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
                         if (!map_lsapic_id(header, type, acpi_id, &phys_id))
                                 break;
+               } else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT) {
+                       if (!map_gicc_mpidr(header, type, acpi_id, &phys_id))
+                               break;
                 }
                 entry += header->length;
         }
         return phys_id;
  }
  
-static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
+static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
  {
         struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
         union acpi_object *obj;
         struct acpi_subtable_header *header;
-       int phys_id = -1;
+       phys_cpuid_t phys_id = PHYS_CPUID_INVALID;
  
         if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
                 goto exit;
@@ -143,33 +171,35 @@ static int map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
                 map_lsapic_id(header, type, acpi_id, &phys_id);
         else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
                 map_x2apic_id(header, type, acpi_id, &phys_id);
+       else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
+               map_gicc_mpidr(header, type, acpi_id, &phys_id);
  
  exit:
         kfree(buffer.pointer);
         return phys_id;
  }
  
-int acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
+phys_cpuid_t acpi_get_phys_id(acpi_handle handle, int type, u32 acpi_id)
  {
-       int phys_id;
+       phys_cpuid_t phys_id;
  
         phys_id = map_mat_entry(handle, type, acpi_id);
-       if (phys_id == -1)
+       if (phys_id == PHYS_CPUID_INVALID)
                 phys_id = map_madt_entry(type, acpi_id);
  
         return phys_id;
  }
  
-int acpi_map_cpuid(int phys_id, u32 acpi_id)
+int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id)
  {
  #ifdef CONFIG_SMP
         int i;
  #endif
  
-       if (phys_id == -1) {
+       if (phys_id == PHYS_CPUID_INVALID) {
                 /*
                  * On UP processor, there is no _MAT or MADT table.
-                * So above phys_id is always set to -1.
+                * So above phys_id is always set to PHYS_CPUID_INVALID.
                  *
                  * BIOS may define multiple CPU handles even for UP processor.
                  * For example,
@@ -190,7 +220,7 @@ int acpi_map_cpuid(int phys_id, u32 acpi_id)
                 if (nr_cpu_ids <= 1 && acpi_id == 0)
                         return acpi_id;
                 else
-                       return phys_id;
+                       return -1;
         }
  
  #ifdef CONFIG_SMP
@@ -208,7 +238,7 @@ int acpi_map_cpuid(int phys_id, u32 acpi_id)
  
  int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
  {
-       int phys_id;
+       phys_cpuid_t phys_id;
  
         phys_id = acpi_get_phys_id(handle, type, acpi_id);
  
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c

index 69bc0d8..03141aa 100644 (file)
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -375,7 +375,11 @@ bool acpi_scan_is_offline(struct acpi_device *adev, bool uevent)
         struct acpi_device_physical_node *pn;
         bool offline = true;
  
-       mutex_lock(&adev->physical_node_lock);
+       /*
+        * acpi_container_offline() calls this for all of the container's
+        * children under the container's physical_node_lock lock.
+        */
+       mutex_lock_nested(&adev->physical_node_lock, SINGLE_DEPTH_NESTING);
  
         list_for_each_entry(pn, &adev->physical_node_list, node)
                 if (device_supports_offline(pn->dev) && !pn->dev->offline) {
@@ -2388,9 +2392,6 @@ static void acpi_default_enumeration(struct acpi_device *device)
         struct list_head resource_list;
         bool is_spi_i2c_slave = false;
  
-       if (!device->pnp.type.platform_id || device->handler)
-               return;
-
         /*
          * Do not enemerate SPI/I2C slaves as they will be enuerated by their
          * respective parents.
@@ -2403,6 +2404,29 @@ static void acpi_default_enumeration(struct acpi_device *device)
                 acpi_create_platform_device(device);
  }
  
+static const struct acpi_device_id generic_device_ids[] = {
+       {"PRP0001", },
+       {"", },
+};
+
+static int acpi_generic_device_attach(struct acpi_device *adev,
+                                     const struct acpi_device_id *not_used)
+{
+       /*
+        * Since PRP0001 is the only ID handled here, the test below can be
+        * unconditional.
+        */
+       if (adev->data.of_compatible)
+               acpi_default_enumeration(adev);
+
+       return 1;
+}
+
+static struct acpi_scan_handler generic_device_handler = {
+       .ids = generic_device_ids,
+       .attach = acpi_generic_device_attach,
+};
+
  static int acpi_scan_attach_handler(struct acpi_device *device)
  {
         struct acpi_hardware_id *hwid;
@@ -2428,8 +2452,6 @@ static int acpi_scan_attach_handler(struct acpi_device *device)
                                 break;
                 }
         }
-       if (!ret)
-               acpi_default_enumeration(device);
  
         return ret;
  }
@@ -2471,6 +2493,9 @@ static void acpi_bus_attach(struct acpi_device *device)
                 ret = device_attach(&device->dev);
                 if (ret < 0)
                         return;
+
+               if (!ret && device->pnp.type.platform_id)
+                       acpi_default_enumeration(device);
         }
         device->flags.visited = true;
  
@@ -2629,6 +2654,8 @@ int __init acpi_scan_init(void)
         acpi_pnp_init();
         acpi_int340x_thermal_init();
  
+       acpi_scan_add_handler(&generic_device_handler);
+
         mutex_lock(&acpi_scan_lock);
         /*
          * Enumerate devices in the ACPI namespace.
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c

index 93b8152..2e19189 100644 (file)
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -23,6 +23,8 @@
   *
   */
  
+/* Uncomment next line to get verbose printout */
+/* #define DEBUG */
  #define pr_fmt(fmt) "ACPI: " fmt
  
  #include <linux/init.h>
@@ -61,9 +63,9 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                 {
                         struct acpi_madt_local_apic *p =
                             (struct acpi_madt_local_apic *)header;
-                       pr_info("LAPIC (acpi_id[0x%02x] lapic_id[0x%02x] %s)\n",
-                               p->processor_id, p->id,
-                               (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
+                       pr_debug("LAPIC (acpi_id[0x%02x] lapic_id[0x%02x] %s)\n",
+                                p->processor_id, p->id,
+                                (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
                 }
                 break;
  
@@ -71,9 +73,9 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                 {
                         struct acpi_madt_local_x2apic *p =
                             (struct acpi_madt_local_x2apic *)header;
-                       pr_info("X2APIC (apic_id[0x%02x] uid[0x%02x] %s)\n",
-                               p->local_apic_id, p->uid,
-                               (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
+                       pr_debug("X2APIC (apic_id[0x%02x] uid[0x%02x] %s)\n",
+                                p->local_apic_id, p->uid,
+                                (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
                 }
                 break;
  
@@ -81,8 +83,8 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                 {
                         struct acpi_madt_io_apic *p =
                             (struct acpi_madt_io_apic *)header;
-                       pr_info("IOAPIC (id[0x%02x] address[0x%08x] gsi_base[%d])\n",
-                               p->id, p->address, p->global_irq_base);
+                       pr_debug("IOAPIC (id[0x%02x] address[0x%08x] gsi_base[%d])\n",
+                                p->id, p->address, p->global_irq_base);
                 }
                 break;
  
@@ -155,9 +157,9 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                 {
                         struct acpi_madt_io_sapic *p =
                             (struct acpi_madt_io_sapic *)header;
-                       pr_info("IOSAPIC (id[0x%x] address[%p] gsi_base[%d])\n",
-                               p->id, (void *)(unsigned long)p->address,
-                               p->global_irq_base);
+                       pr_debug("IOSAPIC (id[0x%x] address[%p] gsi_base[%d])\n",
+                                p->id, (void *)(unsigned long)p->address,
+                                p->global_irq_base);
                 }
                 break;
  
@@ -165,9 +167,9 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                 {
                         struct acpi_madt_local_sapic *p =
                             (struct acpi_madt_local_sapic *)header;
-                       pr_info("LSAPIC (acpi_id[0x%02x] lsapic_id[0x%02x] lsapic_eid[0x%02x] %s)\n",
-                               p->processor_id, p->id, p->eid,
-                               (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
+                       pr_debug("LSAPIC (acpi_id[0x%02x] lsapic_id[0x%02x] lsapic_eid[0x%02x] %s)\n",
+                                p->processor_id, p->id, p->eid,
+                                (p->lapic_flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
                 }
                 break;
  
@@ -183,6 +185,28 @@ void acpi_table_print_madt_entry(struct acpi_subtable_header *header)
                 }
                 break;
  
+       case ACPI_MADT_TYPE_GENERIC_INTERRUPT:
+               {
+                       struct acpi_madt_generic_interrupt *p =
+                               (struct acpi_madt_generic_interrupt *)header;
+                       pr_debug("GICC (acpi_id[0x%04x] address[%llx] MPIDR[0x%llx] %s)\n",
+                                p->uid, p->base_address,
+                                p->arm_mpidr,
+                                (p->flags & ACPI_MADT_ENABLED) ? "enabled" : "disabled");
+
+               }
+               break;
+
+       case ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR:
+               {
+                       struct acpi_madt_generic_distributor *p =
+                               (struct acpi_madt_generic_distributor *)header;
+                       pr_debug("GIC Distributor (gic_id[0x%04x] address[%llx] gsi_base[%d])\n",
+                                p->gic_id, p->base_address,
+                                p->global_irq_base);
+               }
+               break;
+
         default:
                 pr_warn("Found unsupported MADT entry (type = 0x%x)\n",
                         header->type);
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c

index 25798db..68f0314 100644 (file)
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -157,10 +157,10 @@ static int dev_mkdir(const char *name, umode_t mode)
         if (IS_ERR(dentry))
                 return PTR_ERR(dentry);
  
-       err = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+       err = vfs_mkdir(d_inode(path.dentry), dentry, mode);
         if (!err)
                 /* mark as kernel-created inode */
-               dentry->d_inode->i_private = &thread;
+               d_inode(dentry)->i_private = &thread;
         done_path_create(&path, dentry);
         return err;
  }
@@ -207,7 +207,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
         if (IS_ERR(dentry))
                 return PTR_ERR(dentry);
  
-       err = vfs_mknod(path.dentry->d_inode, dentry, mode, dev->devt);
+       err = vfs_mknod(d_inode(path.dentry), dentry, mode, dev->devt);
         if (!err) {
                 struct iattr newattrs;
  
@@ -215,12 +215,12 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
                 newattrs.ia_uid = uid;
                 newattrs.ia_gid = gid;
                 newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
-               mutex_lock(&dentry->d_inode->i_mutex);
+               mutex_lock(&d_inode(dentry)->i_mutex);
                 notify_change(dentry, &newattrs, NULL);
-               mutex_unlock(&dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dentry)->i_mutex);
  
                 /* mark as kernel-created inode */
-               dentry->d_inode->i_private = &thread;
+               d_inode(dentry)->i_private = &thread;
         }
         done_path_create(&path, dentry);
         return err;
@@ -235,16 +235,16 @@ static int dev_rmdir(const char *name)
         dentry = kern_path_locked(name, &parent);
         if (IS_ERR(dentry))
                 return PTR_ERR(dentry);
-       if (dentry->d_inode) {
-               if (dentry->d_inode->i_private == &thread)
-                       err = vfs_rmdir(parent.dentry->d_inode, dentry);
+       if (d_really_is_positive(dentry)) {
+               if (d_inode(dentry)->i_private == &thread)
+                       err = vfs_rmdir(d_inode(parent.dentry), dentry);
                 else
                         err = -EPERM;
         } else {
                 err = -ENOENT;
         }
         dput(dentry);
-       mutex_unlock(&parent.dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
         path_put(&parent);
         return err;
  }
@@ -306,11 +306,11 @@ static int handle_remove(const char *nodename, struct device *dev)
         if (IS_ERR(dentry))
                 return PTR_ERR(dentry);
  
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                 struct kstat stat;
                 struct path p = {.mnt = parent.mnt, .dentry = dentry};
                 err = vfs_getattr(&p, &stat);
-               if (!err && dev_mynode(dev, dentry->d_inode, &stat)) {
+               if (!err && dev_mynode(dev, d_inode(dentry), &stat)) {
                         struct iattr newattrs;
                         /*
                          * before unlinking this node, reset permissions
@@ -321,10 +321,10 @@ static int handle_remove(const char *nodename, struct device *dev)
                         newattrs.ia_mode = stat.mode & ~0777;
                         newattrs.ia_valid =
                                 ATTR_UID|ATTR_GID|ATTR_MODE;
-                       mutex_lock(&dentry->d_inode->i_mutex);
+                       mutex_lock(&d_inode(dentry)->i_mutex);
                         notify_change(dentry, &newattrs, NULL);
-                       mutex_unlock(&dentry->d_inode->i_mutex);
-                       err = vfs_unlink(parent.dentry->d_inode, dentry, NULL);
+                       mutex_unlock(&d_inode(dentry)->i_mutex);
+                       err = vfs_unlink(d_inode(parent.dentry), dentry, NULL);
                         if (!err || err == -ENOENT)
                                 deleted = 1;
                 }
@@ -332,7 +332,7 @@ static int handle_remove(const char *nodename, struct device *dev)
                 err = -ENOENT;
         }
         dput(dentry);
-       mutex_unlock(&parent.dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent.dentry)->i_mutex);
  
         path_put(&parent);
         if (deleted && strchr(nodename, '/'))
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c

index 9a95002..a6ee3d7 100644 (file)
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -424,7 +424,7 @@ static int in_flight_summary_show(struct seq_file *m, void *pos)
   * So we have our own inline version of it above.  :-( */
  static inline int debugfs_positive(struct dentry *dentry)
  {
-        return dentry->d_inode && !d_unhashed(dentry);
+        return d_really_is_positive(dentry) && !d_unhashed(dentry);
  }
  
  /* make sure at *open* time that the respective object won't go away. */
@@ -439,15 +439,15 @@ static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, vo
          * or has debugfs_remove() already been called? */
         parent = file->f_path.dentry->d_parent;
         /* not sure if this can happen: */
-       if (!parent || !parent->d_inode)
+       if (!parent || d_really_is_negative(parent))
                 goto out;
         /* serialize with d_delete() */
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         /* Make sure the object is still alive */
         if (debugfs_positive(file->f_path.dentry)
         && kref_get_unless_zero(kref))
                 ret = 0;
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
         if (!ret) {
                 ret = single_open(file, show, data);
                 if (ret)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c

index b40af32..8125233 100644 (file)
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3762,8 +3762,8 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
                 goto out_tag_set;
         }
  
-       /* We use the default size, but let's be explicit about it. */
-       blk_queue_physical_block_size(q, SECTOR_SIZE);
+       queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
+       /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */
  
         /* set io sizes to object size */
         segment_size = rbd_obj_bytes(&rbd_dev->header);
@@ -5301,8 +5301,13 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
  
         if (mapping) {
                 ret = rbd_dev_header_watch_sync(rbd_dev);
-               if (ret)
+               if (ret) {
+                       if (ret == -ENOENT)
+                               pr_info("image %s/%s does not exist\n",
+                                       rbd_dev->spec->pool_name,
+                                       rbd_dev->spec->image_name);
                         goto out_header_name;
+               }
         }
  
         ret = rbd_dev_header_info(rbd_dev);
@@ -5319,8 +5324,14 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
                 ret = rbd_spec_fill_snap_id(rbd_dev);
         else
                 ret = rbd_spec_fill_names(rbd_dev);
-       if (ret)
+       if (ret) {
+               if (ret == -ENOENT)
+                       pr_info("snap %s/%s@%s does not exist\n",
+                               rbd_dev->spec->pool_name,
+                               rbd_dev->spec->image_name,
+                               rbd_dev->spec->snap_name);
                 goto err_out_probe;
+       }
  
         if (rbd_dev->header.features & RBD_FEATURE_LAYERING) {
                 ret = rbd_dev_v2_parent_info(rbd_dev);
@@ -5390,8 +5401,11 @@ static ssize_t do_rbd_add(struct bus_type *bus,
  
         /* pick the pool */
         rc = rbd_add_get_pool_id(rbdc, spec->pool_name);
-       if (rc < 0)
+       if (rc < 0) {
+               if (rc == -ENOENT)
+                       pr_info("pool %s does not exist\n", spec->pool_name);
                 goto err_out_client;
+       }
         spec->pool_id = (u64)rc;
  
         /* The ceph file layout needs to fit pool id in 32 bits */
@@ -5673,7 +5687,7 @@ static int __init rbd_init(void)
  
         /*
          * The number of active work items is limited by the number of
-        * rbd devices, so leave @max_active at default.
+        * rbd devices * queue depth, so leave @max_active at default.
          */
         rbd_wq = alloc_workqueue(RBD_DRV_NAME, WQ_MEM_RECLAIM, 0);
         if (!rbd_wq) {
diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c

index 2664696..0aa135d 100644 (file)
--- a/drivers/clocksource/arm_arch_timer.c
+++ b/drivers/clocksource/arm_arch_timer.c
@@ -22,6 +22,7 @@
  #include <linux/io.h>
  #include <linux/slab.h>
  #include <linux/sched_clock.h>
+#include <linux/acpi.h>
  
  #include <asm/arch_timer.h>
  #include <asm/virt.h>
@@ -371,8 +372,12 @@ arch_timer_detect_rate(void __iomem *cntbase, struct device_node *np)
         if (arch_timer_rate)
                 return;
  
-       /* Try to determine the frequency from the device tree or CNTFRQ */
-       if (of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) {
+       /*
+        * Try to determine the frequency from the device tree or CNTFRQ,
+        * if ACPI is enabled, get the frequency from CNTFRQ ONLY.
+        */
+       if (!acpi_disabled ||
+           of_property_read_u32(np, "clock-frequency", &arch_timer_rate)) {
                 if (cntbase)
                         arch_timer_rate = readl_relaxed(cntbase + CNTFRQ);
                 else
@@ -691,28 +696,8 @@ static void __init arch_timer_common_init(void)
         arch_timer_arch_init();
  }
  
-static void __init arch_timer_init(struct device_node *np)
+static void __init arch_timer_init(void)
  {
-       int i;
-
-       if (arch_timers_present & ARCH_CP15_TIMER) {
-               pr_warn("arch_timer: multiple nodes in dt, skipping\n");
-               return;
-       }
-
-       arch_timers_present |= ARCH_CP15_TIMER;
-       for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
-               arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
-       arch_timer_detect_rate(NULL, np);
-
-       /*
-        * If we cannot rely on firmware initializing the timer registers then
-        * we should use the physical timers instead.
-        */
-       if (IS_ENABLED(CONFIG_ARM) &&
-           of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
-                       arch_timer_use_virtual = false;
-
         /*
          * If HYP mode is available, we know that the physical timer
          * has been configured to be accessible from PL1. Use it, so
@@ -731,13 +716,39 @@ static void __init arch_timer_init(struct device_node *np)
                 }
         }
  
-       arch_timer_c3stop = !of_property_read_bool(np, "always-on");
-
         arch_timer_register();
         arch_timer_common_init();
  }
-CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_init);
-CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_init);
+
+static void __init arch_timer_of_init(struct device_node *np)
+{
+       int i;
+
+       if (arch_timers_present & ARCH_CP15_TIMER) {
+               pr_warn("arch_timer: multiple nodes in dt, skipping\n");
+               return;
+       }
+
+       arch_timers_present |= ARCH_CP15_TIMER;
+       for (i = PHYS_SECURE_PPI; i < MAX_TIMER_PPI; i++)
+               arch_timer_ppi[i] = irq_of_parse_and_map(np, i);
+
+       arch_timer_detect_rate(NULL, np);
+
+       arch_timer_c3stop = !of_property_read_bool(np, "always-on");
+
+       /*
+        * If we cannot rely on firmware initializing the timer registers then
+        * we should use the physical timers instead.
+        */
+       if (IS_ENABLED(CONFIG_ARM) &&
+           of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
+                       arch_timer_use_virtual = false;
+
+       arch_timer_init();
+}
+CLOCKSOURCE_OF_DECLARE(armv7_arch_timer, "arm,armv7-timer", arch_timer_of_init);
+CLOCKSOURCE_OF_DECLARE(armv8_arch_timer, "arm,armv8-timer", arch_timer_of_init);
  
  static void __init arch_timer_mem_init(struct device_node *np)
  {
@@ -804,3 +815,70 @@ static void __init arch_timer_mem_init(struct device_node *np)
  }
  CLOCKSOURCE_OF_DECLARE(armv7_arch_timer_mem, "arm,armv7-timer-mem",
                        arch_timer_mem_init);
+
+#ifdef CONFIG_ACPI
+static int __init map_generic_timer_interrupt(u32 interrupt, u32 flags)
+{
+       int trigger, polarity;
+
+       if (!interrupt)
+               return 0;
+
+       trigger = (flags & ACPI_GTDT_INTERRUPT_MODE) ? ACPI_EDGE_SENSITIVE
+                       : ACPI_LEVEL_SENSITIVE;
+
+       polarity = (flags & ACPI_GTDT_INTERRUPT_POLARITY) ? ACPI_ACTIVE_LOW
+                       : ACPI_ACTIVE_HIGH;
+
+       return acpi_register_gsi(NULL, interrupt, trigger, polarity);
+}
+
+/* Initialize per-processor generic timer */
+static int __init arch_timer_acpi_init(struct acpi_table_header *table)
+{
+       struct acpi_table_gtdt *gtdt;
+
+       if (arch_timers_present & ARCH_CP15_TIMER) {
+               pr_warn("arch_timer: already initialized, skipping\n");
+               return -EINVAL;
+       }
+
+       gtdt = container_of(table, struct acpi_table_gtdt, header);
+
+       arch_timers_present |= ARCH_CP15_TIMER;
+
+       arch_timer_ppi[PHYS_SECURE_PPI] =
+               map_generic_timer_interrupt(gtdt->secure_el1_interrupt,
+               gtdt->secure_el1_flags);
+
+       arch_timer_ppi[PHYS_NONSECURE_PPI] =
+               map_generic_timer_interrupt(gtdt->non_secure_el1_interrupt,
+               gtdt->non_secure_el1_flags);
+
+       arch_timer_ppi[VIRT_PPI] =
+               map_generic_timer_interrupt(gtdt->virtual_timer_interrupt,
+               gtdt->virtual_timer_flags);
+
+       arch_timer_ppi[HYP_PPI] =
+               map_generic_timer_interrupt(gtdt->non_secure_el2_interrupt,
+               gtdt->non_secure_el2_flags);
+
+       /* Get the frequency from CNTFRQ */
+       arch_timer_detect_rate(NULL, NULL);
+
+       /* Always-on capability */
+       arch_timer_c3stop = !(gtdt->non_secure_el1_flags & ACPI_GTDT_ALWAYS_ON);
+
+       arch_timer_init();
+       return 0;
+}
+
+/* Initialize all the generic timers presented in GTDT */
+void __init acpi_generic_timer_init(void)
+{
+       if (acpi_disabled)
+               return;
+
+       acpi_table_parse(ACPI_SIG_GTDT, arch_timer_acpi_init);
+}
+#endif
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c

index c5b81be..6414661 100644 (file)
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -31,6 +31,7 @@
  #include <asm/div64.h>
  #include <asm/msr.h>
  #include <asm/cpu_device_id.h>
+#include <asm/cpufeature.h>
  
  #define BYT_RATIOS             0x66a
  #define BYT_VIDS               0x66b
@@ -649,7 +650,7 @@ static struct cpu_defaults byt_params = {
         .pid_policy = {
                 .sample_rate_ms = 10,
                 .deadband = 0,
-               .setpoint = 97,
+               .setpoint = 60,
                 .p_gain_pct = 14,
                 .d_gain_pct = 0,
                 .i_gain_pct = 4,
@@ -1200,8 +1201,7 @@ static int __init intel_pstate_init(void)
  {
         int cpu, rc = 0;
         const struct x86_cpu_id *id;
-       struct cpu_defaults *cpu_info;
-       struct cpuinfo_x86 *c = &boot_cpu_data;
+       struct cpu_defaults *cpu_def;
  
         if (no_load)
                 return -ENODEV;
@@ -1217,10 +1217,10 @@ static int __init intel_pstate_init(void)
         if (intel_pstate_platform_pwr_mgmt_exists())
                 return -ENODEV;
  
-       cpu_info = (struct cpu_defaults *)id->driver_data;
+       cpu_def = (struct cpu_defaults *)id->driver_data;
  
-       copy_pid_params(&cpu_info->pid_policy);
-       copy_cpu_funcs(&cpu_info->funcs);
+       copy_pid_params(&cpu_def->pid_policy);
+       copy_cpu_funcs(&cpu_def->funcs);
  
         if (intel_pstate_msrs_not_valid())
                 return -ENODEV;
@@ -1231,7 +1231,7 @@ static int __init intel_pstate_init(void)
         if (!all_cpu_data)
                 return -ENOMEM;
  
-       if (cpu_has(c,X86_FEATURE_HWP) && !no_hwp)
+       if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp)
                 intel_pstate_hwp_enable();
  
         if (!hwp_active && hwp_only)
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig

index 800bf41..033c0c8 100644 (file)
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -446,8 +446,9 @@ config CRYPTO_DEV_VMX
  source "drivers/crypto/vmx/Kconfig"
  
  config CRYPTO_DEV_IMGTEC_HASH
-       depends on MIPS || COMPILE_TEST
         tristate "Imagination Technologies hardware hash accelerator"
+       depends on MIPS || COMPILE_TEST
+       depends on HAS_DMA
         select CRYPTO_ALGAPI
         select CRYPTO_MD5
         select CRYPTO_SHA1
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c

index 5be225c..c5a9138 100644 (file)
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -265,43 +265,40 @@ static inline int is_dma_buf_file(struct file *file)
  }
  
  /**
- * dma_buf_export_named - Creates a new dma_buf, and associates an anon file
+ * dma_buf_export - Creates a new dma_buf, and associates an anon file
   * with this buffer, so it can be exported.
   * Also connect the allocator specific data and ops to the buffer.
   * Additionally, provide a name string for exporter; useful in debugging.
   *
- * @priv:      [in]    Attach private data of allocator to this buffer
- * @ops:       [in]    Attach allocator-defined dma buf ops to the new buffer.
- * @size:      [in]    Size of the buffer
- * @flags:     [in]    mode flags for the file.
- * @exp_name:  [in]    name of the exporting module - useful for debugging.
- * @resv:      [in]    reservation-object, NULL to allocate default one.
+ * @exp_info:  [in]    holds all the export related information provided
+ *                     by the exporter. see struct dma_buf_export_info
+ *                     for further details.
   *
   * Returns, on success, a newly created dma_buf object, which wraps the
   * supplied private data and operations for dma_buf_ops. On either missing
   * ops, or error in allocating struct dma_buf, will return negative error.
   *
   */
-struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
-                               size_t size, int flags, const char *exp_name,
-                               struct reservation_object *resv)
+struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
  {
         struct dma_buf *dmabuf;
+       struct reservation_object *resv = exp_info->resv;
         struct file *file;
         size_t alloc_size = sizeof(struct dma_buf);
-       if (!resv)
+       if (!exp_info->resv)
                 alloc_size += sizeof(struct reservation_object);
         else
                 /* prevent &dma_buf[1] == dma_buf->resv */
                 alloc_size += 1;
  
-       if (WARN_ON(!priv || !ops
-                         || !ops->map_dma_buf
-                         || !ops->unmap_dma_buf
-                         || !ops->release
-                         || !ops->kmap_atomic
-                         || !ops->kmap
-                         || !ops->mmap)) {
+       if (WARN_ON(!exp_info->priv
+                         || !exp_info->ops
+                         || !exp_info->ops->map_dma_buf
+                         || !exp_info->ops->unmap_dma_buf
+                         || !exp_info->ops->release
+                         || !exp_info->ops->kmap_atomic
+                         || !exp_info->ops->kmap
+                         || !exp_info->ops->mmap)) {
                 return ERR_PTR(-EINVAL);
         }
  
@@ -309,10 +306,10 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
         if (dmabuf == NULL)
                 return ERR_PTR(-ENOMEM);
  
-       dmabuf->priv = priv;
-       dmabuf->ops = ops;
-       dmabuf->size = size;
-       dmabuf->exp_name = exp_name;
+       dmabuf->priv = exp_info->priv;
+       dmabuf->ops = exp_info->ops;
+       dmabuf->size = exp_info->size;
+       dmabuf->exp_name = exp_info->exp_name;
         init_waitqueue_head(&dmabuf->poll);
         dmabuf->cb_excl.poll = dmabuf->cb_shared.poll = &dmabuf->poll;
         dmabuf->cb_excl.active = dmabuf->cb_shared.active = 0;
@@ -323,7 +320,8 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
         }
         dmabuf->resv = resv;
  
-       file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf, flags);
+       file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf,
+                                       exp_info->flags);
         if (IS_ERR(file)) {
                 kfree(dmabuf);
                 return ERR_CAST(file);
@@ -341,8 +339,7 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
  
         return dmabuf;
  }
-EXPORT_SYMBOL_GPL(dma_buf_export_named);
-
+EXPORT_SYMBOL_GPL(dma_buf_export);
  
  /**
   * dma_buf_fd - returns a file descriptor for the given dma_buf
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig

index 91eced0..fd7ac13 100644 (file)
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -112,6 +112,17 @@ config FSL_DMA
           EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on
           some Txxx and Bxxx parts.
  
+config FSL_RAID
+        tristate "Freescale RAID engine Support"
+        depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH
+        select DMA_ENGINE
+        select DMA_ENGINE_RAID
+        ---help---
+          Enable support for Freescale RAID Engine. RAID Engine is
+          available on some QorIQ SoCs (like P5020/P5040). It has
+          the capability to offload memcpy, xor and pq computation
+         for raid5/6.
+
  source "drivers/dma/hsu/Kconfig"
  
  config MPC512X_DMA
@@ -347,6 +358,16 @@ config DMA_JZ4740
         select DMA_ENGINE
         select DMA_VIRTUAL_CHANNELS
  
+config DMA_JZ4780
+       tristate "JZ4780 DMA support"
+       depends on MACH_JZ4780
+       select DMA_ENGINE
+       select DMA_VIRTUAL_CHANNELS
+       help
+         This selects support for the DMA controller in Ingenic JZ4780 SoCs.
+         If you have a board based on such a SoC and wish to use DMA for
+         devices which can use the DMA controller, say Y or M here.
+
  config K3_DMA
         tristate "Hisilicon K3 DMA support"
         depends on ARCH_HI3xxx
@@ -414,6 +435,14 @@ config IMG_MDC_DMA
         help
           Enable support for the IMG multi-threaded DMA controller (MDC).
  
+config XGENE_DMA
+       tristate "APM X-Gene DMA support"
+       select DMA_ENGINE
+       select DMA_ENGINE_RAID
+       select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+       help
+         Enable support for the APM X-Gene SoC DMA engine.
+
  config DMA_ENGINE
         bool
  
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile

index 7e8301c..69f77d5 100644 (file)
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -41,9 +41,11 @@ obj-$(CONFIG_DMA_OMAP) += omap-dma.o
  obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
  obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o
  obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o
+obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o
  obj-$(CONFIG_TI_CPPI41) += cppi41.o
  obj-$(CONFIG_K3_DMA) += k3dma.o
  obj-$(CONFIG_MOXART_DMA) += moxart-dma.o
+obj-$(CONFIG_FSL_RAID) += fsl_raid.o
  obj-$(CONFIG_FSL_EDMA) += fsl-edma.o
  obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
  obj-y += xilinx/
@@ -51,3 +53,4 @@ obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o
  obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o
  obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o
  obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o
+obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c

index 83aa55d..49d396e 100644 (file)
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -15,10 +15,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
   * The full GNU General Public License is in this distribution in the file
   * called COPYING.
   *
@@ -1195,11 +1191,6 @@ static void pl08x_free_txd_list(struct pl08x_driver_data *pl08x,
  /*
   * The DMA ENGINE API
   */
-static int pl08x_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
  static void pl08x_free_chan_resources(struct dma_chan *chan)
  {
         /* Ensure all queued descriptors are freed */
@@ -2066,7 +2057,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
         /* Initialize memcpy engine */
         dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
         pl08x->memcpy.dev = &adev->dev;
-       pl08x->memcpy.device_alloc_chan_resources = pl08x_alloc_chan_resources;
         pl08x->memcpy.device_free_chan_resources = pl08x_free_chan_resources;
         pl08x->memcpy.device_prep_dma_memcpy = pl08x_prep_dma_memcpy;
         pl08x->memcpy.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
@@ -2085,7 +2075,6 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
         dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
         dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
         pl08x->slave.dev = &adev->dev;
-       pl08x->slave.device_alloc_chan_resources = pl08x_alloc_chan_resources;
         pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources;
         pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
         pl08x->slave.device_tx_status = pl08x_dma_tx_status;
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c

index 0b4fc6f..57b2141 100644 (file)
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -65,6 +65,21 @@ static void atc_issue_pending(struct dma_chan *chan);
  
  /*----------------------------------------------------------------------*/
  
+static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst,
+                                               size_t len)
+{
+       unsigned int width;
+
+       if (!((src | dst  | len) & 3))
+               width = 2;
+       else if (!((src | dst | len) & 1))
+               width = 1;
+       else
+               width = 0;
+
+       return width;
+}
+
  static struct at_desc *atc_first_active(struct at_dma_chan *atchan)
  {
         return list_first_entry(&atchan->active_list,
@@ -659,16 +674,10 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
          * We can be a lot more clever here, but this should take care
          * of the most common optimization.
          */
-       if (!((src | dest  | len) & 3)) {
-               ctrla = ATC_SRC_WIDTH_WORD | ATC_DST_WIDTH_WORD;
-               src_width = dst_width = 2;
-       } else if (!((src | dest | len) & 1)) {
-               ctrla = ATC_SRC_WIDTH_HALFWORD | ATC_DST_WIDTH_HALFWORD;
-               src_width = dst_width = 1;
-       } else {
-               ctrla = ATC_SRC_WIDTH_BYTE | ATC_DST_WIDTH_BYTE;
-               src_width = dst_width = 0;
-       }
+       src_width = dst_width = atc_get_xfer_width(src, dest, len);
+
+       ctrla = ATC_SRC_WIDTH(src_width) |
+               ATC_DST_WIDTH(dst_width);
  
         for (offset = 0; offset < len; offset += xfer_count << src_width) {
                 xfer_count = min_t(size_t, (len - offset) >> src_width,
@@ -861,6 +870,144 @@ err:
         return NULL;
  }
  
+/**
+ * atc_prep_dma_sg - prepare memory to memory scather-gather operation
+ * @chan: the channel to prepare operation on
+ * @dst_sg: destination scatterlist
+ * @dst_nents: number of destination scatterlist entries
+ * @src_sg: source scatterlist
+ * @src_nents: number of source scatterlist entries
+ * @flags: tx descriptor status flags
+ */
+static struct dma_async_tx_descriptor *
+atc_prep_dma_sg(struct dma_chan *chan,
+               struct scatterlist *dst_sg, unsigned int dst_nents,
+               struct scatterlist *src_sg, unsigned int src_nents,
+               unsigned long flags)
+{
+       struct at_dma_chan      *atchan = to_at_dma_chan(chan);
+       struct at_desc          *desc = NULL;
+       struct at_desc          *first = NULL;
+       struct at_desc          *prev = NULL;
+       unsigned int            src_width;
+       unsigned int            dst_width;
+       size_t                  xfer_count;
+       u32                     ctrla;
+       u32                     ctrlb;
+       size_t                  dst_len = 0, src_len = 0;
+       dma_addr_t              dst = 0, src = 0;
+       size_t                  len = 0, total_len = 0;
+
+       if (unlikely(dst_nents == 0 || src_nents == 0))
+               return NULL;
+
+       if (unlikely(dst_sg == NULL || src_sg == NULL))
+               return NULL;
+
+       ctrlb =   ATC_DEFAULT_CTRLB | ATC_IEN
+               | ATC_SRC_ADDR_MODE_INCR
+               | ATC_DST_ADDR_MODE_INCR
+               | ATC_FC_MEM2MEM;
+
+       /*
+        * loop until there is either no more source or no more destination
+        * scatterlist entry
+        */
+       while (true) {
+
+               /* prepare the next transfer */
+               if (dst_len == 0) {
+
+                       /* no more destination scatterlist entries */
+                       if (!dst_sg || !dst_nents)
+                               break;
+
+                       dst = sg_dma_address(dst_sg);
+                       dst_len = sg_dma_len(dst_sg);
+
+                       dst_sg = sg_next(dst_sg);
+                       dst_nents--;
+               }
+
+               if (src_len == 0) {
+
+                       /* no more source scatterlist entries */
+                       if (!src_sg || !src_nents)
+                               break;
+
+                       src = sg_dma_address(src_sg);
+                       src_len = sg_dma_len(src_sg);
+
+                       src_sg = sg_next(src_sg);
+                       src_nents--;
+               }
+
+               len = min_t(size_t, src_len, dst_len);
+               if (len == 0)
+                       continue;
+
+               /* take care for the alignment */
+               src_width = dst_width = atc_get_xfer_width(src, dst, len);
+
+               ctrla = ATC_SRC_WIDTH(src_width) |
+                       ATC_DST_WIDTH(dst_width);
+
+               /*
+                * The number of transfers to set up refer to the source width
+                * that depends on the alignment.
+                */
+               xfer_count = len >> src_width;
+               if (xfer_count > ATC_BTSIZE_MAX) {
+                       xfer_count = ATC_BTSIZE_MAX;
+                       len = ATC_BTSIZE_MAX << src_width;
+               }
+
+               /* create the transfer */
+               desc = atc_desc_get(atchan);
+               if (!desc)
+                       goto err_desc_get;
+
+               desc->lli.saddr = src;
+               desc->lli.daddr = dst;
+               desc->lli.ctrla = ctrla | xfer_count;
+               desc->lli.ctrlb = ctrlb;
+
+               desc->txd.cookie = 0;
+               desc->len = len;
+
+               /*
+                * Although we only need the transfer width for the first and
+                * the last descriptor, its easier to set it to all descriptors.
+                */
+               desc->tx_width = src_width;
+
+               atc_desc_chain(&first, &prev, desc);
+
+               /* update the lengths and addresses for the next loop cycle */
+               dst_len -= len;
+               src_len -= len;
+               dst += len;
+               src += len;
+
+               total_len += len;
+       }
+
+       /* First descriptor of the chain embedds additional information */
+       first->txd.cookie = -EBUSY;
+       first->total_len = total_len;
+
+       /* set end-of-link to the last link descriptor of list*/
+       set_desc_eol(desc);
+
+       first->txd.flags = flags; /* client is in control of this ack */
+
+       return &first->txd;
+
+err_desc_get:
+       atc_desc_put(atchan, first);
+       return NULL;
+}
+
  /**
   * atc_dma_cyclic_check_values
   * Check for too big/unaligned periods and unaligned DMA buffer
@@ -1461,8 +1608,10 @@ static int __init at_dma_probe(struct platform_device *pdev)
  
         /* setup platform data for each SoC */
         dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask);
+       dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask);
         dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask);
         dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask);
+       dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask);
  
         /* get DMA parameters from controller type */
         plat_dat = at_dma_get_driver_data(pdev);
@@ -1582,11 +1731,15 @@ static int __init at_dma_probe(struct platform_device *pdev)
                 atdma->dma_common.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
         }
  
+       if (dma_has_cap(DMA_SG, atdma->dma_common.cap_mask))
+               atdma->dma_common.device_prep_dma_sg = atc_prep_dma_sg;
+
         dma_writel(atdma, EN, AT_DMA_ENABLE);
  
-       dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s), %d channels\n",
+       dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n",
           dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "",
           dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)  ? "slave " : "",
+         dma_has_cap(DMA_SG, atdma->dma_common.cap_mask)  ? "sg-cpy " : "",
           plat_dat->nr_channels);
  
         dma_async_device_register(&atdma->dma_common);
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c

index d9891d3..933e4b3 100644 (file)
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -1154,8 +1154,10 @@ static int at_xdmac_device_resume(struct dma_chan *chan)
         dev_dbg(chan2dev(chan), "%s\n", __func__);
  
         spin_lock_bh(&atchan->lock);
-       if (!at_xdmac_chan_is_paused(atchan))
+       if (!at_xdmac_chan_is_paused(atchan)) {
+               spin_unlock_bh(&atchan->lock);
                 return 0;
+       }
  
         at_xdmac_write(atxdmac, AT_XDMAC_GRWR, atchan->mask);
         clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
diff --git a/drivers/dma/bestcomm/bestcomm.c b/drivers/dma/bestcomm/bestcomm.c

index fa378d8..180fedb 100644 (file)
--- a/drivers/dma/bestcomm/bestcomm.c
+++ b/drivers/dma/bestcomm/bestcomm.c
@@ -30,7 +30,7 @@
  #define DRIVER_NAME "bestcomm-core"
  
  /* MPC5200 device tree match tables */
-static struct of_device_id mpc52xx_sram_ids[] = {
+static const struct of_device_id mpc52xx_sram_ids[] = {
         { .compatible = "fsl,mpc5200-sram", },
         { .compatible = "mpc5200-sram", },
         {}
@@ -481,7 +481,7 @@ static int mpc52xx_bcom_remove(struct platform_device *op)
         return 0;
  }
  
-static struct of_device_id mpc52xx_bcom_of_match[] = {
+static const struct of_device_id mpc52xx_bcom_of_match[] = {
         { .compatible = "fsl,mpc5200-bestcomm", },
         { .compatible = "mpc5200-bestcomm", },
         {},
diff --git a/drivers/dma/dma-jz4740.c b/drivers/dma/dma-jz4740.c

index 8488441..7638b24 100644 (file)
--- a/drivers/dma/dma-jz4740.c
+++ b/drivers/dma/dma-jz4740.c
@@ -7,10 +7,6 @@
   *  Free Software Foundation;  either version 2 of the License, or (at your
   *  option) any later version.
   *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  675 Mass Ave, Cambridge, MA 02139, USA.
- *
   */
  
  #include <linux/dmaengine.h>
@@ -343,7 +339,7 @@ static void jz4740_dma_chan_irq(struct jz4740_dmaengine_chan *chan)
  {
         spin_lock(&chan->vchan.lock);
         if (chan->desc) {
-               if (chan->desc && chan->desc->cyclic) {
+               if (chan->desc->cyclic) {
                         vchan_cyclic_callback(&chan->desc->vdesc);
                 } else {
                         if (chan->next_sg == chan->desc->num_sgs) {
@@ -496,11 +492,6 @@ static enum dma_status jz4740_dma_tx_status(struct dma_chan *c,
         return status;
  }
  
-static int jz4740_dma_alloc_chan_resources(struct dma_chan *c)
-{
-       return 0;
-}
-
  static void jz4740_dma_free_chan_resources(struct dma_chan *c)
  {
         vchan_free_chan_resources(to_virt_chan(c));
@@ -543,7 +534,6 @@ static int jz4740_dma_probe(struct platform_device *pdev)
  
         dma_cap_set(DMA_SLAVE, dd->cap_mask);
         dma_cap_set(DMA_CYCLIC, dd->cap_mask);
-       dd->device_alloc_chan_resources = jz4740_dma_alloc_chan_resources;
         dd->device_free_chan_resources = jz4740_dma_free_chan_resources;
         dd->device_tx_status = jz4740_dma_tx_status;
         dd->device_issue_pending = jz4740_dma_issue_pending;
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c

new file mode 100644 (file)

index 0000000..26d2f0e
--- /dev/null
+++ b/drivers/dma/dma-jz4780.c
@@ -0,0 +1,877 @@
+/*
+ * Ingenic JZ4780 DMA controller
+ *
+ * Copyright (c) 2015 Imagination Technologies
+ * Author: Alex Smith <alex@alex-smith.me.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define JZ_DMA_NR_CHANNELS     32
+
+/* Global registers. */
+#define JZ_DMA_REG_DMAC                0x1000
+#define JZ_DMA_REG_DIRQP       0x1004
+#define JZ_DMA_REG_DDR         0x1008
+#define JZ_DMA_REG_DDRS                0x100c
+#define JZ_DMA_REG_DMACP       0x101c
+#define JZ_DMA_REG_DSIRQP      0x1020
+#define JZ_DMA_REG_DSIRQM      0x1024
+#define JZ_DMA_REG_DCIRQP      0x1028
+#define JZ_DMA_REG_DCIRQM      0x102c
+
+/* Per-channel registers. */
+#define JZ_DMA_REG_CHAN(n)     (n * 0x20)
+#define JZ_DMA_REG_DSA(n)      (0x00 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DTA(n)      (0x04 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DTC(n)      (0x08 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DRT(n)      (0x0c + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DCS(n)      (0x10 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DCM(n)      (0x14 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DDA(n)      (0x18 + JZ_DMA_REG_CHAN(n))
+#define JZ_DMA_REG_DSD(n)      (0x1c + JZ_DMA_REG_CHAN(n))
+
+#define JZ_DMA_DMAC_DMAE       BIT(0)
+#define JZ_DMA_DMAC_AR         BIT(2)
+#define JZ_DMA_DMAC_HLT                BIT(3)
+#define JZ_DMA_DMAC_FMSC       BIT(31)
+
+#define JZ_DMA_DRT_AUTO                0x8
+
+#define JZ_DMA_DCS_CTE         BIT(0)
+#define JZ_DMA_DCS_HLT         BIT(2)
+#define JZ_DMA_DCS_TT          BIT(3)
+#define JZ_DMA_DCS_AR          BIT(4)
+#define JZ_DMA_DCS_DES8                BIT(30)
+
+#define JZ_DMA_DCM_LINK                BIT(0)
+#define JZ_DMA_DCM_TIE         BIT(1)
+#define JZ_DMA_DCM_STDE                BIT(2)
+#define JZ_DMA_DCM_TSZ_SHIFT   8
+#define JZ_DMA_DCM_TSZ_MASK    (0x7 << JZ_DMA_DCM_TSZ_SHIFT)
+#define JZ_DMA_DCM_DP_SHIFT    12
+#define JZ_DMA_DCM_SP_SHIFT    14
+#define JZ_DMA_DCM_DAI         BIT(22)
+#define JZ_DMA_DCM_SAI         BIT(23)
+
+#define JZ_DMA_SIZE_4_BYTE     0x0
+#define JZ_DMA_SIZE_1_BYTE     0x1
+#define JZ_DMA_SIZE_2_BYTE     0x2
+#define JZ_DMA_SIZE_16_BYTE    0x3
+#define JZ_DMA_SIZE_32_BYTE    0x4
+#define JZ_DMA_SIZE_64_BYTE    0x5
+#define JZ_DMA_SIZE_128_BYTE   0x6
+
+#define JZ_DMA_WIDTH_32_BIT    0x0
+#define JZ_DMA_WIDTH_8_BIT     0x1
+#define JZ_DMA_WIDTH_16_BIT    0x2
+
+#define JZ_DMA_BUSWIDTHS       (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE)  | \
+                                BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+                                BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
+/**
+ * struct jz4780_dma_hwdesc - descriptor structure read by the DMA controller.
+ * @dcm: value for the DCM (channel command) register
+ * @dsa: source address
+ * @dta: target address
+ * @dtc: transfer count (number of blocks of the transfer size specified in DCM
+ * to transfer) in the low 24 bits, offset of the next descriptor from the
+ * descriptor base address in the upper 8 bits.
+ * @sd: target/source stride difference (in stride transfer mode).
+ * @drt: request type
+ */
+struct jz4780_dma_hwdesc {
+       uint32_t dcm;
+       uint32_t dsa;
+       uint32_t dta;
+       uint32_t dtc;
+       uint32_t sd;
+       uint32_t drt;
+       uint32_t reserved[2];
+};
+
+/* Size of allocations for hardware descriptor blocks. */
+#define JZ_DMA_DESC_BLOCK_SIZE PAGE_SIZE
+#define JZ_DMA_MAX_DESC                \
+       (JZ_DMA_DESC_BLOCK_SIZE / sizeof(struct jz4780_dma_hwdesc))
+
+struct jz4780_dma_desc {
+       struct virt_dma_desc vdesc;
+
+       struct jz4780_dma_hwdesc *desc;
+       dma_addr_t desc_phys;
+       unsigned int count;
+       enum dma_transaction_type type;
+       uint32_t status;
+};
+
+struct jz4780_dma_chan {
+       struct virt_dma_chan vchan;
+       unsigned int id;
+       struct dma_pool *desc_pool;
+
+       uint32_t transfer_type;
+       uint32_t transfer_shift;
+       struct dma_slave_config config;
+
+       struct jz4780_dma_desc *desc;
+       unsigned int curr_hwdesc;
+};
+
+struct jz4780_dma_dev {
+       struct dma_device dma_device;
+       void __iomem *base;
+       struct clk *clk;
+       unsigned int irq;
+
+       uint32_t chan_reserved;
+       struct jz4780_dma_chan chan[JZ_DMA_NR_CHANNELS];
+};
+
+struct jz4780_dma_data {
+       uint32_t transfer_type;
+       int channel;
+};
+
+static inline struct jz4780_dma_chan *to_jz4780_dma_chan(struct dma_chan *chan)
+{
+       return container_of(chan, struct jz4780_dma_chan, vchan.chan);
+}
+
+static inline struct jz4780_dma_desc *to_jz4780_dma_desc(
+       struct virt_dma_desc *vdesc)
+{
+       return container_of(vdesc, struct jz4780_dma_desc, vdesc);
+}
+
+static inline struct jz4780_dma_dev *jz4780_dma_chan_parent(
+       struct jz4780_dma_chan *jzchan)
+{
+       return container_of(jzchan->vchan.chan.device, struct jz4780_dma_dev,
+                           dma_device);
+}
+
+static inline uint32_t jz4780_dma_readl(struct jz4780_dma_dev *jzdma,
+       unsigned int reg)
+{
+       return readl(jzdma->base + reg);
+}
+
+static inline void jz4780_dma_writel(struct jz4780_dma_dev *jzdma,
+       unsigned int reg, uint32_t val)
+{
+       writel(val, jzdma->base + reg);
+}
+
+static struct jz4780_dma_desc *jz4780_dma_desc_alloc(
+       struct jz4780_dma_chan *jzchan, unsigned int count,
+       enum dma_transaction_type type)
+{
+       struct jz4780_dma_desc *desc;
+
+       if (count > JZ_DMA_MAX_DESC)
+               return NULL;
+
+       desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+       if (!desc)
+               return NULL;
+
+       desc->desc = dma_pool_alloc(jzchan->desc_pool, GFP_NOWAIT,
+                                   &desc->desc_phys);
+       if (!desc->desc) {
+               kfree(desc);
+               return NULL;
+       }
+
+       desc->count = count;
+       desc->type = type;
+       return desc;
+}
+
+static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc)
+{
+       struct jz4780_dma_desc *desc = to_jz4780_dma_desc(vdesc);
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(vdesc->tx.chan);
+
+       dma_pool_free(jzchan->desc_pool, desc->desc, desc->desc_phys);
+       kfree(desc);
+}
+
+static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord)
+{
+       *ord = ffs(val) - 1;
+
+       switch (*ord) {
+       case 0:
+               return JZ_DMA_SIZE_1_BYTE;
+       case 1:
+               return JZ_DMA_SIZE_2_BYTE;
+       case 2:
+               return JZ_DMA_SIZE_4_BYTE;
+       case 4:
+               return JZ_DMA_SIZE_16_BYTE;
+       case 5:
+               return JZ_DMA_SIZE_32_BYTE;
+       case 6:
+               return JZ_DMA_SIZE_64_BYTE;
+       case 7:
+               return JZ_DMA_SIZE_128_BYTE;
+       default:
+               return -EINVAL;
+       }
+}
+
+static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan,
+       struct jz4780_dma_hwdesc *desc, dma_addr_t addr, size_t len,
+       enum dma_transfer_direction direction)
+{
+       struct dma_slave_config *config = &jzchan->config;
+       uint32_t width, maxburst, tsz;
+       int ord;
+
+       if (direction == DMA_MEM_TO_DEV) {
+               desc->dcm = JZ_DMA_DCM_SAI;
+               desc->dsa = addr;
+               desc->dta = config->dst_addr;
+               desc->drt = jzchan->transfer_type;
+
+               width = config->dst_addr_width;
+               maxburst = config->dst_maxburst;
+       } else {
+               desc->dcm = JZ_DMA_DCM_DAI;
+               desc->dsa = config->src_addr;
+               desc->dta = addr;
+               desc->drt = jzchan->transfer_type;
+
+               width = config->src_addr_width;
+               maxburst = config->src_maxburst;
+       }
+
+       /*
+        * This calculates the maximum transfer size that can be used with the
+        * given address, length, width and maximum burst size. The address
+        * must be aligned to the transfer size, the total length must be
+        * divisible by the transfer size, and we must not use more than the
+        * maximum burst specified by the user.
+        */
+       tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), &ord);
+       jzchan->transfer_shift = ord;
+
+       switch (width) {
+       case DMA_SLAVE_BUSWIDTH_1_BYTE:
+       case DMA_SLAVE_BUSWIDTH_2_BYTES:
+               break;
+       case DMA_SLAVE_BUSWIDTH_4_BYTES:
+               width = JZ_DMA_WIDTH_32_BIT;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       desc->dcm |= tsz << JZ_DMA_DCM_TSZ_SHIFT;
+       desc->dcm |= width << JZ_DMA_DCM_SP_SHIFT;
+       desc->dcm |= width << JZ_DMA_DCM_DP_SHIFT;
+
+       desc->dtc = len >> ord;
+}
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg(
+       struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len,
+       enum dma_transfer_direction direction, unsigned long flags)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct jz4780_dma_desc *desc;
+       unsigned int i;
+       int err;
+
+       desc = jz4780_dma_desc_alloc(jzchan, sg_len, DMA_SLAVE);
+       if (!desc)
+               return NULL;
+
+       for (i = 0; i < sg_len; i++) {
+               err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i],
+                                       sg_dma_address(&sgl[i]),
+                                       sg_dma_len(&sgl[i]),
+                                       direction);
+               if (err < 0)
+                       return ERR_PTR(err);
+
+
+               desc->desc[i].dcm |= JZ_DMA_DCM_TIE;
+
+               if (i != (sg_len - 1)) {
+                       /* Automatically proceeed to the next descriptor. */
+                       desc->desc[i].dcm |= JZ_DMA_DCM_LINK;
+
+                       /*
+                        * The upper 8 bits of the DTC field in the descriptor
+                        * must be set to (offset from descriptor base of next
+                        * descriptor >> 4).
+                        */
+                       desc->desc[i].dtc |=
+                               (((i + 1) * sizeof(*desc->desc)) >> 4) << 24;
+               }
+       }
+
+       return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic(
+       struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+       size_t period_len, enum dma_transfer_direction direction,
+       unsigned long flags)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct jz4780_dma_desc *desc;
+       unsigned int periods, i;
+       int err;
+
+       if (buf_len % period_len)
+               return NULL;
+
+       periods = buf_len / period_len;
+
+       desc = jz4780_dma_desc_alloc(jzchan, periods, DMA_CYCLIC);
+       if (!desc)
+               return NULL;
+
+       for (i = 0; i < periods; i++) {
+               err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], buf_addr,
+                                       period_len, direction);
+               if (err < 0)
+                       return ERR_PTR(err);
+
+               buf_addr += period_len;
+
+               /*
+                * Set the link bit to indicate that the controller should
+                * automatically proceed to the next descriptor. In
+                * jz4780_dma_begin(), this will be cleared if we need to issue
+                * an interrupt after each period.
+                */
+               desc->desc[i].dcm |= JZ_DMA_DCM_TIE | JZ_DMA_DCM_LINK;
+
+               /*
+                * The upper 8 bits of the DTC field in the descriptor must be
+                * set to (offset from descriptor base of next descriptor >> 4).
+                * If this is the last descriptor, link it back to the first,
+                * i.e. leave offset set to 0, otherwise point to the next one.
+                */
+               if (i != (periods - 1)) {
+                       desc->desc[i].dtc |=
+                               (((i + 1) * sizeof(*desc->desc)) >> 4) << 24;
+               }
+       }
+
+       return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy(
+       struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+       size_t len, unsigned long flags)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct jz4780_dma_desc *desc;
+       uint32_t tsz;
+       int ord;
+
+       desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY);
+       if (!desc)
+               return NULL;
+
+       tsz = jz4780_dma_transfer_size(dest | src | len, &ord);
+       if (tsz < 0)
+               return ERR_PTR(tsz);
+
+       desc->desc[0].dsa = src;
+       desc->desc[0].dta = dest;
+       desc->desc[0].drt = JZ_DMA_DRT_AUTO;
+       desc->desc[0].dcm = JZ_DMA_DCM_TIE | JZ_DMA_DCM_SAI | JZ_DMA_DCM_DAI |
+                           tsz << JZ_DMA_DCM_TSZ_SHIFT |
+                           JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_SP_SHIFT |
+                           JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_DP_SHIFT;
+       desc->desc[0].dtc = len >> ord;
+
+       return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags);
+}
+
+static void jz4780_dma_begin(struct jz4780_dma_chan *jzchan)
+{
+       struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+       struct virt_dma_desc *vdesc;
+       unsigned int i;
+       dma_addr_t desc_phys;
+
+       if (!jzchan->desc) {
+               vdesc = vchan_next_desc(&jzchan->vchan);
+               if (!vdesc)
+                       return;
+
+               list_del(&vdesc->node);
+
+               jzchan->desc = to_jz4780_dma_desc(vdesc);
+               jzchan->curr_hwdesc = 0;
+
+               if (jzchan->desc->type == DMA_CYCLIC && vdesc->tx.callback) {
+                       /*
+                        * The DMA controller doesn't support triggering an
+                        * interrupt after processing each descriptor, only
+                        * after processing an entire terminated list of
+                        * descriptors. For a cyclic DMA setup the list of
+                        * descriptors is not terminated so we can never get an
+                        * interrupt.
+                        *
+                        * If the user requested a callback for a cyclic DMA
+                        * setup then we workaround this hardware limitation
+                        * here by degrading to a set of unlinked descriptors
+                        * which we will submit in sequence in response to the
+                        * completion of processing the previous descriptor.
+                        */
+                       for (i = 0; i < jzchan->desc->count; i++)
+                               jzchan->desc->desc[i].dcm &= ~JZ_DMA_DCM_LINK;
+               }
+       } else {
+               /*
+                * There is an existing transfer, therefore this must be one
+                * for which we unlinked the descriptors above. Advance to the
+                * next one in the list.
+                */
+               jzchan->curr_hwdesc =
+                       (jzchan->curr_hwdesc + 1) % jzchan->desc->count;
+       }
+
+       /* Use 8-word descriptors. */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id), JZ_DMA_DCS_DES8);
+
+       /* Write descriptor address and initiate descriptor fetch. */
+       desc_phys = jzchan->desc->desc_phys +
+                   (jzchan->curr_hwdesc * sizeof(*jzchan->desc->desc));
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DDA(jzchan->id), desc_phys);
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DDRS, BIT(jzchan->id));
+
+       /* Enable the channel. */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id),
+                         JZ_DMA_DCS_DES8 | JZ_DMA_DCS_CTE);
+}
+
+static void jz4780_dma_issue_pending(struct dma_chan *chan)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       unsigned long flags;
+
+       spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+       if (vchan_issue_pending(&jzchan->vchan) && !jzchan->desc)
+               jz4780_dma_begin(jzchan);
+
+       spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+}
+
+static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan)
+{
+       struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+       unsigned long flags;
+       LIST_HEAD(head);
+
+       spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+       /* Clear the DMA status and stop the transfer. */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id), 0);
+       if (jzchan->desc) {
+               jz4780_dma_desc_free(&jzchan->desc->vdesc);
+               jzchan->desc = NULL;
+       }
+
+       vchan_get_all_descriptors(&jzchan->vchan, &head);
+
+       spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+
+       vchan_dma_desc_free_list(&jzchan->vchan, &head);
+       return 0;
+}
+
+static int jz4780_dma_slave_config(struct jz4780_dma_chan *jzchan,
+       const struct dma_slave_config *config)
+{
+       if ((config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+          || (config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES))
+               return -EINVAL;
+
+       /* Copy the reset of the slave configuration, it is used later. */
+       memcpy(&jzchan->config, config, sizeof(jzchan->config));
+
+       return 0;
+}
+
+static size_t jz4780_dma_desc_residue(struct jz4780_dma_chan *jzchan,
+       struct jz4780_dma_desc *desc, unsigned int next_sg)
+{
+       struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+       unsigned int residue, count;
+       unsigned int i;
+
+       residue = 0;
+
+       for (i = next_sg; i < desc->count; i++)
+               residue += desc->desc[i].dtc << jzchan->transfer_shift;
+
+       if (next_sg != 0) {
+               count = jz4780_dma_readl(jzdma,
+                                        JZ_DMA_REG_DTC(jzchan->id));
+               residue += count << jzchan->transfer_shift;
+       }
+
+       return residue;
+}
+
+static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan,
+       dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct virt_dma_desc *vdesc;
+       enum dma_status status;
+       unsigned long flags;
+
+       status = dma_cookie_status(chan, cookie, txstate);
+       if ((status == DMA_COMPLETE) || (txstate == NULL))
+               return status;
+
+       spin_lock_irqsave(&jzchan->vchan.lock, flags);
+
+       vdesc = vchan_find_desc(&jzchan->vchan, cookie);
+       if (vdesc) {
+               /* On the issued list, so hasn't been processed yet */
+               txstate->residue = jz4780_dma_desc_residue(jzchan,
+                                       to_jz4780_dma_desc(vdesc), 0);
+       } else if (cookie == jzchan->desc->vdesc.tx.cookie) {
+               txstate->residue = jz4780_dma_desc_residue(jzchan, jzchan->desc,
+                         (jzchan->curr_hwdesc + 1) % jzchan->desc->count);
+       } else
+               txstate->residue = 0;
+
+       if (vdesc && jzchan->desc && vdesc == &jzchan->desc->vdesc
+               && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT))
+                       status = DMA_ERROR;
+
+       spin_unlock_irqrestore(&jzchan->vchan.lock, flags);
+       return status;
+}
+
+static void jz4780_dma_chan_irq(struct jz4780_dma_dev *jzdma,
+       struct jz4780_dma_chan *jzchan)
+{
+       uint32_t dcs;
+
+       spin_lock(&jzchan->vchan.lock);
+
+       dcs = jz4780_dma_readl(jzdma, JZ_DMA_REG_DCS(jzchan->id));
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DCS(jzchan->id), 0);
+
+       if (dcs & JZ_DMA_DCS_AR) {
+               dev_warn(&jzchan->vchan.chan.dev->device,
+                        "address error (DCS=0x%x)\n", dcs);
+       }
+
+       if (dcs & JZ_DMA_DCS_HLT) {
+               dev_warn(&jzchan->vchan.chan.dev->device,
+                        "channel halt (DCS=0x%x)\n", dcs);
+       }
+
+       if (jzchan->desc) {
+               jzchan->desc->status = dcs;
+
+               if ((dcs & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) == 0) {
+                       if (jzchan->desc->type == DMA_CYCLIC) {
+                               vchan_cyclic_callback(&jzchan->desc->vdesc);
+                       } else {
+                               vchan_cookie_complete(&jzchan->desc->vdesc);
+                               jzchan->desc = NULL;
+                       }
+
+                       jz4780_dma_begin(jzchan);
+               }
+       } else {
+               dev_err(&jzchan->vchan.chan.dev->device,
+                       "channel IRQ with no active transfer\n");
+       }
+
+       spin_unlock(&jzchan->vchan.lock);
+}
+
+static irqreturn_t jz4780_dma_irq_handler(int irq, void *data)
+{
+       struct jz4780_dma_dev *jzdma = data;
+       uint32_t pending, dmac;
+       int i;
+
+       pending = jz4780_dma_readl(jzdma, JZ_DMA_REG_DIRQP);
+
+       for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) {
+               if (!(pending & (1<<i)))
+                       continue;
+
+               jz4780_dma_chan_irq(jzdma, &jzdma->chan[i]);
+       }
+
+       /* Clear halt and address error status of all channels. */
+       dmac = jz4780_dma_readl(jzdma, JZ_DMA_REG_DMAC);
+       dmac &= ~(JZ_DMA_DMAC_HLT | JZ_DMA_DMAC_AR);
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DMAC, dmac);
+
+       /* Clear interrupt pending status. */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DIRQP, 0);
+
+       return IRQ_HANDLED;
+}
+
+static int jz4780_dma_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+       jzchan->desc_pool = dma_pool_create(dev_name(&chan->dev->device),
+                                           chan->device->dev,
+                                           JZ_DMA_DESC_BLOCK_SIZE,
+                                           PAGE_SIZE, 0);
+       if (!jzchan->desc_pool) {
+               dev_err(&chan->dev->device,
+                       "failed to allocate descriptor pool\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void jz4780_dma_free_chan_resources(struct dma_chan *chan)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+
+       vchan_free_chan_resources(&jzchan->vchan);
+       dma_pool_destroy(jzchan->desc_pool);
+       jzchan->desc_pool = NULL;
+}
+
+static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param)
+{
+       struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan);
+       struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan);
+       struct jz4780_dma_data *data = param;
+
+       if (data->channel > -1) {
+               if (data->channel != jzchan->id)
+                       return false;
+       } else if (jzdma->chan_reserved & BIT(jzchan->id)) {
+               return false;
+       }
+
+       jzchan->transfer_type = data->transfer_type;
+
+       return true;
+}
+
+static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec,
+       struct of_dma *ofdma)
+{
+       struct jz4780_dma_dev *jzdma = ofdma->of_dma_data;
+       dma_cap_mask_t mask = jzdma->dma_device.cap_mask;
+       struct jz4780_dma_data data;
+
+       if (dma_spec->args_count != 2)
+               return NULL;
+
+       data.transfer_type = dma_spec->args[0];
+       data.channel = dma_spec->args[1];
+
+       if (data.channel > -1) {
+               if (data.channel >= JZ_DMA_NR_CHANNELS) {
+                       dev_err(jzdma->dma_device.dev,
+                               "device requested non-existent channel %u\n",
+                               data.channel);
+                       return NULL;
+               }
+
+               /* Can only select a channel marked as reserved. */
+               if (!(jzdma->chan_reserved & BIT(data.channel))) {
+                       dev_err(jzdma->dma_device.dev,
+                               "device requested unreserved channel %u\n",
+                               data.channel);
+                       return NULL;
+               }
+       }
+
+       return dma_request_channel(mask, jz4780_dma_filter_fn, &data);
+}
+
+static int jz4780_dma_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct jz4780_dma_dev *jzdma;
+       struct jz4780_dma_chan *jzchan;
+       struct dma_device *dd;
+       struct resource *res;
+       int i, ret;
+
+       jzdma = devm_kzalloc(dev, sizeof(*jzdma), GFP_KERNEL);
+       if (!jzdma)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, jzdma);
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(dev, "failed to get I/O memory\n");
+               return -EINVAL;
+       }
+
+       jzdma->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(jzdma->base))
+               return PTR_ERR(jzdma->base);
+
+       jzdma->irq = platform_get_irq(pdev, 0);
+       if (jzdma->irq < 0) {
+               dev_err(dev, "failed to get IRQ: %d\n", ret);
+               return jzdma->irq;
+       }
+
+       ret = devm_request_irq(dev, jzdma->irq, jz4780_dma_irq_handler, 0,
+                              dev_name(dev), jzdma);
+       if (ret) {
+               dev_err(dev, "failed to request IRQ %u!\n", jzdma->irq);
+               return -EINVAL;
+       }
+
+       jzdma->clk = devm_clk_get(dev, NULL);
+       if (IS_ERR(jzdma->clk)) {
+               dev_err(dev, "failed to get clock\n");
+               return PTR_ERR(jzdma->clk);
+       }
+
+       clk_prepare_enable(jzdma->clk);
+
+       /* Property is optional, if it doesn't exist the value will remain 0. */
+       of_property_read_u32_index(dev->of_node, "ingenic,reserved-channels",
+                                  0, &jzdma->chan_reserved);
+
+       dd = &jzdma->dma_device;
+
+       dma_cap_set(DMA_MEMCPY, dd->cap_mask);
+       dma_cap_set(DMA_SLAVE, dd->cap_mask);
+       dma_cap_set(DMA_CYCLIC, dd->cap_mask);
+
+       dd->dev = dev;
+       dd->copy_align = 2; /* 2^2 = 4 byte alignment */
+       dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources;
+       dd->device_free_chan_resources = jz4780_dma_free_chan_resources;
+       dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg;
+       dd->device_prep_dma_cyclic = jz4780_dma_prep_dma_cyclic;
+       dd->device_prep_dma_memcpy = jz4780_dma_prep_dma_memcpy;
+       dd->device_config = jz4780_dma_slave_config;
+       dd->device_terminate_all = jz4780_dma_terminate_all;
+       dd->device_tx_status = jz4780_dma_tx_status;
+       dd->device_issue_pending = jz4780_dma_issue_pending;
+       dd->src_addr_widths = JZ_DMA_BUSWIDTHS;
+       dd->dst_addr_widths = JZ_DMA_BUSWIDTHS;
+       dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+       dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+
+       /*
+        * Enable DMA controller, mark all channels as not programmable.
+        * Also set the FMSC bit - it increases MSC performance, so it makes
+        * little sense not to enable it.
+        */
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DMAC,
+                         JZ_DMA_DMAC_DMAE | JZ_DMA_DMAC_FMSC);
+       jz4780_dma_writel(jzdma, JZ_DMA_REG_DMACP, 0);
+
+       INIT_LIST_HEAD(&dd->channels);
+
+       for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) {
+               jzchan = &jzdma->chan[i];
+               jzchan->id = i;
+
+               vchan_init(&jzchan->vchan, dd);
+               jzchan->vchan.desc_free = jz4780_dma_desc_free;
+       }
+
+       ret = dma_async_device_register(dd);
+       if (ret) {
+               dev_err(dev, "failed to register device\n");
+               goto err_disable_clk;
+       }
+
+       /* Register with OF DMA helpers. */
+       ret = of_dma_controller_register(dev->of_node, jz4780_of_dma_xlate,
+                                        jzdma);
+       if (ret) {
+               dev_err(dev, "failed to register OF DMA controller\n");
+               goto err_unregister_dev;
+       }
+
+       dev_info(dev, "JZ4780 DMA controller initialised\n");
+       return 0;
+
+err_unregister_dev:
+       dma_async_device_unregister(dd);
+
+err_disable_clk:
+       clk_disable_unprepare(jzdma->clk);
+       return ret;
+}
+
+static int jz4780_dma_remove(struct platform_device *pdev)
+{
+       struct jz4780_dma_dev *jzdma = platform_get_drvdata(pdev);
+
+       of_dma_controller_free(pdev->dev.of_node);
+       devm_free_irq(&pdev->dev, jzdma->irq, jzdma);
+       dma_async_device_unregister(&jzdma->dma_device);
+       return 0;
+}
+
+static const struct of_device_id jz4780_dma_dt_match[] = {
+       { .compatible = "ingenic,jz4780-dma", .data = NULL },
+       {},
+};
+MODULE_DEVICE_TABLE(of, jz4780_dma_dt_match);
+
+static struct platform_driver jz4780_dma_driver = {
+       .probe          = jz4780_dma_probe,
+       .remove         = jz4780_dma_remove,
+       .driver = {
+               .name   = "jz4780-dma",
+               .of_match_table = of_match_ptr(jz4780_dma_dt_match),
+       },
+};
+
+static int __init jz4780_dma_init(void)
+{
+       return platform_driver_register(&jz4780_dma_driver);
+}
+subsys_initcall(jz4780_dma_init);
+
+static void __exit jz4780_dma_exit(void)
+{
+       platform_driver_unregister(&jz4780_dma_driver);
+}
+module_exit(jz4780_dma_exit);
+
+MODULE_AUTHOR("Alex Smith <alex@alex-smith.me.uk>");
+MODULE_DESCRIPTION("Ingenic JZ4780 DMA controller driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c

index ac336a9..0e035a8 100644 (file)
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
   * The full GNU General Public License is included in this distribution in the
   * file called COPYING.
   */
@@ -355,20 +351,6 @@ struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
  }
  EXPORT_SYMBOL(dma_find_channel);
  
-/*
- * net_dma_find_channel - find a channel for net_dma
- * net_dma has alignment requirements
- */
-struct dma_chan *net_dma_find_channel(void)
-{
-       struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);
-       if (chan && !is_dma_copy_aligned(chan->device, 1, 1, 1))
-               return NULL;
-
-       return chan;
-}
-EXPORT_SYMBOL(net_dma_find_channel);
-
  /**
   * dma_issue_pending_all - flush all pending operations across all channels
   */
diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig

index dcfe964..36e02f0 100644 (file)
--- a/drivers/dma/dw/Kconfig
+++ b/drivers/dma/dw/Kconfig
@@ -3,7 +3,7 @@
  #
  
  config DW_DMAC_CORE
-       tristate "Synopsys DesignWare AHB DMA support"
+       tristate
         select DMA_ENGINE
  
  config DW_DMAC
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c

index a8ad052..1022c2e 100644 (file)
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -230,7 +230,8 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
         /* ASSERT:  channel is idle */
         if (dma_readl(dw, CH_EN) & dwc->mask) {
                 dev_err(chan2dev(&dwc->chan),
-                       "BUG: Attempted to start non-idle channel\n");
+                       "%s: BUG: Attempted to start non-idle channel\n",
+                       __func__);
                 dwc_dump_chan_regs(dwc);
  
                 /* The tasklet will hopefully advance the queue... */
@@ -814,11 +815,8 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
  
  slave_sg_todev_fill_desc:
                         desc = dwc_desc_get(dwc);
-                       if (!desc) {
-                               dev_err(chan2dev(chan),
-                                       "not enough descriptors available\n");
+                       if (!desc)
                                 goto err_desc_get;
-                       }
  
                         desc->lli.sar = mem;
                         desc->lli.dar = reg;
@@ -874,11 +872,8 @@ slave_sg_todev_fill_desc:
  
  slave_sg_fromdev_fill_desc:
                         desc = dwc_desc_get(dwc);
-                       if (!desc) {
-                               dev_err(chan2dev(chan),
-                                               "not enough descriptors available\n");
+                       if (!desc)
                                 goto err_desc_get;
-                       }
  
                         desc->lli.sar = reg;
                         desc->lli.dar = mem;
@@ -922,6 +917,8 @@ slave_sg_fromdev_fill_desc:
         return &first->txd;
  
  err_desc_get:
+       dev_err(chan2dev(chan),
+               "not enough descriptors available. Direction %d\n", direction);
         dwc_desc_put(dwc, first);
         return NULL;
  }
@@ -1261,7 +1258,8 @@ int dw_dma_cyclic_start(struct dma_chan *chan)
         /* Assert channel is idle */
         if (dma_readl(dw, CH_EN) & dwc->mask) {
                 dev_err(chan2dev(&dwc->chan),
-                       "BUG: Attempted to start non-idle channel\n");
+                       "%s: BUG: Attempted to start non-idle channel\n",
+                       __func__);
                 dwc_dump_chan_regs(dwc);
                 spin_unlock_irqrestore(&dwc->lock, flags);
                 return -EBUSY;
diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c

index 53dbd3b..bf09db7 100644 (file)
--- a/drivers/dma/edma.c
+++ b/drivers/dma/edma.c
@@ -812,7 +812,7 @@ static int edma_alloc_chan_resources(struct dma_chan *chan)
         LIST_HEAD(descs);
  
         a_ch_num = edma_alloc_channel(echan->ch_num, edma_callback,
-                                       chan, EVENTQ_DEFAULT);
+                                       echan, EVENTQ_DEFAULT);
  
         if (a_ch_num < 0) {
                 ret = -ENODEV;
diff --git a/drivers/dma/fsl_raid.c b/drivers/dma/fsl_raid.c

new file mode 100644 (file)

index 0000000..4d9470f
--- /dev/null
+++ b/drivers/dma/fsl_raid.c
@@ -0,0 +1,904 @@
+/*
+ * drivers/dma/fsl_raid.c
+ *
+ * Freescale RAID Engine device driver
+ *
+ * Author:
+ *     Harninder Rai <harninder.rai@freescale.com>
+ *     Naveen Burmi <naveenburmi@freescale.com>
+ *
+ * Rewrite:
+ *     Xuelin Shi <xuelin.shi@freescale.com>
+ *
+ * Copyright (c) 2010-2014 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Theory of operation:
+ *
+ * General capabilities:
+ *     RAID Engine (RE) block is capable of offloading XOR, memcpy and P/Q
+ *     calculations required in RAID5 and RAID6 operations. RE driver
+ *     registers with Linux's ASYNC layer as dma driver. RE hardware
+ *     maintains strict ordering of the requests through chained
+ *     command queueing.
+ *
+ * Data flow:
+ *     Software RAID layer of Linux (MD layer) maintains RAID partitions,
+ *     strips, stripes etc. It sends requests to the underlying ASYNC layer
+ *     which further passes it to RE driver. ASYNC layer decides which request
+ *     goes to which job ring of RE hardware. For every request processed by
+ *     RAID Engine, driver gets an interrupt unless coalescing is set. The
+ *     per job ring interrupt handler checks the status register for errors,
+ *     clears the interrupt and leave the post interrupt processing to the irq
+ *     thread.
+ */
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+
+#include "dmaengine.h"
+#include "fsl_raid.h"
+
+#define FSL_RE_MAX_XOR_SRCS    16
+#define FSL_RE_MAX_PQ_SRCS     16
+#define FSL_RE_MIN_DESCS       256
+#define FSL_RE_MAX_DESCS       (4 * FSL_RE_MIN_DESCS)
+#define FSL_RE_FRAME_FORMAT    0x1
+#define FSL_RE_MAX_DATA_LEN    (1024*1024)
+
+#define to_fsl_re_dma_desc(tx) container_of(tx, struct fsl_re_desc, async_tx)
+
+/* Add descriptors into per chan software queue - submit_q */
+static dma_cookie_t fsl_re_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct fsl_re_desc *desc;
+       struct fsl_re_chan *re_chan;
+       dma_cookie_t cookie;
+       unsigned long flags;
+
+       desc = to_fsl_re_dma_desc(tx);
+       re_chan = container_of(tx->chan, struct fsl_re_chan, chan);
+
+       spin_lock_irqsave(&re_chan->desc_lock, flags);
+       cookie = dma_cookie_assign(tx);
+       list_add_tail(&desc->node, &re_chan->submit_q);
+       spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+
+       return cookie;
+}
+
+/* Copy descriptor from per chan software queue into hardware job ring */
+static void fsl_re_issue_pending(struct dma_chan *chan)
+{
+       struct fsl_re_chan *re_chan;
+       int avail;
+       struct fsl_re_desc *desc, *_desc;
+       unsigned long flags;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+
+       spin_lock_irqsave(&re_chan->desc_lock, flags);
+       avail = FSL_RE_SLOT_AVAIL(
+               in_be32(&re_chan->jrregs->inbring_slot_avail));
+
+       list_for_each_entry_safe(desc, _desc, &re_chan->submit_q, node) {
+               if (!avail)
+                       break;
+
+               list_move_tail(&desc->node, &re_chan->active_q);
+
+               memcpy(&re_chan->inb_ring_virt_addr[re_chan->inb_count],
+                      &desc->hwdesc, sizeof(struct fsl_re_hw_desc));
+
+               re_chan->inb_count = (re_chan->inb_count + 1) &
+                                               FSL_RE_RING_SIZE_MASK;
+               out_be32(&re_chan->jrregs->inbring_add_job, FSL_RE_ADD_JOB(1));
+               avail--;
+       }
+       spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+}
+
+static void fsl_re_desc_done(struct fsl_re_desc *desc)
+{
+       dma_async_tx_callback callback;
+       void *callback_param;
+
+       dma_cookie_complete(&desc->async_tx);
+
+       callback = desc->async_tx.callback;
+       callback_param = desc->async_tx.callback_param;
+       if (callback)
+               callback(callback_param);
+
+       dma_descriptor_unmap(&desc->async_tx);
+}
+
+static void fsl_re_cleanup_descs(struct fsl_re_chan *re_chan)
+{
+       struct fsl_re_desc *desc, *_desc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&re_chan->desc_lock, flags);
+       list_for_each_entry_safe(desc, _desc, &re_chan->ack_q, node) {
+               if (async_tx_test_ack(&desc->async_tx))
+                       list_move_tail(&desc->node, &re_chan->free_q);
+       }
+       spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+
+       fsl_re_issue_pending(&re_chan->chan);
+}
+
+static void fsl_re_dequeue(unsigned long data)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc, *_desc;
+       struct fsl_re_hw_desc *hwdesc;
+       unsigned long flags;
+       unsigned int count, oub_count;
+       int found;
+
+       re_chan = dev_get_drvdata((struct device *)data);
+
+       fsl_re_cleanup_descs(re_chan);
+
+       spin_lock_irqsave(&re_chan->desc_lock, flags);
+       count = FSL_RE_SLOT_FULL(in_be32(&re_chan->jrregs->oubring_slot_full));
+       while (count--) {
+               found = 0;
+               hwdesc = &re_chan->oub_ring_virt_addr[re_chan->oub_count];
+               list_for_each_entry_safe(desc, _desc, &re_chan->active_q,
+                                        node) {
+                       /* compare the hw dma addr to find the completed */
+                       if (desc->hwdesc.lbea32 == hwdesc->lbea32 &&
+                           desc->hwdesc.addr_low == hwdesc->addr_low) {
+                               found = 1;
+                               break;
+                       }
+               }
+
+               if (found) {
+                       fsl_re_desc_done(desc);
+                       list_move_tail(&desc->node, &re_chan->ack_q);
+               } else {
+                       dev_err(re_chan->dev,
+                               "found hwdesc not in sw queue, discard it\n");
+               }
+
+               oub_count = (re_chan->oub_count + 1) & FSL_RE_RING_SIZE_MASK;
+               re_chan->oub_count = oub_count;
+
+               out_be32(&re_chan->jrregs->oubring_job_rmvd,
+                        FSL_RE_RMVD_JOB(1));
+       }
+       spin_unlock_irqrestore(&re_chan->desc_lock, flags);
+}
+
+/* Per Job Ring interrupt handler */
+static irqreturn_t fsl_re_isr(int irq, void *data)
+{
+       struct fsl_re_chan *re_chan;
+       u32 irqstate, status;
+
+       re_chan = dev_get_drvdata((struct device *)data);
+
+       irqstate = in_be32(&re_chan->jrregs->jr_interrupt_status);
+       if (!irqstate)
+               return IRQ_NONE;
+
+       /*
+        * There's no way in upper layer (read MD layer) to recover from
+        * error conditions except restart everything. In long term we
+        * need to do something more than just crashing
+        */
+       if (irqstate & FSL_RE_ERROR) {
+               status = in_be32(&re_chan->jrregs->jr_status);
+               dev_err(re_chan->dev, "chan error irqstate: %x, status: %x\n",
+                       irqstate, status);
+       }
+
+       /* Clear interrupt */
+       out_be32(&re_chan->jrregs->jr_interrupt_status, FSL_RE_CLR_INTR);
+
+       tasklet_schedule(&re_chan->irqtask);
+
+       return IRQ_HANDLED;
+}
+
+static enum dma_status fsl_re_tx_status(struct dma_chan *chan,
+                                       dma_cookie_t cookie,
+                                       struct dma_tx_state *txstate)
+{
+       return dma_cookie_status(chan, cookie, txstate);
+}
+
+static void fill_cfd_frame(struct fsl_re_cmpnd_frame *cf, u8 index,
+                          size_t length, dma_addr_t addr, bool final)
+{
+       u32 efrl = length & FSL_RE_CF_LENGTH_MASK;
+
+       efrl |= final << FSL_RE_CF_FINAL_SHIFT;
+       cf[index].efrl32 = efrl;
+       cf[index].addr_high = upper_32_bits(addr);
+       cf[index].addr_low = lower_32_bits(addr);
+}
+
+static struct fsl_re_desc *fsl_re_init_desc(struct fsl_re_chan *re_chan,
+                                           struct fsl_re_desc *desc,
+                                           void *cf, dma_addr_t paddr)
+{
+       desc->re_chan = re_chan;
+       desc->async_tx.tx_submit = fsl_re_tx_submit;
+       dma_async_tx_descriptor_init(&desc->async_tx, &re_chan->chan);
+       INIT_LIST_HEAD(&desc->node);
+
+       desc->hwdesc.fmt32 = FSL_RE_FRAME_FORMAT << FSL_RE_HWDESC_FMT_SHIFT;
+       desc->hwdesc.lbea32 = upper_32_bits(paddr);
+       desc->hwdesc.addr_low = lower_32_bits(paddr);
+       desc->cf_addr = cf;
+       desc->cf_paddr = paddr;
+
+       desc->cdb_addr = (void *)(cf + FSL_RE_CF_DESC_SIZE);
+       desc->cdb_paddr = paddr + FSL_RE_CF_DESC_SIZE;
+
+       return desc;
+}
+
+static struct fsl_re_desc *fsl_re_chan_alloc_desc(struct fsl_re_chan *re_chan,
+                                                 unsigned long flags)
+{
+       struct fsl_re_desc *desc = NULL;
+       void *cf;
+       dma_addr_t paddr;
+       unsigned long lock_flag;
+
+       fsl_re_cleanup_descs(re_chan);
+
+       spin_lock_irqsave(&re_chan->desc_lock, lock_flag);
+       if (!list_empty(&re_chan->free_q)) {
+               /* take one desc from free_q */
+               desc = list_first_entry(&re_chan->free_q,
+                                       struct fsl_re_desc, node);
+               list_del(&desc->node);
+
+               desc->async_tx.flags = flags;
+       }
+       spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag);
+
+       if (!desc) {
+               desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+               if (!desc)
+                       return NULL;
+
+               cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_NOWAIT,
+                                   &paddr);
+               if (!cf) {
+                       kfree(desc);
+                       return NULL;
+               }
+
+               desc = fsl_re_init_desc(re_chan, desc, cf, paddr);
+               desc->async_tx.flags = flags;
+
+               spin_lock_irqsave(&re_chan->desc_lock, lock_flag);
+               re_chan->alloc_count++;
+               spin_unlock_irqrestore(&re_chan->desc_lock, lock_flag);
+       }
+
+       return desc;
+}
+
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_genq(
+               struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+               unsigned int src_cnt, const unsigned char *scf, size_t len,
+               unsigned long flags)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+       struct fsl_re_xor_cdb *xor;
+       struct fsl_re_cmpnd_frame *cf;
+       u32 cdb;
+       unsigned int i, j;
+       unsigned int save_src_cnt = src_cnt;
+       int cont_q = 0;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+       if (len > FSL_RE_MAX_DATA_LEN) {
+               dev_err(re_chan->dev, "genq tx length %lu, max length %d\n",
+                       len, FSL_RE_MAX_DATA_LEN);
+               return NULL;
+       }
+
+       desc = fsl_re_chan_alloc_desc(re_chan, flags);
+       if (desc <= 0)
+               return NULL;
+
+       if (scf && (flags & DMA_PREP_CONTINUE)) {
+               cont_q = 1;
+               src_cnt += 1;
+       }
+
+       /* Filling xor CDB */
+       cdb = FSL_RE_XOR_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+       cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT;
+       cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+       cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT;
+       cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+       xor = desc->cdb_addr;
+       xor->cdb32 = cdb;
+
+       if (scf) {
+               /* compute q = src0*coef0^src1*coef1^..., * is GF(8) mult */
+               for (i = 0; i < save_src_cnt; i++)
+                       xor->gfm[i] = scf[i];
+               if (cont_q)
+                       xor->gfm[i++] = 1;
+       } else {
+               /* compute P, that is XOR all srcs */
+               for (i = 0; i < src_cnt; i++)
+                       xor->gfm[i] = 1;
+       }
+
+       /* Filling frame 0 of compound frame descriptor with CDB */
+       cf = desc->cf_addr;
+       fill_cfd_frame(cf, 0, sizeof(*xor), desc->cdb_paddr, 0);
+
+       /* Fill CFD's 1st frame with dest buffer */
+       fill_cfd_frame(cf, 1, len, dest, 0);
+
+       /* Fill CFD's rest of the frames with source buffers */
+       for (i = 2, j = 0; j < save_src_cnt; i++, j++)
+               fill_cfd_frame(cf, i, len, src[j], 0);
+
+       if (cont_q)
+               fill_cfd_frame(cf, i++, len, dest, 0);
+
+       /* Setting the final bit in the last source buffer frame in CFD */
+       cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT;
+
+       return &desc->async_tx;
+}
+
+/*
+ * Prep function for P parity calculation.In RAID Engine terminology,
+ * XOR calculation is called GenQ calculation done through GenQ command
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_xor(
+               struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+               unsigned int src_cnt, size_t len, unsigned long flags)
+{
+       /* NULL let genq take all coef as 1 */
+       return fsl_re_prep_dma_genq(chan, dest, src, src_cnt, NULL, len, flags);
+}
+
+/*
+ * Prep function for P/Q parity calculation.In RAID Engine terminology,
+ * P/Q calculation is called GenQQ done through GenQQ command
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_pq(
+               struct dma_chan *chan, dma_addr_t *dest, dma_addr_t *src,
+               unsigned int src_cnt, const unsigned char *scf, size_t len,
+               unsigned long flags)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+       struct fsl_re_pq_cdb *pq;
+       struct fsl_re_cmpnd_frame *cf;
+       u32 cdb;
+       u8 *p;
+       int gfmq_len, i, j;
+       unsigned int save_src_cnt = src_cnt;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+       if (len > FSL_RE_MAX_DATA_LEN) {
+               dev_err(re_chan->dev, "pq tx length is %lu, max length is %d\n",
+                       len, FSL_RE_MAX_DATA_LEN);
+               return NULL;
+       }
+
+       /*
+        * RE requires at least 2 sources, if given only one source, we pass the
+        * second source same as the first one.
+        * With only one source, generating P is meaningless, only generate Q.
+        */
+       if (src_cnt == 1) {
+               struct dma_async_tx_descriptor *tx;
+               dma_addr_t dma_src[2];
+               unsigned char coef[2];
+
+               dma_src[0] = *src;
+               coef[0] = *scf;
+               dma_src[1] = *src;
+               coef[1] = 0;
+               tx = fsl_re_prep_dma_genq(chan, dest[1], dma_src, 2, coef, len,
+                                         flags);
+               if (tx)
+                       desc = to_fsl_re_dma_desc(tx);
+
+               return tx;
+       }
+
+       /*
+        * During RAID6 array creation, Linux's MD layer gets P and Q
+        * calculated separately in two steps. But our RAID Engine has
+        * the capability to calculate both P and Q with a single command
+        * Hence to merge well with MD layer, we need to provide a hook
+        * here and call re_jq_prep_dma_genq() function
+        */
+
+       if (flags & DMA_PREP_PQ_DISABLE_P)
+               return fsl_re_prep_dma_genq(chan, dest[1], src, src_cnt,
+                               scf, len, flags);
+
+       if (flags & DMA_PREP_CONTINUE)
+               src_cnt += 3;
+
+       desc = fsl_re_chan_alloc_desc(re_chan, flags);
+       if (desc <= 0)
+               return NULL;
+
+       /* Filling GenQQ CDB */
+       cdb = FSL_RE_PQ_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+       cdb |= (src_cnt - 1) << FSL_RE_CDB_NRCS_SHIFT;
+       cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+       cdb |= FSL_RE_BUFFER_OUTPUT << FSL_RE_CDB_BUFFER_SHIFT;
+       cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+
+       pq = desc->cdb_addr;
+       pq->cdb32 = cdb;
+
+       p = pq->gfm_q1;
+       /* Init gfm_q1[] */
+       for (i = 0; i < src_cnt; i++)
+               p[i] = 1;
+
+       /* Align gfm[] to 32bit */
+       gfmq_len = ALIGN(src_cnt, 4);
+
+       /* Init gfm_q2[] */
+       p += gfmq_len;
+       for (i = 0; i < src_cnt; i++)
+               p[i] = scf[i];
+
+       /* Filling frame 0 of compound frame descriptor with CDB */
+       cf = desc->cf_addr;
+       fill_cfd_frame(cf, 0, sizeof(struct fsl_re_pq_cdb), desc->cdb_paddr, 0);
+
+       /* Fill CFD's 1st & 2nd frame with dest buffers */
+       for (i = 1, j = 0; i < 3; i++, j++)
+               fill_cfd_frame(cf, i, len, dest[j], 0);
+
+       /* Fill CFD's rest of the frames with source buffers */
+       for (i = 3, j = 0; j < save_src_cnt; i++, j++)
+               fill_cfd_frame(cf, i, len, src[j], 0);
+
+       /* PQ computation continuation */
+       if (flags & DMA_PREP_CONTINUE) {
+               if (src_cnt - save_src_cnt == 3) {
+                       p[save_src_cnt] = 0;
+                       p[save_src_cnt + 1] = 0;
+                       p[save_src_cnt + 2] = 1;
+                       fill_cfd_frame(cf, i++, len, dest[0], 0);
+                       fill_cfd_frame(cf, i++, len, dest[1], 0);
+                       fill_cfd_frame(cf, i++, len, dest[1], 0);
+               } else {
+                       dev_err(re_chan->dev, "PQ tx continuation error!\n");
+                       return NULL;
+               }
+       }
+
+       /* Setting the final bit in the last source buffer frame in CFD */
+       cf[i - 1].efrl32 |= 1 << FSL_RE_CF_FINAL_SHIFT;
+
+       return &desc->async_tx;
+}
+
+/*
+ * Prep function for memcpy. In RAID Engine, memcpy is done through MOVE
+ * command. Logic of this function will need to be modified once multipage
+ * support is added in Linux's MD/ASYNC Layer
+ */
+static struct dma_async_tx_descriptor *fsl_re_prep_dma_memcpy(
+               struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+               size_t len, unsigned long flags)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+       size_t length;
+       struct fsl_re_cmpnd_frame *cf;
+       struct fsl_re_move_cdb *move;
+       u32 cdb;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+
+       if (len > FSL_RE_MAX_DATA_LEN) {
+               dev_err(re_chan->dev, "cp tx length is %lu, max length is %d\n",
+                       len, FSL_RE_MAX_DATA_LEN);
+               return NULL;
+       }
+
+       desc = fsl_re_chan_alloc_desc(re_chan, flags);
+       if (desc <= 0)
+               return NULL;
+
+       /* Filling move CDB */
+       cdb = FSL_RE_MOVE_OPCODE << FSL_RE_CDB_OPCODE_SHIFT;
+       cdb |= FSL_RE_BLOCK_SIZE << FSL_RE_CDB_BLKSIZE_SHIFT;
+       cdb |= FSL_RE_INTR_ON_ERROR << FSL_RE_CDB_ERROR_SHIFT;
+       cdb |= FSL_RE_DATA_DEP << FSL_RE_CDB_DEPEND_SHIFT;
+
+       move = desc->cdb_addr;
+       move->cdb32 = cdb;
+
+       /* Filling frame 0 of CFD with move CDB */
+       cf = desc->cf_addr;
+       fill_cfd_frame(cf, 0, sizeof(*move), desc->cdb_paddr, 0);
+
+       length = min_t(size_t, len, FSL_RE_MAX_DATA_LEN);
+
+       /* Fill CFD's 1st frame with dest buffer */
+       fill_cfd_frame(cf, 1, length, dest, 0);
+
+       /* Fill CFD's 2nd frame with src buffer */
+       fill_cfd_frame(cf, 2, length, src, 1);
+
+       return &desc->async_tx;
+}
+
+static int fsl_re_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+       void *cf;
+       dma_addr_t paddr;
+       int i;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+       for (i = 0; i < FSL_RE_MIN_DESCS; i++) {
+               desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+               if (!desc)
+                       break;
+
+               cf = dma_pool_alloc(re_chan->re_dev->cf_desc_pool, GFP_KERNEL,
+                                   &paddr);
+               if (!cf) {
+                       kfree(desc);
+                       break;
+               }
+
+               INIT_LIST_HEAD(&desc->node);
+               fsl_re_init_desc(re_chan, desc, cf, paddr);
+
+               list_add_tail(&desc->node, &re_chan->free_q);
+               re_chan->alloc_count++;
+       }
+       return re_chan->alloc_count;
+}
+
+static void fsl_re_free_chan_resources(struct dma_chan *chan)
+{
+       struct fsl_re_chan *re_chan;
+       struct fsl_re_desc *desc;
+
+       re_chan = container_of(chan, struct fsl_re_chan, chan);
+       while (re_chan->alloc_count--) {
+               desc = list_first_entry(&re_chan->free_q,
+                                       struct fsl_re_desc,
+                                       node);
+
+               list_del(&desc->node);
+               dma_pool_free(re_chan->re_dev->cf_desc_pool, desc->cf_addr,
+                             desc->cf_paddr);
+               kfree(desc);
+       }
+
+       if (!list_empty(&re_chan->free_q))
+               dev_err(re_chan->dev, "chan resource cannot be cleaned!\n");
+}
+
+static int fsl_re_chan_probe(struct platform_device *ofdev,
+                     struct device_node *np, u8 q, u32 off)
+{
+       struct device *dev, *chandev;
+       struct fsl_re_drv_private *re_priv;
+       struct fsl_re_chan *chan;
+       struct dma_device *dma_dev;
+       u32 ptr;
+       u32 status;
+       int ret = 0, rc;
+       struct platform_device *chan_ofdev;
+
+       dev = &ofdev->dev;
+       re_priv = dev_get_drvdata(dev);
+       dma_dev = &re_priv->dma_dev;
+
+       chan = devm_kzalloc(dev, sizeof(*chan), GFP_KERNEL);
+       if (!chan)
+               return -ENOMEM;
+
+       /* create platform device for chan node */
+       chan_ofdev = of_platform_device_create(np, NULL, dev);
+       if (!chan_ofdev) {
+               dev_err(dev, "Not able to create ofdev for jr %d\n", q);
+               ret = -EINVAL;
+               goto err_free;
+       }
+
+       /* read reg property from dts */
+       rc = of_property_read_u32(np, "reg", &ptr);
+       if (rc) {
+               dev_err(dev, "Reg property not found in jr %d\n", q);
+               ret = -ENODEV;
+               goto err_free;
+       }
+
+       chan->jrregs = (struct fsl_re_chan_cfg *)((u8 *)re_priv->re_regs +
+                       off + ptr);
+
+       /* read irq property from dts */
+       chan->irq = irq_of_parse_and_map(np, 0);
+       if (chan->irq == NO_IRQ) {
+               dev_err(dev, "No IRQ defined for JR %d\n", q);
+               ret = -ENODEV;
+               goto err_free;
+       }
+
+       snprintf(chan->name, sizeof(chan->name), "re_jr%02d", q);
+
+       chandev = &chan_ofdev->dev;
+       tasklet_init(&chan->irqtask, fsl_re_dequeue, (unsigned long)chandev);
+
+       ret = request_irq(chan->irq, fsl_re_isr, 0, chan->name, chandev);
+       if (ret) {
+               dev_err(dev, "Unable to register interrupt for JR %d\n", q);
+               ret = -EINVAL;
+               goto err_free;
+       }
+
+       re_priv->re_jrs[q] = chan;
+       chan->chan.device = dma_dev;
+       chan->chan.private = chan;
+       chan->dev = chandev;
+       chan->re_dev = re_priv;
+
+       spin_lock_init(&chan->desc_lock);
+       INIT_LIST_HEAD(&chan->ack_q);
+       INIT_LIST_HEAD(&chan->active_q);
+       INIT_LIST_HEAD(&chan->submit_q);
+       INIT_LIST_HEAD(&chan->free_q);
+
+       chan->inb_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool,
+               GFP_KERNEL, &chan->inb_phys_addr);
+       if (!chan->inb_ring_virt_addr) {
+               dev_err(dev, "No dma memory for inb_ring_virt_addr\n");
+               ret = -ENOMEM;
+               goto err_free;
+       }
+
+       chan->oub_ring_virt_addr = dma_pool_alloc(chan->re_dev->hw_desc_pool,
+               GFP_KERNEL, &chan->oub_phys_addr);
+       if (!chan->oub_ring_virt_addr) {
+               dev_err(dev, "No dma memory for oub_ring_virt_addr\n");
+               ret = -ENOMEM;
+               goto err_free_1;
+       }
+
+       /* Program the Inbound/Outbound ring base addresses and size */
+       out_be32(&chan->jrregs->inbring_base_h,
+                chan->inb_phys_addr & FSL_RE_ADDR_BIT_MASK);
+       out_be32(&chan->jrregs->oubring_base_h,
+                chan->oub_phys_addr & FSL_RE_ADDR_BIT_MASK);
+       out_be32(&chan->jrregs->inbring_base_l,
+                chan->inb_phys_addr >> FSL_RE_ADDR_BIT_SHIFT);
+       out_be32(&chan->jrregs->oubring_base_l,
+                chan->oub_phys_addr >> FSL_RE_ADDR_BIT_SHIFT);
+       out_be32(&chan->jrregs->inbring_size,
+                FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT);
+       out_be32(&chan->jrregs->oubring_size,
+                FSL_RE_RING_SIZE << FSL_RE_RING_SIZE_SHIFT);
+
+       /* Read LIODN value from u-boot */
+       status = in_be32(&chan->jrregs->jr_config_1) & FSL_RE_REG_LIODN_MASK;
+
+       /* Program the CFG reg */
+       out_be32(&chan->jrregs->jr_config_1,
+                FSL_RE_CFG1_CBSI | FSL_RE_CFG1_CBS0 | status);
+
+       dev_set_drvdata(chandev, chan);
+
+       /* Enable RE/CHAN */
+       out_be32(&chan->jrregs->jr_command, FSL_RE_ENABLE);
+
+       return 0;
+
+err_free_1:
+       dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr,
+                     chan->inb_phys_addr);
+err_free:
+       return ret;
+}
+
+/* Probe function for RAID Engine */
+static int fsl_re_probe(struct platform_device *ofdev)
+{
+       struct fsl_re_drv_private *re_priv;
+       struct device_node *np;
+       struct device_node *child;
+       u32 off;
+       u8 ridx = 0;
+       struct dma_device *dma_dev;
+       struct resource *res;
+       int rc;
+       struct device *dev = &ofdev->dev;
+
+       re_priv = devm_kzalloc(dev, sizeof(*re_priv), GFP_KERNEL);
+       if (!re_priv)
+               return -ENOMEM;
+
+       res = platform_get_resource(ofdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -ENODEV;
+
+       /* IOMAP the entire RAID Engine region */
+       re_priv->re_regs = devm_ioremap(dev, res->start, resource_size(res));
+       if (!re_priv->re_regs)
+               return -EBUSY;
+
+       /* Program the RE mode */
+       out_be32(&re_priv->re_regs->global_config, FSL_RE_NON_DPAA_MODE);
+
+       /* Program Galois Field polynomial */
+       out_be32(&re_priv->re_regs->galois_field_config, FSL_RE_GFM_POLY);
+
+       dev_info(dev, "version %x, mode %x, gfp %x\n",
+                in_be32(&re_priv->re_regs->re_version_id),
+                in_be32(&re_priv->re_regs->global_config),
+                in_be32(&re_priv->re_regs->galois_field_config));
+
+       dma_dev = &re_priv->dma_dev;
+       dma_dev->dev = dev;
+       INIT_LIST_HEAD(&dma_dev->channels);
+       dma_set_mask(dev, DMA_BIT_MASK(40));
+
+       dma_dev->device_alloc_chan_resources = fsl_re_alloc_chan_resources;
+       dma_dev->device_tx_status = fsl_re_tx_status;
+       dma_dev->device_issue_pending = fsl_re_issue_pending;
+
+       dma_dev->max_xor = FSL_RE_MAX_XOR_SRCS;
+       dma_dev->device_prep_dma_xor = fsl_re_prep_dma_xor;
+       dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+
+       dma_dev->max_pq = FSL_RE_MAX_PQ_SRCS;
+       dma_dev->device_prep_dma_pq = fsl_re_prep_dma_pq;
+       dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+
+       dma_dev->device_prep_dma_memcpy = fsl_re_prep_dma_memcpy;
+       dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+
+       dma_dev->device_free_chan_resources = fsl_re_free_chan_resources;
+
+       re_priv->total_chans = 0;
+
+       re_priv->cf_desc_pool = dmam_pool_create("fsl_re_cf_desc_pool", dev,
+                                       FSL_RE_CF_CDB_SIZE,
+                                       FSL_RE_CF_CDB_ALIGN, 0);
+
+       if (!re_priv->cf_desc_pool) {
+               dev_err(dev, "No memory for fsl re_cf desc pool\n");
+               return -ENOMEM;
+       }
+
+       re_priv->hw_desc_pool = dmam_pool_create("fsl_re_hw_desc_pool", dev,
+                       sizeof(struct fsl_re_hw_desc) * FSL_RE_RING_SIZE,
+                       FSL_RE_FRAME_ALIGN, 0);
+       if (!re_priv->hw_desc_pool) {
+               dev_err(dev, "No memory for fsl re_hw desc pool\n");
+               return -ENOMEM;
+       }
+
+       dev_set_drvdata(dev, re_priv);
+
+       /* Parse Device tree to find out the total number of JQs present */
+       for_each_compatible_node(np, NULL, "fsl,raideng-v1.0-job-queue") {
+               rc = of_property_read_u32(np, "reg", &off);
+               if (rc) {
+                       dev_err(dev, "Reg property not found in JQ node\n");
+                       return -ENODEV;
+               }
+               /* Find out the Job Rings present under each JQ */
+               for_each_child_of_node(np, child) {
+                       rc = of_device_is_compatible(child,
+                                            "fsl,raideng-v1.0-job-ring");
+                       if (rc) {
+                               fsl_re_chan_probe(ofdev, child, ridx++, off);
+                               re_priv->total_chans++;
+                       }
+               }
+       }
+
+       dma_async_device_register(dma_dev);
+
+       return 0;
+}
+
+static void fsl_re_remove_chan(struct fsl_re_chan *chan)
+{
+       dma_pool_free(chan->re_dev->hw_desc_pool, chan->inb_ring_virt_addr,
+                     chan->inb_phys_addr);
+
+       dma_pool_free(chan->re_dev->hw_desc_pool, chan->oub_ring_virt_addr,
+                     chan->oub_phys_addr);
+}
+
+static int fsl_re_remove(struct platform_device *ofdev)
+{
+       struct fsl_re_drv_private *re_priv;
+       struct device *dev;
+       int i;
+
+       dev = &ofdev->dev;
+       re_priv = dev_get_drvdata(dev);
+
+       /* Cleanup chan related memory areas */
+       for (i = 0; i < re_priv->total_chans; i++)
+               fsl_re_remove_chan(re_priv->re_jrs[i]);
+
+       /* Unregister the driver */
+       dma_async_device_unregister(&re_priv->dma_dev);
+
+       return 0;
+}
+
+static struct of_device_id fsl_re_ids[] = {
+       { .compatible = "fsl,raideng-v1.0", },
+       {}
+};
+
+static struct platform_driver fsl_re_driver = {
+       .driver = {
+               .name = "fsl-raideng",
+               .owner = THIS_MODULE,
+               .of_match_table = fsl_re_ids,
+       },
+       .probe = fsl_re_probe,
+       .remove = fsl_re_remove,
+};
+
+module_platform_driver(fsl_re_driver);
+
+MODULE_AUTHOR("Harninder Rai <harninder.rai@freescale.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Freescale RAID Engine Device Driver");
diff --git a/drivers/dma/fsl_raid.h b/drivers/dma/fsl_raid.h

new file mode 100644 (file)

index 0000000..69d743c
--- /dev/null
+++ b/drivers/dma/fsl_raid.h
@@ -0,0 +1,306 @@
+/*
+ * drivers/dma/fsl_raid.h
+ *
+ * Freescale RAID Engine device driver
+ *
+ * Author:
+ *     Harninder Rai <harninder.rai@freescale.com>
+ *     Naveen Burmi <naveenburmi@freescale.com>
+ *
+ * Rewrite:
+ *     Xuelin Shi <xuelin.shi@freescale.com>
+
+ * Copyright (c) 2010-2012 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#define FSL_RE_MAX_CHANS               4
+#define FSL_RE_DPAA_MODE               BIT(30)
+#define FSL_RE_NON_DPAA_MODE           BIT(31)
+#define FSL_RE_GFM_POLY                        0x1d000000
+#define FSL_RE_ADD_JOB(x)              ((x) << 16)
+#define FSL_RE_RMVD_JOB(x)             ((x) << 16)
+#define FSL_RE_CFG1_CBSI               0x08000000
+#define FSL_RE_CFG1_CBS0               0x00080000
+#define FSL_RE_SLOT_FULL_SHIFT         8
+#define FSL_RE_SLOT_FULL(x)            ((x) >> FSL_RE_SLOT_FULL_SHIFT)
+#define FSL_RE_SLOT_AVAIL_SHIFT                8
+#define FSL_RE_SLOT_AVAIL(x)           ((x) >> FSL_RE_SLOT_AVAIL_SHIFT)
+#define FSL_RE_PQ_OPCODE               0x1B
+#define FSL_RE_XOR_OPCODE              0x1A
+#define FSL_RE_MOVE_OPCODE             0x8
+#define FSL_RE_FRAME_ALIGN             16
+#define FSL_RE_BLOCK_SIZE              0x3 /* 4096 bytes */
+#define FSL_RE_CACHEABLE_IO            0x0
+#define FSL_RE_BUFFER_OUTPUT           0x0
+#define FSL_RE_INTR_ON_ERROR           0x1
+#define FSL_RE_DATA_DEP                        0x1
+#define FSL_RE_ENABLE_DPI              0x0
+#define FSL_RE_RING_SIZE               0x400
+#define FSL_RE_RING_SIZE_MASK          (FSL_RE_RING_SIZE - 1)
+#define FSL_RE_RING_SIZE_SHIFT         8
+#define FSL_RE_ADDR_BIT_SHIFT          4
+#define FSL_RE_ADDR_BIT_MASK           (BIT(FSL_RE_ADDR_BIT_SHIFT) - 1)
+#define FSL_RE_ERROR                   0x40000000
+#define FSL_RE_INTR                    0x80000000
+#define FSL_RE_CLR_INTR                        0x80000000
+#define FSL_RE_PAUSE                   0x80000000
+#define FSL_RE_ENABLE                  0x80000000
+#define FSL_RE_REG_LIODN_MASK          0x00000FFF
+
+#define FSL_RE_CDB_OPCODE_MASK         0xF8000000
+#define FSL_RE_CDB_OPCODE_SHIFT                27
+#define FSL_RE_CDB_EXCLEN_MASK         0x03000000
+#define FSL_RE_CDB_EXCLEN_SHIFT                24
+#define FSL_RE_CDB_EXCLQ1_MASK         0x00F00000
+#define FSL_RE_CDB_EXCLQ1_SHIFT                20
+#define FSL_RE_CDB_EXCLQ2_MASK         0x000F0000
+#define FSL_RE_CDB_EXCLQ2_SHIFT                16
+#define FSL_RE_CDB_BLKSIZE_MASK                0x0000C000
+#define FSL_RE_CDB_BLKSIZE_SHIFT       14
+#define FSL_RE_CDB_CACHE_MASK          0x00003000
+#define FSL_RE_CDB_CACHE_SHIFT         12
+#define FSL_RE_CDB_BUFFER_MASK         0x00000800
+#define FSL_RE_CDB_BUFFER_SHIFT                11
+#define FSL_RE_CDB_ERROR_MASK          0x00000400
+#define FSL_RE_CDB_ERROR_SHIFT         10
+#define FSL_RE_CDB_NRCS_MASK           0x0000003C
+#define FSL_RE_CDB_NRCS_SHIFT          6
+#define FSL_RE_CDB_DEPEND_MASK         0x00000008
+#define FSL_RE_CDB_DEPEND_SHIFT                3
+#define FSL_RE_CDB_DPI_MASK            0x00000004
+#define FSL_RE_CDB_DPI_SHIFT           2
+
+/*
+ * the largest cf block is 19*sizeof(struct cmpnd_frame), which is 304 bytes.
+ * here 19 = 1(cdb)+2(dest)+16(src), align to 64bytes, that is 320 bytes.
+ * the largest cdb block: struct pq_cdb which is 180 bytes, adding to cf block
+ * 320+180=500, align to 64bytes, that is 512 bytes.
+ */
+#define FSL_RE_CF_DESC_SIZE            320
+#define FSL_RE_CF_CDB_SIZE             512
+#define FSL_RE_CF_CDB_ALIGN            64
+
+struct fsl_re_ctrl {
+       /* General Configuration Registers */
+       __be32 global_config;   /* Global Configuration Register */
+       u8     rsvd1[4];
+       __be32 galois_field_config; /* Galois Field Configuration Register */
+       u8     rsvd2[4];
+       __be32 jq_wrr_config;   /* WRR Configuration register */
+       u8     rsvd3[4];
+       __be32 crc_config;      /* CRC Configuration register */
+       u8     rsvd4[228];
+       __be32 system_reset;    /* System Reset Register */
+       u8     rsvd5[252];
+       __be32 global_status;   /* Global Status Register */
+       u8     rsvd6[832];
+       __be32 re_liodn_base;   /* LIODN Base Register */
+       u8     rsvd7[1712];
+       __be32 re_version_id;   /* Version ID register of RE */
+       __be32 re_version_id_2; /* Version ID 2 register of RE */
+       u8     rsvd8[512];
+       __be32 host_config;     /* Host I/F Configuration Register */
+};
+
+struct fsl_re_chan_cfg {
+       /* Registers for JR interface */
+       __be32 jr_config_0;     /* Job Queue Configuration 0 Register */
+       __be32 jr_config_1;     /* Job Queue Configuration 1 Register */
+       __be32 jr_interrupt_status; /* Job Queue Interrupt Status Register */
+       u8     rsvd1[4];
+       __be32 jr_command;      /* Job Queue Command Register */
+       u8     rsvd2[4];
+       __be32 jr_status;       /* Job Queue Status Register */
+       u8     rsvd3[228];
+
+       /* Input Ring */
+       __be32 inbring_base_h;  /* Inbound Ring Base Address Register - High */
+       __be32 inbring_base_l;  /* Inbound Ring Base Address Register - Low */
+       __be32 inbring_size;    /* Inbound Ring Size Register */
+       u8     rsvd4[4];
+       __be32 inbring_slot_avail; /* Inbound Ring Slot Available Register */
+       u8     rsvd5[4];
+       __be32 inbring_add_job; /* Inbound Ring Add Job Register */
+       u8     rsvd6[4];
+       __be32 inbring_cnsmr_indx; /* Inbound Ring Consumer Index Register */
+       u8     rsvd7[220];
+
+       /* Output Ring */
+       __be32 oubring_base_h;  /* Outbound Ring Base Address Register - High */
+       __be32 oubring_base_l;  /* Outbound Ring Base Address Register - Low */
+       __be32 oubring_size;    /* Outbound Ring Size Register */
+       u8     rsvd8[4];
+       __be32 oubring_job_rmvd; /* Outbound Ring Job Removed Register */
+       u8     rsvd9[4];
+       __be32 oubring_slot_full; /* Outbound Ring Slot Full Register */
+       u8     rsvd10[4];
+       __be32 oubring_prdcr_indx; /* Outbound Ring Producer Index */
+};
+
+/*
+ * Command Descriptor Block (CDB) for unicast move command.
+ * In RAID Engine terms, memcpy is done through move command
+ */
+struct fsl_re_move_cdb {
+       __be32 cdb32;
+};
+
+/* Data protection/integrity related fields */
+#define FSL_RE_DPI_APPS_MASK           0xC0000000
+#define FSL_RE_DPI_APPS_SHIFT          30
+#define FSL_RE_DPI_REF_MASK            0x30000000
+#define FSL_RE_DPI_REF_SHIFT           28
+#define FSL_RE_DPI_GUARD_MASK          0x0C000000
+#define FSL_RE_DPI_GUARD_SHIFT         26
+#define FSL_RE_DPI_ATTR_MASK           0x03000000
+#define FSL_RE_DPI_ATTR_SHIFT          24
+#define FSL_RE_DPI_META_MASK           0x0000FFFF
+
+struct fsl_re_dpi {
+       __be32 dpi32;
+       __be32 ref;
+};
+
+/*
+ * CDB for GenQ command. In RAID Engine terminology, XOR is
+ * done through this command
+ */
+struct fsl_re_xor_cdb {
+       __be32 cdb32;
+       u8 gfm[16];
+       struct fsl_re_dpi dpi_dest_spec;
+       struct fsl_re_dpi dpi_src_spec[16];
+};
+
+/* CDB for no-op command */
+struct fsl_re_noop_cdb {
+       __be32 cdb32;
+};
+
+/*
+ * CDB for GenQQ command. In RAID Engine terminology, P/Q is
+ * done through this command
+ */
+struct fsl_re_pq_cdb {
+       __be32 cdb32;
+       u8 gfm_q1[16];
+       u8 gfm_q2[16];
+       struct fsl_re_dpi dpi_dest_spec[2];
+       struct fsl_re_dpi dpi_src_spec[16];
+};
+
+/* Compound frame */
+#define FSL_RE_CF_ADDR_HIGH_MASK       0x000000FF
+#define FSL_RE_CF_EXT_MASK             0x80000000
+#define FSL_RE_CF_EXT_SHIFT            31
+#define FSL_RE_CF_FINAL_MASK           0x40000000
+#define FSL_RE_CF_FINAL_SHIFT          30
+#define FSL_RE_CF_LENGTH_MASK          0x000FFFFF
+#define FSL_RE_CF_BPID_MASK            0x00FF0000
+#define FSL_RE_CF_BPID_SHIFT           16
+#define FSL_RE_CF_OFFSET_MASK          0x00001FFF
+
+struct fsl_re_cmpnd_frame {
+       __be32 addr_high;
+       __be32 addr_low;
+       __be32 efrl32;
+       __be32 rbro32;
+};
+
+/* Frame descriptor */
+#define FSL_RE_HWDESC_LIODN_MASK       0x3F000000
+#define FSL_RE_HWDESC_LIODN_SHIFT      24
+#define FSL_RE_HWDESC_BPID_MASK                0x00FF0000
+#define FSL_RE_HWDESC_BPID_SHIFT       16
+#define FSL_RE_HWDESC_ELIODN_MASK      0x0000F000
+#define FSL_RE_HWDESC_ELIODN_SHIFT     12
+#define FSL_RE_HWDESC_FMT_SHIFT                29
+#define FSL_RE_HWDESC_FMT_MASK         (0x3 << FSL_RE_HWDESC_FMT_SHIFT)
+
+struct fsl_re_hw_desc {
+       __be32 lbea32;
+       __be32 addr_low;
+       __be32 fmt32;
+       __be32 status;
+};
+
+/* Raid Engine device private data */
+struct fsl_re_drv_private {
+       u8 total_chans;
+       struct dma_device dma_dev;
+       struct fsl_re_ctrl *re_regs;
+       struct fsl_re_chan *re_jrs[FSL_RE_MAX_CHANS];
+       struct dma_pool *cf_desc_pool;
+       struct dma_pool *hw_desc_pool;
+};
+
+/* Per job ring data structure */
+struct fsl_re_chan {
+       char name[16];
+       spinlock_t desc_lock; /* queue lock */
+       struct list_head ack_q;  /* wait to acked queue */
+       struct list_head active_q; /* already issued on hw, not completed */
+       struct list_head submit_q;
+       struct list_head free_q; /* alloc available queue */
+       struct device *dev;
+       struct fsl_re_drv_private *re_dev;
+       struct dma_chan chan;
+       struct fsl_re_chan_cfg *jrregs;
+       int irq;
+       struct tasklet_struct irqtask;
+       u32 alloc_count;
+
+       /* hw descriptor ring for inbound queue*/
+       dma_addr_t inb_phys_addr;
+       struct fsl_re_hw_desc *inb_ring_virt_addr;
+       u32 inb_count;
+
+       /* hw descriptor ring for outbound queue */
+       dma_addr_t oub_phys_addr;
+       struct fsl_re_hw_desc *oub_ring_virt_addr;
+       u32 oub_count;
+};
+
+/* Async transaction descriptor */
+struct fsl_re_desc {
+       struct dma_async_tx_descriptor async_tx;
+       struct list_head node;
+       struct fsl_re_hw_desc hwdesc;
+       struct fsl_re_chan *re_chan;
+
+       /* hwdesc will point to cf_addr */
+       void *cf_addr;
+       dma_addr_t cf_paddr;
+
+       void *cdb_addr;
+       dma_addr_t cdb_paddr;
+       int status;
+};
diff --git a/drivers/dma/img-mdc-dma.c b/drivers/dma/img-mdc-dma.c

index ed045a9..9ca5683 100644 (file)
--- a/drivers/dma/img-mdc-dma.c
+++ b/drivers/dma/img-mdc-dma.c
@@ -689,11 +689,6 @@ static int mdc_slave_config(struct dma_chan *chan,
         return 0;
  }
  
-static int mdc_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
  static void mdc_free_chan_resources(struct dma_chan *chan)
  {
         struct mdc_chan *mchan = to_mdc_chan(chan);
@@ -910,7 +905,6 @@ static int mdc_dma_probe(struct platform_device *pdev)
         mdma->dma_dev.device_prep_slave_sg = mdc_prep_slave_sg;
         mdma->dma_dev.device_prep_dma_cyclic = mdc_prep_dma_cyclic;
         mdma->dma_dev.device_prep_dma_memcpy = mdc_prep_dma_memcpy;
-       mdma->dma_dev.device_alloc_chan_resources = mdc_alloc_chan_resources;
         mdma->dma_dev.device_free_chan_resources = mdc_free_chan_resources;
         mdma->dma_dev.device_tx_status = mdc_tx_status;
         mdma->dma_dev.device_issue_pending = mdc_issue_pending;
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c

index 66a0efb..62bbd79 100644 (file)
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -1260,6 +1260,7 @@ static void sdma_issue_pending(struct dma_chan *chan)
  
  #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1        34
  #define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2        38
+#define SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3        41
  
  static void sdma_add_scripts(struct sdma_engine *sdma,
                 const struct sdma_script_start_addrs *addr)
@@ -1306,6 +1307,9 @@ static void sdma_load_firmware(const struct firmware *fw, void *context)
         case 2:
                 sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V2;
                 break;
+       case 3:
+               sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V3;
+               break;
         default:
                 dev_err(sdma->dev, "unknown firmware version\n");
                 goto err_firmware;
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c

index 3b55bb8..ea1e107 100644 (file)
--- a/drivers/dma/ioat/dca.c
+++ b/drivers/dma/ioat/dca.c
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
   * The full GNU General Public License is included in this distribution in
   * the file called "COPYING".
   *
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c

index 940c150..ee0aa9f 100644 (file)
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
   * The full GNU General Public License is included in this distribution in
   * the file called "COPYING".
   *
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h

index d63f68b..30f5c7e 100644 (file)
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
   * The full GNU General Public License is included in this distribution in the
   * file called COPYING.
   */
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c

index 695483e..69c7dfc 100644 (file)
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
   * The full GNU General Public License is included in this distribution in
   * the file called "COPYING".
   *
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h

index 4702927..bf24ebe 100644 (file)
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
   * The full GNU General Public License is included in this distribution in the
   * file called COPYING.
   */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c

index 194ec20..64790a4 100644 (file)
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -15,10 +15,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
   * The full GNU General Public License is included in this distribution in
   * the file called "COPYING".
   *
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h

index 02177ec..a3e731e 100644 (file)
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
   * The full GNU General Public License is included in this distribution in the
   * file called COPYING.
   */
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c

index 5501eb0..76f0dc6 100644 (file)
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
   * The full GNU General Public License is included in this distribution in
   * the file called "COPYING".
   *
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h

index 2f1cfa0..909352f 100644 (file)
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
   * The full GNU General Public License is included in this distribution in the
   * file called COPYING.
   */
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c

index 263d9f6..9988268 100644 (file)
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
   */
  
  /*
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c

index 6f7f435..647e362 100644 (file)
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -313,11 +313,6 @@ static void k3_dma_tasklet(unsigned long arg)
         }
  }
  
-static int k3_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
  static void k3_dma_free_chan_resources(struct dma_chan *chan)
  {
         struct k3_dma_chan *c = to_k3_chan(chan);
@@ -654,7 +649,7 @@ static void k3_dma_free_desc(struct virt_dma_desc *vd)
         kfree(ds);
  }
  
-static struct of_device_id k3_pdma_dt_ids[] = {
+static const struct of_device_id k3_pdma_dt_ids[] = {
         { .compatible = "hisilicon,k3-dma-1.0", },
         {}
  };
@@ -728,7 +723,6 @@ static int k3_dma_probe(struct platform_device *op)
         dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
         dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
         d->slave.dev = &op->dev;
-       d->slave.device_alloc_chan_resources = k3_dma_alloc_chan_resources;
         d->slave.device_free_chan_resources = k3_dma_free_chan_resources;
         d->slave.device_tx_status = k3_dma_tx_status;
         d->slave.device_prep_dma_memcpy = k3_dma_prep_memcpy;
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c

index eb41004..462a022 100644 (file)
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -973,7 +973,7 @@ static int mmp_pdma_chan_init(struct mmp_pdma_device *pdev, int idx, int irq)
         return 0;
  }
  
-static struct of_device_id mmp_pdma_dt_ids[] = {
+static const struct of_device_id mmp_pdma_dt_ids[] = {
         { .compatible = "marvell,pdma-1.0", },
         {}
  };
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c

index b6f4e1f..449e785 100644 (file)
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -613,7 +613,7 @@ struct dma_chan *mmp_tdma_xlate(struct of_phandle_args *dma_spec,
         return dma_request_channel(mask, mmp_tdma_filter_fn, &param);
  }
  
-static struct of_device_id mmp_tdma_dt_ids[] = {
+static const struct of_device_id mmp_tdma_dt_ids[] = {
         { .compatible = "marvell,adma-1.0", .data = (void *)MMP_AUD_TDMA},
         { .compatible = "marvell,pxa910-squ", .data = (void *)PXA910_SQU},
         {}
diff --git a/drivers/dma/mpc512x_dma.c b/drivers/dma/mpc512x_dma.c

index 57d2457..e6281e7 100644 (file)
--- a/drivers/dma/mpc512x_dma.c
+++ b/drivers/dma/mpc512x_dma.c
@@ -21,10 +21,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
   * The full GNU General Public License is included in this distribution in the
   * file called COPYING.
   */
@@ -1072,7 +1068,7 @@ static int mpc_dma_remove(struct platform_device *op)
         return 0;
  }
  
-static struct of_device_id mpc_dma_match[] = {
+static const struct of_device_id mpc_dma_match[] = {
         { .compatible = "fsl,mpc5121-dma", },
         { .compatible = "fsl,mpc8308-dma", },
         {},
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c

index b03e813..1c56001 100644 (file)
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -10,10 +10,6 @@
   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
   */
  
  #include <linux/init.h>
@@ -1249,7 +1245,7 @@ static int mv_xor_remove(struct platform_device *pdev)
  }
  
  #ifdef CONFIG_OF
-static struct of_device_id mv_xor_dt_ids[] = {
+static const struct of_device_id mv_xor_dt_ids[] = {
         { .compatible = "marvell,orion-xor", },
         {},
  };
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h

index 78edc7e..91958db 100644 (file)
--- a/drivers/dma/mv_xor.h
+++ b/drivers/dma/mv_xor.h
@@ -9,10 +9,6 @@
   * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   * for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
   */
  
  #ifndef MV_XOR_H
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c

index 35c143c..b859792 100644 (file)
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -949,6 +949,7 @@ err_free_res:
  err_disable_pdev:
         pci_disable_device(pdev);
  err_free_mem:
+       kfree(pd);
         return err;
  }
  
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c

index 0e1f567..a7d9d30 100644 (file)
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -556,7 +556,7 @@ static inline u32 _emit_ADDH(unsigned dry_run, u8 buf[],
  
         buf[0] = CMD_DMAADDH;
         buf[0] |= (da << 1);
-       *((u16 *)&buf[1]) = val;
+       *((__le16 *)&buf[1]) = cpu_to_le16(val);
  
         PL330_DBGCMD_DUMP(SZ_DMAADDH, "\tDMAADDH %s %u\n",
                 da == 1 ? "DA" : "SA", val);
@@ -710,7 +710,7 @@ static inline u32 _emit_MOV(unsigned dry_run, u8 buf[],
  
         buf[0] = CMD_DMAMOV;
         buf[1] = dst;
-       *((u32 *)&buf[2]) = val;
+       *((__le32 *)&buf[2]) = cpu_to_le32(val);
  
         PL330_DBGCMD_DUMP(SZ_DMAMOV, "\tDMAMOV %s 0x%x\n",
                 dst == SAR ? "SAR" : (dst == DAR ? "DAR" : "CCR"), val);
@@ -888,7 +888,7 @@ static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
  
         buf[1] = chan & 0x7;
  
-       *((u32 *)&buf[2]) = addr;
+       *((__le32 *)&buf[2]) = cpu_to_le32(addr);
  
         return SZ_DMAGO;
  }
@@ -928,7 +928,7 @@ static inline void _execute_DBGINSN(struct pl330_thread *thrd,
         }
         writel(val, regs + DBGINST0);
  
-       val = *((u32 *)&insn[2]);
+       val = le32_to_cpu(*((__le32 *)&insn[2]));
         writel(val, regs + DBGINST1);
  
         /* If timed out due to halted state-machine */
@@ -2162,7 +2162,7 @@ static int pl330_terminate_all(struct dma_chan *chan)
   * DMA transfer again. This pause feature was implemented to
   * allow safely read residue before channel termination.
   */
-int pl330_pause(struct dma_chan *chan)
+static int pl330_pause(struct dma_chan *chan)
  {
         struct dma_pl330_chan *pch = to_pchan(chan);
         struct pl330_dmac *pl330 = pch->dmac;
@@ -2203,8 +2203,8 @@ static void pl330_free_chan_resources(struct dma_chan *chan)
         pm_runtime_put_autosuspend(pch->dmac->ddma.dev);
  }
  
-int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
-               struct dma_pl330_desc *desc)
+static int pl330_get_current_xferred_count(struct dma_pl330_chan *pch,
+                                          struct dma_pl330_desc *desc)
  {
         struct pl330_thread *thrd = pch->thread;
         struct pl330_dmac *pl330 = pch->dmac;
@@ -2259,7 +2259,17 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                         transferred = 0;
                 residual += desc->bytes_requested - transferred;
                 if (desc->txd.cookie == cookie) {
-                       ret = desc->status;
+                       switch (desc->status) {
+                       case DONE:
+                               ret = DMA_COMPLETE;
+                               break;
+                       case PREP:
+                       case BUSY:
+                               ret = DMA_IN_PROGRESS;
+                               break;
+                       default:
+                               WARN_ON(1);
+                       }
                         break;
                 }
                 if (desc->last)
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c

index fa764a3..9217f89 100644 (file)
--- a/drivers/dma/ppc4xx/adma.c
+++ b/drivers/dma/ppc4xx/adma.c
@@ -16,10 +16,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
   * The full GNU General Public License is included in this distribution in the
   * file called COPYING.
   */
diff --git a/drivers/dma/qcom_bam_dma.c b/drivers/dma/qcom_bam_dma.c

index 9c914d6..5a250cd 100644 (file)
--- a/drivers/dma/qcom_bam_dma.c
+++ b/drivers/dma/qcom_bam_dma.c
@@ -171,6 +171,35 @@ static const struct reg_offset_data bam_v1_4_reg_info[] = {
         [BAM_P_FIFO_SIZES]      = { 0x1820, 0x00, 0x1000, 0x00 },
  };
  
+static const struct reg_offset_data bam_v1_7_reg_info[] = {
+       [BAM_CTRL]              = { 0x00000, 0x00, 0x00, 0x00 },
+       [BAM_REVISION]          = { 0x01000, 0x00, 0x00, 0x00 },
+       [BAM_NUM_PIPES]         = { 0x01008, 0x00, 0x00, 0x00 },
+       [BAM_DESC_CNT_TRSHLD]   = { 0x00008, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS]          = { 0x03010, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_MSK]      = { 0x03014, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_UNMASKED] = { 0x03018, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_STTS]          = { 0x00014, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_CLR]           = { 0x00018, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_EN]            = { 0x0001C, 0x00, 0x00, 0x00 },
+       [BAM_CNFG_BITS]         = { 0x0007C, 0x00, 0x00, 0x00 },
+       [BAM_IRQ_SRCS_EE]       = { 0x03000, 0x00, 0x00, 0x1000 },
+       [BAM_IRQ_SRCS_MSK_EE]   = { 0x03004, 0x00, 0x00, 0x1000 },
+       [BAM_P_CTRL]            = { 0x13000, 0x1000, 0x00, 0x00 },
+       [BAM_P_RST]             = { 0x13004, 0x1000, 0x00, 0x00 },
+       [BAM_P_HALT]            = { 0x13008, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_STTS]        = { 0x13010, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_CLR]         = { 0x13014, 0x1000, 0x00, 0x00 },
+       [BAM_P_IRQ_EN]          = { 0x13018, 0x1000, 0x00, 0x00 },
+       [BAM_P_EVNT_DEST_ADDR]  = { 0x1382C, 0x00, 0x1000, 0x00 },
+       [BAM_P_EVNT_REG]        = { 0x13818, 0x00, 0x1000, 0x00 },
+       [BAM_P_SW_OFSTS]        = { 0x13800, 0x00, 0x1000, 0x00 },
+       [BAM_P_DATA_FIFO_ADDR]  = { 0x13824, 0x00, 0x1000, 0x00 },
+       [BAM_P_DESC_FIFO_ADDR]  = { 0x1381C, 0x00, 0x1000, 0x00 },
+       [BAM_P_EVNT_GEN_TRSHLD] = { 0x13828, 0x00, 0x1000, 0x00 },
+       [BAM_P_FIFO_SIZES]      = { 0x13820, 0x00, 0x1000, 0x00 },
+};
+
  /* BAM CTRL */
  #define BAM_SW_RST                     BIT(0)
  #define BAM_EN                         BIT(1)
@@ -1051,6 +1080,7 @@ static void bam_channel_init(struct bam_device *bdev, struct bam_chan *bchan,
  static const struct of_device_id bam_of_match[] = {
         { .compatible = "qcom,bam-v1.3.0", .data = &bam_v1_3_reg_info },
         { .compatible = "qcom,bam-v1.4.0", .data = &bam_v1_4_reg_info },
+       { .compatible = "qcom,bam-v1.7.0", .data = &bam_v1_7_reg_info },
         {}
  };
  
@@ -1113,7 +1143,7 @@ static int bam_dma_probe(struct platform_device *pdev)
  
         if (!bdev->channels) {
                 ret = -ENOMEM;
-               goto err_disable_clk;
+               goto err_tasklet_kill;
         }
  
         /* allocate and initialize channels */
@@ -1125,7 +1155,7 @@ static int bam_dma_probe(struct platform_device *pdev)
         ret = devm_request_irq(bdev->dev, bdev->irq, bam_dma_irq,
                         IRQF_TRIGGER_HIGH, "bam_dma", bdev);
         if (ret)
-               goto err_disable_clk;
+               goto err_bam_channel_exit;
  
         /* set max dma segment size */
         bdev->common.dev = bdev->dev;
@@ -1133,7 +1163,7 @@ static int bam_dma_probe(struct platform_device *pdev)
         ret = dma_set_max_seg_size(bdev->common.dev, BAM_MAX_DATA_SIZE);
         if (ret) {
                 dev_err(bdev->dev, "cannot set maximum segment size\n");
-               goto err_disable_clk;
+               goto err_bam_channel_exit;
         }
  
         platform_set_drvdata(pdev, bdev);
@@ -1161,7 +1191,7 @@ static int bam_dma_probe(struct platform_device *pdev)
         ret = dma_async_device_register(&bdev->common);
         if (ret) {
                 dev_err(bdev->dev, "failed to register dma async device\n");
-               goto err_disable_clk;
+               goto err_bam_channel_exit;
         }
  
         ret = of_dma_controller_register(pdev->dev.of_node, bam_dma_xlate,
@@ -1173,8 +1203,14 @@ static int bam_dma_probe(struct platform_device *pdev)
  
  err_unregister_dma:
         dma_async_device_unregister(&bdev->common);
+err_bam_channel_exit:
+       for (i = 0; i < bdev->num_channels; i++)
+               tasklet_kill(&bdev->channels[i].vc.task);
+err_tasklet_kill:
+       tasklet_kill(&bdev->task);
  err_disable_clk:
         clk_disable_unprepare(bdev->bamclk);
+
         return ret;
  }
  
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c

index 2f91da3..01dcaf2 100644 (file)
--- a/drivers/dma/s3c24xx-dma.c
+++ b/drivers/dma/s3c24xx-dma.c
@@ -749,11 +749,6 @@ unlock:
         return ret;
  }
  
-static int s3c24xx_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
  static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan)
  {
         /* Ensure all queued descriptors are freed */
@@ -1238,7 +1233,7 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
         if (!s3cdma->phy_chans)
                 return -ENOMEM;
  
-       /* aquire irqs and clocks for all physical channels */
+       /* acquire irqs and clocks for all physical channels */
         for (i = 0; i < pdata->num_phy_channels; i++) {
                 struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
                 char clk_name[6];
@@ -1266,7 +1261,7 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
                         sprintf(clk_name, "dma.%d", i);
                         phy->clk = devm_clk_get(&pdev->dev, clk_name);
                         if (IS_ERR(phy->clk) && sdata->has_clocks) {
-                               dev_err(&pdev->dev, "unable to aquire clock for channel %d, error %lu",
+                               dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %lu\n",
                                         i, PTR_ERR(phy->clk));
                                 continue;
                         }
@@ -1290,8 +1285,6 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
         dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask);
         dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask);
         s3cdma->memcpy.dev = &pdev->dev;
-       s3cdma->memcpy.device_alloc_chan_resources =
-                                       s3c24xx_dma_alloc_chan_resources;
         s3cdma->memcpy.device_free_chan_resources =
                                         s3c24xx_dma_free_chan_resources;
         s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy;
@@ -1305,8 +1298,6 @@ static int s3c24xx_dma_probe(struct platform_device *pdev)
         dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask);
         dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
         s3cdma->slave.dev = &pdev->dev;
-       s3cdma->slave.device_alloc_chan_resources =
-                                       s3c24xx_dma_alloc_chan_resources;
         s3cdma->slave.device_free_chan_resources =
                                         s3c24xx_dma_free_chan_resources;
         s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c

index 5adf540..43db255 100644 (file)
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -389,11 +389,6 @@ static void sa11x0_dma_tasklet(unsigned long arg)
  }
  
  
-static int sa11x0_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
  static void sa11x0_dma_free_chan_resources(struct dma_chan *chan)
  {
         struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
@@ -835,7 +830,6 @@ static int sa11x0_dma_init_dmadev(struct dma_device *dmadev,
  
         INIT_LIST_HEAD(&dmadev->channels);
         dmadev->dev = dev;
-       dmadev->device_alloc_chan_resources = sa11x0_dma_alloc_chan_resources;
         dmadev->device_free_chan_resources = sa11x0_dma_free_chan_resources;
         dmadev->device_config = sa11x0_dma_device_config;
         dmadev->device_pause = sa11x0_dma_device_pause;
@@ -948,6 +942,12 @@ static int sa11x0_dma_probe(struct platform_device *pdev)
         dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
         d->slave.device_prep_slave_sg = sa11x0_dma_prep_slave_sg;
         d->slave.device_prep_dma_cyclic = sa11x0_dma_prep_dma_cyclic;
+       d->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+       d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+       d->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+                                  BIT(DMA_SLAVE_BUSWIDTH_2_BYTES);
+       d->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
+                                  BIT(DMA_SLAVE_BUSWIDTH_2_BYTES);
         ret = sa11x0_dma_init_dmadev(&d->slave, &pdev->dev);
         if (ret) {
                 dev_warn(d->slave.dev, "failed to register slave async device: %d\n",
diff --git a/drivers/dma/sh/Kconfig b/drivers/dma/sh/Kconfig

index 8190ad2..0f37152 100644 (file)
--- a/drivers/dma/sh/Kconfig
+++ b/drivers/dma/sh/Kconfig
@@ -51,12 +51,6 @@ config RCAR_HPB_DMAE
         help
           Enable support for the Renesas R-Car series DMA controllers.
  
-config RCAR_AUDMAC_PP
-       tristate "Renesas R-Car Audio DMAC Peripheral Peripheral support"
-       depends on SH_DMAE_BASE
-       help
-         Enable support for the Renesas R-Car Audio DMAC Peripheral Peripheral controllers.
-
  config RCAR_DMAC
         tristate "Renesas R-Car Gen2 DMA Controller"
         depends on ARCH_SHMOBILE || COMPILE_TEST
@@ -64,3 +58,12 @@ config RCAR_DMAC
         help
           This driver supports the general purpose DMA controller found in the
           Renesas R-Car second generation SoCs.
+
+config RENESAS_USB_DMAC
+       tristate "Renesas USB-DMA Controller"
+       depends on ARCH_SHMOBILE || COMPILE_TEST
+       select RENESAS_DMA
+       select DMA_VIRTUAL_CHANNELS
+       help
+         This driver supports the USB-DMA controller found in the Renesas
+         SoCs.
diff --git a/drivers/dma/sh/Makefile b/drivers/dma/sh/Makefile

index 2852f9d..b8a5980 100644 (file)
--- a/drivers/dma/sh/Makefile
+++ b/drivers/dma/sh/Makefile
@@ -15,5 +15,5 @@ obj-$(CONFIG_SH_DMAE) += shdma.o
  
  obj-$(CONFIG_SUDMAC) += sudmac.o
  obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o
-obj-$(CONFIG_RCAR_AUDMAC_PP) += rcar-audmapp.o
  obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o
+obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o
diff --git a/drivers/dma/sh/rcar-audmapp.c b/drivers/dma/sh/rcar-audmapp.c

deleted file mode 100644 (file)

index d95bbdd..0000000
--- a/drivers/dma/sh/rcar-audmapp.c
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- * This is for Renesas R-Car Audio-DMAC-peri-peri.
- *
- * Copyright (C) 2014 Renesas Electronics Corporation
- * Copyright (C) 2014 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
- *
- * based on the drivers/dma/sh/shdma.c
- *
- * Copyright (C) 2011-2012 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
- * Copyright (C) 2009 Nobuhiro Iwamatsu <iwamatsu.nobuhiro@renesas.com>
- * Copyright (C) 2009 Renesas Solutions, Inc. All rights reserved.
- * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/dmaengine.h>
-#include <linux/of_dma.h>
-#include <linux/platform_data/dma-rcar-audmapp.h>
-#include <linux/platform_device.h>
-#include <linux/shdma-base.h>
-
-/*
- * DMA register
- */
-#define PDMASAR                0x00
-#define PDMADAR                0x04
-#define PDMACHCR       0x0c
-
-/* PDMACHCR */
-#define PDMACHCR_DE            (1 << 0)
-
-#define AUDMAPP_MAX_CHANNELS   29
-
-/* Default MEMCPY transfer size = 2^2 = 4 bytes */
-#define LOG2_DEFAULT_XFER_SIZE 2
-#define AUDMAPP_SLAVE_NUMBER   256
-#define AUDMAPP_LEN_MAX                (16 * 1024 * 1024)
-
-struct audmapp_chan {
-       struct shdma_chan shdma_chan;
-       void __iomem *base;
-       dma_addr_t slave_addr;
-       u32 chcr;
-};
-
-struct audmapp_device {
-       struct shdma_dev shdma_dev;
-       struct audmapp_pdata *pdata;
-       struct device *dev;
-       void __iomem *chan_reg;
-};
-
-struct audmapp_desc {
-       struct shdma_desc shdma_desc;
-       dma_addr_t src;
-       dma_addr_t dst;
-};
-
-#define to_shdma_chan(c) container_of(c, struct shdma_chan, dma_chan)
-
-#define to_chan(chan) container_of(chan, struct audmapp_chan, shdma_chan)
-#define to_desc(sdesc) container_of(sdesc, struct audmapp_desc, shdma_desc)
-#define to_dev(chan) container_of(chan->shdma_chan.dma_chan.device,    \
-                                 struct audmapp_device, shdma_dev.dma_dev)
-
-static void audmapp_write(struct audmapp_chan *auchan, u32 data, u32 reg)
-{
-       struct audmapp_device *audev = to_dev(auchan);
-       struct device *dev = audev->dev;
-
-       dev_dbg(dev, "w %p : %08x\n", auchan->base + reg, data);
-
-       iowrite32(data, auchan->base + reg);
-}
-
-static u32 audmapp_read(struct audmapp_chan *auchan, u32 reg)
-{
-       return ioread32(auchan->base + reg);
-}
-
-static void audmapp_halt(struct shdma_chan *schan)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-       int i;
-
-       audmapp_write(auchan, 0, PDMACHCR);
-
-       for (i = 0; i < 1024; i++) {
-               if (0 == audmapp_read(auchan, PDMACHCR))
-                       return;
-               udelay(1);
-       }
-}
-
-static void audmapp_start_xfer(struct shdma_chan *schan,
-                              struct shdma_desc *sdesc)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-       struct audmapp_device *audev = to_dev(auchan);
-       struct audmapp_desc *desc = to_desc(sdesc);
-       struct device *dev = audev->dev;
-       u32 chcr = auchan->chcr | PDMACHCR_DE;
-
-       dev_dbg(dev, "src/dst/chcr = %pad/%pad/%08x\n",
-               &desc->src, &desc->dst, chcr);
-
-       audmapp_write(auchan, desc->src,        PDMASAR);
-       audmapp_write(auchan, desc->dst,        PDMADAR);
-       audmapp_write(auchan, chcr,     PDMACHCR);
-}
-
-static int audmapp_get_config(struct audmapp_chan *auchan, int slave_id,
-                             u32 *chcr, dma_addr_t *dst)
-{
-       struct audmapp_device *audev = to_dev(auchan);
-       struct audmapp_pdata *pdata = audev->pdata;
-       struct audmapp_slave_config *cfg;
-       int i;
-
-       *chcr   = 0;
-       *dst    = 0;
-
-       if (!pdata) { /* DT */
-               *chcr = ((u32)slave_id) << 16;
-               auchan->shdma_chan.slave_id = (slave_id) >> 8;
-               return 0;
-       }
-
-       /* non-DT */
-
-       if (slave_id >= AUDMAPP_SLAVE_NUMBER)
-               return -ENXIO;
-
-       for (i = 0, cfg = pdata->slave; i < pdata->slave_num; i++, cfg++)
-               if (cfg->slave_id == slave_id) {
-                       *chcr   = cfg->chcr;
-                       *dst    = cfg->dst;
-                       return 0;
-               }
-
-       return -ENXIO;
-}
-
-static int audmapp_set_slave(struct shdma_chan *schan, int slave_id,
-                            dma_addr_t slave_addr, bool try)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-       u32 chcr;
-       dma_addr_t dst;
-       int ret;
-
-       ret = audmapp_get_config(auchan, slave_id, &chcr, &dst);
-       if (ret < 0)
-               return ret;
-
-       if (try)
-               return 0;
-
-       auchan->chcr            = chcr;
-       auchan->slave_addr      = slave_addr ? : dst;
-
-       return 0;
-}
-
-static int audmapp_desc_setup(struct shdma_chan *schan,
-                             struct shdma_desc *sdesc,
-                             dma_addr_t src, dma_addr_t dst, size_t *len)
-{
-       struct audmapp_desc *desc = to_desc(sdesc);
-
-       if (*len > (size_t)AUDMAPP_LEN_MAX)
-               *len = (size_t)AUDMAPP_LEN_MAX;
-
-       desc->src = src;
-       desc->dst = dst;
-
-       return 0;
-}
-
-static void audmapp_setup_xfer(struct shdma_chan *schan,
-                              int slave_id)
-{
-}
-
-static dma_addr_t audmapp_slave_addr(struct shdma_chan *schan)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-
-       return auchan->slave_addr;
-}
-
-static bool audmapp_channel_busy(struct shdma_chan *schan)
-{
-       struct audmapp_chan *auchan = to_chan(schan);
-       u32 chcr = audmapp_read(auchan, PDMACHCR);
-
-       return chcr & ~PDMACHCR_DE;
-}
-
-static bool audmapp_desc_completed(struct shdma_chan *schan,
-                                  struct shdma_desc *sdesc)
-{
-       return true;
-}
-
-static struct shdma_desc *audmapp_embedded_desc(void *buf, int i)
-{
-       return &((struct audmapp_desc *)buf)[i].shdma_desc;
-}
-
-static const struct shdma_ops audmapp_shdma_ops = {
-       .halt_channel   = audmapp_halt,
-       .desc_setup     = audmapp_desc_setup,
-       .set_slave      = audmapp_set_slave,
-       .start_xfer     = audmapp_start_xfer,
-       .embedded_desc  = audmapp_embedded_desc,
-       .setup_xfer     = audmapp_setup_xfer,
-       .slave_addr     = audmapp_slave_addr,
-       .channel_busy   = audmapp_channel_busy,
-       .desc_completed = audmapp_desc_completed,
-};
-
-static int audmapp_chan_probe(struct platform_device *pdev,
-                             struct audmapp_device *audev, int id)
-{
-       struct shdma_dev *sdev = &audev->shdma_dev;
-       struct audmapp_chan *auchan;
-       struct shdma_chan *schan;
-       struct device *dev = audev->dev;
-
-       auchan = devm_kzalloc(dev, sizeof(*auchan), GFP_KERNEL);
-       if (!auchan)
-               return -ENOMEM;
-
-       schan = &auchan->shdma_chan;
-       schan->max_xfer_len = AUDMAPP_LEN_MAX;
-
-       shdma_chan_probe(sdev, schan, id);
-
-       auchan->base = audev->chan_reg + 0x20 + (0x10 * id);
-       dev_dbg(dev, "%02d : %p / %p", id, auchan->base, audev->chan_reg);
-
-       return 0;
-}
-
-static void audmapp_chan_remove(struct audmapp_device *audev)
-{
-       struct shdma_chan *schan;
-       int i;
-
-       shdma_for_each_chan(schan, &audev->shdma_dev, i) {
-               BUG_ON(!schan);
-               shdma_chan_remove(schan);
-       }
-}
-
-static struct dma_chan *audmapp_of_xlate(struct of_phandle_args *dma_spec,
-                                        struct of_dma *ofdma)
-{
-       dma_cap_mask_t mask;
-       struct dma_chan *chan;
-       u32 chcr = dma_spec->args[0];
-
-       if (dma_spec->args_count != 1)
-               return NULL;
-
-       dma_cap_zero(mask);
-       dma_cap_set(DMA_SLAVE, mask);
-
-       chan = dma_request_channel(mask, shdma_chan_filter, NULL);
-       if (chan)
-               to_shdma_chan(chan)->hw_req = chcr;
-
-       return chan;
-}
-
-static int audmapp_probe(struct platform_device *pdev)
-{
-       struct audmapp_pdata *pdata = pdev->dev.platform_data;
-       struct device_node *np = pdev->dev.of_node;
-       struct audmapp_device *audev;
-       struct shdma_dev *sdev;
-       struct dma_device *dma_dev;
-       struct resource *res;
-       int err, i;
-
-       if (np)
-               of_dma_controller_register(np, audmapp_of_xlate, pdev);
-       else if (!pdata)
-               return -ENODEV;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-       audev = devm_kzalloc(&pdev->dev, sizeof(*audev), GFP_KERNEL);
-       if (!audev)
-               return -ENOMEM;
-
-       audev->dev      = &pdev->dev;
-       audev->pdata    = pdata;
-       audev->chan_reg = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(audev->chan_reg))
-               return PTR_ERR(audev->chan_reg);
-
-       sdev            = &audev->shdma_dev;
-       sdev->ops       = &audmapp_shdma_ops;
-       sdev->desc_size = sizeof(struct audmapp_desc);
-
-       dma_dev                 = &sdev->dma_dev;
-       dma_dev->copy_align     = LOG2_DEFAULT_XFER_SIZE;
-       dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
-
-       err = shdma_init(&pdev->dev, sdev, AUDMAPP_MAX_CHANNELS);
-       if (err < 0)
-               return err;
-
-       platform_set_drvdata(pdev, audev);
-
-       /* Create DMA Channel */
-       for (i = 0; i < AUDMAPP_MAX_CHANNELS; i++) {
-               err = audmapp_chan_probe(pdev, audev, i);
-               if (err)
-                       goto chan_probe_err;
-       }
-
-       err = dma_async_device_register(dma_dev);
-       if (err < 0)
-               goto chan_probe_err;
-
-       return err;
-
-chan_probe_err:
-       audmapp_chan_remove(audev);
-       shdma_cleanup(sdev);
-
-       return err;
-}
-
-static int audmapp_remove(struct platform_device *pdev)
-{
-       struct audmapp_device *audev = platform_get_drvdata(pdev);
-       struct dma_device *dma_dev = &audev->shdma_dev.dma_dev;
-
-       dma_async_device_unregister(dma_dev);
-
-       audmapp_chan_remove(audev);
-       shdma_cleanup(&audev->shdma_dev);
-
-       return 0;
-}
-
-static const struct of_device_id audmapp_of_match[] = {
-       { .compatible = "renesas,rcar-audmapp", },
-       {},
-};
-
-static struct platform_driver audmapp_driver = {
-       .probe          = audmapp_probe,
-       .remove         = audmapp_remove,
-       .driver         = {
-               .name   = "rcar-audmapp-engine",
-               .of_match_table = audmapp_of_match,
-       },
-};
-module_platform_driver(audmapp_driver);
-
-MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>");
-MODULE_DESCRIPTION("Renesas R-Car Audio DMAC peri-peri driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c

index 8ee383d..10fcaba 100644 (file)
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -171,8 +171,7 @@ static struct shdma_desc *shdma_get_desc(struct shdma_chan *schan)
         return NULL;
  }
  
-static int shdma_setup_slave(struct shdma_chan *schan, int slave_id,
-                            dma_addr_t slave_addr)
+static int shdma_setup_slave(struct shdma_chan *schan, dma_addr_t slave_addr)
  {
         struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
         const struct shdma_ops *ops = sdev->ops;
@@ -183,25 +182,23 @@ static int shdma_setup_slave(struct shdma_chan *schan, int slave_id,
                 ret = ops->set_slave(schan, match, slave_addr, true);
                 if (ret < 0)
                         return ret;
-
-               slave_id = schan->slave_id;
         } else {
-               match = slave_id;
+               match = schan->real_slave_id;
         }
  
-       if (slave_id < 0 || slave_id >= slave_num)
+       if (schan->real_slave_id < 0 || schan->real_slave_id >= slave_num)
                 return -EINVAL;
  
-       if (test_and_set_bit(slave_id, shdma_slave_used))
+       if (test_and_set_bit(schan->real_slave_id, shdma_slave_used))
                 return -EBUSY;
  
         ret = ops->set_slave(schan, match, slave_addr, false);
         if (ret < 0) {
-               clear_bit(slave_id, shdma_slave_used);
+               clear_bit(schan->real_slave_id, shdma_slave_used);
                 return ret;
         }
  
-       schan->slave_id = slave_id;
+       schan->slave_id = schan->real_slave_id;
  
         return 0;
  }
@@ -221,10 +218,12 @@ static int shdma_alloc_chan_resources(struct dma_chan *chan)
          */
         if (slave) {
                 /* Legacy mode: .private is set in filter */
-               ret = shdma_setup_slave(schan, slave->slave_id, 0);
+               schan->real_slave_id = slave->slave_id;
+               ret = shdma_setup_slave(schan, 0);
                 if (ret < 0)
                         goto esetslave;
         } else {
+               /* Normal mode: real_slave_id was set by filter */
                 schan->slave_id = -EINVAL;
         }
  
@@ -258,11 +257,14 @@ esetslave:
  
  /*
   * This is the standard shdma filter function to be used as a replacement to the
- * "old" method, using the .private pointer. If for some reason you allocate a
- * channel without slave data, use something like ERR_PTR(-EINVAL) as a filter
+ * "old" method, using the .private pointer.
+ * You always have to pass a valid slave id as the argument, old drivers that
+ * pass ERR_PTR(-EINVAL) as a filter parameter and set it up in dma_slave_config
+ * need to be updated so we can remove the slave_id field from dma_slave_config.
   * parameter. If this filter is used, the slave driver, after calling
   * dma_request_channel(), will also have to call dmaengine_slave_config() with
- * .slave_id, .direction, and either .src_addr or .dst_addr set.
+ * .direction, and either .src_addr or .dst_addr set.
+ *
   * NOTE: this filter doesn't support multiple DMAC drivers with the DMA_SLAVE
   * capability! If this becomes a requirement, hardware glue drivers, using this
   * services would have to provide their own filters, which first would check
@@ -276,7 +278,7 @@ bool shdma_chan_filter(struct dma_chan *chan, void *arg)
  {
         struct shdma_chan *schan;
         struct shdma_dev *sdev;
-       int match = (long)arg;
+       int slave_id = (long)arg;
         int ret;
  
         /* Only support channels handled by this driver. */
@@ -284,19 +286,39 @@ bool shdma_chan_filter(struct dma_chan *chan, void *arg)
             shdma_alloc_chan_resources)
                 return false;
  
-       if (match < 0)
+       schan = to_shdma_chan(chan);
+       sdev = to_shdma_dev(chan->device);
+
+       /*
+        * For DT, the schan->slave_id field is generated by the
+        * set_slave function from the slave ID that is passed in
+        * from xlate. For the non-DT case, the slave ID is
+        * directly passed into the filter function by the driver
+        */
+       if (schan->dev->of_node) {
+               ret = sdev->ops->set_slave(schan, slave_id, 0, true);
+               if (ret < 0)
+                       return false;
+
+               schan->real_slave_id = schan->slave_id;
+               return true;
+       }
+
+       if (slave_id < 0) {
                 /* No slave requested - arbitrary channel */
+               dev_warn(sdev->dma_dev.dev, "invalid slave ID passed to dma_request_slave\n");
                 return true;
+       }
  
-       schan = to_shdma_chan(chan);
-       if (!schan->dev->of_node && match >= slave_num)
+       if (slave_id >= slave_num)
                 return false;
  
-       sdev = to_shdma_dev(schan->dma_chan.device);
-       ret = sdev->ops->set_slave(schan, match, 0, true);
+       ret = sdev->ops->set_slave(schan, slave_id, 0, true);
         if (ret < 0)
                 return false;
  
+       schan->real_slave_id = slave_id;
+
         return true;
  }
  EXPORT_SYMBOL(shdma_chan_filter);
@@ -452,6 +474,8 @@ static void shdma_free_chan_resources(struct dma_chan *chan)
                 chan->private = NULL;
         }
  
+       schan->real_slave_id = 0;
+
         spin_lock_irq(&schan->chan_lock);
  
         list_splice_init(&schan->ld_free, &list);
@@ -764,11 +788,20 @@ static int shdma_config(struct dma_chan *chan,
          */
         if (!config)
                 return -EINVAL;
+
+       /*
+        * overriding the slave_id through dma_slave_config is deprecated,
+        * but possibly some out-of-tree drivers still do it.
+        */
+       if (WARN_ON_ONCE(config->slave_id &&
+                        config->slave_id != schan->real_slave_id))
+               schan->real_slave_id = config->slave_id;
+
         /*
          * We could lock this, but you shouldn't be configuring the
          * channel, while using it...
          */
-       return shdma_setup_slave(schan, config->slave_id,
+       return shdma_setup_slave(schan,
                                  config->direction == DMA_DEV_TO_MEM ?
                                  config->src_addr : config->dst_addr);
  }
diff --git a/drivers/dma/sh/shdmac.c b/drivers/dma/sh/shdmac.c

index 9f1d4c7..11707df 100644 (file)
--- a/drivers/dma/sh/shdmac.c
+++ b/drivers/dma/sh/shdmac.c
@@ -443,7 +443,7 @@ static bool sh_dmae_reset(struct sh_dmae_device *shdev)
         return ret;
  }
  
-#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARM)
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
  static irqreturn_t sh_dmae_err(int irq, void *data)
  {
         struct sh_dmae_device *shdev = data;
@@ -689,7 +689,7 @@ static int sh_dmae_probe(struct platform_device *pdev)
         const struct sh_dmae_pdata *pdata;
         unsigned long chan_flag[SH_DMAE_MAX_CHANNELS] = {};
         int chan_irq[SH_DMAE_MAX_CHANNELS];
-#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARM)
+#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
         unsigned long irqflags = 0;
         int errirq;
  #endif
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c

new file mode 100644 (file)

index 0000000..f705798
--- /dev/null
+++ b/drivers/dma/sh/usb-dmac.c
@@ -0,0 +1,910 @@
+/*
+ * Renesas USB DMA Controller Driver
+ *
+ * Copyright (C) 2015 Renesas Electronics Corporation
+ *
+ * based on rcar-dmac.c
+ * Copyright (C) 2014 Renesas Electronics Inc.
+ * Author: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+/*
+ * struct usb_dmac_sg - Descriptor for a hardware transfer
+ * @mem_addr: memory address
+ * @size: transfer size in bytes
+ */
+struct usb_dmac_sg {
+       dma_addr_t mem_addr;
+       u32 size;
+};
+
+/*
+ * struct usb_dmac_desc - USB DMA Transfer Descriptor
+ * @vd: base virtual channel DMA transaction descriptor
+ * @direction: direction of the DMA transfer
+ * @sg_allocated_len: length of allocated sg
+ * @sg_len: length of sg
+ * @sg_index: index of sg
+ * @residue: residue after the DMAC completed a transfer
+ * @node: node for desc_got and desc_freed
+ * @done_cookie: cookie after the DMAC completed a transfer
+ * @sg: information for the transfer
+ */
+struct usb_dmac_desc {
+       struct virt_dma_desc vd;
+       enum dma_transfer_direction direction;
+       unsigned int sg_allocated_len;
+       unsigned int sg_len;
+       unsigned int sg_index;
+       u32 residue;
+       struct list_head node;
+       dma_cookie_t done_cookie;
+       struct usb_dmac_sg sg[0];
+};
+
+#define to_usb_dmac_desc(vd)   container_of(vd, struct usb_dmac_desc, vd)
+
+/*
+ * struct usb_dmac_chan - USB DMA Controller Channel
+ * @vc: base virtual DMA channel object
+ * @iomem: channel I/O memory base
+ * @index: index of this channel in the controller
+ * @irq: irq number of this channel
+ * @desc: the current descriptor
+ * @descs_allocated: number of descriptors allocated
+ * @desc_got: got descriptors
+ * @desc_freed: freed descriptors after the DMAC completed a transfer
+ */
+struct usb_dmac_chan {
+       struct virt_dma_chan vc;
+       void __iomem *iomem;
+       unsigned int index;
+       int irq;
+       struct usb_dmac_desc *desc;
+       int descs_allocated;
+       struct list_head desc_got;
+       struct list_head desc_freed;
+};
+
+#define to_usb_dmac_chan(c) container_of(c, struct usb_dmac_chan, vc.chan)
+
+/*
+ * struct usb_dmac - USB DMA Controller
+ * @engine: base DMA engine object
+ * @dev: the hardware device
+ * @iomem: remapped I/O memory base
+ * @n_channels: number of available channels
+ * @channels: array of DMAC channels
+ */
+struct usb_dmac {
+       struct dma_device engine;
+       struct device *dev;
+       void __iomem *iomem;
+
+       unsigned int n_channels;
+       struct usb_dmac_chan *channels;
+};
+
+#define to_usb_dmac(d)         container_of(d, struct usb_dmac, engine)
+
+/* -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define USB_DMAC_CHAN_OFFSET(i)                (0x20 + 0x20 * (i))
+
+#define USB_DMASWR                     0x0008
+#define USB_DMASWR_SWR                 (1 << 0)
+#define USB_DMAOR                      0x0060
+#define USB_DMAOR_AE                   (1 << 2)
+#define USB_DMAOR_DME                  (1 << 0)
+
+#define USB_DMASAR                     0x0000
+#define USB_DMADAR                     0x0004
+#define USB_DMATCR                     0x0008
+#define USB_DMATCR_MASK                        0x00ffffff
+#define USB_DMACHCR                    0x0014
+#define USB_DMACHCR_FTE                        (1 << 24)
+#define USB_DMACHCR_NULLE              (1 << 16)
+#define USB_DMACHCR_NULL               (1 << 12)
+#define USB_DMACHCR_TS_8B              ((0 << 7) | (0 << 6))
+#define USB_DMACHCR_TS_16B             ((0 << 7) | (1 << 6))
+#define USB_DMACHCR_TS_32B             ((1 << 7) | (0 << 6))
+#define USB_DMACHCR_IE                 (1 << 5)
+#define USB_DMACHCR_SP                 (1 << 2)
+#define USB_DMACHCR_TE                 (1 << 1)
+#define USB_DMACHCR_DE                 (1 << 0)
+#define USB_DMATEND                    0x0018
+
+/* Hardcode the xfer_shift to 5 (32bytes) */
+#define USB_DMAC_XFER_SHIFT    5
+#define USB_DMAC_XFER_SIZE     (1 << USB_DMAC_XFER_SHIFT)
+#define USB_DMAC_CHCR_TS       USB_DMACHCR_TS_32B
+#define USB_DMAC_SLAVE_BUSWIDTH        DMA_SLAVE_BUSWIDTH_32_BYTES
+
+/* for descriptors */
+#define USB_DMAC_INITIAL_NR_DESC       16
+#define USB_DMAC_INITIAL_NR_SG         8
+
+/* -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void usb_dmac_write(struct usb_dmac *dmac, u32 reg, u32 data)
+{
+       writel(data, dmac->iomem + reg);
+}
+
+static u32 usb_dmac_read(struct usb_dmac *dmac, u32 reg)
+{
+       return readl(dmac->iomem + reg);
+}
+
+static u32 usb_dmac_chan_read(struct usb_dmac_chan *chan, u32 reg)
+{
+       return readl(chan->iomem + reg);
+}
+
+static void usb_dmac_chan_write(struct usb_dmac_chan *chan, u32 reg, u32 data)
+{
+       writel(data, chan->iomem + reg);
+}
+
+/* -----------------------------------------------------------------------------
+ * Initialization and configuration
+ */
+
+static bool usb_dmac_chan_is_busy(struct usb_dmac_chan *chan)
+{
+       u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+
+       return (chcr & (USB_DMACHCR_DE | USB_DMACHCR_TE)) == USB_DMACHCR_DE;
+}
+
+static u32 usb_dmac_calc_tend(u32 size)
+{
+       /*
+        * Please refer to the Figure "Example of Final Transaction Valid
+        * Data Transfer Enable (EDTEN) Setting" in the data sheet.
+        */
+       return 0xffffffff << (32 - (size % USB_DMAC_XFER_SIZE ? :
+                                               USB_DMAC_XFER_SIZE));
+}
+
+/* This function is already held by vc.lock */
+static void usb_dmac_chan_start_sg(struct usb_dmac_chan *chan,
+                                  unsigned int index)
+{
+       struct usb_dmac_desc *desc = chan->desc;
+       struct usb_dmac_sg *sg = desc->sg + index;
+       dma_addr_t src_addr = 0, dst_addr = 0;
+
+       WARN_ON_ONCE(usb_dmac_chan_is_busy(chan));
+
+       if (desc->direction == DMA_DEV_TO_MEM)
+               dst_addr = sg->mem_addr;
+       else
+               src_addr = sg->mem_addr;
+
+       dev_dbg(chan->vc.chan.device->dev,
+               "chan%u: queue sg %p: %u@%pad -> %pad\n",
+               chan->index, sg, sg->size, &src_addr, &dst_addr);
+
+       usb_dmac_chan_write(chan, USB_DMASAR, src_addr & 0xffffffff);
+       usb_dmac_chan_write(chan, USB_DMADAR, dst_addr & 0xffffffff);
+       usb_dmac_chan_write(chan, USB_DMATCR,
+                           DIV_ROUND_UP(sg->size, USB_DMAC_XFER_SIZE));
+       usb_dmac_chan_write(chan, USB_DMATEND, usb_dmac_calc_tend(sg->size));
+
+       usb_dmac_chan_write(chan, USB_DMACHCR, USB_DMAC_CHCR_TS |
+                       USB_DMACHCR_NULLE | USB_DMACHCR_IE | USB_DMACHCR_DE);
+}
+
+/* This function is already held by vc.lock */
+static void usb_dmac_chan_start_desc(struct usb_dmac_chan *chan)
+{
+       struct virt_dma_desc *vd;
+
+       vd = vchan_next_desc(&chan->vc);
+       if (!vd) {
+               chan->desc = NULL;
+               return;
+       }
+
+       /*
+        * Remove this request from vc->desc_issued. Otherwise, this driver
+        * will get the previous value from vchan_next_desc() after a transfer
+        * was completed.
+        */
+       list_del(&vd->node);
+
+       chan->desc = to_usb_dmac_desc(vd);
+       chan->desc->sg_index = 0;
+       usb_dmac_chan_start_sg(chan, 0);
+}
+
+static int usb_dmac_init(struct usb_dmac *dmac)
+{
+       u16 dmaor;
+
+       /* Clear all channels and enable the DMAC globally. */
+       usb_dmac_write(dmac, USB_DMAOR, USB_DMAOR_DME);
+
+       dmaor = usb_dmac_read(dmac, USB_DMAOR);
+       if ((dmaor & (USB_DMAOR_AE | USB_DMAOR_DME)) != USB_DMAOR_DME) {
+               dev_warn(dmac->dev, "DMAOR initialization failed.\n");
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors allocation and free
+ */
+static int usb_dmac_desc_alloc(struct usb_dmac_chan *chan, unsigned int sg_len,
+                              gfp_t gfp)
+{
+       struct usb_dmac_desc *desc;
+       unsigned long flags;
+
+       desc = kzalloc(sizeof(*desc) + sg_len * sizeof(desc->sg[0]), gfp);
+       if (!desc)
+               return -ENOMEM;
+
+       desc->sg_allocated_len = sg_len;
+       INIT_LIST_HEAD(&desc->node);
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       list_add_tail(&desc->node, &chan->desc_freed);
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       return 0;
+}
+
+static void usb_dmac_desc_free(struct usb_dmac_chan *chan)
+{
+       struct usb_dmac_desc *desc, *_desc;
+       LIST_HEAD(list);
+
+       list_splice_init(&chan->desc_freed, &list);
+       list_splice_init(&chan->desc_got, &list);
+
+       list_for_each_entry_safe(desc, _desc, &list, node) {
+               list_del(&desc->node);
+               kfree(desc);
+       }
+       chan->descs_allocated = 0;
+}
+
+static struct usb_dmac_desc *usb_dmac_desc_get(struct usb_dmac_chan *chan,
+                                              unsigned int sg_len, gfp_t gfp)
+{
+       struct usb_dmac_desc *desc = NULL;
+       unsigned long flags;
+
+       /* Get a freed descritpor */
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       list_for_each_entry(desc, &chan->desc_freed, node) {
+               if (sg_len <= desc->sg_allocated_len) {
+                       list_move_tail(&desc->node, &chan->desc_got);
+                       spin_unlock_irqrestore(&chan->vc.lock, flags);
+                       return desc;
+               }
+       }
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       /* Allocate a new descriptor */
+       if (!usb_dmac_desc_alloc(chan, sg_len, gfp)) {
+               /* If allocated the desc, it was added to tail of the list */
+               spin_lock_irqsave(&chan->vc.lock, flags);
+               desc = list_last_entry(&chan->desc_freed, struct usb_dmac_desc,
+                                      node);
+               list_move_tail(&desc->node, &chan->desc_got);
+               spin_unlock_irqrestore(&chan->vc.lock, flags);
+               return desc;
+       }
+
+       return NULL;
+}
+
+static void usb_dmac_desc_put(struct usb_dmac_chan *chan,
+                             struct usb_dmac_desc *desc)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       list_move_tail(&desc->node, &chan->desc_freed);
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+}
+
+/* -----------------------------------------------------------------------------
+ * Stop and reset
+ */
+
+static void usb_dmac_soft_reset(struct usb_dmac_chan *uchan)
+{
+       struct dma_chan *chan = &uchan->vc.chan;
+       struct usb_dmac *dmac = to_usb_dmac(chan->device);
+       int i;
+
+       /* Don't issue soft reset if any one of channels is busy */
+       for (i = 0; i < dmac->n_channels; ++i) {
+               if (usb_dmac_chan_is_busy(uchan))
+                       return;
+       }
+
+       usb_dmac_write(dmac, USB_DMAOR, 0);
+       usb_dmac_write(dmac, USB_DMASWR, USB_DMASWR_SWR);
+       udelay(100);
+       usb_dmac_write(dmac, USB_DMASWR, 0);
+       usb_dmac_write(dmac, USB_DMAOR, 1);
+}
+
+static void usb_dmac_chan_halt(struct usb_dmac_chan *chan)
+{
+       u32 chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+
+       chcr &= ~(USB_DMACHCR_IE | USB_DMACHCR_TE | USB_DMACHCR_DE);
+       usb_dmac_chan_write(chan, USB_DMACHCR, chcr);
+
+       usb_dmac_soft_reset(chan);
+}
+
+static void usb_dmac_stop(struct usb_dmac *dmac)
+{
+       usb_dmac_write(dmac, USB_DMAOR, 0);
+}
+
+/* -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int usb_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       int ret;
+
+       while (uchan->descs_allocated < USB_DMAC_INITIAL_NR_DESC) {
+               ret = usb_dmac_desc_alloc(uchan, USB_DMAC_INITIAL_NR_SG,
+                                         GFP_KERNEL);
+               if (ret < 0) {
+                       usb_dmac_desc_free(uchan);
+                       return ret;
+               }
+               uchan->descs_allocated++;
+       }
+
+       return pm_runtime_get_sync(chan->device->dev);
+}
+
+static void usb_dmac_free_chan_resources(struct dma_chan *chan)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       unsigned long flags;
+
+       /* Protect against ISR */
+       spin_lock_irqsave(&uchan->vc.lock, flags);
+       usb_dmac_chan_halt(uchan);
+       spin_unlock_irqrestore(&uchan->vc.lock, flags);
+
+       usb_dmac_desc_free(uchan);
+       vchan_free_chan_resources(&uchan->vc);
+
+       pm_runtime_put(chan->device->dev);
+}
+
+static struct dma_async_tx_descriptor *
+usb_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+                      unsigned int sg_len, enum dma_transfer_direction dir,
+                      unsigned long dma_flags, void *context)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       struct usb_dmac_desc *desc;
+       struct scatterlist *sg;
+       int i;
+
+       if (!sg_len) {
+               dev_warn(chan->device->dev,
+                        "%s: bad parameter: len=%d\n", __func__, sg_len);
+               return NULL;
+       }
+
+       desc = usb_dmac_desc_get(uchan, sg_len, GFP_NOWAIT);
+       if (!desc)
+               return NULL;
+
+       desc->direction = dir;
+       desc->sg_len = sg_len;
+       for_each_sg(sgl, sg, sg_len, i) {
+               desc->sg[i].mem_addr = sg_dma_address(sg);
+               desc->sg[i].size = sg_dma_len(sg);
+       }
+
+       return vchan_tx_prep(&uchan->vc, &desc->vd, dma_flags);
+}
+
+static int usb_dmac_chan_terminate_all(struct dma_chan *chan)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       struct usb_dmac_desc *desc;
+       unsigned long flags;
+       LIST_HEAD(head);
+       LIST_HEAD(list);
+
+       spin_lock_irqsave(&uchan->vc.lock, flags);
+       usb_dmac_chan_halt(uchan);
+       vchan_get_all_descriptors(&uchan->vc, &head);
+       if (uchan->desc)
+               uchan->desc = NULL;
+       list_splice_init(&uchan->desc_got, &list);
+       list_for_each_entry(desc, &list, node)
+               list_move_tail(&desc->node, &uchan->desc_freed);
+       spin_unlock_irqrestore(&uchan->vc.lock, flags);
+       vchan_dma_desc_free_list(&uchan->vc, &head);
+
+       return 0;
+}
+
+static unsigned int usb_dmac_get_current_residue(struct usb_dmac_chan *chan,
+                                                struct usb_dmac_desc *desc,
+                                                int sg_index)
+{
+       struct usb_dmac_sg *sg = desc->sg + sg_index;
+       u32 mem_addr = sg->mem_addr & 0xffffffff;
+       unsigned int residue = sg->size;
+
+       /*
+        * We cannot use USB_DMATCR to calculate residue because USB_DMATCR
+        * has unsuited value to calculate.
+        */
+       if (desc->direction == DMA_DEV_TO_MEM)
+               residue -= usb_dmac_chan_read(chan, USB_DMADAR) - mem_addr;
+       else
+               residue -= usb_dmac_chan_read(chan, USB_DMASAR) - mem_addr;
+
+       return residue;
+}
+
+static u32 usb_dmac_chan_get_residue_if_complete(struct usb_dmac_chan *chan,
+                                                dma_cookie_t cookie)
+{
+       struct usb_dmac_desc *desc;
+       u32 residue = 0;
+
+       list_for_each_entry_reverse(desc, &chan->desc_freed, node) {
+               if (desc->done_cookie == cookie) {
+                       residue = desc->residue;
+                       break;
+               }
+       }
+
+       return residue;
+}
+
+static u32 usb_dmac_chan_get_residue(struct usb_dmac_chan *chan,
+                                    dma_cookie_t cookie)
+{
+       u32 residue = 0;
+       struct virt_dma_desc *vd;
+       struct usb_dmac_desc *desc = chan->desc;
+       int i;
+
+       if (!desc) {
+               vd = vchan_find_desc(&chan->vc, cookie);
+               if (!vd)
+                       return 0;
+               desc = to_usb_dmac_desc(vd);
+       }
+
+       /* Compute the size of all usb_dmac_sg still to be transferred */
+       for (i = desc->sg_index + 1; i < desc->sg_len; i++)
+               residue += desc->sg[i].size;
+
+       /* Add the residue for the current sg */
+       residue += usb_dmac_get_current_residue(chan, desc, desc->sg_index);
+
+       return residue;
+}
+
+static enum dma_status usb_dmac_tx_status(struct dma_chan *chan,
+                                         dma_cookie_t cookie,
+                                         struct dma_tx_state *txstate)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       enum dma_status status;
+       unsigned int residue = 0;
+       unsigned long flags;
+
+       status = dma_cookie_status(chan, cookie, txstate);
+       /* a client driver will get residue after DMA_COMPLETE */
+       if (!txstate)
+               return status;
+
+       spin_lock_irqsave(&uchan->vc.lock, flags);
+       if (status == DMA_COMPLETE)
+               residue = usb_dmac_chan_get_residue_if_complete(uchan, cookie);
+       else
+               residue = usb_dmac_chan_get_residue(uchan, cookie);
+       spin_unlock_irqrestore(&uchan->vc.lock, flags);
+
+       dma_set_residue(txstate, residue);
+
+       return status;
+}
+
+static void usb_dmac_issue_pending(struct dma_chan *chan)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       unsigned long flags;
+
+       spin_lock_irqsave(&uchan->vc.lock, flags);
+       if (vchan_issue_pending(&uchan->vc) && !uchan->desc)
+               usb_dmac_chan_start_desc(uchan);
+       spin_unlock_irqrestore(&uchan->vc.lock, flags);
+}
+
+static void usb_dmac_virt_desc_free(struct virt_dma_desc *vd)
+{
+       struct usb_dmac_desc *desc = to_usb_dmac_desc(vd);
+       struct usb_dmac_chan *chan = to_usb_dmac_chan(vd->tx.chan);
+
+       usb_dmac_desc_put(chan, desc);
+}
+
+/* -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static void usb_dmac_isr_transfer_end(struct usb_dmac_chan *chan)
+{
+       struct usb_dmac_desc *desc = chan->desc;
+
+       BUG_ON(!desc);
+
+       if (++desc->sg_index < desc->sg_len) {
+               usb_dmac_chan_start_sg(chan, desc->sg_index);
+       } else {
+               desc->residue = usb_dmac_get_current_residue(chan, desc,
+                                                       desc->sg_index - 1);
+               desc->done_cookie = desc->vd.tx.cookie;
+               vchan_cookie_complete(&desc->vd);
+
+               /* Restart the next transfer if this driver has a next desc */
+               usb_dmac_chan_start_desc(chan);
+       }
+}
+
+static irqreturn_t usb_dmac_isr_channel(int irq, void *dev)
+{
+       struct usb_dmac_chan *chan = dev;
+       irqreturn_t ret = IRQ_NONE;
+       u32 mask = USB_DMACHCR_TE;
+       u32 check_bits = USB_DMACHCR_TE | USB_DMACHCR_SP;
+       u32 chcr;
+
+       spin_lock(&chan->vc.lock);
+
+       chcr = usb_dmac_chan_read(chan, USB_DMACHCR);
+       if (chcr & check_bits)
+               mask |= USB_DMACHCR_DE | check_bits;
+       if (chcr & USB_DMACHCR_NULL) {
+               /* An interruption of TE will happen after we set FTE */
+               mask |= USB_DMACHCR_NULL;
+               chcr |= USB_DMACHCR_FTE;
+               ret |= IRQ_HANDLED;
+       }
+       usb_dmac_chan_write(chan, USB_DMACHCR, chcr & ~mask);
+
+       if (chcr & check_bits) {
+               usb_dmac_isr_transfer_end(chan);
+               ret |= IRQ_HANDLED;
+       }
+
+       spin_unlock(&chan->vc.lock);
+
+       return ret;
+}
+
+/* -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool usb_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+       struct usb_dmac_chan *uchan = to_usb_dmac_chan(chan);
+       struct of_phandle_args *dma_spec = arg;
+
+       if (dma_spec->np != chan->device->dev->of_node)
+               return false;
+
+       /* USB-DMAC should be used with fixed usb controller's FIFO */
+       if (uchan->index != dma_spec->args[0])
+               return false;
+
+       return true;
+}
+
+static struct dma_chan *usb_dmac_of_xlate(struct of_phandle_args *dma_spec,
+                                         struct of_dma *ofdma)
+{
+       struct usb_dmac_chan *uchan;
+       struct dma_chan *chan;
+       dma_cap_mask_t mask;
+
+       if (dma_spec->args_count != 1)
+               return NULL;
+
+       /* Only slave DMA channels can be allocated via DT */
+       dma_cap_zero(mask);
+       dma_cap_set(DMA_SLAVE, mask);
+
+       chan = dma_request_channel(mask, usb_dmac_chan_filter, dma_spec);
+       if (!chan)
+               return NULL;
+
+       uchan = to_usb_dmac_chan(chan);
+
+       return chan;
+}
+
+/* -----------------------------------------------------------------------------
+ * Power management
+ */
+
+static int usb_dmac_runtime_suspend(struct device *dev)
+{
+       struct usb_dmac *dmac = dev_get_drvdata(dev);
+       int i;
+
+       for (i = 0; i < dmac->n_channels; ++i)
+               usb_dmac_chan_halt(&dmac->channels[i]);
+
+       return 0;
+}
+
+static int usb_dmac_runtime_resume(struct device *dev)
+{
+       struct usb_dmac *dmac = dev_get_drvdata(dev);
+
+       return usb_dmac_init(dmac);
+}
+
+static const struct dev_pm_ops usb_dmac_pm = {
+       SET_RUNTIME_PM_OPS(usb_dmac_runtime_suspend, usb_dmac_runtime_resume,
+                          NULL)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int usb_dmac_chan_probe(struct usb_dmac *dmac,
+                              struct usb_dmac_chan *uchan,
+                              unsigned int index)
+{
+       struct platform_device *pdev = to_platform_device(dmac->dev);
+       char pdev_irqname[5];
+       char *irqname;
+       int ret;
+
+       uchan->index = index;
+       uchan->iomem = dmac->iomem + USB_DMAC_CHAN_OFFSET(index);
+
+       /* Request the channel interrupt. */
+       sprintf(pdev_irqname, "ch%u", index);
+       uchan->irq = platform_get_irq_byname(pdev, pdev_irqname);
+       if (uchan->irq < 0) {
+               dev_err(dmac->dev, "no IRQ specified for channel %u\n", index);
+               return -ENODEV;
+       }
+
+       irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
+                                dev_name(dmac->dev), index);
+       if (!irqname)
+               return -ENOMEM;
+
+       ret = devm_request_irq(dmac->dev, uchan->irq, usb_dmac_isr_channel,
+                              IRQF_SHARED, irqname, uchan);
+       if (ret) {
+               dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+                       uchan->irq, ret);
+               return ret;
+       }
+
+       uchan->vc.desc_free = usb_dmac_virt_desc_free;
+       vchan_init(&uchan->vc, &dmac->engine);
+       INIT_LIST_HEAD(&uchan->desc_freed);
+       INIT_LIST_HEAD(&uchan->desc_got);
+
+       return 0;
+}
+
+static int usb_dmac_parse_of(struct device *dev, struct usb_dmac *dmac)
+{
+       struct device_node *np = dev->of_node;
+       int ret;
+
+       ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+       if (ret < 0) {
+               dev_err(dev, "unable to read dma-channels property\n");
+               return ret;
+       }
+
+       if (dmac->n_channels <= 0 || dmac->n_channels >= 100) {
+               dev_err(dev, "invalid number of channels %u\n",
+                       dmac->n_channels);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int usb_dmac_probe(struct platform_device *pdev)
+{
+       const enum dma_slave_buswidth widths = USB_DMAC_SLAVE_BUSWIDTH;
+       struct dma_device *engine;
+       struct usb_dmac *dmac;
+       struct resource *mem;
+       unsigned int i;
+       int ret;
+
+       dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+       if (!dmac)
+               return -ENOMEM;
+
+       dmac->dev = &pdev->dev;
+       platform_set_drvdata(pdev, dmac);
+
+       ret = usb_dmac_parse_of(&pdev->dev, dmac);
+       if (ret < 0)
+               return ret;
+
+       dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+                                     sizeof(*dmac->channels), GFP_KERNEL);
+       if (!dmac->channels)
+               return -ENOMEM;
+
+       /* Request resources. */
+       mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       dmac->iomem = devm_ioremap_resource(&pdev->dev, mem);
+       if (IS_ERR(dmac->iomem))
+               return PTR_ERR(dmac->iomem);
+
+       /* Enable runtime PM and initialize the device. */
+       pm_runtime_enable(&pdev->dev);
+       ret = pm_runtime_get_sync(&pdev->dev);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret);
+               return ret;
+       }
+
+       ret = usb_dmac_init(dmac);
+       pm_runtime_put(&pdev->dev);
+
+       if (ret) {
+               dev_err(&pdev->dev, "failed to reset device\n");
+               goto error;
+       }
+
+       /* Initialize the channels. */
+       INIT_LIST_HEAD(&dmac->engine.channels);
+
+       for (i = 0; i < dmac->n_channels; ++i) {
+               ret = usb_dmac_chan_probe(dmac, &dmac->channels[i], i);
+               if (ret < 0)
+                       goto error;
+       }
+
+       /* Register the DMAC as a DMA provider for DT. */
+       ret = of_dma_controller_register(pdev->dev.of_node, usb_dmac_of_xlate,
+                                        NULL);
+       if (ret < 0)
+               goto error;
+
+       /*
+        * Register the DMA engine device.
+        *
+        * Default transfer size of 32 bytes requires 32-byte alignment.
+        */
+       engine = &dmac->engine;
+       dma_cap_set(DMA_SLAVE, engine->cap_mask);
+
+       engine->dev = &pdev->dev;
+
+       engine->src_addr_widths = widths;
+       engine->dst_addr_widths = widths;
+       engine->directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+       engine->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+
+       engine->device_alloc_chan_resources = usb_dmac_alloc_chan_resources;
+       engine->device_free_chan_resources = usb_dmac_free_chan_resources;
+       engine->device_prep_slave_sg = usb_dmac_prep_slave_sg;
+       engine->device_terminate_all = usb_dmac_chan_terminate_all;
+       engine->device_tx_status = usb_dmac_tx_status;
+       engine->device_issue_pending = usb_dmac_issue_pending;
+
+       ret = dma_async_device_register(engine);
+       if (ret < 0)
+               goto error;
+
+       return 0;
+
+error:
+       of_dma_controller_free(pdev->dev.of_node);
+       pm_runtime_disable(&pdev->dev);
+       return ret;
+}
+
+static void usb_dmac_chan_remove(struct usb_dmac *dmac,
+                                struct usb_dmac_chan *uchan)
+{
+       usb_dmac_chan_halt(uchan);
+       devm_free_irq(dmac->dev, uchan->irq, uchan);
+}
+
+static int usb_dmac_remove(struct platform_device *pdev)
+{
+       struct usb_dmac *dmac = platform_get_drvdata(pdev);
+       int i;
+
+       for (i = 0; i < dmac->n_channels; ++i)
+               usb_dmac_chan_remove(dmac, &dmac->channels[i]);
+       of_dma_controller_free(pdev->dev.of_node);
+       dma_async_device_unregister(&dmac->engine);
+
+       pm_runtime_disable(&pdev->dev);
+
+       return 0;
+}
+
+static void usb_dmac_shutdown(struct platform_device *pdev)
+{
+       struct usb_dmac *dmac = platform_get_drvdata(pdev);
+
+       usb_dmac_stop(dmac);
+}
+
+static const struct of_device_id usb_dmac_of_ids[] = {
+       { .compatible = "renesas,usb-dmac", },
+       { /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, usb_dmac_of_ids);
+
+static struct platform_driver usb_dmac_driver = {
+       .driver         = {
+               .pm     = &usb_dmac_pm,
+               .name   = "usb-dmac",
+               .of_match_table = usb_dmac_of_ids,
+       },
+       .probe          = usb_dmac_probe,
+       .remove         = usb_dmac_remove,
+       .shutdown       = usb_dmac_shutdown,
+};
+
+module_platform_driver(usb_dmac_driver);
+
+MODULE_DESCRIPTION("Renesas USB DMA Controller Driver");
+MODULE_AUTHOR("Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c

index d0086e9..a1afda4 100644 (file)
--- a/drivers/dma/sirf-dma.c
+++ b/drivers/dma/sirf-dma.c
@@ -896,7 +896,7 @@ static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
         SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume)
  };
  
-static struct of_device_id sirfsoc_dma_match[] = {
+static const struct of_device_id sirfsoc_dma_match[] = {
         { .compatible = "sirf,prima2-dmac", },
         { .compatible = "sirf,marco-dmac", },
         {},
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c

index 1332b1d..3c10f03 100644 (file)
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -2514,7 +2514,8 @@ static struct dma_async_tx_descriptor *d40_prep_memcpy(struct dma_chan *chan,
         sg_dma_len(&dst_sg) = size;
         sg_dma_len(&src_sg) = size;
  
-       return d40_prep_sg(chan, &src_sg, &dst_sg, 1, DMA_NONE, dma_flags);
+       return d40_prep_sg(chan, &src_sg, &dst_sg, 1,
+                          DMA_MEM_TO_MEM, dma_flags);
  }
  
  static struct dma_async_tx_descriptor *
@@ -2526,7 +2527,8 @@ d40_prep_memcpy_sg(struct dma_chan *chan,
         if (dst_nents != src_nents)
                 return NULL;
  
-       return d40_prep_sg(chan, src_sg, dst_sg, src_nents, DMA_NONE, dma_flags);
+       return d40_prep_sg(chan, src_sg, dst_sg, src_nents,
+                          DMA_MEM_TO_MEM, dma_flags);
  }
  
  static struct dma_async_tx_descriptor *
diff --git a/drivers/dma/sun6i-dma.c b/drivers/dma/sun6i-dma.c

index 7ebcf9b..11e5365 100644 (file)
--- a/drivers/dma/sun6i-dma.c
+++ b/drivers/dma/sun6i-dma.c
@@ -796,11 +796,6 @@ static void sun6i_dma_issue_pending(struct dma_chan *chan)
         spin_unlock_irqrestore(&vchan->vc.lock, flags);
  }
  
-static int sun6i_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       return 0;
-}
-
  static void sun6i_dma_free_chan_resources(struct dma_chan *chan)
  {
         struct sun6i_dma_dev *sdev = to_sun6i_dma_dev(chan->device);
@@ -896,7 +891,7 @@ static struct sun6i_dma_config sun8i_a23_dma_cfg = {
         .nr_max_vchans   = 37,
  };
  
-static struct of_device_id sun6i_dma_match[] = {
+static const struct of_device_id sun6i_dma_match[] = {
         { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg },
         { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg },
         { /* sentinel */ }
@@ -957,7 +952,6 @@ static int sun6i_dma_probe(struct platform_device *pdev)
         dma_cap_set(DMA_SLAVE, sdc->slave.cap_mask);
  
         INIT_LIST_HEAD(&sdc->slave.channels);
-       sdc->slave.device_alloc_chan_resources  = sun6i_dma_alloc_chan_resources;
         sdc->slave.device_free_chan_resources   = sun6i_dma_free_chan_resources;
         sdc->slave.device_tx_status             = sun6i_dma_tx_status;
         sdc->slave.device_issue_pending         = sun6i_dma_issue_pending;
diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c

new file mode 100755 (executable)

index 0000000..f52e375
--- /dev/null
+++ b/drivers/dma/xgene-dma.c
@@ -0,0 +1,2089 @@
+/*
+ * Applied Micro X-Gene SoC DMA engine Driver
+ *
+ * Copyright (c) 2015, Applied Micro Circuits Corporation
+ * Authors: Rameshwar Prasad Sahu <rsahu@apm.com>
+ *         Loc Ho <lho@apm.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * NOTE: PM support is currently not available.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/dmapool.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+
+#include "dmaengine.h"
+
+/* X-Gene DMA ring csr registers and bit definations */
+#define XGENE_DMA_RING_CONFIG                  0x04
+#define XGENE_DMA_RING_ENABLE                  BIT(31)
+#define XGENE_DMA_RING_ID                      0x08
+#define XGENE_DMA_RING_ID_SETUP(v)             ((v) | BIT(31))
+#define XGENE_DMA_RING_ID_BUF                  0x0C
+#define XGENE_DMA_RING_ID_BUF_SETUP(v)         (((v) << 9) | BIT(21))
+#define XGENE_DMA_RING_THRESLD0_SET1           0x30
+#define XGENE_DMA_RING_THRESLD0_SET1_VAL       0X64
+#define XGENE_DMA_RING_THRESLD1_SET1           0x34
+#define XGENE_DMA_RING_THRESLD1_SET1_VAL       0xC8
+#define XGENE_DMA_RING_HYSTERESIS              0x68
+#define XGENE_DMA_RING_HYSTERESIS_VAL          0xFFFFFFFF
+#define XGENE_DMA_RING_STATE                   0x6C
+#define XGENE_DMA_RING_STATE_WR_BASE           0x70
+#define XGENE_DMA_RING_NE_INT_MODE             0x017C
+#define XGENE_DMA_RING_NE_INT_MODE_SET(m, v)   \
+       ((m) = ((m) & ~BIT(31 - (v))) | BIT(31 - (v)))
+#define XGENE_DMA_RING_NE_INT_MODE_RESET(m, v) \
+       ((m) &= (~BIT(31 - (v))))
+#define XGENE_DMA_RING_CLKEN                   0xC208
+#define XGENE_DMA_RING_SRST                    0xC200
+#define XGENE_DMA_RING_MEM_RAM_SHUTDOWN                0xD070
+#define XGENE_DMA_RING_BLK_MEM_RDY             0xD074
+#define XGENE_DMA_RING_BLK_MEM_RDY_VAL         0xFFFFFFFF
+#define XGENE_DMA_RING_DESC_CNT(v)             (((v) & 0x0001FFFE) >> 1)
+#define XGENE_DMA_RING_ID_GET(owner, num)      (((owner) << 6) | (num))
+#define XGENE_DMA_RING_DST_ID(v)               ((1 << 10) | (v))
+#define XGENE_DMA_RING_CMD_OFFSET              0x2C
+#define XGENE_DMA_RING_CMD_BASE_OFFSET(v)      ((v) << 6)
+#define XGENE_DMA_RING_COHERENT_SET(m)         \
+       (((u32 *)(m))[2] |= BIT(4))
+#define XGENE_DMA_RING_ADDRL_SET(m, v)         \
+       (((u32 *)(m))[2] |= (((v) >> 8) << 5))
+#define XGENE_DMA_RING_ADDRH_SET(m, v)         \
+       (((u32 *)(m))[3] |= ((v) >> 35))
+#define XGENE_DMA_RING_ACCEPTLERR_SET(m)       \
+       (((u32 *)(m))[3] |= BIT(19))
+#define XGENE_DMA_RING_SIZE_SET(m, v)          \
+       (((u32 *)(m))[3] |= ((v) << 23))
+#define XGENE_DMA_RING_RECOMBBUF_SET(m)                \
+       (((u32 *)(m))[3] |= BIT(27))
+#define XGENE_DMA_RING_RECOMTIMEOUTL_SET(m)    \
+       (((u32 *)(m))[3] |= (0x7 << 28))
+#define XGENE_DMA_RING_RECOMTIMEOUTH_SET(m)    \
+       (((u32 *)(m))[4] |= 0x3)
+#define XGENE_DMA_RING_SELTHRSH_SET(m)         \
+       (((u32 *)(m))[4] |= BIT(3))
+#define XGENE_DMA_RING_TYPE_SET(m, v)          \
+       (((u32 *)(m))[4] |= ((v) << 19))
+
+/* X-Gene DMA device csr registers and bit definitions */
+#define XGENE_DMA_IPBRR                                0x0
+#define XGENE_DMA_DEV_ID_RD(v)                 ((v) & 0x00000FFF)
+#define XGENE_DMA_BUS_ID_RD(v)                 (((v) >> 12) & 3)
+#define XGENE_DMA_REV_NO_RD(v)                 (((v) >> 14) & 3)
+#define XGENE_DMA_GCR                          0x10
+#define XGENE_DMA_CH_SETUP(v)                  \
+       ((v) = ((v) & ~0x000FFFFF) | 0x000AAFFF)
+#define XGENE_DMA_ENABLE(v)                    ((v) |= BIT(31))
+#define XGENE_DMA_DISABLE(v)                   ((v) &= ~BIT(31))
+#define XGENE_DMA_RAID6_CONT                   0x14
+#define XGENE_DMA_RAID6_MULTI_CTRL(v)          ((v) << 24)
+#define XGENE_DMA_INT                          0x70
+#define XGENE_DMA_INT_MASK                     0x74
+#define XGENE_DMA_INT_ALL_MASK                 0xFFFFFFFF
+#define XGENE_DMA_INT_ALL_UNMASK               0x0
+#define XGENE_DMA_INT_MASK_SHIFT               0x14
+#define XGENE_DMA_RING_INT0_MASK               0x90A0
+#define XGENE_DMA_RING_INT1_MASK               0x90A8
+#define XGENE_DMA_RING_INT2_MASK               0x90B0
+#define XGENE_DMA_RING_INT3_MASK               0x90B8
+#define XGENE_DMA_RING_INT4_MASK               0x90C0
+#define XGENE_DMA_CFG_RING_WQ_ASSOC            0x90E0
+#define XGENE_DMA_ASSOC_RING_MNGR1             0xFFFFFFFF
+#define XGENE_DMA_MEM_RAM_SHUTDOWN             0xD070
+#define XGENE_DMA_BLK_MEM_RDY                  0xD074
+#define XGENE_DMA_BLK_MEM_RDY_VAL              0xFFFFFFFF
+
+/* X-Gene SoC EFUSE csr register and bit defination */
+#define XGENE_SOC_JTAG1_SHADOW                 0x18
+#define XGENE_DMA_PQ_DISABLE_MASK              BIT(13)
+
+/* X-Gene DMA Descriptor format */
+#define XGENE_DMA_DESC_NV_BIT                  BIT_ULL(50)
+#define XGENE_DMA_DESC_IN_BIT                  BIT_ULL(55)
+#define XGENE_DMA_DESC_C_BIT                   BIT_ULL(63)
+#define XGENE_DMA_DESC_DR_BIT                  BIT_ULL(61)
+#define XGENE_DMA_DESC_ELERR_POS               46
+#define XGENE_DMA_DESC_RTYPE_POS               56
+#define XGENE_DMA_DESC_LERR_POS                        60
+#define XGENE_DMA_DESC_FLYBY_POS               4
+#define XGENE_DMA_DESC_BUFLEN_POS              48
+#define XGENE_DMA_DESC_HOENQ_NUM_POS           48
+
+#define XGENE_DMA_DESC_NV_SET(m)               \
+       (((u64 *)(m))[0] |= XGENE_DMA_DESC_NV_BIT)
+#define XGENE_DMA_DESC_IN_SET(m)               \
+       (((u64 *)(m))[0] |= XGENE_DMA_DESC_IN_BIT)
+#define XGENE_DMA_DESC_RTYPE_SET(m, v)         \
+       (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_RTYPE_POS))
+#define XGENE_DMA_DESC_BUFADDR_SET(m, v)       \
+       (((u64 *)(m))[0] |= (v))
+#define XGENE_DMA_DESC_BUFLEN_SET(m, v)                \
+       (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_BUFLEN_POS))
+#define XGENE_DMA_DESC_C_SET(m)                        \
+       (((u64 *)(m))[1] |= XGENE_DMA_DESC_C_BIT)
+#define XGENE_DMA_DESC_FLYBY_SET(m, v)         \
+       (((u64 *)(m))[2] |= ((v) << XGENE_DMA_DESC_FLYBY_POS))
+#define XGENE_DMA_DESC_MULTI_SET(m, v, i)      \
+       (((u64 *)(m))[2] |= ((u64)(v) << (((i) + 1) * 8)))
+#define XGENE_DMA_DESC_DR_SET(m)               \
+       (((u64 *)(m))[2] |= XGENE_DMA_DESC_DR_BIT)
+#define XGENE_DMA_DESC_DST_ADDR_SET(m, v)      \
+       (((u64 *)(m))[3] |= (v))
+#define XGENE_DMA_DESC_H0ENQ_NUM_SET(m, v)     \
+       (((u64 *)(m))[3] |= ((u64)(v) << XGENE_DMA_DESC_HOENQ_NUM_POS))
+#define XGENE_DMA_DESC_ELERR_RD(m)             \
+       (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3)
+#define XGENE_DMA_DESC_LERR_RD(m)              \
+       (((m) >> XGENE_DMA_DESC_LERR_POS) & 0x7)
+#define XGENE_DMA_DESC_STATUS(elerr, lerr)     \
+       (((elerr) << 4) | (lerr))
+
+/* X-Gene DMA descriptor empty s/w signature */
+#define XGENE_DMA_DESC_EMPTY_INDEX             0
+#define XGENE_DMA_DESC_EMPTY_SIGNATURE         ~0ULL
+#define XGENE_DMA_DESC_SET_EMPTY(m)            \
+       (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] =     \
+        XGENE_DMA_DESC_EMPTY_SIGNATURE)
+#define XGENE_DMA_DESC_IS_EMPTY(m)             \
+       (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] ==    \
+        XGENE_DMA_DESC_EMPTY_SIGNATURE)
+
+/* X-Gene DMA configurable parameters defines */
+#define XGENE_DMA_RING_NUM             512
+#define XGENE_DMA_BUFNUM               0x0
+#define XGENE_DMA_CPU_BUFNUM           0x18
+#define XGENE_DMA_RING_OWNER_DMA       0x03
+#define XGENE_DMA_RING_OWNER_CPU       0x0F
+#define XGENE_DMA_RING_TYPE_REGULAR    0x01
+#define XGENE_DMA_RING_WQ_DESC_SIZE    32      /* 32 Bytes */
+#define XGENE_DMA_RING_NUM_CONFIG      5
+#define XGENE_DMA_MAX_CHANNEL          4
+#define XGENE_DMA_XOR_CHANNEL          0
+#define XGENE_DMA_PQ_CHANNEL           1
+#define XGENE_DMA_MAX_BYTE_CNT         0x4000  /* 16 KB */
+#define XGENE_DMA_MAX_64B_DESC_BYTE_CNT        0x14000 /* 80 KB */
+#define XGENE_DMA_XOR_ALIGNMENT                6       /* 64 Bytes */
+#define XGENE_DMA_MAX_XOR_SRC          5
+#define XGENE_DMA_16K_BUFFER_LEN_CODE  0x0
+#define XGENE_DMA_INVALID_LEN_CODE     0x7800
+
+/* X-Gene DMA descriptor error codes */
+#define ERR_DESC_AXI                   0x01
+#define ERR_BAD_DESC                   0x02
+#define ERR_READ_DATA_AXI              0x03
+#define ERR_WRITE_DATA_AXI             0x04
+#define ERR_FBP_TIMEOUT                        0x05
+#define ERR_ECC                                0x06
+#define ERR_DIFF_SIZE                  0x08
+#define ERR_SCT_GAT_LEN                        0x09
+#define ERR_CRC_ERR                    0x11
+#define ERR_CHKSUM                     0x12
+#define ERR_DIF                                0x13
+
+/* X-Gene DMA error interrupt codes */
+#define ERR_DIF_SIZE_INT               0x0
+#define ERR_GS_ERR_INT                 0x1
+#define ERR_FPB_TIMEO_INT              0x2
+#define ERR_WFIFO_OVF_INT              0x3
+#define ERR_RFIFO_OVF_INT              0x4
+#define ERR_WR_TIMEO_INT               0x5
+#define ERR_RD_TIMEO_INT               0x6
+#define ERR_WR_ERR_INT                 0x7
+#define ERR_RD_ERR_INT                 0x8
+#define ERR_BAD_DESC_INT               0x9
+#define ERR_DESC_DST_INT               0xA
+#define ERR_DESC_SRC_INT               0xB
+
+/* X-Gene DMA flyby operation code */
+#define FLYBY_2SRC_XOR                 0x8
+#define FLYBY_3SRC_XOR                 0x9
+#define FLYBY_4SRC_XOR                 0xA
+#define FLYBY_5SRC_XOR                 0xB
+
+/* X-Gene DMA SW descriptor flags */
+#define XGENE_DMA_FLAG_64B_DESC                BIT(0)
+
+/* Define to dump X-Gene DMA descriptor */
+#define XGENE_DMA_DESC_DUMP(desc, m)   \
+       print_hex_dump(KERN_ERR, (m),   \
+                       DUMP_PREFIX_ADDRESS, 16, 8, (desc), 32, 0)
+
+#define to_dma_desc_sw(tx)             \
+       container_of(tx, struct xgene_dma_desc_sw, tx)
+#define to_dma_chan(dchan)             \
+       container_of(dchan, struct xgene_dma_chan, dma_chan)
+
+#define chan_dbg(chan, fmt, arg...)    \
+       dev_dbg(chan->dev, "%s: " fmt, chan->name, ##arg)
+#define chan_err(chan, fmt, arg...)    \
+       dev_err(chan->dev, "%s: " fmt, chan->name, ##arg)
+
+struct xgene_dma_desc_hw {
+       u64 m0;
+       u64 m1;
+       u64 m2;
+       u64 m3;
+};
+
+enum xgene_dma_ring_cfgsize {
+       XGENE_DMA_RING_CFG_SIZE_512B,
+       XGENE_DMA_RING_CFG_SIZE_2KB,
+       XGENE_DMA_RING_CFG_SIZE_16KB,
+       XGENE_DMA_RING_CFG_SIZE_64KB,
+       XGENE_DMA_RING_CFG_SIZE_512KB,
+       XGENE_DMA_RING_CFG_SIZE_INVALID
+};
+
+struct xgene_dma_ring {
+       struct xgene_dma *pdma;
+       u8 buf_num;
+       u16 id;
+       u16 num;
+       u16 head;
+       u16 owner;
+       u16 slots;
+       u16 dst_ring_num;
+       u32 size;
+       void __iomem *cmd;
+       void __iomem *cmd_base;
+       dma_addr_t desc_paddr;
+       u32 state[XGENE_DMA_RING_NUM_CONFIG];
+       enum xgene_dma_ring_cfgsize cfgsize;
+       union {
+               void *desc_vaddr;
+               struct xgene_dma_desc_hw *desc_hw;
+       };
+};
+
+struct xgene_dma_desc_sw {
+       struct xgene_dma_desc_hw desc1;
+       struct xgene_dma_desc_hw desc2;
+       u32 flags;
+       struct list_head node;
+       struct list_head tx_list;
+       struct dma_async_tx_descriptor tx;
+};
+
+/**
+ * struct xgene_dma_chan - internal representation of an X-Gene DMA channel
+ * @dma_chan: dmaengine channel object member
+ * @pdma: X-Gene DMA device structure reference
+ * @dev: struct device reference for dma mapping api
+ * @id: raw id of this channel
+ * @rx_irq: channel IRQ
+ * @name: name of X-Gene DMA channel
+ * @lock: serializes enqueue/dequeue operations to the descriptor pool
+ * @pending: number of transaction request pushed to DMA controller for
+ *     execution, but still waiting for completion,
+ * @max_outstanding: max number of outstanding request we can push to channel
+ * @ld_pending: descriptors which are queued to run, but have not yet been
+ *     submitted to the hardware for execution
+ * @ld_running: descriptors which are currently being executing by the hardware
+ * @ld_completed: descriptors which have finished execution by the hardware.
+ *     These descriptors have already had their cleanup actions run. They
+ *     are waiting for the ACK bit to be set by the async tx API.
+ * @desc_pool: descriptor pool for DMA operations
+ * @tasklet: bottom half where all completed descriptors cleans
+ * @tx_ring: transmit ring descriptor that we use to prepare actual
+ *     descriptors for further executions
+ * @rx_ring: receive ring descriptor that we use to get completed DMA
+ *     descriptors during cleanup time
+ */
+struct xgene_dma_chan {
+       struct dma_chan dma_chan;
+       struct xgene_dma *pdma;
+       struct device *dev;
+       int id;
+       int rx_irq;
+       char name[10];
+       spinlock_t lock;
+       int pending;
+       int max_outstanding;
+       struct list_head ld_pending;
+       struct list_head ld_running;
+       struct list_head ld_completed;
+       struct dma_pool *desc_pool;
+       struct tasklet_struct tasklet;
+       struct xgene_dma_ring tx_ring;
+       struct xgene_dma_ring rx_ring;
+};
+
+/**
+ * struct xgene_dma - internal representation of an X-Gene DMA device
+ * @err_irq: DMA error irq number
+ * @ring_num: start id number for DMA ring
+ * @csr_dma: base for DMA register access
+ * @csr_ring: base for DMA ring register access
+ * @csr_ring_cmd: base for DMA ring command register access
+ * @csr_efuse: base for efuse register access
+ * @dma_dev: embedded struct dma_device
+ * @chan: reference to X-Gene DMA channels
+ */
+struct xgene_dma {
+       struct device *dev;
+       struct clk *clk;
+       int err_irq;
+       int ring_num;
+       void __iomem *csr_dma;
+       void __iomem *csr_ring;
+       void __iomem *csr_ring_cmd;
+       void __iomem *csr_efuse;
+       struct dma_device dma_dev[XGENE_DMA_MAX_CHANNEL];
+       struct xgene_dma_chan chan[XGENE_DMA_MAX_CHANNEL];
+};
+
+static const char * const xgene_dma_desc_err[] = {
+       [ERR_DESC_AXI] = "AXI error when reading src/dst link list",
+       [ERR_BAD_DESC] = "ERR or El_ERR fields not set to zero in desc",
+       [ERR_READ_DATA_AXI] = "AXI error when reading data",
+       [ERR_WRITE_DATA_AXI] = "AXI error when writing data",
+       [ERR_FBP_TIMEOUT] = "Timeout on bufpool fetch",
+       [ERR_ECC] = "ECC double bit error",
+       [ERR_DIFF_SIZE] = "Bufpool too small to hold all the DIF result",
+       [ERR_SCT_GAT_LEN] = "Gather and scatter data length not same",
+       [ERR_CRC_ERR] = "CRC error",
+       [ERR_CHKSUM] = "Checksum error",
+       [ERR_DIF] = "DIF error",
+};
+
+static const char * const xgene_dma_err[] = {
+       [ERR_DIF_SIZE_INT] = "DIF size error",
+       [ERR_GS_ERR_INT] = "Gather scatter not same size error",
+       [ERR_FPB_TIMEO_INT] = "Free pool time out error",
+       [ERR_WFIFO_OVF_INT] = "Write FIFO over flow error",
+       [ERR_RFIFO_OVF_INT] = "Read FIFO over flow error",
+       [ERR_WR_TIMEO_INT] = "Write time out error",
+       [ERR_RD_TIMEO_INT] = "Read time out error",
+       [ERR_WR_ERR_INT] = "HBF bus write error",
+       [ERR_RD_ERR_INT] = "HBF bus read error",
+       [ERR_BAD_DESC_INT] = "Ring descriptor HE0 not set error",
+       [ERR_DESC_DST_INT] = "HFB reading dst link address error",
+       [ERR_DESC_SRC_INT] = "HFB reading src link address error",
+};
+
+static bool is_pq_enabled(struct xgene_dma *pdma)
+{
+       u32 val;
+
+       val = ioread32(pdma->csr_efuse + XGENE_SOC_JTAG1_SHADOW);
+       return !(val & XGENE_DMA_PQ_DISABLE_MASK);
+}
+
+static void xgene_dma_cpu_to_le64(u64 *desc, int count)
+{
+       int i;
+
+       for (i = 0; i < count; i++)
+               desc[i] = cpu_to_le64(desc[i]);
+}
+
+static u16 xgene_dma_encode_len(u32 len)
+{
+       return (len < XGENE_DMA_MAX_BYTE_CNT) ?
+               len : XGENE_DMA_16K_BUFFER_LEN_CODE;
+}
+
+static u8 xgene_dma_encode_xor_flyby(u32 src_cnt)
+{
+       static u8 flyby_type[] = {
+               FLYBY_2SRC_XOR, /* Dummy */
+               FLYBY_2SRC_XOR, /* Dummy */
+               FLYBY_2SRC_XOR,
+               FLYBY_3SRC_XOR,
+               FLYBY_4SRC_XOR,
+               FLYBY_5SRC_XOR
+       };
+
+       return flyby_type[src_cnt];
+}
+
+static u32 xgene_dma_ring_desc_cnt(struct xgene_dma_ring *ring)
+{
+       u32 __iomem *cmd_base = ring->cmd_base;
+       u32 ring_state = ioread32(&cmd_base[1]);
+
+       return XGENE_DMA_RING_DESC_CNT(ring_state);
+}
+
+static void xgene_dma_set_src_buffer(void *ext8, size_t *len,
+                                    dma_addr_t *paddr)
+{
+       size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ?
+                       *len : XGENE_DMA_MAX_BYTE_CNT;
+
+       XGENE_DMA_DESC_BUFADDR_SET(ext8, *paddr);
+       XGENE_DMA_DESC_BUFLEN_SET(ext8, xgene_dma_encode_len(nbytes));
+       *len -= nbytes;
+       *paddr += nbytes;
+}
+
+static void xgene_dma_invalidate_buffer(void *ext8)
+{
+       XGENE_DMA_DESC_BUFLEN_SET(ext8, XGENE_DMA_INVALID_LEN_CODE);
+}
+
+static void *xgene_dma_lookup_ext8(u64 *desc, int idx)
+{
+       return (idx % 2) ? (desc + idx - 1) : (desc + idx + 1);
+}
+
+static void xgene_dma_init_desc(void *desc, u16 dst_ring_num)
+{
+       XGENE_DMA_DESC_C_SET(desc); /* Coherent IO */
+       XGENE_DMA_DESC_IN_SET(desc);
+       XGENE_DMA_DESC_H0ENQ_NUM_SET(desc, dst_ring_num);
+       XGENE_DMA_DESC_RTYPE_SET(desc, XGENE_DMA_RING_OWNER_DMA);
+}
+
+static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan,
+                                   struct xgene_dma_desc_sw *desc_sw,
+                                   dma_addr_t dst, dma_addr_t src,
+                                   size_t len)
+{
+       void *desc1, *desc2;
+       int i;
+
+       /* Get 1st descriptor */
+       desc1 = &desc_sw->desc1;
+       xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num);
+
+       /* Set destination address */
+       XGENE_DMA_DESC_DR_SET(desc1);
+       XGENE_DMA_DESC_DST_ADDR_SET(desc1, dst);
+
+       /* Set 1st source address */
+       xgene_dma_set_src_buffer(desc1 + 8, &len, &src);
+
+       if (len <= 0) {
+               desc2 = NULL;
+               goto skip_additional_src;
+       }
+
+       /*
+        * We need to split this source buffer,
+        * and need to use 2nd descriptor
+        */
+       desc2 = &desc_sw->desc2;
+       XGENE_DMA_DESC_NV_SET(desc1);
+
+       /* Set 2nd to 5th source address */
+       for (i = 0; i < 4 && len; i++)
+               xgene_dma_set_src_buffer(xgene_dma_lookup_ext8(desc2, i),
+                                        &len, &src);
+
+       /* Invalidate unused source address field */
+       for (; i < 4; i++)
+               xgene_dma_invalidate_buffer(xgene_dma_lookup_ext8(desc2, i));
+
+       /* Updated flag that we have prepared 64B descriptor */
+       desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC;
+
+skip_additional_src:
+       /* Hardware stores descriptor in little endian format */
+       xgene_dma_cpu_to_le64(desc1, 4);
+       if (desc2)
+               xgene_dma_cpu_to_le64(desc2, 4);
+}
+
+static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan,
+                                   struct xgene_dma_desc_sw *desc_sw,
+                                   dma_addr_t *dst, dma_addr_t *src,
+                                   u32 src_cnt, size_t *nbytes,
+                                   const u8 *scf)
+{
+       void *desc1, *desc2;
+       size_t len = *nbytes;
+       int i;
+
+       desc1 = &desc_sw->desc1;
+       desc2 = &desc_sw->desc2;
+
+       /* Initialize DMA descriptor */
+       xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num);
+
+       /* Set destination address */
+       XGENE_DMA_DESC_DR_SET(desc1);
+       XGENE_DMA_DESC_DST_ADDR_SET(desc1, *dst);
+
+       /* We have multiple source addresses, so need to set NV bit*/
+       XGENE_DMA_DESC_NV_SET(desc1);
+
+       /* Set flyby opcode */
+       XGENE_DMA_DESC_FLYBY_SET(desc1, xgene_dma_encode_xor_flyby(src_cnt));
+
+       /* Set 1st to 5th source addresses */
+       for (i = 0; i < src_cnt; i++) {
+               len = *nbytes;
+               xgene_dma_set_src_buffer((i == 0) ? (desc1 + 8) :
+                                        xgene_dma_lookup_ext8(desc2, i - 1),
+                                        &len, &src[i]);
+               XGENE_DMA_DESC_MULTI_SET(desc1, scf[i], i);
+       }
+
+       /* Hardware stores descriptor in little endian format */
+       xgene_dma_cpu_to_le64(desc1, 4);
+       xgene_dma_cpu_to_le64(desc2, 4);
+
+       /* Update meta data */
+       *nbytes = len;
+       *dst += XGENE_DMA_MAX_BYTE_CNT;
+
+       /* We need always 64B descriptor to perform xor or pq operations */
+       desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC;
+}
+
+static dma_cookie_t xgene_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+       struct xgene_dma_desc_sw *desc;
+       struct xgene_dma_chan *chan;
+       dma_cookie_t cookie;
+
+       if (unlikely(!tx))
+               return -EINVAL;
+
+       chan = to_dma_chan(tx->chan);
+       desc = to_dma_desc_sw(tx);
+
+       spin_lock_bh(&chan->lock);
+
+       cookie = dma_cookie_assign(tx);
+
+       /* Add this transaction list onto the tail of the pending queue */
+       list_splice_tail_init(&desc->tx_list, &chan->ld_pending);
+
+       spin_unlock_bh(&chan->lock);
+
+       return cookie;
+}
+
+static void xgene_dma_clean_descriptor(struct xgene_dma_chan *chan,
+                                      struct xgene_dma_desc_sw *desc)
+{
+       list_del(&desc->node);
+       chan_dbg(chan, "LD %p free\n", desc);
+       dma_pool_free(chan->desc_pool, desc, desc->tx.phys);
+}
+
+static struct xgene_dma_desc_sw *xgene_dma_alloc_descriptor(
+                                struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_desc_sw *desc;
+       dma_addr_t phys;
+
+       desc = dma_pool_alloc(chan->desc_pool, GFP_NOWAIT, &phys);
+       if (!desc) {
+               chan_err(chan, "Failed to allocate LDs\n");
+               return NULL;
+       }
+
+       memset(desc, 0, sizeof(*desc));
+
+       INIT_LIST_HEAD(&desc->tx_list);
+       desc->tx.phys = phys;
+       desc->tx.tx_submit = xgene_dma_tx_submit;
+       dma_async_tx_descriptor_init(&desc->tx, &chan->dma_chan);
+
+       chan_dbg(chan, "LD %p allocated\n", desc);
+
+       return desc;
+}
+
+/**
+ * xgene_dma_clean_completed_descriptor - free all descriptors which
+ * has been completed and acked
+ * @chan: X-Gene DMA channel
+ *
+ * This function is used on all completed and acked descriptors.
+ */
+static void xgene_dma_clean_completed_descriptor(struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_desc_sw *desc, *_desc;
+
+       /* Run the callback for each descriptor, in order */
+       list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node) {
+               if (async_tx_test_ack(&desc->tx))
+                       xgene_dma_clean_descriptor(chan, desc);
+       }
+}
+
+/**
+ * xgene_dma_run_tx_complete_actions - cleanup a single link descriptor
+ * @chan: X-Gene DMA channel
+ * @desc: descriptor to cleanup and free
+ *
+ * This function is used on a descriptor which has been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies.
+ */
+static void xgene_dma_run_tx_complete_actions(struct xgene_dma_chan *chan,
+                                             struct xgene_dma_desc_sw *desc)
+{
+       struct dma_async_tx_descriptor *tx = &desc->tx;
+
+       /*
+        * If this is not the last transaction in the group,
+        * then no need to complete cookie and run any callback as
+        * this is not the tx_descriptor which had been sent to caller
+        * of this DMA request
+        */
+
+       if (tx->cookie == 0)
+               return;
+
+       dma_cookie_complete(tx);
+
+       /* Run the link descriptor callback function */
+       if (tx->callback)
+               tx->callback(tx->callback_param);
+
+       dma_descriptor_unmap(tx);
+
+       /* Run any dependencies */
+       dma_run_dependencies(tx);
+}
+
+/**
+ * xgene_dma_clean_running_descriptor - move the completed descriptor from
+ * ld_running to ld_completed
+ * @chan: X-Gene DMA channel
+ * @desc: the descriptor which is completed
+ *
+ * Free the descriptor directly if acked by async_tx api,
+ * else move it to queue ld_completed.
+ */
+static void xgene_dma_clean_running_descriptor(struct xgene_dma_chan *chan,
+                                              struct xgene_dma_desc_sw *desc)
+{
+       /* Remove from the list of running transactions */
+       list_del(&desc->node);
+
+       /*
+        * the client is allowed to attach dependent operations
+        * until 'ack' is set
+        */
+       if (!async_tx_test_ack(&desc->tx)) {
+               /*
+                * Move this descriptor to the list of descriptors which is
+                * completed, but still awaiting the 'ack' bit to be set.
+                */
+               list_add_tail(&desc->node, &chan->ld_completed);
+               return;
+       }
+
+       chan_dbg(chan, "LD %p free\n", desc);
+       dma_pool_free(chan->desc_pool, desc, desc->tx.phys);
+}
+
+static int xgene_chan_xfer_request(struct xgene_dma_ring *ring,
+                                  struct xgene_dma_desc_sw *desc_sw)
+{
+       struct xgene_dma_desc_hw *desc_hw;
+
+       /* Check if can push more descriptor to hw for execution */
+       if (xgene_dma_ring_desc_cnt(ring) > (ring->slots - 2))
+               return -EBUSY;
+
+       /* Get hw descriptor from DMA tx ring */
+       desc_hw = &ring->desc_hw[ring->head];
+
+       /*
+        * Increment the head count to point next
+        * descriptor for next time
+        */
+       if (++ring->head == ring->slots)
+               ring->head = 0;
+
+       /* Copy prepared sw descriptor data to hw descriptor */
+       memcpy(desc_hw, &desc_sw->desc1, sizeof(*desc_hw));
+
+       /*
+        * Check if we have prepared 64B descriptor,
+        * in this case we need one more hw descriptor
+        */
+       if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) {
+               desc_hw = &ring->desc_hw[ring->head];
+
+               if (++ring->head == ring->slots)
+                       ring->head = 0;
+
+               memcpy(desc_hw, &desc_sw->desc2, sizeof(*desc_hw));
+       }
+
+       /* Notify the hw that we have descriptor ready for execution */
+       iowrite32((desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ?
+                 2 : 1, ring->cmd);
+
+       return 0;
+}
+
+/**
+ * xgene_chan_xfer_ld_pending - push any pending transactions to hw
+ * @chan : X-Gene DMA channel
+ *
+ * LOCKING: must hold chan->desc_lock
+ */
+static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_desc_sw *desc_sw, *_desc_sw;
+       int ret;
+
+       /*
+        * If the list of pending descriptors is empty, then we
+        * don't need to do any work at all
+        */
+       if (list_empty(&chan->ld_pending)) {
+               chan_dbg(chan, "No pending LDs\n");
+               return;
+       }
+
+       /*
+        * Move elements from the queue of pending transactions onto the list
+        * of running transactions and push it to hw for further executions
+        */
+       list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_pending, node) {
+               /*
+                * Check if have pushed max number of transactions to hw
+                * as capable, so let's stop here and will push remaining
+                * elements from pening ld queue after completing some
+                * descriptors that we have already pushed
+                */
+               if (chan->pending >= chan->max_outstanding)
+                       return;
+
+               ret = xgene_chan_xfer_request(&chan->tx_ring, desc_sw);
+               if (ret)
+                       return;
+
+               /*
+                * Delete this element from ld pending queue and append it to
+                * ld running queue
+                */
+               list_move_tail(&desc_sw->node, &chan->ld_running);
+
+               /* Increment the pending transaction count */
+               chan->pending++;
+       }
+}
+
+/**
+ * xgene_dma_cleanup_descriptors - cleanup link descriptors which are completed
+ * and move them to ld_completed to free until flag 'ack' is set
+ * @chan: X-Gene DMA channel
+ *
+ * This function is used on descriptors which have been executed by the DMA
+ * controller. It will run any callbacks, submit any dependencies, then
+ * free these descriptors if flag 'ack' is set.
+ */
+static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_ring *ring = &chan->rx_ring;
+       struct xgene_dma_desc_sw *desc_sw, *_desc_sw;
+       struct xgene_dma_desc_hw *desc_hw;
+       u8 status;
+
+       /* Clean already completed and acked descriptors */
+       xgene_dma_clean_completed_descriptor(chan);
+
+       /* Run the callback for each descriptor, in order */
+       list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) {
+               /* Get subsequent hw descriptor from DMA rx ring */
+               desc_hw = &ring->desc_hw[ring->head];
+
+               /* Check if this descriptor has been completed */
+               if (unlikely(XGENE_DMA_DESC_IS_EMPTY(desc_hw)))
+                       break;
+
+               if (++ring->head == ring->slots)
+                       ring->head = 0;
+
+               /* Check if we have any error with DMA transactions */
+               status = XGENE_DMA_DESC_STATUS(
+                               XGENE_DMA_DESC_ELERR_RD(le64_to_cpu(
+                                                       desc_hw->m0)),
+                               XGENE_DMA_DESC_LERR_RD(le64_to_cpu(
+                                                      desc_hw->m0)));
+               if (status) {
+                       /* Print the DMA error type */
+                       chan_err(chan, "%s\n", xgene_dma_desc_err[status]);
+
+                       /*
+                        * We have DMA transactions error here. Dump DMA Tx
+                        * and Rx descriptors for this request */
+                       XGENE_DMA_DESC_DUMP(&desc_sw->desc1,
+                                           "X-Gene DMA TX DESC1: ");
+
+                       if (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC)
+                               XGENE_DMA_DESC_DUMP(&desc_sw->desc2,
+                                                   "X-Gene DMA TX DESC2: ");
+
+                       XGENE_DMA_DESC_DUMP(desc_hw,
+                                           "X-Gene DMA RX ERR DESC: ");
+               }
+
+               /* Notify the hw about this completed descriptor */
+               iowrite32(-1, ring->cmd);
+
+               /* Mark this hw descriptor as processed */
+               XGENE_DMA_DESC_SET_EMPTY(desc_hw);
+
+               xgene_dma_run_tx_complete_actions(chan, desc_sw);
+
+               xgene_dma_clean_running_descriptor(chan, desc_sw);
+
+               /*
+                * Decrement the pending transaction count
+                * as we have processed one
+                */
+               chan->pending--;
+       }
+
+       /*
+        * Start any pending transactions automatically
+        * In the ideal case, we keep the DMA controller busy while we go
+        * ahead and free the descriptors below.
+        */
+       xgene_chan_xfer_ld_pending(chan);
+}
+
+static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan)
+{
+       struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+       /* Has this channel already been allocated? */
+       if (chan->desc_pool)
+               return 1;
+
+       chan->desc_pool = dma_pool_create(chan->name, chan->dev,
+                                         sizeof(struct xgene_dma_desc_sw),
+                                         0, 0);
+       if (!chan->desc_pool) {
+               chan_err(chan, "Failed to allocate descriptor pool\n");
+               return -ENOMEM;
+       }
+
+       chan_dbg(chan, "Allocate descripto pool\n");
+
+       return 1;
+}
+
+/**
+ * xgene_dma_free_desc_list - Free all descriptors in a queue
+ * @chan: X-Gene DMA channel
+ * @list: the list to free
+ *
+ * LOCKING: must hold chan->desc_lock
+ */
+static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan,
+                                    struct list_head *list)
+{
+       struct xgene_dma_desc_sw *desc, *_desc;
+
+       list_for_each_entry_safe(desc, _desc, list, node)
+               xgene_dma_clean_descriptor(chan, desc);
+}
+
+static void xgene_dma_free_tx_desc_list(struct xgene_dma_chan *chan,
+                                       struct list_head *list)
+{
+       struct xgene_dma_desc_sw *desc, *_desc;
+
+       list_for_each_entry_safe(desc, _desc, list, node)
+               xgene_dma_clean_descriptor(chan, desc);
+}
+
+static void xgene_dma_free_chan_resources(struct dma_chan *dchan)
+{
+       struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+       chan_dbg(chan, "Free all resources\n");
+
+       if (!chan->desc_pool)
+               return;
+
+       spin_lock_bh(&chan->lock);
+
+       /* Process all running descriptor */
+       xgene_dma_cleanup_descriptors(chan);
+
+       /* Clean all link descriptor queues */
+       xgene_dma_free_desc_list(chan, &chan->ld_pending);
+       xgene_dma_free_desc_list(chan, &chan->ld_running);
+       xgene_dma_free_desc_list(chan, &chan->ld_completed);
+
+       spin_unlock_bh(&chan->lock);
+
+       /* Delete this channel DMA pool */
+       dma_pool_destroy(chan->desc_pool);
+       chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_memcpy(
+       struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src,
+       size_t len, unsigned long flags)
+{
+       struct xgene_dma_desc_sw *first = NULL, *new;
+       struct xgene_dma_chan *chan;
+       size_t copy;
+
+       if (unlikely(!dchan || !len))
+               return NULL;
+
+       chan = to_dma_chan(dchan);
+
+       do {
+               /* Allocate the link descriptor from DMA pool */
+               new = xgene_dma_alloc_descriptor(chan);
+               if (!new)
+                       goto fail;
+
+               /* Create the largest transaction possible */
+               copy = min_t(size_t, len, XGENE_DMA_MAX_64B_DESC_BYTE_CNT);
+
+               /* Prepare DMA descriptor */
+               xgene_dma_prep_cpy_desc(chan, new, dst, src, copy);
+
+               if (!first)
+                       first = new;
+
+               new->tx.cookie = 0;
+               async_tx_ack(&new->tx);
+
+               /* Update metadata */
+               len -= copy;
+               dst += copy;
+               src += copy;
+
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+       } while (len);
+
+       new->tx.flags = flags; /* client is in control of this ack */
+       new->tx.cookie = -EBUSY;
+       list_splice(&first->tx_list, &new->tx_list);
+
+       return &new->tx;
+
+fail:
+       if (!first)
+               return NULL;
+
+       xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_sg(
+       struct dma_chan *dchan, struct scatterlist *dst_sg,
+       u32 dst_nents, struct scatterlist *src_sg,
+       u32 src_nents, unsigned long flags)
+{
+       struct xgene_dma_desc_sw *first = NULL, *new = NULL;
+       struct xgene_dma_chan *chan;
+       size_t dst_avail, src_avail;
+       dma_addr_t dst, src;
+       size_t len;
+
+       if (unlikely(!dchan))
+               return NULL;
+
+       if (unlikely(!dst_nents || !src_nents))
+               return NULL;
+
+       if (unlikely(!dst_sg || !src_sg))
+               return NULL;
+
+       chan = to_dma_chan(dchan);
+
+       /* Get prepared for the loop */
+       dst_avail = sg_dma_len(dst_sg);
+       src_avail = sg_dma_len(src_sg);
+       dst_nents--;
+       src_nents--;
+
+       /* Run until we are out of scatterlist entries */
+       while (true) {
+               /* Create the largest transaction possible */
+               len = min_t(size_t, src_avail, dst_avail);
+               len = min_t(size_t, len, XGENE_DMA_MAX_64B_DESC_BYTE_CNT);
+               if (len == 0)
+                       goto fetch;
+
+               dst = sg_dma_address(dst_sg) + sg_dma_len(dst_sg) - dst_avail;
+               src = sg_dma_address(src_sg) + sg_dma_len(src_sg) - src_avail;
+
+               /* Allocate the link descriptor from DMA pool */
+               new = xgene_dma_alloc_descriptor(chan);
+               if (!new)
+                       goto fail;
+
+               /* Prepare DMA descriptor */
+               xgene_dma_prep_cpy_desc(chan, new, dst, src, len);
+
+               if (!first)
+                       first = new;
+
+               new->tx.cookie = 0;
+               async_tx_ack(&new->tx);
+
+               /* update metadata */
+               dst_avail -= len;
+               src_avail -= len;
+
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+
+fetch:
+               /* fetch the next dst scatterlist entry */
+               if (dst_avail == 0) {
+                       /* no more entries: we're done */
+                       if (dst_nents == 0)
+                               break;
+
+                       /* fetch the next entry: if there are no more: done */
+                       dst_sg = sg_next(dst_sg);
+                       if (!dst_sg)
+                               break;
+
+                       dst_nents--;
+                       dst_avail = sg_dma_len(dst_sg);
+               }
+
+               /* fetch the next src scatterlist entry */
+               if (src_avail == 0) {
+                       /* no more entries: we're done */
+                       if (src_nents == 0)
+                               break;
+
+                       /* fetch the next entry: if there are no more: done */
+                       src_sg = sg_next(src_sg);
+                       if (!src_sg)
+                               break;
+
+                       src_nents--;
+                       src_avail = sg_dma_len(src_sg);
+               }
+       }
+
+       if (!new)
+               return NULL;
+
+       new->tx.flags = flags; /* client is in control of this ack */
+       new->tx.cookie = -EBUSY;
+       list_splice(&first->tx_list, &new->tx_list);
+
+       return &new->tx;
+fail:
+       if (!first)
+               return NULL;
+
+       xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_xor(
+       struct dma_chan *dchan, dma_addr_t dst, dma_addr_t *src,
+       u32 src_cnt, size_t len, unsigned long flags)
+{
+       struct xgene_dma_desc_sw *first = NULL, *new;
+       struct xgene_dma_chan *chan;
+       static u8 multi[XGENE_DMA_MAX_XOR_SRC] = {
+                               0x01, 0x01, 0x01, 0x01, 0x01};
+
+       if (unlikely(!dchan || !len))
+               return NULL;
+
+       chan = to_dma_chan(dchan);
+
+       do {
+               /* Allocate the link descriptor from DMA pool */
+               new = xgene_dma_alloc_descriptor(chan);
+               if (!new)
+                       goto fail;
+
+               /* Prepare xor DMA descriptor */
+               xgene_dma_prep_xor_desc(chan, new, &dst, src,
+                                       src_cnt, &len, multi);
+
+               if (!first)
+                       first = new;
+
+               new->tx.cookie = 0;
+               async_tx_ack(&new->tx);
+
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+       } while (len);
+
+       new->tx.flags = flags; /* client is in control of this ack */
+       new->tx.cookie = -EBUSY;
+       list_splice(&first->tx_list, &new->tx_list);
+
+       return &new->tx;
+
+fail:
+       if (!first)
+               return NULL;
+
+       xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+       return NULL;
+}
+
+static struct dma_async_tx_descriptor *xgene_dma_prep_pq(
+       struct dma_chan *dchan, dma_addr_t *dst, dma_addr_t *src,
+       u32 src_cnt, const u8 *scf, size_t len, unsigned long flags)
+{
+       struct xgene_dma_desc_sw *first = NULL, *new;
+       struct xgene_dma_chan *chan;
+       size_t _len = len;
+       dma_addr_t _src[XGENE_DMA_MAX_XOR_SRC];
+       static u8 multi[XGENE_DMA_MAX_XOR_SRC] = {0x01, 0x01, 0x01, 0x01, 0x01};
+
+       if (unlikely(!dchan || !len))
+               return NULL;
+
+       chan = to_dma_chan(dchan);
+
+       /*
+        * Save source addresses on local variable, may be we have to
+        * prepare two descriptor to generate P and Q if both enabled
+        * in the flags by client
+        */
+       memcpy(_src, src, sizeof(*src) * src_cnt);
+
+       if (flags & DMA_PREP_PQ_DISABLE_P)
+               len = 0;
+
+       if (flags & DMA_PREP_PQ_DISABLE_Q)
+               _len = 0;
+
+       do {
+               /* Allocate the link descriptor from DMA pool */
+               new = xgene_dma_alloc_descriptor(chan);
+               if (!new)
+                       goto fail;
+
+               if (!first)
+                       first = new;
+
+               new->tx.cookie = 0;
+               async_tx_ack(&new->tx);
+
+               /* Insert the link descriptor to the LD ring */
+               list_add_tail(&new->node, &first->tx_list);
+
+               /*
+                * Prepare DMA descriptor to generate P,
+                * if DMA_PREP_PQ_DISABLE_P flag is not set
+                */
+               if (len) {
+                       xgene_dma_prep_xor_desc(chan, new, &dst[0], src,
+                                               src_cnt, &len, multi);
+                       continue;
+               }
+
+               /*
+                * Prepare DMA descriptor to generate Q,
+                * if DMA_PREP_PQ_DISABLE_Q flag is not set
+                */
+               if (_len) {
+                       xgene_dma_prep_xor_desc(chan, new, &dst[1], _src,
+                                               src_cnt, &_len, scf);
+               }
+       } while (len || _len);
+
+       new->tx.flags = flags; /* client is in control of this ack */
+       new->tx.cookie = -EBUSY;
+       list_splice(&first->tx_list, &new->tx_list);
+
+       return &new->tx;
+
+fail:
+       if (!first)
+               return NULL;
+
+       xgene_dma_free_tx_desc_list(chan, &first->tx_list);
+       return NULL;
+}
+
+static void xgene_dma_issue_pending(struct dma_chan *dchan)
+{
+       struct xgene_dma_chan *chan = to_dma_chan(dchan);
+
+       spin_lock_bh(&chan->lock);
+       xgene_chan_xfer_ld_pending(chan);
+       spin_unlock_bh(&chan->lock);
+}
+
+static enum dma_status xgene_dma_tx_status(struct dma_chan *dchan,
+                                          dma_cookie_t cookie,
+                                          struct dma_tx_state *txstate)
+{
+       return dma_cookie_status(dchan, cookie, txstate);
+}
+
+static void xgene_dma_tasklet_cb(unsigned long data)
+{
+       struct xgene_dma_chan *chan = (struct xgene_dma_chan *)data;
+
+       spin_lock_bh(&chan->lock);
+
+       /* Run all cleanup for descriptors which have been completed */
+       xgene_dma_cleanup_descriptors(chan);
+
+       /* Re-enable DMA channel IRQ */
+       enable_irq(chan->rx_irq);
+
+       spin_unlock_bh(&chan->lock);
+}
+
+static irqreturn_t xgene_dma_chan_ring_isr(int irq, void *id)
+{
+       struct xgene_dma_chan *chan = (struct xgene_dma_chan *)id;
+
+       BUG_ON(!chan);
+
+       /*
+        * Disable DMA channel IRQ until we process completed
+        * descriptors
+        */
+       disable_irq_nosync(chan->rx_irq);
+
+       /*
+        * Schedule the tasklet to handle all cleanup of the current
+        * transaction. It will start a new transaction if there is
+        * one pending.
+        */
+       tasklet_schedule(&chan->tasklet);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t xgene_dma_err_isr(int irq, void *id)
+{
+       struct xgene_dma *pdma = (struct xgene_dma *)id;
+       unsigned long int_mask;
+       u32 val, i;
+
+       val = ioread32(pdma->csr_dma + XGENE_DMA_INT);
+
+       /* Clear DMA interrupts */
+       iowrite32(val, pdma->csr_dma + XGENE_DMA_INT);
+
+       /* Print DMA error info */
+       int_mask = val >> XGENE_DMA_INT_MASK_SHIFT;
+       for_each_set_bit(i, &int_mask, ARRAY_SIZE(xgene_dma_err))
+               dev_err(pdma->dev,
+                       "Interrupt status 0x%08X %s\n", val, xgene_dma_err[i]);
+
+       return IRQ_HANDLED;
+}
+
+static void xgene_dma_wr_ring_state(struct xgene_dma_ring *ring)
+{
+       int i;
+
+       iowrite32(ring->num, ring->pdma->csr_ring + XGENE_DMA_RING_STATE);
+
+       for (i = 0; i < XGENE_DMA_RING_NUM_CONFIG; i++)
+               iowrite32(ring->state[i], ring->pdma->csr_ring +
+                         XGENE_DMA_RING_STATE_WR_BASE + (i * 4));
+}
+
+static void xgene_dma_clr_ring_state(struct xgene_dma_ring *ring)
+{
+       memset(ring->state, 0, sizeof(u32) * XGENE_DMA_RING_NUM_CONFIG);
+       xgene_dma_wr_ring_state(ring);
+}
+
+static void xgene_dma_setup_ring(struct xgene_dma_ring *ring)
+{
+       void *ring_cfg = ring->state;
+       u64 addr = ring->desc_paddr;
+       void *desc;
+       u32 i, val;
+
+       ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE;
+
+       /* Clear DMA ring state */
+       xgene_dma_clr_ring_state(ring);
+
+       /* Set DMA ring type */
+       XGENE_DMA_RING_TYPE_SET(ring_cfg, XGENE_DMA_RING_TYPE_REGULAR);
+
+       if (ring->owner == XGENE_DMA_RING_OWNER_DMA) {
+               /* Set recombination buffer and timeout */
+               XGENE_DMA_RING_RECOMBBUF_SET(ring_cfg);
+               XGENE_DMA_RING_RECOMTIMEOUTL_SET(ring_cfg);
+               XGENE_DMA_RING_RECOMTIMEOUTH_SET(ring_cfg);
+       }
+
+       /* Initialize DMA ring state */
+       XGENE_DMA_RING_SELTHRSH_SET(ring_cfg);
+       XGENE_DMA_RING_ACCEPTLERR_SET(ring_cfg);
+       XGENE_DMA_RING_COHERENT_SET(ring_cfg);
+       XGENE_DMA_RING_ADDRL_SET(ring_cfg, addr);
+       XGENE_DMA_RING_ADDRH_SET(ring_cfg, addr);
+       XGENE_DMA_RING_SIZE_SET(ring_cfg, ring->cfgsize);
+
+       /* Write DMA ring configurations */
+       xgene_dma_wr_ring_state(ring);
+
+       /* Set DMA ring id */
+       iowrite32(XGENE_DMA_RING_ID_SETUP(ring->id),
+                 ring->pdma->csr_ring + XGENE_DMA_RING_ID);
+
+       /* Set DMA ring buffer */
+       iowrite32(XGENE_DMA_RING_ID_BUF_SETUP(ring->num),
+                 ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);
+
+       if (ring->owner != XGENE_DMA_RING_OWNER_CPU)
+               return;
+
+       /* Set empty signature to DMA Rx ring descriptors */
+       for (i = 0; i < ring->slots; i++) {
+               desc = &ring->desc_hw[i];
+               XGENE_DMA_DESC_SET_EMPTY(desc);
+       }
+
+       /* Enable DMA Rx ring interrupt */
+       val = ioread32(ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE);
+       XGENE_DMA_RING_NE_INT_MODE_SET(val, ring->buf_num);
+       iowrite32(val, ring->pdma->csr_ring + XGENE_DMA_RING_NE_INT_MODE);
+}
+
+static void xgene_dma_clear_ring(struct xgene_dma_ring *ring)
+{
+       u32 ring_id, val;
+
+       if (ring->owner == XGENE_DMA_RING_OWNER_CPU) {
+               /* Disable DMA Rx ring interrupt */
+               val = ioread32(ring->pdma->csr_ring +
+                              XGENE_DMA_RING_NE_INT_MODE);
+               XGENE_DMA_RING_NE_INT_MODE_RESET(val, ring->buf_num);
+               iowrite32(val, ring->pdma->csr_ring +
+                         XGENE_DMA_RING_NE_INT_MODE);
+       }
+
+       /* Clear DMA ring state */
+       ring_id = XGENE_DMA_RING_ID_SETUP(ring->id);
+       iowrite32(ring_id, ring->pdma->csr_ring + XGENE_DMA_RING_ID);
+
+       iowrite32(0, ring->pdma->csr_ring + XGENE_DMA_RING_ID_BUF);
+       xgene_dma_clr_ring_state(ring);
+}
+
+static void xgene_dma_set_ring_cmd(struct xgene_dma_ring *ring)
+{
+       ring->cmd_base = ring->pdma->csr_ring_cmd +
+                               XGENE_DMA_RING_CMD_BASE_OFFSET((ring->num -
+                                                         XGENE_DMA_RING_NUM));
+
+       ring->cmd = ring->cmd_base + XGENE_DMA_RING_CMD_OFFSET;
+}
+
+static int xgene_dma_get_ring_size(struct xgene_dma_chan *chan,
+                                  enum xgene_dma_ring_cfgsize cfgsize)
+{
+       int size;
+
+       switch (cfgsize) {
+       case XGENE_DMA_RING_CFG_SIZE_512B:
+               size = 0x200;
+               break;
+       case XGENE_DMA_RING_CFG_SIZE_2KB:
+               size = 0x800;
+               break;
+       case XGENE_DMA_RING_CFG_SIZE_16KB:
+               size = 0x4000;
+               break;
+       case XGENE_DMA_RING_CFG_SIZE_64KB:
+               size = 0x10000;
+               break;
+       case XGENE_DMA_RING_CFG_SIZE_512KB:
+               size = 0x80000;
+               break;
+       default:
+               chan_err(chan, "Unsupported cfg ring size %d\n", cfgsize);
+               return -EINVAL;
+       }
+
+       return size;
+}
+
+static void xgene_dma_delete_ring_one(struct xgene_dma_ring *ring)
+{
+       /* Clear DMA ring configurations */
+       xgene_dma_clear_ring(ring);
+
+       /* De-allocate DMA ring descriptor */
+       if (ring->desc_vaddr) {
+               dma_free_coherent(ring->pdma->dev, ring->size,
+                                 ring->desc_vaddr, ring->desc_paddr);
+               ring->desc_vaddr = NULL;
+       }
+}
+
+static void xgene_dma_delete_chan_rings(struct xgene_dma_chan *chan)
+{
+       xgene_dma_delete_ring_one(&chan->rx_ring);
+       xgene_dma_delete_ring_one(&chan->tx_ring);
+}
+
+static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan,
+                                    struct xgene_dma_ring *ring,
+                                    enum xgene_dma_ring_cfgsize cfgsize)
+{
+       /* Setup DMA ring descriptor variables */
+       ring->pdma = chan->pdma;
+       ring->cfgsize = cfgsize;
+       ring->num = chan->pdma->ring_num++;
+       ring->id = XGENE_DMA_RING_ID_GET(ring->owner, ring->buf_num);
+
+       ring->size = xgene_dma_get_ring_size(chan, cfgsize);
+       if (ring->size <= 0)
+               return ring->size;
+
+       /* Allocate memory for DMA ring descriptor */
+       ring->desc_vaddr = dma_zalloc_coherent(chan->dev, ring->size,
+                                              &ring->desc_paddr, GFP_KERNEL);
+       if (!ring->desc_vaddr) {
+               chan_err(chan, "Failed to allocate ring desc\n");
+               return -ENOMEM;
+       }
+
+       /* Configure and enable DMA ring */
+       xgene_dma_set_ring_cmd(ring);
+       xgene_dma_setup_ring(ring);
+
+       return 0;
+}
+
+static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan)
+{
+       struct xgene_dma_ring *rx_ring = &chan->rx_ring;
+       struct xgene_dma_ring *tx_ring = &chan->tx_ring;
+       int ret;
+
+       /* Create DMA Rx ring descriptor */
+       rx_ring->owner = XGENE_DMA_RING_OWNER_CPU;
+       rx_ring->buf_num = XGENE_DMA_CPU_BUFNUM + chan->id;
+
+       ret = xgene_dma_create_ring_one(chan, rx_ring,
+                                       XGENE_DMA_RING_CFG_SIZE_64KB);
+       if (ret)
+               return ret;
+
+       chan_dbg(chan, "Rx ring id 0x%X num %d desc 0x%p\n",
+                rx_ring->id, rx_ring->num, rx_ring->desc_vaddr);
+
+       /* Create DMA Tx ring descriptor */
+       tx_ring->owner = XGENE_DMA_RING_OWNER_DMA;
+       tx_ring->buf_num = XGENE_DMA_BUFNUM + chan->id;
+
+       ret = xgene_dma_create_ring_one(chan, tx_ring,
+                                       XGENE_DMA_RING_CFG_SIZE_64KB);
+       if (ret) {
+               xgene_dma_delete_ring_one(rx_ring);
+               return ret;
+       }
+
+       tx_ring->dst_ring_num = XGENE_DMA_RING_DST_ID(rx_ring->num);
+
+       chan_dbg(chan,
+                "Tx ring id 0x%X num %d desc 0x%p\n",
+                tx_ring->id, tx_ring->num, tx_ring->desc_vaddr);
+
+       /* Set the max outstanding request possible to this channel */
+       chan->max_outstanding = rx_ring->slots;
+
+       return ret;
+}
+
+static int xgene_dma_init_rings(struct xgene_dma *pdma)
+{
+       int ret, i, j;
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               ret = xgene_dma_create_chan_rings(&pdma->chan[i]);
+               if (ret) {
+                       for (j = 0; j < i; j++)
+                               xgene_dma_delete_chan_rings(&pdma->chan[j]);
+                       return ret;
+               }
+       }
+
+       return ret;
+}
+
+static void xgene_dma_enable(struct xgene_dma *pdma)
+{
+       u32 val;
+
+       /* Configure and enable DMA engine */
+       val = ioread32(pdma->csr_dma + XGENE_DMA_GCR);
+       XGENE_DMA_CH_SETUP(val);
+       XGENE_DMA_ENABLE(val);
+       iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR);
+}
+
+static void xgene_dma_disable(struct xgene_dma *pdma)
+{
+       u32 val;
+
+       val = ioread32(pdma->csr_dma + XGENE_DMA_GCR);
+       XGENE_DMA_DISABLE(val);
+       iowrite32(val, pdma->csr_dma + XGENE_DMA_GCR);
+}
+
+static void xgene_dma_mask_interrupts(struct xgene_dma *pdma)
+{
+       /*
+        * Mask DMA ring overflow, underflow and
+        * AXI write/read error interrupts
+        */
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT0_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT1_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT2_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT3_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_MASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT4_MASK);
+
+       /* Mask DMA error interrupts */
+       iowrite32(XGENE_DMA_INT_ALL_MASK, pdma->csr_dma + XGENE_DMA_INT_MASK);
+}
+
+static void xgene_dma_unmask_interrupts(struct xgene_dma *pdma)
+{
+       /*
+        * Unmask DMA ring overflow, underflow and
+        * AXI write/read error interrupts
+        */
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT0_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT1_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT2_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT3_MASK);
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_RING_INT4_MASK);
+
+       /* Unmask DMA error interrupts */
+       iowrite32(XGENE_DMA_INT_ALL_UNMASK,
+                 pdma->csr_dma + XGENE_DMA_INT_MASK);
+}
+
+static void xgene_dma_init_hw(struct xgene_dma *pdma)
+{
+       u32 val;
+
+       /* Associate DMA ring to corresponding ring HW */
+       iowrite32(XGENE_DMA_ASSOC_RING_MNGR1,
+                 pdma->csr_dma + XGENE_DMA_CFG_RING_WQ_ASSOC);
+
+       /* Configure RAID6 polynomial control setting */
+       if (is_pq_enabled(pdma))
+               iowrite32(XGENE_DMA_RAID6_MULTI_CTRL(0x1D),
+                         pdma->csr_dma + XGENE_DMA_RAID6_CONT);
+       else
+               dev_info(pdma->dev, "PQ is disabled in HW\n");
+
+       xgene_dma_enable(pdma);
+       xgene_dma_unmask_interrupts(pdma);
+
+       /* Get DMA id and version info */
+       val = ioread32(pdma->csr_dma + XGENE_DMA_IPBRR);
+
+       /* DMA device info */
+       dev_info(pdma->dev,
+                "X-Gene DMA v%d.%02d.%02d driver registered %d channels",
+                XGENE_DMA_REV_NO_RD(val), XGENE_DMA_BUS_ID_RD(val),
+                XGENE_DMA_DEV_ID_RD(val), XGENE_DMA_MAX_CHANNEL);
+}
+
+static int xgene_dma_init_ring_mngr(struct xgene_dma *pdma)
+{
+       if (ioread32(pdma->csr_ring + XGENE_DMA_RING_CLKEN) &&
+           (!ioread32(pdma->csr_ring + XGENE_DMA_RING_SRST)))
+               return 0;
+
+       iowrite32(0x3, pdma->csr_ring + XGENE_DMA_RING_CLKEN);
+       iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_SRST);
+
+       /* Bring up memory */
+       iowrite32(0x0, pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN);
+
+       /* Force a barrier */
+       ioread32(pdma->csr_ring + XGENE_DMA_RING_MEM_RAM_SHUTDOWN);
+
+       /* reset may take up to 1ms */
+       usleep_range(1000, 1100);
+
+       if (ioread32(pdma->csr_ring + XGENE_DMA_RING_BLK_MEM_RDY)
+               != XGENE_DMA_RING_BLK_MEM_RDY_VAL) {
+               dev_err(pdma->dev,
+                       "Failed to release ring mngr memory from shutdown\n");
+               return -ENODEV;
+       }
+
+       /* program threshold set 1 and all hysteresis */
+       iowrite32(XGENE_DMA_RING_THRESLD0_SET1_VAL,
+                 pdma->csr_ring + XGENE_DMA_RING_THRESLD0_SET1);
+       iowrite32(XGENE_DMA_RING_THRESLD1_SET1_VAL,
+                 pdma->csr_ring + XGENE_DMA_RING_THRESLD1_SET1);
+       iowrite32(XGENE_DMA_RING_HYSTERESIS_VAL,
+                 pdma->csr_ring + XGENE_DMA_RING_HYSTERESIS);
+
+       /* Enable QPcore and assign error queue */
+       iowrite32(XGENE_DMA_RING_ENABLE,
+                 pdma->csr_ring + XGENE_DMA_RING_CONFIG);
+
+       return 0;
+}
+
+static int xgene_dma_init_mem(struct xgene_dma *pdma)
+{
+       int ret;
+
+       ret = xgene_dma_init_ring_mngr(pdma);
+       if (ret)
+               return ret;
+
+       /* Bring up memory */
+       iowrite32(0x0, pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN);
+
+       /* Force a barrier */
+       ioread32(pdma->csr_dma + XGENE_DMA_MEM_RAM_SHUTDOWN);
+
+       /* reset may take up to 1ms */
+       usleep_range(1000, 1100);
+
+       if (ioread32(pdma->csr_dma + XGENE_DMA_BLK_MEM_RDY)
+               != XGENE_DMA_BLK_MEM_RDY_VAL) {
+               dev_err(pdma->dev,
+                       "Failed to release DMA memory from shutdown\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static int xgene_dma_request_irqs(struct xgene_dma *pdma)
+{
+       struct xgene_dma_chan *chan;
+       int ret, i, j;
+
+       /* Register DMA error irq */
+       ret = devm_request_irq(pdma->dev, pdma->err_irq, xgene_dma_err_isr,
+                              0, "dma_error", pdma);
+       if (ret) {
+               dev_err(pdma->dev,
+                       "Failed to register error IRQ %d\n", pdma->err_irq);
+               return ret;
+       }
+
+       /* Register DMA channel rx irq */
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               chan = &pdma->chan[i];
+               ret = devm_request_irq(chan->dev, chan->rx_irq,
+                                      xgene_dma_chan_ring_isr,
+                                      0, chan->name, chan);
+               if (ret) {
+                       chan_err(chan, "Failed to register Rx IRQ %d\n",
+                                chan->rx_irq);
+                       devm_free_irq(pdma->dev, pdma->err_irq, pdma);
+
+                       for (j = 0; j < i; j++) {
+                               chan = &pdma->chan[i];
+                               devm_free_irq(chan->dev, chan->rx_irq, chan);
+                       }
+
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static void xgene_dma_free_irqs(struct xgene_dma *pdma)
+{
+       struct xgene_dma_chan *chan;
+       int i;
+
+       /* Free DMA device error irq */
+       devm_free_irq(pdma->dev, pdma->err_irq, pdma);
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               chan = &pdma->chan[i];
+               devm_free_irq(chan->dev, chan->rx_irq, chan);
+       }
+}
+
+static void xgene_dma_set_caps(struct xgene_dma_chan *chan,
+                              struct dma_device *dma_dev)
+{
+       /* Initialize DMA device capability mask */
+       dma_cap_zero(dma_dev->cap_mask);
+
+       /* Set DMA device capability */
+       dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+       dma_cap_set(DMA_SG, dma_dev->cap_mask);
+
+       /* Basically here, the X-Gene SoC DMA engine channel 0 supports XOR
+        * and channel 1 supports XOR, PQ both. First thing here is we have
+        * mechanism in hw to enable/disable PQ/XOR supports on channel 1,
+        * we can make sure this by reading SoC Efuse register.
+        * Second thing, we have hw errata that if we run channel 0 and
+        * channel 1 simultaneously with executing XOR and PQ request,
+        * suddenly DMA engine hangs, So here we enable XOR on channel 0 only
+        * if XOR and PQ supports on channel 1 is disabled.
+        */
+       if ((chan->id == XGENE_DMA_PQ_CHANNEL) &&
+           is_pq_enabled(chan->pdma)) {
+               dma_cap_set(DMA_PQ, dma_dev->cap_mask);
+               dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+       } else if ((chan->id == XGENE_DMA_XOR_CHANNEL) &&
+                  !is_pq_enabled(chan->pdma)) {
+               dma_cap_set(DMA_XOR, dma_dev->cap_mask);
+       }
+
+       /* Set base and prep routines */
+       dma_dev->dev = chan->dev;
+       dma_dev->device_alloc_chan_resources = xgene_dma_alloc_chan_resources;
+       dma_dev->device_free_chan_resources = xgene_dma_free_chan_resources;
+       dma_dev->device_issue_pending = xgene_dma_issue_pending;
+       dma_dev->device_tx_status = xgene_dma_tx_status;
+       dma_dev->device_prep_dma_memcpy = xgene_dma_prep_memcpy;
+       dma_dev->device_prep_dma_sg = xgene_dma_prep_sg;
+
+       if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+               dma_dev->device_prep_dma_xor = xgene_dma_prep_xor;
+               dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC;
+               dma_dev->xor_align = XGENE_DMA_XOR_ALIGNMENT;
+       }
+
+       if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
+               dma_dev->device_prep_dma_pq = xgene_dma_prep_pq;
+               dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC;
+               dma_dev->pq_align = XGENE_DMA_XOR_ALIGNMENT;
+       }
+}
+
+static int xgene_dma_async_register(struct xgene_dma *pdma, int id)
+{
+       struct xgene_dma_chan *chan = &pdma->chan[id];
+       struct dma_device *dma_dev = &pdma->dma_dev[id];
+       int ret;
+
+       chan->dma_chan.device = dma_dev;
+
+       spin_lock_init(&chan->lock);
+       INIT_LIST_HEAD(&chan->ld_pending);
+       INIT_LIST_HEAD(&chan->ld_running);
+       INIT_LIST_HEAD(&chan->ld_completed);
+       tasklet_init(&chan->tasklet, xgene_dma_tasklet_cb,
+                    (unsigned long)chan);
+
+       chan->pending = 0;
+       chan->desc_pool = NULL;
+       dma_cookie_init(&chan->dma_chan);
+
+       /* Setup dma device capabilities and prep routines */
+       xgene_dma_set_caps(chan, dma_dev);
+
+       /* Initialize DMA device list head */
+       INIT_LIST_HEAD(&dma_dev->channels);
+       list_add_tail(&chan->dma_chan.device_node, &dma_dev->channels);
+
+       /* Register with Linux async DMA framework*/
+       ret = dma_async_device_register(dma_dev);
+       if (ret) {
+               chan_err(chan, "Failed to register async device %d", ret);
+               tasklet_kill(&chan->tasklet);
+
+               return ret;
+       }
+
+       /* DMA capability info */
+       dev_info(pdma->dev,
+                "%s: CAPABILITY ( %s%s%s%s)\n", dma_chan_name(&chan->dma_chan),
+                dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "MEMCPY " : "",
+                dma_has_cap(DMA_SG, dma_dev->cap_mask) ? "SGCPY " : "",
+                dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "XOR " : "",
+                dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : "");
+
+       return 0;
+}
+
+static int xgene_dma_init_async(struct xgene_dma *pdma)
+{
+       int ret, i, j;
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL ; i++) {
+               ret = xgene_dma_async_register(pdma, i);
+               if (ret) {
+                       for (j = 0; j < i; j++) {
+                               dma_async_device_unregister(&pdma->dma_dev[j]);
+                               tasklet_kill(&pdma->chan[j].tasklet);
+                       }
+
+                       return ret;
+               }
+       }
+
+       return ret;
+}
+
+static void xgene_dma_async_unregister(struct xgene_dma *pdma)
+{
+       int i;
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++)
+               dma_async_device_unregister(&pdma->dma_dev[i]);
+}
+
+static void xgene_dma_init_channels(struct xgene_dma *pdma)
+{
+       struct xgene_dma_chan *chan;
+       int i;
+
+       pdma->ring_num = XGENE_DMA_RING_NUM;
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               chan = &pdma->chan[i];
+               chan->dev = pdma->dev;
+               chan->pdma = pdma;
+               chan->id = i;
+               snprintf(chan->name, sizeof(chan->name), "dmachan%d", chan->id);
+       }
+}
+
+static int xgene_dma_get_resources(struct platform_device *pdev,
+                                  struct xgene_dma *pdma)
+{
+       struct resource *res;
+       int irq, i;
+
+       /* Get DMA csr region */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get csr region\n");
+               return -ENXIO;
+       }
+
+       pdma->csr_dma = devm_ioremap(&pdev->dev, res->start,
+                                    resource_size(res));
+       if (!pdma->csr_dma) {
+               dev_err(&pdev->dev, "Failed to ioremap csr region");
+               return -ENOMEM;
+       }
+
+       /* Get DMA ring csr region */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get ring csr region\n");
+               return -ENXIO;
+       }
+
+       pdma->csr_ring =  devm_ioremap(&pdev->dev, res->start,
+                                      resource_size(res));
+       if (!pdma->csr_ring) {
+               dev_err(&pdev->dev, "Failed to ioremap ring csr region");
+               return -ENOMEM;
+       }
+
+       /* Get DMA ring cmd csr region */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get ring cmd csr region\n");
+               return -ENXIO;
+       }
+
+       pdma->csr_ring_cmd = devm_ioremap(&pdev->dev, res->start,
+                                         resource_size(res));
+       if (!pdma->csr_ring_cmd) {
+               dev_err(&pdev->dev, "Failed to ioremap ring cmd csr region");
+               return -ENOMEM;
+       }
+
+       /* Get efuse csr region */
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+       if (!res) {
+               dev_err(&pdev->dev, "Failed to get efuse csr region\n");
+               return -ENXIO;
+       }
+
+       pdma->csr_efuse = devm_ioremap(&pdev->dev, res->start,
+                                      resource_size(res));
+       if (!pdma->csr_efuse) {
+               dev_err(&pdev->dev, "Failed to ioremap efuse csr region");
+               return -ENOMEM;
+       }
+
+       /* Get DMA error interrupt */
+       irq = platform_get_irq(pdev, 0);
+       if (irq <= 0) {
+               dev_err(&pdev->dev, "Failed to get Error IRQ\n");
+               return -ENXIO;
+       }
+
+       pdma->err_irq = irq;
+
+       /* Get DMA Rx ring descriptor interrupts for all DMA channels */
+       for (i = 1; i <= XGENE_DMA_MAX_CHANNEL; i++) {
+               irq = platform_get_irq(pdev, i);
+               if (irq <= 0) {
+                       dev_err(&pdev->dev, "Failed to get Rx IRQ\n");
+                       return -ENXIO;
+               }
+
+               pdma->chan[i - 1].rx_irq = irq;
+       }
+
+       return 0;
+}
+
+static int xgene_dma_probe(struct platform_device *pdev)
+{
+       struct xgene_dma *pdma;
+       int ret, i;
+
+       pdma = devm_kzalloc(&pdev->dev, sizeof(*pdma), GFP_KERNEL);
+       if (!pdma)
+               return -ENOMEM;
+
+       pdma->dev = &pdev->dev;
+       platform_set_drvdata(pdev, pdma);
+
+       ret = xgene_dma_get_resources(pdev, pdma);
+       if (ret)
+               return ret;
+
+       pdma->clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(pdma->clk)) {
+               dev_err(&pdev->dev, "Failed to get clk\n");
+               return PTR_ERR(pdma->clk);
+       }
+
+       /* Enable clk before accessing registers */
+       ret = clk_prepare_enable(pdma->clk);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to enable clk %d\n", ret);
+               return ret;
+       }
+
+       /* Remove DMA RAM out of shutdown */
+       ret = xgene_dma_init_mem(pdma);
+       if (ret)
+               goto err_clk_enable;
+
+       ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(42));
+       if (ret) {
+               dev_err(&pdev->dev, "No usable DMA configuration\n");
+               goto err_dma_mask;
+       }
+
+       /* Initialize DMA channels software state */
+       xgene_dma_init_channels(pdma);
+
+       /* Configue DMA rings */
+       ret = xgene_dma_init_rings(pdma);
+       if (ret)
+               goto err_clk_enable;
+
+       ret = xgene_dma_request_irqs(pdma);
+       if (ret)
+               goto err_request_irq;
+
+       /* Configure and enable DMA engine */
+       xgene_dma_init_hw(pdma);
+
+       /* Register DMA device with linux async framework */
+       ret = xgene_dma_init_async(pdma);
+       if (ret)
+               goto err_async_init;
+
+       return 0;
+
+err_async_init:
+       xgene_dma_free_irqs(pdma);
+
+err_request_irq:
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++)
+               xgene_dma_delete_chan_rings(&pdma->chan[i]);
+
+err_dma_mask:
+err_clk_enable:
+       clk_disable_unprepare(pdma->clk);
+
+       return ret;
+}
+
+static int xgene_dma_remove(struct platform_device *pdev)
+{
+       struct xgene_dma *pdma = platform_get_drvdata(pdev);
+       struct xgene_dma_chan *chan;
+       int i;
+
+       xgene_dma_async_unregister(pdma);
+
+       /* Mask interrupts and disable DMA engine */
+       xgene_dma_mask_interrupts(pdma);
+       xgene_dma_disable(pdma);
+       xgene_dma_free_irqs(pdma);
+
+       for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) {
+               chan = &pdma->chan[i];
+               tasklet_kill(&chan->tasklet);
+               xgene_dma_delete_chan_rings(chan);
+       }
+
+       clk_disable_unprepare(pdma->clk);
+
+       return 0;
+}
+
+static const struct of_device_id xgene_dma_of_match_ptr[] = {
+       {.compatible = "apm,xgene-storm-dma",},
+       {},
+};
+MODULE_DEVICE_TABLE(of, xgene_dma_of_match_ptr);
+
+static struct platform_driver xgene_dma_driver = {
+       .probe = xgene_dma_probe,
+       .remove = xgene_dma_remove,
+       .driver = {
+               .name = "X-Gene-DMA",
+               .of_match_table = xgene_dma_of_match_ptr,
+       },
+};
+
+module_platform_driver(xgene_dma_driver);
+
+MODULE_DESCRIPTION("APM X-Gene SoC DMA driver");
+MODULE_AUTHOR("Rameshwar Prasad Sahu <rsahu@apm.com>");
+MODULE_AUTHOR("Loc Ho <lho@apm.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c

index bdd2a5d..d8434d4 100644 (file)
--- a/drivers/dma/xilinx/xilinx_vdma.c
+++ b/drivers/dma/xilinx/xilinx_vdma.c
@@ -22,9 +22,9 @@
   * (at your option) any later version.
   */
  
-#include <linux/amba/xilinx_dma.h>
  #include <linux/bitops.h>
  #include <linux/dmapool.h>
+#include <linux/dma/xilinx_dma.h>
  #include <linux/init.h>
  #include <linux/interrupt.h>
  #include <linux/io.h>
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c

index ef5feee..580e10a 100644 (file)
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -538,8 +538,14 @@ struct dma_buf *
  armada_gem_prime_export(struct drm_device *dev, struct drm_gem_object *obj,
         int flags)
  {
-       return dma_buf_export(obj, &armada_gem_prime_dmabuf_ops, obj->size,
-                             O_RDWR, NULL);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &armada_gem_prime_dmabuf_ops;
+       exp_info.size = obj->size;
+       exp_info.flags = O_RDWR;
+       exp_info.priv = obj;
+
+       return dma_buf_export(&exp_info);
  }
  
  struct drm_gem_object *
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c

index 7482b06..7fec191 100644 (file)
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -339,13 +339,17 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops =  {
  struct dma_buf *drm_gem_prime_export(struct drm_device *dev,
                                      struct drm_gem_object *obj, int flags)
  {
-       struct reservation_object *robj = NULL;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &drm_gem_prime_dmabuf_ops;
+       exp_info.size = obj->size;
+       exp_info.flags = flags;
+       exp_info.priv = obj;
  
         if (dev->driver->gem_prime_res_obj)
-               robj = dev->driver->gem_prime_res_obj(obj);
+               exp_info.resv = dev->driver->gem_prime_res_obj(obj);
  
-       return dma_buf_export(obj, &drm_gem_prime_dmabuf_ops, obj->size,
-                             flags, robj);
+       return dma_buf_export(&exp_info);
  }
  EXPORT_SYMBOL(drm_gem_prime_export);
  
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c

index 3833bf8..cd485c0 100644 (file)
--- a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c
@@ -185,9 +185,14 @@ struct dma_buf *exynos_dmabuf_prime_export(struct drm_device *drm_dev,
                                 struct drm_gem_object *obj, int flags)
  {
         struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
  
-       return dma_buf_export(obj, &exynos_dmabuf_ops,
-                               exynos_gem_obj->base.size, flags, NULL);
+       exp_info.ops = &exynos_dmabuf_ops;
+       exp_info.size = exynos_gem_obj->base.size;
+       exp_info.flags = flags;
+       exp_info.priv = obj;
+
+       return dma_buf_export(&exp_info);
  }
  
  struct drm_gem_object *exynos_dmabuf_prime_import(struct drm_device *drm_dev,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c

index c24c3f1..c302ffb 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1038,7 +1038,7 @@ static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv)
                 s->lra_limits[i] = I915_READ(GEN7_LRA_LIMITS_BASE + i * 4);
  
         s->media_max_req_count  = I915_READ(GEN7_MEDIA_MAX_REQ_COUNT);
-       s->gfx_max_req_count    = I915_READ(GEN7_MEDIA_MAX_REQ_COUNT);
+       s->gfx_max_req_count    = I915_READ(GEN7_GFX_MAX_REQ_COUNT);
  
         s->render_hwsp          = I915_READ(RENDER_HWS_PGA_GEN7);
         s->ecochk               = I915_READ(GAM_ECOCHK);
@@ -1120,7 +1120,7 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv)
                 I915_WRITE(GEN7_LRA_LIMITS_BASE + i * 4, s->lra_limits[i]);
  
         I915_WRITE(GEN7_MEDIA_MAX_REQ_COUNT, s->media_max_req_count);
-       I915_WRITE(GEN7_MEDIA_MAX_REQ_COUNT, s->gfx_max_req_count);
+       I915_WRITE(GEN7_GFX_MAX_REQ_COUNT, s->gfx_max_req_count);
  
         I915_WRITE(RENDER_HWS_PGA_GEN7, s->render_hwsp);
         I915_WRITE(GAM_ECOCHK,          s->ecochk);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c

index d07c0b1..53394f9 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2377,10 +2377,11 @@ int __i915_add_request(struct intel_engine_cs *ring,
                 ret = ring->add_request(ring);
                 if (ret)
                         return ret;
+
+               request->tail = intel_ring_get_tail(ringbuf);
         }
  
         request->head = request_start;
-       request->tail = intel_ring_get_tail(ringbuf);
  
         /* Whilst this request exists, batch_obj will be on the
          * active_list, and so will hold the active reference. Only when this
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c

index 82a1f4b..7998da2 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -230,6 +230,13 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
                                       struct drm_gem_object *gem_obj, int flags)
  {
         struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &i915_dmabuf_ops;
+       exp_info.size = gem_obj->size;
+       exp_info.flags = flags;
+       exp_info.priv = gem_obj;
+
  
         if (obj->ops->dmabuf_export) {
                 int ret = obj->ops->dmabuf_export(obj);
@@ -237,8 +244,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
                         return ERR_PTR(ret);
         }
  
-       return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags,
-                             NULL);
+       return dma_buf_export(&exp_info);
  }
  
  static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h

index b522eb6..3da1af4 100644 (file)
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1807,6 +1807,7 @@ enum skl_disp_power_wells {
  #define   GMBUS_CYCLE_INDEX    (2<<25)
  #define   GMBUS_CYCLE_STOP     (4<<25)
  #define   GMBUS_BYTE_COUNT_SHIFT 16
+#define   GMBUS_BYTE_COUNT_MAX   256U
  #define   GMBUS_SLAVE_INDEX_SHIFT 8
  #define   GMBUS_SLAVE_ADDR_SHIFT 1
  #define   GMBUS_SLAVE_READ     (1<<0)
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c

index b31088a..56e437e 100644 (file)
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -270,18 +270,17 @@ gmbus_wait_idle(struct drm_i915_private *dev_priv)
  }
  
  static int
-gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg,
-               u32 gmbus1_index)
+gmbus_xfer_read_chunk(struct drm_i915_private *dev_priv,
+                     unsigned short addr, u8 *buf, unsigned int len,
+                     u32 gmbus1_index)
  {
         int reg_offset = dev_priv->gpio_mmio_base;
-       u16 len = msg->len;
-       u8 *buf = msg->buf;
  
         I915_WRITE(GMBUS1 + reg_offset,
                    gmbus1_index |
                    GMBUS_CYCLE_WAIT |
                    (len << GMBUS_BYTE_COUNT_SHIFT) |
-                  (msg->addr << GMBUS_SLAVE_ADDR_SHIFT) |
+                  (addr << GMBUS_SLAVE_ADDR_SHIFT) |
                    GMBUS_SLAVE_READ | GMBUS_SW_RDY);
         while (len) {
                 int ret;
@@ -303,11 +302,35 @@ gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg,
  }
  
  static int
-gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg)
+gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg,
+               u32 gmbus1_index)
  {
-       int reg_offset = dev_priv->gpio_mmio_base;
-       u16 len = msg->len;
         u8 *buf = msg->buf;
+       unsigned int rx_size = msg->len;
+       unsigned int len;
+       int ret;
+
+       do {
+               len = min(rx_size, GMBUS_BYTE_COUNT_MAX);
+
+               ret = gmbus_xfer_read_chunk(dev_priv, msg->addr,
+                                           buf, len, gmbus1_index);
+               if (ret)
+                       return ret;
+
+               rx_size -= len;
+               buf += len;
+       } while (rx_size != 0);
+
+       return 0;
+}
+
+static int
+gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv,
+                      unsigned short addr, u8 *buf, unsigned int len)
+{
+       int reg_offset = dev_priv->gpio_mmio_base;
+       unsigned int chunk_size = len;
         u32 val, loop;
  
         val = loop = 0;
@@ -319,8 +342,8 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg)
         I915_WRITE(GMBUS3 + reg_offset, val);
         I915_WRITE(GMBUS1 + reg_offset,
                    GMBUS_CYCLE_WAIT |
-                  (msg->len << GMBUS_BYTE_COUNT_SHIFT) |
-                  (msg->addr << GMBUS_SLAVE_ADDR_SHIFT) |
+                  (chunk_size << GMBUS_BYTE_COUNT_SHIFT) |
+                  (addr << GMBUS_SLAVE_ADDR_SHIFT) |
                    GMBUS_SLAVE_WRITE | GMBUS_SW_RDY);
         while (len) {
                 int ret;
@@ -337,6 +360,29 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg)
                 if (ret)
                         return ret;
         }
+
+       return 0;
+}
+
+static int
+gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg)
+{
+       u8 *buf = msg->buf;
+       unsigned int tx_size = msg->len;
+       unsigned int len;
+       int ret;
+
+       do {
+               len = min(tx_size, GMBUS_BYTE_COUNT_MAX);
+
+               ret = gmbus_xfer_write_chunk(dev_priv, msg->addr, buf, len);
+               if (ret)
+                       return ret;
+
+               buf += len;
+               tx_size -= len;
+       } while (tx_size != 0);
+
         return 0;
  }
  
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index fcb074b..09df74b 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -393,6 +393,26 @@ static void execlists_context_unqueue(struct intel_engine_cs *ring)
                 }
         }
  
+       if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
+               /*
+                * WaIdleLiteRestore: make sure we never cause a lite
+                * restore with HEAD==TAIL
+                */
+               if (req0 && req0->elsp_submitted) {
+                       /*
+                        * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
+                        * as we resubmit the request. See gen8_emit_request()
+                        * for where we prepare the padding after the end of the
+                        * request.
+                        */
+                       struct intel_ringbuffer *ringbuf;
+
+                       ringbuf = req0->ctx->engine[ring->id].ringbuf;
+                       req0->tail += 8;
+                       req0->tail &= ringbuf->size - 1;
+               }
+       }
+
         WARN_ON(req1 && req1->elsp_submitted);
  
         execlists_submit_contexts(ring, req0->ctx, req0->tail,
@@ -1315,7 +1335,12 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf,
         u32 cmd;
         int ret;
  
-       ret = intel_logical_ring_begin(ringbuf, request->ctx, 6);
+       /*
+        * Reserve space for 2 NOOPs at the end of each request to be
+        * used as a workaround for not being allowed to do lite
+        * restore with HEAD==TAIL (WaIdleLiteRestore).
+        */
+       ret = intel_logical_ring_begin(ringbuf, request->ctx, 8);
         if (ret)
                 return ret;
  
@@ -1333,6 +1358,14 @@ static int gen8_emit_request(struct intel_ringbuffer *ringbuf,
         intel_logical_ring_emit(ringbuf, MI_NOOP);
         intel_logical_ring_advance_and_submit(ringbuf, request->ctx, request);
  
+       /*
+        * Here we add two extra NOOPs as padding to avoid
+        * lite restore of a context with HEAD==TAIL.
+        */
+       intel_logical_ring_emit(ringbuf, MI_NOOP);
+       intel_logical_ring_emit(ringbuf, MI_NOOP);
+       intel_logical_ring_advance(ringbuf);
+
         return 0;
  }
  
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c

index b46dabd..344fd78 100644 (file)
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -171,7 +171,14 @@ static struct dma_buf_ops omap_dmabuf_ops = {
  struct dma_buf *omap_gem_prime_export(struct drm_device *dev,
                 struct drm_gem_object *obj, int flags)
  {
-       return dma_buf_export(obj, &omap_dmabuf_ops, obj->size, flags, NULL);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &omap_dmabuf_ops;
+       exp_info.size = obj->size;
+       exp_info.flags = flags;
+       exp_info.priv = obj;
+
+       return dma_buf_export(&exp_info);
  }
  
  struct drm_gem_object *omap_gem_prime_import(struct drm_device *dev,
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c

index cfb4819..1217272 100644 (file)
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -627,8 +627,14 @@ struct dma_buf *tegra_gem_prime_export(struct drm_device *drm,
                                        struct drm_gem_object *gem,
                                        int flags)
  {
-       return dma_buf_export(gem, &tegra_gem_prime_dmabuf_ops, gem->size,
-                             flags, NULL);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &tegra_gem_prime_dmabuf_ops;
+       exp_info.size = gem->size;
+       exp_info.flags = flags;
+       exp_info.priv = gem;
+
+       return dma_buf_export(&exp_info);
  }
  
  struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm,
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c

index 12c8711..4f5fa8d 100644 (file)
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -683,6 +683,12 @@ int ttm_prime_handle_to_fd(struct ttm_object_file *tfile,
  
         dma_buf = prime->dma_buf;
         if (!dma_buf || !get_dma_buf_unless_doomed(dma_buf)) {
+               DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+               exp_info.ops = &tdev->ops;
+               exp_info.size = prime->size;
+               exp_info.flags = flags;
+               exp_info.priv = prime;
  
                 /*
                  * Need to create a new dma_buf, with memory accounting.
@@ -694,8 +700,7 @@ int ttm_prime_handle_to_fd(struct ttm_object_file *tfile,
                         goto out_unref;
                 }
  
-               dma_buf = dma_buf_export(prime, &tdev->ops,
-                                        prime->size, flags, NULL);
+               dma_buf = dma_buf_export(&exp_info);
                 if (IS_ERR(dma_buf)) {
                         ret = PTR_ERR(dma_buf);
                         ttm_mem_global_free(tdev->mem_glob,
diff --git a/drivers/gpu/drm/udl/udl_dmabuf.c b/drivers/gpu/drm/udl/udl_dmabuf.c

index ac8a66b..e2243ed 100644 (file)
--- a/drivers/gpu/drm/udl/udl_dmabuf.c
+++ b/drivers/gpu/drm/udl/udl_dmabuf.c
@@ -202,7 +202,14 @@ static struct dma_buf_ops udl_dmabuf_ops = {
  struct dma_buf *udl_gem_prime_export(struct drm_device *dev,
                                      struct drm_gem_object *obj, int flags)
  {
-       return dma_buf_export(obj, &udl_dmabuf_ops, obj->size, flags, NULL);
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &udl_dmabuf_ops;
+       exp_info.size = obj->size;
+       exp_info.flags = flags;
+       exp_info.priv = obj;
+
+       return dma_buf_export(&exp_info);
  }
  
  static int udl_prime_create(struct drm_device *dev,
diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c

index 875c22a..fa8dedd 100644 (file)
--- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c
+++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c
@@ -182,72 +182,41 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[],
         const u16 bus_num = bus->remote_bus;
         int request_len;
         int response_len;
-       u8 *request = NULL;
-       u8 *response = NULL;
         int result;
-       struct cros_ec_command msg;
+       struct cros_ec_command msg = { };
  
         request_len = ec_i2c_count_message(i2c_msgs, num);
         if (request_len < 0) {
                 dev_warn(dev, "Error constructing message %d\n", request_len);
-               result = request_len;
-               goto exit;
+               return request_len;
         }
+
         response_len = ec_i2c_count_response(i2c_msgs, num);
         if (response_len < 0) {
                 /* Unexpected; no errors should come when NULL response */
                 dev_warn(dev, "Error preparing response %d\n", response_len);
-               result = response_len;
-               goto exit;
-       }
-
-       if (request_len <= ARRAY_SIZE(bus->request_buf)) {
-               request = bus->request_buf;
-       } else {
-               request = kzalloc(request_len, GFP_KERNEL);
-               if (request == NULL) {
-                       result = -ENOMEM;
-                       goto exit;
-               }
-       }
-       if (response_len <= ARRAY_SIZE(bus->response_buf)) {
-               response = bus->response_buf;
-       } else {
-               response = kzalloc(response_len, GFP_KERNEL);
-               if (response == NULL) {
-                       result = -ENOMEM;
-                       goto exit;
-               }
+               return response_len;
         }
  
-       result = ec_i2c_construct_message(request, i2c_msgs, num, bus_num);
+       result = ec_i2c_construct_message(msg.outdata, i2c_msgs, num, bus_num);
         if (result)
-               goto exit;
+               return result;
  
         msg.version = 0;
         msg.command = EC_CMD_I2C_PASSTHRU;
-       msg.outdata = request;
         msg.outsize = request_len;
-       msg.indata = response;
         msg.insize = response_len;
  
         result = cros_ec_cmd_xfer(bus->ec, &msg);
         if (result < 0)
-               goto exit;
+               return result;
  
-       result = ec_i2c_parse_response(response, i2c_msgs, &num);
+       result = ec_i2c_parse_response(msg.indata, i2c_msgs, &num);
         if (result < 0)
-               goto exit;
+               return result;
  
         /* Indicate success by saying how many messages were sent */
-       result = num;
-exit:
-       if (request != bus->request_buf)
-               kfree(request);
-       if (response != bus->response_buf)
-               kfree(response);
-
-       return result;
+       return num;
  }
  
  static u32 ec_i2c_functionality(struct i2c_adapter *adap)
diff --git a/drivers/i2c/busses/i2c-digicolor.c b/drivers/i2c/busses/i2c-digicolor.c

index 03f1e55..9604024 100644 (file)
--- a/drivers/i2c/busses/i2c-digicolor.c
+++ b/drivers/i2c/busses/i2c-digicolor.c
@@ -12,11 +12,10 @@
  
  #include <linux/clk.h>
  #include <linux/completion.h>
+#include <linux/delay.h>
  #include <linux/i2c.h>
  #include <linux/interrupt.h>
  #include <linux/io.h>
-#include <linux/clk.h>
-#include <linux/delay.h>
  #include <linux/kernel.h>
  #include <linux/module.h>
  #include <linux/of.h>
diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c

index 56fceff..3e84f6c 100644 (file)
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c
@@ -913,7 +913,7 @@ static void __exit mxs_i2c_exit(void)
  module_exit(mxs_i2c_exit);
  
  MODULE_AUTHOR("Marek Vasut <marex@denx.de>");
-MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_AUTHOR("Wolfram Sang <kernel@pengutronix.de>");
  MODULE_DESCRIPTION("MXS I2C Bus Driver");
  MODULE_LICENSE("GPL");
  MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/i2c/busses/i2c-pca-platform.c b/drivers/i2c/busses/i2c-pca-platform.c

index 6336f02..3bd2e7d 100644 (file)
--- a/drivers/i2c/busses/i2c-pca-platform.c
+++ b/drivers/i2c/busses/i2c-pca-platform.c
@@ -285,6 +285,6 @@ static struct platform_driver i2c_pca_pf_driver = {
  
  module_platform_driver(i2c_pca_pf_driver);
  
-MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_AUTHOR("Wolfram Sang <kernel@pengutronix.de>");
  MODULE_DESCRIPTION("I2C-PCA9564/PCA9665 platform driver");
  MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c

index 5f96b1b..019d542 100644 (file)
--- a/drivers/i2c/busses/i2c-rk3x.c
+++ b/drivers/i2c/busses/i2c-rk3x.c
@@ -833,7 +833,7 @@ static int rk3x_i2c_xfer(struct i2c_adapter *adap,
         clk_disable(i2c->clk);
         spin_unlock_irqrestore(&i2c->lock, flags);
  
-       return ret;
+       return ret < 0 ? ret : num;
  }
  
  static u32 rk3x_i2c_func(struct i2c_adapter *adap)
diff --git a/drivers/i2c/busses/i2c-st.c b/drivers/i2c/busses/i2c-st.c

index 88057fa..ea72dca 100644 (file)
--- a/drivers/i2c/busses/i2c-st.c
+++ b/drivers/i2c/busses/i2c-st.c
@@ -10,17 +10,18 @@
   * published by the Free Software Foundation.
   */
  
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/i2c.h>
  #include <linux/clk.h>
-#include <linux/io.h>
  #include <linux/delay.h>
-#include <linux/interrupt.h>
  #include <linux/err.h>
-#include <linux/of.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
  #include <linux/of_address.h>
  #include <linux/of_irq.h>
+#include <linux/of.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
  
  /* SSC registers */
  #define SSC_BRG                                0x000
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c

index 098f698..987c124 100644 (file)
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1413,6 +1413,8 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
  
         dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name);
  
+       pm_runtime_no_callbacks(&adap->dev);
+
  #ifdef CONFIG_I2C_COMPAT
         res = class_compat_create_link(i2c_adapter_compat_class, &adap->dev,
                                        adap->dev.parent);
diff --git a/drivers/i2c/i2c-mux.c b/drivers/i2c/i2c-mux.c

index 593f7ca..06cc1ff 100644 (file)
--- a/drivers/i2c/i2c-mux.c
+++ b/drivers/i2c/i2c-mux.c
@@ -32,8 +32,9 @@ struct i2c_mux_priv {
         struct i2c_algorithm algo;
  
         struct i2c_adapter *parent;
-       void *mux_priv; /* the mux chip/device */
-       u32  chan_id;   /* the channel id */
+       struct device *mux_dev;
+       void *mux_priv;
+       u32 chan_id;
  
         int (*select)(struct i2c_adapter *, void *mux_priv, u32 chan_id);
         int (*deselect)(struct i2c_adapter *, void *mux_priv, u32 chan_id);
@@ -119,6 +120,7 @@ struct i2c_adapter *i2c_add_mux_adapter(struct i2c_adapter *parent,
  
         /* Set up private adapter data */
         priv->parent = parent;
+       priv->mux_dev = mux_dev;
         priv->mux_priv = mux_priv;
         priv->chan_id = chan_id;
         priv->select = select;
@@ -203,7 +205,7 @@ void i2c_del_mux_adapter(struct i2c_adapter *adap)
         char symlink_name[20];
  
         snprintf(symlink_name, sizeof(symlink_name), "channel-%u", priv->chan_id);
-       sysfs_remove_link(&adap->dev.parent->kobj, symlink_name);
+       sysfs_remove_link(&priv->mux_dev->kobj, symlink_name);
  
         sysfs_remove_link(&priv->adap.dev.kobj, "mux_device");
         i2c_del_adapter(adap);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c

index 8c014b5..38acb3c 100644 (file)
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -99,12 +99,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
         if (dmasync)
                 dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
  
+       if (!size)
+               return ERR_PTR(-EINVAL);
+
         /*
          * If the combination of the addr and size requested for this memory
          * region causes an integer overflow, return error.
          */
-       if ((PAGE_ALIGN(addr + size) <= size) ||
-           (PAGE_ALIGN(addr + size) <= addr))
+       if (((addr + size) < addr) ||
+           PAGE_ALIGN(addr + size) < (addr + size))
                 return ERR_PTR(-EINVAL);
  
         if (!can_do_mlock())
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c

index 259dcc7..88cce9b 100644 (file)
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -246,6 +246,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                 kfree(uqp);
         }
  
+       list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
+               struct ib_srq *srq = uobj->object;
+               struct ib_uevent_object *uevent =
+                       container_of(uobj, struct ib_uevent_object, uobject);
+
+               idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
+               ib_destroy_srq(srq);
+               ib_uverbs_release_uevent(file, uevent);
+               kfree(uevent);
+       }
+
         list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
                 struct ib_cq *cq = uobj->object;
                 struct ib_uverbs_event_file *ev_file = cq->cq_context;
@@ -258,17 +269,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                 kfree(ucq);
         }
  
-       list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
-               struct ib_srq *srq = uobj->object;
-               struct ib_uevent_object *uevent =
-                       container_of(uobj, struct ib_uevent_object, uobject);
-
-               idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-               ib_destroy_srq(srq);
-               ib_uverbs_release_uevent(file, uevent);
-               kfree(uevent);
-       }
-
         list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
                 struct ib_mr *mr = uobj->object;
  
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c

index 33c45df..1ca8e32 100644 (file)
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -82,14 +82,14 @@ static int create_file(const char *name, umode_t mode,
  {
         int error;
  
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         *dentry = lookup_one_len(name, parent, strlen(name));
         if (!IS_ERR(*dentry))
-               error = ipathfs_mknod(parent->d_inode, *dentry,
+               error = ipathfs_mknod(d_inode(parent), *dentry,
                                       mode, fops, data);
         else
                 error = PTR_ERR(*dentry);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
  
         return error;
  }
@@ -277,11 +277,11 @@ static int remove_file(struct dentry *parent, char *name)
         }
  
         spin_lock(&tmp->d_lock);
-       if (!d_unhashed(tmp) && tmp->d_inode) {
+       if (!d_unhashed(tmp) && d_really_is_positive(tmp)) {
                 dget_dlock(tmp);
                 __d_drop(tmp);
                 spin_unlock(&tmp->d_lock);
-               simple_unlink(parent->d_inode, tmp);
+               simple_unlink(d_inode(parent), tmp);
         } else
                 spin_unlock(&tmp->d_lock);
  
@@ -302,7 +302,7 @@ static int remove_device_files(struct super_block *sb,
         int ret;
  
         root = dget(sb->s_root);
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
         snprintf(unit, sizeof unit, "%02d", dd->ipath_unit);
         dir = lookup_one_len(unit, root, strlen(unit));
  
@@ -315,10 +315,10 @@ static int remove_device_files(struct super_block *sb,
         remove_file(dir, "flash");
         remove_file(dir, "atomic_counters");
         d_delete(dir);
-       ret = simple_rmdir(root->d_inode, dir);
+       ret = simple_rmdir(d_inode(root), dir);
  
  bail:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
         dput(root);
         return ret;
  }
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c

index a31e031..0f00204 100644 (file)
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -58,14 +58,19 @@ struct mlx4_alias_guid_work_context {
         int                     query_id;
         struct list_head        list;
         int                     block_num;
+       ib_sa_comp_mask         guid_indexes;
+       u8                      method;
  };
  
  struct mlx4_next_alias_guid_work {
         u8 port;
         u8 block_num;
+       u8 method;
         struct mlx4_sriov_alias_guid_info_rec_det rec_det;
  };
  
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+                                    int *resched_delay_sec);
  
  void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
                                          u8 port_num, u8 *p_data)
@@ -118,6 +123,57 @@ ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
         return IB_SA_COMP_MASK(4 + index);
  }
  
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+                                   int port,  int slave_init)
+{
+       __be64 curr_guid, required_guid;
+       int record_num = slave / 8;
+       int index = slave % 8;
+       int port_index = port - 1;
+       unsigned long flags;
+       int do_work = 0;
+
+       spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+       if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
+           GUID_STATE_NEED_PORT_INIT)
+               goto unlock;
+       if (!slave_init) {
+               curr_guid = *(__be64 *)&dev->sriov.
+                       alias_guid.ports_guid[port_index].
+                       all_rec_per_port[record_num].
+                       all_recs[GUID_REC_SIZE * index];
+               if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
+                   !curr_guid)
+                       goto unlock;
+               required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
+       } else {
+               required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
+               if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                       goto unlock;
+       }
+       *(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].
+               all_recs[GUID_REC_SIZE * index] = required_guid;
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].guid_indexes
+               |= mlx4_ib_get_aguid_comp_mask_from_ix(index);
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].status
+               = MLX4_GUID_INFO_STATUS_IDLE;
+       /* set to run immediately */
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].time_to_run = 0;
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].
+               guids_retry_schedule[index] = 0;
+       do_work = 1;
+unlock:
+       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+
+       if (do_work)
+               mlx4_ib_init_alias_guid_work(dev, port_index);
+}
+
  /*
   * Whenever new GUID is set/unset (guid table change) create event and
   * notify the relevant slave (master also should be notified).
@@ -138,10 +194,15 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
         enum slave_port_state prev_state;
         __be64 tmp_cur_ag, form_cache_ag;
         enum slave_port_gen_event gen_event;
+       struct mlx4_sriov_alias_guid_info_rec_det *rec;
+       unsigned long flags;
+       __be64 required_value;
  
         if (!mlx4_is_master(dev->dev))
                 return;
  
+       rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
+                       all_rec_per_port[block_num];
         guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
                                    ports_guid[port_num - 1].
                                    all_rec_per_port[block_num].guid_indexes);
@@ -166,8 +227,27 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
                  */
                 if (tmp_cur_ag != form_cache_ag)
                         continue;
-               mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
  
+               spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+               required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+
+               if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                       required_value = 0;
+
+               if (tmp_cur_ag == required_value) {
+                       rec->guid_indexes = rec->guid_indexes &
+                              ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
+               } else {
+                       /* may notify port down if value is 0 */
+                       if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
+                               spin_unlock_irqrestore(&dev->sriov.
+                                       alias_guid.ag_work_lock, flags);
+                               continue;
+                       }
+               }
+               spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
+                                      flags);
+               mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
                 /*2 cases: Valid GUID, and Invalid Guid*/
  
                 if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
@@ -188,10 +268,14 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
                         set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
                                                       MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
                                                       &gen_event);
-                       pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
-                                slave_id, port_num);
-                       mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
-                                                      MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+                       if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
+                               pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+                                        slave_id, port_num);
+                               mlx4_gen_port_state_change_eqe(dev->dev,
+                                                              slave_id,
+                                                              port_num,
+                                                              MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+                       }
                 }
         }
  }
@@ -206,6 +290,9 @@ static void aliasguid_query_handler(int status,
         int i;
         struct mlx4_sriov_alias_guid_info_rec_det *rec;
         unsigned long flags, flags1;
+       ib_sa_comp_mask declined_guid_indexes = 0;
+       ib_sa_comp_mask applied_guid_indexes = 0;
+       unsigned int resched_delay_sec = 0;
  
         if (!context)
                 return;
@@ -216,9 +303,9 @@ static void aliasguid_query_handler(int status,
                 all_rec_per_port[cb_ctx->block_num];
  
         if (status) {
-               rec->status = MLX4_GUID_INFO_STATUS_IDLE;
                 pr_debug("(port: %d) failed: status = %d\n",
                          cb_ctx->port, status);
+               rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC;
                 goto out;
         }
  
@@ -235,57 +322,101 @@ static void aliasguid_query_handler(int status,
         rec = &dev->sriov.alias_guid.ports_guid[port_index].
                 all_rec_per_port[guid_rec->block_num];
  
-       rec->status = MLX4_GUID_INFO_STATUS_SET;
-       rec->method = MLX4_GUID_INFO_RECORD_SET;
-
+       spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
         for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
-               __be64 tmp_cur_ag;
-               tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
+               __be64 sm_response, required_val;
+
+               if (!(cb_ctx->guid_indexes &
+                       mlx4_ib_get_aguid_comp_mask_from_ix(i)))
+                       continue;
+               sm_response = *(__be64 *)&guid_rec->guid_info_list
+                               [i * GUID_REC_SIZE];
+               required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+               if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
+                       if (required_val ==
+                           cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                               goto next_entry;
+
+                       /* A new value was set till we got the response */
+                       pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
+                                be64_to_cpu(required_val),
+                                i, guid_rec->block_num);
+                       goto entry_declined;
+               }
+
                 /* check if the SM didn't assign one of the records.
-                * if it didn't, if it was not sysadmin request:
-                * ask the SM to give a new GUID, (instead of the driver request).
+                * if it didn't, re-ask for.
                  */
-               if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
-                       mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
-                                    "block_num: %d was declined by SM, "
-                                    "ownership by %d (0 = driver, 1=sysAdmin,"
-                                    " 2=None)\n", __func__, i,
-                                    guid_rec->block_num, rec->ownership);
-                       if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
-                               /* if it is driver assign, asks for new GUID from SM*/
-                               *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
-                                       MLX4_NOT_SET_GUID;
-
-                               /* Mark the record as not assigned, and let it
-                                * be sent again in the next work sched.*/
-                               rec->status = MLX4_GUID_INFO_STATUS_IDLE;
-                               rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
-                       }
+               if (sm_response == MLX4_NOT_SET_GUID) {
+                       if (rec->guids_retry_schedule[i] == 0)
+                               mlx4_ib_warn(&dev->ib_dev,
+                                            "%s:Record num %d in  block_num: %d was declined by SM\n",
+                                            __func__, i,
+                                            guid_rec->block_num);
+                       goto entry_declined;
                 } else {
                        /* properly assigned record. */
                        /* We save the GUID we just got from the SM in the
                         * admin_guid in order to be persistent, and in the
                         * request from the sm the process will ask for the same GUID */
-                       if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
-                           tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
-                               /* the sysadmin assignment failed.*/
-                               mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
-                                            " admin guid after SysAdmin "
-                                            "configuration. "
-                                            "Record num %d in block_num:%d "
-                                            "was declined by SM, "
-                                            "new val(0x%llx) was kept\n",
-                                             __func__, i,
-                                            guid_rec->block_num,
-                                            be64_to_cpu(*(__be64 *) &
-                                                        rec->all_recs[i * GUID_REC_SIZE]));
+                       if (required_val &&
+                           sm_response != required_val) {
+                               /* Warn only on first retry */
+                               if (rec->guids_retry_schedule[i] == 0)
+                                       mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
+                                                    " admin guid after SysAdmin "
+                                                    "configuration. "
+                                                    "Record num %d in block_num:%d "
+                                                    "was declined by SM, "
+                                                    "new val(0x%llx) was kept, SM returned (0x%llx)\n",
+                                                     __func__, i,
+                                                    guid_rec->block_num,
+                                                    be64_to_cpu(required_val),
+                                                    be64_to_cpu(sm_response));
+                               goto entry_declined;
                         } else {
-                               memcpy(&rec->all_recs[i * GUID_REC_SIZE],
-                                      &guid_rec->guid_info_list[i * GUID_REC_SIZE],
-                                      GUID_REC_SIZE);
+                               *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
+                                       sm_response;
+                               if (required_val == 0)
+                                       mlx4_set_admin_guid(dev->dev,
+                                                           sm_response,
+                                                           (guid_rec->block_num
+                                                           * NUM_ALIAS_GUID_IN_REC) + i,
+                                                           cb_ctx->port);
+                               goto next_entry;
                         }
                 }
+entry_declined:
+               declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+               rec->guids_retry_schedule[i] =
+                       (rec->guids_retry_schedule[i] == 0) ?  1 :
+                       min((unsigned int)60,
+                           rec->guids_retry_schedule[i] * 2);
+               /* using the minimum value among all entries in that record */
+               resched_delay_sec = (resched_delay_sec == 0) ?
+                               rec->guids_retry_schedule[i] :
+                               min(resched_delay_sec,
+                                   rec->guids_retry_schedule[i]);
+               continue;
+
+next_entry:
+               rec->guids_retry_schedule[i] = 0;
         }
+
+       applied_guid_indexes =  cb_ctx->guid_indexes & ~declined_guid_indexes;
+       if (declined_guid_indexes ||
+           rec->guid_indexes & ~(applied_guid_indexes)) {
+               pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
+                        guid_rec->block_num,
+                        be64_to_cpu((__force __be64)rec->guid_indexes),
+                        be64_to_cpu((__force __be64)applied_guid_indexes),
+                        be64_to_cpu((__force __be64)declined_guid_indexes));
+               rec->time_to_run = ktime_get_real_ns() +
+                       resched_delay_sec * NSEC_PER_SEC;
+       } else {
+               rec->status = MLX4_GUID_INFO_STATUS_SET;
+       }
+       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
         /*
         The func is call here to close the cases when the
         sm doesn't send smp, so in the sa response the driver
@@ -297,10 +428,13 @@ static void aliasguid_query_handler(int status,
  out:
         spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
         spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
-       if (!dev->sriov.is_going_down)
+       if (!dev->sriov.is_going_down) {
+               get_low_record_time_index(dev, port_index, &resched_delay_sec);
                 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
                                    &dev->sriov.alias_guid.ports_guid[port_index].
-                                  alias_guid_work, 0);
+                                  alias_guid_work,
+                                  msecs_to_jiffies(resched_delay_sec * 1000));
+       }
         if (cb_ctx->sa_query) {
                 list_del(&cb_ctx->list);
                 kfree(cb_ctx);
@@ -317,9 +451,7 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
         ib_sa_comp_mask comp_mask = 0;
  
         dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
-               = MLX4_GUID_INFO_STATUS_IDLE;
-       dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
-               = MLX4_GUID_INFO_RECORD_SET;
+               = MLX4_GUID_INFO_STATUS_SET;
  
         /* calculate the comp_mask for that record.*/
         for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@@ -333,19 +465,21 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
                 need to assign GUIDs, then don't put it up for assignment.
                 */
                 if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
-                   (!index && !i) ||
-                   MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
-                   ports_guid[port - 1].all_rec_per_port[index].ownership)
+                   (!index && !i))
                         continue;
                 comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
         }
         dev->sriov.alias_guid.ports_guid[port - 1].
-               all_rec_per_port[index].guid_indexes = comp_mask;
+               all_rec_per_port[index].guid_indexes |= comp_mask;
+       if (dev->sriov.alias_guid.ports_guid[port - 1].
+           all_rec_per_port[index].guid_indexes)
+               dev->sriov.alias_guid.ports_guid[port - 1].
+               all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
+
  }
  
  static int set_guid_rec(struct ib_device *ibdev,
-                       u8 port, int index,
-                       struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+                       struct mlx4_next_alias_guid_work *rec)
  {
         int err;
         struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -354,6 +488,9 @@ static int set_guid_rec(struct ib_device *ibdev,
         struct ib_port_attr attr;
         struct mlx4_alias_guid_work_context *callback_context;
         unsigned long resched_delay, flags, flags1;
+       u8 port = rec->port + 1;
+       int index = rec->block_num;
+       struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
         struct list_head *head =
                 &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
  
@@ -380,6 +517,8 @@ static int set_guid_rec(struct ib_device *ibdev,
         callback_context->port = port;
         callback_context->dev = dev;
         callback_context->block_num = index;
+       callback_context->guid_indexes = rec_det->guid_indexes;
+       callback_context->method = rec->method;
  
         memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
  
@@ -399,7 +538,7 @@ static int set_guid_rec(struct ib_device *ibdev,
         callback_context->query_id =
                 ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
                                           ibdev, port, &guid_info_rec,
-                                         comp_mask, rec_det->method, 1000,
+                                         comp_mask, rec->method, 1000,
                                           GFP_KERNEL, aliasguid_query_handler,
                                           callback_context,
                                           &callback_context->sa_query);
@@ -434,6 +573,30 @@ out:
         return err;
  }
  
+static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
+{
+       int j, k, entry;
+       __be64 guid;
+
+       /*Check if the SM doesn't need to assign the GUIDs*/
+       for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+               for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
+                       entry = j * NUM_ALIAS_GUID_IN_REC + k;
+                       /* no request for the 0 entry (hw guid) */
+                       if (!entry || entry > dev->dev->persist->num_vfs ||
+                           !mlx4_is_slave_active(dev->dev, entry))
+                               continue;
+                       guid = mlx4_get_admin_guid(dev->dev, entry, port);
+                       *(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
+                               all_rec_per_port[j].all_recs
+                               [GUID_REC_SIZE * k] = guid;
+                       pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
+                                entry,
+                                be64_to_cpu(guid),
+                                port);
+               }
+       }
+}
  void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
  {
         int i;
@@ -443,6 +606,13 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
  
         spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
         spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+
+       if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
+               GUID_STATE_NEED_PORT_INIT) {
+               mlx4_ib_guid_port_init(dev, port);
+               dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
+                       (~GUID_STATE_NEED_PORT_INIT);
+       }
         for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
                 invalidate_guid_record(dev, port, i);
  
@@ -462,60 +632,107 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
         spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
  }
  
-/* The function returns the next record that was
- * not configured (or failed to be configured) */
-static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
-                                    struct mlx4_next_alias_guid_work *rec)
+static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
+                               struct mlx4_next_alias_guid_work *next_rec,
+                               int record_index)
  {
-       int j;
-       unsigned long flags;
+       int i;
+       int lowset_time_entry = -1;
+       int lowest_time = 0;
+       ib_sa_comp_mask delete_guid_indexes = 0;
+       ib_sa_comp_mask set_guid_indexes = 0;
+       struct mlx4_sriov_alias_guid_info_rec_det *rec =
+                       &dev->sriov.alias_guid.ports_guid[port].
+                       all_rec_per_port[record_index];
  
-       for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
-               spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
-               if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
-                   MLX4_GUID_INFO_STATUS_IDLE) {
-                       memcpy(&rec->rec_det,
-                              &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
-                              sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
-                       rec->port = port;
-                       rec->block_num = j;
-                       dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
-                               MLX4_GUID_INFO_STATUS_PENDING;
-                       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
-                       return 0;
+       for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+               if (!(rec->guid_indexes &
+                       mlx4_ib_get_aguid_comp_mask_from_ix(i)))
+                       continue;
+
+               if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] ==
+                               cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                       delete_guid_indexes |=
+                               mlx4_ib_get_aguid_comp_mask_from_ix(i);
+               else
+                       set_guid_indexes |=
+                               mlx4_ib_get_aguid_comp_mask_from_ix(i);
+
+               if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <=
+                       lowest_time) {
+                       lowset_time_entry = i;
+                       lowest_time = rec->guids_retry_schedule[i];
                 }
-               spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
         }
-       return -ENOENT;
+
+       memcpy(&next_rec->rec_det, rec, sizeof(*rec));
+       next_rec->port = port;
+       next_rec->block_num = record_index;
+
+       if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] ==
+                               cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) {
+               next_rec->rec_det.guid_indexes = delete_guid_indexes;
+               next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
+       } else {
+               next_rec->rec_det.guid_indexes = set_guid_indexes;
+               next_rec->method = MLX4_GUID_INFO_RECORD_SET;
+       }
  }
  
-static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
-                                            int rec_index,
-                                            struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+/* return index of record that should be updated based on lowest
+ * rescheduled time
+ */
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+                                    int *resched_delay_sec)
  {
-       dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
-               rec_det->guid_indexes;
-       memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
-              rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
-       dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
-               rec_det->status;
+       int record_index = -1;
+       u64 low_record_time = 0;
+       struct mlx4_sriov_alias_guid_info_rec_det rec;
+       int j;
+
+       for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+               rec = dev->sriov.alias_guid.ports_guid[port].
+                       all_rec_per_port[j];
+               if (rec.status == MLX4_GUID_INFO_STATUS_IDLE &&
+                   rec.guid_indexes) {
+                       if (record_index == -1 ||
+                           rec.time_to_run < low_record_time) {
+                               record_index = j;
+                               low_record_time = rec.time_to_run;
+                       }
+               }
+       }
+       if (resched_delay_sec) {
+               u64 curr_time = ktime_get_real_ns();
+
+               *resched_delay_sec = (low_record_time < curr_time) ? 0 :
+                       div_u64((low_record_time - curr_time), NSEC_PER_SEC);
+       }
+
+       return record_index;
  }
  
-static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
+/* The function returns the next record that was
+ * not configured (or failed to be configured) */
+static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
+                                    struct mlx4_next_alias_guid_work *rec)
  {
-       int j;
-       struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
-
-       for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
-               memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
-               rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
-                       IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
-                       IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
-                       IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
-                       IB_SA_GUIDINFO_REC_GID7;
-               rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
-               set_administratively_guid_record(dev, port, j, &rec_det);
+       unsigned long flags;
+       int record_index;
+       int ret = 0;
+
+       spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+       record_index = get_low_record_time_index(dev, port, NULL);
+
+       if (record_index < 0) {
+               ret = -ENOENT;
+               goto out;
         }
+
+       set_required_record(dev, port, rec, record_index);
+out:
+       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+       return ret;
  }
  
  static void alias_guid_work(struct work_struct *work)
@@ -545,9 +762,7 @@ static void alias_guid_work(struct work_struct *work)
                 goto out;
         }
  
-       set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
-                    &rec->rec_det);
-
+       set_guid_rec(&dev->ib_dev, rec);
  out:
         kfree(rec);
  }
@@ -562,6 +777,12 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
         spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
         spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
         if (!dev->sriov.is_going_down) {
+               /* If there is pending one should cancell then run, otherwise
+                 * won't run till previous one is ended as same work
+                 * struct is used.
+                 */
+               cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
+                                   alias_guid_work);
                 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
                            &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
         }
@@ -609,7 +830,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
  {
         char alias_wq_name[15];
         int ret = 0;
-       int i, j, k;
+       int i, j;
         union ib_gid gid;
  
         if (!mlx4_is_master(dev->dev))
@@ -633,33 +854,25 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
         for (i = 0 ; i < dev->num_ports; i++) {
                 memset(&dev->sriov.alias_guid.ports_guid[i], 0,
                        sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
-               /*Check if the SM doesn't need to assign the GUIDs*/
+               dev->sriov.alias_guid.ports_guid[i].state_flags |=
+                               GUID_STATE_NEED_PORT_INIT;
                 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
-                       if (mlx4_ib_sm_guid_assign) {
-                               dev->sriov.alias_guid.ports_guid[i].
-                                       all_rec_per_port[j].
-                                       ownership = MLX4_GUID_DRIVER_ASSIGN;
-                               continue;
-                       }
-                       dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
-                                       ownership = MLX4_GUID_NONE_ASSIGN;
-                       /*mark each val as it was deleted,
-                         till the sysAdmin will give it valid val*/
-                       for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
-                               *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
-                                       all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
-                                               cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
-                       }
+                       /* mark each val as it was deleted */
+                       memset(dev->sriov.alias_guid.ports_guid[i].
+                               all_rec_per_port[j].all_recs, 0xFF,
+                               sizeof(dev->sriov.alias_guid.ports_guid[i].
+                               all_rec_per_port[j].all_recs));
                 }
                 INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
                 /*prepare the records, set them to be allocated by sm*/
+               if (mlx4_ib_sm_guid_assign)
+                       for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
+                               mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
                 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
                         invalidate_guid_record(dev, i + 1, j);
  
                 dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
                 dev->sriov.alias_guid.ports_guid[i].port  = i;
-               if (mlx4_ib_sm_guid_assign)
-                       set_all_slaves_guids(dev, i);
  
                 snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
                 dev->sriov.alias_guid.ports_guid[i].wq =
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c

index 5904026..9cd2b00 100644 (file)
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1430,6 +1430,10 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                                                         tun_qp->ring[i].addr,
                                                         rx_buf_size,
                                                         DMA_FROM_DEVICE);
+               if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
+                       kfree(tun_qp->ring[i].addr);
+                       goto err;
+               }
         }
  
         for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
@@ -1442,6 +1446,11 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                                           tun_qp->tx_ring[i].buf.addr,
                                           tx_buf_size,
                                           DMA_TO_DEVICE);
+               if (ib_dma_mapping_error(ctx->ib_dev,
+                                        tun_qp->tx_ring[i].buf.map)) {
+                       kfree(tun_qp->tx_ring[i].buf.addr);
+                       goto tx_err;
+               }
                 tun_qp->tx_ring[i].ah = NULL;
         }
         spin_lock_init(&tun_qp->tx_lock);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c

index 976bea7..57070c5 100644 (file)
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -66,9 +66,9 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
  MODULE_LICENSE("Dual BSD/GPL");
  MODULE_VERSION(DRV_VERSION);
  
-int mlx4_ib_sm_guid_assign = 1;
+int mlx4_ib_sm_guid_assign = 0;
  module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
-MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
+MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
  
  static const char mlx4_ib_version[] =
         DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
@@ -2791,9 +2791,31 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
         case MLX4_DEV_EVENT_SLAVE_INIT:
                 /* here, p is the slave id */
                 do_slave_init(ibdev, p, 1);
+               if (mlx4_is_master(dev)) {
+                       int i;
+
+                       for (i = 1; i <= ibdev->num_ports; i++) {
+                               if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+                                       == IB_LINK_LAYER_INFINIBAND)
+                                       mlx4_ib_slave_alias_guid_event(ibdev,
+                                                                      p, i,
+                                                                      1);
+                       }
+               }
                 return;
  
         case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+               if (mlx4_is_master(dev)) {
+                       int i;
+
+                       for (i = 1; i <= ibdev->num_ports; i++) {
+                               if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+                                       == IB_LINK_LAYER_INFINIBAND)
+                                       mlx4_ib_slave_alias_guid_event(ibdev,
+                                                                      p, i,
+                                                                      0);
+                       }
+               }
                 /* here, p is the slave id */
                 do_slave_init(ibdev, p, 0);
                 return;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h

index f829fd9..fce3934 100644 (file)
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -342,14 +342,9 @@ struct mlx4_ib_ah {
  enum mlx4_guid_alias_rec_status {
         MLX4_GUID_INFO_STATUS_IDLE,
         MLX4_GUID_INFO_STATUS_SET,
-       MLX4_GUID_INFO_STATUS_PENDING,
  };
  
-enum mlx4_guid_alias_rec_ownership {
-       MLX4_GUID_DRIVER_ASSIGN,
-       MLX4_GUID_SYSADMIN_ASSIGN,
-       MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
-};
+#define GUID_STATE_NEED_PORT_INIT 0x01
  
  enum mlx4_guid_alias_rec_method {
         MLX4_GUID_INFO_RECORD_SET       = IB_MGMT_METHOD_SET,
@@ -360,8 +355,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
         u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
         ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
         enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
-       u8 method; /*set or delete*/
-       enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
+       unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
+       u64 time_to_run;
  };
  
  struct mlx4_sriov_alias_guid_port_rec_det {
@@ -369,6 +364,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
         struct workqueue_struct *wq;
         struct delayed_work alias_guid_work;
         u8 port;
+       u32 state_flags;
         struct mlx4_sriov_alias_guid *parent;
         struct list_head cb_list;
  };
@@ -802,6 +798,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
  void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
                              struct attribute *attr);
  ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+                                   int port, int slave_init);
  
  int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
  
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c

index ed2bd67..02fc91c 100644 (file)
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -566,6 +566,10 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
                         ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
                                           sizeof (struct mlx4_ib_proxy_sqp_hdr),
                                           DMA_FROM_DEVICE);
+               if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
+                       kfree(qp->sqp_proxy_rcv[i].addr);
+                       goto err;
+               }
         }
         return 0;
  
@@ -2605,8 +2609,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
  
         memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
  
-       *lso_hdr_sz  = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
-                                  wr->wr.ud.hlen);
+       *lso_hdr_sz  = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
         *lso_seg_len = halign;
         return 0;
  }
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c

index d10c2b8..6797108 100644 (file)
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -46,21 +46,17 @@
  static ssize_t show_admin_alias_guid(struct device *dev,
                               struct device_attribute *attr, char *buf)
  {
-       int record_num;/*0-15*/
-       int guid_index_in_rec; /*0 - 7*/
         struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
                 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
         struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
         struct mlx4_ib_dev *mdev = port->dev;
+       __be64 sysadmin_ag_val;
  
-       record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
-       guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
+       sysadmin_ag_val = mlx4_get_admin_guid(mdev->dev,
+                                             mlx4_ib_iov_dentry->entry_num,
+                                             port->num);
  
-       return sprintf(buf, "%llx\n",
-                      be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
-                                  ports_guid[port->num - 1].
-                                  all_rec_per_port[record_num].
-                                  all_recs[8 * guid_index_in_rec]));
+       return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
  }
  
  /* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -80,6 +76,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
         struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
         struct mlx4_ib_dev *mdev = port->dev;
         u64 sysadmin_ag_val;
+       unsigned long flags;
  
         record_num = mlx4_ib_iov_dentry->entry_num / 8;
         guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
@@ -87,6 +84,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
                 pr_err("GUID 0 block 0 is RO\n");
                 return count;
         }
+       spin_lock_irqsave(&mdev->sriov.alias_guid.ag_work_lock, flags);
         sscanf(buf, "%llx", &sysadmin_ag_val);
         *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
                 all_rec_per_port[record_num].
@@ -96,33 +94,15 @@ static ssize_t store_admin_alias_guid(struct device *dev,
         /* Change the state to be pending for update */
         mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
                 = MLX4_GUID_INFO_STATUS_IDLE ;
-
-       mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-               = MLX4_GUID_INFO_RECORD_SET;
-
-       switch (sysadmin_ag_val) {
-       case MLX4_GUID_FOR_DELETE_VAL:
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-                       = MLX4_GUID_INFO_RECORD_DELETE;
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-                       = MLX4_GUID_SYSADMIN_ASSIGN;
-               break;
-       /* The sysadmin requests the SM to re-assign */
-       case MLX4_NOT_SET_GUID:
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-                       = MLX4_GUID_DRIVER_ASSIGN;
-               break;
-       /* The sysadmin requests a specific value.*/
-       default:
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-                       = MLX4_GUID_SYSADMIN_ASSIGN;
-               break;
-       }
+       mlx4_set_admin_guid(mdev->dev, cpu_to_be64(sysadmin_ag_val),
+                           mlx4_ib_iov_dentry->entry_num,
+                           port->num);
  
         /* set the record index */
         mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
-               = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+               |= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
  
+       spin_unlock_irqrestore(&mdev->sriov.alias_guid.ag_work_lock, flags);
         mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
  
         return count;
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c

index 650897a..bdd5d38 100644 (file)
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -89,14 +89,14 @@ static int create_file(const char *name, umode_t mode,
  {
         int error;
  
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         *dentry = lookup_one_len(name, parent, strlen(name));
         if (!IS_ERR(*dentry))
-               error = qibfs_mknod(parent->d_inode, *dentry,
+               error = qibfs_mknod(d_inode(parent), *dentry,
                                     mode, fops, data);
         else
                 error = PTR_ERR(*dentry);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
  
         return error;
  }
@@ -455,10 +455,10 @@ static int remove_file(struct dentry *parent, char *name)
         }
  
         spin_lock(&tmp->d_lock);
-       if (!d_unhashed(tmp) && tmp->d_inode) {
+       if (!d_unhashed(tmp) && d_really_is_positive(tmp)) {
                 __d_drop(tmp);
                 spin_unlock(&tmp->d_lock);
-               simple_unlink(parent->d_inode, tmp);
+               simple_unlink(d_inode(parent), tmp);
         } else {
                 spin_unlock(&tmp->d_lock);
         }
@@ -481,7 +481,7 @@ static int remove_device_files(struct super_block *sb,
         int ret, i;
  
         root = dget(sb->s_root);
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
         snprintf(unit, sizeof(unit), "%u", dd->unit);
         dir = lookup_one_len(unit, root, strlen(unit));
  
@@ -491,7 +491,7 @@ static int remove_device_files(struct super_block *sb,
                 goto bail;
         }
  
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
         remove_file(dir, "counters");
         remove_file(dir, "counter_names");
         remove_file(dir, "portcounter_names");
@@ -506,13 +506,13 @@ static int remove_device_files(struct super_block *sb,
                 }
         }
         remove_file(dir, "flash");
-       mutex_unlock(&dir->d_inode->i_mutex);
-       ret = simple_rmdir(root->d_inode, dir);
+       mutex_unlock(&d_inode(dir)->i_mutex);
+       ret = simple_rmdir(d_inode(root), dir);
         d_delete(dir);
         dput(dir);
  
  bail:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
         dput(root);
         return ret;
  }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h

index d7562be..bd94b0a 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -87,7 +87,6 @@ enum {
         IPOIB_FLAG_ADMIN_UP       = 2,
         IPOIB_PKEY_ASSIGNED       = 3,
         IPOIB_FLAG_SUBINTERFACE   = 5,
-       IPOIB_MCAST_RUN           = 6,
         IPOIB_STOP_REAPER         = 7,
         IPOIB_FLAG_ADMIN_CM       = 9,
         IPOIB_FLAG_UMCAST         = 10,
@@ -98,9 +97,15 @@ enum {
  
         IPOIB_MCAST_FLAG_FOUND    = 0,  /* used in set_multicast_list */
         IPOIB_MCAST_FLAG_SENDONLY = 1,
-       IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
+       /*
+        * For IPOIB_MCAST_FLAG_BUSY
+        * When set, in flight join and mcast->mc is unreliable
+        * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+        *   haven't started yet
+        * When clear and mcast->mc is valid pointer, join was successful
+        */
+       IPOIB_MCAST_FLAG_BUSY     = 2,
         IPOIB_MCAST_FLAG_ATTACHED = 3,
-       IPOIB_MCAST_JOIN_STARTED  = 4,
  
         MAX_SEND_CQE              = 16,
         IPOIB_CM_COPYBREAK        = 256,
@@ -148,6 +153,7 @@ struct ipoib_mcast {
  
         unsigned long created;
         unsigned long backoff;
+       unsigned long delay_until;
  
         unsigned long flags;
         unsigned char logcount;
@@ -292,6 +298,11 @@ struct ipoib_neigh_table {
         struct completion               deleted;
  };
  
+struct ipoib_qp_state_validate {
+       struct work_struct work;
+       struct ipoib_dev_priv   *priv;
+};
+
  /*
   * Device private locking: network stack tx_lock protects members used
   * in TX fast path, lock protects everything else.  lock nests inside
@@ -317,6 +328,7 @@ struct ipoib_dev_priv {
         struct list_head multicast_list;
         struct rb_root multicast_tree;
  
+       struct workqueue_struct *wq;
         struct delayed_work mcast_task;
         struct work_struct carrier_on_task;
         struct work_struct flush_light;
@@ -426,11 +438,6 @@ struct ipoib_neigh {
  #define IPOIB_UD_MTU(ib_mtu)           (ib_mtu - IPOIB_ENCAP_LEN)
  #define IPOIB_UD_BUF_SIZE(ib_mtu)      (ib_mtu + IB_GRH_BYTES)
  
-static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
-{
-       return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
-}
-
  void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
  static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
  {
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
  void ipoib_pkey_event(struct work_struct *work);
  void ipoib_ib_dev_cleanup(struct net_device *dev);
  
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
  int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
  void ipoib_pkey_dev_check_presence(struct net_device *dev);
  
  int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
  
  void ipoib_mcast_restart_task(struct work_struct *work);
  int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
  
  void ipoib_mcast_dev_down(struct net_device *dev);
  void ipoib_mcast_dev_flush(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c

index 933efce..56959ad 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
         }
  
         spin_lock_irq(&priv->lock);
-       queue_delayed_work(ipoib_workqueue,
+       queue_delayed_work(priv->wq,
                            &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
         /* Add this entry to passive ids list head, but do not re-add it
          * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                         spin_lock_irqsave(&priv->lock, flags);
                         list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
                         ipoib_cm_start_rx_drain(priv);
-                       queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+                       queue_work(priv->wq, &priv->cm.rx_reap_task);
                         spin_unlock_irqrestore(&priv->lock, flags);
                 } else
                         ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                                 spin_lock_irqsave(&priv->lock, flags);
                                 list_move(&p->list, &priv->cm.rx_reap_list);
                                 spin_unlock_irqrestore(&priv->lock, flags);
-                               queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+                               queue_work(priv->wq, &priv->cm.rx_reap_task);
                         }
                         return;
                 }
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
  
                 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                         list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
+                       queue_work(priv->wq, &priv->cm.reap_task);
                 }
  
                 clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
  
                 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                         list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
+                       queue_work(priv->wq, &priv->cm.reap_task);
                 }
  
                 spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
         tx->dev = dev;
         list_add(&tx->list, &priv->cm.start_list);
         set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-       queue_work(ipoib_workqueue, &priv->cm.start_task);
+       queue_work(priv->wq, &priv->cm.start_task);
         return tx;
  }
  
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
         if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                 spin_lock_irqsave(&priv->lock, flags);
                 list_move(&tx->list, &priv->cm.reap_list);
-               queue_work(ipoib_workqueue, &priv->cm.reap_task);
+               queue_work(priv->wq, &priv->cm.reap_task);
                 ipoib_dbg(priv, "Reap connection for gid %pI6\n",
                           tx->neigh->daddr + 4);
                 tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
  
         skb_queue_tail(&priv->cm.skb_queue, skb);
         if (e)
-               queue_work(ipoib_workqueue, &priv->cm.skb_task);
+               queue_work(priv->wq, &priv->cm.skb_task);
  }
  
  static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
         }
  
         if (!list_empty(&priv->cm.passive_ids))
-               queue_delayed_work(ipoib_workqueue,
+               queue_delayed_work(priv->wq,
                                    &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
         spin_unlock_irq(&priv->lock);
  }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c

index 72626c3..63b92cb 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -94,39 +94,9 @@ void ipoib_free_ah(struct kref *kref)
  static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
                                   u64 mapping[IPOIB_UD_RX_SG])
  {
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE,
-                                   DMA_FROM_DEVICE);
-               ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
-                                 DMA_FROM_DEVICE);
-       } else
-               ib_dma_unmap_single(priv->ca, mapping[0],
-                                   IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
-                                   DMA_FROM_DEVICE);
-}
-
-static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
-                                  struct sk_buff *skb,
-                                  unsigned int length)
-{
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
-               unsigned int size;
-               /*
-                * There is only two buffers needed for max_payload = 4K,
-                * first buf size is IPOIB_UD_HEAD_SIZE
-                */
-               skb->tail += IPOIB_UD_HEAD_SIZE;
-               skb->len  += length;
-
-               size = length - IPOIB_UD_HEAD_SIZE;
-
-               skb_frag_size_set(frag, size);
-               skb->data_len += size;
-               skb->truesize += PAGE_SIZE;
-       } else
-               skb_put(skb, length);
-
+       ib_dma_unmap_single(priv->ca, mapping[0],
+                           IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
+                           DMA_FROM_DEVICE);
  }
  
  static int ipoib_ib_post_receive(struct net_device *dev, int id)
@@ -156,18 +126,11 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct sk_buff *skb;
         int buf_size;
-       int tailroom;
         u64 *mapping;
  
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               buf_size = IPOIB_UD_HEAD_SIZE;
-               tailroom = 128; /* reserve some tailroom for IP/TCP headers */
-       } else {
-               buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-               tailroom = 0;
-       }
+       buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
  
-       skb = dev_alloc_skb(buf_size + tailroom + 4);
+       skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
         if (unlikely(!skb))
                 return NULL;
  
@@ -184,23 +147,8 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
         if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
                 goto error;
  
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               struct page *page = alloc_page(GFP_ATOMIC);
-               if (!page)
-                       goto partial_error;
-               skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
-               mapping[1] =
-                       ib_dma_map_page(priv->ca, page,
-                                       0, PAGE_SIZE, DMA_FROM_DEVICE);
-               if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
-                       goto partial_error;
-       }
-
         priv->rx_ring[id].skb = skb;
         return skb;
-
-partial_error:
-       ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
  error:
         dev_kfree_skb_any(skb);
         return NULL;
@@ -278,7 +226,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                        wc->byte_len, wc->slid);
  
         ipoib_ud_dma_unmap_rx(priv, mapping);
-       ipoib_ud_skb_put_frags(priv, skb, wc->byte_len);
+
+       skb_put(skb, wc->byte_len);
  
         /* First byte of dgid signals multicast when 0xff */
         dgid = &((struct ib_grh *)skb->data)->dgid;
@@ -296,6 +245,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
         skb_reset_mac_header(skb);
         skb_pull(skb, IPOIB_ENCAP_LEN);
  
+       skb->truesize = SKB_TRUESIZE(skb->len);
+
         ++dev->stats.rx_packets;
         dev->stats.rx_bytes += skb->len;
  
@@ -376,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
         }
  }
  
+/*
+ * As the result of a completion error the QP Can be transferred to SQE states.
+ * The function checks if the (send)QP is in SQE state and
+ * moves it back to RTS state, that in order to have it functional again.
+ */
+static void ipoib_qp_state_validate_work(struct work_struct *work)
+{
+       struct ipoib_qp_state_validate *qp_work =
+               container_of(work, struct ipoib_qp_state_validate, work);
+
+       struct ipoib_dev_priv *priv = qp_work->priv;
+       struct ib_qp_attr qp_attr;
+       struct ib_qp_init_attr query_init_attr;
+       int ret;
+
+       ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
+       if (ret) {
+               ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
+                          __func__, ret);
+               goto free_res;
+       }
+       pr_info("%s: QP: 0x%x is in state: %d\n",
+               __func__, priv->qp->qp_num, qp_attr.qp_state);
+
+       /* currently support only in SQE->RTS transition*/
+       if (qp_attr.qp_state == IB_QPS_SQE) {
+               qp_attr.qp_state = IB_QPS_RTS;
+
+               ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
+               if (ret) {
+                       pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
+                               ret, priv->qp->qp_num);
+                       goto free_res;
+               }
+               pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
+                       __func__, priv->qp->qp_num);
+       } else {
+               pr_warn("QP (%d) will stay in state: %d\n",
+                       priv->qp->qp_num, qp_attr.qp_state);
+       }
+
+free_res:
+       kfree(qp_work);
+}
+
  static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -407,10 +403,22 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
                 netif_wake_queue(dev);
  
         if (wc->status != IB_WC_SUCCESS &&
-           wc->status != IB_WC_WR_FLUSH_ERR)
+           wc->status != IB_WC_WR_FLUSH_ERR) {
+               struct ipoib_qp_state_validate *qp_work;
                 ipoib_warn(priv, "failed send event "
                            "(status=%d, wrid=%d vend_err %x)\n",
                            wc->status, wr_id, wc->vendor_err);
+               qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
+               if (!qp_work) {
+                       ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
+                                  __func__, priv->qp->qp_num);
+                       return;
+               }
+
+               INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
+               qp_work->priv = priv;
+               queue_work(priv->wq, &qp_work->work);
+       }
  }
  
  static int poll_tx(struct ipoib_dev_priv *priv)
@@ -655,16 +663,33 @@ void ipoib_reap_ah(struct work_struct *work)
         __ipoib_reap_ah(dev);
  
         if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+               queue_delayed_work(priv->wq, &priv->ah_reap_task,
                                    round_jiffies_relative(HZ));
  }
  
+static void ipoib_flush_ah(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       cancel_delayed_work(&priv->ah_reap_task);
+       flush_workqueue(priv->wq);
+       ipoib_reap_ah(&priv->ah_reap_task.work);
+}
+
+static void ipoib_stop_ah(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       set_bit(IPOIB_STOP_REAPER, &priv->flags);
+       ipoib_flush_ah(dev);
+}
+
  static void ipoib_ib_tx_timer_func(unsigned long ctx)
  {
         drain_tx_cq((struct net_device *)ctx);
  }
  
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         int ret;
@@ -696,7 +721,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
         }
  
         clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-       queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+       queue_delayed_work(priv->wq, &priv->ah_reap_task,
                            round_jiffies_relative(HZ));
  
         if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +731,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
  dev_stop:
         if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
                 napi_enable(&priv->napi);
-       ipoib_ib_dev_stop(dev, flush);
+       ipoib_ib_dev_stop(dev);
         return -1;
  }
  
@@ -738,7 +763,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
         return ipoib_mcast_start_thread(dev);
  }
  
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
  
@@ -747,7 +772,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
         clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
         netif_carrier_off(dev);
  
-       ipoib_mcast_stop_thread(dev, flush);
+       ipoib_mcast_stop_thread(dev);
         ipoib_mcast_dev_flush(dev);
  
         ipoib_flush_paths(dev);
@@ -807,7 +832,7 @@ void ipoib_drain_cq(struct net_device *dev)
         local_bh_enable();
  }
  
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ib_qp_attr qp_attr;
@@ -877,24 +902,7 @@ timeout:
         if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
                 ipoib_warn(priv, "Failed to modify QP to RESET state\n");
  
-       /* Wait for all AHs to be reaped */
-       set_bit(IPOIB_STOP_REAPER, &priv->flags);
-       cancel_delayed_work(&priv->ah_reap_task);
-       if (flush)
-               flush_workqueue(ipoib_workqueue);
-
-       begin = jiffies;
-
-       while (!list_empty(&priv->dead_ahs)) {
-               __ipoib_reap_ah(dev);
-
-               if (time_after(jiffies, begin + HZ)) {
-                       ipoib_warn(priv, "timing out; will leak address handles\n");
-                       break;
-               }
-
-               msleep(1);
-       }
+       ipoib_flush_ah(dev);
  
         ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
  
@@ -918,7 +926,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
                     (unsigned long) dev);
  
         if (dev->flags & IFF_UP) {
-               if (ipoib_ib_dev_open(dev, 1)) {
+               if (ipoib_ib_dev_open(dev)) {
                         ipoib_transport_dev_cleanup(dev);
                         return -ENODEV;
                 }
@@ -1037,15 +1045,16 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
         if (level == IPOIB_FLUSH_LIGHT) {
                 ipoib_mark_paths_invalid(dev);
                 ipoib_mcast_dev_flush(dev);
+               ipoib_flush_ah(dev);
         }
  
         if (level >= IPOIB_FLUSH_NORMAL)
-               ipoib_ib_dev_down(dev, 0);
+               ipoib_ib_dev_down(dev);
  
         if (level == IPOIB_FLUSH_HEAVY) {
                 if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-                       ipoib_ib_dev_stop(dev, 0);
-               if (ipoib_ib_dev_open(dev, 0) != 0)
+                       ipoib_ib_dev_stop(dev);
+               if (ipoib_ib_dev_open(dev) != 0)
                         return;
                 if (netif_queue_stopped(dev))
                         netif_start_queue(dev);
@@ -1097,9 +1106,17 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
          */
         ipoib_flush_paths(dev);
  
-       ipoib_mcast_stop_thread(dev, 1);
+       ipoib_mcast_stop_thread(dev);
         ipoib_mcast_dev_flush(dev);
  
+       /*
+        * All of our ah references aren't free until after
+        * ipoib_mcast_dev_flush(), ipoib_flush_paths, and
+        * the neighbor garbage collection is stopped and reaped.
+        * That should all be done now, so make a final ah flush.
+        */
+       ipoib_stop_ah(dev);
+
         ipoib_transport_dev_cleanup(dev);
  }
  
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c

index 915ad04..9e1b203 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
  
         set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
  
-       if (ipoib_ib_dev_open(dev, 1)) {
+       if (ipoib_ib_dev_open(dev)) {
                 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
                         return 0;
                 goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
         return 0;
  
  err_stop:
-       ipoib_ib_dev_stop(dev, 1);
+       ipoib_ib_dev_stop(dev);
  
  err_disable:
         clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
  
         netif_stop_queue(dev);
  
-       ipoib_ib_dev_down(dev, 1);
-       ipoib_ib_dev_stop(dev, 0);
+       ipoib_ib_dev_down(dev);
+       ipoib_ib_dev_stop(dev);
  
         if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                 struct ipoib_dev_priv *cpriv;
@@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
  
                 if (!path->query && path_rec_start(dev, path))
                         goto err_path;
-
-               __skb_queue_tail(&neigh->queue, skb);
+               if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+                       __skb_queue_tail(&neigh->queue, skb);
+               else
+                       goto err_drop;
         }
  
         spin_unlock_irqrestore(&priv->lock, flags);
@@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                         new_path = 1;
                 }
                 if (path) {
-                       __skb_queue_tail(&path->queue, skb);
+                       if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+                               __skb_queue_tail(&path->queue, skb);
+                       } else {
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb);
+                       }
  
                         if (!path->query && path_rec_start(dev, path)) {
                                 spin_unlock_irqrestore(&priv->lock, flags);
@@ -839,7 +846,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
                 return;
         }
  
-       queue_work(ipoib_workqueue, &priv->restart_task);
+       queue_work(priv->wq, &priv->restart_task);
  }
  
  static int ipoib_get_iflink(const struct net_device *dev)
@@ -966,7 +973,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
         __ipoib_reap_neigh(priv);
  
         if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+               queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                                    arp_tbl.gc_interval);
  }
  
@@ -1145,7 +1152,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
  
         /* start garbage collection */
         clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-       queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+       queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                            arp_tbl.gc_interval);
  
         return 0;
@@ -1274,15 +1281,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
  
-       if (ipoib_neigh_hash_init(priv) < 0)
-               goto out;
         /* Allocate RX/TX "rings" to hold queued skbs */
         priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
                                 GFP_KERNEL);
         if (!priv->rx_ring) {
                 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
                        ca->name, ipoib_recvq_size);
-               goto out_neigh_hash_cleanup;
+               goto out;
         }
  
         priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1297,16 +1302,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
         if (ipoib_ib_dev_init(dev, ca, port))
                 goto out_tx_ring_cleanup;
  
+       /*
+        * Must be after ipoib_ib_dev_init so we can allocate a per
+        * device wq there and use it here
+        */
+       if (ipoib_neigh_hash_init(priv) < 0)
+               goto out_dev_uninit;
+
         return 0;
  
+out_dev_uninit:
+       ipoib_ib_dev_cleanup(dev);
+
  out_tx_ring_cleanup:
         vfree(priv->tx_ring);
  
  out_rx_ring_cleanup:
         kfree(priv->rx_ring);
  
-out_neigh_hash_cleanup:
-       ipoib_neigh_hash_uninit(dev);
  out:
         return -ENOMEM;
  }
@@ -1329,6 +1342,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
         }
         unregister_netdevice_many(&head);
  
+       /*
+        * Must be before ipoib_ib_dev_cleanup or we delete an in use
+        * work queue
+        */
+       ipoib_neigh_hash_uninit(dev);
+
         ipoib_ib_dev_cleanup(dev);
  
         kfree(priv->rx_ring);
@@ -1336,8 +1355,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
  
         priv->rx_ring = NULL;
         priv->tx_ring = NULL;
-
-       ipoib_neigh_hash_uninit(dev);
  }
  
  static const struct header_ops ipoib_header_ops = {
@@ -1646,10 +1663,11 @@ sysfs_failed:
  
  register_failed:
         ib_unregister_event_handler(&priv->event_handler);
+       flush_workqueue(ipoib_workqueue);
         /* Stop GC if started before flush */
         set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
         cancel_delayed_work(&priv->neigh_reap_task);
-       flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
  
  event_failed:
         ipoib_dev_cleanup(priv->dev);
@@ -1712,6 +1730,7 @@ static void ipoib_remove_one(struct ib_device *device)
  
         list_for_each_entry_safe(priv, tmp, dev_list, list) {
                 ib_unregister_event_handler(&priv->event_handler);
+               flush_workqueue(ipoib_workqueue);
  
                 rtnl_lock();
                 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
@@ -1720,7 +1739,7 @@ static void ipoib_remove_one(struct ib_device *device)
                 /* Stop GC */
                 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
                 cancel_delayed_work(&priv->neigh_reap_task);
-               flush_workqueue(ipoib_workqueue);
+               flush_workqueue(priv->wq);
  
                 unregister_netdev(priv->dev);
                 free_netdev(priv->dev);
@@ -1755,14 +1774,16 @@ static int __init ipoib_init_module(void)
                 return ret;
  
         /*
-        * We create our own workqueue mainly because we want to be
-        * able to flush it when devices are being removed.  We can't
-        * use schedule_work()/flush_scheduled_work() because both
-        * unregister_netdev() and linkwatch_event take the rtnl lock,
-        * so flush_scheduled_work() can deadlock during device
-        * removal.
+        * We create a global workqueue here that is used for all flush
+        * operations.  However, if you attempt to flush a workqueue
+        * from a task on that same workqueue, it deadlocks the system.
+        * We want to be able to flush the tasks associated with a
+        * specific net device, so we also create a workqueue for each
+        * netdevice.  We queue up the tasks for that device only on
+        * its private workqueue, and we only queue up flush events
+        * on our global flush workqueue.  This avoids the deadlocks.
          */
-       ipoib_workqueue = create_singlethread_workqueue("ipoib");
+       ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
         if (!ipoib_workqueue) {
                 ret = -ENOMEM;
                 goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c

index ffb83b5..0d23e05 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -55,8 +55,6 @@ MODULE_PARM_DESC(mcast_debug_level,
                  "Enable multicast debug tracing if > 0");
  #endif
  
-static DEFINE_MUTEX(mcast_mutex);
-
  struct ipoib_mcast_iter {
         struct net_device *dev;
         union ib_gid       mgid;
@@ -66,6 +64,48 @@ struct ipoib_mcast_iter {
         unsigned int       send_only;
  };
  
+/*
+ * This should be called with the priv->lock held
+ */
+static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
+                                              struct ipoib_mcast *mcast,
+                                              bool delay)
+{
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               return;
+
+       /*
+        * We will be scheduling *something*, so cancel whatever is
+        * currently scheduled first
+        */
+       cancel_delayed_work(&priv->mcast_task);
+       if (mcast && delay) {
+               /*
+                * We had a failure and want to schedule a retry later
+                */
+               mcast->backoff *= 2;
+               if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+                       mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+               mcast->delay_until = jiffies + (mcast->backoff * HZ);
+               /*
+                * Mark this mcast for its delay, but restart the
+                * task immediately.  The join task will make sure to
+                * clear out all entries without delays, and then
+                * schedule itself to run again when the earliest
+                * delay expires
+                */
+               queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+       } else if (delay) {
+               /*
+                * Special case of retrying after a failure to
+                * allocate the broadcast multicast group, wait
+                * 1 second and try again
+                */
+               queue_delayed_work(priv->wq, &priv->mcast_task, HZ);
+       } else
+               queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+}
+
  static void ipoib_mcast_free(struct ipoib_mcast *mcast)
  {
         struct net_device *dev = mcast->dev;
@@ -103,6 +143,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
  
         mcast->dev = dev;
         mcast->created = jiffies;
+       mcast->delay_until = jiffies;
         mcast->backoff = 1;
  
         INIT_LIST_HEAD(&mcast->list);
@@ -185,17 +226,27 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                         spin_unlock_irq(&priv->lock);
                         return -EAGAIN;
                 }
-               priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+               /*update priv member according to the new mcast*/
+               priv->broadcast->mcmember.qkey = mcmember->qkey;
+               priv->broadcast->mcmember.mtu = mcmember->mtu;
+               priv->broadcast->mcmember.traffic_class = mcmember->traffic_class;
+               priv->broadcast->mcmember.rate = mcmember->rate;
+               priv->broadcast->mcmember.sl = mcmember->sl;
+               priv->broadcast->mcmember.flow_label = mcmember->flow_label;
+               priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;
+               /* assume if the admin and the mcast are the same both can be changed */
+               if (priv->mcast_mtu == priv->admin_mtu)
+                       priv->admin_mtu =
+                       priv->mcast_mtu =
+                       IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+               else
+                       priv->mcast_mtu =
+                       IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+
                 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
                 spin_unlock_irq(&priv->lock);
                 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
                 set_qkey = 1;
-
-               if (!ipoib_cm_admin_enabled(dev)) {
-                       rtnl_lock();
-                       dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
-                       rtnl_unlock();
-               }
         }
  
         if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -270,107 +321,35 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
         return 0;
  }
  
-static int
-ipoib_mcast_sendonly_join_complete(int status,
-                                  struct ib_sa_multicast *multicast)
-{
-       struct ipoib_mcast *mcast = multicast->context;
-       struct net_device *dev = mcast->dev;
-
-       /* We trap for port events ourselves. */
-       if (status == -ENETRESET)
-               return 0;
-
-       if (!status)
-               status = ipoib_mcast_join_finish(mcast, &multicast->rec);
-
-       if (status) {
-               if (mcast->logcount++ < 20)
-                       ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
-                                       mcast->mcmember.mgid.raw, status);
-
-               /* Flush out any queued packets */
-               netif_tx_lock_bh(dev);
-               while (!skb_queue_empty(&mcast->pkt_queue)) {
-                       ++dev->stats.tx_dropped;
-                       dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
-               }
-               netif_tx_unlock_bh(dev);
-
-               /* Clear the busy flag so we try again */
-               status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
-                                           &mcast->flags);
-       }
-       return status;
-}
-
-static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
-{
-       struct net_device *dev = mcast->dev;
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ib_sa_mcmember_rec rec = {
-#if 0                          /* Some SMs don't support send-only yet */
-               .join_state = 4
-#else
-               .join_state = 1
-#endif
-       };
-       int ret = 0;
-
-       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-               ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
-               return -ENODEV;
-       }
-
-       if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
-               ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
-               return -EBUSY;
-       }
-
-       rec.mgid     = mcast->mcmember.mgid;
-       rec.port_gid = priv->local_gid;
-       rec.pkey     = cpu_to_be16(priv->pkey);
-
-       mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
-                                        priv->port, &rec,
-                                        IB_SA_MCMEMBER_REC_MGID        |
-                                        IB_SA_MCMEMBER_REC_PORT_GID    |
-                                        IB_SA_MCMEMBER_REC_PKEY        |
-                                        IB_SA_MCMEMBER_REC_JOIN_STATE,
-                                        GFP_ATOMIC,
-                                        ipoib_mcast_sendonly_join_complete,
-                                        mcast);
-       if (IS_ERR(mcast->mc)) {
-               ret = PTR_ERR(mcast->mc);
-               clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-               ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
-                          ret);
-       } else {
-               ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
-                               mcast->mcmember.mgid.raw);
-       }
-
-       return ret;
-}
-
  void ipoib_mcast_carrier_on_task(struct work_struct *work)
  {
         struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
                                                    carrier_on_task);
         struct ib_port_attr attr;
  
-       /*
-        * Take rtnl_lock to avoid racing with ipoib_stop() and
-        * turning the carrier back on while a device is being
-        * removed.
-        */
         if (ib_query_port(priv->ca, priv->port, &attr) ||
             attr.state != IB_PORT_ACTIVE) {
                 ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
                 return;
         }
  
-       rtnl_lock();
+       /*
+        * Take rtnl_lock to avoid racing with ipoib_stop() and
+        * turning the carrier back on while a device is being
+        * removed.  However, ipoib_stop() will attempt to flush
+        * the workqueue while holding the rtnl lock, so loop
+        * on trylock until either we get the lock or we see
+        * FLAG_OPER_UP go away as that signals that we are bailing
+        * and can safely ignore the carrier on work.
+        */
+       while (!rtnl_trylock()) {
+               if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+                       return;
+               else
+                       msleep(20);
+       }
+       if (!ipoib_cm_admin_enabled(priv->dev))
+               dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
         netif_carrier_on(priv->dev);
         rtnl_unlock();
  }
@@ -382,7 +361,9 @@ static int ipoib_mcast_join_complete(int status,
         struct net_device *dev = mcast->dev;
         struct ipoib_dev_priv *priv = netdev_priv(dev);
  
-       ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
+       ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
+                       test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
+                       "sendonly " : "",
                         mcast->mcmember.mgid.raw, status);
  
         /* We trap for port events ourselves. */
@@ -396,49 +377,74 @@ static int ipoib_mcast_join_complete(int status,
  
         if (!status) {
                 mcast->backoff = 1;
-               mutex_lock(&mcast_mutex);
-               if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(ipoib_workqueue,
-                                          &priv->mcast_task, 0);
-               mutex_unlock(&mcast_mutex);
+               mcast->delay_until = jiffies;
  
                 /*
-                * Defer carrier on work to ipoib_workqueue to avoid a
-                * deadlock on rtnl_lock here.
+                * Defer carrier on work to priv->wq to avoid a
+                * deadlock on rtnl_lock here.  Requeue our multicast
+                * work too, which will end up happening right after
+                * our carrier on task work and will allow us to
+                * send out all of the non-broadcast joins
                  */
-               if (mcast == priv->broadcast)
-                       queue_work(ipoib_workqueue, &priv->carrier_on_task);
-
-               status = 0;
-               goto out;
-       }
+               if (mcast == priv->broadcast) {
+                       spin_lock_irq(&priv->lock);
+                       queue_work(priv->wq, &priv->carrier_on_task);
+                       __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+                       goto out_locked;
+               }
+       } else {
+               if (mcast->logcount++ < 20) {
+                       if (status == -ETIMEDOUT || status == -EAGAIN) {
+                               ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
+                                               test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
+                                               mcast->mcmember.mgid.raw, status);
+                       } else {
+                               ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
+                                               test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
+                                          mcast->mcmember.mgid.raw, status);
+                       }
+               }
  
-       if (mcast->logcount++ < 20) {
-               if (status == -ETIMEDOUT || status == -EAGAIN) {
-                       ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
-                                       mcast->mcmember.mgid.raw, status);
+               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+                   mcast->backoff >= 2) {
+                       /*
+                        * We only retry sendonly joins once before we drop
+                        * the packet and quit trying to deal with the
+                        * group.  However, we leave the group in the
+                        * mcast list as an unjoined group.  If we want to
+                        * try joining again, we simply queue up a packet
+                        * and restart the join thread.  The empty queue
+                        * is why the join thread ignores this group.
+                        */
+                       mcast->backoff = 1;
+                       netif_tx_lock_bh(dev);
+                       while (!skb_queue_empty(&mcast->pkt_queue)) {
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
+                       }
+                       netif_tx_unlock_bh(dev);
                 } else {
-                       ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
-                                  mcast->mcmember.mgid.raw, status);
+                       spin_lock_irq(&priv->lock);
+                       /* Requeue this join task with a backoff delay */
+                       __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
+                       goto out_locked;
                 }
         }
-
-       mcast->backoff *= 2;
-       if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-               mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
-
-       /* Clear the busy flag so we try again */
-       status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-
-       mutex_lock(&mcast_mutex);
+out:
         spin_lock_irq(&priv->lock);
-       if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
-                                  mcast->backoff * HZ);
+out_locked:
+       /*
+        * Make sure to set mcast->mc before we clear the busy flag to avoid
+        * racing with code that checks for BUSY before checking mcast->mc
+        */
+       if (status)
+               mcast->mc = NULL;
+       else
+               mcast->mc = multicast;
+       clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
         spin_unlock_irq(&priv->lock);
-       mutex_unlock(&mcast_mutex);
-out:
         complete(&mcast->done);
+
         return status;
  }
  
@@ -446,6 +452,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                              int create)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ib_sa_multicast *multicast;
         struct ib_sa_mcmember_rec rec = {
                 .join_state = 1
         };
@@ -487,29 +494,18 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                 rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
         }
  
-       set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-       init_completion(&mcast->done);
-       set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
-
-       mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
+       multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
                                          &rec, comp_mask, GFP_KERNEL,
                                          ipoib_mcast_join_complete, mcast);
-       if (IS_ERR(mcast->mc)) {
+       if (IS_ERR(multicast)) {
+               ret = PTR_ERR(multicast);
+               ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
+               spin_lock_irq(&priv->lock);
+               /* Requeue this join task with a backoff delay */
+               __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
                 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+               spin_unlock_irq(&priv->lock);
                 complete(&mcast->done);
-               ret = PTR_ERR(mcast->mc);
-               ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
-
-               mcast->backoff *= 2;
-               if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-                       mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
-
-               mutex_lock(&mcast_mutex);
-               if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(ipoib_workqueue,
-                                          &priv->mcast_task,
-                                          mcast->backoff * HZ);
-               mutex_unlock(&mcast_mutex);
         }
  }
  
@@ -519,8 +515,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
                 container_of(work, struct ipoib_dev_priv, mcast_task.work);
         struct net_device *dev = priv->dev;
         struct ib_port_attr port_attr;
+       unsigned long delay_until = 0;
+       struct ipoib_mcast *mcast = NULL;
+       int create = 1;
  
-       if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
                 return;
  
         if (ib_query_port(priv->ca, priv->port, &port_attr) ||
@@ -536,93 +535,118 @@ void ipoib_mcast_join_task(struct work_struct *work)
         else
                 memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
  
+       spin_lock_irq(&priv->lock);
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               goto out;
+
         if (!priv->broadcast) {
                 struct ipoib_mcast *broadcast;
  
-               if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-                       return;
-
-               broadcast = ipoib_mcast_alloc(dev, 1);
+               broadcast = ipoib_mcast_alloc(dev, 0);
                 if (!broadcast) {
                         ipoib_warn(priv, "failed to allocate broadcast group\n");
-                       mutex_lock(&mcast_mutex);
-                       if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                               queue_delayed_work(ipoib_workqueue,
-                                                  &priv->mcast_task, HZ);
-                       mutex_unlock(&mcast_mutex);
-                       return;
+                       /*
+                        * Restart us after a 1 second delay to retry
+                        * creating our broadcast group and attaching to
+                        * it.  Until this succeeds, this ipoib dev is
+                        * completely stalled (multicast wise).
+                        */
+                       __ipoib_mcast_schedule_join_thread(priv, NULL, 1);
+                       goto out;
                 }
  
-               spin_lock_irq(&priv->lock);
                 memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
                        sizeof (union ib_gid));
                 priv->broadcast = broadcast;
  
                 __ipoib_mcast_add(dev, priv->broadcast);
-               spin_unlock_irq(&priv->lock);
         }
  
         if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-               if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
-                       ipoib_mcast_join(dev, priv->broadcast, 0);
-               return;
-       }
-
-       while (1) {
-               struct ipoib_mcast *mcast = NULL;
-
-               spin_lock_irq(&priv->lock);
-               list_for_each_entry(mcast, &priv->multicast_list, list) {
-                       if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
-                           && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
-                           && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
-                               /* Found the next unjoined group */
-                               break;
+               if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
+                   !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
+                       mcast = priv->broadcast;
+                       create = 0;
+                       if (mcast->backoff > 1 &&
+                           time_before(jiffies, mcast->delay_until)) {
+                               delay_until = mcast->delay_until;
+                               mcast = NULL;
                         }
                 }
-               spin_unlock_irq(&priv->lock);
+               goto out;
+       }
  
-               if (&mcast->list == &priv->multicast_list) {
-                       /* All done */
-                       break;
+       /*
+        * We'll never get here until the broadcast group is both allocated
+        * and attached
+        */
+       list_for_each_entry(mcast, &priv->multicast_list, list) {
+               if (IS_ERR_OR_NULL(mcast->mc) &&
+                   !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
+                   (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
+                    !skb_queue_empty(&mcast->pkt_queue))) {
+                       if (mcast->backoff == 1 ||
+                           time_after_eq(jiffies, mcast->delay_until)) {
+                               /* Found the next unjoined group */
+                               init_completion(&mcast->done);
+                               set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+                               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+                                       create = 0;
+                               else
+                                       create = 1;
+                               spin_unlock_irq(&priv->lock);
+                               ipoib_mcast_join(dev, mcast, create);
+                               spin_lock_irq(&priv->lock);
+                       } else if (!delay_until ||
+                                time_before(mcast->delay_until, delay_until))
+                               delay_until = mcast->delay_until;
                 }
-
-               ipoib_mcast_join(dev, mcast, 1);
-               return;
         }
  
-       ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
+       mcast = NULL;
+       ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");
  
-       clear_bit(IPOIB_MCAST_RUN, &priv->flags);
+out:
+       if (delay_until) {
+               cancel_delayed_work(&priv->mcast_task);
+               queue_delayed_work(priv->wq, &priv->mcast_task,
+                                  delay_until - jiffies);
+       }
+       if (mcast) {
+               init_completion(&mcast->done);
+               set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+       }
+       spin_unlock_irq(&priv->lock);
+       if (mcast)
+               ipoib_mcast_join(dev, mcast, create);
  }
  
  int ipoib_mcast_start_thread(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
+       unsigned long flags;
  
         ipoib_dbg_mcast(priv, "starting multicast thread\n");
  
-       mutex_lock(&mcast_mutex);
-       if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
-       mutex_unlock(&mcast_mutex);
+       spin_lock_irqsave(&priv->lock, flags);
+       __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+       spin_unlock_irqrestore(&priv->lock, flags);
  
         return 0;
  }
  
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+int ipoib_mcast_stop_thread(struct net_device *dev)
  {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
+       unsigned long flags;
  
         ipoib_dbg_mcast(priv, "stopping multicast thread\n");
  
-       mutex_lock(&mcast_mutex);
-       clear_bit(IPOIB_MCAST_RUN, &priv->flags);
+       spin_lock_irqsave(&priv->lock, flags);
         cancel_delayed_work(&priv->mcast_task);
-       mutex_unlock(&mcast_mutex);
+       spin_unlock_irqrestore(&priv->lock, flags);
  
-       if (flush)
-               flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
  
         return 0;
  }
@@ -633,6 +657,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
         int ret = 0;
  
         if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+               ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
+
+       if (!IS_ERR_OR_NULL(mcast->mc))
                 ib_sa_free_multicast(mcast->mc);
  
         if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -644,7 +671,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
                                       be16_to_cpu(mcast->mcmember.mlid));
                 if (ret)
                         ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
-       }
+       } else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+               ipoib_dbg(priv, "leaving with no mcmember but not a "
+                         "SENDONLY join\n");
  
         return 0;
  }
@@ -667,49 +696,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
         }
  
         mcast = __ipoib_mcast_find(dev, mgid);
-       if (!mcast) {
-               /* Let's create a new send only group now */
-               ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
-                               mgid);
-
-               mcast = ipoib_mcast_alloc(dev, 0);
+       if (!mcast || !mcast->ah) {
                 if (!mcast) {
-                       ipoib_warn(priv, "unable to allocate memory for "
-                                  "multicast structure\n");
-                       ++dev->stats.tx_dropped;
-                       dev_kfree_skb_any(skb);
-                       goto out;
-               }
-
-               set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
-               memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
-               __ipoib_mcast_add(dev, mcast);
-               list_add_tail(&mcast->list, &priv->multicast_list);
-       }
+                       /* Let's create a new send only group now */
+                       ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
+                                       mgid);
+
+                       mcast = ipoib_mcast_alloc(dev, 0);
+                       if (!mcast) {
+                               ipoib_warn(priv, "unable to allocate memory "
+                                          "for multicast structure\n");
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb);
+                               goto unlock;
+                       }
  
-       if (!mcast->ah) {
+                       set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
+                       memcpy(mcast->mcmember.mgid.raw, mgid,
+                              sizeof (union ib_gid));
+                       __ipoib_mcast_add(dev, mcast);
+                       list_add_tail(&mcast->list, &priv->multicast_list);
+               }
                 if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
                         skb_queue_tail(&mcast->pkt_queue, skb);
                 else {
                         ++dev->stats.tx_dropped;
                         dev_kfree_skb_any(skb);
                 }
-
-               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-                       ipoib_dbg_mcast(priv, "no address vector, "
-                                       "but multicast join already started\n");
-               else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-                       ipoib_mcast_sendonly_join(mcast);
-
-               /*
-                * If lookup completes between here and out:, don't
-                * want to send packet twice.
-                */
-               mcast = NULL;
-       }
-
-out:
-       if (mcast && mcast->ah) {
+               if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+                       __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+               }
+       } else {
                 struct ipoib_neigh *neigh;
  
                 spin_unlock_irqrestore(&priv->lock, flags);
@@ -759,9 +776,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
  
         spin_unlock_irqrestore(&priv->lock, flags);
  
-       /* seperate between the wait to the leave*/
+       /*
+        * make sure the in-flight joins have finished before we attempt
+        * to leave
+        */
         list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-               if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                         wait_for_completion(&mcast->done);
  
         list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -792,9 +812,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
         unsigned long flags;
         struct ib_sa_mcmember_rec rec;
  
-       ipoib_dbg_mcast(priv, "restarting multicast task\n");
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               /*
+                * shortcut...on shutdown flush is called next, just
+                * let it do all the work
+                */
+               return;
  
-       ipoib_mcast_stop_thread(dev, 0);
+       ipoib_dbg_mcast(priv, "restarting multicast task\n");
  
         local_irq_save(flags);
         netif_addr_lock(dev);
@@ -880,14 +905,27 @@ void ipoib_mcast_restart_task(struct work_struct *work)
         netif_addr_unlock(dev);
         local_irq_restore(flags);
  
-       /* We have to cancel outside of the spinlock */
+       /*
+        * make sure the in-flight joins have finished before we attempt
+        * to leave
+        */
+       list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+                       wait_for_completion(&mcast->done);
+
         list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
                 ipoib_mcast_leave(mcast->dev, mcast);
                 ipoib_mcast_free(mcast);
         }
  
-       if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-               ipoib_mcast_start_thread(dev);
+       /*
+        * Double check that we are still up
+        */
+       if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
+               spin_lock_irqsave(&priv->lock, flags);
+               __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+               spin_unlock_irqrestore(&priv->lock, flags);
+       }
  }
  
  #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c

index c56d5d4..e5cc430 100644 (file)
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -157,6 +157,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                 goto out_free_pd;
         }
  
+       /*
+        * the various IPoIB tasks assume they will never race against
+        * themselves, so always use a single thread workqueue
+        */
+       priv->wq = create_singlethread_workqueue("ipoib_wq");
+       if (!priv->wq) {
+               printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+               goto out_free_mr;
+       }
+
         size = ipoib_recvq_size + 1;
         ret = ipoib_cm_dev_init(dev);
         if (!ret) {
@@ -165,12 +175,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                         size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
                 else
                         size += ipoib_recvq_size * ipoib_max_conn_qp;
-       }
+       } else
+               goto out_free_wq;
  
         priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
         if (IS_ERR(priv->recv_cq)) {
                 printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
-               goto out_free_mr;
+               goto out_cm_dev_cleanup;
         }
  
         priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
@@ -216,15 +227,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
         priv->tx_wr.send_flags  = IB_SEND_SIGNALED;
  
         priv->rx_sge[0].lkey = priv->mr->lkey;
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
-               priv->rx_sge[1].length = PAGE_SIZE;
-               priv->rx_sge[1].lkey = priv->mr->lkey;
-               priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
-       } else {
-               priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-               priv->rx_wr.num_sge = 1;
-       }
+
+       priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+       priv->rx_wr.num_sge = 1;
+
         priv->rx_wr.next = NULL;
         priv->rx_wr.sg_list = priv->rx_sge;
  
@@ -236,12 +242,19 @@ out_free_send_cq:
  out_free_recv_cq:
         ib_destroy_cq(priv->recv_cq);
  
+out_cm_dev_cleanup:
+       ipoib_cm_dev_cleanup(dev);
+
+out_free_wq:
+       destroy_workqueue(priv->wq);
+       priv->wq = NULL;
+
  out_free_mr:
         ib_dereg_mr(priv->mr);
-       ipoib_cm_dev_cleanup(dev);
  
  out_free_pd:
         ib_dealloc_pd(priv->pd);
+
         return -ENODEV;
  }
  
@@ -265,11 +278,18 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
  
         ipoib_cm_dev_cleanup(dev);
  
+       if (priv->wq) {
+               flush_workqueue(priv->wq);
+               destroy_workqueue(priv->wq);
+               priv->wq = NULL;
+       }
+
         if (ib_dereg_mr(priv->mr))
                 ipoib_warn(priv, "ib_dereg_mr failed\n");
  
         if (ib_dealloc_pd(priv->pd))
                 ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
  }
  
  void ipoib_event(struct ib_event_handler *handler,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h

index b47aea1..262ba1f 100644 (file)
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -69,7 +69,7 @@
  
  #define DRV_NAME       "iser"
  #define PFX            DRV_NAME ": "
-#define DRV_VER                "1.5"
+#define DRV_VER                "1.6"
  
  #define iser_dbg(fmt, arg...)                           \
         do {                                             \
@@ -218,22 +218,21 @@ enum iser_data_dir {
  /**
   * struct iser_data_buf - iSER data buffer
   *
- * @buf:          pointer to the sg list
+ * @sg:           pointer to the sg list
   * @size:         num entries of this sg
   * @data_len:     total beffer byte len
   * @dma_nents:    returned by dma_map_sg
- * @copy_buf:     allocated copy buf for SGs unaligned
- *                for rdma which are copied
- * @sg_single:    SG-ified clone of a non SG SC or
- *                unaligned SG
+ * @orig_sg:      pointer to the original sg list (in case
+ *                we used a copy)
+ * @orig_size:    num entris of orig sg list
   */
  struct iser_data_buf {
-       void               *buf;
+       struct scatterlist *sg;
         unsigned int       size;
         unsigned long      data_len;
         unsigned int       dma_nents;
-       char               *copy_buf;
-       struct scatterlist sg_single;
+       struct scatterlist *orig_sg;
+       unsigned int       orig_size;
    };
  
  /* fwd declarations */
@@ -244,35 +243,14 @@ struct iscsi_endpoint;
  /**
   * struct iser_mem_reg - iSER memory registration info
   *
- * @lkey:         MR local key
- * @rkey:         MR remote key
- * @va:           MR start address (buffer va)
- * @len:          MR length
+ * @sge:          memory region sg element
+ * @rkey:         memory region remote key
   * @mem_h:        pointer to registration context (FMR/Fastreg)
   */
  struct iser_mem_reg {
-       u32  lkey;
-       u32  rkey;
-       u64  va;
-       u64  len;
-       void *mem_h;
-};
-
-/**
- * struct iser_regd_buf - iSER buffer registration desc
- *
- * @reg:          memory registration info
- * @virt_addr:    virtual address of buffer
- * @device:       reference to iser device
- * @direction:    dma direction (for dma_unmap)
- * @data_size:    data buffer size in bytes
- */
-struct iser_regd_buf {
-       struct iser_mem_reg     reg;
-       void                    *virt_addr;
-       struct iser_device      *device;
-       enum dma_data_direction direction;
-       unsigned int            data_size;
+       struct ib_sge    sge;
+       u32              rkey;
+       void            *mem_h;
  };
  
  enum iser_desc_type {
@@ -534,11 +512,9 @@ struct iser_conn {
   * @sc:               link to scsi command
   * @command_sent:     indicate if command was sent
   * @dir:              iser data direction
- * @rdma_regd:        task rdma registration desc
+ * @rdma_reg:         task rdma registration desc
   * @data:             iser data buffer desc
- * @data_copy:        iser data copy buffer desc (bounce buffer)
   * @prot:             iser protection buffer desc
- * @prot_copy:        iser protection copy buffer desc (bounce buffer)
   */
  struct iscsi_iser_task {
         struct iser_tx_desc          desc;
@@ -547,11 +523,9 @@ struct iscsi_iser_task {
         struct scsi_cmnd             *sc;
         int                          command_sent;
         int                          dir[ISER_DIRS_NUM];
-       struct iser_regd_buf         rdma_regd[ISER_DIRS_NUM];
+       struct iser_mem_reg          rdma_reg[ISER_DIRS_NUM];
         struct iser_data_buf         data[ISER_DIRS_NUM];
-       struct iser_data_buf         data_copy[ISER_DIRS_NUM];
         struct iser_data_buf         prot[ISER_DIRS_NUM];
-       struct iser_data_buf         prot_copy[ISER_DIRS_NUM];
  };
  
  struct iser_page_vec {
@@ -621,7 +595,6 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn);
  
  void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                      struct iser_data_buf *mem,
-                                    struct iser_data_buf *mem_copy,
                                      enum iser_data_dir cmd_dir);
  
  int  iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
@@ -634,10 +607,6 @@ int  iser_connect(struct iser_conn *iser_conn,
                   struct sockaddr *dst_addr,
                   int non_blocking);
  
-int  iser_reg_page_vec(struct ib_conn *ib_conn,
-                      struct iser_page_vec *page_vec,
-                      struct iser_mem_reg *mem_reg);
-
  void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
                         enum iser_data_dir cmd_dir);
  void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
@@ -667,4 +636,9 @@ int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
  void iser_free_fastreg_pool(struct ib_conn *ib_conn);
  u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
                              enum iser_data_dir cmd_dir, sector_t *sector);
+struct fast_reg_descriptor *
+iser_reg_desc_get(struct ib_conn *ib_conn);
+void
+iser_reg_desc_put(struct ib_conn *ib_conn,
+                 struct fast_reg_descriptor *desc);
  #endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c

index 20e859a..3e2118e 100644 (file)
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -50,7 +50,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
  {
         struct iscsi_iser_task *iser_task = task->dd_data;
         struct iser_device  *device = iser_task->iser_conn->ib_conn.device;
-       struct iser_regd_buf *regd_buf;
+       struct iser_mem_reg *mem_reg;
         int err;
         struct iser_hdr *hdr = &iser_task->desc.iser_header;
         struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
@@ -78,15 +78,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
                 iser_err("Failed to set up Data-IN RDMA\n");
                 return err;
         }
-       regd_buf = &iser_task->rdma_regd[ISER_DIR_IN];
+       mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];
  
         hdr->flags    |= ISER_RSV;
-       hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
-       hdr->read_va   = cpu_to_be64(regd_buf->reg.va);
+       hdr->read_stag = cpu_to_be32(mem_reg->rkey);
+       hdr->read_va   = cpu_to_be64(mem_reg->sge.addr);
  
         iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
-                task->itt, regd_buf->reg.rkey,
-                (unsigned long long)regd_buf->reg.va);
+                task->itt, mem_reg->rkey,
+                (unsigned long long)mem_reg->sge.addr);
  
         return 0;
  }
@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
  {
         struct iscsi_iser_task *iser_task = task->dd_data;
         struct iser_device  *device = iser_task->iser_conn->ib_conn.device;
-       struct iser_regd_buf *regd_buf;
+       struct iser_mem_reg *mem_reg;
         int err;
         struct iser_hdr *hdr = &iser_task->desc.iser_header;
         struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
@@ -134,25 +134,25 @@ iser_prepare_write_cmd(struct iscsi_task *task,
                 return err;
         }
  
-       regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
+       mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
  
         if (unsol_sz < edtl) {
                 hdr->flags     |= ISER_WSV;
-               hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
-               hdr->write_va   = cpu_to_be64(regd_buf->reg.va + unsol_sz);
+               hdr->write_stag = cpu_to_be32(mem_reg->rkey);
+               hdr->write_va   = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
  
                 iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
                          "VA:%#llX + unsol:%d\n",
-                        task->itt, regd_buf->reg.rkey,
-                        (unsigned long long)regd_buf->reg.va, unsol_sz);
+                        task->itt, mem_reg->rkey,
+                        (unsigned long long)mem_reg->sge.addr, unsol_sz);
         }
  
         if (imm_sz > 0) {
                 iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
                          task->itt, imm_sz);
-               tx_dsg->addr   = regd_buf->reg.va;
+               tx_dsg->addr = mem_reg->sge.addr;
                 tx_dsg->length = imm_sz;
-               tx_dsg->lkey   = regd_buf->reg.lkey;
+               tx_dsg->lkey = mem_reg->sge.lkey;
                 iser_task->desc.num_sge = 2;
         }
  
@@ -401,16 +401,16 @@ int iser_send_command(struct iscsi_conn *conn,
         }
  
         if (scsi_sg_count(sc)) { /* using a scatter list */
-               data_buf->buf  = scsi_sglist(sc);
+               data_buf->sg = scsi_sglist(sc);
                 data_buf->size = scsi_sg_count(sc);
         }
         data_buf->data_len = scsi_bufflen(sc);
  
         if (scsi_prot_sg_count(sc)) {
-               prot_buf->buf  = scsi_prot_sglist(sc);
+               prot_buf->sg  = scsi_prot_sglist(sc);
                 prot_buf->size = scsi_prot_sg_count(sc);
-               prot_buf->data_len = data_buf->data_len >>
-                                    ilog2(sc->device->sector_size) * 8;
+               prot_buf->data_len = (data_buf->data_len >>
+                                    ilog2(sc->device->sector_size)) * 8;
         }
  
         if (hdr->flags & ISCSI_FLAG_CMD_READ) {
@@ -450,7 +450,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
         struct iser_conn *iser_conn = conn->dd_data;
         struct iscsi_iser_task *iser_task = task->dd_data;
         struct iser_tx_desc *tx_desc = NULL;
-       struct iser_regd_buf *regd_buf;
+       struct iser_mem_reg *mem_reg;
         unsigned long buf_offset;
         unsigned long data_seg_len;
         uint32_t itt;
@@ -477,11 +477,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
         /* build the tx desc */
         iser_initialize_task_headers(task, tx_desc);
  
-       regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
+       mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
         tx_dsg = &tx_desc->tx_sg[1];
-       tx_dsg->addr    = regd_buf->reg.va + buf_offset;
-       tx_dsg->length  = data_seg_len;
-       tx_dsg->lkey    = regd_buf->reg.lkey;
+       tx_dsg->addr = mem_reg->sge.addr + buf_offset;
+       tx_dsg->length = data_seg_len;
+       tx_dsg->lkey = mem_reg->sge.lkey;
         tx_desc->num_sge = 2;
  
         if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
@@ -658,10 +658,10 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
         iser_task->prot[ISER_DIR_IN].data_len  = 0;
         iser_task->prot[ISER_DIR_OUT].data_len = 0;
  
-       memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
-              sizeof(struct iser_regd_buf));
-       memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
-              sizeof(struct iser_regd_buf));
+       memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
+              sizeof(struct iser_mem_reg));
+       memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
+              sizeof(struct iser_mem_reg));
  }
  
  void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
@@ -674,35 +674,31 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
         /* if we were reading, copy back to unaligned sglist,
          * anyway dma_unmap and free the copy
          */
-       if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
+       if (iser_task->data[ISER_DIR_IN].orig_sg) {
                 is_rdma_data_aligned = 0;
                 iser_finalize_rdma_unaligned_sg(iser_task,
                                                 &iser_task->data[ISER_DIR_IN],
-                                               &iser_task->data_copy[ISER_DIR_IN],
                                                 ISER_DIR_IN);
         }
  
-       if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
+       if (iser_task->data[ISER_DIR_OUT].orig_sg) {
                 is_rdma_data_aligned = 0;
                 iser_finalize_rdma_unaligned_sg(iser_task,
                                                 &iser_task->data[ISER_DIR_OUT],
-                                               &iser_task->data_copy[ISER_DIR_OUT],
                                                 ISER_DIR_OUT);
         }
  
-       if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) {
+       if (iser_task->prot[ISER_DIR_IN].orig_sg) {
                 is_rdma_prot_aligned = 0;
                 iser_finalize_rdma_unaligned_sg(iser_task,
                                                 &iser_task->prot[ISER_DIR_IN],
-                                               &iser_task->prot_copy[ISER_DIR_IN],
                                                 ISER_DIR_IN);
         }
  
-       if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) {
+       if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
                 is_rdma_prot_aligned = 0;
                 iser_finalize_rdma_unaligned_sg(iser_task,
                                                 &iser_task->prot[ISER_DIR_OUT],
-                                               &iser_task->prot_copy[ISER_DIR_OUT],
                                                 ISER_DIR_OUT);
         }
  
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c

index 341040b..f0cdc96 100644 (file)
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -39,68 +39,173 @@
  
  #include "iscsi_iser.h"
  
-#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
+static void
+iser_free_bounce_sg(struct iser_data_buf *data)
+{
+       struct scatterlist *sg;
+       int count;
  
-/**
- * iser_start_rdma_unaligned_sg
- */
-static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
-                                       struct iser_data_buf *data,
-                                       struct iser_data_buf *data_copy,
-                                       enum iser_data_dir cmd_dir)
+       for_each_sg(data->sg, sg, data->size, count)
+               __free_page(sg_page(sg));
+
+       kfree(data->sg);
+
+       data->sg = data->orig_sg;
+       data->size = data->orig_size;
+       data->orig_sg = NULL;
+       data->orig_size = 0;
+}
+
+static int
+iser_alloc_bounce_sg(struct iser_data_buf *data)
  {
-       struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
-       struct scatterlist *sgl = (struct scatterlist *)data->buf;
         struct scatterlist *sg;
-       char *mem = NULL;
-       unsigned long  cmd_data_len = 0;
-       int dma_nents, i;
+       struct page *page;
+       unsigned long length = data->data_len;
+       int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
  
-       for_each_sg(sgl, sg, data->size, i)
-               cmd_data_len += ib_sg_dma_len(dev, sg);
+       sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
+       if (!sg)
+               goto err;
  
-       if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
-               mem = (void *)__get_free_pages(GFP_ATOMIC,
-                     ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
-       else
-               mem = kmalloc(cmd_data_len, GFP_ATOMIC);
+       sg_init_table(sg, nents);
+       while (length) {
+               u32 page_len = min_t(u32, length, PAGE_SIZE);
  
-       if (mem == NULL) {
-               iser_err("Failed to allocate mem size %d %d for copying sglist\n",
-                        data->size, (int)cmd_data_len);
-               return -ENOMEM;
+               page = alloc_page(GFP_ATOMIC);
+               if (!page)
+                       goto err;
+
+               sg_set_page(&sg[i], page, page_len, 0);
+               length -= page_len;
+               i++;
         }
  
-       if (cmd_dir == ISER_DIR_OUT) {
-               /* copy the unaligned sg the buffer which is used for RDMA */
-               char *p, *from;
-
-               sgl = (struct scatterlist *)data->buf;
-               p = mem;
-               for_each_sg(sgl, sg, data->size, i) {
-                       from = kmap_atomic(sg_page(sg));
-                       memcpy(p,
-                              from + sg->offset,
-                              sg->length);
-                       kunmap_atomic(from);
-                       p += sg->length;
+       data->orig_sg = data->sg;
+       data->orig_size = data->size;
+       data->sg = sg;
+       data->size = nents;
+
+       return 0;
+
+err:
+       for (; i > 0; i--)
+               __free_page(sg_page(&sg[i - 1]));
+       kfree(sg);
+
+       return -ENOMEM;
+}
+
+static void
+iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
+{
+       struct scatterlist *osg, *bsg = data->sg;
+       void *oaddr, *baddr;
+       unsigned int left = data->data_len;
+       unsigned int bsg_off = 0;
+       int i;
+
+       for_each_sg(data->orig_sg, osg, data->orig_size, i) {
+               unsigned int copy_len, osg_off = 0;
+
+               oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
+               copy_len = min(left, osg->length);
+               while (copy_len) {
+                       unsigned int len = min(copy_len, bsg->length - bsg_off);
+
+                       baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
+                       if (to_buffer)
+                               memcpy(baddr + bsg_off, oaddr + osg_off, len);
+                       else
+                               memcpy(oaddr + osg_off, baddr + bsg_off, len);
+
+                       kunmap_atomic(baddr - bsg->offset);
+                       osg_off += len;
+                       bsg_off += len;
+                       copy_len -= len;
+
+                       if (bsg_off >= bsg->length) {
+                               bsg = sg_next(bsg);
+                               bsg_off = 0;
+                       }
                 }
+               kunmap_atomic(oaddr - osg->offset);
+               left -= osg_off;
         }
+}
+
+static inline void
+iser_copy_from_bounce(struct iser_data_buf *data)
+{
+       iser_copy_bounce(data, false);
+}
+
+static inline void
+iser_copy_to_bounce(struct iser_data_buf *data)
+{
+       iser_copy_bounce(data, true);
+}
+
+struct fast_reg_descriptor *
+iser_reg_desc_get(struct ib_conn *ib_conn)
+{
+       struct fast_reg_descriptor *desc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ib_conn->lock, flags);
+       desc = list_first_entry(&ib_conn->fastreg.pool,
+                               struct fast_reg_descriptor, list);
+       list_del(&desc->list);
+       spin_unlock_irqrestore(&ib_conn->lock, flags);
+
+       return desc;
+}
+
+void
+iser_reg_desc_put(struct ib_conn *ib_conn,
+                 struct fast_reg_descriptor *desc)
+{
+       unsigned long flags;
  
-       sg_init_one(&data_copy->sg_single, mem, cmd_data_len);
-       data_copy->buf = &data_copy->sg_single;
-       data_copy->size = 1;
-       data_copy->copy_buf = mem;
+       spin_lock_irqsave(&ib_conn->lock, flags);
+       list_add(&desc->list, &ib_conn->fastreg.pool);
+       spin_unlock_irqrestore(&ib_conn->lock, flags);
+}
  
-       dma_nents = ib_dma_map_sg(dev, &data_copy->sg_single, 1,
-                                 (cmd_dir == ISER_DIR_OUT) ?
-                                 DMA_TO_DEVICE : DMA_FROM_DEVICE);
-       BUG_ON(dma_nents == 0);
+/**
+ * iser_start_rdma_unaligned_sg
+ */
+static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
+                                       struct iser_data_buf *data,
+                                       enum iser_data_dir cmd_dir)
+{
+       struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
+       int rc;
+
+       rc = iser_alloc_bounce_sg(data);
+       if (rc) {
+               iser_err("Failed to allocate bounce for data len %lu\n",
+                        data->data_len);
+               return rc;
+       }
+
+       if (cmd_dir == ISER_DIR_OUT)
+               iser_copy_to_bounce(data);
  
-       data_copy->dma_nents = dma_nents;
-       data_copy->data_len = cmd_data_len;
+       data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
+                                       (cmd_dir == ISER_DIR_OUT) ?
+                                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
+       if (!data->dma_nents) {
+               iser_err("Got dma_nents %d, something went wrong...\n",
+                        data->dma_nents);
+               rc = -ENOMEM;
+               goto err;
+       }
  
         return 0;
+err:
+       iser_free_bounce_sg(data);
+       return rc;
  }
  
  /**
@@ -109,51 +214,18 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
  
  void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
                                      struct iser_data_buf *data,
-                                    struct iser_data_buf *data_copy,
                                      enum iser_data_dir cmd_dir)
  {
-       struct ib_device *dev;
-       unsigned long  cmd_data_len;
-
-       dev = iser_task->iser_conn->ib_conn.device->ib_device;
+       struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
  
-       ib_dma_unmap_sg(dev, &data_copy->sg_single, 1,
+       ib_dma_unmap_sg(dev, data->sg, data->size,
                         (cmd_dir == ISER_DIR_OUT) ?
                         DMA_TO_DEVICE : DMA_FROM_DEVICE);
  
-       if (cmd_dir == ISER_DIR_IN) {
-               char *mem;
-               struct scatterlist *sgl, *sg;
-               unsigned char *p, *to;
-               unsigned int sg_size;
-               int i;
-
-               /* copy back read RDMA to unaligned sg */
-               mem = data_copy->copy_buf;
-
-               sgl = (struct scatterlist *)data->buf;
-               sg_size = data->size;
-
-               p = mem;
-               for_each_sg(sgl, sg, sg_size, i) {
-                       to = kmap_atomic(sg_page(sg));
-                       memcpy(to + sg->offset,
-                              p,
-                              sg->length);
-                       kunmap_atomic(to);
-                       p += sg->length;
-               }
-       }
+       if (cmd_dir == ISER_DIR_IN)
+               iser_copy_from_bounce(data);
  
-       cmd_data_len = data->data_len;
-
-       if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
-               free_pages((unsigned long)data_copy->copy_buf,
-                          ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
-       else
-               kfree(data_copy->copy_buf);
-
-       data_copy->copy_buf = NULL;
+       iser_free_bounce_sg(data);
  }
  
  #define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & ~MASK_4K) == 0)
@@ -175,7 +247,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
                                struct ib_device *ibdev, u64 *pages,
                                int *offset, int *data_size)
  {
-       struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
+       struct scatterlist *sg, *sgl = data->sg;
         u64 start_addr, end_addr, page, chunk_start = 0;
         unsigned long total_sz = 0;
         unsigned int dma_len;
@@ -227,14 +299,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
  static int iser_data_buf_aligned_len(struct iser_data_buf *data,
                                       struct ib_device *ibdev)
  {
-       struct scatterlist *sgl, *sg, *next_sg = NULL;
+       struct scatterlist *sg, *sgl, *next_sg = NULL;
         u64 start_addr, end_addr;
         int i, ret_len, start_check = 0;
  
         if (data->dma_nents == 1)
                 return 1;
  
-       sgl = (struct scatterlist *)data->buf;
+       sgl = data->sg;
         start_addr  = ib_sg_dma_address(ibdev, sgl);
  
         for_each_sg(sgl, sg, data->dma_nents, i) {
@@ -266,11 +338,10 @@ static int iser_data_buf_aligned_len(struct iser_data_buf *data,
  static void iser_data_buf_dump(struct iser_data_buf *data,
                                struct ib_device *ibdev)
  {
-       struct scatterlist *sgl = (struct scatterlist *)data->buf;
         struct scatterlist *sg;
         int i;
  
-       for_each_sg(sgl, sg, data->dma_nents, i)
+       for_each_sg(data->sg, sg, data->dma_nents, i)
                 iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
                          "off:0x%x sz:0x%x dma_len:0x%x\n",
                          i, (unsigned long)ib_sg_dma_address(ibdev, sg),
@@ -288,31 +359,6 @@ static void iser_dump_page_vec(struct iser_page_vec *page_vec)
                 iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]);
  }
  
-static void iser_page_vec_build(struct iser_data_buf *data,
-                               struct iser_page_vec *page_vec,
-                               struct ib_device *ibdev)
-{
-       int page_vec_len = 0;
-
-       page_vec->length = 0;
-       page_vec->offset = 0;
-
-       iser_dbg("Translating sg sz: %d\n", data->dma_nents);
-       page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
-                                          &page_vec->offset,
-                                          &page_vec->data_size);
-       iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
-
-       page_vec->length = page_vec_len;
-
-       if (page_vec_len * SIZE_4K < page_vec->data_size) {
-               iser_err("page_vec too short to hold this SG\n");
-               iser_data_buf_dump(data, ibdev);
-               iser_dump_page_vec(page_vec);
-               BUG();
-       }
-}
-
  int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
                             struct iser_data_buf *data,
                             enum iser_data_dir iser_dir,
@@ -323,7 +369,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
         iser_task->dir[iser_dir] = 1;
         dev = iser_task->iser_conn->ib_conn.device->ib_device;
  
-       data->dma_nents = ib_dma_map_sg(dev, data->buf, data->size, dma_dir);
+       data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size, dma_dir);
         if (data->dma_nents == 0) {
                 iser_err("dma_map_sg failed!!!\n");
                 return -EINVAL;
@@ -338,24 +384,41 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task,
         struct ib_device *dev;
  
         dev = iser_task->iser_conn->ib_conn.device->ib_device;
-       ib_dma_unmap_sg(dev, data->buf, data->size, dir);
+       ib_dma_unmap_sg(dev, data->sg, data->size, dir);
+}
+
+static int
+iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
+            struct iser_mem_reg *reg)
+{
+       struct scatterlist *sg = mem->sg;
+
+       reg->sge.lkey = device->mr->lkey;
+       reg->rkey = device->mr->rkey;
+       reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]);
+       reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]);
+
+       iser_dbg("Single DMA entry: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
+                " length=0x%x\n", reg->sge.lkey, reg->rkey,
+                reg->sge.addr, reg->sge.length);
+
+       return 0;
  }
  
  static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
-                             struct ib_device *ibdev,
                               struct iser_data_buf *mem,
-                             struct iser_data_buf *mem_copy,
                               enum iser_data_dir cmd_dir,
                               int aligned_len)
  {
-       struct iscsi_conn    *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+       struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+       struct iser_device *device = iser_task->iser_conn->ib_conn.device;
  
         iscsi_conn->fmr_unalign_cnt++;
         iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
                   aligned_len, mem->size);
  
         if (iser_debug_level > 0)
-               iser_data_buf_dump(mem, ibdev);
+               iser_data_buf_dump(mem, device->ib_device);
  
         /* unmap the command data before accessing it */
         iser_dma_unmap_task_data(iser_task, mem,
@@ -364,12 +427,94 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
  
         /* allocate copy buf, if we are writing, copy the */
         /* unaligned scatterlist, dma map the copy        */
-       if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0)
+       if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
                 return -ENOMEM;
  
         return 0;
  }
  
+/**
+ * iser_reg_page_vec - Register physical memory
+ *
+ * returns: 0 on success, errno code on failure
+ */
+static
+int iser_reg_page_vec(struct iscsi_iser_task *iser_task,
+                     struct iser_data_buf *mem,
+                     struct iser_page_vec *page_vec,
+                     struct iser_mem_reg *mem_reg)
+{
+       struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
+       struct iser_device *device = ib_conn->device;
+       struct ib_pool_fmr *fmr;
+       int ret, plen;
+
+       plen = iser_sg_to_page_vec(mem, device->ib_device,
+                                  page_vec->pages,
+                                  &page_vec->offset,
+                                  &page_vec->data_size);
+       page_vec->length = plen;
+       if (plen * SIZE_4K < page_vec->data_size) {
+               iser_err("page vec too short to hold this SG\n");
+               iser_data_buf_dump(mem, device->ib_device);
+               iser_dump_page_vec(page_vec);
+               return -EINVAL;
+       }
+
+       fmr  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
+                                   page_vec->pages,
+                                   page_vec->length,
+                                   page_vec->pages[0]);
+       if (IS_ERR(fmr)) {
+               ret = PTR_ERR(fmr);
+               iser_err("ib_fmr_pool_map_phys failed: %d\n", ret);
+               return ret;
+       }
+
+       mem_reg->sge.lkey = fmr->fmr->lkey;
+       mem_reg->rkey = fmr->fmr->rkey;
+       mem_reg->sge.addr = page_vec->pages[0] + page_vec->offset;
+       mem_reg->sge.length = page_vec->data_size;
+       mem_reg->mem_h = fmr;
+
+       return 0;
+}
+
+/**
+ * Unregister (previosuly registered using FMR) memory.
+ * If memory is non-FMR does nothing.
+ */
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+                       enum iser_data_dir cmd_dir)
+{
+       struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
+       int ret;
+
+       if (!reg->mem_h)
+               return;
+
+       iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
+
+       ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
+       if (ret)
+               iser_err("ib_fmr_pool_unmap failed %d\n", ret);
+
+       reg->mem_h = NULL;
+}
+
+void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
+                           enum iser_data_dir cmd_dir)
+{
+       struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
+
+       if (!reg->mem_h)
+               return;
+
+       iser_reg_desc_put(&iser_task->iser_conn->ib_conn,
+                         reg->mem_h);
+       reg->mem_h = NULL;
+}
+
  /**
   * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
   * using FMR (if possible) obtaining rkey and va
@@ -383,45 +528,29 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
         struct iser_device   *device = ib_conn->device;
         struct ib_device     *ibdev = device->ib_device;
         struct iser_data_buf *mem = &iser_task->data[cmd_dir];
-       struct iser_regd_buf *regd_buf;
+       struct iser_mem_reg *mem_reg;
         int aligned_len;
         int err;
         int i;
-       struct scatterlist *sg;
  
-       regd_buf = &iser_task->rdma_regd[cmd_dir];
+       mem_reg = &iser_task->rdma_reg[cmd_dir];
  
         aligned_len = iser_data_buf_aligned_len(mem, ibdev);
         if (aligned_len != mem->dma_nents) {
-               err = fall_to_bounce_buf(iser_task, ibdev, mem,
-                                        &iser_task->data_copy[cmd_dir],
+               err = fall_to_bounce_buf(iser_task, mem,
                                          cmd_dir, aligned_len);
                 if (err) {
                         iser_err("failed to allocate bounce buffer\n");
                         return err;
                 }
-               mem = &iser_task->data_copy[cmd_dir];
         }
  
         /* if there a single dma entry, FMR is not needed */
         if (mem->dma_nents == 1) {
-               sg = (struct scatterlist *)mem->buf;
-
-               regd_buf->reg.lkey = device->mr->lkey;
-               regd_buf->reg.rkey = device->mr->rkey;
-               regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
-               regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
-
-               iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
-                        "va: 0x%08lX sz: %ld]\n",
-                        (unsigned int)regd_buf->reg.lkey,
-                        (unsigned int)regd_buf->reg.rkey,
-                        (unsigned long)regd_buf->reg.va,
-                        (unsigned long)regd_buf->reg.len);
+               return iser_reg_dma(device, mem, mem_reg);
         } else { /* use FMR for multiple dma entries */
-               iser_page_vec_build(mem, ib_conn->fmr.page_vec, ibdev);
-               err = iser_reg_page_vec(ib_conn, ib_conn->fmr.page_vec,
-                                       &regd_buf->reg);
+               err = iser_reg_page_vec(iser_task, mem, ib_conn->fmr.page_vec,
+                                       mem_reg);
                 if (err && err != -EAGAIN) {
                         iser_data_buf_dump(mem, ibdev);
                         iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
@@ -519,8 +648,10 @@ iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
  
  static int
  iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
-               struct fast_reg_descriptor *desc, struct ib_sge *data_sge,
-               struct ib_sge *prot_sge, struct ib_sge *sig_sge)
+               struct fast_reg_descriptor *desc,
+               struct iser_mem_reg *data_reg,
+               struct iser_mem_reg *prot_reg,
+               struct iser_mem_reg *sig_reg)
  {
         struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
         struct iser_pi_context *pi_ctx = desc->pi_ctx;
@@ -544,12 +675,12 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
         memset(&sig_wr, 0, sizeof(sig_wr));
         sig_wr.opcode = IB_WR_REG_SIG_MR;
         sig_wr.wr_id = ISER_FASTREG_LI_WRID;
-       sig_wr.sg_list = data_sge;
+       sig_wr.sg_list = &data_reg->sge;
         sig_wr.num_sge = 1;
         sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
         sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
         if (scsi_prot_sg_count(iser_task->sc))
-               sig_wr.wr.sig_handover.prot = prot_sge;
+               sig_wr.wr.sig_handover.prot = &prot_reg->sge;
         sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
                                               IB_ACCESS_REMOTE_READ |
                                               IB_ACCESS_REMOTE_WRITE;
@@ -566,27 +697,26 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
         }
         desc->reg_indicators &= ~ISER_SIG_KEY_VALID;
  
-       sig_sge->lkey = pi_ctx->sig_mr->lkey;
-       sig_sge->addr = 0;
-       sig_sge->length = scsi_transfer_length(iser_task->sc);
+       sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
+       sig_reg->rkey = pi_ctx->sig_mr->rkey;
+       sig_reg->sge.addr = 0;
+       sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
  
-       iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
-                sig_sge->addr, sig_sge->length,
-                sig_sge->lkey);
+       iser_dbg("sig_sge: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
+                sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
+                sig_reg->sge.length);
  err:
         return ret;
  }
  
  static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
-                           struct iser_regd_buf *regd_buf,
                             struct iser_data_buf *mem,
+                           struct fast_reg_descriptor *desc,
                             enum iser_reg_indicator ind,
-                           struct ib_sge *sge)
+                           struct iser_mem_reg *reg)
  {
-       struct fast_reg_descriptor *desc = regd_buf->reg.mem_h;
         struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
         struct iser_device *device = ib_conn->device;
-       struct ib_device *ibdev = device->ib_device;
         struct ib_mr *mr;
         struct ib_fast_reg_page_list *frpl;
         struct ib_send_wr fastreg_wr, inv_wr;
@@ -594,17 +724,8 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
         int ret, offset, size, plen;
  
         /* if there a single dma entry, dma mr suffices */
-       if (mem->dma_nents == 1) {
-               struct scatterlist *sg = (struct scatterlist *)mem->buf;
-
-               sge->lkey = device->mr->lkey;
-               sge->addr   = ib_sg_dma_address(ibdev, &sg[0]);
-               sge->length  = ib_sg_dma_len(ibdev, &sg[0]);
-
-               iser_dbg("Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n",
-                        sge->lkey, sge->addr, sge->length);
-               return 0;
-       }
+       if (mem->dma_nents == 1)
+               return iser_reg_dma(device, mem, reg);
  
         if (ind == ISER_DATA_KEY_VALID) {
                 mr = desc->data_mr;
@@ -652,9 +773,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
         }
         desc->reg_indicators &= ~ind;
  
-       sge->lkey = mr->lkey;
-       sge->addr = frpl->page_list[0] + offset;
-       sge->length = size;
+       reg->sge.lkey = mr->lkey;
+       reg->rkey = mr->rkey;
+       reg->sge.addr = frpl->page_list[0] + offset;
+       reg->sge.length = size;
  
         return ret;
  }
@@ -672,93 +794,66 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task,
         struct iser_device *device = ib_conn->device;
         struct ib_device *ibdev = device->ib_device;
         struct iser_data_buf *mem = &iser_task->data[cmd_dir];
-       struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
+       struct iser_mem_reg *mem_reg = &iser_task->rdma_reg[cmd_dir];
         struct fast_reg_descriptor *desc = NULL;
-       struct ib_sge data_sge;
         int err, aligned_len;
-       unsigned long flags;
  
         aligned_len = iser_data_buf_aligned_len(mem, ibdev);
         if (aligned_len != mem->dma_nents) {
-               err = fall_to_bounce_buf(iser_task, ibdev, mem,
-                                        &iser_task->data_copy[cmd_dir],
+               err = fall_to_bounce_buf(iser_task, mem,
                                          cmd_dir, aligned_len);
                 if (err) {
                         iser_err("failed to allocate bounce buffer\n");
                         return err;
                 }
-               mem = &iser_task->data_copy[cmd_dir];
         }
  
         if (mem->dma_nents != 1 ||
             scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
-               spin_lock_irqsave(&ib_conn->lock, flags);
-               desc = list_first_entry(&ib_conn->fastreg.pool,
-                                       struct fast_reg_descriptor, list);
-               list_del(&desc->list);
-               spin_unlock_irqrestore(&ib_conn->lock, flags);
-               regd_buf->reg.mem_h = desc;
+               desc = iser_reg_desc_get(ib_conn);
+               mem_reg->mem_h = desc;
         }
  
-       err = iser_fast_reg_mr(iser_task, regd_buf, mem,
-                              ISER_DATA_KEY_VALID, &data_sge);
+       err = iser_fast_reg_mr(iser_task, mem, desc,
+                              ISER_DATA_KEY_VALID, mem_reg);
         if (err)
                 goto err_reg;
  
         if (scsi_get_prot_op(iser_task->sc) != SCSI_PROT_NORMAL) {
-               struct ib_sge prot_sge, sig_sge;
+               struct iser_mem_reg prot_reg;
  
-               memset(&prot_sge, 0, sizeof(prot_sge));
+               memset(&prot_reg, 0, sizeof(prot_reg));
                 if (scsi_prot_sg_count(iser_task->sc)) {
                         mem = &iser_task->prot[cmd_dir];
                         aligned_len = iser_data_buf_aligned_len(mem, ibdev);
                         if (aligned_len != mem->dma_nents) {
-                               err = fall_to_bounce_buf(iser_task, ibdev, mem,
-                                                        &iser_task->prot_copy[cmd_dir],
+                               err = fall_to_bounce_buf(iser_task, mem,
                                                          cmd_dir, aligned_len);
                                 if (err) {
                                         iser_err("failed to allocate bounce buffer\n");
                                         return err;
                                 }
-                               mem = &iser_task->prot_copy[cmd_dir];
                         }
  
-                       err = iser_fast_reg_mr(iser_task, regd_buf, mem,
-                                              ISER_PROT_KEY_VALID, &prot_sge);
+                       err = iser_fast_reg_mr(iser_task, mem, desc,
+                                              ISER_PROT_KEY_VALID, &prot_reg);
                         if (err)
                                 goto err_reg;
                 }
  
-               err = iser_reg_sig_mr(iser_task, desc, &data_sge,
-                                     &prot_sge, &sig_sge);
+               err = iser_reg_sig_mr(iser_task, desc, mem_reg,
+                                     &prot_reg, mem_reg);
                 if (err) {
                         iser_err("Failed to register signature mr\n");
                         return err;
                 }
                 desc->reg_indicators |= ISER_FASTREG_PROTECTED;
-
-               regd_buf->reg.lkey = sig_sge.lkey;
-               regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
-               regd_buf->reg.va = sig_sge.addr;
-               regd_buf->reg.len = sig_sge.length;
-       } else {
-               if (desc)
-                       regd_buf->reg.rkey = desc->data_mr->rkey;
-               else
-                       regd_buf->reg.rkey = device->mr->rkey;
-
-               regd_buf->reg.lkey = data_sge.lkey;
-               regd_buf->reg.va = data_sge.addr;
-               regd_buf->reg.len = data_sge.length;
         }
  
         return 0;
  err_reg:
-       if (desc) {
-               spin_lock_irqsave(&ib_conn->lock, flags);
-               list_add_tail(&desc->list, &ib_conn->fastreg.pool);
-               spin_unlock_irqrestore(&ib_conn->lock, flags);
-       }
+       if (desc)
+               iser_reg_desc_put(ib_conn, desc);
  
         return err;
  }
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c

index 4065abe..cc2dd35 100644 (file)
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -273,6 +273,65 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
         ib_conn->fmr.page_vec = NULL;
  }
  
+static int
+iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
+                 struct fast_reg_descriptor *desc)
+{
+       struct iser_pi_context *pi_ctx = NULL;
+       struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2,
+                                              .flags = IB_MR_SIGNATURE_EN};
+       int ret = 0;
+
+       desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
+       if (!desc->pi_ctx)
+               return -ENOMEM;
+
+       pi_ctx = desc->pi_ctx;
+
+       pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
+                                           ISCSI_ISER_SG_TABLESIZE);
+       if (IS_ERR(pi_ctx->prot_frpl)) {
+               ret = PTR_ERR(pi_ctx->prot_frpl);
+               goto prot_frpl_failure;
+       }
+
+       pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
+                                       ISCSI_ISER_SG_TABLESIZE + 1);
+       if (IS_ERR(pi_ctx->prot_mr)) {
+               ret = PTR_ERR(pi_ctx->prot_mr);
+               goto prot_mr_failure;
+       }
+       desc->reg_indicators |= ISER_PROT_KEY_VALID;
+
+       pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+       if (IS_ERR(pi_ctx->sig_mr)) {
+               ret = PTR_ERR(pi_ctx->sig_mr);
+               goto sig_mr_failure;
+       }
+       desc->reg_indicators |= ISER_SIG_KEY_VALID;
+       desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
+
+       return 0;
+
+sig_mr_failure:
+       ib_dereg_mr(desc->pi_ctx->prot_mr);
+prot_mr_failure:
+       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+prot_frpl_failure:
+       kfree(desc->pi_ctx);
+
+       return ret;
+}
+
+static void
+iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
+{
+       ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
+       ib_dereg_mr(pi_ctx->prot_mr);
+       ib_destroy_mr(pi_ctx->sig_mr);
+       kfree(pi_ctx);
+}
+
  static int
  iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
                          bool pi_enable, struct fast_reg_descriptor *desc)
@@ -297,59 +356,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
         desc->reg_indicators |= ISER_DATA_KEY_VALID;
  
         if (pi_enable) {
-               struct ib_mr_init_attr mr_init_attr = {0};
-               struct iser_pi_context *pi_ctx = NULL;
-
-               desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
-               if (!desc->pi_ctx) {
-                       iser_err("Failed to allocate pi context\n");
-                       ret = -ENOMEM;
+               ret = iser_alloc_pi_ctx(ib_device, pd, desc);
+               if (ret)
                         goto pi_ctx_alloc_failure;
-               }
-               pi_ctx = desc->pi_ctx;
-
-               pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
-                                                   ISCSI_ISER_SG_TABLESIZE);
-               if (IS_ERR(pi_ctx->prot_frpl)) {
-                       ret = PTR_ERR(pi_ctx->prot_frpl);
-                       iser_err("Failed to allocate prot frpl ret=%d\n",
-                                ret);
-                       goto prot_frpl_failure;
-               }
-
-               pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
-                                               ISCSI_ISER_SG_TABLESIZE + 1);
-               if (IS_ERR(pi_ctx->prot_mr)) {
-                       ret = PTR_ERR(pi_ctx->prot_mr);
-                       iser_err("Failed to allocate prot frmr ret=%d\n",
-                                ret);
-                       goto prot_mr_failure;
-               }
-               desc->reg_indicators |= ISER_PROT_KEY_VALID;
-
-               mr_init_attr.max_reg_descriptors = 2;
-               mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
-               pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
-               if (IS_ERR(pi_ctx->sig_mr)) {
-                       ret = PTR_ERR(pi_ctx->sig_mr);
-                       iser_err("Failed to allocate signature enabled mr err=%d\n",
-                                ret);
-                       goto sig_mr_failure;
-               }
-               desc->reg_indicators |= ISER_SIG_KEY_VALID;
         }
-       desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
-
-       iser_dbg("Create fr_desc %p page_list %p\n",
-                desc, desc->data_frpl->page_list);
  
         return 0;
-sig_mr_failure:
-       ib_dereg_mr(desc->pi_ctx->prot_mr);
-prot_mr_failure:
-       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
-prot_frpl_failure:
-       kfree(desc->pi_ctx);
  pi_ctx_alloc_failure:
         ib_dereg_mr(desc->data_mr);
  fast_reg_mr_failure:
@@ -416,12 +428,8 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
                 list_del(&desc->list);
                 ib_free_fast_reg_page_list(desc->data_frpl);
                 ib_dereg_mr(desc->data_mr);
-               if (desc->pi_ctx) {
-                       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
-                       ib_dereg_mr(desc->pi_ctx->prot_mr);
-                       ib_destroy_mr(desc->pi_ctx->sig_mr);
-                       kfree(desc->pi_ctx);
-               }
+               if (desc->pi_ctx)
+                       iser_free_pi_ctx(desc->pi_ctx);
                 kfree(desc);
                 ++i;
         }
@@ -721,7 +729,7 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
         struct iser_conn *iser_conn;
  
         iser_conn = (struct iser_conn *)cma_id->context;
-       iser_conn->state = ISER_CONN_DOWN;
+       iser_conn->state = ISER_CONN_TERMINATING;
  }
  
  /**
@@ -992,93 +1000,6 @@ connect_failure:
         return err;
  }
  
-/**
- * iser_reg_page_vec - Register physical memory
- *
- * returns: 0 on success, errno code on failure
- */
-int iser_reg_page_vec(struct ib_conn *ib_conn,
-                     struct iser_page_vec *page_vec,
-                     struct iser_mem_reg  *mem_reg)
-{
-       struct ib_pool_fmr *mem;
-       u64                io_addr;
-       u64                *page_list;
-       int                status;
-
-       page_list = page_vec->pages;
-       io_addr   = page_list[0];
-
-       mem  = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
-                                   page_list,
-                                   page_vec->length,
-                                   io_addr);
-
-       if (IS_ERR(mem)) {
-               status = (int)PTR_ERR(mem);
-               iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
-               return status;
-       }
-
-       mem_reg->lkey  = mem->fmr->lkey;
-       mem_reg->rkey  = mem->fmr->rkey;
-       mem_reg->len   = page_vec->length * SIZE_4K;
-       mem_reg->va    = io_addr;
-       mem_reg->mem_h = (void *)mem;
-
-       mem_reg->va   += page_vec->offset;
-       mem_reg->len   = page_vec->data_size;
-
-       iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
-                "entry[0]: (0x%08lx,%ld)] -> "
-                "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
-                page_vec, page_vec->length,
-                (unsigned long)page_vec->pages[0],
-                (unsigned long)page_vec->data_size,
-                (unsigned int)mem_reg->lkey, mem_reg->mem_h,
-                (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
-       return 0;
-}
-
-/**
- * Unregister (previosuly registered using FMR) memory.
- * If memory is non-FMR does nothing.
- */
-void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
-                       enum iser_data_dir cmd_dir)
-{
-       struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
-       int ret;
-
-       if (!reg->mem_h)
-               return;
-
-       iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
-
-       ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
-       if (ret)
-               iser_err("ib_fmr_pool_unmap failed %d\n", ret);
-
-       reg->mem_h = NULL;
-}
-
-void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
-                           enum iser_data_dir cmd_dir)
-{
-       struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
-       struct iser_conn *iser_conn = iser_task->iser_conn;
-       struct ib_conn *ib_conn = &iser_conn->ib_conn;
-       struct fast_reg_descriptor *desc = reg->mem_h;
-
-       if (!desc)
-               return;
-
-       reg->mem_h = NULL;
-       spin_lock_bh(&ib_conn->lock);
-       list_add_tail(&desc->list, &ib_conn->fastreg.pool);
-       spin_unlock_bh(&ib_conn->lock);
-}
-
  int iser_post_recvl(struct iser_conn *iser_conn)
  {
         struct ib_recv_wr rx_wr, *rx_wr_failed;
@@ -1210,6 +1131,9 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
                         iscsi_conn_failure(iser_conn->iscsi_conn,
                                            ISCSI_ERR_CONN_FAILED);
  
+       if (wc->wr_id == ISER_FASTREG_LI_WRID)
+               return;
+
         if (is_iser_tx_desc(iser_conn, wr_id)) {
                 struct iser_tx_desc *desc = wr_id;
  
@@ -1254,13 +1178,11 @@ static void iser_handle_wc(struct ib_wc *wc)
                 else
                         iser_dbg("flush error: wr id %llx\n", wc->wr_id);
  
-               if (wc->wr_id != ISER_FASTREG_LI_WRID &&
-                   wc->wr_id != ISER_BEACON_WRID)
-                       iser_handle_comp_error(ib_conn, wc);
-
-               /* complete in case all flush errors were consumed */
                 if (wc->wr_id == ISER_BEACON_WRID)
+                       /* all flush errors were consumed */
                         complete(&ib_conn->flush_comp);
+               else
+                       iser_handle_comp_error(ib_conn, wc);
         }
  }
  
@@ -1306,7 +1228,7 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
  u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
                              enum iser_data_dir cmd_dir, sector_t *sector)
  {
-       struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+       struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
         struct fast_reg_descriptor *desc = reg->mem_h;
         unsigned long sector_size = iser_task->sc->device->sector_size;
         struct ib_mr_status mr_status;
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c

index 075b19c..327529e 100644 (file)
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -76,12 +76,12 @@ isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
  static void
  isert_qp_event_callback(struct ib_event *e, void *context)
  {
-       struct isert_conn *isert_conn = (struct isert_conn *)context;
+       struct isert_conn *isert_conn = context;
  
         isert_err("conn %p event: %d\n", isert_conn, e->event);
         switch (e->event) {
         case IB_EVENT_COMM_EST:
-               rdma_notify(isert_conn->conn_cm_id, IB_EVENT_COMM_EST);
+               rdma_notify(isert_conn->cm_id, IB_EVENT_COMM_EST);
                 break;
         case IB_EVENT_QP_LAST_WQE_REACHED:
                 isert_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED\n");
@@ -107,13 +107,12 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr)
         return 0;
  }
  
-static int
-isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
+static struct isert_comp *
+isert_comp_get(struct isert_conn *isert_conn)
  {
-       struct isert_device *device = isert_conn->conn_device;
-       struct ib_qp_init_attr attr;
+       struct isert_device *device = isert_conn->device;
         struct isert_comp *comp;
-       int ret, i, min = 0;
+       int i, min = 0;
  
         mutex_lock(&device_list_mutex);
         for (i = 0; i < device->comps_used; i++)
@@ -122,9 +121,30 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
                         min = i;
         comp = &device->comps[min];
         comp->active_qps++;
+       mutex_unlock(&device_list_mutex);
+
         isert_info("conn %p, using comp %p min_index: %d\n",
                    isert_conn, comp, min);
+
+       return comp;
+}
+
+static void
+isert_comp_put(struct isert_comp *comp)
+{
+       mutex_lock(&device_list_mutex);
+       comp->active_qps--;
         mutex_unlock(&device_list_mutex);
+}
+
+static struct ib_qp *
+isert_create_qp(struct isert_conn *isert_conn,
+               struct isert_comp *comp,
+               struct rdma_cm_id *cma_id)
+{
+       struct isert_device *device = isert_conn->device;
+       struct ib_qp_init_attr attr;
+       int ret;
  
         memset(&attr, 0, sizeof(struct ib_qp_init_attr));
         attr.event_handler = isert_qp_event_callback;
@@ -149,19 +169,31 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
         if (device->pi_capable)
                 attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
  
-       ret = rdma_create_qp(cma_id, isert_conn->conn_pd, &attr);
+       ret = rdma_create_qp(cma_id, device->pd, &attr);
         if (ret) {
                 isert_err("rdma_create_qp failed for cma_id %d\n", ret);
+               return ERR_PTR(ret);
+       }
+
+       return cma_id->qp;
+}
+
+static int
+isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
+{
+       struct isert_comp *comp;
+       int ret;
+
+       comp = isert_comp_get(isert_conn);
+       isert_conn->qp = isert_create_qp(isert_conn, comp, cma_id);
+       if (IS_ERR(isert_conn->qp)) {
+               ret = PTR_ERR(isert_conn->qp);
                 goto err;
         }
-       isert_conn->conn_qp = cma_id->qp;
  
         return 0;
  err:
-       mutex_lock(&device_list_mutex);
-       comp->active_qps--;
-       mutex_unlock(&device_list_mutex);
-
+       isert_comp_put(comp);
         return ret;
  }
  
@@ -174,18 +206,19 @@ isert_cq_event_callback(struct ib_event *e, void *context)
  static int
  isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
         struct iser_rx_desc *rx_desc;
         struct ib_sge *rx_sg;
         u64 dma_addr;
         int i, j;
  
-       isert_conn->conn_rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS *
+       isert_conn->rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS *
                                 sizeof(struct iser_rx_desc), GFP_KERNEL);
-       if (!isert_conn->conn_rx_descs)
+       if (!isert_conn->rx_descs)
                 goto fail;
  
-       rx_desc = isert_conn->conn_rx_descs;
+       rx_desc = isert_conn->rx_descs;
  
         for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++)  {
                 dma_addr = ib_dma_map_single(ib_dev, (void *)rx_desc,
@@ -198,21 +231,21 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
                 rx_sg = &rx_desc->rx_sg;
                 rx_sg->addr = rx_desc->dma_addr;
                 rx_sg->length = ISER_RX_PAYLOAD_SIZE;
-               rx_sg->lkey = isert_conn->conn_mr->lkey;
+               rx_sg->lkey = device->mr->lkey;
         }
  
-       isert_conn->conn_rx_desc_head = 0;
+       isert_conn->rx_desc_head = 0;
  
         return 0;
  
  dma_map_fail:
-       rx_desc = isert_conn->conn_rx_descs;
+       rx_desc = isert_conn->rx_descs;
         for (j = 0; j < i; j++, rx_desc++) {
                 ib_dma_unmap_single(ib_dev, rx_desc->dma_addr,
                                     ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
         }
-       kfree(isert_conn->conn_rx_descs);
-       isert_conn->conn_rx_descs = NULL;
+       kfree(isert_conn->rx_descs);
+       isert_conn->rx_descs = NULL;
  fail:
         isert_err("conn %p failed to allocate rx descriptors\n", isert_conn);
  
@@ -222,59 +255,51 @@ fail:
  static void
  isert_free_rx_descriptors(struct isert_conn *isert_conn)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->device->ib_device;
         struct iser_rx_desc *rx_desc;
         int i;
  
-       if (!isert_conn->conn_rx_descs)
+       if (!isert_conn->rx_descs)
                 return;
  
-       rx_desc = isert_conn->conn_rx_descs;
+       rx_desc = isert_conn->rx_descs;
         for (i = 0; i < ISERT_QP_MAX_RECV_DTOS; i++, rx_desc++)  {
                 ib_dma_unmap_single(ib_dev, rx_desc->dma_addr,
                                     ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
         }
  
-       kfree(isert_conn->conn_rx_descs);
-       isert_conn->conn_rx_descs = NULL;
+       kfree(isert_conn->rx_descs);
+       isert_conn->rx_descs = NULL;
  }
  
  static void isert_cq_work(struct work_struct *);
  static void isert_cq_callback(struct ib_cq *, void *);
  
-static int
-isert_create_device_ib_res(struct isert_device *device)
+static void
+isert_free_comps(struct isert_device *device)
  {
-       struct ib_device *ib_dev = device->ib_device;
-       struct ib_device_attr *dev_attr;
-       int ret = 0, i;
-       int max_cqe;
-
-       dev_attr = &device->dev_attr;
-       ret = isert_query_device(ib_dev, dev_attr);
-       if (ret)
-               return ret;
+       int i;
  
-       max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+       for (i = 0; i < device->comps_used; i++) {
+               struct isert_comp *comp = &device->comps[i];
  
-       /* asign function handlers */
-       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
-           dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
-               device->use_fastreg = 1;
-               device->reg_rdma_mem = isert_reg_rdma;
-               device->unreg_rdma_mem = isert_unreg_rdma;
-       } else {
-               device->use_fastreg = 0;
-               device->reg_rdma_mem = isert_map_rdma;
-               device->unreg_rdma_mem = isert_unmap_cmd;
+               if (comp->cq) {
+                       cancel_work_sync(&comp->work);
+                       ib_destroy_cq(comp->cq);
+               }
         }
+       kfree(device->comps);
+}
  
-       /* Check signature cap */
-       device->pi_capable = dev_attr->device_cap_flags &
-                            IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
+static int
+isert_alloc_comps(struct isert_device *device,
+                 struct ib_device_attr *attr)
+{
+       int i, max_cqe, ret = 0;
  
         device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(),
-                                       device->ib_device->num_comp_vectors));
+                                device->ib_device->num_comp_vectors));
+
         isert_info("Using %d CQs, %s supports %d vectors support "
                    "Fast registration %d pi_capable %d\n",
                    device->comps_used, device->ib_device->name,
@@ -288,6 +313,8 @@ isert_create_device_ib_res(struct isert_device *device)
                 return -ENOMEM;
         }
  
+       max_cqe = min(ISER_MAX_CQ_LEN, attr->max_cqe);
+
         for (i = 0; i < device->comps_used; i++) {
                 struct isert_comp *comp = &device->comps[i];
  
@@ -299,6 +326,7 @@ isert_create_device_ib_res(struct isert_device *device)
                                         (void *)comp,
                                         max_cqe, i);
                 if (IS_ERR(comp->cq)) {
+                       isert_err("Unable to allocate cq\n");
                         ret = PTR_ERR(comp->cq);
                         comp->cq = NULL;
                         goto out_cq;
@@ -310,40 +338,79 @@ isert_create_device_ib_res(struct isert_device *device)
         }
  
         return 0;
-
  out_cq:
-       for (i = 0; i < device->comps_used; i++) {
-               struct isert_comp *comp = &device->comps[i];
+       isert_free_comps(device);
+       return ret;
+}
  
-               if (comp->cq) {
-                       cancel_work_sync(&comp->work);
-                       ib_destroy_cq(comp->cq);
-               }
+static int
+isert_create_device_ib_res(struct isert_device *device)
+{
+       struct ib_device_attr *dev_attr;
+       int ret;
+
+       dev_attr = &device->dev_attr;
+       ret = isert_query_device(device->ib_device, dev_attr);
+       if (ret)
+               return ret;
+
+       /* asign function handlers */
+       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
+           dev_attr->device_cap_flags & IB_DEVICE_SIGNATURE_HANDOVER) {
+               device->use_fastreg = 1;
+               device->reg_rdma_mem = isert_reg_rdma;
+               device->unreg_rdma_mem = isert_unreg_rdma;
+       } else {
+               device->use_fastreg = 0;
+               device->reg_rdma_mem = isert_map_rdma;
+               device->unreg_rdma_mem = isert_unmap_cmd;
         }
-       kfree(device->comps);
  
+       ret = isert_alloc_comps(device, dev_attr);
+       if (ret)
+               return ret;
+
+       device->pd = ib_alloc_pd(device->ib_device);
+       if (IS_ERR(device->pd)) {
+               ret = PTR_ERR(device->pd);
+               isert_err("failed to allocate pd, device %p, ret=%d\n",
+                         device, ret);
+               goto out_cq;
+       }
+
+       device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE);
+       if (IS_ERR(device->mr)) {
+               ret = PTR_ERR(device->mr);
+               isert_err("failed to create dma mr, device %p, ret=%d\n",
+                         device, ret);
+               goto out_mr;
+       }
+
+       /* Check signature cap */
+       device->pi_capable = dev_attr->device_cap_flags &
+                            IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
+
+       return 0;
+
+out_mr:
+       ib_dealloc_pd(device->pd);
+out_cq:
+       isert_free_comps(device);
         return ret;
  }
  
  static void
  isert_free_device_ib_res(struct isert_device *device)
  {
-       int i;
-
         isert_info("device %p\n", device);
  
-       for (i = 0; i < device->comps_used; i++) {
-               struct isert_comp *comp = &device->comps[i];
-
-               cancel_work_sync(&comp->work);
-               ib_destroy_cq(comp->cq);
-               comp->cq = NULL;
-       }
-       kfree(device->comps);
+       ib_dereg_mr(device->mr);
+       ib_dealloc_pd(device->pd);
+       isert_free_comps(device);
  }
  
  static void
-isert_device_try_release(struct isert_device *device)
+isert_device_put(struct isert_device *device)
  {
         mutex_lock(&device_list_mutex);
         device->refcount--;
@@ -357,7 +424,7 @@ isert_device_try_release(struct isert_device *device)
  }
  
  static struct isert_device *
-isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)
+isert_device_get(struct rdma_cm_id *cma_id)
  {
         struct isert_device *device;
         int ret;
@@ -404,13 +471,13 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
         struct fast_reg_descriptor *fr_desc, *tmp;
         int i = 0;
  
-       if (list_empty(&isert_conn->conn_fr_pool))
+       if (list_empty(&isert_conn->fr_pool))
                 return;
  
         isert_info("Freeing conn %p fastreg pool", isert_conn);
  
         list_for_each_entry_safe(fr_desc, tmp,
-                                &isert_conn->conn_fr_pool, list) {
+                                &isert_conn->fr_pool, list) {
                 list_del(&fr_desc->list);
                 ib_free_fast_reg_page_list(fr_desc->data_frpl);
                 ib_dereg_mr(fr_desc->data_mr);
@@ -424,9 +491,9 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
                 ++i;
         }
  
-       if (i < isert_conn->conn_fr_pool_size)
+       if (i < isert_conn->fr_pool_size)
                 isert_warn("Pool still has %d regions registered\n",
-                       isert_conn->conn_fr_pool_size - i);
+                       isert_conn->fr_pool_size - i);
  }
  
  static int
@@ -526,7 +593,7 @@ static int
  isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
  {
         struct fast_reg_descriptor *fr_desc;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
         struct se_session *se_sess = isert_conn->conn->sess->se_sess;
         struct se_node_acl *se_nacl = se_sess->se_node_acl;
         int i, ret, tag_num;
@@ -537,7 +604,7 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
         tag_num = max_t(u32, ISCSIT_MIN_TAGS, se_nacl->queue_depth);
         tag_num = (tag_num * 2) + ISCSIT_EXTRA_TAGS;
  
-       isert_conn->conn_fr_pool_size = 0;
+       isert_conn->fr_pool_size = 0;
         for (i = 0; i < tag_num; i++) {
                 fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
                 if (!fr_desc) {
@@ -547,7 +614,7 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
                 }
  
                 ret = isert_create_fr_desc(device->ib_device,
-                                          isert_conn->conn_pd, fr_desc);
+                                          device->pd, fr_desc);
                 if (ret) {
                         isert_err("Failed to create fastreg descriptor err=%d\n",
                                ret);
@@ -555,12 +622,12 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
                         goto err;
                 }
  
-               list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool);
-               isert_conn->conn_fr_pool_size++;
+               list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
+               isert_conn->fr_pool_size++;
         }
  
         isert_dbg("Creating conn %p fastreg pool size=%d",
-                isert_conn, isert_conn->conn_fr_pool_size);
+                isert_conn, isert_conn->fr_pool_size);
  
         return 0;
  
@@ -569,55 +636,50 @@ err:
         return ret;
  }
  
-static int
-isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
+static void
+isert_init_conn(struct isert_conn *isert_conn)
  {
-       struct isert_np *isert_np = cma_id->context;
-       struct iscsi_np *np = isert_np->np;
-       struct isert_conn *isert_conn;
-       struct isert_device *device;
-       struct ib_device *ib_dev = cma_id->device;
-       int ret = 0;
-
-       spin_lock_bh(&np->np_thread_lock);
-       if (!np->enabled) {
-               spin_unlock_bh(&np->np_thread_lock);
-               isert_dbg("iscsi_np is not enabled, reject connect request\n");
-               return rdma_reject(cma_id, NULL, 0);
-       }
-       spin_unlock_bh(&np->np_thread_lock);
-
-       isert_dbg("cma_id: %p, portal: %p\n",
-                cma_id, cma_id->context);
-
-       isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL);
-       if (!isert_conn) {
-               isert_err("Unable to allocate isert_conn\n");
-               return -ENOMEM;
-       }
         isert_conn->state = ISER_CONN_INIT;
-       INIT_LIST_HEAD(&isert_conn->conn_accept_node);
-       init_completion(&isert_conn->conn_login_comp);
+       INIT_LIST_HEAD(&isert_conn->accept_node);
+       init_completion(&isert_conn->login_comp);
         init_completion(&isert_conn->login_req_comp);
-       init_completion(&isert_conn->conn_wait);
-       kref_init(&isert_conn->conn_kref);
-       mutex_init(&isert_conn->conn_mutex);
-       spin_lock_init(&isert_conn->conn_lock);
-       INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
+       init_completion(&isert_conn->wait);
+       kref_init(&isert_conn->kref);
+       mutex_init(&isert_conn->mutex);
+       spin_lock_init(&isert_conn->pool_lock);
+       INIT_LIST_HEAD(&isert_conn->fr_pool);
+}
+
+static void
+isert_free_login_buf(struct isert_conn *isert_conn)
+{
+       struct ib_device *ib_dev = isert_conn->device->ib_device;
  
-       isert_conn->conn_cm_id = cma_id;
+       ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
+                           ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
+       ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN,
+                           DMA_FROM_DEVICE);
+       kfree(isert_conn->login_buf);
+}
+
+static int
+isert_alloc_login_buf(struct isert_conn *isert_conn,
+                     struct ib_device *ib_dev)
+{
+       int ret;
  
         isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
                                         ISER_RX_LOGIN_SIZE, GFP_KERNEL);
         if (!isert_conn->login_buf) {
                 isert_err("Unable to allocate isert_conn->login_buf\n");
-               ret = -ENOMEM;
-               goto out;
+               return -ENOMEM;
         }
  
         isert_conn->login_req_buf = isert_conn->login_buf;
         isert_conn->login_rsp_buf = isert_conn->login_buf +
                                     ISCSI_DEF_MAX_RECV_SEG_LEN;
+
         isert_dbg("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
                  isert_conn->login_buf, isert_conn->login_req_buf,
                  isert_conn->login_rsp_buf);
@@ -628,8 +690,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
  
         ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma);
         if (ret) {
-               isert_err("ib_dma_mapping_error failed for login_req_dma: %d\n",
-                      ret);
+               isert_err("login_req_dma mapping error: %d\n", ret);
                 isert_conn->login_req_dma = 0;
                 goto out_login_buf;
         }
@@ -640,17 +701,58 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
  
         ret = ib_dma_mapping_error(ib_dev, isert_conn->login_rsp_dma);
         if (ret) {
-               isert_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n",
-                      ret);
+               isert_err("login_rsp_dma mapping error: %d\n", ret);
                 isert_conn->login_rsp_dma = 0;
                 goto out_req_dma_map;
         }
  
-       device = isert_device_find_by_ib_dev(cma_id);
+       return 0;
+
+out_req_dma_map:
+       ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
+                           ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE);
+out_login_buf:
+       kfree(isert_conn->login_buf);
+       return ret;
+}
+
+static int
+isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
+{
+       struct isert_np *isert_np = cma_id->context;
+       struct iscsi_np *np = isert_np->np;
+       struct isert_conn *isert_conn;
+       struct isert_device *device;
+       int ret = 0;
+
+       spin_lock_bh(&np->np_thread_lock);
+       if (!np->enabled) {
+               spin_unlock_bh(&np->np_thread_lock);
+               isert_dbg("iscsi_np is not enabled, reject connect request\n");
+               return rdma_reject(cma_id, NULL, 0);
+       }
+       spin_unlock_bh(&np->np_thread_lock);
+
+       isert_dbg("cma_id: %p, portal: %p\n",
+                cma_id, cma_id->context);
+
+       isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL);
+       if (!isert_conn)
+               return -ENOMEM;
+
+       isert_init_conn(isert_conn);
+       isert_conn->cm_id = cma_id;
+
+       ret = isert_alloc_login_buf(isert_conn, cma_id->device);
+       if (ret)
+               goto out;
+
+       device = isert_device_get(cma_id);
         if (IS_ERR(device)) {
                 ret = PTR_ERR(device);
                 goto out_rsp_dma_map;
         }
+       isert_conn->device = device;
  
         /* Set max inflight RDMA READ requests */
         isert_conn->initiator_depth = min_t(u8,
@@ -658,24 +760,6 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
                                 device->dev_attr.max_qp_init_rd_atom);
         isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
  
-       isert_conn->conn_device = device;
-       isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device);
-       if (IS_ERR(isert_conn->conn_pd)) {
-               ret = PTR_ERR(isert_conn->conn_pd);
-               isert_err("ib_alloc_pd failed for conn %p: ret=%d\n",
-                      isert_conn, ret);
-               goto out_pd;
-       }
-
-       isert_conn->conn_mr = ib_get_dma_mr(isert_conn->conn_pd,
-                                          IB_ACCESS_LOCAL_WRITE);
-       if (IS_ERR(isert_conn->conn_mr)) {
-               ret = PTR_ERR(isert_conn->conn_mr);
-               isert_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
-                      isert_conn, ret);
-               goto out_mr;
-       }
-
         ret = isert_conn_setup_qp(isert_conn, cma_id);
         if (ret)
                 goto out_conn_dev;
@@ -689,7 +773,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
                 goto out_conn_dev;
  
         mutex_lock(&isert_np->np_accept_mutex);
-       list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list);
+       list_add_tail(&isert_conn->accept_node, &isert_np->np_accept_list);
         mutex_unlock(&isert_np->np_accept_mutex);
  
         isert_info("np %p: Allow accept_np to continue\n", np);
@@ -697,19 +781,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
         return 0;
  
  out_conn_dev:
-       ib_dereg_mr(isert_conn->conn_mr);
-out_mr:
-       ib_dealloc_pd(isert_conn->conn_pd);
-out_pd:
-       isert_device_try_release(device);
+       isert_device_put(device);
  out_rsp_dma_map:
-       ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
-                           ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
-out_req_dma_map:
-       ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
-                           ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_FROM_DEVICE);
-out_login_buf:
-       kfree(isert_conn->login_buf);
+       isert_free_login_buf(isert_conn);
  out:
         kfree(isert_conn);
         rdma_reject(cma_id, NULL, 0);
@@ -719,43 +793,32 @@ out:
  static void
  isert_connect_release(struct isert_conn *isert_conn)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
  
         isert_dbg("conn %p\n", isert_conn);
  
-       if (device && device->use_fastreg)
+       BUG_ON(!device);
+
+       if (device->use_fastreg)
                 isert_conn_free_fastreg_pool(isert_conn);
  
         isert_free_rx_descriptors(isert_conn);
-       rdma_destroy_id(isert_conn->conn_cm_id);
+       if (isert_conn->cm_id)
+               rdma_destroy_id(isert_conn->cm_id);
  
-       if (isert_conn->conn_qp) {
-               struct isert_comp *comp = isert_conn->conn_qp->recv_cq->cq_context;
+       if (isert_conn->qp) {
+               struct isert_comp *comp = isert_conn->qp->recv_cq->cq_context;
  
-               isert_dbg("dec completion context %p active_qps\n", comp);
-               mutex_lock(&device_list_mutex);
-               comp->active_qps--;
-               mutex_unlock(&device_list_mutex);
-
-               ib_destroy_qp(isert_conn->conn_qp);
+               isert_comp_put(comp);
+               ib_destroy_qp(isert_conn->qp);
         }
  
-       ib_dereg_mr(isert_conn->conn_mr);
-       ib_dealloc_pd(isert_conn->conn_pd);
+       if (isert_conn->login_buf)
+               isert_free_login_buf(isert_conn);
  
-       if (isert_conn->login_buf) {
-               ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
-                                   ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE);
-               ib_dma_unmap_single(ib_dev, isert_conn->login_req_dma,
-                                   ISCSI_DEF_MAX_RECV_SEG_LEN,
-                                   DMA_FROM_DEVICE);
-               kfree(isert_conn->login_buf);
-       }
-       kfree(isert_conn);
+       isert_device_put(device);
  
-       if (device)
-               isert_device_try_release(device);
+       kfree(isert_conn);
  }
  
  static void
@@ -765,22 +828,22 @@ isert_connected_handler(struct rdma_cm_id *cma_id)
  
         isert_info("conn %p\n", isert_conn);
  
-       if (!kref_get_unless_zero(&isert_conn->conn_kref)) {
+       if (!kref_get_unless_zero(&isert_conn->kref)) {
                 isert_warn("conn %p connect_release is running\n", isert_conn);
                 return;
         }
  
-       mutex_lock(&isert_conn->conn_mutex);
+       mutex_lock(&isert_conn->mutex);
         if (isert_conn->state != ISER_CONN_FULL_FEATURE)
                 isert_conn->state = ISER_CONN_UP;
-       mutex_unlock(&isert_conn->conn_mutex);
+       mutex_unlock(&isert_conn->mutex);
  }
  
  static void
-isert_release_conn_kref(struct kref *kref)
+isert_release_kref(struct kref *kref)
  {
         struct isert_conn *isert_conn = container_of(kref,
-                               struct isert_conn, conn_kref);
+                               struct isert_conn, kref);
  
         isert_info("conn %p final kref %s/%d\n", isert_conn, current->comm,
                    current->pid);
@@ -791,7 +854,7 @@ isert_release_conn_kref(struct kref *kref)
  static void
  isert_put_conn(struct isert_conn *isert_conn)
  {
-       kref_put(&isert_conn->conn_kref, isert_release_conn_kref);
+       kref_put(&isert_conn->kref, isert_release_kref);
  }
  
  /**
@@ -803,7 +866,7 @@ isert_put_conn(struct isert_conn *isert_conn)
   * to TEMINATING and start teardown sequence (rdma_disconnect).
   * In case the connection state is UP, complete flush as well.
   *
- * This routine must be called with conn_mutex held. Thus it is
+ * This routine must be called with mutex held. Thus it is
   * safe to call multiple times.
   */
  static void
@@ -819,7 +882,7 @@ isert_conn_terminate(struct isert_conn *isert_conn)
                 isert_info("Terminating conn %p state %d\n",
                            isert_conn, isert_conn->state);
                 isert_conn->state = ISER_CONN_TERMINATING;
-               err = rdma_disconnect(isert_conn->conn_cm_id);
+               err = rdma_disconnect(isert_conn->cm_id);
                 if (err)
                         isert_warn("Failed rdma_disconnect isert_conn %p\n",
                                    isert_conn);
@@ -868,22 +931,25 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
  
         isert_conn = cma_id->qp->qp_context;
  
-       mutex_lock(&isert_conn->conn_mutex);
+       mutex_lock(&isert_conn->mutex);
         isert_conn_terminate(isert_conn);
-       mutex_unlock(&isert_conn->conn_mutex);
+       mutex_unlock(&isert_conn->mutex);
  
-       isert_info("conn %p completing conn_wait\n", isert_conn);
-       complete(&isert_conn->conn_wait);
+       isert_info("conn %p completing wait\n", isert_conn);
+       complete(&isert_conn->wait);
  
         return 0;
  }
  
-static void
+static int
  isert_connect_error(struct rdma_cm_id *cma_id)
  {
         struct isert_conn *isert_conn = cma_id->qp->qp_context;
  
+       isert_conn->cm_id = NULL;
         isert_put_conn(isert_conn);
+
+       return -1;
  }
  
  static int
@@ -912,7 +978,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
         case RDMA_CM_EVENT_REJECTED:       /* FALLTHRU */
         case RDMA_CM_EVENT_UNREACHABLE:    /* FALLTHRU */
         case RDMA_CM_EVENT_CONNECT_ERROR:
-               isert_connect_error(cma_id);
+               ret = isert_connect_error(cma_id);
                 break;
         default:
                 isert_err("Unhandled RDMA CMA event: %d\n", event->event);
@@ -927,11 +993,11 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
  {
         struct ib_recv_wr *rx_wr, *rx_wr_failed;
         int i, ret;
-       unsigned int rx_head = isert_conn->conn_rx_desc_head;
+       unsigned int rx_head = isert_conn->rx_desc_head;
         struct iser_rx_desc *rx_desc;
  
-       for (rx_wr = isert_conn->conn_rx_wr, i = 0; i < count; i++, rx_wr++) {
-               rx_desc         = &isert_conn->conn_rx_descs[rx_head];
+       for (rx_wr = isert_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
+               rx_desc         = &isert_conn->rx_descs[rx_head];
                 rx_wr->wr_id    = (uintptr_t)rx_desc;
                 rx_wr->sg_list  = &rx_desc->rx_sg;
                 rx_wr->num_sge  = 1;
@@ -943,14 +1009,14 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
         rx_wr->next = NULL; /* mark end of work requests list */
  
         isert_conn->post_recv_buf_count += count;
-       ret = ib_post_recv(isert_conn->conn_qp, isert_conn->conn_rx_wr,
+       ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr,
                                 &rx_wr_failed);
         if (ret) {
                 isert_err("ib_post_recv() failed with ret: %d\n", ret);
                 isert_conn->post_recv_buf_count -= count;
         } else {
                 isert_dbg("Posted %d RX buffers\n", count);
-               isert_conn->conn_rx_desc_head = rx_head;
+               isert_conn->rx_desc_head = rx_head;
         }
         return ret;
  }
@@ -958,7 +1024,7 @@ isert_post_recv(struct isert_conn *isert_conn, u32 count)
  static int
  isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
         struct ib_send_wr send_wr, *send_wr_failed;
         int ret;
  
@@ -972,7 +1038,7 @@ isert_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
         send_wr.opcode  = IB_WR_SEND;
         send_wr.send_flags = IB_SEND_SIGNALED;
  
-       ret = ib_post_send(isert_conn->conn_qp, &send_wr, &send_wr_failed);
+       ret = ib_post_send(isert_conn->qp, &send_wr, &send_wr_failed);
         if (ret)
                 isert_err("ib_post_send() failed, ret: %d\n", ret);
  
@@ -984,7 +1050,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
                        struct isert_cmd *isert_cmd,
                        struct iser_tx_desc *tx_desc)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
  
         ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
                                    ISER_HEADERS_LEN, DMA_TO_DEVICE);
@@ -995,8 +1062,8 @@ isert_create_send_desc(struct isert_conn *isert_conn,
         tx_desc->num_sge = 1;
         tx_desc->isert_cmd = isert_cmd;
  
-       if (tx_desc->tx_sg[0].lkey != isert_conn->conn_mr->lkey) {
-               tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
+       if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
+               tx_desc->tx_sg[0].lkey = device->mr->lkey;
                 isert_dbg("tx_desc %p lkey mismatch, fixing\n", tx_desc);
         }
  }
@@ -1005,7 +1072,8 @@ static int
  isert_init_tx_hdrs(struct isert_conn *isert_conn,
                    struct iser_tx_desc *tx_desc)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
         u64 dma_addr;
  
         dma_addr = ib_dma_map_single(ib_dev, (void *)tx_desc,
@@ -1018,7 +1086,7 @@ isert_init_tx_hdrs(struct isert_conn *isert_conn,
         tx_desc->dma_addr = dma_addr;
         tx_desc->tx_sg[0].addr  = tx_desc->dma_addr;
         tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
-       tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
+       tx_desc->tx_sg[0].lkey = device->mr->lkey;
  
         isert_dbg("Setup tx_sg[0].addr: 0x%llx length: %u lkey: 0x%x\n",
                   tx_desc->tx_sg[0].addr, tx_desc->tx_sg[0].length,
@@ -1051,7 +1119,7 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
         memset(&sge, 0, sizeof(struct ib_sge));
         sge.addr = isert_conn->login_req_dma;
         sge.length = ISER_RX_LOGIN_SIZE;
-       sge.lkey = isert_conn->conn_mr->lkey;
+       sge.lkey = isert_conn->device->mr->lkey;
  
         isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n",
                 sge.addr, sge.length, sge.lkey);
@@ -1062,7 +1130,7 @@ isert_rdma_post_recvl(struct isert_conn *isert_conn)
         rx_wr.num_sge = 1;
  
         isert_conn->post_recv_buf_count++;
-       ret = ib_post_recv(isert_conn->conn_qp, &rx_wr, &rx_wr_fail);
+       ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail);
         if (ret) {
                 isert_err("ib_post_recv() failed: %d\n", ret);
                 isert_conn->post_recv_buf_count--;
@@ -1076,8 +1144,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
                    u32 length)
  {
         struct isert_conn *isert_conn = conn->context;
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-       struct iser_tx_desc *tx_desc = &isert_conn->conn_login_tx_desc;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
+       struct iser_tx_desc *tx_desc = &isert_conn->login_tx_desc;
         int ret;
  
         isert_create_send_desc(isert_conn, NULL, tx_desc);
@@ -1100,13 +1169,13 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
  
                 tx_dsg->addr    = isert_conn->login_rsp_dma;
                 tx_dsg->length  = length;
-               tx_dsg->lkey    = isert_conn->conn_mr->lkey;
+               tx_dsg->lkey    = isert_conn->device->mr->lkey;
                 tx_desc->num_sge = 2;
         }
         if (!login->login_failed) {
                 if (login->login_complete) {
                         if (!conn->sess->sess_ops->SessionType &&
-                           isert_conn->conn_device->use_fastreg) {
+                           isert_conn->device->use_fastreg) {
                                 ret = isert_conn_create_fastreg_pool(isert_conn);
                                 if (ret) {
                                         isert_err("Conn: %p failed to create"
@@ -1124,9 +1193,9 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
                                 return ret;
  
                         /* Now we are in FULL_FEATURE phase */
-                       mutex_lock(&isert_conn->conn_mutex);
+                       mutex_lock(&isert_conn->mutex);
                         isert_conn->state = ISER_CONN_FULL_FEATURE;
-                       mutex_unlock(&isert_conn->conn_mutex);
+                       mutex_unlock(&isert_conn->mutex);
                         goto post_send;
                 }
  
@@ -1185,7 +1254,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
         memcpy(login->req_buf, &rx_desc->data[0], size);
  
         if (login->first_request) {
-               complete(&isert_conn->conn_login_comp);
+               complete(&isert_conn->login_comp);
                 return;
         }
         schedule_delayed_work(&conn->login_work, 0);
@@ -1194,7 +1263,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
  static struct iscsi_cmd
  *isert_allocate_cmd(struct iscsi_conn *conn)
  {
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
         struct isert_cmd *isert_cmd;
         struct iscsi_cmd *cmd;
  
@@ -1379,13 +1448,12 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
  {
         struct iscsi_hdr *hdr = &rx_desc->iscsi_header;
         struct iscsi_conn *conn = isert_conn->conn;
-       struct iscsi_session *sess = conn->sess;
         struct iscsi_cmd *cmd;
         struct isert_cmd *isert_cmd;
         int ret = -EINVAL;
         u8 opcode = (hdr->opcode & ISCSI_OPCODE_MASK);
  
-       if (sess->sess_ops->SessionType &&
+       if (conn->sess->sess_ops->SessionType &&
            (!(opcode & ISCSI_OP_TEXT) || !(opcode & ISCSI_OP_LOGOUT))) {
                 isert_err("Got illegal opcode: 0x%02x in SessionType=Discovery,"
                           " ignoring\n", opcode);
@@ -1497,10 +1565,11 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
  }
  
  static void
-isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
-                   u32 xfer_len)
+isert_rcv_completion(struct iser_rx_desc *desc,
+                    struct isert_conn *isert_conn,
+                    u32 xfer_len)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
         struct iscsi_hdr *hdr;
         u64 rx_dma;
         int rx_buflen, outstanding;
@@ -1532,9 +1601,9 @@ isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
                         if (login && !login->first_request)
                                 isert_rx_login_req(isert_conn);
                 }
-               mutex_lock(&isert_conn->conn_mutex);
+               mutex_lock(&isert_conn->mutex);
                 complete(&isert_conn->login_req_comp);
-               mutex_unlock(&isert_conn->conn_mutex);
+               mutex_unlock(&isert_conn->mutex);
         } else {
                 isert_rx_do_work(desc, isert_conn);
         }
@@ -1566,7 +1635,7 @@ isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
                    struct scatterlist *sg, u32 nents, u32 length, u32 offset,
                    enum iser_ib_op_code op, struct isert_data_buf *data)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
  
         data->dma_dir = op == ISER_IB_RDMA_WRITE ?
                               DMA_TO_DEVICE : DMA_FROM_DEVICE;
@@ -1597,7 +1666,7 @@ isert_map_data_buf(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
  static void
  isert_unmap_data_buf(struct isert_conn *isert_conn, struct isert_data_buf *data)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
  
         ib_dma_unmap_sg(ib_dev, data->sg, data->nents, data->dma_dir);
         memset(data, 0, sizeof(*data));
@@ -1634,7 +1703,6 @@ static void
  isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
  {
         struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-       LIST_HEAD(unmap_list);
  
         isert_dbg("Cmd %p\n", isert_cmd);
  
@@ -1644,9 +1712,9 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
                         isert_unmap_data_buf(isert_conn, &wr->prot);
                         wr->fr_desc->ind &= ~ISERT_PROTECTED;
                 }
-               spin_lock_bh(&isert_conn->conn_lock);
-               list_add_tail(&wr->fr_desc->list, &isert_conn->conn_fr_pool);
-               spin_unlock_bh(&isert_conn->conn_lock);
+               spin_lock_bh(&isert_conn->pool_lock);
+               list_add_tail(&wr->fr_desc->list, &isert_conn->fr_pool);
+               spin_unlock_bh(&isert_conn->pool_lock);
                 wr->fr_desc = NULL;
         }
  
@@ -1665,7 +1733,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd, bool comp_err)
         struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
         struct isert_conn *isert_conn = isert_cmd->conn;
         struct iscsi_conn *conn = isert_conn->conn;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
         struct iscsi_text_rsp *hdr;
  
         isert_dbg("Cmd %p\n", isert_cmd);
@@ -1815,7 +1883,7 @@ isert_completion_rdma_write(struct iser_tx_desc *tx_desc,
         struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
         struct se_cmd *se_cmd = &cmd->se_cmd;
         struct isert_conn *isert_conn = isert_cmd->conn;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
         int ret = 0;
  
         if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
@@ -1841,7 +1909,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
         struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
         struct se_cmd *se_cmd = &cmd->se_cmd;
         struct isert_conn *isert_conn = isert_cmd->conn;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
         int ret = 0;
  
         if (wr->fr_desc && wr->fr_desc->ind & ISERT_PROTECTED) {
@@ -1861,11 +1929,13 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
         cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
         spin_unlock_bh(&cmd->istate_lock);
  
-       if (ret)
+       if (ret) {
+               target_put_sess_cmd(se_cmd->se_sess, se_cmd);
                 transport_send_check_condition_and_sense(se_cmd,
                                                          se_cmd->pi_err, 0);
-       else
+       } else {
                 target_execute_cmd(se_cmd);
+       }
  }
  
  static void
@@ -1874,7 +1944,7 @@ isert_do_control_comp(struct work_struct *work)
         struct isert_cmd *isert_cmd = container_of(work,
                         struct isert_cmd, comp_work);
         struct isert_conn *isert_conn = isert_cmd->conn;
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
         struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
  
         isert_dbg("Cmd %p i_state %d\n", isert_cmd, cmd->i_state);
@@ -1922,10 +1992,10 @@ isert_response_completion(struct iser_tx_desc *tx_desc,
  }
  
  static void
-isert_send_completion(struct iser_tx_desc *tx_desc,
+isert_snd_completion(struct iser_tx_desc *tx_desc,
                       struct isert_conn *isert_conn)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_device *ib_dev = isert_conn->cm_id->device;
         struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
         struct isert_rdma_wr *wr;
  
@@ -1938,10 +2008,6 @@ isert_send_completion(struct iser_tx_desc *tx_desc,
         isert_dbg("Cmd %p iser_ib_op %d\n", isert_cmd, wr->iser_ib_op);
  
         switch (wr->iser_ib_op) {
-       case ISER_IB_RECV:
-               isert_err("Got ISER_IB_RECV\n");
-               dump_stack();
-               break;
         case ISER_IB_SEND:
                 isert_response_completion(tx_desc, isert_cmd,
                                           isert_conn, ib_dev);
@@ -1973,8 +2039,8 @@ isert_send_completion(struct iser_tx_desc *tx_desc,
  static inline bool
  is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id)
  {
-       void *start = isert_conn->conn_rx_descs;
-       int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->conn_rx_descs);
+       void *start = isert_conn->rx_descs;
+       int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->rx_descs);
  
         if (wr_id >= start && wr_id < start + len)
                 return false;
@@ -1986,11 +2052,11 @@ static void
  isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc)
  {
         if (wc->wr_id == ISER_BEACON_WRID) {
-               isert_info("conn %p completing conn_wait_comp_err\n",
+               isert_info("conn %p completing wait_comp_err\n",
                            isert_conn);
-               complete(&isert_conn->conn_wait_comp_err);
+               complete(&isert_conn->wait_comp_err);
         } else if (is_isert_tx_desc(isert_conn, (void *)(uintptr_t)wc->wr_id)) {
-               struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+               struct ib_device *ib_dev = isert_conn->cm_id->device;
                 struct isert_cmd *isert_cmd;
                 struct iser_tx_desc *desc;
  
@@ -2018,10 +2084,10 @@ isert_handle_wc(struct ib_wc *wc)
         if (likely(wc->status == IB_WC_SUCCESS)) {
                 if (wc->opcode == IB_WC_RECV) {
                         rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
-                       isert_rx_completion(rx_desc, isert_conn, wc->byte_len);
+                       isert_rcv_completion(rx_desc, isert_conn, wc->byte_len);
                 } else {
                         tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
-                       isert_send_completion(tx_desc, isert_conn);
+                       isert_snd_completion(tx_desc, isert_conn);
                 }
         } else {
                 if (wc->status != IB_WC_WR_FLUSH_ERR)
@@ -2070,7 +2136,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
         struct ib_send_wr *wr_failed;
         int ret;
  
-       ret = ib_post_send(isert_conn->conn_qp, &isert_cmd->tx_desc.send_wr,
+       ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr,
                            &wr_failed);
         if (ret) {
                 isert_err("ib_post_send failed with %d\n", ret);
@@ -2083,7 +2149,7 @@ static int
  isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
  {
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
         struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
         struct iscsi_scsi_rsp *hdr = (struct iscsi_scsi_rsp *)
                                 &isert_cmd->tx_desc.iscsi_header;
@@ -2097,7 +2163,8 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
         if (cmd->se_cmd.sense_buffer &&
             ((cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ||
             (cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) {
-               struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+               struct isert_device *device = isert_conn->device;
+               struct ib_device *ib_dev = device->ib_device;
                 struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1];
                 u32 padding, pdu_len;
  
@@ -2116,7 +2183,7 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
                 isert_cmd->pdu_buf_len = pdu_len;
                 tx_dsg->addr    = isert_cmd->pdu_buf_dma;
                 tx_dsg->length  = pdu_len;
-               tx_dsg->lkey    = isert_conn->conn_mr->lkey;
+               tx_dsg->lkey    = device->mr->lkey;
                 isert_cmd->tx_desc.num_sge = 2;
         }
  
@@ -2131,8 +2198,8 @@ static void
  isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
  {
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_conn *isert_conn = conn->context;
+       struct isert_device *device = isert_conn->device;
  
         spin_lock_bh(&conn->cmd_lock);
         if (!list_empty(&cmd->i_conn_node))
@@ -2148,8 +2215,8 @@ isert_aborted_task(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
  static enum target_prot_op
  isert_get_sup_prot_ops(struct iscsi_conn *conn)
  {
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_conn *isert_conn = conn->context;
+       struct isert_device *device = isert_conn->device;
  
         if (conn->tpg->tpg_attrib.t10_pi) {
                 if (device->pi_capable) {
@@ -2170,7 +2237,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
                 bool nopout_response)
  {
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
         struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
  
         isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
@@ -2189,7 +2256,7 @@ static int
  isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
  {
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
         struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
  
         isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
@@ -2207,7 +2274,7 @@ static int
  isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
  {
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
         struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
  
         isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
@@ -2225,9 +2292,10 @@ static int
  isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
  {
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
         struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
         struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1];
         struct iscsi_reject *hdr =
                 (struct iscsi_reject *)&isert_cmd->tx_desc.iscsi_header;
@@ -2243,7 +2311,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
         isert_cmd->pdu_buf_len = ISCSI_HDR_LEN;
         tx_dsg->addr    = isert_cmd->pdu_buf_dma;
         tx_dsg->length  = ISCSI_HDR_LEN;
-       tx_dsg->lkey    = isert_conn->conn_mr->lkey;
+       tx_dsg->lkey    = device->mr->lkey;
         isert_cmd->tx_desc.num_sge = 2;
  
         isert_init_send_wr(isert_conn, isert_cmd, send_wr);
@@ -2257,7 +2325,7 @@ static int
  isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
  {
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
         struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
         struct iscsi_text_rsp *hdr =
                 (struct iscsi_text_rsp *)&isert_cmd->tx_desc.iscsi_header;
@@ -2273,7 +2341,8 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
         isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
  
         if (txt_rsp_len) {
-               struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+               struct isert_device *device = isert_conn->device;
+               struct ib_device *ib_dev = device->ib_device;
                 struct ib_sge *tx_dsg = &isert_cmd->tx_desc.tx_sg[1];
                 void *txt_rsp_buf = cmd->buf_ptr;
  
@@ -2283,7 +2352,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
                 isert_cmd->pdu_buf_len = txt_rsp_len;
                 tx_dsg->addr    = isert_cmd->pdu_buf_dma;
                 tx_dsg->length  = txt_rsp_len;
-               tx_dsg->lkey    = isert_conn->conn_mr->lkey;
+               tx_dsg->lkey    = device->mr->lkey;
                 isert_cmd->tx_desc.num_sge = 2;
         }
         isert_init_send_wr(isert_conn, isert_cmd, send_wr);
@@ -2300,7 +2369,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
  {
         struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
         struct scatterlist *sg_start, *tmp_sg;
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
         u32 sg_off, page_off;
         int i = 0, sg_nents;
  
@@ -2324,7 +2394,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
                 ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
                 ib_sge->length = min_t(u32, data_left,
                                 ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
-               ib_sge->lkey = isert_conn->conn_mr->lkey;
+               ib_sge->lkey = device->mr->lkey;
  
                 isert_dbg("RDMA ib_sge: addr: 0x%llx  length: %u lkey: %x\n",
                           ib_sge->addr, ib_sge->length, ib_sge->lkey);
@@ -2346,7 +2416,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
  {
         struct se_cmd *se_cmd = &cmd->se_cmd;
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
         struct isert_data_buf *data = &wr->data;
         struct ib_send_wr *send_wr;
         struct ib_sge *ib_sge;
@@ -2485,7 +2555,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
                   enum isert_indicator ind,
                   struct ib_sge *sge)
  {
-       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct isert_device *device = isert_conn->device;
+       struct ib_device *ib_dev = device->ib_device;
         struct ib_mr *mr;
         struct ib_fast_reg_page_list *frpl;
         struct ib_send_wr fr_wr, inv_wr;
@@ -2494,7 +2565,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
         u32 page_off;
  
         if (mem->dma_nents == 1) {
-               sge->lkey = isert_conn->conn_mr->lkey;
+               sge->lkey = device->mr->lkey;
                 sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
                 sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
                 isert_dbg("sge: addr: 0x%llx  length: %u lkey: %x\n",
@@ -2542,7 +2613,7 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
         else
                 wr->next = &fr_wr;
  
-       ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+       ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
         if (ret) {
                 isert_err("fast registration failed, ret:%d\n", ret);
                 return ret;
@@ -2655,7 +2726,7 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
         else
                 wr->next = &sig_wr;
  
-       ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+       ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
         if (ret) {
                 isert_err("fast registration failed, ret:%d\n", ret);
                 goto err;
@@ -2685,14 +2756,14 @@ isert_handle_prot_cmd(struct isert_conn *isert_conn,
                       struct isert_cmd *isert_cmd,
                       struct isert_rdma_wr *wr)
  {
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_device *device = isert_conn->device;
         struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
         int ret;
  
         if (!wr->fr_desc->pi_ctx) {
                 ret = isert_create_pi_ctx(wr->fr_desc,
                                           device->ib_device,
-                                         isert_conn->conn_pd);
+                                         device->pd);
                 if (ret) {
                         isert_err("conn %p failed to allocate pi_ctx\n",
                                   isert_conn);
@@ -2763,11 +2834,11 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
                 return ret;
  
         if (wr->data.dma_nents != 1 || isert_prot_cmd(isert_conn, se_cmd)) {
-               spin_lock_irqsave(&isert_conn->conn_lock, flags);
-               fr_desc = list_first_entry(&isert_conn->conn_fr_pool,
+               spin_lock_irqsave(&isert_conn->pool_lock, flags);
+               fr_desc = list_first_entry(&isert_conn->fr_pool,
                                            struct fast_reg_descriptor, list);
                 list_del(&fr_desc->list);
-               spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+               spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
                 wr->fr_desc = fr_desc;
         }
  
@@ -2814,9 +2885,9 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
  
  unmap_cmd:
         if (fr_desc) {
-               spin_lock_irqsave(&isert_conn->conn_lock, flags);
-               list_add_tail(&fr_desc->list, &isert_conn->conn_fr_pool);
-               spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+               spin_lock_irqsave(&isert_conn->pool_lock, flags);
+               list_add_tail(&fr_desc->list, &isert_conn->fr_pool);
+               spin_unlock_irqrestore(&isert_conn->pool_lock, flags);
         }
         isert_unmap_data_buf(isert_conn, &wr->data);
  
@@ -2829,8 +2900,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
         struct se_cmd *se_cmd = &cmd->se_cmd;
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
         struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_conn *isert_conn = conn->context;
+       struct isert_device *device = isert_conn->device;
         struct ib_send_wr *wr_failed;
         int rc;
  
@@ -2859,7 +2930,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
                 wr->send_wr_num += 1;
         }
  
-       rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
+       rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
         if (rc)
                 isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
  
@@ -2879,8 +2950,8 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
         struct se_cmd *se_cmd = &cmd->se_cmd;
         struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
         struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct isert_device *device = isert_conn->conn_device;
+       struct isert_conn *isert_conn = conn->context;
+       struct isert_device *device = isert_conn->device;
         struct ib_send_wr *wr_failed;
         int rc;
  
@@ -2893,7 +2964,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
                 return rc;
         }
  
-       rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
+       rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
         if (rc)
                 isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
  
@@ -2987,7 +3058,7 @@ isert_setup_id(struct isert_np *isert_np)
                 goto out_id;
         }
  
-       ret = rdma_listen(id, ISERT_RDMA_LISTEN_BACKLOG);
+       ret = rdma_listen(id, 0);
         if (ret) {
                 isert_err("rdma_listen() failed: %d\n", ret);
                 goto out_id;
@@ -3046,7 +3117,7 @@ out:
  static int
  isert_rdma_accept(struct isert_conn *isert_conn)
  {
-       struct rdma_cm_id *cm_id = isert_conn->conn_cm_id;
+       struct rdma_cm_id *cm_id = isert_conn->cm_id;
         struct rdma_conn_param cp;
         int ret;
  
@@ -3067,7 +3138,7 @@ isert_rdma_accept(struct isert_conn *isert_conn)
  static int
  isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
  {
-       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_conn *isert_conn = conn->context;
         int ret;
  
         isert_info("before login_req comp conn: %p\n", isert_conn);
@@ -3090,8 +3161,8 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
  
         isert_rx_login_req(isert_conn);
  
-       isert_info("before conn_login_comp conn: %p\n", conn);
-       ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp);
+       isert_info("before login_comp conn: %p\n", conn);
+       ret = wait_for_completion_interruptible(&isert_conn->login_comp);
         if (ret)
                 return ret;
  
@@ -3104,7 +3175,7 @@ static void
  isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn,
                     struct isert_conn *isert_conn)
  {
-       struct rdma_cm_id *cm_id = isert_conn->conn_cm_id;
+       struct rdma_cm_id *cm_id = isert_conn->cm_id;
         struct rdma_route *cm_route = &cm_id->route;
         struct sockaddr_in *sock_in;
         struct sockaddr_in6 *sock_in6;
@@ -3137,13 +3208,13 @@ isert_set_conn_info(struct iscsi_np *np, struct iscsi_conn *conn,
  static int
  isert_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
  {
-       struct isert_np *isert_np = (struct isert_np *)np->np_context;
+       struct isert_np *isert_np = np->np_context;
         struct isert_conn *isert_conn;
-       int max_accept = 0, ret;
+       int ret;
  
  accept_wait:
         ret = down_interruptible(&isert_np->np_sem);
-       if (ret || max_accept > 5)
+       if (ret)
                 return -ENODEV;
  
         spin_lock_bh(&np->np_thread_lock);
@@ -3162,17 +3233,15 @@ accept_wait:
         mutex_lock(&isert_np->np_accept_mutex);
         if (list_empty(&isert_np->np_accept_list)) {
                 mutex_unlock(&isert_np->np_accept_mutex);
-               max_accept++;
                 goto accept_wait;
         }
         isert_conn = list_first_entry(&isert_np->np_accept_list,
-                       struct isert_conn, conn_accept_node);
-       list_del_init(&isert_conn->conn_accept_node);
+                       struct isert_conn, accept_node);
+       list_del_init(&isert_conn->accept_node);
         mutex_unlock(&isert_np->np_accept_mutex);
  
         conn->context = isert_conn;
         isert_conn->conn = conn;
-       max_accept = 0;
  
         isert_set_conn_info(np, conn, isert_conn);
  
@@ -3184,7 +3253,7 @@ accept_wait:
  static void
  isert_free_np(struct iscsi_np *np)
  {
-       struct isert_np *isert_np = (struct isert_np *)np->np_context;
+       struct isert_np *isert_np = np->np_context;
         struct isert_conn *isert_conn, *n;
  
         if (isert_np->np_cm_id)
@@ -3202,7 +3271,7 @@ isert_free_np(struct iscsi_np *np)
                 isert_info("Still have isert connections, cleaning up...\n");
                 list_for_each_entry_safe(isert_conn, n,
                                          &isert_np->np_accept_list,
-                                        conn_accept_node) {
+                                        accept_node) {
                         isert_info("cleaning isert_conn %p state (%d)\n",
                                    isert_conn, isert_conn->state);
                         isert_connect_release(isert_conn);
@@ -3222,11 +3291,11 @@ static void isert_release_work(struct work_struct *work)
  
         isert_info("Starting release conn %p\n", isert_conn);
  
-       wait_for_completion(&isert_conn->conn_wait);
+       wait_for_completion(&isert_conn->wait);
  
-       mutex_lock(&isert_conn->conn_mutex);
+       mutex_lock(&isert_conn->mutex);
         isert_conn->state = ISER_CONN_DOWN;
-       mutex_unlock(&isert_conn->conn_mutex);
+       mutex_unlock(&isert_conn->mutex);
  
         isert_info("Destroying conn %p\n", isert_conn);
         isert_put_conn(isert_conn);
@@ -3264,15 +3333,15 @@ isert_wait4flush(struct isert_conn *isert_conn)
  
         isert_info("conn %p\n", isert_conn);
  
-       init_completion(&isert_conn->conn_wait_comp_err);
+       init_completion(&isert_conn->wait_comp_err);
         isert_conn->beacon.wr_id = ISER_BEACON_WRID;
         /* post an indication that all flush errors were consumed */
-       if (ib_post_recv(isert_conn->conn_qp, &isert_conn->beacon, &bad_wr)) {
+       if (ib_post_recv(isert_conn->qp, &isert_conn->beacon, &bad_wr)) {
                 isert_err("conn %p failed to post beacon", isert_conn);
                 return;
         }
  
-       wait_for_completion(&isert_conn->conn_wait_comp_err);
+       wait_for_completion(&isert_conn->wait_comp_err);
  }
  
  static void isert_wait_conn(struct iscsi_conn *conn)
@@ -3281,17 +3350,17 @@ static void isert_wait_conn(struct iscsi_conn *conn)
  
         isert_info("Starting conn %p\n", isert_conn);
  
-       mutex_lock(&isert_conn->conn_mutex);
+       mutex_lock(&isert_conn->mutex);
         /*
-        * Only wait for conn_wait_comp_err if the isert_conn made it
+        * Only wait for wait_comp_err if the isert_conn made it
          * into full feature phase..
          */
         if (isert_conn->state == ISER_CONN_INIT) {
-               mutex_unlock(&isert_conn->conn_mutex);
+               mutex_unlock(&isert_conn->mutex);
                 return;
         }
         isert_conn_terminate(isert_conn);
-       mutex_unlock(&isert_conn->conn_mutex);
+       mutex_unlock(&isert_conn->mutex);
  
         isert_wait4cmds(conn);
         isert_wait4flush(isert_conn);
@@ -3370,7 +3439,7 @@ static void __exit isert_exit(void)
  }
  
  MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");
-MODULE_VERSION("0.1");
+MODULE_VERSION("1.0");
  MODULE_AUTHOR("nab@Linux-iSCSI.org");
  MODULE_LICENSE("GPL");
  
diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h

index 8dc8415..9ec23a7 100644 (file)
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h
@@ -31,7 +31,6 @@
  #define isert_err(fmt, arg...) \
         pr_err(PFX "%s: " fmt, __func__ , ## arg)
  
-#define ISERT_RDMA_LISTEN_BACKLOG      10
  #define ISCSI_ISER_SG_TABLESIZE                256
  #define ISER_FASTREG_LI_WRID           0xffffffffffffffffULL
  #define ISER_BEACON_WRID               0xfffffffffffffffeULL
@@ -160,27 +159,25 @@ struct isert_conn {
         u64                     login_req_dma;
         int                     login_req_len;
         u64                     login_rsp_dma;
-       unsigned int            conn_rx_desc_head;
-       struct iser_rx_desc     *conn_rx_descs;
-       struct ib_recv_wr       conn_rx_wr[ISERT_MIN_POSTED_RX];
+       unsigned int            rx_desc_head;
+       struct iser_rx_desc     *rx_descs;
+       struct ib_recv_wr       rx_wr[ISERT_MIN_POSTED_RX];
         struct iscsi_conn       *conn;
-       struct list_head        conn_accept_node;
-       struct completion       conn_login_comp;
+       struct list_head        accept_node;
+       struct completion       login_comp;
         struct completion       login_req_comp;
-       struct iser_tx_desc     conn_login_tx_desc;
-       struct rdma_cm_id       *conn_cm_id;
-       struct ib_pd            *conn_pd;
-       struct ib_mr            *conn_mr;
-       struct ib_qp            *conn_qp;
-       struct isert_device     *conn_device;
-       struct mutex            conn_mutex;
-       struct completion       conn_wait;
-       struct completion       conn_wait_comp_err;
-       struct kref             conn_kref;
-       struct list_head        conn_fr_pool;
-       int                     conn_fr_pool_size;
+       struct iser_tx_desc     login_tx_desc;
+       struct rdma_cm_id       *cm_id;
+       struct ib_qp            *qp;
+       struct isert_device     *device;
+       struct mutex            mutex;
+       struct completion       wait;
+       struct completion       wait_comp_err;
+       struct kref             kref;
+       struct list_head        fr_pool;
+       int                     fr_pool_size;
         /* lock to protect fastreg pool */
-       spinlock_t              conn_lock;
+       spinlock_t              pool_lock;
         struct work_struct      release_work;
         struct ib_recv_wr       beacon;
         bool                    logout_posted;
@@ -211,6 +208,8 @@ struct isert_device {
         bool                    pi_capable;
         int                     refcount;
         struct ib_device        *ib_device;
+       struct ib_pd            *pd;
+       struct ib_mr            *mr;
         struct isert_comp       *comps;
         int                     comps_used;
         struct list_head        dev_node;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c

index 0747c05..918814c 100644 (file)
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -40,6 +40,7 @@
  #include <linux/parser.h>
  #include <linux/random.h>
  #include <linux/jiffies.h>
+#include <rdma/ib_cache.h>
  
  #include <linux/atomic.h>
  
@@ -265,10 +266,10 @@ static int srp_init_qp(struct srp_target_port *target,
         if (!attr)
                 return -ENOMEM;
  
-       ret = ib_find_pkey(target->srp_host->srp_dev->dev,
-                          target->srp_host->port,
-                          be16_to_cpu(target->pkey),
-                          &attr->pkey_index);
+       ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
+                                 target->srp_host->port,
+                                 be16_to_cpu(target->pkey),
+                                 &attr->pkey_index);
         if (ret)
                 goto out;
  
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c

index 6e0a477..9b84b4c 100644 (file)
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -93,7 +93,7 @@ MODULE_PARM_DESC(srpt_service_guid,
                  " instead of using the node_guid of the first HCA.");
  
  static struct ib_client srpt_client;
-static struct target_fabric_configfs *srpt_target;
+static const struct target_core_fabric_ops srpt_template;
  static void srpt_release_channel(struct srpt_rdma_ch *ch);
  static int srpt_queue_status(struct se_cmd *cmd);
  
@@ -207,7 +207,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
                 }
                 break;
         default:
-               printk(KERN_ERR "received unrecognized IB event %d\n",
+               pr_err("received unrecognized IB event %d\n",
                        event->event);
                 break;
         }
@@ -218,7 +218,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
   */
  static void srpt_srq_event(struct ib_event *event, void *ctx)
  {
-       printk(KERN_INFO "SRQ event %d\n", event->event);
+       pr_info("SRQ event %d\n", event->event);
  }
  
  /**
@@ -242,8 +242,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
                                  ch->sess_name, srpt_get_ch_state(ch));
                 break;
         default:
-               printk(KERN_ERR "received unrecognized IB QP event %d\n",
-                      event->event);
+               pr_err("received unrecognized IB QP event %d\n", event->event);
                 break;
         }
  }
@@ -602,7 +601,7 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
                 sport = &sdev->port[i - 1];
                 WARN_ON(sport->port != i);
                 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
-                       printk(KERN_ERR "disabling MAD processing failed.\n");
+                       pr_err("disabling MAD processing failed.\n");
                 if (sport->mad_agent) {
                         ib_unregister_mad_agent(sport->mad_agent);
                         sport->mad_agent = NULL;
@@ -810,7 +809,7 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
  
         ret = -ENOMEM;
         if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
-               printk(KERN_WARNING "IB send queue full (needed 1)\n");
+               pr_warn("IB send queue full (needed 1)\n");
                 goto out;
         }
  
@@ -912,7 +911,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
  
                 if (ioctx->n_rbuf >
                     (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
-                       printk(KERN_ERR "received unsupported SRP_CMD request"
+                       pr_err("received unsupported SRP_CMD request"
                                " type (%u out + %u in != %u / %zu)\n",
                                srp_cmd->data_out_desc_cnt,
                                srp_cmd->data_in_desc_cnt,
@@ -1432,7 +1431,7 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                 srpt_unmap_sg_to_ib_sge(ch, ioctx);
                 transport_generic_free_cmd(&ioctx->cmd, 0);
         } else {
-               printk(KERN_ERR "IB completion has been received too late for"
+               pr_err("IB completion has been received too late for"
                        " wr_id = %u.\n", ioctx->ioctx.index);
         }
  }
@@ -1457,7 +1456,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                                 SRPT_STATE_DATA_IN))
                         target_execute_cmd(&ioctx->cmd);
                 else
-                       printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__,
+                       pr_err("%s[%d]: wrong state = %d\n", __func__,
                                __LINE__, srpt_get_cmd_state(ioctx));
         } else if (opcode == SRPT_RDMA_ABORT) {
                 ioctx->rdma_aborted = true;
@@ -1481,7 +1480,7 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
         switch (opcode) {
         case SRPT_RDMA_READ_LAST:
                 if (ioctx->n_rdma <= 0) {
-                       printk(KERN_ERR "Received invalid RDMA read"
+                       pr_err("Received invalid RDMA read"
                                " error completion with idx %d\n",
                                ioctx->ioctx.index);
                         break;
@@ -1490,14 +1489,13 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
                 if (state == SRPT_STATE_NEED_DATA)
                         srpt_abort_cmd(ioctx);
                 else
-                       printk(KERN_ERR "%s[%d]: wrong state = %d\n",
+                       pr_err("%s[%d]: wrong state = %d\n",
                                __func__, __LINE__, state);
                 break;
         case SRPT_RDMA_WRITE_LAST:
                 break;
         default:
-               printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
-                      __LINE__, opcode);
+               pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
                 break;
         }
  }
@@ -1549,8 +1547,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                 BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
                 max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
                 if (sense_data_len > max_sense_len) {
-                       printk(KERN_WARNING "truncated sense data from %d to %d"
-                              " bytes\n", sense_data_len, max_sense_len);
+                       pr_warn("truncated sense data from %d to %d"
+                               " bytes\n", sense_data_len, max_sense_len);
                         sense_data_len = max_sense_len;
                 }
  
@@ -1628,8 +1626,8 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
         int addressing_method;
  
         if (unlikely(len < 2)) {
-               printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or "
-                      "more", len);
+               pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
+                      len);
                 goto out;
         }
  
@@ -1663,7 +1661,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
  
         case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
         default:
-               printk(KERN_ERR "Unimplemented LUN addressing method %u",
+               pr_err("Unimplemented LUN addressing method %u\n",
                        addressing_method);
                 break;
         }
@@ -1672,8 +1670,7 @@ out:
         return res;
  
  out_err:
-       printk(KERN_ERR "Support for multi-level LUNs has not yet been"
-              " implemented");
+       pr_err("Support for multi-level LUNs has not yet been implemented\n");
         goto out;
  }
  
@@ -1723,7 +1720,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
         }
  
         if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
-               printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n",
+               pr_err("0x%llx: parsing SRP descriptor table failed.\n",
                        srp_cmd->tag);
                 ret = TCM_INVALID_CDB_FIELD;
                 goto send_sense;
@@ -1912,7 +1909,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                 srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx);
                 break;
         case SRP_I_LOGOUT:
-               printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n");
+               pr_err("Not yet implemented: SRP_I_LOGOUT\n");
                 break;
         case SRP_CRED_RSP:
                 pr_debug("received SRP_CRED_RSP\n");
@@ -1921,10 +1918,10 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                 pr_debug("received SRP_AER_RSP\n");
                 break;
         case SRP_RSP:
-               printk(KERN_ERR "Received SRP_RSP\n");
+               pr_err("Received SRP_RSP\n");
                 break;
         default:
-               printk(KERN_ERR "received IU with unknown opcode 0x%x\n",
+               pr_err("received IU with unknown opcode 0x%x\n",
                        srp_cmd->opcode);
                 break;
         }
@@ -1948,12 +1945,12 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
  
                 req_lim = atomic_dec_return(&ch->req_lim);
                 if (unlikely(req_lim < 0))
-                       printk(KERN_ERR "req_lim = %d < 0\n", req_lim);
+                       pr_err("req_lim = %d < 0\n", req_lim);
                 ioctx = sdev->ioctx_ring[index];
                 srpt_handle_new_iu(ch, ioctx, NULL);
         } else {
-               printk(KERN_INFO "receiving failed for idx %u with status %d\n",
-                      index, wc->status);
+               pr_info("receiving failed for idx %u with status %d\n",
+                       index, wc->status);
         }
  }
  
@@ -1993,12 +1990,12 @@ static void srpt_process_send_completion(struct ib_cq *cq,
                 }
         } else {
                 if (opcode == SRPT_SEND) {
-                       printk(KERN_INFO "sending response for idx %u failed"
-                              " with status %d\n", index, wc->status);
+                       pr_info("sending response for idx %u failed"
+                               " with status %d\n", index, wc->status);
                         srpt_handle_send_err_comp(ch, wc->wr_id);
                 } else if (opcode != SRPT_RDMA_MID) {
-                       printk(KERN_INFO "RDMA t %d for idx %u failed with"
-                               " status %d", opcode, index, wc->status);
+                       pr_info("RDMA t %d for idx %u failed with"
+                               " status %d\n", opcode, index, wc->status);
                         srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
                 }
         }
@@ -2062,15 +2059,15 @@ static int srpt_compl_thread(void *arg)
  
         ch = arg;
         BUG_ON(!ch);
-       printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n",
-              ch->sess_name, ch->thread->comm, current->pid);
+       pr_info("Session %s: kernel thread %s (PID %d) started\n",
+               ch->sess_name, ch->thread->comm, current->pid);
         while (!kthread_should_stop()) {
                 wait_event_interruptible(ch->wait_queue,
                         (srpt_process_completion(ch->cq, ch),
                          kthread_should_stop()));
         }
-       printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n",
-              ch->sess_name, ch->thread->comm, current->pid);
+       pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
+               ch->sess_name, ch->thread->comm, current->pid);
         return 0;
  }
  
@@ -2097,7 +2094,7 @@ retry:
                               ch->rq_size + srp_sq_size, 0);
         if (IS_ERR(ch->cq)) {
                 ret = PTR_ERR(ch->cq);
-               printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n",
+               pr_err("failed to create CQ cqe= %d ret= %d\n",
                        ch->rq_size + srp_sq_size, ret);
                 goto out;
         }
@@ -2123,7 +2120,7 @@ retry:
                                 goto retry;
                         }
                 }
-               printk(KERN_ERR "failed to create_qp ret= %d\n", ret);
+               pr_err("failed to create_qp ret= %d\n", ret);
                 goto err_destroy_cq;
         }
  
@@ -2143,7 +2140,7 @@ retry:
  
         ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
         if (IS_ERR(ch->thread)) {
-               printk(KERN_ERR "failed to create kernel thread %ld\n",
+               pr_err("failed to create kernel thread %ld\n",
                        PTR_ERR(ch->thread));
                 ch->thread = NULL;
                 goto err_destroy_qp;
@@ -2204,7 +2201,7 @@ static void __srpt_close_ch(struct srpt_rdma_ch *ch)
                 /* fall through */
         case CH_LIVE:
                 if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
-                       printk(KERN_ERR "sending CM DREQ failed.\n");
+                       pr_err("sending CM DREQ failed.\n");
                 break;
         case CH_DISCONNECTING:
                 break;
@@ -2291,7 +2288,7 @@ static void srpt_drain_channel(struct ib_cm_id *cm_id)
  
                 ret = srpt_ch_qp_err(ch);
                 if (ret < 0)
-                       printk(KERN_ERR "Setting queue pair in error state"
+                       pr_err("Setting queue pair in error state"
                                " failed: %d\n", ret);
         }
  }
@@ -2435,17 +2432,17 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
  
         it_iu_len = be32_to_cpu(req->req_it_iu_len);
  
-       printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
-              " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
-              " (guid=0x%llx:0x%llx)\n",
-              be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
-              be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
-              be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
-              be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
-              it_iu_len,
-              param->port,
-              be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
-              be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
+       pr_info("Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
+               " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
+               " (guid=0x%llx:0x%llx)\n",
+               be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
+               be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
+               be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
+               be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
+               it_iu_len,
+               param->port,
+               be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
+               be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
  
         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
         rej = kzalloc(sizeof *rej, GFP_KERNEL);
@@ -2460,7 +2457,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                 rej->reason = __constant_cpu_to_be32(
                                 SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
                 ret = -EINVAL;
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because its"
+               pr_err("rejected SRP_LOGIN_REQ because its"
                        " length (%d bytes) is out of range (%d .. %d)\n",
                        it_iu_len, 64, srp_max_req_size);
                 goto reject;
@@ -2470,7 +2467,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                 rej->reason = __constant_cpu_to_be32(
                              SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
                 ret = -EINVAL;
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port"
+               pr_err("rejected SRP_LOGIN_REQ because the target port"
                        " has not yet been enabled\n");
                 goto reject;
         }
@@ -2516,7 +2513,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                 rej->reason = __constant_cpu_to_be32(
                                 SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
                 ret = -ENOMEM;
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because it"
+               pr_err("rejected SRP_LOGIN_REQ because it"
                        " has an invalid target port identifier.\n");
                 goto reject;
         }
@@ -2525,7 +2522,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
         if (!ch) {
                 rej->reason = __constant_cpu_to_be32(
                                         SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n");
+               pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
                 ret = -ENOMEM;
                 goto reject;
         }
@@ -2562,7 +2559,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
         if (ret) {
                 rej->reason = __constant_cpu_to_be32(
                                 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating"
+               pr_err("rejected SRP_LOGIN_REQ because creating"
                        " a new RDMA channel failed.\n");
                 goto free_ring;
         }
@@ -2571,7 +2568,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
         if (ret) {
                 rej->reason = __constant_cpu_to_be32(
                                 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling"
+               pr_err("rejected SRP_LOGIN_REQ because enabling"
                        " RTR failed (error code = %d)\n", ret);
                 goto destroy_ib;
         }
@@ -2586,8 +2583,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
  
         nacl = srpt_lookup_acl(sport, ch->i_port_id);
         if (!nacl) {
-               printk(KERN_INFO "Rejected login because no ACL has been"
-                      " configured yet for initiator %s.\n", ch->sess_name);
+               pr_info("Rejected login because no ACL has been"
+                       " configured yet for initiator %s.\n", ch->sess_name);
                 rej->reason = __constant_cpu_to_be32(
                                 SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
                 goto destroy_ib;
@@ -2631,7 +2628,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
  
         ret = ib_send_cm_rep(cm_id, rep_param);
         if (ret) {
-               printk(KERN_ERR "sending SRP_LOGIN_REQ response failed"
+               pr_err("sending SRP_LOGIN_REQ response failed"
                        " (error code = %d)\n", ret);
                 goto release_channel;
         }
@@ -2679,7 +2676,7 @@ out:
  
  static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
  {
-       printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id);
+       pr_info("Received IB REJ for cm_id %p.\n", cm_id);
         srpt_drain_channel(cm_id);
  }
  
@@ -2714,13 +2711,13 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
  
  static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
  {
-       printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id);
+       pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
         srpt_drain_channel(cm_id);
  }
  
  static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
  {
-       printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id);
+       pr_info("Received IB REP error for cm_id %p.\n", cm_id);
         srpt_drain_channel(cm_id);
  }
  
@@ -2755,9 +2752,9 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
  
         if (send_drep) {
                 if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
-                       printk(KERN_ERR "Sending IB DREP failed.\n");
-               printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n",
-                      ch->sess_name);
+                       pr_err("Sending IB DREP failed.\n");
+               pr_info("Received DREQ and sent DREP for session %s.\n",
+                       ch->sess_name);
         }
  }
  
@@ -2766,8 +2763,7 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
   */
  static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
  {
-       printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n",
-              cm_id);
+       pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
         srpt_drain_channel(cm_id);
  }
  
@@ -2811,14 +2807,13 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
                 srpt_cm_rep_error(cm_id);
                 break;
         case IB_CM_DREQ_ERROR:
-               printk(KERN_INFO "Received IB DREQ ERROR event.\n");
+               pr_info("Received IB DREQ ERROR event.\n");
                 break;
         case IB_CM_MRA_RECEIVED:
-               printk(KERN_INFO "Received IB MRA event\n");
+               pr_info("Received IB MRA event\n");
                 break;
         default:
-               printk(KERN_ERR "received unrecognized IB CM event %d\n",
-                      event->event);
+               pr_err("received unrecognized IB CM event %d\n", event->event);
                 break;
         }
  
@@ -2848,8 +2843,8 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                 ret = -ENOMEM;
                 sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
                 if (sq_wr_avail < 0) {
-                       printk(KERN_WARNING "IB send queue full (needed %d)\n",
-                              n_rdma);
+                       pr_warn("IB send queue full (needed %d)\n",
+                               n_rdma);
                         goto out;
                 }
         }
@@ -2889,7 +2884,7 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
         }
  
         if (ret)
-               printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d",
+               pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
                                  __func__, __LINE__, ret, i, n_rdma);
         if (ret && i > 0) {
                 wr.num_sge = 0;
@@ -2897,12 +2892,12 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                 wr.send_flags = IB_SEND_SIGNALED;
                 while (ch->state == CH_LIVE &&
                         ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
-                       printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]",
+                       pr_info("Trying to abort failed RDMA transfer [%d]\n",
                                 ioctx->ioctx.index);
                         msleep(1000);
                 }
                 while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
-                       printk(KERN_INFO "Waiting until RDMA abort finished [%d]",
+                       pr_info("Waiting until RDMA abort finished [%d]\n",
                                 ioctx->ioctx.index);
                         msleep(1000);
                 }
@@ -2923,17 +2918,17 @@ static int srpt_xfer_data(struct srpt_rdma_ch *ch,
  
         ret = srpt_map_sg_to_ib_sge(ch, ioctx);
         if (ret) {
-               printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret);
+               pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret);
                 goto out;
         }
  
         ret = srpt_perform_rdmas(ch, ioctx);
         if (ret) {
                 if (ret == -EAGAIN || ret == -ENOMEM)
-                       printk(KERN_INFO "%s[%d] queue full -- ret=%d\n",
-                                  __func__, __LINE__, ret);
+                       pr_info("%s[%d] queue full -- ret=%d\n",
+                               __func__, __LINE__, ret);
                 else
-                       printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n",
+                       pr_err("%s[%d] fatal error -- ret=%d\n",
                                __func__, __LINE__, ret);
                 goto out_unmap;
         }
@@ -3058,7 +3053,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
             !ioctx->queue_status_only) {
                 ret = srpt_xfer_data(ch, ioctx);
                 if (ret) {
-                       printk(KERN_ERR "xfer_data failed for tag %llu\n",
+                       pr_err("xfer_data failed for tag %llu\n",
                                ioctx->tag);
                         return;
                 }
@@ -3075,7 +3070,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
         }
         ret = srpt_post_send(ch, ioctx, resp_len);
         if (ret) {
-               printk(KERN_ERR "sending cmd response failed for tag %llu\n",
+               pr_err("sending cmd response failed for tag %llu\n",
                        ioctx->tag);
                 srpt_unmap_sg_to_ib_sge(ch, ioctx);
                 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
@@ -3154,7 +3149,7 @@ static int srpt_release_sdev(struct srpt_device *sdev)
         res = wait_event_interruptible(sdev->ch_releaseQ,
                                        srpt_ch_list_empty(sdev));
         if (res)
-               printk(KERN_ERR "%s: interrupted.\n", __func__);
+               pr_err("%s: interrupted.\n", __func__);
  
         return 0;
  }
@@ -3293,7 +3288,7 @@ static void srpt_add_one(struct ib_device *device)
                 spin_lock_init(&sport->port_acl_lock);
  
                 if (srpt_refresh_port(sport)) {
-                       printk(KERN_ERR "MAD registration failed for %s-%d.\n",
+                       pr_err("MAD registration failed for %s-%d.\n",
                                srpt_sdev_name(sdev), i);
                         goto err_ring;
                 }
@@ -3330,7 +3325,7 @@ free_dev:
         kfree(sdev);
  err:
         sdev = NULL;
-       printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name);
+       pr_info("%s(%s) failed.\n", __func__, device->name);
         goto out;
  }
  
@@ -3344,8 +3339,7 @@ static void srpt_remove_one(struct ib_device *device)
  
         sdev = ib_get_client_data(device, &srpt_client);
         if (!sdev) {
-               printk(KERN_INFO "%s(%s): nothing to do.\n", __func__,
-                      device->name);
+               pr_info("%s(%s): nothing to do.\n", __func__, device->name);
                 return;
         }
  
@@ -3464,7 +3458,7 @@ static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg)
  
         nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL);
         if (!nacl) {
-               printk(KERN_ERR "Unable to allocate struct srpt_node_acl\n");
+               pr_err("Unable to allocate struct srpt_node_acl\n");
                 return NULL;
         }
  
@@ -3615,7 +3609,7 @@ static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg,
         u8 i_port_id[16];
  
         if (srpt_parse_i_port_id(i_port_id, name) < 0) {
-               printk(KERN_ERR "invalid initiator port ID %s\n", name);
+               pr_err("invalid initiator port ID %s\n", name);
                 ret = -EINVAL;
                 goto err;
         }
@@ -3816,12 +3810,12 @@ static ssize_t srpt_tpg_store_enable(
  
         ret = kstrtoul(page, 0, &tmp);
         if (ret < 0) {
-               printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n");
+               pr_err("Unable to extract srpt_tpg_store_enable\n");
                 return -EINVAL;
         }
  
         if ((tmp != 0) && (tmp != 1)) {
-               printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
+               pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
                 return -EINVAL;
         }
         if (tmp == 1)
@@ -3851,7 +3845,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
         int res;
  
         /* Initialize sport->port_wwn and sport->port_tpg_1 */
-       res = core_tpg_register(&srpt_target->tf_ops, &sport->port_wwn,
+       res = core_tpg_register(&srpt_template, &sport->port_wwn,
                         &sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL);
         if (res)
                 return ERR_PTR(res);
@@ -3919,7 +3913,9 @@ static struct configfs_attribute *srpt_wwn_attrs[] = {
         NULL,
  };
  
-static struct target_core_fabric_ops srpt_template = {
+static const struct target_core_fabric_ops srpt_template = {
+       .module                         = THIS_MODULE,
+       .name                           = "srpt",
         .get_fabric_name                = srpt_get_fabric_name,
         .get_fabric_proto_ident         = srpt_get_fabric_proto_ident,
         .tpg_get_wwn                    = srpt_get_fabric_wwn,
@@ -3964,6 +3960,10 @@ static struct target_core_fabric_ops srpt_template = {
         .fabric_drop_np                 = NULL,
         .fabric_make_nodeacl            = srpt_make_nodeacl,
         .fabric_drop_nodeacl            = srpt_drop_nodeacl,
+
+       .tfc_wwn_attrs                  = srpt_wwn_attrs,
+       .tfc_tpg_base_attrs             = srpt_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = srpt_tpg_attrib_attrs,
  };
  
  /**
@@ -3980,7 +3980,7 @@ static int __init srpt_init_module(void)
  
         ret = -EINVAL;
         if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
-               printk(KERN_ERR "invalid value %d for kernel module parameter"
+               pr_err("invalid value %d for kernel module parameter"
                        " srp_max_req_size -- must be at least %d.\n",
                        srp_max_req_size, MIN_MAX_REQ_SIZE);
                 goto out;
@@ -3988,54 +3988,26 @@ static int __init srpt_init_module(void)
  
         if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
             || srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
-               printk(KERN_ERR "invalid value %d for kernel module parameter"
+               pr_err("invalid value %d for kernel module parameter"
                        " srpt_srq_size -- must be in the range [%d..%d].\n",
                        srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
                 goto out;
         }
  
-       srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt");
-       if (IS_ERR(srpt_target)) {
-               printk(KERN_ERR "couldn't register\n");
-               ret = PTR_ERR(srpt_target);
+       ret = target_register_template(&srpt_template);
+       if (ret)
                 goto out;
-       }
-
-       srpt_target->tf_ops = srpt_template;
-
-       /*
-        * Set up default attribute lists.
-        */
-       srpt_target->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = srpt_wwn_attrs;
-       srpt_target->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = srpt_tpg_attrs;
-       srpt_target->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = srpt_tpg_attrib_attrs;
-       srpt_target->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       srpt_target->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-
-       ret = target_fabric_configfs_register(srpt_target);
-       if (ret < 0) {
-               printk(KERN_ERR "couldn't register\n");
-               goto out_free_target;
-       }
  
         ret = ib_register_client(&srpt_client);
         if (ret) {
-               printk(KERN_ERR "couldn't register IB client\n");
+               pr_err("couldn't register IB client\n");
                 goto out_unregister_target;
         }
  
         return 0;
  
  out_unregister_target:
-       target_fabric_configfs_deregister(srpt_target);
-       srpt_target = NULL;
-out_free_target:
-       if (srpt_target)
-               target_fabric_configfs_free(srpt_target);
+       target_unregister_template(&srpt_template);
  out:
         return ret;
  }
@@ -4043,8 +4015,7 @@ out:
  static void __exit srpt_cleanup_module(void)
  {
         ib_unregister_client(&srpt_client);
-       target_fabric_configfs_deregister(srpt_target);
-       srpt_target = NULL;
+       target_unregister_template(&srpt_template);
  }
  
  module_init(srpt_init_module);
diff --git a/drivers/input/keyboard/cros_ec_keyb.c b/drivers/input/keyboard/cros_ec_keyb.c

index 64b9b59..b50c5b8 100644 (file)
--- a/drivers/input/keyboard/cros_ec_keyb.c
+++ b/drivers/input/keyboard/cros_ec_keyb.c
@@ -148,16 +148,19 @@ static void cros_ec_keyb_process(struct cros_ec_keyb *ckdev,
  
  static int cros_ec_keyb_get_state(struct cros_ec_keyb *ckdev, uint8_t *kb_state)
  {
+       int ret;
         struct cros_ec_command msg = {
-               .version = 0,
                 .command = EC_CMD_MKBP_STATE,
-               .outdata = NULL,
-               .outsize = 0,
-               .indata = kb_state,
                 .insize = ckdev->cols,
         };
  
-       return cros_ec_cmd_xfer(ckdev->ec, &msg);
+       ret = cros_ec_cmd_xfer(ckdev->ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       memcpy(kb_state, msg.indata, ckdev->cols);
+
+       return 0;
  }
  
  static irqreturn_t cros_ec_keyb_irq(int irq, void *data)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c

index a35927c..68d43be 100644 (file)
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -50,6 +50,7 @@
  #define CONTEXT_SIZE           VTD_PAGE_SIZE
  
  #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
+#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
  #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
  #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
  
@@ -184,32 +185,11 @@ static int force_on = 0;
   * 64-127: Reserved
   */
  struct root_entry {
-       u64     val;
-       u64     rsvd1;
+       u64     lo;
+       u64     hi;
  };
  #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-       return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-       root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-       root->val &= ~VTD_PAGE_MASK;
-       root->val |= value & VTD_PAGE_MASK;
-}
  
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-       return (struct context_entry *)
-               (root_present(root)?phys_to_virt(
-               root->val & VTD_PAGE_MASK) :
-               NULL);
-}
  
  /*
   * low 64 bits:
@@ -682,6 +662,40 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
  }
  
+static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
+                                                      u8 bus, u8 devfn, int alloc)
+{
+       struct root_entry *root = &iommu->root_entry[bus];
+       struct context_entry *context;
+       u64 *entry;
+
+       if (ecap_ecs(iommu->ecap)) {
+               if (devfn >= 0x80) {
+                       devfn -= 0x80;
+                       entry = &root->hi;
+               }
+               devfn *= 2;
+       }
+       entry = &root->lo;
+       if (*entry & 1)
+               context = phys_to_virt(*entry & VTD_PAGE_MASK);
+       else {
+               unsigned long phy_addr;
+               if (!alloc)
+                       return NULL;
+
+               context = alloc_pgtable_page(iommu->node);
+               if (!context)
+                       return NULL;
+
+               __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
+               phy_addr = virt_to_phys((void *)context);
+               *entry = phy_addr | 1;
+               __iommu_flush_cache(iommu, entry, sizeof(*entry));
+       }
+       return &context[devfn];
+}
+
  static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
  {
         struct dmar_drhd_unit *drhd = NULL;
@@ -741,75 +755,36 @@ static void domain_flush_cache(struct dmar_domain *domain,
                 clflush_cache_range(addr, size);
  }
  
-/* Gets context entry for a given bus and devfn */
-static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
-               u8 bus, u8 devfn)
-{
-       struct root_entry *root;
-       struct context_entry *context;
-       unsigned long phy_addr;
-       unsigned long flags;
-
-       spin_lock_irqsave(&iommu->lock, flags);
-       root = &iommu->root_entry[bus];
-       context = get_context_addr_from_root(root);
-       if (!context) {
-               context = (struct context_entry *)
-                               alloc_pgtable_page(iommu->node);
-               if (!context) {
-                       spin_unlock_irqrestore(&iommu->lock, flags);
-                       return NULL;
-               }
-               __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
-               phy_addr = virt_to_phys((void *)context);
-               set_root_value(root, phy_addr);
-               set_root_present(root);
-               __iommu_flush_cache(iommu, root, sizeof(*root));
-       }
-       spin_unlock_irqrestore(&iommu->lock, flags);
-       return &context[devfn];
-}
-
  static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
  {
-       struct root_entry *root;
         struct context_entry *context;
-       int ret;
+       int ret = 0;
         unsigned long flags;
  
         spin_lock_irqsave(&iommu->lock, flags);
-       root = &iommu->root_entry[bus];
-       context = get_context_addr_from_root(root);
-       if (!context) {
-               ret = 0;
-               goto out;
-       }
-       ret = context_present(&context[devfn]);
-out:
+       context = iommu_context_addr(iommu, bus, devfn, 0);
+       if (context)
+               ret = context_present(context);
         spin_unlock_irqrestore(&iommu->lock, flags);
         return ret;
  }
  
  static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
  {
-       struct root_entry *root;
         struct context_entry *context;
         unsigned long flags;
  
         spin_lock_irqsave(&iommu->lock, flags);
-       root = &iommu->root_entry[bus];
-       context = get_context_addr_from_root(root);
+       context = iommu_context_addr(iommu, bus, devfn, 0);
         if (context) {
-               context_clear_entry(&context[devfn]);
-               __iommu_flush_cache(iommu, &context[devfn], \
-                       sizeof(*context));
+               context_clear_entry(context);
+               __iommu_flush_cache(iommu, context, sizeof(*context));
         }
         spin_unlock_irqrestore(&iommu->lock, flags);
  }
  
  static void free_context_table(struct intel_iommu *iommu)
  {
-       struct root_entry *root;
         int i;
         unsigned long flags;
         struct context_entry *context;
@@ -819,10 +794,17 @@ static void free_context_table(struct intel_iommu *iommu)
                 goto out;
         }
         for (i = 0; i < ROOT_ENTRY_NR; i++) {
-               root = &iommu->root_entry[i];
-               context = get_context_addr_from_root(root);
+               context = iommu_context_addr(iommu, i, 0, 0);
+               if (context)
+                       free_pgtable_page(context);
+
+               if (!ecap_ecs(iommu->ecap))
+                       continue;
+
+               context = iommu_context_addr(iommu, i, 0x80, 0);
                 if (context)
                         free_pgtable_page(context);
+
         }
         free_pgtable_page(iommu->root_entry);
         iommu->root_entry = NULL;
@@ -1146,14 +1128,16 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
  
  static void iommu_set_root_entry(struct intel_iommu *iommu)
  {
-       void *addr;
+       u64 addr;
         u32 sts;
         unsigned long flag;
  
-       addr = iommu->root_entry;
+       addr = virt_to_phys(iommu->root_entry);
+       if (ecap_ecs(iommu->ecap))
+               addr |= DMA_RTADDR_RTT;
  
         raw_spin_lock_irqsave(&iommu->register_lock, flag);
-       dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
+       dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
  
         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
  
@@ -1800,7 +1784,9 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
         BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
                translation != CONTEXT_TT_MULTI_LEVEL);
  
-       context = device_to_context_entry(iommu, bus, devfn);
+       spin_lock_irqsave(&iommu->lock, flags);
+       context = iommu_context_addr(iommu, bus, devfn, 1);
+       spin_unlock_irqrestore(&iommu->lock, flags);
         if (!context)
                 return -ENOMEM;
         spin_lock_irqsave(&iommu->lock, flags);
@@ -2564,6 +2550,10 @@ static bool device_has_rmrr(struct device *dev)
   * In both cases we assume that PCI USB devices with RMRRs have them largely
   * for historical reasons and that the RMRR space is not actively used post
   * boot.  This exclusion may change if vendors begin to abuse it.
+ *
+ * The same exception is made for graphics devices, with the requirement that
+ * any use of the RMRR regions will be torn down before assigning the device
+ * to a guest.
   */
  static bool device_is_rmrr_locked(struct device *dev)
  {
@@ -2573,7 +2563,7 @@ static bool device_is_rmrr_locked(struct device *dev)
         if (dev_is_pci(dev)) {
                 struct pci_dev *pdev = to_pci_dev(dev);
  
-               if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
+               if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
                         return false;
         }
  
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c

index 6c25b3c..5709ae9 100644 (file)
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -637,10 +637,7 @@ static int __init intel_enable_irq_remapping(void)
         if (x2apic_supported()) {
                 eim = !dmar_x2apic_optout();
                 if (!eim)
-                       printk(KERN_WARNING
-                               "Your BIOS is broken and requested that x2apic be disabled.\n"
-                               "This will slightly decrease performance.\n"
-                               "Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
+                       pr_info("x2apic is disabled because BIOS sets x2apic opt out bit. You can use 'intremap=no_x2apic_optout' to override the BIOS setting.\n");
         }
  
         for_each_iommu(iommu, drhd) {
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c

index a6ce347..7b315e3 100644 (file)
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -33,12 +33,14 @@
  #include <linux/of.h>
  #include <linux/of_address.h>
  #include <linux/of_irq.h>
+#include <linux/acpi.h>
  #include <linux/irqdomain.h>
  #include <linux/interrupt.h>
  #include <linux/percpu.h>
  #include <linux/slab.h>
  #include <linux/irqchip/chained_irq.h>
  #include <linux/irqchip/arm-gic.h>
+#include <linux/irqchip/arm-gic-acpi.h>
  
  #include <asm/cputype.h>
  #include <asm/irq.h>
@@ -1107,3 +1109,105 @@ IRQCHIP_DECLARE(msm_8660_qgic, "qcom,msm-8660-qgic", gic_of_init);
  IRQCHIP_DECLARE(msm_qgic2, "qcom,msm-qgic2", gic_of_init);
  
  #endif
+
+#ifdef CONFIG_ACPI
+static phys_addr_t dist_phy_base, cpu_phy_base __initdata;
+
+static int __init
+gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header,
+                       const unsigned long end)
+{
+       struct acpi_madt_generic_interrupt *processor;
+       phys_addr_t gic_cpu_base;
+       static int cpu_base_assigned;
+
+       processor = (struct acpi_madt_generic_interrupt *)header;
+
+       if (BAD_MADT_ENTRY(processor, end))
+               return -EINVAL;
+
+       /*
+        * There is no support for non-banked GICv1/2 register in ACPI spec.
+        * All CPU interface addresses have to be the same.
+        */
+       gic_cpu_base = processor->base_address;
+       if (cpu_base_assigned && gic_cpu_base != cpu_phy_base)
+               return -EINVAL;
+
+       cpu_phy_base = gic_cpu_base;
+       cpu_base_assigned = 1;
+       return 0;
+}
+
+static int __init
+gic_acpi_parse_madt_distributor(struct acpi_subtable_header *header,
+                               const unsigned long end)
+{
+       struct acpi_madt_generic_distributor *dist;
+
+       dist = (struct acpi_madt_generic_distributor *)header;
+
+       if (BAD_MADT_ENTRY(dist, end))
+               return -EINVAL;
+
+       dist_phy_base = dist->base_address;
+       return 0;
+}
+
+int __init
+gic_v2_acpi_init(struct acpi_table_header *table)
+{
+       void __iomem *cpu_base, *dist_base;
+       int count;
+
+       /* Collect CPU base addresses */
+       count = acpi_parse_entries(ACPI_SIG_MADT,
+                                  sizeof(struct acpi_table_madt),
+                                  gic_acpi_parse_madt_cpu, table,
+                                  ACPI_MADT_TYPE_GENERIC_INTERRUPT, 0);
+       if (count <= 0) {
+               pr_err("No valid GICC entries exist\n");
+               return -EINVAL;
+       }
+
+       /*
+        * Find distributor base address. We expect one distributor entry since
+        * ACPI 5.1 spec neither support multi-GIC instances nor GIC cascade.
+        */
+       count = acpi_parse_entries(ACPI_SIG_MADT,
+                                  sizeof(struct acpi_table_madt),
+                                  gic_acpi_parse_madt_distributor, table,
+                                  ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR, 0);
+       if (count <= 0) {
+               pr_err("No valid GICD entries exist\n");
+               return -EINVAL;
+       } else if (count > 1) {
+               pr_err("More than one GICD entry detected\n");
+               return -EINVAL;
+       }
+
+       cpu_base = ioremap(cpu_phy_base, ACPI_GIC_CPU_IF_MEM_SIZE);
+       if (!cpu_base) {
+               pr_err("Unable to map GICC registers\n");
+               return -ENOMEM;
+       }
+
+       dist_base = ioremap(dist_phy_base, ACPI_GICV2_DIST_MEM_SIZE);
+       if (!dist_base) {
+               pr_err("Unable to map GICD registers\n");
+               iounmap(cpu_base);
+               return -ENOMEM;
+       }
+
+       /*
+        * Initialize zero GIC instance (no multi-GIC support). Also, set GIC
+        * as default IRQ domain to allow for GSI registration and GSI to IRQ
+        * number translation (see acpi_register_gsi() and acpi_gsi_to_irq()).
+        */
+       gic_init_bases(0, -1, dist_base, cpu_base, 0, NULL);
+       irq_set_default_host(gic_data[0].domain);
+
+       acpi_irq_model = ACPI_IRQ_MODEL_GIC;
+       return 0;
+}
+#endif
diff --git a/drivers/irqchip/irqchip.c b/drivers/irqchip/irqchip.c

index 0fe2f71..afd1af3 100644 (file)
--- a/drivers/irqchip/irqchip.c
+++ b/drivers/irqchip/irqchip.c
@@ -8,6 +8,7 @@
   * warranty of any kind, whether express or implied.
   */
  
+#include <linux/acpi_irq.h>
  #include <linux/init.h>
  #include <linux/of_irq.h>
  #include <linux/irqchip.h>
@@ -26,4 +27,6 @@ extern struct of_device_id __irqchip_of_table[];
  void __init irqchip_init(void)
  {
         of_irq_init(__irqchip_of_table);
+
+       acpi_irq_init();
  }
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c

index 1219af4..19a3228 100644 (file)
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -211,10 +211,9 @@ static void initialize(struct lg_cpu *cpu)
  
         /*
          * The Guest tells us where we're not to deliver interrupts by putting
-        * the range of addresses into "struct lguest_data".
+        * the instruction address into "struct lguest_data".
          */
-       if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start)
-           || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end))
+       if (get_user(cpu->lg->noirq_iret, &cpu->lg->lguest_data->noirq_iret))
                 kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
  
         /*
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c

index 70dfcdc..5e7559b 100644 (file)
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -56,21 +56,16 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val)
  }
  
  /*H:210
- * The set_guest_interrupt() routine actually delivers the interrupt or
- * trap.  The mechanics of delivering traps and interrupts to the Guest are the
- * same, except some traps have an "error code" which gets pushed onto the
- * stack as well: the caller tells us if this is one.
- *
- * "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
- * interrupt or trap.  It's split into two parts for traditional reasons: gcc
- * on i386 used to be frightened by 64 bit numbers.
+ * The push_guest_interrupt_stack() routine saves Guest state on the stack for
+ * an interrupt or trap.  The mechanics of delivering traps and interrupts to
+ * the Guest are the same, except some traps have an "error code" which gets
+ * pushed onto the stack as well: the caller tells us if this is one.
   *
   * We set up the stack just like the CPU does for a real interrupt, so it's
   * identical for the Guest (and the standard "iret" instruction will undo
   * it).
   */
-static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
-                               bool has_err)
+static void push_guest_interrupt_stack(struct lg_cpu *cpu, bool has_err)
  {
         unsigned long gstack, origstack;
         u32 eflags, ss, irq_enable;
@@ -130,12 +125,28 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
         if (has_err)
                 push_guest_stack(cpu, &gstack, cpu->regs->errcode);
  
-       /*
-        * Now we've pushed all the old state, we change the stack, the code
-        * segment and the address to execute.
-        */
+       /* Adjust the stack pointer and stack segment. */
         cpu->regs->ss = ss;
         cpu->regs->esp = virtstack + (gstack - origstack);
+}
+
+/*
+ * This actually makes the Guest start executing the given interrupt/trap
+ * handler.
+ *
+ * "lo" and "hi" are the two parts of the Interrupt Descriptor Table for this
+ * interrupt or trap.  It's split into two parts for traditional reasons: gcc
+ * on i386 used to be frightened by 64 bit numbers.
+ */
+static void guest_run_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi)
+{
+       /* If we're already in the kernel, we don't change stacks. */
+       if ((cpu->regs->ss&0x3) != GUEST_PL)
+               cpu->regs->ss = cpu->esp1;
+
+       /*
+        * Set the code segment and the address to execute.
+        */
         cpu->regs->cs = (__KERNEL_CS|GUEST_PL);
         cpu->regs->eip = idt_address(lo, hi);
  
@@ -158,6 +169,24 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi,
                         kill_guest(cpu, "Disabling interrupts");
  }
  
+/* This restores the eflags word which was pushed on the stack by a trap */
+static void restore_eflags(struct lg_cpu *cpu)
+{
+       /* This is the physical address of the stack. */
+       unsigned long stack_pa = guest_pa(cpu, cpu->regs->esp);
+
+       /*
+        * Stack looks like this:
+        * Address      Contents
+        * esp          EIP
+        * esp + 4      CS
+        * esp + 8      EFLAGS
+        */
+       cpu->regs->eflags = lgread(cpu, stack_pa + 8, u32);
+       cpu->regs->eflags &=
+               ~(X86_EFLAGS_TF|X86_EFLAGS_VM|X86_EFLAGS_RF|X86_EFLAGS_NT);
+}
+
  /*H:205
   * Virtual Interrupts.
   *
@@ -200,14 +229,6 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
  
         BUG_ON(irq >= LGUEST_IRQS);
  
-       /*
-        * They may be in the middle of an iret, where they asked us never to
-        * deliver interrupts.
-        */
-       if (cpu->regs->eip >= cpu->lg->noirq_start &&
-          (cpu->regs->eip < cpu->lg->noirq_end))
-               return;
-
         /* If they're halted, interrupts restart them. */
         if (cpu->halted) {
                 /* Re-enable interrupts. */
@@ -237,12 +258,34 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
         if (idt_present(idt->a, idt->b)) {
                 /* OK, mark it no longer pending and deliver it. */
                 clear_bit(irq, cpu->irqs_pending);
+
                 /*
-                * set_guest_interrupt() takes the interrupt descriptor and a
-                * flag to say whether this interrupt pushes an error code onto
-                * the stack as well: virtual interrupts never do.
+                * They may be about to iret, where they asked us never to
+                * deliver interrupts.  In this case, we can emulate that iret
+                * then immediately deliver the interrupt.  This is basically
+                * a noop: the iret would pop the interrupt frame and restore
+                * eflags, and then we'd set it up again.  So just restore the
+                * eflags word and jump straight to the handler in this case.
+                *
+                * Denys Vlasenko points out that this isn't quite right: if
+                * the iret was returning to userspace, then that interrupt
+                * would reset the stack pointer (which the Guest told us
+                * about via LHCALL_SET_STACK).  But unless the Guest is being
+                * *really* weird, that will be the same as the current stack
+                * anyway.
                  */
-               set_guest_interrupt(cpu, idt->a, idt->b, false);
+               if (cpu->regs->eip == cpu->lg->noirq_iret) {
+                       restore_eflags(cpu);
+               } else {
+                       /*
+                        * set_guest_interrupt() takes a flag to say whether
+                        * this interrupt pushes an error code onto the stack
+                        * as well: virtual interrupts never do.
+                        */
+                       push_guest_interrupt_stack(cpu, false);
+               }
+               /* Actually make Guest cpu jump to handler. */
+               guest_run_interrupt(cpu, idt->a, idt->b);
         }
  
         /*
@@ -353,8 +396,9 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num)
          */
         if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b))
                 return false;
-       set_guest_interrupt(cpu, cpu->arch.idt[num].a,
-                           cpu->arch.idt[num].b, has_err(num));
+       push_guest_interrupt_stack(cpu, has_err(num));
+       guest_run_interrupt(cpu, cpu->arch.idt[num].a,
+                           cpu->arch.idt[num].b);
         return true;
  }
  
@@ -395,8 +439,9 @@ static bool direct_trap(unsigned int num)
   * The Guest has the ability to turn its interrupt gates into trap gates,
   * if it is careful.  The Host will let trap gates can go directly to the
   * Guest, but the Guest needs the interrupts atomically disabled for an
- * interrupt gate.  It can do this by pointing the trap gate at instructions
- * within noirq_start and noirq_end, where it can safely disable interrupts.
+ * interrupt gate.  The Host could provide a mechanism to register more
+ * "no-interrupt" regions, and the Guest could point the trap gate at
+ * instructions within that region, where it can safely disable interrupts.
   */
  
  /*M:006
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h

index 307e8b3..ac8ad04 100644 (file)
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -102,7 +102,7 @@ struct lguest {
  
         struct pgdir pgdirs[4];
  
-       unsigned long noirq_start, noirq_end;
+       unsigned long noirq_iret;
  
         unsigned int stack_pages;
         u32 tsc_khz;
diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c

index c4c6113..30c6068 100644 (file)
--- a/drivers/lguest/lguest_user.c
+++ b/drivers/lguest/lguest_user.c
@@ -339,6 +339,13 @@ static ssize_t write(struct file *file, const char __user *in,
         }
  }
  
+static int open(struct inode *inode, struct file *file)
+{
+       file->private_data = NULL;
+
+       return 0;
+}
+
  /*L:060
   * The final piece of interface code is the close() routine.  It reverses
   * everything done in initialize().  This is usually called because the
@@ -409,6 +416,7 @@ static int close(struct inode *inode, struct file *file)
   */
  static const struct file_operations lguest_fops = {
         .owner   = THIS_MODULE,
+       .open    = open,
         .release = close,
         .write   = write,
         .read    = read,
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig

index 6ddc983..edcf4ab 100644 (file)
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -175,6 +175,22 @@ config MD_FAULTY
  
           In unsure, say N.
  
+
+config MD_CLUSTER
+       tristate "Cluster Support for MD (EXPERIMENTAL)"
+       depends on BLK_DEV_MD
+       depends on DLM
+       default n
+       ---help---
+       Clustering support for MD devices. This enables locking and
+       synchronization across multiple systems on the cluster, so all
+       nodes in the cluster can access the MD devices simultaneously.
+
+       This brings the redundancy (and uptime) of RAID levels across the
+       nodes of the cluster.
+
+       If unsure, say N.
+
  source "drivers/md/bcache/Kconfig"
  
  config BLK_DEV_DM_BUILTIN
diff --git a/drivers/md/Makefile b/drivers/md/Makefile

index 1863fea..dba4db5 100644 (file)
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_MD_RAID10)               += raid10.o
  obj-$(CONFIG_MD_RAID456)       += raid456.o
  obj-$(CONFIG_MD_MULTIPATH)     += multipath.o
  obj-$(CONFIG_MD_FAULTY)                += faulty.o
+obj-$(CONFIG_MD_CLUSTER)       += md-cluster.o
  obj-$(CONFIG_BCACHE)           += bcache/
  obj-$(CONFIG_BLK_DEV_MD)       += md-mod.o
  obj-$(CONFIG_BLK_DEV_DM)       += dm-mod.o
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c

index 3a57679..2bc56e2 100644 (file)
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -205,6 +205,10 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
         struct block_device *bdev;
         struct mddev *mddev = bitmap->mddev;
         struct bitmap_storage *store = &bitmap->storage;
+       int node_offset = 0;
+
+       if (mddev_is_clustered(bitmap->mddev))
+               node_offset = bitmap->cluster_slot * store->file_pages;
  
         while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
                 int size = PAGE_SIZE;
@@ -433,6 +437,7 @@ void bitmap_update_sb(struct bitmap *bitmap)
         /* This might have been changed by a reshape */
         sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
         sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize);
+       sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
         sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
                                            bitmap_info.space);
         kunmap_atomic(sb);
@@ -544,6 +549,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
         bitmap_super_t *sb;
         unsigned long chunksize, daemon_sleep, write_behind;
         unsigned long long events;
+       int nodes = 0;
         unsigned long sectors_reserved = 0;
         int err = -EINVAL;
         struct page *sb_page;
@@ -562,6 +568,22 @@ static int bitmap_read_sb(struct bitmap *bitmap)
                 return -ENOMEM;
         bitmap->storage.sb_page = sb_page;
  
+re_read:
+       /* If cluster_slot is set, the cluster is setup */
+       if (bitmap->cluster_slot >= 0) {
+               sector_t bm_blocks = bitmap->mddev->resync_max_sectors;
+
+               sector_div(bm_blocks,
+                          bitmap->mddev->bitmap_info.chunksize >> 9);
+               /* bits to bytes */
+               bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
+               /* to 4k blocks */
+               bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
+               bitmap->mddev->bitmap_info.offset += bitmap->cluster_slot * (bm_blocks << 3);
+               pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
+                       bitmap->cluster_slot, (unsigned long long)bitmap->mddev->bitmap_info.offset);
+       }
+
         if (bitmap->storage.file) {
                 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host);
                 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize;
@@ -577,12 +599,15 @@ static int bitmap_read_sb(struct bitmap *bitmap)
         if (err)
                 return err;
  
+       err = -EINVAL;
         sb = kmap_atomic(sb_page);
  
         chunksize = le32_to_cpu(sb->chunksize);
         daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
         write_behind = le32_to_cpu(sb->write_behind);
         sectors_reserved = le32_to_cpu(sb->sectors_reserved);
+       nodes = le32_to_cpu(sb->nodes);
+       strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
  
         /* verify that the bitmap-specific fields are valid */
         if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -619,7 +644,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
                         goto out;
                 }
                 events = le64_to_cpu(sb->events);
-               if (events < bitmap->mddev->events) {
+               if (!nodes && (events < bitmap->mddev->events)) {
                         printk(KERN_INFO
                                "%s: bitmap file is out of date (%llu < %llu) "
                                "-- forcing full recovery\n",
@@ -634,20 +659,40 @@ static int bitmap_read_sb(struct bitmap *bitmap)
         if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
                 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
         bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
+       strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
         err = 0;
+
  out:
         kunmap_atomic(sb);
+       /* Assiging chunksize is required for "re_read" */
+       bitmap->mddev->bitmap_info.chunksize = chunksize;
+       if (nodes && (bitmap->cluster_slot < 0)) {
+               err = md_setup_cluster(bitmap->mddev, nodes);
+               if (err) {
+                       pr_err("%s: Could not setup cluster service (%d)\n",
+                                       bmname(bitmap), err);
+                       goto out_no_sb;
+               }
+               bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
+               goto re_read;
+       }
+
+
  out_no_sb:
         if (test_bit(BITMAP_STALE, &bitmap->flags))
                 bitmap->events_cleared = bitmap->mddev->events;
         bitmap->mddev->bitmap_info.chunksize = chunksize;
         bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
         bitmap->mddev->bitmap_info.max_write_behind = write_behind;
+       bitmap->mddev->bitmap_info.nodes = nodes;
         if (bitmap->mddev->bitmap_info.space == 0 ||
             bitmap->mddev->bitmap_info.space > sectors_reserved)
                 bitmap->mddev->bitmap_info.space = sectors_reserved;
-       if (err)
+       if (err) {
                 bitmap_print_sb(bitmap);
+               if (bitmap->cluster_slot < 0)
+                       md_cluster_stop(bitmap->mddev);
+       }
         return err;
  }
  
@@ -692,9 +737,10 @@ static inline struct page *filemap_get_page(struct bitmap_storage *store,
  }
  
  static int bitmap_storage_alloc(struct bitmap_storage *store,
-                               unsigned long chunks, int with_super)
+                               unsigned long chunks, int with_super,
+                               int slot_number)
  {
-       int pnum;
+       int pnum, offset = 0;
         unsigned long num_pages;
         unsigned long bytes;
  
@@ -703,6 +749,7 @@ static int bitmap_storage_alloc(struct bitmap_storage *store,
                 bytes += sizeof(bitmap_super_t);
  
         num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE);
+       offset = slot_number * (num_pages - 1);
  
         store->filemap = kmalloc(sizeof(struct page *)
                                  * num_pages, GFP_KERNEL);
@@ -713,20 +760,22 @@ static int bitmap_storage_alloc(struct bitmap_storage *store,
                 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO);
                 if (store->sb_page == NULL)
                         return -ENOMEM;
-               store->sb_page->index = 0;
         }
+
         pnum = 0;
         if (store->sb_page) {
                 store->filemap[0] = store->sb_page;
                 pnum = 1;
+               store->sb_page->index = offset;
         }
+
         for ( ; pnum < num_pages; pnum++) {
                 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO);
                 if (!store->filemap[pnum]) {
                         store->file_pages = pnum;
                         return -ENOMEM;
                 }
-               store->filemap[pnum]->index = pnum;
+               store->filemap[pnum]->index = pnum + offset;
         }
         store->file_pages = pnum;
  
@@ -885,6 +934,28 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
         }
  }
  
+static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
+{
+       unsigned long bit;
+       struct page *page;
+       void *paddr;
+       unsigned long chunk = block >> bitmap->counts.chunkshift;
+       int set = 0;
+
+       page = filemap_get_page(&bitmap->storage, chunk);
+       if (!page)
+               return -EINVAL;
+       bit = file_page_offset(&bitmap->storage, chunk);
+       paddr = kmap_atomic(page);
+       if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
+               set = test_bit(bit, paddr);
+       else
+               set = test_bit_le(bit, paddr);
+       kunmap_atomic(paddr);
+       return set;
+}
+
+
  /* this gets called when the md device is ready to unplug its underlying
   * (slave) device queues -- before we let any writes go down, we need to
   * sync the dirty pages of the bitmap file to disk */
@@ -935,7 +1006,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
   */
  static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
  {
-       unsigned long i, chunks, index, oldindex, bit;
+       unsigned long i, chunks, index, oldindex, bit, node_offset = 0;
         struct page *page = NULL;
         unsigned long bit_cnt = 0;
         struct file *file;
@@ -981,6 +1052,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
         if (!bitmap->mddev->bitmap_info.external)
                 offset = sizeof(bitmap_super_t);
  
+       if (mddev_is_clustered(bitmap->mddev))
+               node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE));
+
         for (i = 0; i < chunks; i++) {
                 int b;
                 index = file_page_index(&bitmap->storage, i);
@@ -1001,7 +1075,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
                                         bitmap->mddev,
                                         bitmap->mddev->bitmap_info.offset,
                                         page,
-                                       index, count);
+                                       index + node_offset, count);
  
                         if (ret)
                                 goto err;
@@ -1207,7 +1281,6 @@ void bitmap_daemon_work(struct mddev *mddev)
              j < bitmap->storage.file_pages
                      && !test_bit(BITMAP_STALE, &bitmap->flags);
              j++) {
-
                 if (test_page_attr(bitmap, j,
                                    BITMAP_PAGE_DIRTY))
                         /* bitmap_unplug will handle the rest */
@@ -1530,11 +1603,13 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
                 return;
         }
         if (!*bmc) {
-               *bmc = 2 | (needed ? NEEDED_MASK : 0);
+               *bmc = 2;
                 bitmap_count_page(&bitmap->counts, offset, 1);
                 bitmap_set_pending(&bitmap->counts, offset);
                 bitmap->allclean = 0;
         }
+       if (needed)
+               *bmc |= NEEDED_MASK;
         spin_unlock_irq(&bitmap->counts.lock);
  }
  
@@ -1591,6 +1666,10 @@ static void bitmap_free(struct bitmap *bitmap)
         if (!bitmap) /* there was no bitmap */
                 return;
  
+       if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info &&
+               bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev))
+               md_cluster_stop(bitmap->mddev);
+
         /* Shouldn't be needed - but just in case.... */
         wait_event(bitmap->write_wait,
                    atomic_read(&bitmap->pending_writes) == 0);
@@ -1636,7 +1715,7 @@ void bitmap_destroy(struct mddev *mddev)
   * initialize the bitmap structure
   * if this returns an error, bitmap_destroy must be called to do clean up
   */
-int bitmap_create(struct mddev *mddev)
+struct bitmap *bitmap_create(struct mddev *mddev, int slot)
  {
         struct bitmap *bitmap;
         sector_t blocks = mddev->resync_max_sectors;
@@ -1650,7 +1729,7 @@ int bitmap_create(struct mddev *mddev)
  
         bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
         if (!bitmap)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
  
         spin_lock_init(&bitmap->counts.lock);
         atomic_set(&bitmap->pending_writes, 0);
@@ -1659,6 +1738,7 @@ int bitmap_create(struct mddev *mddev)
         init_waitqueue_head(&bitmap->behind_wait);
  
         bitmap->mddev = mddev;
+       bitmap->cluster_slot = slot;
  
         if (mddev->kobj.sd)
                 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
@@ -1706,12 +1786,14 @@ int bitmap_create(struct mddev *mddev)
         printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
                bitmap->counts.pages, bmname(bitmap));
  
-       mddev->bitmap = bitmap;
-       return test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
+       err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
+       if (err)
+               goto error;
  
+       return bitmap;
   error:
         bitmap_free(bitmap);
-       return err;
+       return ERR_PTR(err);
  }
  
  int bitmap_load(struct mddev *mddev)
@@ -1765,6 +1847,60 @@ out:
  }
  EXPORT_SYMBOL_GPL(bitmap_load);
  
+/* Loads the bitmap associated with slot and copies the resync information
+ * to our bitmap
+ */
+int bitmap_copy_from_slot(struct mddev *mddev, int slot,
+               sector_t *low, sector_t *high, bool clear_bits)
+{
+       int rv = 0, i, j;
+       sector_t block, lo = 0, hi = 0;
+       struct bitmap_counts *counts;
+       struct bitmap *bitmap = bitmap_create(mddev, slot);
+
+       if (IS_ERR(bitmap))
+               return PTR_ERR(bitmap);
+
+       rv = bitmap_read_sb(bitmap);
+       if (rv)
+               goto err;
+
+       rv = bitmap_init_from_disk(bitmap, 0);
+       if (rv)
+               goto err;
+
+       counts = &bitmap->counts;
+       for (j = 0; j < counts->chunks; j++) {
+               block = (sector_t)j << counts->chunkshift;
+               if (bitmap_file_test_bit(bitmap, block)) {
+                       if (!lo)
+                               lo = block;
+                       hi = block;
+                       bitmap_file_clear_bit(bitmap, block);
+                       bitmap_set_memory_bits(mddev->bitmap, block, 1);
+                       bitmap_file_set_bit(mddev->bitmap, block);
+               }
+       }
+
+       if (clear_bits) {
+               bitmap_update_sb(bitmap);
+               /* Setting this for the ev_page should be enough.
+                * And we do not require both write_all and PAGE_DIRT either
+                */
+               for (i = 0; i < bitmap->storage.file_pages; i++)
+                       set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY);
+               bitmap_write_all(bitmap);
+               bitmap_unplug(bitmap);
+       }
+       *low = lo;
+       *high = hi;
+err:
+       bitmap_free(bitmap);
+       return rv;
+}
+EXPORT_SYMBOL_GPL(bitmap_copy_from_slot);
+
+
  void bitmap_status(struct seq_file *seq, struct bitmap *bitmap)
  {
         unsigned long chunk_kb;
@@ -1849,7 +1985,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
         memset(&store, 0, sizeof(store));
         if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
                 ret = bitmap_storage_alloc(&store, chunks,
-                                          !bitmap->mddev->bitmap_info.external);
+                                          !bitmap->mddev->bitmap_info.external,
+                                          bitmap->cluster_slot);
         if (ret)
                 goto err;
  
@@ -2021,13 +2158,18 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
                                 return -EINVAL;
                         mddev->bitmap_info.offset = offset;
                         if (mddev->pers) {
+                               struct bitmap *bitmap;
                                 mddev->pers->quiesce(mddev, 1);
-                               rv = bitmap_create(mddev);
-                               if (!rv)
+                               bitmap = bitmap_create(mddev, -1);
+                               if (IS_ERR(bitmap))
+                                       rv = PTR_ERR(bitmap);
+                               else {
+                                       mddev->bitmap = bitmap;
                                         rv = bitmap_load(mddev);
-                               if (rv) {
-                                       bitmap_destroy(mddev);
-                                       mddev->bitmap_info.offset = 0;
+                                       if (rv) {
+                                               bitmap_destroy(mddev);
+                                               mddev->bitmap_info.offset = 0;
+                                       }
                                 }
                                 mddev->pers->quiesce(mddev, 0);
                                 if (rv)
@@ -2186,6 +2328,8 @@ __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store);
  
  static ssize_t metadata_show(struct mddev *mddev, char *page)
  {
+       if (mddev_is_clustered(mddev))
+               return sprintf(page, "clustered\n");
         return sprintf(page, "%s\n", (mddev->bitmap_info.external
                                       ? "external" : "internal"));
  }
@@ -2198,7 +2342,8 @@ static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len)
                 return -EBUSY;
         if (strncmp(buf, "external", 8) == 0)
                 mddev->bitmap_info.external = 1;
-       else if (strncmp(buf, "internal", 8) == 0)
+       else if ((strncmp(buf, "internal", 8) == 0) ||
+                       (strncmp(buf, "clustered", 9) == 0))
                 mddev->bitmap_info.external = 0;
         else
                 return -EINVAL;
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h

index 30210b9..f1f4dd0 100644 (file)
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -130,8 +130,9 @@ typedef struct bitmap_super_s {
         __le32 write_behind; /* 60  number of outstanding write-behind writes */
         __le32 sectors_reserved; /* 64 number of 512-byte sectors that are
                                   * reserved for the bitmap. */
-
-       __u8  pad[256 - 68]; /* set to zero */
+       __le32 nodes;        /* 68 the maximum number of nodes in cluster. */
+       __u8 cluster_name[64]; /* 72 cluster name to which this md belongs */
+       __u8  pad[256 - 136]; /* set to zero */
  } bitmap_super_t;
  
  /* notes:
@@ -226,12 +227,13 @@ struct bitmap {
         wait_queue_head_t behind_wait;
  
         struct kernfs_node *sysfs_can_clear;
+       int cluster_slot;               /* Slot offset for clustered env */
  };
  
  /* the bitmap API */
  
  /* these are used only by md/bitmap */
-int  bitmap_create(struct mddev *mddev);
+struct bitmap *bitmap_create(struct mddev *mddev, int slot);
  int bitmap_load(struct mddev *mddev);
  void bitmap_flush(struct mddev *mddev);
  void bitmap_destroy(struct mddev *mddev);
@@ -260,6 +262,8 @@ void bitmap_daemon_work(struct mddev *mddev);
  
  int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
                   int chunksize, int init);
+int bitmap_copy_from_slot(struct mddev *mddev, int slot,
+                               sector_t *lo, sector_t *hi, bool clear_bits);
  #endif
  
  #endif
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c

new file mode 100644 (file)

index 0000000..fcfc4b9
--- /dev/null
+++ b/drivers/md/md-cluster.c
@@ -0,0 +1,965 @@
+/*
+ * Copyright (C) 2015, SUSE
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/dlm.h>
+#include <linux/sched.h>
+#include <linux/raid/md_p.h>
+#include "md.h"
+#include "bitmap.h"
+#include "md-cluster.h"
+
+#define LVB_SIZE       64
+#define NEW_DEV_TIMEOUT 5000
+
+struct dlm_lock_resource {
+       dlm_lockspace_t *ls;
+       struct dlm_lksb lksb;
+       char *name; /* lock name. */
+       uint32_t flags; /* flags to pass to dlm_lock() */
+       struct completion completion; /* completion for synchronized locking */
+       void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
+       struct mddev *mddev; /* pointing back to mddev. */
+};
+
+struct suspend_info {
+       int slot;
+       sector_t lo;
+       sector_t hi;
+       struct list_head list;
+};
+
+struct resync_info {
+       __le64 lo;
+       __le64 hi;
+};
+
+/* md_cluster_info flags */
+#define                MD_CLUSTER_WAITING_FOR_NEWDISK          1
+
+
+struct md_cluster_info {
+       /* dlm lock space and resources for clustered raid. */
+       dlm_lockspace_t *lockspace;
+       int slot_number;
+       struct completion completion;
+       struct dlm_lock_resource *sb_lock;
+       struct mutex sb_mutex;
+       struct dlm_lock_resource *bitmap_lockres;
+       struct list_head suspend_list;
+       spinlock_t suspend_lock;
+       struct md_thread *recovery_thread;
+       unsigned long recovery_map;
+       /* communication loc resources */
+       struct dlm_lock_resource *ack_lockres;
+       struct dlm_lock_resource *message_lockres;
+       struct dlm_lock_resource *token_lockres;
+       struct dlm_lock_resource *no_new_dev_lockres;
+       struct md_thread *recv_thread;
+       struct completion newdisk_completion;
+       unsigned long state;
+};
+
+enum msg_type {
+       METADATA_UPDATED = 0,
+       RESYNCING,
+       NEWDISK,
+       REMOVE,
+       RE_ADD,
+};
+
+struct cluster_msg {
+       int type;
+       int slot;
+       /* TODO: Unionize this for smaller footprint */
+       sector_t low;
+       sector_t high;
+       char uuid[16];
+       int raid_slot;
+};
+
+static void sync_ast(void *arg)
+{
+       struct dlm_lock_resource *res;
+
+       res = (struct dlm_lock_resource *) arg;
+       complete(&res->completion);
+}
+
+static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
+{
+       int ret = 0;
+
+       init_completion(&res->completion);
+       ret = dlm_lock(res->ls, mode, &res->lksb,
+                       res->flags, res->name, strlen(res->name),
+                       0, sync_ast, res, res->bast);
+       if (ret)
+               return ret;
+       wait_for_completion(&res->completion);
+       return res->lksb.sb_status;
+}
+
+static int dlm_unlock_sync(struct dlm_lock_resource *res)
+{
+       return dlm_lock_sync(res, DLM_LOCK_NL);
+}
+
+static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
+               char *name, void (*bastfn)(void *arg, int mode), int with_lvb)
+{
+       struct dlm_lock_resource *res = NULL;
+       int ret, namelen;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
+       if (!res)
+               return NULL;
+       res->ls = cinfo->lockspace;
+       res->mddev = mddev;
+       namelen = strlen(name);
+       res->name = kzalloc(namelen + 1, GFP_KERNEL);
+       if (!res->name) {
+               pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
+               goto out_err;
+       }
+       strlcpy(res->name, name, namelen + 1);
+       if (with_lvb) {
+               res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
+               if (!res->lksb.sb_lvbptr) {
+                       pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
+                       goto out_err;
+               }
+               res->flags = DLM_LKF_VALBLK;
+       }
+
+       if (bastfn)
+               res->bast = bastfn;
+
+       res->flags |= DLM_LKF_EXPEDITE;
+
+       ret = dlm_lock_sync(res, DLM_LOCK_NL);
+       if (ret) {
+               pr_err("md-cluster: Unable to lock NL on new lock resource %s\n", name);
+               goto out_err;
+       }
+       res->flags &= ~DLM_LKF_EXPEDITE;
+       res->flags |= DLM_LKF_CONVERT;
+
+       return res;
+out_err:
+       kfree(res->lksb.sb_lvbptr);
+       kfree(res->name);
+       kfree(res);
+       return NULL;
+}
+
+static void lockres_free(struct dlm_lock_resource *res)
+{
+       if (!res)
+               return;
+
+       init_completion(&res->completion);
+       dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res);
+       wait_for_completion(&res->completion);
+
+       kfree(res->name);
+       kfree(res->lksb.sb_lvbptr);
+       kfree(res);
+}
+
+static char *pretty_uuid(char *dest, char *src)
+{
+       int i, len = 0;
+
+       for (i = 0; i < 16; i++) {
+               if (i == 4 || i == 6 || i == 8 || i == 10)
+                       len += sprintf(dest + len, "-");
+               len += sprintf(dest + len, "%02x", (__u8)src[i]);
+       }
+       return dest;
+}
+
+static void add_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres,
+               sector_t lo, sector_t hi)
+{
+       struct resync_info *ri;
+
+       ri = (struct resync_info *)lockres->lksb.sb_lvbptr;
+       ri->lo = cpu_to_le64(lo);
+       ri->hi = cpu_to_le64(hi);
+}
+
+static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
+{
+       struct resync_info ri;
+       struct suspend_info *s = NULL;
+       sector_t hi = 0;
+
+       dlm_lock_sync(lockres, DLM_LOCK_CR);
+       memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
+       hi = le64_to_cpu(ri.hi);
+       if (ri.hi > 0) {
+               s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
+               if (!s)
+                       goto out;
+               s->hi = hi;
+               s->lo = le64_to_cpu(ri.lo);
+       }
+       dlm_unlock_sync(lockres);
+out:
+       return s;
+}
+
+static void recover_bitmaps(struct md_thread *thread)
+{
+       struct mddev *mddev = thread->mddev;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       struct dlm_lock_resource *bm_lockres;
+       char str[64];
+       int slot, ret;
+       struct suspend_info *s, *tmp;
+       sector_t lo, hi;
+
+       while (cinfo->recovery_map) {
+               slot = fls64((u64)cinfo->recovery_map) - 1;
+
+               /* Clear suspend_area associated with the bitmap */
+               spin_lock_irq(&cinfo->suspend_lock);
+               list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
+                       if (slot == s->slot) {
+                               list_del(&s->list);
+                               kfree(s);
+                       }
+               spin_unlock_irq(&cinfo->suspend_lock);
+
+               snprintf(str, 64, "bitmap%04d", slot);
+               bm_lockres = lockres_init(mddev, str, NULL, 1);
+               if (!bm_lockres) {
+                       pr_err("md-cluster: Cannot initialize bitmaps\n");
+                       goto clear_bit;
+               }
+
+               ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
+               if (ret) {
+                       pr_err("md-cluster: Could not DLM lock %s: %d\n",
+                                       str, ret);
+                       goto clear_bit;
+               }
+               ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
+               if (ret) {
+                       pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
+                       goto dlm_unlock;
+               }
+               if (hi > 0) {
+                       /* TODO:Wait for current resync to get over */
+                       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                       if (lo < mddev->recovery_cp)
+                               mddev->recovery_cp = lo;
+                       md_check_recovery(mddev);
+               }
+dlm_unlock:
+               dlm_unlock_sync(bm_lockres);
+clear_bit:
+               clear_bit(slot, &cinfo->recovery_map);
+       }
+}
+
+static void recover_prep(void *arg)
+{
+}
+
+static void recover_slot(void *arg, struct dlm_slot *slot)
+{
+       struct mddev *mddev = arg;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n",
+                       mddev->bitmap_info.cluster_name,
+                       slot->nodeid, slot->slot,
+                       cinfo->slot_number);
+       set_bit(slot->slot - 1, &cinfo->recovery_map);
+       if (!cinfo->recovery_thread) {
+               cinfo->recovery_thread = md_register_thread(recover_bitmaps,
+                               mddev, "recover");
+               if (!cinfo->recovery_thread) {
+                       pr_warn("md-cluster: Could not create recovery thread\n");
+                       return;
+               }
+       }
+       md_wakeup_thread(cinfo->recovery_thread);
+}
+
+static void recover_done(void *arg, struct dlm_slot *slots,
+               int num_slots, int our_slot,
+               uint32_t generation)
+{
+       struct mddev *mddev = arg;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       cinfo->slot_number = our_slot;
+       complete(&cinfo->completion);
+}
+
+static const struct dlm_lockspace_ops md_ls_ops = {
+       .recover_prep = recover_prep,
+       .recover_slot = recover_slot,
+       .recover_done = recover_done,
+};
+
+/*
+ * The BAST function for the ack lock resource
+ * This function wakes up the receive thread in
+ * order to receive and process the message.
+ */
+static void ack_bast(void *arg, int mode)
+{
+       struct dlm_lock_resource *res = (struct dlm_lock_resource *)arg;
+       struct md_cluster_info *cinfo = res->mddev->cluster_info;
+
+       if (mode == DLM_LOCK_EX)
+               md_wakeup_thread(cinfo->recv_thread);
+}
+
+static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
+{
+       struct suspend_info *s, *tmp;
+
+       list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
+               if (slot == s->slot) {
+                       pr_info("%s:%d Deleting suspend_info: %d\n",
+                                       __func__, __LINE__, slot);
+                       list_del(&s->list);
+                       kfree(s);
+                       break;
+               }
+}
+
+static void remove_suspend_info(struct md_cluster_info *cinfo, int slot)
+{
+       spin_lock_irq(&cinfo->suspend_lock);
+       __remove_suspend_info(cinfo, slot);
+       spin_unlock_irq(&cinfo->suspend_lock);
+}
+
+
+static void process_suspend_info(struct md_cluster_info *cinfo,
+               int slot, sector_t lo, sector_t hi)
+{
+       struct suspend_info *s;
+
+       if (!hi) {
+               remove_suspend_info(cinfo, slot);
+               return;
+       }
+       s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
+       if (!s)
+               return;
+       s->slot = slot;
+       s->lo = lo;
+       s->hi = hi;
+       spin_lock_irq(&cinfo->suspend_lock);
+       /* Remove existing entry (if exists) before adding */
+       __remove_suspend_info(cinfo, slot);
+       list_add(&s->list, &cinfo->suspend_list);
+       spin_unlock_irq(&cinfo->suspend_lock);
+}
+
+static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
+{
+       char disk_uuid[64];
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       char event_name[] = "EVENT=ADD_DEVICE";
+       char raid_slot[16];
+       char *envp[] = {event_name, disk_uuid, raid_slot, NULL};
+       int len;
+
+       len = snprintf(disk_uuid, 64, "DEVICE_UUID=");
+       pretty_uuid(disk_uuid + len, cmsg->uuid);
+       snprintf(raid_slot, 16, "RAID_DISK=%d", cmsg->raid_slot);
+       pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot);
+       init_completion(&cinfo->newdisk_completion);
+       set_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
+       kobject_uevent_env(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE, envp);
+       wait_for_completion_timeout(&cinfo->newdisk_completion,
+                       NEW_DEV_TIMEOUT);
+       clear_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
+}
+
+
+static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       md_reload_sb(mddev);
+       dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
+}
+
+static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
+{
+       struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
+
+       if (rdev)
+               md_kick_rdev_from_array(rdev);
+       else
+               pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, msg->raid_slot);
+}
+
+static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
+{
+       struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, msg->raid_slot);
+
+       if (rdev && test_bit(Faulty, &rdev->flags))
+               clear_bit(Faulty, &rdev->flags);
+       else
+               pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, msg->raid_slot);
+}
+
+static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
+{
+       switch (msg->type) {
+       case METADATA_UPDATED:
+               pr_info("%s: %d Received message: METADATA_UPDATE from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_metadata_update(mddev, msg);
+               break;
+       case RESYNCING:
+               pr_info("%s: %d Received message: RESYNCING from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_suspend_info(mddev->cluster_info, msg->slot,
+                               msg->low, msg->high);
+               break;
+       case NEWDISK:
+               pr_info("%s: %d Received message: NEWDISK from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_add_new_disk(mddev, msg);
+               break;
+       case REMOVE:
+               pr_info("%s: %d Received REMOVE from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_remove_disk(mddev, msg);
+               break;
+       case RE_ADD:
+               pr_info("%s: %d Received RE_ADD from %d\n",
+                       __func__, __LINE__, msg->slot);
+               process_readd_disk(mddev, msg);
+               break;
+       default:
+               pr_warn("%s:%d Received unknown message from %d\n",
+                       __func__, __LINE__, msg->slot);
+       }
+}
+
+/*
+ * thread for receiving message
+ */
+static void recv_daemon(struct md_thread *thread)
+{
+       struct md_cluster_info *cinfo = thread->mddev->cluster_info;
+       struct dlm_lock_resource *ack_lockres = cinfo->ack_lockres;
+       struct dlm_lock_resource *message_lockres = cinfo->message_lockres;
+       struct cluster_msg msg;
+
+       /*get CR on Message*/
+       if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
+               pr_err("md/raid1:failed to get CR on MESSAGE\n");
+               return;
+       }
+
+       /* read lvb and wake up thread to process this message_lockres */
+       memcpy(&msg, message_lockres->lksb.sb_lvbptr, sizeof(struct cluster_msg));
+       process_recvd_msg(thread->mddev, &msg);
+
+       /*release CR on ack_lockres*/
+       dlm_unlock_sync(ack_lockres);
+       /*up-convert to EX on message_lockres*/
+       dlm_lock_sync(message_lockres, DLM_LOCK_EX);
+       /*get CR on ack_lockres again*/
+       dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
+       /*release CR on message_lockres*/
+       dlm_unlock_sync(message_lockres);
+}
+
+/* lock_comm()
+ * Takes the lock on the TOKEN lock resource so no other
+ * node can communicate while the operation is underway.
+ */
+static int lock_comm(struct md_cluster_info *cinfo)
+{
+       int error;
+
+       error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
+       if (error)
+               pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
+                               __func__, __LINE__, error);
+       return error;
+}
+
+static void unlock_comm(struct md_cluster_info *cinfo)
+{
+       dlm_unlock_sync(cinfo->token_lockres);
+}
+
+/* __sendmsg()
+ * This function performs the actual sending of the message. This function is
+ * usually called after performing the encompassing operation
+ * The function:
+ * 1. Grabs the message lockresource in EX mode
+ * 2. Copies the message to the message LVB
+ * 3. Downconverts message lockresource to CR
+ * 4. Upconverts ack lock resource from CR to EX. This forces the BAST on other nodes
+ *    and the other nodes read the message. The thread will wait here until all other
+ *    nodes have released ack lock resource.
+ * 5. Downconvert ack lockresource to CR
+ */
+static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
+{
+       int error;
+       int slot = cinfo->slot_number - 1;
+
+       cmsg->slot = cpu_to_le32(slot);
+       /*get EX on Message*/
+       error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
+       if (error) {
+               pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
+               goto failed_message;
+       }
+
+       memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
+                       sizeof(struct cluster_msg));
+       /*down-convert EX to CR on Message*/
+       error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CR);
+       if (error) {
+               pr_err("md-cluster: failed to convert EX to CR on MESSAGE(%d)\n",
+                               error);
+               goto failed_message;
+       }
+
+       /*up-convert CR to EX on Ack*/
+       error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_EX);
+       if (error) {
+               pr_err("md-cluster: failed to convert CR to EX on ACK(%d)\n",
+                               error);
+               goto failed_ack;
+       }
+
+       /*down-convert EX to CR on Ack*/
+       error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR);
+       if (error) {
+               pr_err("md-cluster: failed to convert EX to CR on ACK(%d)\n",
+                               error);
+               goto failed_ack;
+       }
+
+failed_ack:
+       dlm_unlock_sync(cinfo->message_lockres);
+failed_message:
+       return error;
+}
+
+static int sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
+{
+       int ret;
+
+       lock_comm(cinfo);
+       ret = __sendmsg(cinfo, cmsg);
+       unlock_comm(cinfo);
+       return ret;
+}
+
+static int gather_all_resync_info(struct mddev *mddev, int total_slots)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       int i, ret = 0;
+       struct dlm_lock_resource *bm_lockres;
+       struct suspend_info *s;
+       char str[64];
+
+
+       for (i = 0; i < total_slots; i++) {
+               memset(str, '\0', 64);
+               snprintf(str, 64, "bitmap%04d", i);
+               bm_lockres = lockres_init(mddev, str, NULL, 1);
+               if (!bm_lockres)
+                       return -ENOMEM;
+               if (i == (cinfo->slot_number - 1))
+                       continue;
+
+               bm_lockres->flags |= DLM_LKF_NOQUEUE;
+               ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
+               if (ret == -EAGAIN) {
+                       memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
+                       s = read_resync_info(mddev, bm_lockres);
+                       if (s) {
+                               pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
+                                               __func__, __LINE__,
+                                               (unsigned long long) s->lo,
+                                               (unsigned long long) s->hi, i);
+                               spin_lock_irq(&cinfo->suspend_lock);
+                               s->slot = i;
+                               list_add(&s->list, &cinfo->suspend_list);
+                               spin_unlock_irq(&cinfo->suspend_lock);
+                       }
+                       ret = 0;
+                       lockres_free(bm_lockres);
+                       continue;
+               }
+               if (ret)
+                       goto out;
+               /* TODO: Read the disk bitmap sb and check if it needs recovery */
+               dlm_unlock_sync(bm_lockres);
+               lockres_free(bm_lockres);
+       }
+out:
+       return ret;
+}
+
+static int join(struct mddev *mddev, int nodes)
+{
+       struct md_cluster_info *cinfo;
+       int ret, ops_rv;
+       char str[64];
+
+       if (!try_module_get(THIS_MODULE))
+               return -ENOENT;
+
+       cinfo = kzalloc(sizeof(struct md_cluster_info), GFP_KERNEL);
+       if (!cinfo)
+               return -ENOMEM;
+
+       init_completion(&cinfo->completion);
+
+       mutex_init(&cinfo->sb_mutex);
+       mddev->cluster_info = cinfo;
+
+       memset(str, 0, 64);
+       pretty_uuid(str, mddev->uuid);
+       ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name,
+                               DLM_LSFL_FS, LVB_SIZE,
+                               &md_ls_ops, mddev, &ops_rv, &cinfo->lockspace);
+       if (ret)
+               goto err;
+       wait_for_completion(&cinfo->completion);
+       if (nodes < cinfo->slot_number) {
+               pr_err("md-cluster: Slot allotted(%d) is greater than available slots(%d).",
+                       cinfo->slot_number, nodes);
+               ret = -ERANGE;
+               goto err;
+       }
+       cinfo->sb_lock = lockres_init(mddev, "cmd-super",
+                                       NULL, 0);
+       if (!cinfo->sb_lock) {
+               ret = -ENOMEM;
+               goto err;
+       }
+       /* Initiate the communication resources */
+       ret = -ENOMEM;
+       cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
+       if (!cinfo->recv_thread) {
+               pr_err("md-cluster: cannot allocate memory for recv_thread!\n");
+               goto err;
+       }
+       cinfo->message_lockres = lockres_init(mddev, "message", NULL, 1);
+       if (!cinfo->message_lockres)
+               goto err;
+       cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0);
+       if (!cinfo->token_lockres)
+               goto err;
+       cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
+       if (!cinfo->ack_lockres)
+               goto err;
+       cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
+       if (!cinfo->no_new_dev_lockres)
+               goto err;
+
+       /* get sync CR lock on ACK. */
+       if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
+               pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
+                               ret);
+       /* get sync CR lock on no-new-dev. */
+       if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
+               pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);
+
+
+       pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
+       snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
+       cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
+       if (!cinfo->bitmap_lockres)
+               goto err;
+       if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
+               pr_err("Failed to get bitmap lock\n");
+               ret = -EINVAL;
+               goto err;
+       }
+
+       INIT_LIST_HEAD(&cinfo->suspend_list);
+       spin_lock_init(&cinfo->suspend_lock);
+
+       ret = gather_all_resync_info(mddev, nodes);
+       if (ret)
+               goto err;
+
+       return 0;
+err:
+       lockres_free(cinfo->message_lockres);
+       lockres_free(cinfo->token_lockres);
+       lockres_free(cinfo->ack_lockres);
+       lockres_free(cinfo->no_new_dev_lockres);
+       lockres_free(cinfo->bitmap_lockres);
+       lockres_free(cinfo->sb_lock);
+       if (cinfo->lockspace)
+               dlm_release_lockspace(cinfo->lockspace, 2);
+       mddev->cluster_info = NULL;
+       kfree(cinfo);
+       module_put(THIS_MODULE);
+       return ret;
+}
+
+static int leave(struct mddev *mddev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       if (!cinfo)
+               return 0;
+       md_unregister_thread(&cinfo->recovery_thread);
+       md_unregister_thread(&cinfo->recv_thread);
+       lockres_free(cinfo->message_lockres);
+       lockres_free(cinfo->token_lockres);
+       lockres_free(cinfo->ack_lockres);
+       lockres_free(cinfo->no_new_dev_lockres);
+       lockres_free(cinfo->sb_lock);
+       lockres_free(cinfo->bitmap_lockres);
+       dlm_release_lockspace(cinfo->lockspace, 2);
+       return 0;
+}
+
+/* slot_number(): Returns the MD slot number to use
+ * DLM starts the slot numbers from 1, wheras cluster-md
+ * wants the number to be from zero, so we deduct one
+ */
+static int slot_number(struct mddev *mddev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       return cinfo->slot_number - 1;
+}
+
+static void resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       add_resync_info(mddev, cinfo->bitmap_lockres, lo, hi);
+       /* Re-acquire the lock to refresh LVB */
+       dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
+}
+
+static int metadata_update_start(struct mddev *mddev)
+{
+       return lock_comm(mddev->cluster_info);
+}
+
+static int metadata_update_finish(struct mddev *mddev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       struct cluster_msg cmsg;
+       int ret;
+
+       memset(&cmsg, 0, sizeof(cmsg));
+       cmsg.type = cpu_to_le32(METADATA_UPDATED);
+       ret = __sendmsg(cinfo, &cmsg);
+       unlock_comm(cinfo);
+       return ret;
+}
+
+static int metadata_update_cancel(struct mddev *mddev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       return dlm_unlock_sync(cinfo->token_lockres);
+}
+
+static int resync_send(struct mddev *mddev, enum msg_type type,
+               sector_t lo, sector_t hi)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       struct cluster_msg cmsg;
+       int slot = cinfo->slot_number - 1;
+
+       pr_info("%s:%d lo: %llu hi: %llu\n", __func__, __LINE__,
+                       (unsigned long long)lo,
+                       (unsigned long long)hi);
+       resync_info_update(mddev, lo, hi);
+       cmsg.type = cpu_to_le32(type);
+       cmsg.slot = cpu_to_le32(slot);
+       cmsg.low = cpu_to_le64(lo);
+       cmsg.high = cpu_to_le64(hi);
+       return sendmsg(cinfo, &cmsg);
+}
+
+static int resync_start(struct mddev *mddev, sector_t lo, sector_t hi)
+{
+       pr_info("%s:%d\n", __func__, __LINE__);
+       return resync_send(mddev, RESYNCING, lo, hi);
+}
+
+static void resync_finish(struct mddev *mddev)
+{
+       pr_info("%s:%d\n", __func__, __LINE__);
+       resync_send(mddev, RESYNCING, 0, 0);
+}
+
+static int area_resyncing(struct mddev *mddev, sector_t lo, sector_t hi)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       int ret = 0;
+       struct suspend_info *s;
+
+       spin_lock_irq(&cinfo->suspend_lock);
+       if (list_empty(&cinfo->suspend_list))
+               goto out;
+       list_for_each_entry(s, &cinfo->suspend_list, list)
+               if (hi > s->lo && lo < s->hi) {
+                       ret = 1;
+                       break;
+               }
+out:
+       spin_unlock_irq(&cinfo->suspend_lock);
+       return ret;
+}
+
+static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       struct cluster_msg cmsg;
+       int ret = 0;
+       struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
+       char *uuid = sb->device_uuid;
+
+       memset(&cmsg, 0, sizeof(cmsg));
+       cmsg.type = cpu_to_le32(NEWDISK);
+       memcpy(cmsg.uuid, uuid, 16);
+       cmsg.raid_slot = rdev->desc_nr;
+       lock_comm(cinfo);
+       ret = __sendmsg(cinfo, &cmsg);
+       if (ret)
+               return ret;
+       cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE;
+       ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX);
+       cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE;
+       /* Some node does not "see" the device */
+       if (ret == -EAGAIN)
+               ret = -ENOENT;
+       else
+               dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
+       return ret;
+}
+
+static int add_new_disk_finish(struct mddev *mddev)
+{
+       struct cluster_msg cmsg;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       int ret;
+       /* Write sb and inform others */
+       md_update_sb(mddev, 1);
+       cmsg.type = METADATA_UPDATED;
+       ret = __sendmsg(cinfo, &cmsg);
+       unlock_comm(cinfo);
+       return ret;
+}
+
+static int new_disk_ack(struct mddev *mddev, bool ack)
+{
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       if (!test_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state)) {
+               pr_warn("md-cluster(%s): Spurious cluster confirmation\n", mdname(mddev));
+               return -EINVAL;
+       }
+
+       if (ack)
+               dlm_unlock_sync(cinfo->no_new_dev_lockres);
+       complete(&cinfo->newdisk_completion);
+       return 0;
+}
+
+static int remove_disk(struct mddev *mddev, struct md_rdev *rdev)
+{
+       struct cluster_msg cmsg;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+       cmsg.type = REMOVE;
+       cmsg.raid_slot = rdev->desc_nr;
+       return __sendmsg(cinfo, &cmsg);
+}
+
+static int gather_bitmaps(struct md_rdev *rdev)
+{
+       int sn, err;
+       sector_t lo, hi;
+       struct cluster_msg cmsg;
+       struct mddev *mddev = rdev->mddev;
+       struct md_cluster_info *cinfo = mddev->cluster_info;
+
+       cmsg.type = RE_ADD;
+       cmsg.raid_slot = rdev->desc_nr;
+       err = sendmsg(cinfo, &cmsg);
+       if (err)
+               goto out;
+
+       for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) {
+               if (sn == (cinfo->slot_number - 1))
+                       continue;
+               err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
+               if (err) {
+                       pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
+                       goto out;
+               }
+               if ((hi > 0) && (lo < mddev->recovery_cp))
+                       mddev->recovery_cp = lo;
+       }
+out:
+       return err;
+}
+
+static struct md_cluster_operations cluster_ops = {
+       .join   = join,
+       .leave  = leave,
+       .slot_number = slot_number,
+       .resync_info_update = resync_info_update,
+       .resync_start = resync_start,
+       .resync_finish = resync_finish,
+       .metadata_update_start = metadata_update_start,
+       .metadata_update_finish = metadata_update_finish,
+       .metadata_update_cancel = metadata_update_cancel,
+       .area_resyncing = area_resyncing,
+       .add_new_disk_start = add_new_disk_start,
+       .add_new_disk_finish = add_new_disk_finish,
+       .new_disk_ack = new_disk_ack,
+       .remove_disk = remove_disk,
+       .gather_bitmaps = gather_bitmaps,
+};
+
+static int __init cluster_init(void)
+{
+       pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n");
+       pr_info("Registering Cluster MD functions\n");
+       register_md_cluster_operations(&cluster_ops, THIS_MODULE);
+       return 0;
+}
+
+static void cluster_exit(void)
+{
+       unregister_md_cluster_operations();
+}
+
+module_init(cluster_init);
+module_exit(cluster_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Clustering support for MD");
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h

new file mode 100644 (file)

index 0000000..6817ee0
--- /dev/null
+++ b/drivers/md/md-cluster.h
@@ -0,0 +1,29 @@
+
+
+#ifndef _MD_CLUSTER_H
+#define _MD_CLUSTER_H
+
+#include "md.h"
+
+struct mddev;
+struct md_rdev;
+
+struct md_cluster_operations {
+       int (*join)(struct mddev *mddev, int nodes);
+       int (*leave)(struct mddev *mddev);
+       int (*slot_number)(struct mddev *mddev);
+       void (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
+       int (*resync_start)(struct mddev *mddev, sector_t lo, sector_t hi);
+       void (*resync_finish)(struct mddev *mddev);
+       int (*metadata_update_start)(struct mddev *mddev);
+       int (*metadata_update_finish)(struct mddev *mddev);
+       int (*metadata_update_cancel)(struct mddev *mddev);
+       int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi);
+       int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev);
+       int (*add_new_disk_finish)(struct mddev *mddev);
+       int (*new_disk_ack)(struct mddev *mddev, bool ack);
+       int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
+       int (*gather_bitmaps)(struct md_rdev *rdev);
+};
+
+#endif /* _MD_CLUSTER_H */
diff --git a/drivers/md/md.c b/drivers/md/md.c

index e617878..d4f31e1 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -53,6 +53,7 @@
  #include <linux/slab.h>
  #include "md.h"
  #include "bitmap.h"
+#include "md-cluster.h"
  
  #ifndef MODULE
  static void autostart_arrays(int part);
@@ -66,6 +67,11 @@ static void autostart_arrays(int part);
  static LIST_HEAD(pers_list);
  static DEFINE_SPINLOCK(pers_lock);
  
+struct md_cluster_operations *md_cluster_ops;
+EXPORT_SYMBOL(md_cluster_ops);
+struct module *md_cluster_mod;
+EXPORT_SYMBOL(md_cluster_mod);
+
  static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
  static struct workqueue_struct *md_wq;
  static struct workqueue_struct *md_misc_wq;
@@ -640,7 +646,7 @@ void mddev_unlock(struct mddev *mddev)
  }
  EXPORT_SYMBOL_GPL(mddev_unlock);
  
-static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
+struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr)
  {
         struct md_rdev *rdev;
  
@@ -650,6 +656,7 @@ static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
  
         return NULL;
  }
+EXPORT_SYMBOL_GPL(md_find_rdev_nr_rcu);
  
  static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
  {
@@ -2047,11 +2054,11 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
                 int choice = 0;
                 if (mddev->pers)
                         choice = mddev->raid_disks;
-               while (find_rdev_nr_rcu(mddev, choice))
+               while (md_find_rdev_nr_rcu(mddev, choice))
                         choice++;
                 rdev->desc_nr = choice;
         } else {
-               if (find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
+               if (md_find_rdev_nr_rcu(mddev, rdev->desc_nr)) {
                         rcu_read_unlock();
                         return -EBUSY;
                 }
@@ -2166,11 +2173,12 @@ static void export_rdev(struct md_rdev *rdev)
         kobject_put(&rdev->kobj);
  }
  
-static void kick_rdev_from_array(struct md_rdev *rdev)
+void md_kick_rdev_from_array(struct md_rdev *rdev)
  {
         unbind_rdev_from_array(rdev);
         export_rdev(rdev);
  }
+EXPORT_SYMBOL_GPL(md_kick_rdev_from_array);
  
  static void export_array(struct mddev *mddev)
  {
@@ -2179,7 +2187,7 @@ static void export_array(struct mddev *mddev)
         while (!list_empty(&mddev->disks)) {
                 rdev = list_first_entry(&mddev->disks, struct md_rdev,
                                         same_set);
-               kick_rdev_from_array(rdev);
+               md_kick_rdev_from_array(rdev);
         }
         mddev->raid_disks = 0;
         mddev->major_version = 0;
@@ -2208,7 +2216,7 @@ static void sync_sbs(struct mddev *mddev, int nospares)
         }
  }
  
-static void md_update_sb(struct mddev *mddev, int force_change)
+void md_update_sb(struct mddev *mddev, int force_change)
  {
         struct md_rdev *rdev;
         int sync_req;
@@ -2369,6 +2377,37 @@ repeat:
                 wake_up(&rdev->blocked_wait);
         }
  }
+EXPORT_SYMBOL(md_update_sb);
+
+static int add_bound_rdev(struct md_rdev *rdev)
+{
+       struct mddev *mddev = rdev->mddev;
+       int err = 0;
+
+       if (!mddev->pers->hot_remove_disk) {
+               /* If there is hot_add_disk but no hot_remove_disk
+                * then added disks for geometry changes,
+                * and should be added immediately.
+                */
+               super_types[mddev->major_version].
+                       validate_super(mddev, rdev);
+               err = mddev->pers->hot_add_disk(mddev, rdev);
+               if (err) {
+                       unbind_rdev_from_array(rdev);
+                       export_rdev(rdev);
+                       return err;
+               }
+       }
+       sysfs_notify_dirent_safe(rdev->sysfs_state);
+
+       set_bit(MD_CHANGE_DEVS, &mddev->flags);
+       if (mddev->degraded)
+               set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+       md_new_event(mddev);
+       md_wakeup_thread(mddev->thread);
+       return 0;
+}
  
  /* words written to sysfs files may, or may not, be \n terminated.
   * We want to accept with case. For this we use cmd_match.
@@ -2471,10 +2510,16 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
                         err = -EBUSY;
                 else {
                         struct mddev *mddev = rdev->mddev;
-                       kick_rdev_from_array(rdev);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->remove_disk(mddev, rdev);
+                       md_kick_rdev_from_array(rdev);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_start(mddev);
                         if (mddev->pers)
                                 md_update_sb(mddev, 1);
                         md_new_event(mddev);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_finish(mddev);
                         err = 0;
                 }
         } else if (cmd_match(buf, "writemostly")) {
@@ -2553,6 +2598,21 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
                         clear_bit(Replacement, &rdev->flags);
                         err = 0;
                 }
+       } else if (cmd_match(buf, "re-add")) {
+               if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
+                       /* clear_bit is performed _after_ all the devices
+                        * have their local Faulty bit cleared. If any writes
+                        * happen in the meantime in the local node, they
+                        * will land in the local bitmap, which will be synced
+                        * by this node eventually
+                        */
+                       if (!mddev_is_clustered(rdev->mddev) ||
+                           (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) {
+                               clear_bit(Faulty, &rdev->flags);
+                               err = add_bound_rdev(rdev);
+                       }
+               } else
+                       err = -EBUSY;
         }
         if (!err)
                 sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -3127,7 +3187,7 @@ static void analyze_sbs(struct mddev *mddev)
                                 "md: fatal superblock inconsistency in %s"
                                 " -- removing from array\n",
                                 bdevname(rdev->bdev,b));
-                       kick_rdev_from_array(rdev);
+                       md_kick_rdev_from_array(rdev);
                 }
  
         super_types[mddev->major_version].
@@ -3142,18 +3202,27 @@ static void analyze_sbs(struct mddev *mddev)
                                "md: %s: %s: only %d devices permitted\n",
                                mdname(mddev), bdevname(rdev->bdev, b),
                                mddev->max_disks);
-                       kick_rdev_from_array(rdev);
+                       md_kick_rdev_from_array(rdev);
                         continue;
                 }
-               if (rdev != freshest)
+               if (rdev != freshest) {
                         if (super_types[mddev->major_version].
                             validate_super(mddev, rdev)) {
                                 printk(KERN_WARNING "md: kicking non-fresh %s"
                                         " from array!\n",
                                         bdevname(rdev->bdev,b));
-                               kick_rdev_from_array(rdev);
+                               md_kick_rdev_from_array(rdev);
                                 continue;
                         }
+                       /* No device should have a Candidate flag
+                        * when reading devices
+                        */
+                       if (test_bit(Candidate, &rdev->flags)) {
+                               pr_info("md: kicking Cluster Candidate %s from array!\n",
+                                       bdevname(rdev->bdev, b));
+                               md_kick_rdev_from_array(rdev);
+                       }
+               }
                 if (mddev->level == LEVEL_MULTIPATH) {
                         rdev->desc_nr = i++;
                         rdev->raid_disk = rdev->desc_nr;
@@ -4008,8 +4077,12 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
         if (err)
                 return err;
         if (mddev->pers) {
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_start(mddev);
                 err = update_size(mddev, sectors);
                 md_update_sb(mddev, 1);
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_finish(mddev);
         } else {
                 if (mddev->dev_sectors == 0 ||
                     mddev->dev_sectors > sectors)
@@ -4354,7 +4427,6 @@ min_sync_store(struct mddev *mddev, const char *buf, size_t len)
  {
         unsigned long long min;
         int err;
-       int chunk;
  
         if (kstrtoull(buf, 10, &min))
                 return -EINVAL;
@@ -4368,16 +4440,8 @@ min_sync_store(struct mddev *mddev, const char *buf, size_t len)
         if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                 goto out_unlock;
  
-       /* Must be a multiple of chunk_size */
-       chunk = mddev->chunk_sectors;
-       if (chunk) {
-               sector_t temp = min;
-
-               err = -EINVAL;
-               if (sector_div(temp, chunk))
-                       goto out_unlock;
-       }
-       mddev->resync_min = min;
+       /* Round down to multiple of 4K for safety */
+       mddev->resync_min = round_down(min, 8);
         err = 0;
  
  out_unlock:
@@ -5077,10 +5141,16 @@ int md_run(struct mddev *mddev)
         }
         if (err == 0 && pers->sync_request &&
             (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
-               err = bitmap_create(mddev);
-               if (err)
+               struct bitmap *bitmap;
+
+               bitmap = bitmap_create(mddev, -1);
+               if (IS_ERR(bitmap)) {
+                       err = PTR_ERR(bitmap);
                         printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
                                mdname(mddev), err);
+               } else
+                       mddev->bitmap = bitmap;
+
         }
         if (err) {
                 mddev_detach(mddev);
@@ -5232,6 +5302,8 @@ static void md_clean(struct mddev *mddev)
  
  static void __md_stop_writes(struct mddev *mddev)
  {
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
         set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
         flush_workqueue(md_misc_wq);
         if (mddev->sync_thread) {
@@ -5250,6 +5322,8 @@ static void __md_stop_writes(struct mddev *mddev)
                 mddev->in_sync = 1;
                 md_update_sb(mddev, 1);
         }
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
  }
  
  void md_stop_writes(struct mddev *mddev)
@@ -5636,6 +5710,8 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
                 info.state = (1<<MD_SB_CLEAN);
         if (mddev->bitmap && mddev->bitmap_info.offset)
                 info.state |= (1<<MD_SB_BITMAP_PRESENT);
+       if (mddev_is_clustered(mddev))
+               info.state |= (1<<MD_SB_CLUSTERED);
         info.active_disks  = insync;
         info.working_disks = working;
         info.failed_disks  = failed;
@@ -5691,7 +5767,7 @@ static int get_disk_info(struct mddev *mddev, void __user * arg)
                 return -EFAULT;
  
         rcu_read_lock();
-       rdev = find_rdev_nr_rcu(mddev, info.number);
+       rdev = md_find_rdev_nr_rcu(mddev, info.number);
         if (rdev) {
                 info.major = MAJOR(rdev->bdev->bd_dev);
                 info.minor = MINOR(rdev->bdev->bd_dev);
@@ -5724,6 +5800,13 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
         struct md_rdev *rdev;
         dev_t dev = MKDEV(info->major,info->minor);
  
+       if (mddev_is_clustered(mddev) &&
+               !(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
+               pr_err("%s: Cannot add to clustered mddev.\n",
+                              mdname(mddev));
+               return -EINVAL;
+       }
+
         if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
                 return -EOVERFLOW;
  
@@ -5810,31 +5893,38 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
                 else
                         clear_bit(WriteMostly, &rdev->flags);
  
+               /*
+                * check whether the device shows up in other nodes
+                */
+               if (mddev_is_clustered(mddev)) {
+                       if (info->state & (1 << MD_DISK_CANDIDATE)) {
+                               /* Through --cluster-confirm */
+                               set_bit(Candidate, &rdev->flags);
+                               err = md_cluster_ops->new_disk_ack(mddev, true);
+                               if (err) {
+                                       export_rdev(rdev);
+                                       return err;
+                               }
+                       } else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
+                               /* --add initiated by this node */
+                               err = md_cluster_ops->add_new_disk_start(mddev, rdev);
+                               if (err) {
+                                       md_cluster_ops->add_new_disk_finish(mddev);
+                                       export_rdev(rdev);
+                                       return err;
+                               }
+                       }
+               }
+
                 rdev->raid_disk = -1;
                 err = bind_rdev_to_array(rdev, mddev);
-               if (!err && !mddev->pers->hot_remove_disk) {
-                       /* If there is hot_add_disk but no hot_remove_disk
-                        * then added disks for geometry changes,
-                        * and should be added immediately.
-                        */
-                       super_types[mddev->major_version].
-                               validate_super(mddev, rdev);
-                       err = mddev->pers->hot_add_disk(mddev, rdev);
-                       if (err)
-                               unbind_rdev_from_array(rdev);
-               }
                 if (err)
                         export_rdev(rdev);
                 else
-                       sysfs_notify_dirent_safe(rdev->sysfs_state);
-
-               set_bit(MD_CHANGE_DEVS, &mddev->flags);
-               if (mddev->degraded)
-                       set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
-               if (!err)
-                       md_new_event(mddev);
-               md_wakeup_thread(mddev->thread);
+                       err = add_bound_rdev(rdev);
+               if (mddev_is_clustered(mddev) &&
+                               (info->state & (1 << MD_DISK_CLUSTER_ADD)))
+                       md_cluster_ops->add_new_disk_finish(mddev);
                 return err;
         }
  
@@ -5895,18 +5985,29 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
         if (!rdev)
                 return -ENXIO;
  
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
+
         clear_bit(Blocked, &rdev->flags);
         remove_and_add_spares(mddev, rdev);
  
         if (rdev->raid_disk >= 0)
                 goto busy;
  
-       kick_rdev_from_array(rdev);
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->remove_disk(mddev, rdev);
+
+       md_kick_rdev_from_array(rdev);
         md_update_sb(mddev, 1);
         md_new_event(mddev);
  
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
+
         return 0;
  busy:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_cancel(mddev);
         printk(KERN_WARNING "md: cannot remove active disk %s from %s ...\n",
                 bdevname(rdev->bdev,b), mdname(mddev));
         return -EBUSY;
@@ -5956,12 +6057,15 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
                 err = -EINVAL;
                 goto abort_export;
         }
+
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
         clear_bit(In_sync, &rdev->flags);
         rdev->desc_nr = -1;
         rdev->saved_raid_disk = -1;
         err = bind_rdev_to_array(rdev, mddev);
         if (err)
-               goto abort_export;
+               goto abort_clustered;
  
         /*
          * The rest should better be atomic, we can have disk failures
@@ -5972,6 +6076,8 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
  
         md_update_sb(mddev, 1);
  
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
         /*
          * Kick recovery, maybe this spare has to be added to the
          * array immediately.
@@ -5981,6 +6087,9 @@ static int hot_add_disk(struct mddev *mddev, dev_t dev)
         md_new_event(mddev);
         return 0;
  
+abort_clustered:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_cancel(mddev);
  abort_export:
         export_rdev(rdev);
         return err;
@@ -6038,9 +6147,14 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
         if (mddev->pers) {
                 mddev->pers->quiesce(mddev, 1);
                 if (fd >= 0) {
-                       err = bitmap_create(mddev);
-                       if (!err)
+                       struct bitmap *bitmap;
+
+                       bitmap = bitmap_create(mddev, -1);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
                                 err = bitmap_load(mddev);
+                       } else
+                               err = PTR_ERR(bitmap);
                 }
                 if (fd < 0 || err) {
                         bitmap_destroy(mddev);
@@ -6293,6 +6407,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                         return rv;
                 }
         }
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
         if (info->size >= 0 && mddev->dev_sectors / 2 != info->size)
                 rv = update_size(mddev, (sector_t)info->size * 2);
  
@@ -6300,33 +6416,49 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                 rv = update_raid_disks(mddev, info->raid_disks);
  
         if ((state ^ info->state) & (1<<MD_SB_BITMAP_PRESENT)) {
-               if (mddev->pers->quiesce == NULL || mddev->thread == NULL)
-                       return -EINVAL;
-               if (mddev->recovery || mddev->sync_thread)
-                       return -EBUSY;
+               if (mddev->pers->quiesce == NULL || mddev->thread == NULL) {
+                       rv = -EINVAL;
+                       goto err;
+               }
+               if (mddev->recovery || mddev->sync_thread) {
+                       rv = -EBUSY;
+                       goto err;
+               }
                 if (info->state & (1<<MD_SB_BITMAP_PRESENT)) {
+                       struct bitmap *bitmap;
                         /* add the bitmap */
-                       if (mddev->bitmap)
-                               return -EEXIST;
-                       if (mddev->bitmap_info.default_offset == 0)
-                               return -EINVAL;
+                       if (mddev->bitmap) {
+                               rv = -EEXIST;
+                               goto err;
+                       }
+                       if (mddev->bitmap_info.default_offset == 0) {
+                               rv = -EINVAL;
+                               goto err;
+                       }
                         mddev->bitmap_info.offset =
                                 mddev->bitmap_info.default_offset;
                         mddev->bitmap_info.space =
                                 mddev->bitmap_info.default_space;
                         mddev->pers->quiesce(mddev, 1);
-                       rv = bitmap_create(mddev);
-                       if (!rv)
+                       bitmap = bitmap_create(mddev, -1);
+                       if (!IS_ERR(bitmap)) {
+                               mddev->bitmap = bitmap;
                                 rv = bitmap_load(mddev);
+                       } else
+                               rv = PTR_ERR(bitmap);
                         if (rv)
                                 bitmap_destroy(mddev);
                         mddev->pers->quiesce(mddev, 0);
                 } else {
                         /* remove the bitmap */
-                       if (!mddev->bitmap)
-                               return -ENOENT;
-                       if (mddev->bitmap->storage.file)
-                               return -EINVAL;
+                       if (!mddev->bitmap) {
+                               rv = -ENOENT;
+                               goto err;
+                       }
+                       if (mddev->bitmap->storage.file) {
+                               rv = -EINVAL;
+                               goto err;
+                       }
                         mddev->pers->quiesce(mddev, 1);
                         bitmap_destroy(mddev);
                         mddev->pers->quiesce(mddev, 0);
@@ -6334,6 +6466,12 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
                 }
         }
         md_update_sb(mddev, 1);
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
+       return rv;
+err:
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_cancel(mddev);
         return rv;
  }
  
@@ -6393,6 +6531,7 @@ static inline bool md_ioctl_valid(unsigned int cmd)
         case SET_DISK_FAULTY:
         case STOP_ARRAY:
         case STOP_ARRAY_RO:
+       case CLUSTERED_DISK_NACK:
                 return true;
         default:
                 return false;
@@ -6665,6 +6804,13 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
                 goto unlock;
         }
  
+       case CLUSTERED_DISK_NACK:
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->new_disk_ack(mddev, false);
+               else
+                       err = -EINVAL;
+               goto unlock;
+
         case HOT_ADD_DISK:
                 err = hot_add_disk(mddev, new_decode_dev(arg));
                 goto unlock;
@@ -7238,6 +7384,55 @@ int unregister_md_personality(struct md_personality *p)
  }
  EXPORT_SYMBOL(unregister_md_personality);
  
+int register_md_cluster_operations(struct md_cluster_operations *ops, struct module *module)
+{
+       if (md_cluster_ops != NULL)
+               return -EALREADY;
+       spin_lock(&pers_lock);
+       md_cluster_ops = ops;
+       md_cluster_mod = module;
+       spin_unlock(&pers_lock);
+       return 0;
+}
+EXPORT_SYMBOL(register_md_cluster_operations);
+
+int unregister_md_cluster_operations(void)
+{
+       spin_lock(&pers_lock);
+       md_cluster_ops = NULL;
+       spin_unlock(&pers_lock);
+       return 0;
+}
+EXPORT_SYMBOL(unregister_md_cluster_operations);
+
+int md_setup_cluster(struct mddev *mddev, int nodes)
+{
+       int err;
+
+       err = request_module("md-cluster");
+       if (err) {
+               pr_err("md-cluster module not found.\n");
+               return err;
+       }
+
+       spin_lock(&pers_lock);
+       if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
+               spin_unlock(&pers_lock);
+               return -ENOENT;
+       }
+       spin_unlock(&pers_lock);
+
+       return md_cluster_ops->join(mddev, nodes);
+}
+
+void md_cluster_stop(struct mddev *mddev)
+{
+       if (!md_cluster_ops)
+               return;
+       md_cluster_ops->leave(mddev);
+       module_put(md_cluster_mod);
+}
+
  static int is_mddev_idle(struct mddev *mddev, int init)
  {
         struct md_rdev *rdev;
@@ -7375,7 +7570,11 @@ int md_allow_write(struct mddev *mddev)
                     mddev->safemode == 0)
                         mddev->safemode = 1;
                 spin_unlock(&mddev->lock);
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_start(mddev);
                 md_update_sb(mddev, 0);
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->metadata_update_finish(mddev);
                 sysfs_notify_dirent_safe(mddev->sysfs_state);
         } else
                 spin_unlock(&mddev->lock);
@@ -7576,6 +7775,9 @@ void md_do_sync(struct md_thread *thread)
         md_new_event(mddev);
         update_time = jiffies;
  
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->resync_start(mddev, j, max_sectors);
+
         blk_start_plug(&plug);
         while (j < max_sectors) {
                 sector_t sectors;
@@ -7618,8 +7820,7 @@ void md_do_sync(struct md_thread *thread)
                 if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
                         break;
  
-               sectors = mddev->pers->sync_request(mddev, j, &skipped,
-                                                 currspeed < speed_min(mddev));
+               sectors = mddev->pers->sync_request(mddev, j, &skipped);
                 if (sectors == 0) {
                         set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                         break;
@@ -7636,6 +7837,8 @@ void md_do_sync(struct md_thread *thread)
                 j += sectors;
                 if (j > 2)
                         mddev->curr_resync = j;
+               if (mddev_is_clustered(mddev))
+                       md_cluster_ops->resync_info_update(mddev, j, max_sectors);
                 mddev->curr_mark_cnt = io_sectors;
                 if (last_check == 0)
                         /* this is the earliest that rebuild will be
@@ -7677,11 +7880,18 @@ void md_do_sync(struct md_thread *thread)
                         /((jiffies-mddev->resync_mark)/HZ +1) +1;
  
                 if (currspeed > speed_min(mddev)) {
-                       if ((currspeed > speed_max(mddev)) ||
-                                       !is_mddev_idle(mddev, 0)) {
+                       if (currspeed > speed_max(mddev)) {
                                 msleep(500);
                                 goto repeat;
                         }
+                       if (!is_mddev_idle(mddev, 0)) {
+                               /*
+                                * Give other IO more of a chance.
+                                * The faster the devices, the less we wait.
+                                */
+                               wait_event(mddev->recovery_wait,
+                                          !atomic_read(&mddev->recovery_active));
+                       }
                 }
         }
         printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
@@ -7694,7 +7904,10 @@ void md_do_sync(struct md_thread *thread)
         wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
  
         /* tell personality that we are finished */
-       mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
+       mddev->pers->sync_request(mddev, max_sectors, &skipped);
+
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->resync_finish(mddev);
  
         if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
             mddev->curr_resync > 2) {
@@ -7925,8 +8138,13 @@ void md_check_recovery(struct mddev *mddev)
                                 sysfs_notify_dirent_safe(mddev->sysfs_state);
                 }
  
-               if (mddev->flags & MD_UPDATE_SB_FLAGS)
+               if (mddev->flags & MD_UPDATE_SB_FLAGS) {
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_start(mddev);
                         md_update_sb(mddev, 0);
+                       if (mddev_is_clustered(mddev))
+                               md_cluster_ops->metadata_update_finish(mddev);
+               }
  
                 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
                     !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
@@ -8024,6 +8242,8 @@ void md_reap_sync_thread(struct mddev *mddev)
                         set_bit(MD_CHANGE_DEVS, &mddev->flags);
                 }
         }
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_start(mddev);
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
             mddev->pers->finish_reshape)
                 mddev->pers->finish_reshape(mddev);
@@ -8036,6 +8256,8 @@ void md_reap_sync_thread(struct mddev *mddev)
                         rdev->saved_raid_disk = -1;
  
         md_update_sb(mddev, 1);
+       if (mddev_is_clustered(mddev))
+               md_cluster_ops->metadata_update_finish(mddev);
         clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
         clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
         clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
@@ -8656,6 +8878,28 @@ err_wq:
         return ret;
  }
  
+void md_reload_sb(struct mddev *mddev)
+{
+       struct md_rdev *rdev, *tmp;
+
+       rdev_for_each_safe(rdev, tmp, mddev) {
+               rdev->sb_loaded = 0;
+               ClearPageUptodate(rdev->sb_page);
+       }
+       mddev->raid_disks = 0;
+       analyze_sbs(mddev);
+       rdev_for_each_safe(rdev, tmp, mddev) {
+               struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
+               /* since we don't write to faulty devices, we figure out if the
+                *  disk is faulty by comparing events
+                */
+               if (mddev->events > sb->events)
+                       set_bit(Faulty, &rdev->flags);
+       }
+
+}
+EXPORT_SYMBOL(md_reload_sb);
+
  #ifndef MODULE
  
  /*
diff --git a/drivers/md/md.h b/drivers/md/md.h

index 318ca8f..4046a6c 100644 (file)
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -23,6 +23,7 @@
  #include <linux/timer.h>
  #include <linux/wait.h>
  #include <linux/workqueue.h>
+#include "md-cluster.h"
  
  #define MaxSector (~(sector_t)0)
  
@@ -170,6 +171,10 @@ enum flag_bits {
                                  * a want_replacement device with same
                                  * raid_disk number.
                                  */
+       Candidate,              /* For clustered environments only:
+                                * This device is seen locally but not
+                                * by the whole cluster
+                                */
  };
  
  #define BB_LEN_MASK    (0x00000000000001FFULL)
@@ -202,6 +207,8 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                                 int is_new);
  extern void md_ack_all_badblocks(struct badblocks *bb);
  
+struct md_cluster_info;
+
  struct mddev {
         void                            *private;
         struct md_personality           *pers;
@@ -430,6 +437,8 @@ struct mddev {
                 unsigned long           daemon_sleep; /* how many jiffies between updates? */
                 unsigned long           max_write_behind; /* write-behind mode */
                 int                     external;
+               int                     nodes; /* Maximum number of nodes in the cluster */
+               char                    cluster_name[64]; /* Name of the cluster */
         } bitmap_info;
  
         atomic_t                        max_corr_read_errors; /* max read retries */
@@ -448,6 +457,7 @@ struct mddev {
         struct work_struct flush_work;
         struct work_struct event_work;  /* used by dm to report failure event */
         void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
+       struct md_cluster_info          *cluster_info;
  };
  
  static inline int __must_check mddev_lock(struct mddev *mddev)
@@ -496,7 +506,7 @@ struct md_personality
         int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
         int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
         int (*spare_active) (struct mddev *mddev);
-       sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster);
+       sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
         int (*resize) (struct mddev *mddev, sector_t sectors);
         sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
         int (*check_reshape) (struct mddev *mddev);
@@ -608,6 +618,11 @@ static inline void safe_put_page(struct page *p)
  
  extern int register_md_personality(struct md_personality *p);
  extern int unregister_md_personality(struct md_personality *p);
+extern int register_md_cluster_operations(struct md_cluster_operations *ops,
+               struct module *module);
+extern int unregister_md_cluster_operations(void);
+extern int md_setup_cluster(struct mddev *mddev, int nodes);
+extern void md_cluster_stop(struct mddev *mddev);
  extern struct md_thread *md_register_thread(
         void (*run)(struct md_thread *thread),
         struct mddev *mddev,
@@ -654,6 +669,10 @@ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
                                    struct mddev *mddev);
  
  extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
+extern void md_reload_sb(struct mddev *mddev);
+extern void md_update_sb(struct mddev *mddev, int force);
+extern void md_kick_rdev_from_array(struct md_rdev * rdev);
+struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
  static inline int mddev_check_plugged(struct mddev *mddev)
  {
         return !!blk_check_plugged(md_unplug, mddev,
@@ -669,4 +688,9 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
         }
  }
  
+extern struct md_cluster_operations *md_cluster_ops;
+static inline int mddev_is_clustered(struct mddev *mddev)
+{
+       return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
+}
  #endif /* _MD_MD_H */
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c

index 3b5d7f7..2cb59a6 100644 (file)
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -271,14 +271,16 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
                 goto abort;
         }
  
-       blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
-       blk_queue_io_opt(mddev->queue,
-                        (mddev->chunk_sectors << 9) * mddev->raid_disks);
-
-       if (!discard_supported)
-               queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
-       else
-               queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+       if (mddev->queue) {
+               blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
+               blk_queue_io_opt(mddev->queue,
+                                (mddev->chunk_sectors << 9) * mddev->raid_disks);
+
+               if (!discard_supported)
+                       queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+               else
+                       queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+       }
  
         pr_debug("md/raid0:%s: done.\n", mdname(mddev));
         *private_conf = conf;
@@ -429,9 +431,12 @@ static int raid0_run(struct mddev *mddev)
         }
         if (md_check_no_bitmap(mddev))
                 return -EINVAL;
-       blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
-       blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
-       blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+
+       if (mddev->queue) {
+               blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
+               blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
+               blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+       }
  
         /* if private is not null, we are here after takeover */
         if (mddev->private == NULL) {
@@ -448,16 +453,17 @@ static int raid0_run(struct mddev *mddev)
         printk(KERN_INFO "md/raid0:%s: md_size is %llu sectors.\n",
                mdname(mddev),
                (unsigned long long)mddev->array_sectors);
-       /* calculate the max read-ahead size.
-        * For read-ahead of large files to be effective, we need to
-        * readahead at least twice a whole stripe. i.e. number of devices
-        * multiplied by chunk size times 2.
-        * If an individual device has an ra_pages greater than the
-        * chunk size, then we will not drive that device as hard as it
-        * wants.  We consider this a configuration error: a larger
-        * chunksize should be used in that case.
-        */
-       {
+
+       if (mddev->queue) {
+               /* calculate the max read-ahead size.
+                * For read-ahead of large files to be effective, we need to
+                * readahead at least twice a whole stripe. i.e. number of devices
+                * multiplied by chunk size times 2.
+                * If an individual device has an ra_pages greater than the
+                * chunk size, then we will not drive that device as hard as it
+                * wants.  We consider this a configuration error: a larger
+                * chunksize should be used in that case.
+                */
                 int stripe = mddev->raid_disks *
                         (mddev->chunk_sectors << 9) / PAGE_SIZE;
                 if (mddev->queue->backing_dev_info.ra_pages < 2* stripe)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c

index d34e238..9157a29 100644 (file)
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -539,7 +539,13 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
         has_nonrot_disk = 0;
         choose_next_idle = 0;
  
-       choose_first = (conf->mddev->recovery_cp < this_sector + sectors);
+       if ((conf->mddev->recovery_cp < this_sector + sectors) ||
+           (mddev_is_clustered(conf->mddev) &&
+           md_cluster_ops->area_resyncing(conf->mddev, this_sector,
+                   this_sector + sectors)))
+               choose_first = 1;
+       else
+               choose_first = 0;
  
         for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
                 sector_t dist;
@@ -1102,8 +1108,10 @@ static void make_request(struct mddev *mddev, struct bio * bio)
         md_write_start(mddev, bio); /* wait on superblock update early */
  
         if (bio_data_dir(bio) == WRITE &&
-           bio_end_sector(bio) > mddev->suspend_lo &&
-           bio->bi_iter.bi_sector < mddev->suspend_hi) {
+           ((bio_end_sector(bio) > mddev->suspend_lo &&
+           bio->bi_iter.bi_sector < mddev->suspend_hi) ||
+           (mddev_is_clustered(mddev) &&
+            md_cluster_ops->area_resyncing(mddev, bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
                 /* As the suspend_* range is controlled by
                  * userspace, we want an interruptible
                  * wait.
@@ -1114,7 +1122,10 @@ static void make_request(struct mddev *mddev, struct bio * bio)
                         prepare_to_wait(&conf->wait_barrier,
                                         &w, TASK_INTERRUPTIBLE);
                         if (bio_end_sector(bio) <= mddev->suspend_lo ||
-                           bio->bi_iter.bi_sector >= mddev->suspend_hi)
+                           bio->bi_iter.bi_sector >= mddev->suspend_hi ||
+                           (mddev_is_clustered(mddev) &&
+                            !md_cluster_ops->area_resyncing(mddev,
+                                    bio->bi_iter.bi_sector, bio_end_sector(bio))))
                                 break;
                         schedule();
                 }
@@ -1561,6 +1572,7 @@ static int raid1_spare_active(struct mddev *mddev)
                 struct md_rdev *rdev = conf->mirrors[i].rdev;
                 struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
                 if (repl
+                   && !test_bit(Candidate, &repl->flags)
                     && repl->recovery_offset == MaxSector
                     && !test_bit(Faulty, &repl->flags)
                     && !test_and_set_bit(In_sync, &repl->flags)) {
@@ -2468,7 +2480,7 @@ static int init_resync(struct r1conf *conf)
   * that can be installed to exclude normal IO requests.
   */
  
-static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster)
+static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
  {
         struct r1conf *conf = mddev->private;
         struct r1bio *r1_bio;
@@ -2521,13 +2533,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
                 *skipped = 1;
                 return sync_blocks;
         }
-       /*
-        * If there is non-resync activity waiting for a turn,
-        * and resync is going fast enough,
-        * then let it though before starting on this new sync request.
-        */
-       if (!go_faster && conf->nr_waiting)
-               msleep_interruptible(1000);
  
         bitmap_cond_end_sync(mddev->bitmap, sector_nr);
         r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index a7196c4..e793ab6 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2889,7 +2889,7 @@ static int init_resync(struct r10conf *conf)
   */
  
  static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
-                            int *skipped, int go_faster)
+                            int *skipped)
  {
         struct r10conf *conf = mddev->private;
         struct r10bio *r10_bio;
@@ -2994,12 +2994,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
         if (conf->geo.near_copies < conf->geo.raid_disks &&
             max_sector > (sector_nr | chunk_mask))
                 max_sector = (sector_nr | chunk_mask) + 1;
-       /*
-        * If there is non-resync activity waiting for us then
-        * put in a delay to throttle resync.
-        */
-       if (!go_faster && conf->nr_waiting)
-               msleep_interruptible(1000);
  
         /* Again, very different code for resync and recovery.
          * Both must result in an r10bio with a list of bios that
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c

index cd2f96b..77dfd72 100644 (file)
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -54,6 +54,7 @@
  #include <linux/slab.h>
  #include <linux/ratelimit.h>
  #include <linux/nodemask.h>
+#include <linux/flex_array.h>
  #include <trace/events/block.h>
  
  #include "md.h"
@@ -496,7 +497,7 @@ static void shrink_buffers(struct stripe_head *sh)
         }
  }
  
-static int grow_buffers(struct stripe_head *sh)
+static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
  {
         int i;
         int num = sh->raid_conf->pool_size;
@@ -504,7 +505,7 @@ static int grow_buffers(struct stripe_head *sh)
         for (i = 0; i < num; i++) {
                 struct page *page;
  
-               if (!(page = alloc_page(GFP_KERNEL))) {
+               if (!(page = alloc_page(gfp))) {
                         return 1;
                 }
                 sh->dev[i].page = page;
@@ -525,6 +526,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous)
         BUG_ON(atomic_read(&sh->count) != 0);
         BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
         BUG_ON(stripe_operations_active(sh));
+       BUG_ON(sh->batch_head);
  
         pr_debug("init_stripe called, stripe %llu\n",
                 (unsigned long long)sector);
@@ -552,8 +554,10 @@ retry:
         }
         if (read_seqcount_retry(&conf->gen_lock, seq))
                 goto retry;
+       sh->overwrite_disks = 0;
         insert_hash(conf, sh);
         sh->cpu = smp_processor_id();
+       set_bit(STRIPE_BATCH_READY, &sh->state);
  }
  
  static struct stripe_head *__find_stripe(struct r5conf *conf, sector_t sector,
@@ -668,20 +672,28 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
                                     *(conf->hash_locks + hash));
                 sh = __find_stripe(conf, sector, conf->generation - previous);
                 if (!sh) {
-                       if (!conf->inactive_blocked)
+                       if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
                                 sh = get_free_stripe(conf, hash);
+                               if (!sh && llist_empty(&conf->released_stripes) &&
+                                   !test_bit(R5_DID_ALLOC, &conf->cache_state))
+                                       set_bit(R5_ALLOC_MORE,
+                                               &conf->cache_state);
+                       }
                         if (noblock && sh == NULL)
                                 break;
                         if (!sh) {
-                               conf->inactive_blocked = 1;
+                               set_bit(R5_INACTIVE_BLOCKED,
+                                       &conf->cache_state);
                                 wait_event_lock_irq(
                                         conf->wait_for_stripe,
                                         !list_empty(conf->inactive_list + hash) &&
                                         (atomic_read(&conf->active_stripes)
                                          < (conf->max_nr_stripes * 3 / 4)
-                                        || !conf->inactive_blocked),
+                                        || !test_bit(R5_INACTIVE_BLOCKED,
+                                                     &conf->cache_state)),
                                         *(conf->hash_locks + hash));
-                               conf->inactive_blocked = 0;
+                               clear_bit(R5_INACTIVE_BLOCKED,
+                                         &conf->cache_state);
                         } else {
                                 init_stripe(sh, sector, previous);
                                 atomic_inc(&sh->count);
@@ -708,6 +720,130 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
         return sh;
  }
  
+static bool is_full_stripe_write(struct stripe_head *sh)
+{
+       BUG_ON(sh->overwrite_disks > (sh->disks - sh->raid_conf->max_degraded));
+       return sh->overwrite_disks == (sh->disks - sh->raid_conf->max_degraded);
+}
+
+static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
+{
+       local_irq_disable();
+       if (sh1 > sh2) {
+               spin_lock(&sh2->stripe_lock);
+               spin_lock_nested(&sh1->stripe_lock, 1);
+       } else {
+               spin_lock(&sh1->stripe_lock);
+               spin_lock_nested(&sh2->stripe_lock, 1);
+       }
+}
+
+static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
+{
+       spin_unlock(&sh1->stripe_lock);
+       spin_unlock(&sh2->stripe_lock);
+       local_irq_enable();
+}
+
+/* Only freshly new full stripe normal write stripe can be added to a batch list */
+static bool stripe_can_batch(struct stripe_head *sh)
+{
+       return test_bit(STRIPE_BATCH_READY, &sh->state) &&
+               is_full_stripe_write(sh);
+}
+
+/* we only do back search */
+static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh)
+{
+       struct stripe_head *head;
+       sector_t head_sector, tmp_sec;
+       int hash;
+       int dd_idx;
+
+       if (!stripe_can_batch(sh))
+               return;
+       /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
+       tmp_sec = sh->sector;
+       if (!sector_div(tmp_sec, conf->chunk_sectors))
+               return;
+       head_sector = sh->sector - STRIPE_SECTORS;
+
+       hash = stripe_hash_locks_hash(head_sector);
+       spin_lock_irq(conf->hash_locks + hash);
+       head = __find_stripe(conf, head_sector, conf->generation);
+       if (head && !atomic_inc_not_zero(&head->count)) {
+               spin_lock(&conf->device_lock);
+               if (!atomic_read(&head->count)) {
+                       if (!test_bit(STRIPE_HANDLE, &head->state))
+                               atomic_inc(&conf->active_stripes);
+                       BUG_ON(list_empty(&head->lru) &&
+                              !test_bit(STRIPE_EXPANDING, &head->state));
+                       list_del_init(&head->lru);
+                       if (head->group) {
+                               head->group->stripes_cnt--;
+                               head->group = NULL;
+                       }
+               }
+               atomic_inc(&head->count);
+               spin_unlock(&conf->device_lock);
+       }
+       spin_unlock_irq(conf->hash_locks + hash);
+
+       if (!head)
+               return;
+       if (!stripe_can_batch(head))
+               goto out;
+
+       lock_two_stripes(head, sh);
+       /* clear_batch_ready clear the flag */
+       if (!stripe_can_batch(head) || !stripe_can_batch(sh))
+               goto unlock_out;
+
+       if (sh->batch_head)
+               goto unlock_out;
+
+       dd_idx = 0;
+       while (dd_idx == sh->pd_idx || dd_idx == sh->qd_idx)
+               dd_idx++;
+       if (head->dev[dd_idx].towrite->bi_rw != sh->dev[dd_idx].towrite->bi_rw)
+               goto unlock_out;
+
+       if (head->batch_head) {
+               spin_lock(&head->batch_head->batch_lock);
+               /* This batch list is already running */
+               if (!stripe_can_batch(head)) {
+                       spin_unlock(&head->batch_head->batch_lock);
+                       goto unlock_out;
+               }
+
+               /*
+                * at this point, head's BATCH_READY could be cleared, but we
+                * can still add the stripe to batch list
+                */
+               list_add(&sh->batch_list, &head->batch_list);
+               spin_unlock(&head->batch_head->batch_lock);
+
+               sh->batch_head = head->batch_head;
+       } else {
+               head->batch_head = head;
+               sh->batch_head = head->batch_head;
+               spin_lock(&head->batch_lock);
+               list_add_tail(&sh->batch_list, &head->batch_list);
+               spin_unlock(&head->batch_lock);
+       }
+
+       if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+               if (atomic_dec_return(&conf->preread_active_stripes)
+                   < IO_THRESHOLD)
+                       md_wakeup_thread(conf->mddev->thread);
+
+       atomic_inc(&sh->count);
+unlock_out:
+       unlock_two_stripes(head, sh);
+out:
+       release_stripe(head);
+}
+
  /* Determine if 'data_offset' or 'new_data_offset' should be used
   * in this stripe_head.
   */
@@ -738,6 +874,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
  {
         struct r5conf *conf = sh->raid_conf;
         int i, disks = sh->disks;
+       struct stripe_head *head_sh = sh;
  
         might_sleep();
  
@@ -746,6 +883,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                 int replace_only = 0;
                 struct bio *bi, *rbi;
                 struct md_rdev *rdev, *rrdev = NULL;
+
+               sh = head_sh;
                 if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) {
                         if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags))
                                 rw = WRITE_FUA;
@@ -764,6 +903,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                 if (test_and_clear_bit(R5_SyncIO, &sh->dev[i].flags))
                         rw |= REQ_SYNC;
  
+again:
                 bi = &sh->dev[i].req;
                 rbi = &sh->dev[i].rreq; /* For writing to replacement */
  
@@ -782,7 +922,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                 /* We raced and saw duplicates */
                                 rrdev = NULL;
                 } else {
-                       if (test_bit(R5_ReadRepl, &sh->dev[i].flags) && rrdev)
+                       if (test_bit(R5_ReadRepl, &head_sh->dev[i].flags) && rrdev)
                                 rdev = rrdev;
                         rrdev = NULL;
                 }
@@ -853,13 +993,15 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                 __func__, (unsigned long long)sh->sector,
                                 bi->bi_rw, i);
                         atomic_inc(&sh->count);
+                       if (sh != head_sh)
+                               atomic_inc(&head_sh->count);
                         if (use_new_offset(conf, sh))
                                 bi->bi_iter.bi_sector = (sh->sector
                                                  + rdev->new_data_offset);
                         else
                                 bi->bi_iter.bi_sector = (sh->sector
                                                  + rdev->data_offset);
-                       if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
+                       if (test_bit(R5_ReadNoMerge, &head_sh->dev[i].flags))
                                 bi->bi_rw |= REQ_NOMERGE;
  
                         if (test_bit(R5_SkipCopy, &sh->dev[i].flags))
@@ -903,6 +1045,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                                 __func__, (unsigned long long)sh->sector,
                                 rbi->bi_rw, i);
                         atomic_inc(&sh->count);
+                       if (sh != head_sh)
+                               atomic_inc(&head_sh->count);
                         if (use_new_offset(conf, sh))
                                 rbi->bi_iter.bi_sector = (sh->sector
                                                   + rrdev->new_data_offset);
@@ -934,8 +1078,18 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
                         pr_debug("skip op %ld on disc %d for sector %llu\n",
                                 bi->bi_rw, i, (unsigned long long)sh->sector);
                         clear_bit(R5_LOCKED, &sh->dev[i].flags);
+                       if (sh->batch_head)
+                               set_bit(STRIPE_BATCH_ERR,
+                                       &sh->batch_head->state);
                         set_bit(STRIPE_HANDLE, &sh->state);
                 }
+
+               if (!head_sh->batch_head)
+                       continue;
+               sh = list_first_entry(&sh->batch_list, struct stripe_head,
+                                     batch_list);
+               if (sh != head_sh)
+                       goto again;
         }
  }
  
@@ -1051,6 +1205,7 @@ static void ops_run_biofill(struct stripe_head *sh)
         struct async_submit_ctl submit;
         int i;
  
+       BUG_ON(sh->batch_head);
         pr_debug("%s: stripe %llu\n", __func__,
                 (unsigned long long)sh->sector);
  
@@ -1109,16 +1264,28 @@ static void ops_complete_compute(void *stripe_head_ref)
  
  /* return a pointer to the address conversion region of the scribble buffer */
  static addr_conv_t *to_addr_conv(struct stripe_head *sh,
-                                struct raid5_percpu *percpu)
+                                struct raid5_percpu *percpu, int i)
  {
-       return percpu->scribble + sizeof(struct page *) * (sh->disks + 2);
+       void *addr;
+
+       addr = flex_array_get(percpu->scribble, i);
+       return addr + sizeof(struct page *) * (sh->disks + 2);
+}
+
+/* return a pointer to the address conversion region of the scribble buffer */
+static struct page **to_addr_page(struct raid5_percpu *percpu, int i)
+{
+       void *addr;
+
+       addr = flex_array_get(percpu->scribble, i);
+       return addr;
  }
  
  static struct dma_async_tx_descriptor *
  ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
  {
         int disks = sh->disks;
-       struct page **xor_srcs = percpu->scribble;
+       struct page **xor_srcs = to_addr_page(percpu, 0);
         int target = sh->ops.target;
         struct r5dev *tgt = &sh->dev[target];
         struct page *xor_dest = tgt->page;
@@ -1127,6 +1294,8 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
         struct async_submit_ctl submit;
         int i;
  
+       BUG_ON(sh->batch_head);
+
         pr_debug("%s: stripe %llu block: %d\n",
                 __func__, (unsigned long long)sh->sector, target);
         BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
@@ -1138,7 +1307,7 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
         atomic_inc(&sh->count);
  
         init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST, NULL,
-                         ops_complete_compute, sh, to_addr_conv(sh, percpu));
+                         ops_complete_compute, sh, to_addr_conv(sh, percpu, 0));
         if (unlikely(count == 1))
                 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
         else
@@ -1156,7 +1325,9 @@ ops_run_compute5(struct stripe_head *sh, struct raid5_percpu *percpu)
   * destination buffer is recorded in srcs[count] and the Q destination
   * is recorded in srcs[count+1]].
   */
-static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
+static int set_syndrome_sources(struct page **srcs,
+                               struct stripe_head *sh,
+                               int srctype)
  {
         int disks = sh->disks;
         int syndrome_disks = sh->ddf_layout ? disks : (disks - 2);
@@ -1171,8 +1342,15 @@ static int set_syndrome_sources(struct page **srcs, struct stripe_head *sh)
         i = d0_idx;
         do {
                 int slot = raid6_idx_to_slot(i, sh, &count, syndrome_disks);
+               struct r5dev *dev = &sh->dev[i];
  
-               srcs[slot] = sh->dev[i].page;
+               if (i == sh->qd_idx || i == sh->pd_idx ||
+                   (srctype == SYNDROME_SRC_ALL) ||
+                   (srctype == SYNDROME_SRC_WANT_DRAIN &&
+                    test_bit(R5_Wantdrain, &dev->flags)) ||
+                   (srctype == SYNDROME_SRC_WRITTEN &&
+                    dev->written))
+                       srcs[slot] = sh->dev[i].page;
                 i = raid6_next_disk(i, disks);
         } while (i != d0_idx);
  
@@ -1183,7 +1361,7 @@ static struct dma_async_tx_descriptor *
  ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
  {
         int disks = sh->disks;
-       struct page **blocks = percpu->scribble;
+       struct page **blocks = to_addr_page(percpu, 0);
         int target;
         int qd_idx = sh->qd_idx;
         struct dma_async_tx_descriptor *tx;
@@ -1193,6 +1371,7 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
         int i;
         int count;
  
+       BUG_ON(sh->batch_head);
         if (sh->ops.target < 0)
                 target = sh->ops.target2;
         else if (sh->ops.target2 < 0)
@@ -1211,12 +1390,12 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
         atomic_inc(&sh->count);
  
         if (target == qd_idx) {
-               count = set_syndrome_sources(blocks, sh);
+               count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
                 blocks[count] = NULL; /* regenerating p is not necessary */
                 BUG_ON(blocks[count+1] != dest); /* q should already be set */
                 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
                                   ops_complete_compute, sh,
-                                 to_addr_conv(sh, percpu));
+                                 to_addr_conv(sh, percpu, 0));
                 tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE, &submit);
         } else {
                 /* Compute any data- or p-drive using XOR */
@@ -1229,7 +1408,7 @@ ops_run_compute6_1(struct stripe_head *sh, struct raid5_percpu *percpu)
  
                 init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
                                   NULL, ops_complete_compute, sh,
-                                 to_addr_conv(sh, percpu));
+                                 to_addr_conv(sh, percpu, 0));
                 tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE, &submit);
         }
  
@@ -1248,9 +1427,10 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
         struct r5dev *tgt = &sh->dev[target];
         struct r5dev *tgt2 = &sh->dev[target2];
         struct dma_async_tx_descriptor *tx;
-       struct page **blocks = percpu->scribble;
+       struct page **blocks = to_addr_page(percpu, 0);
         struct async_submit_ctl submit;
  
+       BUG_ON(sh->batch_head);
         pr_debug("%s: stripe %llu block1: %d block2: %d\n",
                  __func__, (unsigned long long)sh->sector, target, target2);
         BUG_ON(target < 0 || target2 < 0);
@@ -1290,7 +1470,7 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
                         /* Missing P+Q, just recompute */
                         init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
                                           ops_complete_compute, sh,
-                                         to_addr_conv(sh, percpu));
+                                         to_addr_conv(sh, percpu, 0));
                         return async_gen_syndrome(blocks, 0, syndrome_disks+2,
                                                   STRIPE_SIZE, &submit);
                 } else {
@@ -1314,21 +1494,21 @@ ops_run_compute6_2(struct stripe_head *sh, struct raid5_percpu *percpu)
                         init_async_submit(&submit,
                                           ASYNC_TX_FENCE|ASYNC_TX_XOR_ZERO_DST,
                                           NULL, NULL, NULL,
-                                         to_addr_conv(sh, percpu));
+                                         to_addr_conv(sh, percpu, 0));
                         tx = async_xor(dest, blocks, 0, count, STRIPE_SIZE,
                                        &submit);
  
-                       count = set_syndrome_sources(blocks, sh);
+                       count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_ALL);
                         init_async_submit(&submit, ASYNC_TX_FENCE, tx,
                                           ops_complete_compute, sh,
-                                         to_addr_conv(sh, percpu));
+                                         to_addr_conv(sh, percpu, 0));
                         return async_gen_syndrome(blocks, 0, count+2,
                                                   STRIPE_SIZE, &submit);
                 }
         } else {
                 init_async_submit(&submit, ASYNC_TX_FENCE, NULL,
                                   ops_complete_compute, sh,
-                                 to_addr_conv(sh, percpu));
+                                 to_addr_conv(sh, percpu, 0));
                 if (failb == syndrome_disks) {
                         /* We're missing D+P. */
                         return async_raid6_datap_recov(syndrome_disks+2,
@@ -1352,17 +1532,18 @@ static void ops_complete_prexor(void *stripe_head_ref)
  }
  
  static struct dma_async_tx_descriptor *
-ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
-              struct dma_async_tx_descriptor *tx)
+ops_run_prexor5(struct stripe_head *sh, struct raid5_percpu *percpu,
+               struct dma_async_tx_descriptor *tx)
  {
         int disks = sh->disks;
-       struct page **xor_srcs = percpu->scribble;
+       struct page **xor_srcs = to_addr_page(percpu, 0);
         int count = 0, pd_idx = sh->pd_idx, i;
         struct async_submit_ctl submit;
  
         /* existing parity data subtracted */
         struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
  
+       BUG_ON(sh->batch_head);
         pr_debug("%s: stripe %llu\n", __func__,
                 (unsigned long long)sh->sector);
  
@@ -1374,31 +1555,56 @@ ops_run_prexor(struct stripe_head *sh, struct raid5_percpu *percpu,
         }
  
         init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
-                         ops_complete_prexor, sh, to_addr_conv(sh, percpu));
+                         ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
         tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
  
         return tx;
  }
  
+static struct dma_async_tx_descriptor *
+ops_run_prexor6(struct stripe_head *sh, struct raid5_percpu *percpu,
+               struct dma_async_tx_descriptor *tx)
+{
+       struct page **blocks = to_addr_page(percpu, 0);
+       int count;
+       struct async_submit_ctl submit;
+
+       pr_debug("%s: stripe %llu\n", __func__,
+               (unsigned long long)sh->sector);
+
+       count = set_syndrome_sources(blocks, sh, SYNDROME_SRC_WANT_DRAIN);
+
+       init_async_submit(&submit, ASYNC_TX_FENCE|ASYNC_TX_PQ_XOR_DST, tx,
+                         ops_complete_prexor, sh, to_addr_conv(sh, percpu, 0));
+       tx = async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
+
+       return tx;
+}
+
  static struct dma_async_tx_descriptor *
  ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
  {
         int disks = sh->disks;
         int i;
+       struct stripe_head *head_sh = sh;
  
         pr_debug("%s: stripe %llu\n", __func__,
                 (unsigned long long)sh->sector);
  
         for (i = disks; i--; ) {
-               struct r5dev *dev = &sh->dev[i];
+               struct r5dev *dev;
                 struct bio *chosen;
  
-               if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) {
+               sh = head_sh;
+               if (test_and_clear_bit(R5_Wantdrain, &head_sh->dev[i].flags)) {
                         struct bio *wbi;
  
+again:
+                       dev = &sh->dev[i];
                         spin_lock_irq(&sh->stripe_lock);
                         chosen = dev->towrite;
                         dev->towrite = NULL;
+                       sh->overwrite_disks = 0;
                         BUG_ON(dev->written);
                         wbi = dev->written = chosen;
                         spin_unlock_irq(&sh->stripe_lock);
@@ -1423,6 +1629,15 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
                                 }
                                 wbi = r5_next_bio(wbi, dev->sector);
                         }
+
+                       if (head_sh->batch_head) {
+                               sh = list_first_entry(&sh->batch_list,
+                                                     struct stripe_head,
+                                                     batch_list);
+                               if (sh == head_sh)
+                                       continue;
+                               goto again;
+                       }
                 }
         }
  
@@ -1478,12 +1693,15 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
                      struct dma_async_tx_descriptor *tx)
  {
         int disks = sh->disks;
-       struct page **xor_srcs = percpu->scribble;
+       struct page **xor_srcs;
         struct async_submit_ctl submit;
-       int count = 0, pd_idx = sh->pd_idx, i;
+       int count, pd_idx = sh->pd_idx, i;
         struct page *xor_dest;
         int prexor = 0;
         unsigned long flags;
+       int j = 0;
+       struct stripe_head *head_sh = sh;
+       int last_stripe;
  
         pr_debug("%s: stripe %llu\n", __func__,
                 (unsigned long long)sh->sector);
@@ -1500,15 +1718,18 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
                 ops_complete_reconstruct(sh);
                 return;
         }
+again:
+       count = 0;
+       xor_srcs = to_addr_page(percpu, j);
         /* check if prexor is active which means only process blocks
          * that are part of a read-modify-write (written)
          */
-       if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
+       if (head_sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
                 prexor = 1;
                 xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
                 for (i = disks; i--; ) {
                         struct r5dev *dev = &sh->dev[i];
-                       if (dev->written)
+                       if (head_sh->dev[i].written)
                                 xor_srcs[count++] = dev->page;
                 }
         } else {
@@ -1525,17 +1746,32 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
          * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
          * for the synchronous xor case
          */
-       flags = ASYNC_TX_ACK |
-               (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
-
-       atomic_inc(&sh->count);
+       last_stripe = !head_sh->batch_head ||
+               list_first_entry(&sh->batch_list,
+                                struct stripe_head, batch_list) == head_sh;
+       if (last_stripe) {
+               flags = ASYNC_TX_ACK |
+                       (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
+
+               atomic_inc(&head_sh->count);
+               init_async_submit(&submit, flags, tx, ops_complete_reconstruct, head_sh,
+                                 to_addr_conv(sh, percpu, j));
+       } else {
+               flags = prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST;
+               init_async_submit(&submit, flags, tx, NULL, NULL,
+                                 to_addr_conv(sh, percpu, j));
+       }
  
-       init_async_submit(&submit, flags, tx, ops_complete_reconstruct, sh,
-                         to_addr_conv(sh, percpu));
         if (unlikely(count == 1))
                 tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit);
         else
                 tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit);
+       if (!last_stripe) {
+               j++;
+               sh = list_first_entry(&sh->batch_list, struct stripe_head,
+                                     batch_list);
+               goto again;
+       }
  }
  
  static void
@@ -1543,8 +1779,12 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
                      struct dma_async_tx_descriptor *tx)
  {
         struct async_submit_ctl submit;
-       struct page **blocks = percpu->scribble;
-       int count, i;
+       struct page **blocks;
+       int count, i, j = 0;
+       struct stripe_head *head_sh = sh;
+       int last_stripe;
+       int synflags;
+       unsigned long txflags;
  
         pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);
  
@@ -1562,13 +1802,36 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
                 return;
         }
  
-       count = set_syndrome_sources(blocks, sh);
+again:
+       blocks = to_addr_page(percpu, j);
  
-       atomic_inc(&sh->count);
+       if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) {
+               synflags = SYNDROME_SRC_WRITTEN;
+               txflags = ASYNC_TX_ACK | ASYNC_TX_PQ_XOR_DST;
+       } else {
+               synflags = SYNDROME_SRC_ALL;
+               txflags = ASYNC_TX_ACK;
+       }
+
+       count = set_syndrome_sources(blocks, sh, synflags);
+       last_stripe = !head_sh->batch_head ||
+               list_first_entry(&sh->batch_list,
+                                struct stripe_head, batch_list) == head_sh;
  
-       init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_reconstruct,
-                         sh, to_addr_conv(sh, percpu));
+       if (last_stripe) {
+               atomic_inc(&head_sh->count);
+               init_async_submit(&submit, txflags, tx, ops_complete_reconstruct,
+                                 head_sh, to_addr_conv(sh, percpu, j));
+       } else
+               init_async_submit(&submit, 0, tx, NULL, NULL,
+                                 to_addr_conv(sh, percpu, j));
         async_gen_syndrome(blocks, 0, count+2, STRIPE_SIZE,  &submit);
+       if (!last_stripe) {
+               j++;
+               sh = list_first_entry(&sh->batch_list, struct stripe_head,
+                                     batch_list);
+               goto again;
+       }
  }
  
  static void ops_complete_check(void *stripe_head_ref)
@@ -1589,7 +1852,7 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
         int pd_idx = sh->pd_idx;
         int qd_idx = sh->qd_idx;
         struct page *xor_dest;
-       struct page **xor_srcs = percpu->scribble;
+       struct page **xor_srcs = to_addr_page(percpu, 0);
         struct dma_async_tx_descriptor *tx;
         struct async_submit_ctl submit;
         int count;
@@ -1598,6 +1861,7 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
         pr_debug("%s: stripe %llu\n", __func__,
                 (unsigned long long)sh->sector);
  
+       BUG_ON(sh->batch_head);
         count = 0;
         xor_dest = sh->dev[pd_idx].page;
         xor_srcs[count++] = xor_dest;
@@ -1608,7 +1872,7 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
         }
  
         init_async_submit(&submit, 0, NULL, NULL, NULL,
-                         to_addr_conv(sh, percpu));
+                         to_addr_conv(sh, percpu, 0));
         tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
                            &sh->ops.zero_sum_result, &submit);
  
@@ -1619,20 +1883,21 @@ static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu)
  
  static void ops_run_check_pq(struct stripe_head *sh, struct raid5_percpu *percpu, int checkp)
  {
-       struct page **srcs = percpu->scribble;
+       struct page **srcs = to_addr_page(percpu, 0);
         struct async_submit_ctl submit;
         int count;
  
         pr_debug("%s: stripe %llu checkp: %d\n", __func__,
                 (unsigned long long)sh->sector, checkp);
  
-       count = set_syndrome_sources(srcs, sh);
+       BUG_ON(sh->batch_head);
+       count = set_syndrome_sources(srcs, sh, SYNDROME_SRC_ALL);
         if (!checkp)
                 srcs[count] = NULL;
  
         atomic_inc(&sh->count);
         init_async_submit(&submit, ASYNC_TX_ACK, NULL, ops_complete_check,
-                         sh, to_addr_conv(sh, percpu));
+                         sh, to_addr_conv(sh, percpu, 0));
         async_syndrome_val(srcs, 0, count+2, STRIPE_SIZE,
                            &sh->ops.zero_sum_result, percpu->spare_page, &submit);
  }
@@ -1667,8 +1932,12 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
                         async_tx_ack(tx);
         }
  
-       if (test_bit(STRIPE_OP_PREXOR, &ops_request))
-               tx = ops_run_prexor(sh, percpu, tx);
+       if (test_bit(STRIPE_OP_PREXOR, &ops_request)) {
+               if (level < 6)
+                       tx = ops_run_prexor5(sh, percpu, tx);
+               else
+                       tx = ops_run_prexor6(sh, percpu, tx);
+       }
  
         if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) {
                 tx = ops_run_biodrain(sh, tx);
@@ -1693,7 +1962,7 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
                         BUG();
         }
  
-       if (overlap_clear)
+       if (overlap_clear && !sh->batch_head)
                 for (i = disks; i--; ) {
                         struct r5dev *dev = &sh->dev[i];
                         if (test_and_clear_bit(R5_Overlap, &dev->flags))
@@ -1702,10 +1971,10 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
         put_cpu();
  }
  
-static int grow_one_stripe(struct r5conf *conf, int hash)
+static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
  {
         struct stripe_head *sh;
-       sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL);
+       sh = kmem_cache_zalloc(conf->slab_cache, gfp);
         if (!sh)
                 return 0;
  
@@ -1713,17 +1982,23 @@ static int grow_one_stripe(struct r5conf *conf, int hash)
  
         spin_lock_init(&sh->stripe_lock);
  
-       if (grow_buffers(sh)) {
+       if (grow_buffers(sh, gfp)) {
                 shrink_buffers(sh);
                 kmem_cache_free(conf->slab_cache, sh);
                 return 0;
         }
-       sh->hash_lock_index = hash;
+       sh->hash_lock_index =
+               conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
         /* we just created an active stripe so... */
         atomic_set(&sh->count, 1);
         atomic_inc(&conf->active_stripes);
         INIT_LIST_HEAD(&sh->lru);
+
+       spin_lock_init(&sh->batch_lock);
+       INIT_LIST_HEAD(&sh->batch_list);
+       sh->batch_head = NULL;
         release_stripe(sh);
+       conf->max_nr_stripes++;
         return 1;
  }
  
@@ -1731,7 +2006,6 @@ static int grow_stripes(struct r5conf *conf, int num)
  {
         struct kmem_cache *sc;
         int devs = max(conf->raid_disks, conf->previous_raid_disks);
-       int hash;
  
         if (conf->mddev->gendisk)
                 sprintf(conf->cache_name[0],
@@ -1749,13 +2023,10 @@ static int grow_stripes(struct r5conf *conf, int num)
                 return 1;
         conf->slab_cache = sc;
         conf->pool_size = devs;
-       hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
-       while (num--) {
-               if (!grow_one_stripe(conf, hash))
+       while (num--)
+               if (!grow_one_stripe(conf, GFP_KERNEL))
                         return 1;
-               conf->max_nr_stripes++;
-               hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
-       }
+
         return 0;
  }
  
@@ -1772,13 +2043,21 @@ static int grow_stripes(struct r5conf *conf, int num)
   * calculate over all devices (not just the data blocks), using zeros in place
   * of the P and Q blocks.
   */
-static size_t scribble_len(int num)
+static struct flex_array *scribble_alloc(int num, int cnt, gfp_t flags)
  {
+       struct flex_array *ret;
         size_t len;
  
         len = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2);
-
-       return len;
+       ret = flex_array_alloc(len, cnt, flags);
+       if (!ret)
+               return NULL;
+       /* always prealloc all elements, so no locking is required */
+       if (flex_array_prealloc(ret, 0, cnt, flags)) {
+               flex_array_free(ret);
+               return NULL;
+       }
+       return ret;
  }
  
  static int resize_stripes(struct r5conf *conf, int newsize)
@@ -1896,16 +2175,16 @@ static int resize_stripes(struct r5conf *conf, int newsize)
                 err = -ENOMEM;
  
         get_online_cpus();
-       conf->scribble_len = scribble_len(newsize);
         for_each_present_cpu(cpu) {
                 struct raid5_percpu *percpu;
-               void *scribble;
+               struct flex_array *scribble;
  
                 percpu = per_cpu_ptr(conf->percpu, cpu);
-               scribble = kmalloc(conf->scribble_len, GFP_NOIO);
+               scribble = scribble_alloc(newsize, conf->chunk_sectors /
+                       STRIPE_SECTORS, GFP_NOIO);
  
                 if (scribble) {
-                       kfree(percpu->scribble);
+                       flex_array_free(percpu->scribble);
                         percpu->scribble = scribble;
                 } else {
                         err = -ENOMEM;
@@ -1937,9 +2216,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
         return err;
  }
  
-static int drop_one_stripe(struct r5conf *conf, int hash)
+static int drop_one_stripe(struct r5conf *conf)
  {
         struct stripe_head *sh;
+       int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
  
         spin_lock_irq(conf->hash_locks + hash);
         sh = get_free_stripe(conf, hash);
@@ -1950,15 +2230,15 @@ static int drop_one_stripe(struct r5conf *conf, int hash)
         shrink_buffers(sh);
         kmem_cache_free(conf->slab_cache, sh);
         atomic_dec(&conf->active_stripes);
+       conf->max_nr_stripes--;
         return 1;
  }
  
  static void shrink_stripes(struct r5conf *conf)
  {
-       int hash;
-       for (hash = 0; hash < NR_STRIPE_HASH_LOCKS; hash++)
-               while (drop_one_stripe(conf, hash))
-                       ;
+       while (conf->max_nr_stripes &&
+              drop_one_stripe(conf))
+               ;
  
         if (conf->slab_cache)
                 kmem_cache_destroy(conf->slab_cache);
@@ -2154,10 +2434,16 @@ static void raid5_end_write_request(struct bio *bi, int error)
         }
         rdev_dec_pending(rdev, conf->mddev);
  
+       if (sh->batch_head && !uptodate)
+               set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
+
         if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
                 clear_bit(R5_LOCKED, &sh->dev[i].flags);
         set_bit(STRIPE_HANDLE, &sh->state);
         release_stripe(sh);
+
+       if (sh->batch_head && sh != sh->batch_head)
+               release_stripe(sh->batch_head);
  }
  
  static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous);
@@ -2535,7 +2821,7 @@ static void
  schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                          int rcw, int expand)
  {
-       int i, pd_idx = sh->pd_idx, disks = sh->disks;
+       int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx, disks = sh->disks;
         struct r5conf *conf = sh->raid_conf;
         int level = conf->level;
  
@@ -2571,13 +2857,15 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
                         if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state))
                                 atomic_inc(&conf->pending_full_writes);
         } else {
-               BUG_ON(level == 6);
                 BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
                         test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
+               BUG_ON(level == 6 &&
+                       (!(test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags) ||
+                          test_bit(R5_Wantcompute, &sh->dev[qd_idx].flags))));
  
                 for (i = disks; i--; ) {
                         struct r5dev *dev = &sh->dev[i];
-                       if (i == pd_idx)
+                       if (i == pd_idx || i == qd_idx)
                                 continue;
  
                         if (dev->towrite &&
@@ -2624,7 +2912,8 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
   * toread/towrite point to the first in a chain.
   * The bi_next chain must be in order.
   */
-static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, int forwrite)
+static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
+                         int forwrite, int previous)
  {
         struct bio **bip;
         struct r5conf *conf = sh->raid_conf;
@@ -2643,6 +2932,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
          * protect it.
          */
         spin_lock_irq(&sh->stripe_lock);
+       /* Don't allow new IO added to stripes in batch list */
+       if (sh->batch_head)
+               goto overlap;
         if (forwrite) {
                 bip = &sh->dev[dd_idx].towrite;
                 if (*bip == NULL)
@@ -2657,6 +2949,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
         if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
                 goto overlap;
  
+       if (!forwrite || previous)
+               clear_bit(STRIPE_BATCH_READY, &sh->state);
+
         BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
         if (*bip)
                 bi->bi_next = *bip;
@@ -2674,7 +2969,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                                 sector = bio_end_sector(bi);
                 }
                 if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
-                       set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
+                       if (!test_and_set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags))
+                               sh->overwrite_disks++;
         }
  
         pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
@@ -2688,6 +2984,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
                 sh->bm_seq = conf->seq_flush+1;
                 set_bit(STRIPE_BIT_DELAY, &sh->state);
         }
+
+       if (stripe_can_batch(sh))
+               stripe_add_to_batch_list(conf, sh);
         return 1;
  
   overlap:
@@ -2720,6 +3019,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                                 struct bio **return_bi)
  {
         int i;
+       BUG_ON(sh->batch_head);
         for (i = disks; i--; ) {
                 struct bio *bi;
                 int bitmap_end = 0;
@@ -2746,6 +3046,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
                 /* fail all writes first */
                 bi = sh->dev[i].towrite;
                 sh->dev[i].towrite = NULL;
+               sh->overwrite_disks = 0;
                 spin_unlock_irq(&sh->stripe_lock);
                 if (bi)
                         bitmap_end = 1;
@@ -2834,6 +3135,7 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
         int abort = 0;
         int i;
  
+       BUG_ON(sh->batch_head);
         clear_bit(STRIPE_SYNCING, &sh->state);
         if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
                 wake_up(&conf->wait_for_overlap);
@@ -3064,6 +3366,7 @@ static void handle_stripe_fill(struct stripe_head *sh,
  {
         int i;
  
+       BUG_ON(sh->batch_head);
         /* look for blocks to read/compute, skip this if a compute
          * is already in flight, or if the stripe contents are in the
          * midst of changing due to a write
@@ -3087,6 +3390,9 @@ static void handle_stripe_clean_event(struct r5conf *conf,
         int i;
         struct r5dev *dev;
         int discard_pending = 0;
+       struct stripe_head *head_sh = sh;
+       bool do_endio = false;
+       int wakeup_nr = 0;
  
         for (i = disks; i--; )
                 if (sh->dev[i].written) {
@@ -3102,8 +3408,11 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                         clear_bit(R5_UPTODATE, &dev->flags);
                                 if (test_and_clear_bit(R5_SkipCopy, &dev->flags)) {
                                         WARN_ON(test_bit(R5_UPTODATE, &dev->flags));
-                                       dev->page = dev->orig_page;
                                 }
+                               do_endio = true;
+
+returnbi:
+                               dev->page = dev->orig_page;
                                 wbi = dev->written;
                                 dev->written = NULL;
                                 while (wbi && wbi->bi_iter.bi_sector <
@@ -3120,6 +3429,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                                                 STRIPE_SECTORS,
                                          !test_bit(STRIPE_DEGRADED, &sh->state),
                                                 0);
+                               if (head_sh->batch_head) {
+                                       sh = list_first_entry(&sh->batch_list,
+                                                             struct stripe_head,
+                                                             batch_list);
+                                       if (sh != head_sh) {
+                                               dev = &sh->dev[i];
+                                               goto returnbi;
+                                       }
+                               }
+                               sh = head_sh;
+                               dev = &sh->dev[i];
                         } else if (test_bit(R5_Discard, &dev->flags))
                                 discard_pending = 1;
                         WARN_ON(test_bit(R5_SkipCopy, &dev->flags));
@@ -3141,8 +3461,17 @@ static void handle_stripe_clean_event(struct r5conf *conf,
                  * will be reinitialized
                  */
                 spin_lock_irq(&conf->device_lock);
+unhash:
                 remove_hash(sh);
+               if (head_sh->batch_head) {
+                       sh = list_first_entry(&sh->batch_list,
+                                             struct stripe_head, batch_list);
+                       if (sh != head_sh)
+                                       goto unhash;
+               }
                 spin_unlock_irq(&conf->device_lock);
+               sh = head_sh;
+
                 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
                         set_bit(STRIPE_HANDLE, &sh->state);
  
@@ -3151,6 +3480,45 @@ static void handle_stripe_clean_event(struct r5conf *conf,
         if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
                 if (atomic_dec_and_test(&conf->pending_full_writes))
                         md_wakeup_thread(conf->mddev->thread);
+
+       if (!head_sh->batch_head || !do_endio)
+               return;
+       for (i = 0; i < head_sh->disks; i++) {
+               if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
+                       wakeup_nr++;
+       }
+       while (!list_empty(&head_sh->batch_list)) {
+               int i;
+               sh = list_first_entry(&head_sh->batch_list,
+                                     struct stripe_head, batch_list);
+               list_del_init(&sh->batch_list);
+
+               set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
+                             head_sh->state & ~((1 << STRIPE_ACTIVE) |
+                                                (1 << STRIPE_PREREAD_ACTIVE) |
+                                                STRIPE_EXPAND_SYNC_FLAG));
+               sh->check_state = head_sh->check_state;
+               sh->reconstruct_state = head_sh->reconstruct_state;
+               for (i = 0; i < sh->disks; i++) {
+                       if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+                               wakeup_nr++;
+                       sh->dev[i].flags = head_sh->dev[i].flags;
+               }
+
+               spin_lock_irq(&sh->stripe_lock);
+               sh->batch_head = NULL;
+               spin_unlock_irq(&sh->stripe_lock);
+               if (sh->state & STRIPE_EXPAND_SYNC_FLAG)
+                       set_bit(STRIPE_HANDLE, &sh->state);
+               release_stripe(sh);
+       }
+
+       spin_lock_irq(&head_sh->stripe_lock);
+       head_sh->batch_head = NULL;
+       spin_unlock_irq(&head_sh->stripe_lock);
+       wake_up_nr(&conf->wait_for_overlap, wakeup_nr);
+       if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG)
+               set_bit(STRIPE_HANDLE, &head_sh->state);
  }
  
  static void handle_stripe_dirtying(struct r5conf *conf,
@@ -3161,28 +3529,27 @@ static void handle_stripe_dirtying(struct r5conf *conf,
         int rmw = 0, rcw = 0, i;
         sector_t recovery_cp = conf->mddev->recovery_cp;
  
-       /* RAID6 requires 'rcw' in current implementation.
-        * Otherwise, check whether resync is now happening or should start.
+       /* Check whether resync is now happening or should start.
          * If yes, then the array is dirty (after unclean shutdown or
          * initial creation), so parity in some stripes might be inconsistent.
          * In this case, we need to always do reconstruct-write, to ensure
          * that in case of drive failure or read-error correction, we
          * generate correct data from the parity.
          */
-       if (conf->max_degraded == 2 ||
+       if (conf->rmw_level == PARITY_DISABLE_RMW ||
             (recovery_cp < MaxSector && sh->sector >= recovery_cp &&
              s->failed == 0)) {
                 /* Calculate the real rcw later - for now make it
                  * look like rcw is cheaper
                  */
                 rcw = 1; rmw = 2;
-               pr_debug("force RCW max_degraded=%u, recovery_cp=%llu sh->sector=%llu\n",
-                        conf->max_degraded, (unsigned long long)recovery_cp,
+               pr_debug("force RCW rmw_level=%u, recovery_cp=%llu sh->sector=%llu\n",
+                        conf->rmw_level, (unsigned long long)recovery_cp,
                          (unsigned long long)sh->sector);
         } else for (i = disks; i--; ) {
                 /* would I have to read this buffer for read_modify_write */
                 struct r5dev *dev = &sh->dev[i];
-               if ((dev->towrite || i == sh->pd_idx) &&
+               if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
                     !test_bit(R5_LOCKED, &dev->flags) &&
                     !(test_bit(R5_UPTODATE, &dev->flags) ||
                       test_bit(R5_Wantcompute, &dev->flags))) {
@@ -3192,7 +3559,8 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                                 rmw += 2*disks;  /* cannot read it */
                 }
                 /* Would I have to read this buffer for reconstruct_write */
-               if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
+               if (!test_bit(R5_OVERWRITE, &dev->flags) &&
+                   i != sh->pd_idx && i != sh->qd_idx &&
                     !test_bit(R5_LOCKED, &dev->flags) &&
                     !(test_bit(R5_UPTODATE, &dev->flags) ||
                     test_bit(R5_Wantcompute, &dev->flags))) {
@@ -3205,7 +3573,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
         pr_debug("for sector %llu, rmw=%d rcw=%d\n",
                 (unsigned long long)sh->sector, rmw, rcw);
         set_bit(STRIPE_HANDLE, &sh->state);
-       if (rmw < rcw && rmw > 0) {
+       if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_ENABLE_RMW)) && rmw > 0) {
                 /* prefer read-modify-write, but need to get some data */
                 if (conf->mddev->queue)
                         blk_add_trace_msg(conf->mddev->queue,
@@ -3213,7 +3581,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                                           (unsigned long long)sh->sector, rmw);
                 for (i = disks; i--; ) {
                         struct r5dev *dev = &sh->dev[i];
-                       if ((dev->towrite || i == sh->pd_idx) &&
+                       if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx) &&
                             !test_bit(R5_LOCKED, &dev->flags) &&
                             !(test_bit(R5_UPTODATE, &dev->flags) ||
                             test_bit(R5_Wantcompute, &dev->flags)) &&
@@ -3232,7 +3600,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
                         }
                 }
         }
-       if (rcw <= rmw && rcw > 0) {
+       if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_ENABLE_RMW)) && rcw > 0) {
                 /* want reconstruct write, but need to get some data */
                 int qread =0;
                 rcw = 0;
@@ -3290,6 +3658,7 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
  {
         struct r5dev *dev = NULL;
  
+       BUG_ON(sh->batch_head);
         set_bit(STRIPE_HANDLE, &sh->state);
  
         switch (sh->check_state) {
@@ -3380,6 +3749,7 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
         int qd_idx = sh->qd_idx;
         struct r5dev *dev;
  
+       BUG_ON(sh->batch_head);
         set_bit(STRIPE_HANDLE, &sh->state);
  
         BUG_ON(s->failed > 2);
@@ -3543,6 +3913,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
          * copy some of them into a target stripe for expand.
          */
         struct dma_async_tx_descriptor *tx = NULL;
+       BUG_ON(sh->batch_head);
         clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
         for (i = 0; i < sh->disks; i++)
                 if (i != sh->pd_idx && i != sh->qd_idx) {
@@ -3615,8 +3986,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
  
         memset(s, 0, sizeof(*s));
  
-       s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
-       s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+       s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state) && !sh->batch_head;
+       s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head;
         s->failed_num[0] = -1;
         s->failed_num[1] = -1;
  
@@ -3786,6 +4157,80 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
         rcu_read_unlock();
  }
  
+static int clear_batch_ready(struct stripe_head *sh)
+{
+       struct stripe_head *tmp;
+       if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
+               return 0;
+       spin_lock(&sh->stripe_lock);
+       if (!sh->batch_head) {
+               spin_unlock(&sh->stripe_lock);
+               return 0;
+       }
+
+       /*
+        * this stripe could be added to a batch list before we check
+        * BATCH_READY, skips it
+        */
+       if (sh->batch_head != sh) {
+               spin_unlock(&sh->stripe_lock);
+               return 1;
+       }
+       spin_lock(&sh->batch_lock);
+       list_for_each_entry(tmp, &sh->batch_list, batch_list)
+               clear_bit(STRIPE_BATCH_READY, &tmp->state);
+       spin_unlock(&sh->batch_lock);
+       spin_unlock(&sh->stripe_lock);
+
+       /*
+        * BATCH_READY is cleared, no new stripes can be added.
+        * batch_list can be accessed without lock
+        */
+       return 0;
+}
+
+static void check_break_stripe_batch_list(struct stripe_head *sh)
+{
+       struct stripe_head *head_sh, *next;
+       int i;
+
+       if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
+               return;
+
+       head_sh = sh;
+       do {
+               sh = list_first_entry(&sh->batch_list,
+                                     struct stripe_head, batch_list);
+               BUG_ON(sh == head_sh);
+       } while (!test_bit(STRIPE_DEGRADED, &sh->state));
+
+       while (sh != head_sh) {
+               next = list_first_entry(&sh->batch_list,
+                                       struct stripe_head, batch_list);
+               list_del_init(&sh->batch_list);
+
+               set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
+                             head_sh->state & ~((1 << STRIPE_ACTIVE) |
+                                                (1 << STRIPE_PREREAD_ACTIVE) |
+                                                (1 << STRIPE_DEGRADED) |
+                                                STRIPE_EXPAND_SYNC_FLAG));
+               sh->check_state = head_sh->check_state;
+               sh->reconstruct_state = head_sh->reconstruct_state;
+               for (i = 0; i < sh->disks; i++)
+                       sh->dev[i].flags = head_sh->dev[i].flags &
+                               (~((1 << R5_WriteError) | (1 << R5_Overlap)));
+
+               spin_lock_irq(&sh->stripe_lock);
+               sh->batch_head = NULL;
+               spin_unlock_irq(&sh->stripe_lock);
+
+               set_bit(STRIPE_HANDLE, &sh->state);
+               release_stripe(sh);
+
+               sh = next;
+       }
+}
+
  static void handle_stripe(struct stripe_head *sh)
  {
         struct stripe_head_state s;
@@ -3803,7 +4248,14 @@ static void handle_stripe(struct stripe_head *sh)
                 return;
         }
  
-       if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
+       if (clear_batch_ready(sh) ) {
+               clear_bit_unlock(STRIPE_ACTIVE, &sh->state);
+               return;
+       }
+
+       check_break_stripe_batch_list(sh);
+
+       if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
                 spin_lock(&sh->stripe_lock);
                 /* Cannot process 'sync' concurrently with 'discard' */
                 if (!test_bit(STRIPE_DISCARD, &sh->state) &&
@@ -4158,7 +4610,7 @@ static int raid5_congested(struct mddev *mddev, int bits)
          * how busy the stripe_cache is
          */
  
-       if (conf->inactive_blocked)
+       if (test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
                 return 1;
         if (conf->quiesce)
                 return 1;
@@ -4180,8 +4632,12 @@ static int raid5_mergeable_bvec(struct mddev *mddev,
         unsigned int chunk_sectors = mddev->chunk_sectors;
         unsigned int bio_sectors = bvm->bi_size >> 9;
  
-       if ((bvm->bi_rw & 1) == WRITE)
-               return biovec->bv_len; /* always allow writes to be mergeable */
+       /*
+        * always allow writes to be mergeable, read as well if array
+        * is degraded as we'll go through stripe cache anyway.
+        */
+       if ((bvm->bi_rw & 1) == WRITE || mddev->degraded)
+               return biovec->bv_len;
  
         if (mddev->new_chunk_sectors < mddev->chunk_sectors)
                 chunk_sectors = mddev->new_chunk_sectors;
@@ -4603,12 +5059,14 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
                 }
                 set_bit(STRIPE_DISCARD, &sh->state);
                 finish_wait(&conf->wait_for_overlap, &w);
+               sh->overwrite_disks = 0;
                 for (d = 0; d < conf->raid_disks; d++) {
                         if (d == sh->pd_idx || d == sh->qd_idx)
                                 continue;
                         sh->dev[d].towrite = bi;
                         set_bit(R5_OVERWRITE, &sh->dev[d].flags);
                         raid5_inc_bi_active_stripes(bi);
+                       sh->overwrite_disks++;
                 }
                 spin_unlock_irq(&sh->stripe_lock);
                 if (conf->mddev->bitmap) {
@@ -4656,7 +5114,12 @@ static void make_request(struct mddev *mddev, struct bio * bi)
  
         md_write_start(mddev, bi);
  
-       if (rw == READ &&
+       /*
+        * If array is degraded, better not do chunk aligned read because
+        * later we might have to read it again in order to reconstruct
+        * data on failed drives.
+        */
+       if (rw == READ && mddev->degraded == 0 &&
              mddev->reshape_position == MaxSector &&
              chunk_aligned_read(mddev,bi))
                 return;
@@ -4772,7 +5235,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                         }
  
                         if (test_bit(STRIPE_EXPANDING, &sh->state) ||
-                           !add_stripe_bio(sh, bi, dd_idx, rw)) {
+                           !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
                                 /* Stripe is busy expanding or
                                  * add failed due to overlap.  Flush everything
                                  * and wait a while
@@ -4785,7 +5248,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
                         }
                         set_bit(STRIPE_HANDLE, &sh->state);
                         clear_bit(STRIPE_DELAYED, &sh->state);
-                       if ((bi->bi_rw & REQ_SYNC) &&
+                       if ((!sh->batch_head || sh == sh->batch_head) &&
+                           (bi->bi_rw & REQ_SYNC) &&
                             !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                 atomic_inc(&conf->preread_active_stripes);
                         release_stripe_plug(mddev, sh);
@@ -5050,8 +5514,7 @@ ret:
         return reshape_sectors;
  }
  
-/* FIXME go_faster isn't used */
-static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped, int go_faster)
+static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipped)
  {
         struct r5conf *conf = mddev->private;
         struct stripe_head *sh;
@@ -5186,7 +5649,7 @@ static int  retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
                         return handled;
                 }
  
-               if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
+               if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) {
                         release_stripe(sh);
                         raid5_set_bi_processed_stripes(raid_bio, scnt);
                         conf->retry_read_aligned = raid_bio;
@@ -5312,6 +5775,8 @@ static void raid5d(struct md_thread *thread)
                 int batch_size, released;
  
                 released = release_stripe_list(conf, conf->temp_inactive_list);
+               if (released)
+                       clear_bit(R5_DID_ALLOC, &conf->cache_state);
  
                 if (
                     !list_empty(&conf->bitmap_list)) {
@@ -5350,6 +5815,13 @@ static void raid5d(struct md_thread *thread)
         pr_debug("%d stripes handled\n", handled);
  
         spin_unlock_irq(&conf->device_lock);
+       if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
+               grow_one_stripe(conf, __GFP_NOWARN);
+               /* Set flag even if allocation failed.  This helps
+                * slow down allocation requests when mem is short
+                */
+               set_bit(R5_DID_ALLOC, &conf->cache_state);
+       }
  
         async_tx_issue_pending_all();
         blk_finish_plug(&plug);
@@ -5365,7 +5837,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
         spin_lock(&mddev->lock);
         conf = mddev->private;
         if (conf)
-               ret = sprintf(page, "%d\n", conf->max_nr_stripes);
+               ret = sprintf(page, "%d\n", conf->min_nr_stripes);
         spin_unlock(&mddev->lock);
         return ret;
  }
@@ -5375,30 +5847,24 @@ raid5_set_cache_size(struct mddev *mddev, int size)
  {
         struct r5conf *conf = mddev->private;
         int err;
-       int hash;
  
         if (size <= 16 || size > 32768)
                 return -EINVAL;
-       hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
-       while (size < conf->max_nr_stripes) {
-               if (drop_one_stripe(conf, hash))
-                       conf->max_nr_stripes--;
-               else
-                       break;
-               hash--;
-               if (hash < 0)
-                       hash = NR_STRIPE_HASH_LOCKS - 1;
-       }
+
+       conf->min_nr_stripes = size;
+       while (size < conf->max_nr_stripes &&
+              drop_one_stripe(conf))
+               ;
+
+
         err = md_allow_write(mddev);
         if (err)
                 return err;
-       hash = conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS;
-       while (size > conf->max_nr_stripes) {
-               if (grow_one_stripe(conf, hash))
-                       conf->max_nr_stripes++;
-               else break;
-               hash = (hash + 1) % NR_STRIPE_HASH_LOCKS;
-       }
+
+       while (size > conf->max_nr_stripes)
+               if (!grow_one_stripe(conf, GFP_KERNEL))
+                       break;
+
         return 0;
  }
  EXPORT_SYMBOL(raid5_set_cache_size);
@@ -5432,6 +5898,49 @@ raid5_stripecache_size = __ATTR(stripe_cache_size, S_IRUGO | S_IWUSR,
                                 raid5_show_stripe_cache_size,
                                 raid5_store_stripe_cache_size);
  
+static ssize_t
+raid5_show_rmw_level(struct mddev  *mddev, char *page)
+{
+       struct r5conf *conf = mddev->private;
+       if (conf)
+               return sprintf(page, "%d\n", conf->rmw_level);
+       else
+               return 0;
+}
+
+static ssize_t
+raid5_store_rmw_level(struct mddev  *mddev, const char *page, size_t len)
+{
+       struct r5conf *conf = mddev->private;
+       unsigned long new;
+
+       if (!conf)
+               return -ENODEV;
+
+       if (len >= PAGE_SIZE)
+               return -EINVAL;
+
+       if (kstrtoul(page, 10, &new))
+               return -EINVAL;
+
+       if (new != PARITY_DISABLE_RMW && !raid6_call.xor_syndrome)
+               return -EINVAL;
+
+       if (new != PARITY_DISABLE_RMW &&
+           new != PARITY_ENABLE_RMW &&
+           new != PARITY_PREFER_RMW)
+               return -EINVAL;
+
+       conf->rmw_level = new;
+       return len;
+}
+
+static struct md_sysfs_entry
+raid5_rmw_level = __ATTR(rmw_level, S_IRUGO | S_IWUSR,
+                        raid5_show_rmw_level,
+                        raid5_store_rmw_level);
+
+
  static ssize_t
  raid5_show_preread_threshold(struct mddev *mddev, char *page)
  {
@@ -5463,7 +5972,7 @@ raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
         conf = mddev->private;
         if (!conf)
                 err = -ENODEV;
-       else if (new > conf->max_nr_stripes)
+       else if (new > conf->min_nr_stripes)
                 err = -EINVAL;
         else
                 conf->bypass_threshold = new;
@@ -5618,6 +6127,7 @@ static struct attribute *raid5_attrs[] =  {
         &raid5_preread_bypass_threshold.attr,
         &raid5_group_thread_cnt.attr,
         &raid5_skip_copy.attr,
+       &raid5_rmw_level.attr,
         NULL,
  };
  static struct attribute_group raid5_attrs_group = {
@@ -5699,7 +6209,8 @@ raid5_size(struct mddev *mddev, sector_t sectors, int raid_disks)
  static void free_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu)
  {
         safe_put_page(percpu->spare_page);
-       kfree(percpu->scribble);
+       if (percpu->scribble)
+               flex_array_free(percpu->scribble);
         percpu->spare_page = NULL;
         percpu->scribble = NULL;
  }
@@ -5709,7 +6220,9 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
         if (conf->level == 6 && !percpu->spare_page)
                 percpu->spare_page = alloc_page(GFP_KERNEL);
         if (!percpu->scribble)
-               percpu->scribble = kmalloc(conf->scribble_len, GFP_KERNEL);
+               percpu->scribble = scribble_alloc(max(conf->raid_disks,
+                       conf->previous_raid_disks), conf->chunk_sectors /
+                       STRIPE_SECTORS, GFP_KERNEL);
  
         if (!percpu->scribble || (conf->level == 6 && !percpu->spare_page)) {
                 free_scratch_buffer(conf, percpu);
@@ -5740,6 +6253,8 @@ static void raid5_free_percpu(struct r5conf *conf)
  
  static void free_conf(struct r5conf *conf)
  {
+       if (conf->shrinker.seeks)
+               unregister_shrinker(&conf->shrinker);
         free_thread_groups(conf);
         shrink_stripes(conf);
         raid5_free_percpu(conf);
@@ -5807,6 +6322,30 @@ static int raid5_alloc_percpu(struct r5conf *conf)
         return err;
  }
  
+static unsigned long raid5_cache_scan(struct shrinker *shrink,
+                                     struct shrink_control *sc)
+{
+       struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+       int ret = 0;
+       while (ret < sc->nr_to_scan) {
+               if (drop_one_stripe(conf) == 0)
+                       return SHRINK_STOP;
+               ret++;
+       }
+       return ret;
+}
+
+static unsigned long raid5_cache_count(struct shrinker *shrink,
+                                      struct shrink_control *sc)
+{
+       struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+
+       if (conf->max_nr_stripes < conf->min_nr_stripes)
+               /* unlikely, but not impossible */
+               return 0;
+       return conf->max_nr_stripes - conf->min_nr_stripes;
+}
+
  static struct r5conf *setup_conf(struct mddev *mddev)
  {
         struct r5conf *conf;
@@ -5879,7 +6418,6 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         else
                 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
         max_disks = max(conf->raid_disks, conf->previous_raid_disks);
-       conf->scribble_len = scribble_len(max_disks);
  
         conf->disks = kzalloc(max_disks * sizeof(struct disk_info),
                               GFP_KERNEL);
@@ -5907,6 +6445,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                 INIT_LIST_HEAD(conf->temp_inactive_list + i);
  
         conf->level = mddev->new_level;
+       conf->chunk_sectors = mddev->new_chunk_sectors;
         if (raid5_alloc_percpu(conf) != 0)
                 goto abort;
  
@@ -5939,12 +6478,17 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                         conf->fullsync = 1;
         }
  
-       conf->chunk_sectors = mddev->new_chunk_sectors;
         conf->level = mddev->new_level;
-       if (conf->level == 6)
+       if (conf->level == 6) {
                 conf->max_degraded = 2;
-       else
+               if (raid6_call.xor_syndrome)
+                       conf->rmw_level = PARITY_ENABLE_RMW;
+               else
+                       conf->rmw_level = PARITY_DISABLE_RMW;
+       } else {
                 conf->max_degraded = 1;
+               conf->rmw_level = PARITY_ENABLE_RMW;
+       }
         conf->algorithm = mddev->new_layout;
         conf->reshape_progress = mddev->reshape_position;
         if (conf->reshape_progress != MaxSector) {
@@ -5952,10 +6496,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
                 conf->prev_algo = mddev->layout;
         }
  
-       memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
+       conf->min_nr_stripes = NR_STRIPES;
+       memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
                  max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
         atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
-       if (grow_stripes(conf, NR_STRIPES)) {
+       if (grow_stripes(conf, conf->min_nr_stripes)) {
                 printk(KERN_ERR
                        "md/raid:%s: couldn't allocate %dkB for buffers\n",
                        mdname(mddev), memory);
@@ -5963,6 +6508,17 @@ static struct r5conf *setup_conf(struct mddev *mddev)
         } else
                 printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
                        mdname(mddev), memory);
+       /*
+        * Losing a stripe head costs more than the time to refill it,
+        * it reduces the queue depth and so can hurt throughput.
+        * So set it rather large, scaled by number of devices.
+        */
+       conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4;
+       conf->shrinker.scan_objects = raid5_cache_scan;
+       conf->shrinker.count_objects = raid5_cache_count;
+       conf->shrinker.batch = 128;
+       conf->shrinker.flags = 0;
+       register_shrinker(&conf->shrinker);
  
         sprintf(pers_name, "raid%d", mddev->new_level);
         conf->thread = md_register_thread(raid5d, mddev, pers_name);
@@ -6604,9 +7160,9 @@ static int check_stripe_cache(struct mddev *mddev)
          */
         struct r5conf *conf = mddev->private;
         if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
-           > conf->max_nr_stripes ||
+           > conf->min_nr_stripes ||
             ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
-           > conf->max_nr_stripes) {
+           > conf->min_nr_stripes) {
                 printk(KERN_WARNING "md/raid:%s: reshape: not enough stripes.  Needed %lu\n",
                        mdname(mddev),
                        ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h

index 983e18a..7dc0dd8 100644 (file)
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -210,11 +210,19 @@ struct stripe_head {
         atomic_t                count;        /* nr of active thread/requests */
         int                     bm_seq; /* sequence number for bitmap flushes */
         int                     disks;          /* disks in stripe */
+       int                     overwrite_disks; /* total overwrite disks in stripe,
+                                                 * this is only checked when stripe
+                                                 * has STRIPE_BATCH_READY
+                                                 */
         enum check_states       check_state;
         enum reconstruct_states reconstruct_state;
         spinlock_t              stripe_lock;
         int                     cpu;
         struct r5worker_group   *group;
+
+       struct stripe_head      *batch_head; /* protected by stripe lock */
+       spinlock_t              batch_lock; /* only header's lock is useful */
+       struct list_head        batch_list; /* protected by head's batch lock*/
         /**
          * struct stripe_operations
          * @target - STRIPE_OP_COMPUTE_BLK target
@@ -327,8 +335,15 @@ enum {
         STRIPE_ON_UNPLUG_LIST,
         STRIPE_DISCARD,
         STRIPE_ON_RELEASE_LIST,
+       STRIPE_BATCH_READY,
+       STRIPE_BATCH_ERR,
  };
  
+#define STRIPE_EXPAND_SYNC_FLAG \
+       ((1 << STRIPE_EXPAND_SOURCE) |\
+       (1 << STRIPE_EXPAND_READY) |\
+       (1 << STRIPE_EXPANDING) |\
+       (1 << STRIPE_SYNC_REQUESTED))
  /*
   * Operation request flags
   */
@@ -340,6 +355,24 @@ enum {
         STRIPE_OP_RECONSTRUCT,
         STRIPE_OP_CHECK,
  };
+
+/*
+ * RAID parity calculation preferences
+ */
+enum {
+       PARITY_DISABLE_RMW = 0,
+       PARITY_ENABLE_RMW,
+       PARITY_PREFER_RMW,
+};
+
+/*
+ * Pages requested from set_syndrome_sources()
+ */
+enum {
+       SYNDROME_SRC_ALL,
+       SYNDROME_SRC_WANT_DRAIN,
+       SYNDROME_SRC_WRITTEN,
+};
  /*
   * Plugging:
   *
@@ -396,10 +429,11 @@ struct r5conf {
         spinlock_t              hash_locks[NR_STRIPE_HASH_LOCKS];
         struct mddev            *mddev;
         int                     chunk_sectors;
-       int                     level, algorithm;
+       int                     level, algorithm, rmw_level;
         int                     max_degraded;
         int                     raid_disks;
         int                     max_nr_stripes;
+       int                     min_nr_stripes;
  
         /* reshape_progress is the leading edge of a 'reshape'
          * It has value MaxSector when no reshape is happening
@@ -458,15 +492,11 @@ struct r5conf {
         /* per cpu variables */
         struct raid5_percpu {
                 struct page     *spare_page; /* Used when checking P/Q in raid6 */
-               void            *scribble;   /* space for constructing buffer
+               struct flex_array *scribble;   /* space for constructing buffer
                                               * lists and performing address
                                               * conversions
                                               */
         } __percpu *percpu;
-       size_t                  scribble_len; /* size of scribble region must be
-                                              * associated with conf to handle
-                                              * cpu hotplug while reshaping
-                                              */
  #ifdef CONFIG_HOTPLUG_CPU
         struct notifier_block   cpu_notify;
  #endif
@@ -480,9 +510,19 @@ struct r5conf {
         struct llist_head       released_stripes;
         wait_queue_head_t       wait_for_stripe;
         wait_queue_head_t       wait_for_overlap;
-       int                     inactive_blocked;       /* release of inactive stripes blocked,
-                                                        * waiting for 25% to be free
-                                                        */
+       unsigned long           cache_state;
+#define R5_INACTIVE_BLOCKED    1       /* release of inactive stripes blocked,
+                                        * waiting for 25% to be free
+                                        */
+#define R5_ALLOC_MORE          2       /* It might help to allocate another
+                                        * stripe.
+                                        */
+#define R5_DID_ALLOC           4       /* A stripe was allocated, don't allocate
+                                        * more until at least one has been
+                                        * released.  This avoids flooding
+                                        * the cache.
+                                        */
+       struct shrinker         shrinker;
         int                     pool_size; /* number of disks in stripeheads in pool */
         spinlock_t              device_lock;
         struct disk_info        *disks;
@@ -497,6 +537,7 @@ struct r5conf {
         int                     worker_cnt_per_group;
  };
  
+
  /*
   * Our supported algorithms
   */
diff --git a/drivers/media/platform/xilinx/xilinx-dma.c b/drivers/media/platform/xilinx/xilinx-dma.c

index 10209c2..efde88a 100644 (file)
--- a/drivers/media/platform/xilinx/xilinx-dma.c
+++ b/drivers/media/platform/xilinx/xilinx-dma.c
@@ -12,7 +12,7 @@
   * published by the Free Software Foundation.
   */
  
-#include <linux/amba/xilinx_dma.h>
+#include <linux/dma/xilinx_dma.h>
  #include <linux/lcm.h>
  #include <linux/list.h>
  #include <linux/module.h>
diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c

index 69e0483..644dec7 100644 (file)
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -402,6 +402,12 @@ static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv, unsigned long flags)
  {
         struct vb2_dc_buf *buf = buf_priv;
         struct dma_buf *dbuf;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &vb2_dc_dmabuf_ops;
+       exp_info.size = buf->size;
+       exp_info.flags = flags;
+       exp_info.priv = buf;
  
         if (!buf->sgt_base)
                 buf->sgt_base = vb2_dc_get_base_sgt(buf);
@@ -409,7 +415,7 @@ static struct dma_buf *vb2_dc_get_dmabuf(void *buf_priv, unsigned long flags)
         if (WARN_ON(!buf->sgt_base))
                 return NULL;
  
-       dbuf = dma_buf_export(buf, &vb2_dc_dmabuf_ops, buf->size, flags, NULL);
+       dbuf = dma_buf_export(&exp_info);
         if (IS_ERR(dbuf))
                 return NULL;
  
diff --git a/drivers/media/v4l2-core/videobuf2-dma-sg.c b/drivers/media/v4l2-core/videobuf2-dma-sg.c

index b1838ab..45c708e 100644 (file)
--- a/drivers/media/v4l2-core/videobuf2-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-sg.c
@@ -583,11 +583,17 @@ static struct dma_buf *vb2_dma_sg_get_dmabuf(void *buf_priv, unsigned long flags
  {
         struct vb2_dma_sg_buf *buf = buf_priv;
         struct dma_buf *dbuf;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &vb2_dma_sg_dmabuf_ops;
+       exp_info.size = buf->size;
+       exp_info.flags = flags;
+       exp_info.priv = buf;
  
         if (WARN_ON(!buf->dma_sgt))
                 return NULL;
  
-       dbuf = dma_buf_export(buf, &vb2_dma_sg_dmabuf_ops, buf->size, flags, NULL);
+       dbuf = dma_buf_export(&exp_info);
         if (IS_ERR(dbuf))
                 return NULL;
  
diff --git a/drivers/media/v4l2-core/videobuf2-vmalloc.c b/drivers/media/v4l2-core/videobuf2-vmalloc.c

index bcde885..657ab30 100644 (file)
--- a/drivers/media/v4l2-core/videobuf2-vmalloc.c
+++ b/drivers/media/v4l2-core/videobuf2-vmalloc.c
@@ -368,11 +368,17 @@ static struct dma_buf *vb2_vmalloc_get_dmabuf(void *buf_priv, unsigned long flag
  {
         struct vb2_vmalloc_buf *buf = buf_priv;
         struct dma_buf *dbuf;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+       exp_info.ops = &vb2_vmalloc_dmabuf_ops;
+       exp_info.size = buf->size;
+       exp_info.flags = flags;
+       exp_info.priv = buf;
  
         if (WARN_ON(!buf->vaddr))
                 return NULL;
  
-       dbuf = dma_buf_export(buf, &vb2_vmalloc_dmabuf_ops, buf->size, flags, NULL);
+       dbuf = dma_buf_export(&exp_info);
         if (IS_ERR(dbuf))
                 return NULL;
  
diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c

index fc0c81e..c4aecc6 100644 (file)
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -74,15 +74,11 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
         ret = ec_dev->cmd_xfer(ec_dev, msg);
         if (msg->result == EC_RES_IN_PROGRESS) {
                 int i;
-               struct cros_ec_command status_msg;
-               struct ec_response_get_comms_status status;
+               struct cros_ec_command status_msg = { };
+               struct ec_response_get_comms_status *status;
  
-               status_msg.version = 0;
                 status_msg.command = EC_CMD_GET_COMMS_STATUS;
-               status_msg.outdata = NULL;
-               status_msg.outsize = 0;
-               status_msg.indata = (uint8_t *)&status;
-               status_msg.insize = sizeof(status);
+               status_msg.insize = sizeof(*status);
  
                 /*
                  * Query the EC's status until it's no longer busy or
@@ -98,7 +94,10 @@ int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
                         msg->result = status_msg.result;
                         if (status_msg.result != EC_RES_SUCCESS)
                                 break;
-                       if (!(status.flags & EC_COMMS_STATUS_PROCESSING))
+
+                       status = (struct ec_response_get_comms_status *)
+                                status_msg.indata;
+                       if (!(status->flags & EC_COMMS_STATUS_PROCESSING))
                                 break;
                 }
         }
@@ -119,6 +118,10 @@ static const struct mfd_cell cros_devs[] = {
                 .id = 2,
                 .of_compatible = "google,cros-ec-i2c-tunnel",
         },
+       {
+               .name = "cros-ec-ctl",
+               .id = 3,
+       },
  };
  
  int cros_ec_register(struct cros_ec_device *ec_dev)
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c

index 072f670..2b6ef6b 100644 (file)
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -388,7 +388,7 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
  {
         struct dma_slave_config cfg = { 0, };
         struct dma_chan *chan;
-       unsigned int slave_id;
+       void *slave_data = NULL;
         struct resource *res;
         dma_cap_mask_t mask;
         int ret;
@@ -397,13 +397,12 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
         dma_cap_set(DMA_SLAVE, mask);
  
         if (pdata)
-               slave_id = direction == DMA_MEM_TO_DEV
-                        ? pdata->slave_id_tx : pdata->slave_id_rx;
-       else
-               slave_id = 0;
+               slave_data = direction == DMA_MEM_TO_DEV ?
+                       (void *)pdata->slave_id_tx :
+                       (void *)pdata->slave_id_rx;
  
         chan = dma_request_slave_channel_compat(mask, shdma_chan_filter,
-                               (void *)(unsigned long)slave_id, &host->pd->dev,
+                               slave_data, &host->pd->dev,
                                 direction == DMA_MEM_TO_DEV ? "tx" : "rx");
  
         dev_dbg(&host->pd->dev, "%s: %s: got channel %p\n", __func__,
@@ -414,8 +413,6 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
  
         res = platform_get_resource(host->pd, IORESOURCE_MEM, 0);
  
-       /* In the OF case the driver will get the slave ID from the DT */
-       cfg.slave_id = slave_id;
         cfg.direction = direction;
  
         if (direction == DMA_DEV_TO_MEM) {
diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c

index 6906a90..354f4f3 100644 (file)
--- a/drivers/mmc/host/sh_mobile_sdhi.c
+++ b/drivers/mmc/host/sh_mobile_sdhi.c
@@ -201,7 +201,7 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
                 of_match_device(sh_mobile_sdhi_of_match, &pdev->dev);
         struct sh_mobile_sdhi *priv;
         struct tmio_mmc_data *mmc_data;
-       struct sh_mobile_sdhi_info *p = pdev->dev.platform_data;
+       struct tmio_mmc_data *mmd = pdev->dev.platform_data;
         struct tmio_mmc_host *host;
         struct resource *res;
         int irq, ret, i = 0;
@@ -245,30 +245,14 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev)
         else
                 host->bus_shift = 0;
  
-       mmc_data->capabilities = MMC_CAP_MMC_HIGHSPEED;
-       if (p) {
-               mmc_data->flags = p->tmio_flags;
-               mmc_data->ocr_mask = p->tmio_ocr_mask;
-               mmc_data->capabilities |= p->tmio_caps;
-               mmc_data->capabilities2 |= p->tmio_caps2;
-               mmc_data->cd_gpio = p->cd_gpio;
-
-               if (p->dma_slave_tx > 0 && p->dma_slave_rx > 0) {
-                       /*
-                        * Yes, we have to provide slave IDs twice to TMIO:
-                        * once as a filter parameter and once for channel
-                        * configuration as an explicit slave ID
-                        */
-                       dma_priv->chan_priv_tx = (void *)p->dma_slave_tx;
-                       dma_priv->chan_priv_rx = (void *)p->dma_slave_rx;
-                       dma_priv->slave_id_tx = p->dma_slave_tx;
-                       dma_priv->slave_id_rx = p->dma_slave_rx;
-               }
-       }
+       if (mmd)
+               *mmc_data = *mmd;
+
         dma_priv->filter = shdma_chan_filter;
         dma_priv->enable = sh_mobile_sdhi_enable_dma;
  
         mmc_data->alignment_shift = 1; /* 2-byte alignment */
+       mmc_data->capabilities |= MMC_CAP_MMC_HIGHSPEED;
  
         /*
          * All SDHI blocks support 2-byte and larger block sizes in 4-bit
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h

index fc3805e..4a597f5 100644 (file)
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -43,10 +43,6 @@ struct tmio_mmc_data;
  struct tmio_mmc_host;
  
  struct tmio_mmc_dma {
-       void *chan_priv_tx;
-       void *chan_priv_rx;
-       int slave_id_tx;
-       int slave_id_rx;
         enum dma_slave_buswidth dma_buswidth;
         bool (*filter)(struct dma_chan *chan, void *arg);
         void (*enable)(struct tmio_mmc_host *host, bool enable);
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c

index 331bb61..e4b05db 100644 (file)
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -261,7 +261,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
  {
         /* We can only either use DMA for both Tx and Rx or not use it at all */
         if (!host->dma || (!host->pdev->dev.of_node &&
-               (!host->dma->chan_priv_tx || !host->dma->chan_priv_rx)))
+               (!pdata->chan_priv_tx || !pdata->chan_priv_rx)))
                 return;
  
         if (!host->chan_tx && !host->chan_rx) {
@@ -278,7 +278,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
                 dma_cap_set(DMA_SLAVE, mask);
  
                 host->chan_tx = dma_request_slave_channel_compat(mask,
-                                       host->dma->filter, host->dma->chan_priv_tx,
+                                       host->dma->filter, pdata->chan_priv_tx,
                                         &host->pdev->dev, "tx");
                 dev_dbg(&host->pdev->dev, "%s: TX: got channel %p\n", __func__,
                         host->chan_tx);
@@ -286,8 +286,6 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
                 if (!host->chan_tx)
                         return;
  
-               if (host->dma->chan_priv_tx)
-                       cfg.slave_id = host->dma->slave_id_tx;
                 cfg.direction = DMA_MEM_TO_DEV;
                 cfg.dst_addr = res->start + (CTL_SD_DATA_PORT << host->bus_shift);
                 cfg.dst_addr_width = host->dma->dma_buswidth;
@@ -299,7 +297,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
                         goto ecfgtx;
  
                 host->chan_rx = dma_request_slave_channel_compat(mask,
-                                       host->dma->filter, host->dma->chan_priv_rx,
+                                       host->dma->filter, pdata->chan_priv_rx,
                                         &host->pdev->dev, "rx");
                 dev_dbg(&host->pdev->dev, "%s: RX: got channel %p\n", __func__,
                         host->chan_rx);
@@ -307,8 +305,6 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
                 if (!host->chan_rx)
                         goto ereqrx;
  
-               if (host->dma->chan_priv_rx)
-                       cfg.slave_id = host->dma->slave_id_rx;
                 cfg.direction = DMA_DEV_TO_MEM;
                 cfg.src_addr = cfg.dst_addr + host->pdata->dma_rx_offset;
                 cfg.src_addr_width = host->dma->dma_buswidth;
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig

index 71fea89..a03ad29 100644 (file)
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -309,6 +309,19 @@ config MTD_SWAP
           The driver provides wear leveling by storing erase counter into the
           OOB.
  
+config MTD_PARTITIONED_MASTER
+       bool "Retain master device when partitioned"
+       default n
+       depends on MTD
+       help
+         For historical reasons, by default, either a master is present or
+         several partitions are present, but not both. The concern was that
+         data listed in multiple partitions was dangerous; however, SCSI does
+         this and it is frequently useful for applications. This config option
+         leaves the master in even if the device is partitioned. It also makes
+         the parent of the partition device be the master device, rather than
+         what lies behind the master.
+
  source "drivers/mtd/chips/Kconfig"
  
  source "drivers/mtd/maps/Kconfig"
diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c

index 423666b..9a1a6ff 100644 (file)
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -206,23 +206,23 @@ static struct mtd_info *cfi_staa_setup(struct map_info *map)
                         mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].numblocks = ernum;
                 }
                 offset += (ersize * ernum);
-               }
+       }
  
-               if (offset != devsize) {
-                       /* Argh */
-                       printk(KERN_WARNING "Sum of regions (%lx) != total size of set of interleaved chips (%lx)\n", offset, devsize);
-                       kfree(mtd->eraseregions);
-                       kfree(cfi->cmdset_priv);
-                       kfree(mtd);
-                       return NULL;
-               }
+       if (offset != devsize) {
+               /* Argh */
+               printk(KERN_WARNING "Sum of regions (%lx) != total size of set of interleaved chips (%lx)\n", offset, devsize);
+               kfree(mtd->eraseregions);
+               kfree(cfi->cmdset_priv);
+               kfree(mtd);
+               return NULL;
+       }
  
-               for (i=0; i<mtd->numeraseregions;i++){
-                       printk(KERN_DEBUG "%d: offset=0x%llx,size=0x%x,blocks=%d\n",
-                              i, (unsigned long long)mtd->eraseregions[i].offset,
-                              mtd->eraseregions[i].erasesize,
-                              mtd->eraseregions[i].numblocks);
-               }
+       for (i=0; i<mtd->numeraseregions;i++){
+               printk(KERN_DEBUG "%d: offset=0x%llx,size=0x%x,blocks=%d\n",
+                      i, (unsigned long long)mtd->eraseregions[i].offset,
+                      mtd->eraseregions[i].erasesize,
+                      mtd->eraseregions[i].numblocks);
+       }
  
         /* Also select the correct geometry setup too */
         mtd->_erase = cfi_staa_erase_varsize;
diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c

index 66f0405..b16f3cd 100644 (file)
--- a/drivers/mtd/devices/block2mtd.c
+++ b/drivers/mtd/devices/block2mtd.c
@@ -9,7 +9,15 @@
  
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
+/*
+ * When the first attempt at device initialization fails, we may need to
+ * wait a little bit and retry. This timeout, by default 3 seconds, gives
+ * device time to start up. Required on BCM2708 and a few other chipsets.
+ */
+#define MTD_DEFAULT_TIMEOUT    3
+
  #include <linux/module.h>
+#include <linux/delay.h>
  #include <linux/fs.h>
  #include <linux/blkdev.h>
  #include <linux/bio.h>
@@ -209,10 +217,14 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
  }
  
  
-static struct block2mtd_dev *add_device(char *devname, int erase_size)
+static struct block2mtd_dev *add_device(char *devname, int erase_size,
+               int timeout)
  {
+#ifndef MODULE
+       int i;
+#endif
         const fmode_t mode = FMODE_READ | FMODE_WRITE | FMODE_EXCL;
-       struct block_device *bdev;
+       struct block_device *bdev = ERR_PTR(-ENODEV);
         struct block2mtd_dev *dev;
         char *name;
  
@@ -225,15 +237,28 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
  
         /* Get a handle on the device */
         bdev = blkdev_get_by_path(devname, mode, dev);
-#ifndef MODULE
-       if (IS_ERR(bdev)) {
-
-               /* We might not have rootfs mounted at this point. Try
-                  to resolve the device name by other means. */
  
-               dev_t devt = name_to_dev_t(devname);
-               if (devt)
-                       bdev = blkdev_get_by_dev(devt, mode, dev);
+#ifndef MODULE
+       /*
+        * We might not have the root device mounted at this point.
+        * Try to resolve the device name by other means.
+        */
+       for (i = 0; IS_ERR(bdev) && i <= timeout; i++) {
+               dev_t devt;
+
+               if (i)
+                       /*
+                        * Calling wait_for_device_probe in the first loop
+                        * was not enough, sleep for a bit in subsequent
+                        * go-arounds.
+                        */
+                       msleep(1000);
+               wait_for_device_probe();
+
+               devt = name_to_dev_t(devname);
+               if (!devt)
+                       continue;
+               bdev = blkdev_get_by_dev(devt, mode, dev);
         }
  #endif
  
@@ -280,6 +305,7 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size)
                 /* Device didn't get added, so free the entry */
                 goto err_destroy_mutex;
         }
+
         list_add(&dev->list, &blkmtd_device_list);
         pr_info("mtd%d: [%s] erase_size = %dKiB [%d]\n",
                 dev->mtd.index,
@@ -348,16 +374,19 @@ static inline void kill_final_newline(char *str)
  
  #ifndef MODULE
  static int block2mtd_init_called = 0;
-static char block2mtd_paramline[80 + 12]; /* 80 for device, 12 for erase size */
+/* 80 for device, 12 for erase size */
+static char block2mtd_paramline[80 + 12];
  #endif
  
  static int block2mtd_setup2(const char *val)
  {
-       char buf[80 + 12]; /* 80 for device, 12 for erase size */
+       /* 80 for device, 12 for erase size, 80 for name, 8 for timeout */
+       char buf[80 + 12 + 80 + 8];
         char *str = buf;
         char *token[2];
         char *name;
         size_t erase_size = PAGE_SIZE;
+       unsigned long timeout = MTD_DEFAULT_TIMEOUT;
         int i, ret;
  
         if (strnlen(val, sizeof(buf)) >= sizeof(buf)) {
@@ -395,7 +424,7 @@ static int block2mtd_setup2(const char *val)
                 }
         }
  
-       add_device(name, erase_size);
+       add_device(name, erase_size, timeout);
  
         return 0;
  }
@@ -463,8 +492,7 @@ static void block2mtd_exit(void)
         }
  }
  
-
-module_init(block2mtd_init);
+late_initcall(block2mtd_init);
  module_exit(block2mtd_exit);
  
  MODULE_LICENSE("GPL");
diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c

index 448ce42..866d319 100644 (file)
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -1805,7 +1805,7 @@ static int __init doc_dbg_register(struct docg3 *docg3)
         }
  }
  
-static void __exit doc_dbg_unregister(struct docg3 *docg3)
+static void doc_dbg_unregister(struct docg3 *docg3)
  {
         debugfs_remove_recursive(docg3->debugfs_root);
  }
@@ -2033,7 +2033,7 @@ static int __init docg3_probe(struct platform_device *pdev)
         struct mtd_info *mtd;
         struct resource *ress;
         void __iomem *base;
-       int ret, floor, found = 0;
+       int ret, floor;
         struct docg3_cascade *cascade;
  
         ret = -ENXIO;
@@ -2073,14 +2073,11 @@ static int __init docg3_probe(struct platform_device *pdev)
                                                 0);
                 if (ret)
                         goto err_probe;
-               found++;
         }
  
         ret = doc_register_sysfs(pdev, cascade);
         if (ret)
                 goto err_probe;
-       if (!found)
-               goto notfound;
  
         platform_set_drvdata(pdev, cascade);
         doc_dbg_register(cascade->floors[0]->priv);
@@ -2103,7 +2100,7 @@ err_probe:
   *
   * Returns 0
   */
-static int __exit docg3_release(struct platform_device *pdev)
+static int docg3_release(struct platform_device *pdev)
  {
         struct docg3_cascade *cascade = platform_get_drvdata(pdev);
         struct docg3 *docg3 = cascade->floors[0]->priv;
@@ -2134,7 +2131,7 @@ static struct platform_driver g3_driver = {
         },
         .suspend        = docg3_suspend,
         .resume         = docg3_resume,
-       .remove         = __exit_p(docg3_release),
+       .remove         = docg3_release,
  };
  
  module_platform_driver_probe(g3_driver, docg3_probe);
diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c

index 85e3546..7c8b169 100644 (file)
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -223,6 +223,8 @@ static int m25p_probe(struct spi_device *spi)
          */
         if (data && data->type)
                 flash_name = data->type;
+       else if (!strcmp(spi->modalias, "nor-jedec"))
+               flash_name = NULL; /* auto-detect */
         else
                 flash_name = spi->modalias;
  
@@ -247,9 +249,16 @@ static int m25p_remove(struct spi_device *spi)
  }
  
  /*
- * XXX This needs to be kept in sync with spi_nor_ids.  We can't share
- * it with spi-nor, because if this is built as a module then modpost
- * won't be able to read it and add appropriate aliases.
+ * Do NOT add to this array without reading the following:
+ *
+ * Historically, many flash devices are bound to this driver by their name. But
+ * since most of these flash are compatible to some extent, and their
+ * differences can often be differentiated by the JEDEC read-ID command, we
+ * encourage new users to add support to the spi-nor library, and simply bind
+ * against a generic string here (e.g., "nor-jedec").
+ *
+ * Many flash names are kept here in this list (as well as in spi-nor.c) to
+ * keep them available as module aliases for existing platforms.
   */
  static const struct spi_device_id m25p_ids[] = {
         {"at25fs010"},  {"at25fs040"},  {"at25df041a"}, {"at25df321a"},
@@ -291,6 +300,12 @@ static const struct spi_device_id m25p_ids[] = {
         {"w25x64"},     {"w25q64"},     {"w25q80"},     {"w25q80bl"},
         {"w25q128"},    {"w25q256"},    {"cat25c11"},
         {"cat25c03"},   {"cat25c09"},   {"cat25c17"},   {"cat25128"},
+
+       /*
+        * Generic support for SPI NOR that can be identified by the JEDEC READ
+        * ID opcode (0x9F). Use this, if possible.
+        */
+       {"nor-jedec"},
         { },
  };
  MODULE_DEVICE_TABLE(spi, m25p_ids);
diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig

index ba801d2..e715ae9 100644 (file)
--- a/drivers/mtd/maps/Kconfig
+++ b/drivers/mtd/maps/Kconfig
@@ -242,7 +242,7 @@ config MTD_L440GX
  
  config MTD_CFI_FLAGADM
         tristate "CFI Flash device mapping on FlagaDM"
-       depends on 8xx && MTD_CFI
+       depends on PPC_8xx && MTD_CFI
         help
           Mapping for the Flaga digital module. If you don't have one, ignore
           this setting.
diff --git a/drivers/mtd/maps/sa1100-flash.c b/drivers/mtd/maps/sa1100-flash.c

index ea69720..892ad6a 100644 (file)
--- a/drivers/mtd/maps/sa1100-flash.c
+++ b/drivers/mtd/maps/sa1100-flash.c
@@ -274,7 +274,7 @@ static int sa1100_mtd_probe(struct platform_device *pdev)
         return err;
  }
  
-static int __exit sa1100_mtd_remove(struct platform_device *pdev)
+static int sa1100_mtd_remove(struct platform_device *pdev)
  {
         struct sa_info *info = platform_get_drvdata(pdev);
         struct flash_platform_data *plat = dev_get_platdata(&pdev->dev);
@@ -286,7 +286,7 @@ static int __exit sa1100_mtd_remove(struct platform_device *pdev)
  
  static struct platform_driver sa1100_mtd_driver = {
         .probe          = sa1100_mtd_probe,
-       .remove         = __exit_p(sa1100_mtd_remove),
+       .remove         = sa1100_mtd_remove,
         .driver         = {
                 .name   = "sa1100-mtd",
         },
diff --git a/drivers/mtd/maps/ts5500_flash.c b/drivers/mtd/maps/ts5500_flash.c

index d1d671d..9969fed 100644 (file)
--- a/drivers/mtd/maps/ts5500_flash.c
+++ b/drivers/mtd/maps/ts5500_flash.c
@@ -117,5 +117,5 @@ module_exit(cleanup_ts5500_map);
  
  MODULE_LICENSE("GPL");
  MODULE_AUTHOR("Sean Young <sean@mess.org>");
-MODULE_DESCRIPTION("MTD map driver for Techology Systems TS-5500 board");
+MODULE_DESCRIPTION("MTD map driver for Technology Systems TS-5500 board");
  
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c

index d08229e..2b0c528 100644 (file)
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -171,9 +171,6 @@ static void mtd_blktrans_work(struct work_struct *work)
                 background_done = 0;
         }
  
-       if (req)
-               __blk_end_request_all(req, -EIO);
-
         spin_unlock_irq(rq->queue_lock);
  }
  
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c

index 11883bd..d172195 100644 (file)
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -38,6 +38,7 @@
  #include <linux/gfp.h>
  #include <linux/slab.h>
  #include <linux/reboot.h>
+#include <linux/kconfig.h>
  
  #include <linux/mtd/mtd.h>
  #include <linux/mtd/partitions.h>
@@ -501,6 +502,29 @@ out_error:
         return ret;
  }
  
+static int mtd_add_device_partitions(struct mtd_info *mtd,
+                                    struct mtd_partition *real_parts,
+                                    int nbparts)
+{
+       int ret;
+
+       if (nbparts == 0 || IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
+               ret = add_mtd_device(mtd);
+               if (ret == 1)
+                       return -ENODEV;
+       }
+
+       if (nbparts > 0) {
+               ret = add_mtd_partitions(mtd, real_parts, nbparts);
+               if (ret && IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+                       del_mtd_device(mtd);
+               return ret;
+       }
+
+       return 0;
+}
+
+
  /**
   * mtd_device_parse_register - parse partitions and register an MTD device.
   *
@@ -523,7 +547,8 @@ out_error:
   *   found this functions tries to fallback to information specified in
   *   @parts/@nr_parts.
   * * If any partitioning info was found, this function registers the found
- *   partitions.
+ *   partitions. If the MTD_PARTITIONED_MASTER option is set, then the device
+ *   as a whole is registered first.
   * * If no partitions were found this function just registers the MTD device
   *   @mtd and exits.
   *
@@ -534,27 +559,21 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
                               const struct mtd_partition *parts,
                               int nr_parts)
  {
-       int err;
-       struct mtd_partition *real_parts;
+       int ret;
+       struct mtd_partition *real_parts = NULL;
  
-       err = parse_mtd_partitions(mtd, types, &real_parts, parser_data);
-       if (err <= 0 && nr_parts && parts) {
+       ret = parse_mtd_partitions(mtd, types, &real_parts, parser_data);
+       if (ret <= 0 && nr_parts && parts) {
                 real_parts = kmemdup(parts, sizeof(*parts) * nr_parts,
                                      GFP_KERNEL);
                 if (!real_parts)
-                       err = -ENOMEM;
+                       ret = -ENOMEM;
                 else
-                       err = nr_parts;
+                       ret = nr_parts;
         }
  
-       if (err > 0) {
-               err = add_mtd_partitions(mtd, real_parts, err);
-               kfree(real_parts);
-       } else if (err == 0) {
-               err = add_mtd_device(mtd);
-               if (err == 1)
-                       err = -ENODEV;
-       }
+       if (ret >= 0)
+               ret = mtd_add_device_partitions(mtd, real_parts, ret);
  
         /*
          * FIXME: some drivers unfortunately call this function more than once.
@@ -569,7 +588,8 @@ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types,
                 register_reboot_notifier(&mtd->reboot_notifier);
         }
  
-       return err;
+       kfree(real_parts);
+       return ret;
  }
  EXPORT_SYMBOL_GPL(mtd_device_parse_register);
  
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c

index e779de3..cafdb88 100644 (file)
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -30,6 +30,7 @@
  #include <linux/mtd/mtd.h>
  #include <linux/mtd/partitions.h>
  #include <linux/err.h>
+#include <linux/kconfig.h>
  
  #include "mtdcore.h"
  
@@ -379,10 +380,17 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
         slave->mtd.name = name;
         slave->mtd.owner = master->owner;
  
-       /* NOTE:  we don't arrange MTDs as a tree; it'd be error-prone
-        * to have the same data be in two different partitions.
+       /* NOTE: Historically, we didn't arrange MTDs as a tree out of
+        * concern for showing the same data in multiple partitions.
+        * However, it is very useful to have the master node present,
+        * so the MTD_PARTITIONED_MASTER option allows that. The master
+        * will have device nodes etc only if this is set, so make the
+        * parent conditional on that option. Note, this is a way to
+        * distinguish between the master and the partition in sysfs.
          */
-       slave->mtd.dev.parent = master->dev.parent;
+       slave->mtd.dev.parent = IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER) ?
+                               &master->dev :
+                               master->dev.parent;
  
         slave->mtd._read = part_read;
         slave->mtd._write = part_write;
@@ -546,12 +554,35 @@ out_register:
         return slave;
  }
  
+static ssize_t mtd_partition_offset_show(struct device *dev,
+               struct device_attribute *attr, char *buf)
+{
+       struct mtd_info *mtd = dev_get_drvdata(dev);
+       struct mtd_part *part = PART(mtd);
+       return snprintf(buf, PAGE_SIZE, "%lld\n", part->offset);
+}
+
+static DEVICE_ATTR(offset, S_IRUGO, mtd_partition_offset_show, NULL);
+
+static const struct attribute *mtd_partition_attrs[] = {
+       &dev_attr_offset.attr,
+       NULL
+};
+
+static int mtd_add_partition_attrs(struct mtd_part *new)
+{
+       int ret = sysfs_create_files(&new->mtd.dev.kobj, mtd_partition_attrs);
+       if (ret)
+               printk(KERN_WARNING
+                      "mtd: failed to create partition attrs, err=%d\n", ret);
+       return ret;
+}
+
  int mtd_add_partition(struct mtd_info *master, const char *name,
                       long long offset, long long length)
  {
         struct mtd_partition part;
-       struct mtd_part *p, *new;
-       uint64_t start, end;
+       struct mtd_part *new;
         int ret = 0;
  
         /* the direct offset is expected */
@@ -575,31 +606,15 @@ int mtd_add_partition(struct mtd_info *master, const char *name,
         if (IS_ERR(new))
                 return PTR_ERR(new);
  
-       start = offset;
-       end = offset + length;
-
         mutex_lock(&mtd_partitions_mutex);
-       list_for_each_entry(p, &mtd_partitions, list)
-               if (p->master == master) {
-                       if ((start >= p->offset) &&
-                           (start < (p->offset + p->mtd.size)))
-                               goto err_inv;
-
-                       if ((end >= p->offset) &&
-                           (end < (p->offset + p->mtd.size)))
-                               goto err_inv;
-               }
-
         list_add(&new->list, &mtd_partitions);
         mutex_unlock(&mtd_partitions_mutex);
  
         add_mtd_device(&new->mtd);
  
+       mtd_add_partition_attrs(new);
+
         return ret;
-err_inv:
-       mutex_unlock(&mtd_partitions_mutex);
-       free_partition(new);
-       return -EINVAL;
  }
  EXPORT_SYMBOL_GPL(mtd_add_partition);
  
@@ -612,6 +627,8 @@ int mtd_del_partition(struct mtd_info *master, int partno)
         list_for_each_entry_safe(slave, next, &mtd_partitions, list)
                 if ((slave->master == master) &&
                     (slave->mtd.index == partno)) {
+                       sysfs_remove_files(&slave->mtd.dev.kobj,
+                                          mtd_partition_attrs);
                         ret = del_mtd_device(&slave->mtd);
                         if (ret < 0)
                                 break;
@@ -631,8 +648,8 @@ EXPORT_SYMBOL_GPL(mtd_del_partition);
   * and registers slave MTD objects which are bound to the master according to
   * the partition definitions.
   *
- * We don't register the master, or expect the caller to have done so,
- * for reasons of data integrity.
+ * For historical reasons, this function's caller only registers the master
+ * if the MTD_PARTITIONED_MASTER config option is set.
   */
  
  int add_mtd_partitions(struct mtd_info *master,
@@ -655,6 +672,7 @@ int add_mtd_partitions(struct mtd_info *master,
                 mutex_unlock(&mtd_partitions_mutex);
  
                 add_mtd_device(&slave->mtd);
+               mtd_add_partition_attrs(slave);
  
                 cur_offset = slave->offset + slave->mtd.size;
         }
diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c

index d93c849..46010bd 100644 (file)
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -485,7 +485,7 @@ static void pmecc_config_ecc_layout(struct nand_ecclayout *layout,
         for (i = 0; i < ecc_len; i++)
                 layout->eccpos[i] = oobsize - ecc_len + i;
  
-       layout->oobfree[0].offset = 2;
+       layout->oobfree[0].offset = PMECC_OOB_RESERVED_BYTES;
         layout->oobfree[0].length =
                 oobsize - ecc_len - layout->oobfree[0].offset;
  }
@@ -1204,14 +1204,14 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev,
                 goto err;
         }
  
-       regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
-       host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev, regs_rom);
-       if (IS_ERR(host->pmecc_rom_base)) {
-               if (!host->has_no_lookup_table)
-                       /* Don't display the information again */
+       if (!host->has_no_lookup_table) {
+               regs_rom = platform_get_resource(pdev, IORESOURCE_MEM, 3);
+               host->pmecc_rom_base = devm_ioremap_resource(&pdev->dev,
+                                                               regs_rom);
+               if (IS_ERR(host->pmecc_rom_base)) {
                         dev_err(host->dev, "Can not get I/O resource for ROM, will build a lookup table in runtime!\n");
-
-               host->has_no_lookup_table = true;
+                       host->has_no_lookup_table = true;
+               }
         }
  
         if (host->has_no_lookup_table) {
@@ -1254,7 +1254,8 @@ static int atmel_pmecc_nand_init_params(struct platform_device *pdev,
                 nand_chip->ecc.steps = mtd->writesize / sector_size;
                 nand_chip->ecc.total = nand_chip->ecc.bytes *
                         nand_chip->ecc.steps;
-               if (nand_chip->ecc.total > mtd->oobsize - 2) {
+               if (nand_chip->ecc.total >
+                               mtd->oobsize - PMECC_OOB_RESERVED_BYTES) {
                         dev_err(host->dev, "No room for ECC bytes\n");
                         err_no = -EINVAL;
                         goto err;
@@ -1719,7 +1720,7 @@ static int nfc_wait_interrupt(struct atmel_nand_host *host, u32 flag)
                 comp[index++] = &host->nfc->comp_cmd_done;
  
         if (index == 0) {
-               dev_err(host->dev, "Unkown interrupt flag: 0x%08x\n", flag);
+               dev_err(host->dev, "Unknown interrupt flag: 0x%08x\n", flag);
                 return -EINVAL;
         }
  
@@ -1752,11 +1753,10 @@ static int nfc_send_command(struct atmel_nand_host *host,
                 cmd, addr, cycle0);
  
         timeout = jiffies + msecs_to_jiffies(NFC_TIME_OUT_MS);
-       while (nfc_cmd_readl(NFCADDR_CMD_NFCBUSY, host->nfc->base_cmd_regs)
-                       & NFCADDR_CMD_NFCBUSY) {
+       while (nfc_readl(host->nfc->hsmc_regs, SR) & NFC_SR_BUSY) {
                 if (time_after(jiffies, timeout)) {
                         dev_err(host->dev,
-                               "Time out to wait CMD_NFCBUSY ready!\n");
+                               "Time out to wait for NFC ready!\n");
                         return -ETIMEDOUT;
                 }
         }
diff --git a/drivers/mtd/nand/atmel_nand_ecc.h b/drivers/mtd/nand/atmel_nand_ecc.h

index d4035e3..668e735 100644 (file)
--- a/drivers/mtd/nand/atmel_nand_ecc.h
+++ b/drivers/mtd/nand/atmel_nand_ecc.h
@@ -152,4 +152,7 @@
  /* Time out value for reading PMECC status register */
  #define PMECC_MAX_TIMEOUT_MS                   100
  
+/* Reserved bytes in oob area */
+#define PMECC_OOB_RESERVED_BYTES               2
+
  #endif
diff --git a/drivers/mtd/nand/atmel_nand_nfc.h b/drivers/mtd/nand/atmel_nand_nfc.h

index 85b8ca6..4d5d262 100644 (file)
--- a/drivers/mtd/nand/atmel_nand_nfc.h
+++ b/drivers/mtd/nand/atmel_nand_nfc.h
@@ -35,6 +35,7 @@
  #define                NFC_CTRL_DISABLE        (1 << 1)
  
  #define ATMEL_HSMC_NFC_SR      0x08            /* NFC Status Register */
+#define                NFC_SR_BUSY             (1 << 8)
  #define                NFC_SR_XFR_DONE         (1 << 16)
  #define                NFC_SR_CMD_DONE         (1 << 17)
  #define                NFC_SR_DTOE             (1 << 20)
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c

index f44c606..870c7fc 100644 (file)
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -225,7 +225,6 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali,
         uint16_t Twhr[6] = {120, 80, 80, 60, 60, 60};
         uint16_t Tcs[6] = {70, 35, 25, 25, 20, 15};
  
-       uint16_t TclsRising = 1;
         uint16_t data_invalid_rhoh, data_invalid_rloh, data_invalid;
         uint16_t dv_window = 0;
         uint16_t en_lo, en_hi;
@@ -276,8 +275,6 @@ static void nand_onfi_timing_set(struct denali_nand_info *denali,
         re_2_re = CEIL_DIV(Trhz[mode], CLK_X);
         we_2_re = CEIL_DIV(Twhr[mode], CLK_X);
         cs_cnt = CEIL_DIV((Tcs[mode] - Trp[mode]), CLK_X);
-       if (!TclsRising)
-               cs_cnt = CEIL_DIV(Tcs[mode], CLK_X);
         if (cs_cnt == 0)
                 cs_cnt = 1;
  
@@ -1536,6 +1533,9 @@ int denali_init(struct denali_nand_info *denali)
         denali->nand.options |= NAND_SKIP_BBTSCAN;
         denali->nand.ecc.mode = NAND_ECC_HW_SYNDROME;
  
+       /* no subpage writes on denali */
+       denali->nand.options |= NAND_NO_SUBPAGE_WRITE;
+
         /*
          * Denali Controller only support 15bit and 8bit ECC in MRST,
          * so just let controller do 15bit ECC for MLC and 8bit ECC for
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c

index 4c05f4f..51394e5 100644 (file)
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -317,7 +317,7 @@ static void fsl_ifc_run_command(struct mtd_info *mtd)
  
         /* wait for command complete flag or timeout */
         wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat,
-                          IFC_TIMEOUT_MSECS * HZ/1000);
+                          msecs_to_jiffies(IFC_TIMEOUT_MSECS));
  
         /* ctrl->nand_stat will be updated from IRQ context */
         if (!ctrl->nand_stat)
@@ -860,7 +860,7 @@ static void fsl_ifc_sram_init(struct fsl_ifc_mtd *priv)
  
         /* wait for command complete flag or timeout */
         wait_event_timeout(ctrl->nand_wait, ctrl->nand_stat,
-                          IFC_TIMEOUT_MSECS * HZ/1000);
+                          msecs_to_jiffies(IFC_TIMEOUT_MSECS));
  
         if (ctrl->nand_stat != IFC_NAND_EVTER_STAT_OPC)
                 printk(KERN_ERR "fsl-ifc: Failed to Initialise SRAM\n");
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c

index edfaa21..e58af4b 100644 (file)
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -873,6 +873,7 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
  {
         struct fsmc_nand_platform_data *pdata = dev_get_platdata(&pdev->dev);
         u32 val;
+       int ret;
  
         /* Set default NAND width to 8 bits */
         pdata->width = 8;
@@ -891,8 +892,12 @@ static int fsmc_nand_probe_config_dt(struct platform_device *pdev,
                                 sizeof(*pdata->nand_timings), GFP_KERNEL);
         if (!pdata->nand_timings)
                 return -ENOMEM;
-       of_property_read_u8_array(np, "timings", (u8 *)pdata->nand_timings,
+       ret = of_property_read_u8_array(np, "timings", (u8 *)pdata->nand_timings,
                                                 sizeof(*pdata->nand_timings));
+       if (ret) {
+               dev_info(&pdev->dev, "No timings in dts specified, using default timings!\n");
+               pdata->nand_timings = NULL;
+       }
  
         /* Set default NAND bank to 0 */
         pdata->bank = 0;
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c

index 33f3c3c..1b8f350 100644 (file)
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -446,7 +446,7 @@ int start_dma_without_bch_irq(struct gpmi_nand_data *this,
                                 struct dma_async_tx_descriptor *desc)
  {
         struct completion *dma_c = &this->dma_done;
-       int err;
+       unsigned long timeout;
  
         init_completion(dma_c);
  
@@ -456,8 +456,8 @@ int start_dma_without_bch_irq(struct gpmi_nand_data *this,
         dma_async_issue_pending(get_dma_chan(this));
  
         /* Wait for the interrupt from the DMA block. */
-       err = wait_for_completion_timeout(dma_c, msecs_to_jiffies(1000));
-       if (!err) {
+       timeout = wait_for_completion_timeout(dma_c, msecs_to_jiffies(1000));
+       if (!timeout) {
                 dev_err(this->dev, "DMA timeout, last DMA :%d\n",
                         this->last_dma_type);
                 gpmi_dump_info(this);
@@ -477,7 +477,7 @@ int start_dma_with_bch_irq(struct gpmi_nand_data *this,
                         struct dma_async_tx_descriptor *desc)
  {
         struct completion *bch_c = &this->bch_done;
-       int err;
+       unsigned long timeout;
  
         /* Prepare to receive an interrupt from the BCH block. */
         init_completion(bch_c);
@@ -486,8 +486,8 @@ int start_dma_with_bch_irq(struct gpmi_nand_data *this,
         start_dma_without_bch_irq(this, desc);
  
         /* Wait for the interrupt from the BCH block. */
-       err = wait_for_completion_timeout(bch_c, msecs_to_jiffies(1000));
-       if (!err) {
+       timeout = wait_for_completion_timeout(bch_c, msecs_to_jiffies(1000));
+       if (!timeout) {
                 dev_err(this->dev, "BCH timeout, last DMA :%d\n",
                         this->last_dma_type);
                 gpmi_dump_info(this);
@@ -1950,7 +1950,9 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
         ret = nand_boot_init(this);
         if (ret)
                 goto err_out;
-       chip->scan_bbt(mtd);
+       ret = chip->scan_bbt(mtd);
+       if (ret)
+               goto err_out;
  
         ppdata.of_node = this->pdev->dev.of_node;
         ret = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0);
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c

index a8f550f..372e0e3 100644 (file)
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -386,26 +386,51 @@ static irqreturn_t mxc_nfc_irq(int irq, void *dev_id)
  /* This function polls the NANDFC to wait for the basic operation to
   * complete by checking the INT bit of config2 register.
   */
-static void wait_op_done(struct mxc_nand_host *host, int useirq)
+static int wait_op_done(struct mxc_nand_host *host, int useirq)
  {
-       int max_retries = 8000;
+       int ret = 0;
+
+       /*
+        * If operation is already complete, don't bother to setup an irq or a
+        * loop.
+        */
+       if (host->devtype_data->check_int(host))
+               return 0;
  
         if (useirq) {
-               if (!host->devtype_data->check_int(host)) {
-                       reinit_completion(&host->op_completion);
-                       irq_control(host, 1);
-                       wait_for_completion(&host->op_completion);
+               unsigned long timeout;
+
+               reinit_completion(&host->op_completion);
+
+               irq_control(host, 1);
+
+               timeout = wait_for_completion_timeout(&host->op_completion, HZ);
+               if (!timeout && !host->devtype_data->check_int(host)) {
+                       dev_dbg(host->dev, "timeout waiting for irq\n");
+                       ret = -ETIMEDOUT;
                 }
         } else {
-               while (max_retries-- > 0) {
-                       if (host->devtype_data->check_int(host))
-                               break;
+               int max_retries = 8000;
+               int done;
  
+               do {
                         udelay(1);
+
+                       done = host->devtype_data->check_int(host);
+                       if (done)
+                               break;
+
+               } while (--max_retries);
+
+               if (!done) {
+                       dev_dbg(host->dev, "timeout polling for completion\n");
+                       ret = -ETIMEDOUT;
                 }
-               if (max_retries < 0)
-                       pr_debug("%s: INT not set\n", __func__);
         }
+
+       WARN_ONCE(ret < 0, "timeout! useirq=%d\n", useirq);
+
+       return ret;
  }
  
  static void send_cmd_v3(struct mxc_nand_host *host, uint16_t cmd, int useirq)
@@ -527,30 +552,17 @@ static void send_page_v1(struct mtd_info *mtd, unsigned int ops)
  
  static void send_read_id_v3(struct mxc_nand_host *host)
  {
-       struct nand_chip *this = &host->nand;
-
         /* Read ID into main buffer */
         writel(NFC_ID, NFC_V3_LAUNCH);
  
         wait_op_done(host, true);
  
         memcpy32_fromio(host->data_buf, host->main_area0, 16);
-
-       if (this->options & NAND_BUSWIDTH_16) {
-               /* compress the ID info */
-               host->data_buf[1] = host->data_buf[2];
-               host->data_buf[2] = host->data_buf[4];
-               host->data_buf[3] = host->data_buf[6];
-               host->data_buf[4] = host->data_buf[8];
-               host->data_buf[5] = host->data_buf[10];
-       }
  }
  
  /* Request the NANDFC to perform a read of the NAND device ID. */
  static void send_read_id_v1_v2(struct mxc_nand_host *host)
  {
-       struct nand_chip *this = &host->nand;
-
         /* NANDFC buffer 0 is used for device ID output */
         writew(host->active_cs << 4, NFC_V1_V2_BUF_ADDR);
  
@@ -560,15 +572,6 @@ static void send_read_id_v1_v2(struct mxc_nand_host *host)
         wait_op_done(host, true);
  
         memcpy32_fromio(host->data_buf, host->main_area0, 16);
-
-       if (this->options & NAND_BUSWIDTH_16) {
-               /* compress the ID info */
-               host->data_buf[1] = host->data_buf[2];
-               host->data_buf[2] = host->data_buf[4];
-               host->data_buf[3] = host->data_buf[6];
-               host->data_buf[4] = host->data_buf[8];
-               host->data_buf[5] = host->data_buf[10];
-       }
  }
  
  static uint16_t get_dev_status_v3(struct mxc_nand_host *host)
@@ -694,9 +697,17 @@ static u_char mxc_nand_read_byte(struct mtd_info *mtd)
         if (host->status_request)
                 return host->devtype_data->get_dev_status(host) & 0xFF;
  
-       ret = *(uint8_t *)(host->data_buf + host->buf_start);
-       host->buf_start++;
+       if (nand_chip->options & NAND_BUSWIDTH_16) {
+               /* only take the lower byte of each word */
+               ret = *(uint16_t *)(host->data_buf + host->buf_start);
+
+               host->buf_start += 2;
+       } else {
+               ret = *(uint8_t *)(host->data_buf + host->buf_start);
+               host->buf_start++;
+       }
  
+       pr_debug("%s: ret=0x%hhx (start=%u)\n", __func__, ret, host->buf_start);
         return ret;
  }
  
@@ -825,6 +836,12 @@ static void copy_spare(struct mtd_info *mtd, bool bfrom)
         }
  }
  
+/*
+ * MXC NANDFC can only perform full page+spare or spare-only read/write.  When
+ * the upper layers perform a read/write buf operation, the saved column address
+ * is used to index into the full page. So usually this function is called with
+ * column == 0 (unless no column cycle is needed indicated by column == -1)
+ */
  static void mxc_do_addr_cycle(struct mtd_info *mtd, int column, int page_addr)
  {
         struct nand_chip *nand_chip = mtd->priv;
@@ -832,16 +849,13 @@ static void mxc_do_addr_cycle(struct mtd_info *mtd, int column, int page_addr)
  
         /* Write out column address, if necessary */
         if (column != -1) {
-               /*
-                * MXC NANDFC can only perform full page+spare or
-                * spare-only read/write.  When the upper layers
-                * perform a read/write buf operation, the saved column
-                 * address is used to index into the full page.
-                */
-               host->devtype_data->send_addr(host, 0, page_addr == -1);
+               host->devtype_data->send_addr(host, column & 0xff,
+                                             page_addr == -1);
                 if (mtd->writesize > 512)
                         /* another col addr cycle for 2k page */
-                       host->devtype_data->send_addr(host, 0, false);
+                       host->devtype_data->send_addr(host,
+                                                     (column >> 8) & 0xff,
+                                                     false);
         }
  
         /* Write out page address, if necessary */
@@ -903,7 +917,7 @@ static void preset_v1(struct mtd_info *mtd)
         struct mxc_nand_host *host = nand_chip->priv;
         uint16_t config1 = 0;
  
-       if (nand_chip->ecc.mode == NAND_ECC_HW)
+       if (nand_chip->ecc.mode == NAND_ECC_HW && mtd->writesize)
                 config1 |= NFC_V1_V2_CONFIG1_ECC_EN;
  
         if (!host->devtype_data->irqpending_quirk)
@@ -931,9 +945,6 @@ static void preset_v2(struct mtd_info *mtd)
         struct mxc_nand_host *host = nand_chip->priv;
         uint16_t config1 = 0;
  
-       if (nand_chip->ecc.mode == NAND_ECC_HW)
-               config1 |= NFC_V1_V2_CONFIG1_ECC_EN;
-
         config1 |= NFC_V2_CONFIG1_FP_INT;
  
         if (!host->devtype_data->irqpending_quirk)
@@ -942,6 +953,9 @@ static void preset_v2(struct mtd_info *mtd)
         if (mtd->writesize) {
                 uint16_t pages_per_block = mtd->erasesize / mtd->writesize;
  
+               if (nand_chip->ecc.mode == NAND_ECC_HW)
+                       config1 |= NFC_V1_V2_CONFIG1_ECC_EN;
+
                 host->eccsize = get_eccsize(mtd);
                 if (host->eccsize == 4)
                         config1 |= NFC_V2_CONFIG1_ECC_MODE_4;
@@ -999,9 +1013,6 @@ static void preset_v3(struct mtd_info *mtd)
                 NFC_V3_CONFIG2_INT_MSK |
                 NFC_V3_CONFIG2_NUM_ADDR_PHASE0;
  
-       if (chip->ecc.mode == NAND_ECC_HW)
-               config2 |= NFC_V3_CONFIG2_ECC_EN;
-
         addr_phases = fls(chip->pagemask) >> 3;
  
         if (mtd->writesize == 2048) {
@@ -1016,6 +1027,9 @@ static void preset_v3(struct mtd_info *mtd)
         }
  
         if (mtd->writesize) {
+               if (chip->ecc.mode == NAND_ECC_HW)
+                       config2 |= NFC_V3_CONFIG2_ECC_EN;
+
                 config2 |= NFC_V3_CONFIG2_PPB(
                                 ffs(mtd->erasesize / mtd->writesize) - 6,
                                 host->devtype_data->ppb_shift);
@@ -1066,6 +1080,9 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                 host->status_request = true;
  
                 host->devtype_data->send_cmd(host, command, true);
+               WARN_ONCE(column != -1 || page_addr != -1,
+                         "Unexpected column/row value (cmd=%u, col=%d, row=%d)\n",
+                         command, column, page_addr);
                 mxc_do_addr_cycle(mtd, column, page_addr);
                 break;
  
@@ -1079,7 +1096,10 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                 command = NAND_CMD_READ0; /* only READ0 is valid */
  
                 host->devtype_data->send_cmd(host, command, false);
-               mxc_do_addr_cycle(mtd, column, page_addr);
+               WARN_ONCE(column < 0,
+                         "Unexpected column/row value (cmd=%u, col=%d, row=%d)\n",
+                         command, column, page_addr);
+               mxc_do_addr_cycle(mtd, 0, page_addr);
  
                 if (mtd->writesize > 512)
                         host->devtype_data->send_cmd(host,
@@ -1100,7 +1120,10 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                 host->buf_start = column;
  
                 host->devtype_data->send_cmd(host, command, false);
-               mxc_do_addr_cycle(mtd, column, page_addr);
+               WARN_ONCE(column < -1,
+                         "Unexpected column/row value (cmd=%u, col=%d, row=%d)\n",
+                         command, column, page_addr);
+               mxc_do_addr_cycle(mtd, 0, page_addr);
                 break;
  
         case NAND_CMD_PAGEPROG:
@@ -1108,6 +1131,9 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                 copy_spare(mtd, false);
                 host->devtype_data->send_page(mtd, NFC_INPUT);
                 host->devtype_data->send_cmd(host, command, true);
+               WARN_ONCE(column != -1 || page_addr != -1,
+                         "Unexpected column/row value (cmd=%u, col=%d, row=%d)\n",
+                         command, column, page_addr);
                 mxc_do_addr_cycle(mtd, column, page_addr);
                 break;
  
@@ -1115,15 +1141,29 @@ static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
                 host->devtype_data->send_cmd(host, command, true);
                 mxc_do_addr_cycle(mtd, column, page_addr);
                 host->devtype_data->send_read_id(host);
-               host->buf_start = column;
+               host->buf_start = 0;
                 break;
  
         case NAND_CMD_ERASE1:
         case NAND_CMD_ERASE2:
                 host->devtype_data->send_cmd(host, command, false);
+               WARN_ONCE(column != -1,
+                         "Unexpected column value (cmd=%u, col=%d)\n",
+                         command, column);
                 mxc_do_addr_cycle(mtd, column, page_addr);
  
                 break;
+       case NAND_CMD_PARAM:
+               host->devtype_data->send_cmd(host, command, false);
+               mxc_do_addr_cycle(mtd, column, page_addr);
+               host->devtype_data->send_page(mtd, NFC_OUTPUT);
+               memcpy32_fromio(host->data_buf, host->main_area0, 512);
+               host->buf_start = 0;
+               break;
+       default:
+               WARN_ONCE(1, "Unimplemented command (cmd=%u)\n",
+                         command);
+               break;
         }
  }
  
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c

index df7eb4f..c2e1232 100644 (file)
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -386,7 +386,7 @@ static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
         uint8_t buf[2] = { 0, 0 };
         int ret = 0, res, i = 0;
  
-       ops.datbuf = NULL;
+       memset(&ops, 0, sizeof(ops));
         ops.oobbuf = buf;
         ops.ooboffs = chip->badblockpos;
         if (chip->options & NAND_BUSWIDTH_16) {
@@ -565,6 +565,25 @@ void nand_wait_ready(struct mtd_info *mtd)
  }
  EXPORT_SYMBOL_GPL(nand_wait_ready);
  
+/**
+ * nand_wait_status_ready - [GENERIC] Wait for the ready status after commands.
+ * @mtd: MTD device structure
+ * @timeo: Timeout in ms
+ *
+ * Wait for status ready (i.e. command done) or timeout.
+ */
+static void nand_wait_status_ready(struct mtd_info *mtd, unsigned long timeo)
+{
+       register struct nand_chip *chip = mtd->priv;
+
+       timeo = jiffies + msecs_to_jiffies(timeo);
+       do {
+               if ((chip->read_byte(mtd) & NAND_STATUS_READY))
+                       break;
+               touch_softlockup_watchdog();
+       } while (time_before(jiffies, timeo));
+};
+
  /**
   * nand_command - [DEFAULT] Send command to NAND device
   * @mtd: MTD device structure
@@ -643,8 +662,8 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
                                NAND_CTRL_CLE | NAND_CTRL_CHANGE);
                 chip->cmd_ctrl(mtd,
                                NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
-               while (!(chip->read_byte(mtd) & NAND_STATUS_READY))
-                               ;
+               /* EZ-NAND can take upto 250ms as per ONFi v4.0 */
+               nand_wait_status_ready(mtd, 250);
                 return;
  
                 /* This applies to read commands */
@@ -740,8 +759,8 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
                                NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
                 chip->cmd_ctrl(mtd, NAND_CMD_NONE,
                                NAND_NCE | NAND_CTRL_CHANGE);
-               while (!(chip->read_byte(mtd) & NAND_STATUS_READY))
-                               ;
+               /* EZ-NAND can take upto 250ms as per ONFi v4.0 */
+               nand_wait_status_ready(mtd, 250);
                 return;
  
         case NAND_CMD_RNDOUT:
@@ -968,7 +987,7 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
                         __func__, (unsigned long long)ofs, len);
  
         if (check_offs_len(mtd, ofs, len))
-               ret = -EINVAL;
+               return -EINVAL;
  
         /* Align to last block address if size addresses end of the device */
         if (ofs + len == mtd->size)
@@ -1031,7 +1050,7 @@ int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
                         __func__, (unsigned long long)ofs, len);
  
         if (check_offs_len(mtd, ofs, len))
-               ret = -EINVAL;
+               return -EINVAL;
  
         nand_get_device(mtd, FL_LOCKING);
  
@@ -1716,9 +1735,9 @@ static int nand_read(struct mtd_info *mtd, loff_t from, size_t len,
         int ret;
  
         nand_get_device(mtd, FL_READING);
+       memset(&ops, 0, sizeof(ops));
         ops.len = len;
         ops.datbuf = buf;
-       ops.oobbuf = NULL;
         ops.mode = MTD_OPS_PLACE_OOB;
         ret = nand_do_read_ops(mtd, from, &ops);
         *retlen = ops.retlen;
@@ -2124,7 +2143,7 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
  
  
  /**
- * nand_write_subpage_hwecc - [REPLACABLE] hardware ECC based subpage write
+ * nand_write_subpage_hwecc - [REPLACEABLE] hardware ECC based subpage write
   * @mtd:       mtd info structure
   * @chip:      nand chip info structure
   * @offset:    column address of subpage within the page
@@ -2508,9 +2527,9 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len,
         /* Grab the device */
         panic_nand_get_device(chip, mtd, FL_WRITING);
  
+       memset(&ops, 0, sizeof(ops));
         ops.len = len;
         ops.datbuf = (uint8_t *)buf;
-       ops.oobbuf = NULL;
         ops.mode = MTD_OPS_PLACE_OOB;
  
         ret = nand_do_write_ops(mtd, to, &ops);
@@ -2536,9 +2555,9 @@ static int nand_write(struct mtd_info *mtd, loff_t to, size_t len,
         int ret;
  
         nand_get_device(mtd, FL_WRITING);
+       memset(&ops, 0, sizeof(ops));
         ops.len = len;
         ops.datbuf = (uint8_t *)buf;
-       ops.oobbuf = NULL;
         ops.mode = MTD_OPS_PLACE_OOB;
         ret = nand_do_write_ops(mtd, to, &ops);
         *retlen = ops.retlen;
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c

index 10b1f7a..a4615fc 100644 (file)
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -38,8 +38,8 @@
  
  #include <linux/platform_data/mtd-nand-pxa3xx.h>
  
-#define        CHIP_DELAY_TIMEOUT      (2 * HZ/10)
-#define NAND_STOP_DELAY                (2 * HZ/50)
+#define        CHIP_DELAY_TIMEOUT      msecs_to_jiffies(200)
+#define NAND_STOP_DELAY                msecs_to_jiffies(40)
  #define PAGE_CHUNK_SIZE                (2048)
  
  /*
@@ -605,11 +605,24 @@ static void start_data_dma(struct pxa3xx_nand_info *info)
  {}
  #endif
  
+static irqreturn_t pxa3xx_nand_irq_thread(int irq, void *data)
+{
+       struct pxa3xx_nand_info *info = data;
+
+       handle_data_pio(info);
+
+       info->state = STATE_CMD_DONE;
+       nand_writel(info, NDSR, NDSR_WRDREQ | NDSR_RDDREQ);
+
+       return IRQ_HANDLED;
+}
+
  static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
  {
         struct pxa3xx_nand_info *info = devid;
         unsigned int status, is_completed = 0, is_ready = 0;
         unsigned int ready, cmd_done;
+       irqreturn_t ret = IRQ_HANDLED;
  
         if (info->cs == 0) {
                 ready           = NDSR_FLASH_RDY;
@@ -651,7 +664,8 @@ static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
                 } else {
                         info->state = (status & NDSR_RDDREQ) ?
                                       STATE_PIO_READING : STATE_PIO_WRITING;
-                       handle_data_pio(info);
+                       ret = IRQ_WAKE_THREAD;
+                       goto NORMAL_IRQ_EXIT;
                 }
         }
         if (status & cmd_done) {
@@ -692,7 +706,7 @@ static irqreturn_t pxa3xx_nand_irq(int irq, void *devid)
         if (is_ready)
                 complete(&info->dev_ready);
  NORMAL_IRQ_EXIT:
-       return IRQ_HANDLED;
+       return ret;
  }
  
  static inline int is_buf_blank(uint8_t *buf, size_t len)
@@ -951,7 +965,7 @@ static void nand_cmdfunc(struct mtd_info *mtd, unsigned command,
  {
         struct pxa3xx_nand_host *host = mtd->priv;
         struct pxa3xx_nand_info *info = host->info_data;
-       int ret, exec_cmd;
+       int exec_cmd;
  
         /*
          * if this is a x16 device ,then convert the input
@@ -983,9 +997,8 @@ static void nand_cmdfunc(struct mtd_info *mtd, unsigned command,
                 info->need_wait = 1;
                 pxa3xx_nand_start(info);
  
-               ret = wait_for_completion_timeout(&info->cmd_complete,
-                               CHIP_DELAY_TIMEOUT);
-               if (!ret) {
+               if (!wait_for_completion_timeout(&info->cmd_complete,
+                   CHIP_DELAY_TIMEOUT)) {
                         dev_err(&info->pdev->dev, "Wait time out!!!\n");
                         /* Stop State Machine for next command cycle */
                         pxa3xx_nand_stop(info);
@@ -1000,7 +1013,7 @@ static void nand_cmdfunc_extended(struct mtd_info *mtd,
  {
         struct pxa3xx_nand_host *host = mtd->priv;
         struct pxa3xx_nand_info *info = host->info_data;
-       int ret, exec_cmd, ext_cmd_type;
+       int exec_cmd, ext_cmd_type;
  
         /*
          * if this is a x16 device then convert the input
@@ -1063,9 +1076,8 @@ static void nand_cmdfunc_extended(struct mtd_info *mtd,
                 init_completion(&info->cmd_complete);
                 pxa3xx_nand_start(info);
  
-               ret = wait_for_completion_timeout(&info->cmd_complete,
-                               CHIP_DELAY_TIMEOUT);
-               if (!ret) {
+               if (!wait_for_completion_timeout(&info->cmd_complete,
+                   CHIP_DELAY_TIMEOUT)) {
                         dev_err(&info->pdev->dev, "Wait time out!!!\n");
                         /* Stop State Machine for next command cycle */
                         pxa3xx_nand_stop(info);
@@ -1198,13 +1210,11 @@ static int pxa3xx_nand_waitfunc(struct mtd_info *mtd, struct nand_chip *this)
  {
         struct pxa3xx_nand_host *host = mtd->priv;
         struct pxa3xx_nand_info *info = host->info_data;
-       int ret;
  
         if (info->need_wait) {
-               ret = wait_for_completion_timeout(&info->dev_ready,
-                               CHIP_DELAY_TIMEOUT);
                 info->need_wait = 0;
-               if (!ret) {
+               if (!wait_for_completion_timeout(&info->dev_ready,
+                   CHIP_DELAY_TIMEOUT)) {
                         dev_err(&info->pdev->dev, "Ready time out!!!\n");
                         return NAND_STATUS_FAIL;
                 }
@@ -1508,6 +1518,8 @@ static int pxa3xx_nand_scan(struct mtd_info *mtd)
                 return ret;
         }
  
+       memset(pxa3xx_flash_ids, 0, sizeof(pxa3xx_flash_ids));
+
         pxa3xx_flash_ids[0].name = f->name;
         pxa3xx_flash_ids[0].dev_id = (f->chip_id >> 8) & 0xffff;
         pxa3xx_flash_ids[0].pagesize = f->page_size;
@@ -1710,7 +1722,9 @@ static int alloc_nand_resource(struct platform_device *pdev)
         /* initialize all interrupts to be disabled */
         disable_int(info, NDSR_MASK);
  
-       ret = request_irq(irq, pxa3xx_nand_irq, 0, pdev->name, info);
+       ret = request_threaded_irq(irq, pxa3xx_nand_irq,
+                                  pxa3xx_nand_irq_thread, IRQF_ONESHOT,
+                                  pdev->name, info);
         if (ret < 0) {
                 dev_err(&pdev->dev, "failed to request IRQ\n");
                 goto fail_free_buf;
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c

index 35aef5e..0e02be4 100644 (file)
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -948,8 +948,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
  
         cpu_type = platform_get_device_id(pdev)->driver_data;
  
-       pr_debug("s3c2410_nand_probe(%p)\n", pdev);
-
         info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
         if (info == NULL) {
                 err = -ENOMEM;
@@ -1045,7 +1043,6 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
                 s3c2410_nand_clk_set_state(info, CLOCK_SUSPEND);
         }
  
-       pr_debug("initialised ok\n");
         return 0;
  
   exit_error:
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c

index a21c378..c3ce81c 100644 (file)
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -159,7 +159,6 @@ static void flctl_setup_dma(struct sh_flctl *flctl)
                 return;
  
         memset(&cfg, 0, sizeof(cfg));
-       cfg.slave_id = pdata->slave_id_fifo0_tx;
         cfg.direction = DMA_MEM_TO_DEV;
         cfg.dst_addr = (dma_addr_t)FLDTFIFO(flctl);
         cfg.src_addr = 0;
@@ -175,7 +174,6 @@ static void flctl_setup_dma(struct sh_flctl *flctl)
         if (!flctl->chan_fifo0_rx)
                 goto err;
  
-       cfg.slave_id = pdata->slave_id_fifo0_rx;
         cfg.direction = DMA_DEV_TO_MEM;
         cfg.dst_addr = 0;
         cfg.src_addr = (dma_addr_t)FLDTFIFO(flctl);
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c

index 635ee00..43b3392 100644 (file)
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1743,7 +1743,6 @@ static int onenand_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
         struct onenand_chip *this = mtd->priv;
         int column, subpage;
         int written = 0;
-       int ret = 0;
  
         if (this->state == FL_PM_SUSPENDED)
                 return -EBUSY;
@@ -1786,15 +1785,10 @@ static int onenand_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
                 onenand_panic_wait(mtd);
  
                 /* In partial page write we don't update bufferram */
-               onenand_update_bufferram(mtd, to, !ret && !subpage);
+               onenand_update_bufferram(mtd, to, !subpage);
                 if (ONENAND_IS_2PLANE(this)) {
                         ONENAND_SET_BUFFERRAM1(this);
-                       onenand_update_bufferram(mtd, to + this->writesize, !ret && !subpage);
-               }
-
-               if (ret) {
-                       printk(KERN_ERR "%s: write failed %d\n", __func__, ret);
-                       break;
+                       onenand_update_bufferram(mtd, to + this->writesize, !subpage);
                 }
  
                 written += thislen;
@@ -1808,7 +1802,7 @@ static int onenand_panic_write(struct mtd_info *mtd, loff_t to, size_t len,
         }
  
         *retlen = written;
-       return ret;
+       return 0;
  }
  
  /**
diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c

index 1c7308c..5d5d362 100644 (file)
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -460,8 +460,7 @@ fsl_qspi_runcmd(struct fsl_qspi *q, u8 cmd, unsigned int addr, int len)
         writel((seqid << QUADSPI_IPCR_SEQID_SHIFT) | len, base + QUADSPI_IPCR);
  
         /* Wait for the interrupt. */
-       err = wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000));
-       if (!err) {
+       if (!wait_for_completion_timeout(&q->c, msecs_to_jiffies(1000))) {
                 dev_err(q->dev,
                         "cmd 0x%.2x timeout, addr@%.8x, FR:0x%.8x, SR:0x%.8x\n",
                         cmd, addr, readl(base + QUADSPI_FR),
@@ -830,27 +829,27 @@ static int fsl_qspi_probe(struct platform_device *pdev)
  
         ret = clk_prepare_enable(q->clk_en);
         if (ret) {
-               dev_err(dev, "can not enable the qspi_en clock\n");
+               dev_err(dev, "cannot enable the qspi_en clock: %d\n", ret);
                 return ret;
         }
  
         ret = clk_prepare_enable(q->clk);
         if (ret) {
-               dev_err(dev, "can not enable the qspi clock\n");
+               dev_err(dev, "cannot enable the qspi clock: %d\n", ret);
                 goto clk_failed;
         }
  
         /* find the irq */
         ret = platform_get_irq(pdev, 0);
         if (ret < 0) {
-               dev_err(dev, "failed to get the irq\n");
+               dev_err(dev, "failed to get the irq: %d\n", ret);
                 goto irq_failed;
         }
  
         ret = devm_request_irq(dev, ret,
                         fsl_qspi_irq_handler, 0, pdev->name, q);
         if (ret) {
-               dev_err(dev, "failed to request irq.\n");
+               dev_err(dev, "failed to request irq: %d\n", ret);
                 goto irq_failed;
         }
  
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c

index b6a5a0c..14a5d23 100644 (file)
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -369,17 +369,13 @@ erase_err:
         return ret;
  }
  
-static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+static int stm_lock(struct spi_nor *nor, loff_t ofs, uint64_t len)
  {
-       struct spi_nor *nor = mtd_to_spi_nor(mtd);
+       struct mtd_info *mtd = nor->mtd;
         uint32_t offset = ofs;
         uint8_t status_old, status_new;
         int ret = 0;
  
-       ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_LOCK);
-       if (ret)
-               return ret;
-
         status_old = read_sr(nor);
  
         if (offset < mtd->size - (mtd->size / 2))
@@ -402,26 +398,18 @@ static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
                                 (status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
                 write_enable(nor);
                 ret = write_sr(nor, status_new);
-               if (ret)
-                       goto err;
         }
  
-err:
-       spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
         return ret;
  }
  
-static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+static int stm_unlock(struct spi_nor *nor, loff_t ofs, uint64_t len)
  {
-       struct spi_nor *nor = mtd_to_spi_nor(mtd);
+       struct mtd_info *mtd = nor->mtd;
         uint32_t offset = ofs;
         uint8_t status_old, status_new;
         int ret = 0;
  
-       ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_UNLOCK);
-       if (ret)
-               return ret;
-
         status_old = read_sr(nor);
  
         if (offset+len > mtd->size - (mtd->size / 64))
@@ -444,15 +432,41 @@ static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
                                 (status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
                 write_enable(nor);
                 ret = write_sr(nor, status_new);
-               if (ret)
-                       goto err;
         }
  
-err:
+       return ret;
+}
+
+static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+       struct spi_nor *nor = mtd_to_spi_nor(mtd);
+       int ret;
+
+       ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_LOCK);
+       if (ret)
+               return ret;
+
+       ret = nor->flash_lock(nor, ofs, len);
+
         spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_UNLOCK);
         return ret;
  }
  
+static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+       struct spi_nor *nor = mtd_to_spi_nor(mtd);
+       int ret;
+
+       ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_UNLOCK);
+       if (ret)
+               return ret;
+
+       ret = nor->flash_unlock(nor, ofs, len);
+
+       spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
+       return ret;
+}
+
  /* Used when the "_ext_id" is two bytes at most */
  #define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)     \
         ((kernel_ulong_t)&(struct flash_info) {                         \
@@ -524,6 +538,7 @@ static const struct spi_device_id spi_nor_ids[] = {
         { "en25q64",    INFO(0x1c3017, 0, 64 * 1024,  128, SECT_4K) },
         { "en25qh128",  INFO(0x1c7018, 0, 64 * 1024,  256, 0) },
         { "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512, 0) },
+       { "en25s64",    INFO(0x1c3817, 0, 64 * 1024,  128, 0) },
  
         /* ESMT */
         { "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, SECT_4K) },
@@ -553,6 +568,7 @@ static const struct spi_device_id spi_nor_ids[] = {
         { "mx25l3205d",  INFO(0xc22016, 0, 64 * 1024,  64, 0) },
         { "mx25l3255e",  INFO(0xc29e16, 0, 64 * 1024,  64, SECT_4K) },
         { "mx25l6405d",  INFO(0xc22017, 0, 64 * 1024, 128, 0) },
+       { "mx25u6435f",  INFO(0xc22537, 0, 64 * 1024, 128, SECT_4K) },
         { "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
         { "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
         { "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
@@ -648,6 +664,7 @@ static const struct spi_device_id spi_nor_ids[] = {
         { "m25px80",    INFO(0x207114,  0, 64 * 1024, 16, 0) },
  
         /* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
+       { "w25x05", INFO(0xef3010, 0, 64 * 1024,  1,  SECT_4K) },
         { "w25x10", INFO(0xef3011, 0, 64 * 1024,  2,  SECT_4K) },
         { "w25x20", INFO(0xef3012, 0, 64 * 1024,  4,  SECT_4K) },
         { "w25x40", INFO(0xef3013, 0, 64 * 1024,  8,  SECT_4K) },
@@ -658,6 +675,7 @@ static const struct spi_device_id spi_nor_ids[] = {
         { "w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64, SECT_4K) },
         { "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
         { "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
+       { "w25q64dw", INFO(0xef6017, 0, 64 * 1024, 128, SECT_4K) },
         { "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
         { "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
         { "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
@@ -1045,6 +1063,11 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode)
  
         /* nor protection support for STmicro chips */
         if (JEDEC_MFR(info) == CFI_MFR_ST) {
+               nor->flash_lock = stm_lock;
+               nor->flash_unlock = stm_unlock;
+       }
+
+       if (nor->flash_lock && nor->flash_unlock) {
                 mtd->_lock = spi_nor_lock;
                 mtd->_unlock = spi_nor_unlock;
         }
diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c

index e579f90..7931615 100644 (file)
--- a/drivers/mtd/tests/mtd_nandecctest.c
+++ b/drivers/mtd/tests/mtd_nandecctest.c
@@ -9,6 +9,8 @@
  #include <linux/slab.h>
  #include <linux/mtd/nand_ecc.h>
  
+#include "mtd_test.h"
+
  /*
   * Test the implementation for software ECC
   *
@@ -274,6 +276,10 @@ static int nand_ecc_test_run(const size_t size)
                 }
                 pr_info("ok - %s-%zd\n",
                         nand_ecc_test[i].name, size);
+
+               err = mtdtest_relax();
+               if (err)
+                       break;
         }
  error:
         kfree(error_data);
diff --git a/drivers/mtd/tests/mtd_test.h b/drivers/mtd/tests/mtd_test.h

index f437c77..4b7bee1 100644 (file)
--- a/drivers/mtd/tests/mtd_test.h
+++ b/drivers/mtd/tests/mtd_test.h
@@ -1,4 +1,16 @@
  #include <linux/mtd/mtd.h>
+#include <linux/sched.h>
+
+static inline int mtdtest_relax(void)
+{
+       cond_resched();
+       if (signal_pending(current)) {
+               pr_info("aborting test due to pending signal!\n");
+               return -EINTR;
+       }
+
+       return 0;
+}
  
  int mtdtest_erase_eraseblock(struct mtd_info *mtd, unsigned int ebnum);
  int mtdtest_scan_for_bad_eraseblocks(struct mtd_info *mtd, unsigned char *bbt,
diff --git a/drivers/mtd/tests/nandbiterrs.c b/drivers/mtd/tests/nandbiterrs.c

index 273f7e5..09a4cca 100644 (file)
--- a/drivers/mtd/tests/nandbiterrs.c
+++ b/drivers/mtd/tests/nandbiterrs.c
@@ -320,6 +320,10 @@ static int overwrite_test(void)
                         break;
                 }
  
+               err = mtdtest_relax();
+               if (err)
+                       break;
+
                 opno++;
         }
  
diff --git a/drivers/mtd/tests/oobtest.c b/drivers/mtd/tests/oobtest.c

index 5e06118..8e8525f 100644 (file)
--- a/drivers/mtd/tests/oobtest.c
+++ b/drivers/mtd/tests/oobtest.c
@@ -70,7 +70,7 @@ static int write_eraseblock(int ebnum)
         int i;
         struct mtd_oob_ops ops;
         int err = 0;
-       loff_t addr = ebnum * mtd->erasesize;
+       loff_t addr = (loff_t)ebnum * mtd->erasesize;
  
         prandom_bytes_state(&rnd_state, writebuf, use_len_max * pgcnt);
         for (i = 0; i < pgcnt; ++i, addr += mtd->writesize) {
@@ -112,7 +112,10 @@ static int write_whole_device(void)
                         return err;
                 if (i % 256 == 0)
                         pr_info("written up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       return err;
         }
         pr_info("written %u eraseblocks\n", i);
         return 0;
@@ -141,6 +144,31 @@ static size_t memcmpshow(loff_t addr, const void *cs, const void *ct, size_t cou
         return bitflips;
  }
  
+/*
+ * Compare with 0xff and show the address, offset and data bytes at
+ * comparison failure. Return number of bitflips encountered.
+ */
+static size_t memffshow(loff_t addr, loff_t offset, const void *cs,
+                       size_t count)
+{
+       const unsigned char *su1;
+       int res;
+       size_t i = 0;
+       size_t bitflips = 0;
+
+       for (su1 = cs; 0 < count; ++su1, count--, i++) {
+               res = *su1 ^ 0xff;
+               if (res) {
+                       pr_info("error @addr[0x%lx:0x%lx] 0x%x -> 0xff diff 0x%x\n",
+                               (unsigned long)addr, (unsigned long)offset + i,
+                               *su1, res);
+                       bitflips += hweight8(res);
+               }
+       }
+
+       return bitflips;
+}
+
  static int verify_eraseblock(int ebnum)
  {
         int i;
@@ -203,6 +231,15 @@ static int verify_eraseblock(int ebnum)
                         bitflips = memcmpshow(addr, readbuf + use_offset,
                                               writebuf + (use_len_max * i) + use_offset,
                                               use_len);
+
+                       /* verify pre-offset area for 0xff */
+                       bitflips += memffshow(addr, 0, readbuf, use_offset);
+
+                       /* verify post-(use_offset + use_len) area for 0xff */
+                       k = use_offset + use_len;
+                       bitflips += memffshow(addr, k, readbuf + k,
+                                             mtd->ecclayout->oobavail - k);
+
                         if (bitflips > bitflip_limit) {
                                 pr_err("error: verify failed at %#llx\n",
                                                 (long long)addr);
@@ -212,34 +249,8 @@ static int verify_eraseblock(int ebnum)
                                         return -1;
                                 }
                         } else if (bitflips) {
-                               pr_info("ignoring error as within bitflip_limit\n");
+                               pr_info("ignoring errors as within bitflip limit\n");
                         }
-
-                       for (k = 0; k < use_offset; ++k)
-                               if (readbuf[k] != 0xff) {
-                                       pr_err("error: verify 0xff "
-                                              "failed at %#llx\n",
-                                              (long long)addr);
-                                       errcnt += 1;
-                                       if (errcnt > 1000) {
-                                               pr_err("error: too "
-                                                      "many errors\n");
-                                               return -1;
-                                       }
-                               }
-                       for (k = use_offset + use_len;
-                            k < mtd->ecclayout->oobavail; ++k)
-                               if (readbuf[k] != 0xff) {
-                                       pr_err("error: verify 0xff "
-                                              "failed at %#llx\n",
-                                              (long long)addr);
-                                       errcnt += 1;
-                                       if (errcnt > 1000) {
-                                               pr_err("error: too "
-                                                      "many errors\n");
-                                               return -1;
-                                       }
-                               }
                 }
                 if (vary_offset)
                         do_vary_offset();
@@ -310,7 +321,10 @@ static int verify_all_eraseblocks(void)
                         return err;
                 if (i % 256 == 0)
                         pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       return err;
         }
         pr_info("verified %u eraseblocks\n", i);
         return 0;
@@ -421,7 +435,10 @@ static int __init mtd_oobtest_init(void)
                         goto out;
                 if (i % 256 == 0)
                         pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         pr_info("verified %u eraseblocks\n", i);
  
@@ -634,7 +651,11 @@ static int __init mtd_oobtest_init(void)
                                 goto out;
                         if (i % 256 == 0)
                                 pr_info("written up to eraseblock %u\n", i);
-                       cond_resched();
+
+                       err = mtdtest_relax();
+                       if (err)
+                               goto out;
+
                         addr += mtd->writesize;
                 }
         }
@@ -672,7 +693,10 @@ static int __init mtd_oobtest_init(void)
                 }
                 if (i % 256 == 0)
                         pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         pr_info("verified %u eraseblocks\n", i);
  
diff --git a/drivers/mtd/tests/pagetest.c b/drivers/mtd/tests/pagetest.c

index 88296e8..ba1890d 100644 (file)
--- a/drivers/mtd/tests/pagetest.c
+++ b/drivers/mtd/tests/pagetest.c
@@ -407,7 +407,10 @@ static int __init mtd_pagetest_init(void)
                         goto out;
                 if (i % 256 == 0)
                         pr_info("written up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         pr_info("written %u eraseblocks\n", i);
  
@@ -422,7 +425,10 @@ static int __init mtd_pagetest_init(void)
                         goto out;
                 if (i % 256 == 0)
                         pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         pr_info("verified %u eraseblocks\n", i);
  
diff --git a/drivers/mtd/tests/readtest.c b/drivers/mtd/tests/readtest.c

index a54cf15..a3196b7 100644 (file)
--- a/drivers/mtd/tests/readtest.c
+++ b/drivers/mtd/tests/readtest.c
@@ -190,7 +190,10 @@ static int __init mtd_readtest_init(void)
                         if (!err)
                                 err = ret;
                 }
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
  
         if (err)
diff --git a/drivers/mtd/tests/speedtest.c b/drivers/mtd/tests/speedtest.c

index 5ee9f70..5a6f31a 100644 (file)
--- a/drivers/mtd/tests/speedtest.c
+++ b/drivers/mtd/tests/speedtest.c
@@ -185,7 +185,7 @@ static long calc_speed(void)
              (finish.tv_usec - start.tv_usec) / 1000;
         if (ms == 0)
                 return 0;
-       k = goodebcnt * (mtd->erasesize / 1024) * 1000;
+       k = (uint64_t)goodebcnt * (mtd->erasesize / 1024) * 1000;
         do_div(k, ms);
         return k;
  }
@@ -269,7 +269,10 @@ static int __init mtd_speedtest_init(void)
                 err = write_eraseblock(i);
                 if (err)
                         goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         stop_timing();
         speed = calc_speed();
@@ -284,7 +287,10 @@ static int __init mtd_speedtest_init(void)
                 err = read_eraseblock(i);
                 if (err)
                         goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         stop_timing();
         speed = calc_speed();
@@ -303,7 +309,10 @@ static int __init mtd_speedtest_init(void)
                 err = write_eraseblock_by_page(i);
                 if (err)
                         goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         stop_timing();
         speed = calc_speed();
@@ -318,7 +327,10 @@ static int __init mtd_speedtest_init(void)
                 err = read_eraseblock_by_page(i);
                 if (err)
                         goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         stop_timing();
         speed = calc_speed();
@@ -337,7 +349,10 @@ static int __init mtd_speedtest_init(void)
                 err = write_eraseblock_by_2pages(i);
                 if (err)
                         goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         stop_timing();
         speed = calc_speed();
@@ -352,7 +367,10 @@ static int __init mtd_speedtest_init(void)
                 err = read_eraseblock_by_2pages(i);
                 if (err)
                         goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         stop_timing();
         speed = calc_speed();
@@ -385,7 +403,11 @@ static int __init mtd_speedtest_init(void)
                         err = multiblock_erase(i, j);
                         if (err)
                                 goto out;
-                       cond_resched();
+
+                       err = mtdtest_relax();
+                       if (err)
+                               goto out;
+
                         i += j;
                 }
                 stop_timing();
diff --git a/drivers/mtd/tests/stresstest.c b/drivers/mtd/tests/stresstest.c

index c9d42cc..e509f8a 100644 (file)
--- a/drivers/mtd/tests/stresstest.c
+++ b/drivers/mtd/tests/stresstest.c
@@ -96,7 +96,7 @@ static int do_read(void)
                 if (offs + len > mtd->erasesize)
                         len = mtd->erasesize - offs;
         }
-       addr = eb * mtd->erasesize + offs;
+       addr = (loff_t)eb * mtd->erasesize + offs;
         return mtdtest_read(mtd, addr, len, readbuf);
  }
  
@@ -124,7 +124,7 @@ static int do_write(void)
                         offsets[eb + 1] = 0;
                 }
         }
-       addr = eb * mtd->erasesize + offs;
+       addr = (loff_t)eb * mtd->erasesize + offs;
         err = mtdtest_write(mtd, addr, len, writebuf);
         if (unlikely(err))
                 return err;
@@ -221,7 +221,10 @@ static int __init mtd_stresstest_init(void)
                 err = do_operation();
                 if (err)
                         goto out;
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         pr_info("finished, %d operations done\n", op);
  
diff --git a/drivers/mtd/tests/subpagetest.c b/drivers/mtd/tests/subpagetest.c

index 7b59ef5..aecc6ce 100644 (file)
--- a/drivers/mtd/tests/subpagetest.c
+++ b/drivers/mtd/tests/subpagetest.c
@@ -95,7 +95,7 @@ static int write_eraseblock2(int ebnum)
         loff_t addr = (loff_t)ebnum * mtd->erasesize;
  
         for (k = 1; k < 33; ++k) {
-               if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
+               if (addr + (subpgsize * k) > (loff_t)(ebnum + 1) * mtd->erasesize)
                         break;
                 prandom_bytes_state(&rnd_state, writebuf, subpgsize * k);
                 err = mtd_write(mtd, addr, subpgsize * k, &written, writebuf);
@@ -195,7 +195,7 @@ static int verify_eraseblock2(int ebnum)
         loff_t addr = (loff_t)ebnum * mtd->erasesize;
  
         for (k = 1; k < 33; ++k) {
-               if (addr + (subpgsize * k) > (ebnum + 1) * mtd->erasesize)
+               if (addr + (subpgsize * k) > (loff_t)(ebnum + 1) * mtd->erasesize)
                         break;
                 prandom_bytes_state(&rnd_state, writebuf, subpgsize * k);
                 clear_data(readbuf, subpgsize * k);
@@ -269,7 +269,10 @@ static int verify_all_eraseblocks_ff(void)
                         return err;
                 if (i % 256 == 0)
                         pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       return err;
         }
         pr_info("verified %u eraseblocks\n", i);
         return 0;
@@ -346,7 +349,10 @@ static int __init mtd_subpagetest_init(void)
                         goto out;
                 if (i % 256 == 0)
                         pr_info("written up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         pr_info("written %u eraseblocks\n", i);
  
@@ -360,7 +366,10 @@ static int __init mtd_subpagetest_init(void)
                         goto out;
                 if (i % 256 == 0)
                         pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         pr_info("verified %u eraseblocks\n", i);
  
@@ -383,7 +392,10 @@ static int __init mtd_subpagetest_init(void)
                         goto out;
                 if (i % 256 == 0)
                         pr_info("written up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         pr_info("written %u eraseblocks\n", i);
  
@@ -398,7 +410,10 @@ static int __init mtd_subpagetest_init(void)
                         goto out;
                 if (i % 256 == 0)
                         pr_info("verified up to eraseblock %u\n", i);
-               cond_resched();
+
+               err = mtdtest_relax();
+               if (err)
+                       goto out;
         }
         pr_info("verified %u eraseblocks\n", i);
  
diff --git a/drivers/mtd/tests/torturetest.c b/drivers/mtd/tests/torturetest.c

index b55bc52..e5d6e6d 100644 (file)
--- a/drivers/mtd/tests/torturetest.c
+++ b/drivers/mtd/tests/torturetest.c
@@ -101,11 +101,11 @@ static inline int check_eraseblock(int ebnum, unsigned char *buf)
  {
         int err, retries = 0;
         size_t read;
-       loff_t addr = ebnum * mtd->erasesize;
+       loff_t addr = (loff_t)ebnum * mtd->erasesize;
         size_t len = mtd->erasesize;
  
         if (pgcnt) {
-               addr = (ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
+               addr = (loff_t)(ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
                 len = pgcnt * pgsize;
         }
  
@@ -155,11 +155,11 @@ static inline int write_pattern(int ebnum, void *buf)
  {
         int err;
         size_t written;
-       loff_t addr = ebnum * mtd->erasesize;
+       loff_t addr = (loff_t)ebnum * mtd->erasesize;
         size_t len = mtd->erasesize;
  
         if (pgcnt) {
-               addr = (ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
+               addr = (loff_t)(ebnum + 1) * mtd->erasesize - pgcnt * pgsize;
                 len = pgcnt * pgsize;
         }
         err = mtd_write(mtd, addr, len, &written, buf);
@@ -279,7 +279,10 @@ static int __init tort_init(void)
                                                " for 0xFF... pattern\n");
                                         goto out;
                                 }
-                               cond_resched();
+
+                               err = mtdtest_relax();
+                               if (err)
+                                       goto out;
                         }
                 }
  
@@ -294,7 +297,10 @@ static int __init tort_init(void)
                         err = write_pattern(i, patt);
                         if (err)
                                 goto out;
-                       cond_resched();
+
+                       err = mtdtest_relax();
+                       if (err)
+                               goto out;
                 }
  
                 /* Verify what we wrote */
@@ -314,7 +320,10 @@ static int __init tort_init(void)
                                                "0x55AA55..." : "0xAA55AA...");
                                         goto out;
                                 }
-                               cond_resched();
+
+                               err = mtdtest_relax();
+                               if (err)
+                                       goto out;
                         }
                 }
  
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c

index 9690cf9..b7f824d 100644 (file)
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1169,9 +1169,9 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
                 return ERR_PTR(err);
  
         /* MTD device number is defined by the major / minor numbers */
-       major = imajor(path.dentry->d_inode);
-       minor = iminor(path.dentry->d_inode);
-       mode = path.dentry->d_inode->i_mode;
+       major = imajor(d_backing_inode(path.dentry));
+       minor = iminor(d_backing_inode(path.dentry));
+       mode = d_backing_inode(path.dentry)->i_mode;
         path_put(&path);
         if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode))
                 return ERR_PTR(-EINVAL);
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c

index 478e00c..e844887 100644 (file)
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -314,7 +314,7 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
         if (error)
                 return ERR_PTR(error);
  
-       inode = path.dentry->d_inode;
+       inode = d_backing_inode(path.dentry);
         mod = inode->i_mode;
         ubi_num = ubi_major2num(imajor(inode));
         vol_id = iminor(inode) - 1;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c

index f0285bc..371f75e 100644 (file)
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -538,7 +538,7 @@ static ssize_t tp_la_write(struct file *file, const char __user *buf,
         char s[32];
         unsigned long val;
         size_t size = min(sizeof(s) - 1, count);
-       struct adapter *adap = FILE_DATA(file)->i_private;
+       struct adapter *adap = file_inode(file)->i_private;
  
         if (copy_from_user(s, buf, size))
                 return -EFAULT;
@@ -647,7 +647,7 @@ static int pm_stats_open(struct inode *inode, struct file *file)
  static ssize_t pm_stats_clear(struct file *file, const char __user *buf,
                               size_t count, loff_t *pos)
  {
-       struct adapter *adap = FILE_DATA(file)->i_private;
+       struct adapter *adap = file_inode(file)->i_private;
  
         t4_write_reg(adap, PM_RX_STAT_CONFIG_A, 0);
         t4_write_reg(adap, PM_TX_STAT_CONFIG_A, 0);
@@ -1005,7 +1005,7 @@ static ssize_t mbox_write(struct file *file, const char __user *buf,
                    &data[7], &c) < 8 || c != '\n')
                 return -EINVAL;
  
-       ino = FILE_DATA(file);
+       ino = file_inode(file);
         mbox = (uintptr_t)ino->i_private & 7;
         adap = ino->i_private - mbox;
         addr = adap->regs + PF_REG(mbox, CIM_PF_MAILBOX_DATA_A);
@@ -1034,7 +1034,7 @@ static ssize_t flash_read(struct file *file, char __user *buf, size_t count,
                           loff_t *ppos)
  {
         loff_t pos = *ppos;
-       loff_t avail = FILE_DATA(file)->i_size;
+       loff_t avail = file_inode(file)->i_size;
         struct adapter *adap = file->private_data;
  
         if (pos < 0)
@@ -1479,7 +1479,7 @@ static ssize_t rss_key_write(struct file *file, const char __user *buf,
         int i, j;
         u32 key[10];
         char s[100], *p;
-       struct adapter *adap = FILE_DATA(file)->i_private;
+       struct adapter *adap = file_inode(file)->i_private;
  
         if (count > sizeof(s) - 1)
                 return -EINVAL;
@@ -1951,12 +1951,6 @@ static const struct file_operations mem_debugfs_fops = {
         .llseek  = default_llseek,
  };
  
-static void set_debugfs_file_size(struct dentry *de, loff_t size)
-{
-       if (!IS_ERR(de) && de->d_inode)
-               de->d_inode->i_size = size;
-}
-
  static void add_debugfs_mem(struct adapter *adap, const char *name,
                             unsigned int idx, unsigned int size_mb)
  {
@@ -2072,9 +2066,8 @@ int t4_setup_debugfs(struct adapter *adap)
                 }
         }
  
-       de = debugfs_create_file("flash", S_IRUSR, adap->debugfs_root, adap,
-                                &flash_debugfs_fops);
-       set_debugfs_file_size(de, adap->params.sf_size);
+       de = debugfs_create_file_size("flash", S_IRUSR, adap->debugfs_root, adap,
+                                     &flash_debugfs_fops, adap->params.sf_size);
  
         return 0;
  }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h

index 8f418ba..23f43a0 100644 (file)
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.h
@@ -37,8 +37,6 @@
  
  #include <linux/export.h>
  
-#define FILE_DATA(_file) ((_file)->f_path.dentry->d_inode)
-
  #define DEFINE_SIMPLE_DEBUGFS_FILE(name) \
  static int name##_open(struct inode *inode, struct file *file) \
  { \
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c

index f0fbb4a..4f7dc04 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -939,21 +939,34 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
                                 return err;
                         }
                         if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
-                               /* compute slave's gid block */
-                               smp->attr_mod = cpu_to_be32(slave / 8);
-                               /* execute cmd */
-                               err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-                                            vhcr->in_modifier, opcode_modifier,
-                                            vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
-                               if (!err) {
-                                       /* if needed, move slave gid to index 0 */
-                                       if (slave % 8)
-                                               memcpy(outsmp->data,
-                                                      outsmp->data + (slave % 8) * 8, 8);
-                                       /* delete all other gids */
-                                       memset(outsmp->data + 8, 0, 56);
+                               __be64 guid = mlx4_get_admin_guid(dev, slave,
+                                                                 port);
+
+                               /* set the PF admin guid to the FW/HW burned
+                                * GUID, if it wasn't yet set
+                                */
+                               if (slave == 0 && guid == 0) {
+                                       smp->attr_mod = 0;
+                                       err = mlx4_cmd_box(dev,
+                                                          inbox->dma,
+                                                          outbox->dma,
+                                                          vhcr->in_modifier,
+                                                          opcode_modifier,
+                                                          vhcr->op,
+                                                          MLX4_CMD_TIME_CLASS_C,
+                                                          MLX4_CMD_NATIVE);
+                                       if (err)
+                                               return err;
+                                       mlx4_set_admin_guid(dev,
+                                                           *(__be64 *)outsmp->
+                                                           data, slave, port);
+                               } else {
+                                       memcpy(outsmp->data, &guid, 8);
                                 }
-                               return err;
+
+                               /* clean all other gids */
+                               memset(outsmp->data + 8, 0, 56);
+                               return 0;
                         }
                         if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
                                 err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
@@ -2350,6 +2363,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
                                 oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
                                 vf_oper->vport[port].vlan_idx = NO_INDX;
                                 vf_oper->vport[port].mac_idx = NO_INDX;
+                               mlx4_set_random_admin_guid(dev, i, port);
                         }
                         spin_lock_init(&s_state->lock);
                 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c

index 190fd62..2619c9f 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -702,6 +702,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
                                 priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
                         }
                         spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+                       mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
+                                           flr_slave);
                         queue_work(priv->mfunc.master.comm_wq,
                                    &priv->mfunc.master.slave_flr_event_work);
                         break;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c

index acceb75..ced5eca 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2260,6 +2260,37 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
  }
  EXPORT_SYMBOL_GPL(mlx4_counter_free);
  
+void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
+
+__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       return priv->mfunc.master.vf_admin[entry].vport[port].guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
+
+void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       __be64 guid;
+
+       /* hw GUID */
+       if (entry == 0)
+               return;
+
+       get_random_bytes((char *)&guid, sizeof(guid));
+       guid &= ~(cpu_to_be64(1ULL << 56));
+       guid |= cpu_to_be64(1ULL << 57);
+       priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+
  static int mlx4_setup_hca(struct mlx4_dev *dev)
  {
         struct mlx4_priv *priv = mlx4_priv(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h

index f30eeb7..502d3dd 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -499,6 +499,7 @@ struct mlx4_vport_state {
         bool spoofchk;
         u32 link_state;
         u8 qos_vport;
+       __be64 guid;
  };
  
  struct mlx4_vf_admin_state {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c

index df22383..8a64542 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -211,26 +211,28 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
         return 0;
  }
  
+#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
+
  static void free_4k(struct mlx5_core_dev *dev, u64 addr)
  {
         struct fw_page *fwp;
         int n;
  
-       fwp = find_fw_page(dev, addr & PAGE_MASK);
+       fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
         if (!fwp) {
                 mlx5_core_warn(dev, "page not found\n");
                 return;
         }
  
-       n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+       n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
         fwp->free_count++;
         set_bit(n, &fwp->bitmask);
         if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
                 rb_erase(&fwp->rb_node, &dev->priv.page_root);
                 if (fwp->free_count != 1)
                         list_del(&fwp->list);
-               dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
-                              DMA_BIDIRECTIONAL);
+               dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
+                              PAGE_SIZE, DMA_BIDIRECTIONAL);
                 __free_page(fwp->page);
                 kfree(fwp);
         } else if (fwp->free_count == 1) {
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig

index 1470b52..07bb3c8 100644 (file)
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -50,7 +50,7 @@ config OF_ADDRESS_PCI
  
  config OF_IRQ
         def_bool y
-       depends on !SPARC
+       depends on !SPARC && IRQ_DOMAIN
  
  config OF_NET
         depends on NETDEVICES
diff --git a/drivers/of/base.c b/drivers/of/base.c

index a1aa0c7..99764db 100644 (file)
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -567,6 +567,29 @@ bool of_device_is_available(const struct device_node *device)
  }
  EXPORT_SYMBOL(of_device_is_available);
  
+/**
+ *  of_device_is_big_endian - check if a device has BE registers
+ *
+ *  @device: Node to check for endianness
+ *
+ *  Returns true if the device has a "big-endian" property, or if the kernel
+ *  was compiled for BE *and* the device has a "native-endian" property.
+ *  Returns false otherwise.
+ *
+ *  Callers would nominally use ioread32be/iowrite32be if
+ *  of_device_is_big_endian() == true, or readl/writel otherwise.
+ */
+bool of_device_is_big_endian(const struct device_node *device)
+{
+       if (of_property_read_bool(device, "big-endian"))
+               return true;
+       if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) &&
+           of_property_read_bool(device, "native-endian"))
+               return true;
+       return false;
+}
+EXPORT_SYMBOL(of_device_is_big_endian);
+
  /**
   *     of_get_parent - Get a node's parent if any
   *     @node:  Node to get parent
@@ -640,8 +663,9 @@ static struct device_node *__of_get_next_child(const struct device_node *node,
   *     @node:  parent node
   *     @prev:  previous child of the parent node, or NULL to get first
   *
- *     Returns a node pointer with refcount incremented, use
- *     of_node_put() on it when done.
+ *     Returns a node pointer with refcount incremented, use of_node_put() on
+ *     it when done. Returns NULL when prev is the last child. Decrements the
+ *     refcount of prev.
   */
  struct device_node *of_get_next_child(const struct device_node *node,
         struct device_node *prev)
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c

index 3a896c9..cde35c5 100644 (file)
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -108,6 +108,25 @@ int of_fdt_is_compatible(const void *blob,
         return 0;
  }
  
+/**
+ * of_fdt_is_big_endian - Return true if given node needs BE MMIO accesses
+ * @blob: A device tree blob
+ * @node: node to test
+ *
+ * Returns true if the node has a "big-endian" property, or if the kernel
+ * was compiled for BE *and* the node has a "native-endian" property.
+ * Returns false otherwise.
+ */
+bool of_fdt_is_big_endian(const void *blob, unsigned long node)
+{
+       if (fdt_getprop(blob, node, "big-endian", NULL))
+               return true;
+       if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) &&
+           fdt_getprop(blob, node, "native-endian", NULL))
+               return true;
+       return false;
+}
+
  /**
   * of_fdt_match - Return true if node matches a list of compatible values
   */
@@ -172,7 +191,7 @@ static void * unflatten_dt_node(void *blob,
         if (!pathp)
                 return mem;
  
-       allocl = l++;
+       allocl = ++l;
  
         /* version 0x10 has a more compact unit name here instead of the full
          * path. we accumulate the full path size using "fpsize", we'll rebuild
@@ -879,8 +898,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
  
         endp = reg + (l / sizeof(__be32));
  
-       pr_debug("memory scan node %s, reg size %d, data: %x %x %x %x,\n",
-           uname, l, reg[0], reg[1], reg[2], reg[3]);
+       pr_debug("memory scan node %s, reg size %d,\n", uname, l);
  
         while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
                 u64 base, size;
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c

index e844907..1801634 100644 (file)
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -23,6 +23,8 @@
  #include <linux/i2c.h>
  #include <linux/i2c-mux.h>
  
+#include <linux/bitops.h>
+
  #include "of_private.h"
  
  static struct unittest_results {
@@ -1109,6 +1111,59 @@ static const char *overlay_path(int nr)
  
  static const char *bus_path = "/testcase-data/overlay-node/test-bus";
  
+/* it is guaranteed that overlay ids are assigned in sequence */
+#define MAX_UNITTEST_OVERLAYS  256
+static unsigned long overlay_id_bits[BITS_TO_LONGS(MAX_UNITTEST_OVERLAYS)];
+static int overlay_first_id = -1;
+
+static void of_unittest_track_overlay(int id)
+{
+       if (overlay_first_id < 0)
+               overlay_first_id = id;
+       id -= overlay_first_id;
+
+       /* we shouldn't need that many */
+       BUG_ON(id >= MAX_UNITTEST_OVERLAYS);
+       overlay_id_bits[BIT_WORD(id)] |= BIT_MASK(id);
+}
+
+static void of_unittest_untrack_overlay(int id)
+{
+       if (overlay_first_id < 0)
+               return;
+       id -= overlay_first_id;
+       BUG_ON(id >= MAX_UNITTEST_OVERLAYS);
+       overlay_id_bits[BIT_WORD(id)] &= ~BIT_MASK(id);
+}
+
+static void of_unittest_destroy_tracked_overlays(void)
+{
+       int id, ret, defers;
+
+       if (overlay_first_id < 0)
+               return;
+
+       /* try until no defers */
+       do {
+               defers = 0;
+               /* remove in reverse order */
+               for (id = MAX_UNITTEST_OVERLAYS - 1; id >= 0; id--) {
+                       if (!(overlay_id_bits[BIT_WORD(id)] & BIT_MASK(id)))
+                               continue;
+
+                       ret = of_overlay_destroy(id + overlay_first_id);
+                       if (ret != 0) {
+                               defers++;
+                               pr_warn("%s: overlay destroy failed for #%d\n",
+                                       __func__, id + overlay_first_id);
+                               continue;
+                       }
+
+                       overlay_id_bits[BIT_WORD(id)] &= ~BIT_MASK(id);
+               }
+       } while (defers > 0);
+}
+
  static int of_unittest_apply_overlay(int unittest_nr, int overlay_nr,
                 int *overlay_id)
  {
@@ -1130,6 +1185,7 @@ static int of_unittest_apply_overlay(int unittest_nr, int overlay_nr,
                 goto out;
         }
         id = ret;
+       of_unittest_track_overlay(id);
  
         ret = 0;
  
@@ -1343,6 +1399,7 @@ static void of_unittest_overlay_6(void)
                         return;
                 }
                 ov_id[i] = ret;
+               of_unittest_track_overlay(ov_id[i]);
         }
  
         for (i = 0; i < 2; i++) {
@@ -1367,6 +1424,7 @@ static void of_unittest_overlay_6(void)
                                                 PDEV_OVERLAY));
                         return;
                 }
+               of_unittest_untrack_overlay(ov_id[i]);
         }
  
         for (i = 0; i < 2; i++) {
@@ -1411,6 +1469,7 @@ static void of_unittest_overlay_8(void)
                         return;
                 }
                 ov_id[i] = ret;
+               of_unittest_track_overlay(ov_id[i]);
         }
  
         /* now try to remove first overlay (it should fail) */
@@ -1433,6 +1492,7 @@ static void of_unittest_overlay_8(void)
                                                 PDEV_OVERLAY));
                         return;
                 }
+               of_unittest_untrack_overlay(ov_id[i]);
         }
  
         unittest(1, "overlay test %d passed\n", 8);
@@ -1855,6 +1915,8 @@ static void __init of_unittest_overlay(void)
         of_unittest_overlay_i2c_cleanup();
  #endif
  
+       of_unittest_destroy_tracked_overlays();
+
  out:
         of_node_put(bus_np);
  }
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c

index 3f49345..dd92c5e 100644 (file)
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -138,22 +138,22 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name,
         struct dentry *dentry;
         struct inode *inode;
  
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
         dentry = d_alloc_name(root, name);
         if (!dentry) {
-               mutex_unlock(&root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(root)->i_mutex);
                 return -ENOMEM;
         }
         inode = oprofilefs_get_inode(root->d_sb, S_IFREG | perm);
         if (!inode) {
                 dput(dentry);
-               mutex_unlock(&root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(root)->i_mutex);
                 return -ENOMEM;
         }
         inode->i_fop = fops;
         inode->i_private = priv;
         d_add(dentry, inode);
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
         return 0;
  }
  
@@ -215,22 +215,22 @@ struct dentry *oprofilefs_mkdir(struct dentry *parent, char const *name)
         struct dentry *dentry;
         struct inode *inode;
  
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         dentry = d_alloc_name(parent, name);
         if (!dentry) {
-               mutex_unlock(&parent->d_inode->i_mutex);
+               mutex_unlock(&d_inode(parent)->i_mutex);
                 return NULL;
         }
         inode = oprofilefs_get_inode(parent->d_sb, S_IFDIR | 0755);
         if (!inode) {
                 dput(dentry);
-               mutex_unlock(&parent->d_inode->i_mutex);
+               mutex_unlock(&d_inode(parent)->i_mutex);
                 return NULL;
         }
         inode->i_op = &simple_dir_inode_operations;
         inode->i_fop = &simple_dir_operations;
         d_add(dentry, inode);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
         return dentry;
  }
  
diff --git a/drivers/platform/chrome/Kconfig b/drivers/platform/chrome/Kconfig

index 440ed77..2a6531a 100644 (file)
--- a/drivers/platform/chrome/Kconfig
+++ b/drivers/platform/chrome/Kconfig
@@ -4,7 +4,7 @@
  
  menuconfig CHROME_PLATFORMS
         bool "Platform support for Chrome hardware"
-       depends on X86
+       depends on X86 || ARM
         ---help---
           Say Y here to get to see options for platform support for
           various Chromebooks and Chromeboxes. This option alone does
@@ -16,8 +16,7 @@ if CHROME_PLATFORMS
  
  config CHROMEOS_LAPTOP
         tristate "Chrome OS Laptop"
-       depends on I2C
-       depends on DMI
+       depends on I2C && DMI && X86
         ---help---
           This driver instantiates i2c and smbus devices such as
           light sensors and touchpads.
@@ -27,6 +26,7 @@ config CHROMEOS_LAPTOP
  
  config CHROMEOS_PSTORE
         tristate "Chrome OS pstore support"
+       depends on X86
         ---help---
           This module instantiates the persistent storage on x86 ChromeOS
           devices. It can be used to store away console logs and crash
@@ -38,5 +38,25 @@ config CHROMEOS_PSTORE
           If you have a supported Chromebook, choose Y or M here.
           The module will be called chromeos_pstore.
  
+config CROS_EC_CHARDEV
+        tristate "Chrome OS Embedded Controller userspace device interface"
+        depends on MFD_CROS_EC
+        ---help---
+          This driver adds support to talk with the ChromeOS EC from userspace.
+
+          If you have a supported Chromebook, choose Y or M here.
+          The module will be called cros_ec_dev.
+
+config CROS_EC_LPC
+        tristate "ChromeOS Embedded Controller (LPC)"
+        depends on MFD_CROS_EC && (X86 || COMPILE_TEST)
+        help
+          If you say Y here, you get support for talking to the ChromeOS EC
+          over an LPC bus. This uses a simple byte-level protocol with a
+          checksum. This is used for userspace access only. The kernel
+          typically has its own communication methods.
+
+          To compile this driver as a module, choose M here: the
+          module will be called cros_ec_lpc.
  
  endif # CHROMEOS_PLATFORMS
diff --git a/drivers/platform/chrome/Makefile b/drivers/platform/chrome/Makefile

index 2b860ca..bd8d860 100644 (file)
--- a/drivers/platform/chrome/Makefile
+++ b/drivers/platform/chrome/Makefile
@@ -1,3 +1,6 @@
  
  obj-$(CONFIG_CHROMEOS_LAPTOP)  += chromeos_laptop.o
  obj-$(CONFIG_CHROMEOS_PSTORE)  += chromeos_pstore.o
+cros_ec_devs-objs               := cros_ec_dev.o cros_ec_sysfs.o cros_ec_lightbar.o
+obj-$(CONFIG_CROS_EC_CHARDEV)   += cros_ec_devs.o
+obj-$(CONFIG_CROS_EC_LPC)       += cros_ec_lpc.o
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c

index b84fdd6..a04019a 100644 (file)
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -133,12 +133,13 @@ static struct i2c_client *__add_probed_i2c_device(
                 const char *name,
                 int bus,
                 struct i2c_board_info *info,
-               const unsigned short *addrs)
+               const unsigned short *alt_addr_list)
  {
         const struct dmi_device *dmi_dev;
         const struct dmi_dev_onboard *dev_data;
         struct i2c_adapter *adapter;
-       struct i2c_client *client;
+       struct i2c_client *client = NULL;
+       const unsigned short addr_list[] = { info->addr, I2C_CLIENT_END };
  
         if (bus < 0)
                 return NULL;
@@ -169,8 +170,28 @@ static struct i2c_client *__add_probed_i2c_device(
                 return NULL;
         }
  
-       /* add the i2c device */
-       client = i2c_new_probed_device(adapter, info, addrs, NULL);
+       /*
+        * Add the i2c device. If we can't detect it at the primary
+        * address we scan secondary addresses. In any case the client
+        * structure gets assigned primary address.
+        */
+       client = i2c_new_probed_device(adapter, info, addr_list, NULL);
+       if (!client && alt_addr_list) {
+               struct i2c_board_info dummy_info = {
+                       I2C_BOARD_INFO("dummy", info->addr),
+               };
+               struct i2c_client *dummy;
+
+               dummy = i2c_new_probed_device(adapter, &dummy_info,
+                                             alt_addr_list, NULL);
+               if (dummy) {
+                       pr_debug("%s %d-%02x is probed at %02x\n",
+                                 __func__, bus, info->addr, dummy->addr);
+                       i2c_unregister_device(dummy);
+                       client = i2c_new_device(adapter, info);
+               }
+       }
+
         if (!client)
                 pr_notice("%s failed to register device %d-%02x\n",
                           __func__, bus, info->addr);
@@ -254,12 +275,10 @@ static struct i2c_client *add_i2c_device(const char *name,
                                                 enum i2c_adapter_type type,
                                                 struct i2c_board_info *info)
  {
-       const unsigned short addr_list[] = { info->addr, I2C_CLIENT_END };
-
         return __add_probed_i2c_device(name,
                                        find_i2c_adapter_num(type),
                                        info,
-                                      addr_list);
+                                      NULL);
  }
  
  static int setup_cyapa_tp(enum i2c_adapter_type type)
@@ -275,7 +294,6 @@ static int setup_cyapa_tp(enum i2c_adapter_type type)
  static int setup_atmel_224s_tp(enum i2c_adapter_type type)
  {
         const unsigned short addr_list[] = { ATMEL_TP_I2C_BL_ADDR,
-                                            ATMEL_TP_I2C_ADDR,
                                              I2C_CLIENT_END };
         if (tp)
                 return 0;
@@ -289,7 +307,6 @@ static int setup_atmel_224s_tp(enum i2c_adapter_type type)
  static int setup_atmel_1664s_ts(enum i2c_adapter_type type)
  {
         const unsigned short addr_list[] = { ATMEL_TS_I2C_BL_ADDR,
-                                            ATMEL_TS_I2C_ADDR,
                                              I2C_CLIENT_END };
         if (ts)
                 return 0;
diff --git a/drivers/platform/chrome/cros_ec_dev.c b/drivers/platform/chrome/cros_ec_dev.c

new file mode 100644 (file)

index 0000000..6090d0b
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_dev.c
@@ -0,0 +1,274 @@
+/*
+ * cros_ec_dev - expose the Chrome OS Embedded Controller to user-space
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/uaccess.h>
+
+#include "cros_ec_dev.h"
+
+/* Device variables */
+#define CROS_MAX_DEV 128
+static struct class *cros_class;
+static int ec_major;
+
+/* Basic communication */
+static int ec_get_version(struct cros_ec_device *ec, char *str, int maxlen)
+{
+       struct ec_response_get_version *resp;
+       static const char * const current_image_name[] = {
+               "unknown", "read-only", "read-write", "invalid",
+       };
+       struct cros_ec_command msg = {
+               .version = 0,
+               .command = EC_CMD_GET_VERSION,
+               .outdata = { 0 },
+               .outsize = 0,
+               .indata = { 0 },
+               .insize = sizeof(*resp),
+       };
+       int ret;
+
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       if (msg.result != EC_RES_SUCCESS) {
+               snprintf(str, maxlen,
+                        "%s\nUnknown EC version: EC returned %d\n",
+                        CROS_EC_DEV_VERSION, msg.result);
+               return 0;
+       }
+
+       resp = (struct ec_response_get_version *)msg.indata;
+       if (resp->current_image >= ARRAY_SIZE(current_image_name))
+               resp->current_image = 3; /* invalid */
+
+       snprintf(str, maxlen, "%s\n%s\n%s\n%s\n", CROS_EC_DEV_VERSION,
+                resp->version_string_ro, resp->version_string_rw,
+                current_image_name[resp->current_image]);
+
+       return 0;
+}
+
+/* Device file ops */
+static int ec_device_open(struct inode *inode, struct file *filp)
+{
+       filp->private_data = container_of(inode->i_cdev,
+                                         struct cros_ec_device, cdev);
+       return 0;
+}
+
+static int ec_device_release(struct inode *inode, struct file *filp)
+{
+       return 0;
+}
+
+static ssize_t ec_device_read(struct file *filp, char __user *buffer,
+                             size_t length, loff_t *offset)
+{
+       struct cros_ec_device *ec = filp->private_data;
+       char msg[sizeof(struct ec_response_get_version) +
+                sizeof(CROS_EC_DEV_VERSION)];
+       size_t count;
+       int ret;
+
+       if (*offset != 0)
+               return 0;
+
+       ret = ec_get_version(ec, msg, sizeof(msg));
+       if (ret)
+               return ret;
+
+       count = min(length, strlen(msg));
+
+       if (copy_to_user(buffer, msg, count))
+               return -EFAULT;
+
+       *offset = count;
+       return count;
+}
+
+/* Ioctls */
+static long ec_device_ioctl_xcmd(struct cros_ec_device *ec, void __user *arg)
+{
+       long ret;
+       struct cros_ec_command s_cmd = { };
+
+       if (copy_from_user(&s_cmd, arg, sizeof(s_cmd)))
+               return -EFAULT;
+
+       ret = cros_ec_cmd_xfer(ec, &s_cmd);
+       /* Only copy data to userland if data was received. */
+       if (ret < 0)
+               return ret;
+
+       if (copy_to_user(arg, &s_cmd, sizeof(s_cmd)))
+               return -EFAULT;
+
+       return 0;
+}
+
+static long ec_device_ioctl_readmem(struct cros_ec_device *ec, void __user *arg)
+{
+       struct cros_ec_readmem s_mem = { };
+       long num;
+
+       /* Not every platform supports direct reads */
+       if (!ec->cmd_readmem)
+               return -ENOTTY;
+
+       if (copy_from_user(&s_mem, arg, sizeof(s_mem)))
+               return -EFAULT;
+
+       num = ec->cmd_readmem(ec, s_mem.offset, s_mem.bytes, s_mem.buffer);
+       if (num <= 0)
+               return num;
+
+       if (copy_to_user((void __user *)arg, &s_mem, sizeof(s_mem)))
+               return -EFAULT;
+
+       return 0;
+}
+
+static long ec_device_ioctl(struct file *filp, unsigned int cmd,
+                           unsigned long arg)
+{
+       struct cros_ec_device *ec = filp->private_data;
+
+       if (_IOC_TYPE(cmd) != CROS_EC_DEV_IOC)
+               return -ENOTTY;
+
+       switch (cmd) {
+       case CROS_EC_DEV_IOCXCMD:
+               return ec_device_ioctl_xcmd(ec, (void __user *)arg);
+       case CROS_EC_DEV_IOCRDMEM:
+               return ec_device_ioctl_readmem(ec, (void __user *)arg);
+       }
+
+       return -ENOTTY;
+}
+
+/* Module initialization */
+static const struct file_operations fops = {
+       .open = ec_device_open,
+       .release = ec_device_release,
+       .read = ec_device_read,
+       .unlocked_ioctl = ec_device_ioctl,
+};
+
+static int ec_device_probe(struct platform_device *pdev)
+{
+       struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent);
+       int retval = -ENOTTY;
+       dev_t devno = MKDEV(ec_major, 0);
+
+       /* Instantiate it (and remember the EC) */
+       cdev_init(&ec->cdev, &fops);
+
+       retval = cdev_add(&ec->cdev, devno, 1);
+       if (retval) {
+               dev_err(&pdev->dev, ": failed to add character device\n");
+               return retval;
+       }
+
+       ec->vdev = device_create(cros_class, NULL, devno, ec,
+                                CROS_EC_DEV_NAME);
+       if (IS_ERR(ec->vdev)) {
+               retval = PTR_ERR(ec->vdev);
+               dev_err(&pdev->dev, ": failed to create device\n");
+               cdev_del(&ec->cdev);
+               return retval;
+       }
+
+       /* Initialize extra interfaces */
+       ec_dev_sysfs_init(ec);
+       ec_dev_lightbar_init(ec);
+
+       return 0;
+}
+
+static int ec_device_remove(struct platform_device *pdev)
+{
+       struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent);
+
+       ec_dev_lightbar_remove(ec);
+       ec_dev_sysfs_remove(ec);
+       device_destroy(cros_class, MKDEV(ec_major, 0));
+       cdev_del(&ec->cdev);
+       return 0;
+}
+
+static struct platform_driver cros_ec_dev_driver = {
+       .driver = {
+               .name = "cros-ec-ctl",
+       },
+       .probe = ec_device_probe,
+       .remove = ec_device_remove,
+};
+
+static int __init cros_ec_dev_init(void)
+{
+       int ret;
+       dev_t dev = 0;
+
+       cros_class = class_create(THIS_MODULE, "chromeos");
+       if (IS_ERR(cros_class)) {
+               pr_err(CROS_EC_DEV_NAME ": failed to register device class\n");
+               return PTR_ERR(cros_class);
+       }
+
+       /* Get a range of minor numbers (starting with 0) to work with */
+       ret = alloc_chrdev_region(&dev, 0, CROS_MAX_DEV, CROS_EC_DEV_NAME);
+       if (ret < 0) {
+               pr_err(CROS_EC_DEV_NAME ": alloc_chrdev_region() failed\n");
+               goto failed_chrdevreg;
+       }
+       ec_major = MAJOR(dev);
+
+       /* Register the driver */
+       ret = platform_driver_register(&cros_ec_dev_driver);
+       if (ret < 0) {
+               pr_warn(CROS_EC_DEV_NAME ": can't register driver: %d\n", ret);
+               goto failed_devreg;
+       }
+       return 0;
+
+failed_devreg:
+       unregister_chrdev_region(MKDEV(ec_major, 0), CROS_MAX_DEV);
+failed_chrdevreg:
+       class_destroy(cros_class);
+       return ret;
+}
+
+static void __exit cros_ec_dev_exit(void)
+{
+       platform_driver_unregister(&cros_ec_dev_driver);
+       unregister_chrdev(ec_major, CROS_EC_DEV_NAME);
+       class_destroy(cros_class);
+}
+
+module_init(cros_ec_dev_init);
+module_exit(cros_ec_dev_exit);
+
+MODULE_AUTHOR("Bill Richardson <wfrichar@chromium.org>");
+MODULE_DESCRIPTION("Userspace interface to the Chrome OS Embedded Controller");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL");
diff --git a/drivers/platform/chrome/cros_ec_dev.h b/drivers/platform/chrome/cros_ec_dev.h

new file mode 100644 (file)

index 0000000..45d67f7
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_dev.h
@@ -0,0 +1,53 @@
+/*
+ * cros_ec_dev - expose the Chrome OS Embedded Controller to userspace
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _CROS_EC_DEV_H_
+#define _CROS_EC_DEV_H_
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+#include <linux/mfd/cros_ec.h>
+
+#define CROS_EC_DEV_NAME "cros_ec"
+#define CROS_EC_DEV_VERSION "1.0.0"
+
+/*
+ * @offset: within EC_LPC_ADDR_MEMMAP region
+ * @bytes: number of bytes to read. zero means "read a string" (including '\0')
+ *         (at most only EC_MEMMAP_SIZE bytes can be read)
+ * @buffer: where to store the result
+ * ioctl returns the number of bytes read, negative on error
+ */
+struct cros_ec_readmem {
+       uint32_t offset;
+       uint32_t bytes;
+       uint8_t buffer[EC_MEMMAP_SIZE];
+};
+
+#define CROS_EC_DEV_IOC       0xEC
+#define CROS_EC_DEV_IOCXCMD   _IOWR(CROS_EC_DEV_IOC, 0, struct cros_ec_command)
+#define CROS_EC_DEV_IOCRDMEM  _IOWR(CROS_EC_DEV_IOC, 1, struct cros_ec_readmem)
+
+void ec_dev_sysfs_init(struct cros_ec_device *);
+void ec_dev_sysfs_remove(struct cros_ec_device *);
+
+void ec_dev_lightbar_init(struct cros_ec_device *);
+void ec_dev_lightbar_remove(struct cros_ec_device *);
+
+#endif /* _CROS_EC_DEV_H_ */
diff --git a/drivers/platform/chrome/cros_ec_lightbar.c b/drivers/platform/chrome/cros_ec_lightbar.c

new file mode 100644 (file)

index 0000000..b4ff47a
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lightbar.c
@@ -0,0 +1,367 @@
+/*
+ * cros_ec_lightbar - expose the Chromebook Pixel lightbar to userspace
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "cros_ec_lightbar: " fmt
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "cros_ec_dev.h"
+
+/* Rate-limit the lightbar interface to prevent DoS. */
+static unsigned long lb_interval_jiffies = 50 * HZ / 1000;
+
+static ssize_t interval_msec_show(struct device *dev,
+                                 struct device_attribute *attr, char *buf)
+{
+       unsigned long msec = lb_interval_jiffies * 1000 / HZ;
+
+       return scnprintf(buf, PAGE_SIZE, "%lu\n", msec);
+}
+
+static ssize_t interval_msec_store(struct device *dev,
+                                  struct device_attribute *attr,
+                                  const char *buf, size_t count)
+{
+       unsigned long msec;
+
+       if (kstrtoul(buf, 0, &msec))
+               return -EINVAL;
+
+       lb_interval_jiffies = msec * HZ / 1000;
+
+       return count;
+}
+
+static DEFINE_MUTEX(lb_mutex);
+/* Return 0 if able to throttle correctly, error otherwise */
+static int lb_throttle(void)
+{
+       static unsigned long last_access;
+       unsigned long now, next_timeslot;
+       long delay;
+       int ret = 0;
+
+       mutex_lock(&lb_mutex);
+
+       now = jiffies;
+       next_timeslot = last_access + lb_interval_jiffies;
+
+       if (time_before(now, next_timeslot)) {
+               delay = (long)(next_timeslot) - (long)now;
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (schedule_timeout(delay) > 0) {
+                       /* interrupted - just abort */
+                       ret = -EINTR;
+                       goto out;
+               }
+               now = jiffies;
+       }
+
+       last_access = now;
+out:
+       mutex_unlock(&lb_mutex);
+
+       return ret;
+}
+
+#define INIT_MSG(P, R) { \
+               .command = EC_CMD_LIGHTBAR_CMD, \
+               .outsize = sizeof(*P), \
+               .insize = sizeof(*R), \
+       }
+
+static int get_lightbar_version(struct cros_ec_device *ec,
+                               uint32_t *ver_ptr, uint32_t *flg_ptr)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       int ret;
+
+       param = (struct ec_params_lightbar *)msg.outdata;
+       param->cmd = LIGHTBAR_CMD_VERSION;
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return 0;
+
+       switch (msg.result) {
+       case EC_RES_INVALID_PARAM:
+               /* Pixel had no version command. */
+               if (ver_ptr)
+                       *ver_ptr = 0;
+               if (flg_ptr)
+                       *flg_ptr = 0;
+               return 1;
+
+       case EC_RES_SUCCESS:
+               resp = (struct ec_response_lightbar *)msg.indata;
+
+               /* Future devices w/lightbars should implement this command */
+               if (ver_ptr)
+                       *ver_ptr = resp->version.num;
+               if (flg_ptr)
+                       *flg_ptr = resp->version.flags;
+               return 1;
+       }
+
+       /* Anything else (ie, EC_RES_INVALID_COMMAND) - no lightbar */
+       return 0;
+}
+
+static ssize_t version_show(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       uint32_t version, flags;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+       int ret;
+
+       ret = lb_throttle();
+       if (ret)
+               return ret;
+
+       /* This should always succeed, because we check during init. */
+       if (!get_lightbar_version(ec, &version, &flags))
+               return -EIO;
+
+       return scnprintf(buf, PAGE_SIZE, "%d %d\n", version, flags);
+}
+
+static ssize_t brightness_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       int ret;
+       unsigned int val;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       if (kstrtouint(buf, 0, &val))
+               return -EINVAL;
+
+       param = (struct ec_params_lightbar *)msg.outdata;
+       param->cmd = LIGHTBAR_CMD_BRIGHTNESS;
+       param->brightness.num = val;
+       ret = lb_throttle();
+       if (ret)
+               return ret;
+
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       if (msg.result != EC_RES_SUCCESS)
+               return -EINVAL;
+
+       return count;
+}
+
+
+/*
+ * We expect numbers, and we'll keep reading until we find them, skipping over
+ * any whitespace (sysfs guarantees that the input is null-terminated). Every
+ * four numbers are sent to the lightbar as <LED,R,G,B>. We fail at the first
+ * parsing error, if we don't parse any numbers, or if we have numbers left
+ * over.
+ */
+static ssize_t led_rgb_store(struct device *dev, struct device_attribute *attr,
+                            const char *buf, size_t count)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+       unsigned int val[4];
+       int ret, i = 0, j = 0, ok = 0;
+
+       do {
+               /* Skip any whitespace */
+               while (*buf && isspace(*buf))
+                       buf++;
+
+               if (!*buf)
+                       break;
+
+               ret = sscanf(buf, "%i", &val[i++]);
+               if (ret == 0)
+                       return -EINVAL;
+
+               if (i == 4) {
+                       param = (struct ec_params_lightbar *)msg.outdata;
+                       param->cmd = LIGHTBAR_CMD_RGB;
+                       param->rgb.led = val[0];
+                       param->rgb.red = val[1];
+                       param->rgb.green = val[2];
+                       param->rgb.blue = val[3];
+                       /*
+                        * Throttle only the first of every four transactions,
+                        * so that the user can update all four LEDs at once.
+                        */
+                       if ((j++ % 4) == 0) {
+                               ret = lb_throttle();
+                               if (ret)
+                                       return ret;
+                       }
+
+                       ret = cros_ec_cmd_xfer(ec, &msg);
+                       if (ret < 0)
+                               return ret;
+
+                       if (msg.result != EC_RES_SUCCESS)
+                               return -EINVAL;
+
+                       i = 0;
+                       ok = 1;
+               }
+
+               /* Skip over the number we just read */
+               while (*buf && !isspace(*buf))
+                       buf++;
+
+       } while (*buf);
+
+       return (ok && i == 0) ? count : -EINVAL;
+}
+
+static char const *seqname[] = {
+       "ERROR", "S5", "S3", "S0", "S5S3", "S3S0",
+       "S0S3", "S3S5", "STOP", "RUN", "PULSE", "TEST", "KONAMI",
+};
+
+static ssize_t sequence_show(struct device *dev,
+                            struct device_attribute *attr, char *buf)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       int ret;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       param = (struct ec_params_lightbar *)msg.outdata;
+       param->cmd = LIGHTBAR_CMD_GET_SEQ;
+       ret = lb_throttle();
+       if (ret)
+               return ret;
+
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       if (msg.result != EC_RES_SUCCESS)
+               return scnprintf(buf, PAGE_SIZE,
+                                "ERROR: EC returned %d\n", msg.result);
+
+       resp = (struct ec_response_lightbar *)msg.indata;
+       if (resp->get_seq.num >= ARRAY_SIZE(seqname))
+               return scnprintf(buf, PAGE_SIZE, "%d\n", resp->get_seq.num);
+       else
+               return scnprintf(buf, PAGE_SIZE, "%s\n",
+                                seqname[resp->get_seq.num]);
+}
+
+static ssize_t sequence_store(struct device *dev, struct device_attribute *attr,
+                             const char *buf, size_t count)
+{
+       struct ec_params_lightbar *param;
+       struct ec_response_lightbar *resp;
+       struct cros_ec_command msg = INIT_MSG(param, resp);
+       unsigned int num;
+       int ret, len;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       for (len = 0; len < count; len++)
+               if (!isalnum(buf[len]))
+                       break;
+
+       for (num = 0; num < ARRAY_SIZE(seqname); num++)
+               if (!strncasecmp(seqname[num], buf, len))
+                       break;
+
+       if (num >= ARRAY_SIZE(seqname)) {
+               ret = kstrtouint(buf, 0, &num);
+               if (ret)
+                       return ret;
+       }
+
+       param = (struct ec_params_lightbar *)msg.outdata;
+       param->cmd = LIGHTBAR_CMD_SEQ;
+       param->seq.num = num;
+       ret = lb_throttle();
+       if (ret)
+               return ret;
+
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+
+       if (msg.result != EC_RES_SUCCESS)
+               return -EINVAL;
+
+       return count;
+}
+
+/* Module initialization */
+
+static DEVICE_ATTR_RW(interval_msec);
+static DEVICE_ATTR_RO(version);
+static DEVICE_ATTR_WO(brightness);
+static DEVICE_ATTR_WO(led_rgb);
+static DEVICE_ATTR_RW(sequence);
+static struct attribute *__lb_cmds_attrs[] = {
+       &dev_attr_interval_msec.attr,
+       &dev_attr_version.attr,
+       &dev_attr_brightness.attr,
+       &dev_attr_led_rgb.attr,
+       &dev_attr_sequence.attr,
+       NULL,
+};
+static struct attribute_group lb_cmds_attr_group = {
+       .name = "lightbar",
+       .attrs = __lb_cmds_attrs,
+};
+
+void ec_dev_lightbar_init(struct cros_ec_device *ec)
+{
+       int ret = 0;
+
+       /* Only instantiate this stuff if the EC has a lightbar */
+       if (!get_lightbar_version(ec, NULL, NULL))
+               return;
+
+       ret = sysfs_create_group(&ec->vdev->kobj, &lb_cmds_attr_group);
+       if (ret)
+               pr_warn("sysfs_create_group() failed: %d\n", ret);
+}
+
+void ec_dev_lightbar_remove(struct cros_ec_device *ec)
+{
+       sysfs_remove_group(&ec->vdev->kobj, &lb_cmds_attr_group);
+}
diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c

new file mode 100644 (file)

index 0000000..8f9ac4d
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -0,0 +1,319 @@
+/*
+ * cros_ec_lpc - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2012-2015 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#include <linux/dmi.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+
+#define DRV_NAME "cros_ec_lpc"
+
+static int ec_response_timed_out(void)
+{
+       unsigned long one_second = jiffies + HZ;
+
+       usleep_range(200, 300);
+       do {
+               if (!(inb(EC_LPC_ADDR_HOST_CMD) & EC_LPC_STATUS_BUSY_MASK))
+                       return 0;
+               usleep_range(100, 200);
+       } while (time_before(jiffies, one_second));
+
+       return 1;
+}
+
+static int cros_ec_cmd_xfer_lpc(struct cros_ec_device *ec,
+                               struct cros_ec_command *msg)
+{
+       struct ec_lpc_host_args args;
+       int csum;
+       int i;
+       int ret = 0;
+
+       if (msg->outsize > EC_PROTO2_MAX_PARAM_SIZE ||
+           msg->insize > EC_PROTO2_MAX_PARAM_SIZE) {
+               dev_err(ec->dev,
+                       "invalid buffer sizes (out %d, in %d)\n",
+                       msg->outsize, msg->insize);
+               return -EINVAL;
+       }
+
+       /* Now actually send the command to the EC and get the result */
+       args.flags = EC_HOST_ARGS_FLAG_FROM_HOST;
+       args.command_version = msg->version;
+       args.data_size = msg->outsize;
+
+       /* Initialize checksum */
+       csum = msg->command + args.flags +
+               args.command_version + args.data_size;
+
+       /* Copy data and update checksum */
+       for (i = 0; i < msg->outsize; i++) {
+               outb(msg->outdata[i], EC_LPC_ADDR_HOST_PARAM + i);
+               csum += msg->outdata[i];
+       }
+
+       /* Finalize checksum and write args */
+       args.checksum = csum & 0xFF;
+       outb(args.flags, EC_LPC_ADDR_HOST_ARGS);
+       outb(args.command_version, EC_LPC_ADDR_HOST_ARGS + 1);
+       outb(args.data_size, EC_LPC_ADDR_HOST_ARGS + 2);
+       outb(args.checksum, EC_LPC_ADDR_HOST_ARGS + 3);
+
+       /* Here we go */
+       outb(msg->command, EC_LPC_ADDR_HOST_CMD);
+
+       if (ec_response_timed_out()) {
+               dev_warn(ec->dev, "EC responsed timed out\n");
+               ret = -EIO;
+               goto done;
+       }
+
+       /* Check result */
+       msg->result = inb(EC_LPC_ADDR_HOST_DATA);
+
+       switch (msg->result) {
+       case EC_RES_SUCCESS:
+               break;
+       case EC_RES_IN_PROGRESS:
+               ret = -EAGAIN;
+               dev_dbg(ec->dev, "command 0x%02x in progress\n",
+                       msg->command);
+               goto done;
+       default:
+               dev_dbg(ec->dev, "command 0x%02x returned %d\n",
+                       msg->command, msg->result);
+       }
+
+       /* Read back args */
+       args.flags = inb(EC_LPC_ADDR_HOST_ARGS);
+       args.command_version = inb(EC_LPC_ADDR_HOST_ARGS + 1);
+       args.data_size = inb(EC_LPC_ADDR_HOST_ARGS + 2);
+       args.checksum = inb(EC_LPC_ADDR_HOST_ARGS + 3);
+
+       if (args.data_size > msg->insize) {
+               dev_err(ec->dev,
+                       "packet too long (%d bytes, expected %d)",
+                       args.data_size, msg->insize);
+               ret = -ENOSPC;
+               goto done;
+       }
+
+       /* Start calculating response checksum */
+       csum = msg->command + args.flags +
+               args.command_version + args.data_size;
+
+       /* Read response and update checksum */
+       for (i = 0; i < args.data_size; i++) {
+               msg->indata[i] = inb(EC_LPC_ADDR_HOST_PARAM + i);
+               csum += msg->indata[i];
+       }
+
+       /* Verify checksum */
+       if (args.checksum != (csum & 0xFF)) {
+               dev_err(ec->dev,
+                       "bad packet checksum, expected %02x, got %02x\n",
+                       args.checksum, csum & 0xFF);
+               ret = -EBADMSG;
+               goto done;
+       }
+
+       /* Return actual amount of data received */
+       ret = args.data_size;
+done:
+       return ret;
+}
+
+/* Returns num bytes read, or negative on error. Doesn't need locking. */
+static int cros_ec_lpc_readmem(struct cros_ec_device *ec, unsigned int offset,
+                              unsigned int bytes, void *dest)
+{
+       int i = offset;
+       char *s = dest;
+       int cnt = 0;
+
+       if (offset >= EC_MEMMAP_SIZE - bytes)
+               return -EINVAL;
+
+       /* fixed length */
+       if (bytes) {
+               for (; cnt < bytes; i++, s++, cnt++)
+                       *s = inb(EC_LPC_ADDR_MEMMAP + i);
+               return cnt;
+       }
+
+       /* string */
+       for (; i < EC_MEMMAP_SIZE; i++, s++) {
+               *s = inb(EC_LPC_ADDR_MEMMAP + i);
+               cnt++;
+               if (!*s)
+                       break;
+       }
+
+       return cnt;
+}
+
+static int cros_ec_lpc_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct cros_ec_device *ec_dev;
+       int ret;
+
+       if (!devm_request_region(dev, EC_LPC_ADDR_MEMMAP, EC_MEMMAP_SIZE,
+                                dev_name(dev))) {
+               dev_err(dev, "couldn't reserve memmap region\n");
+               return -EBUSY;
+       }
+
+       if ((inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID) != 'E') ||
+           (inb(EC_LPC_ADDR_MEMMAP + EC_MEMMAP_ID + 1) != 'C')) {
+               dev_err(dev, "EC ID not detected\n");
+               return -ENODEV;
+       }
+
+       if (!devm_request_region(dev, EC_HOST_CMD_REGION0,
+                                EC_HOST_CMD_REGION_SIZE, dev_name(dev))) {
+               dev_err(dev, "couldn't reserve region0\n");
+               return -EBUSY;
+       }
+       if (!devm_request_region(dev, EC_HOST_CMD_REGION1,
+                                EC_HOST_CMD_REGION_SIZE, dev_name(dev))) {
+               dev_err(dev, "couldn't reserve region1\n");
+               return -EBUSY;
+       }
+
+       ec_dev = devm_kzalloc(dev, sizeof(*ec_dev), GFP_KERNEL);
+       if (!ec_dev)
+               return -ENOMEM;
+
+       platform_set_drvdata(pdev, ec_dev);
+       ec_dev->dev = dev;
+       ec_dev->ec_name = pdev->name;
+       ec_dev->phys_name = dev_name(dev);
+       ec_dev->parent = dev;
+       ec_dev->cmd_xfer = cros_ec_cmd_xfer_lpc;
+       ec_dev->cmd_readmem = cros_ec_lpc_readmem;
+
+       ret = cros_ec_register(ec_dev);
+       if (ret) {
+               dev_err(dev, "couldn't register ec_dev (%d)\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int cros_ec_lpc_remove(struct platform_device *pdev)
+{
+       struct cros_ec_device *ec_dev;
+
+       ec_dev = platform_get_drvdata(pdev);
+       cros_ec_remove(ec_dev);
+
+       return 0;
+}
+
+static struct dmi_system_id cros_ec_lpc_dmi_table[] __initdata = {
+       {
+               /*
+                * Today all Chromebooks/boxes ship with Google_* as version and
+                * coreboot as bios vendor. No other systems with this
+                * combination are known to date.
+                */
+               .matches = {
+                       DMI_MATCH(DMI_BIOS_VENDOR, "coreboot"),
+                       DMI_MATCH(DMI_BIOS_VERSION, "Google_"),
+               },
+       },
+       {
+               /* x86-link, the Chromebook Pixel. */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "GOOGLE"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Link"),
+               },
+       },
+       {
+               /* x86-peppy, the Acer C720 Chromebook. */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Peppy"),
+               },
+       },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(dmi, cros_ec_lpc_dmi_table);
+
+static struct platform_driver cros_ec_lpc_driver = {
+       .driver = {
+               .name = DRV_NAME,
+       },
+       .probe = cros_ec_lpc_probe,
+       .remove = cros_ec_lpc_remove,
+};
+
+static struct platform_device cros_ec_lpc_device = {
+       .name = DRV_NAME
+};
+
+static int __init cros_ec_lpc_init(void)
+{
+       int ret;
+
+       if (!dmi_check_system(cros_ec_lpc_dmi_table)) {
+               pr_err(DRV_NAME ": unsupported system.\n");
+               return -ENODEV;
+       }
+
+       /* Register the driver */
+       ret = platform_driver_register(&cros_ec_lpc_driver);
+       if (ret) {
+               pr_err(DRV_NAME ": can't register driver: %d\n", ret);
+               return ret;
+       }
+
+       /* Register the device, and it'll get hooked up automatically */
+       ret = platform_device_register(&cros_ec_lpc_device);
+       if (ret) {
+               pr_err(DRV_NAME ": can't register device: %d\n", ret);
+               platform_driver_unregister(&cros_ec_lpc_driver);
+               return ret;
+       }
+
+       return 0;
+}
+
+static void __exit cros_ec_lpc_exit(void)
+{
+       platform_device_unregister(&cros_ec_lpc_device);
+       platform_driver_unregister(&cros_ec_lpc_driver);
+}
+
+module_init(cros_ec_lpc_init);
+module_exit(cros_ec_lpc_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ChromeOS EC LPC driver");
diff --git a/drivers/platform/chrome/cros_ec_sysfs.c b/drivers/platform/chrome/cros_ec_sysfs.c

new file mode 100644 (file)

index 0000000..fb62ab6
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_sysfs.c
@@ -0,0 +1,271 @@
+/*
+ * cros_ec_sysfs - expose the Chrome OS EC through sysfs
+ *
+ * Copyright (C) 2014 Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) "cros_ec_sysfs: " fmt
+
+#include <linux/ctype.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/mfd/cros_ec.h>
+#include <linux/mfd/cros_ec_commands.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <linux/stat.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+
+#include "cros_ec_dev.h"
+
+/* Accessor functions */
+
+static ssize_t show_ec_reboot(struct device *dev,
+                             struct device_attribute *attr, char *buf)
+{
+       int count = 0;
+
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          "ro|rw|cancel|cold|disable-jump|hibernate");
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          " [at-shutdown]\n");
+       return count;
+}
+
+static ssize_t store_ec_reboot(struct device *dev,
+                              struct device_attribute *attr,
+                              const char *buf, size_t count)
+{
+       static const struct {
+               const char * const str;
+               uint8_t cmd;
+               uint8_t flags;
+       } words[] = {
+               {"cancel",       EC_REBOOT_CANCEL, 0},
+               {"ro",           EC_REBOOT_JUMP_RO, 0},
+               {"rw",           EC_REBOOT_JUMP_RW, 0},
+               {"cold",         EC_REBOOT_COLD, 0},
+               {"disable-jump", EC_REBOOT_DISABLE_JUMP, 0},
+               {"hibernate",    EC_REBOOT_HIBERNATE, 0},
+               {"at-shutdown",  -1, EC_REBOOT_FLAG_ON_AP_SHUTDOWN},
+       };
+       struct cros_ec_command msg = { 0 };
+       struct ec_params_reboot_ec *param =
+               (struct ec_params_reboot_ec *)msg.outdata;
+       int got_cmd = 0, offset = 0;
+       int i;
+       int ret;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       param->flags = 0;
+       while (1) {
+               /* Find word to start scanning */
+               while (buf[offset] && isspace(buf[offset]))
+                       offset++;
+               if (!buf[offset])
+                       break;
+
+               for (i = 0; i < ARRAY_SIZE(words); i++) {
+                       if (!strncasecmp(words[i].str, buf+offset,
+                                        strlen(words[i].str))) {
+                               if (words[i].flags) {
+                                       param->flags |= words[i].flags;
+                               } else {
+                                       param->cmd = words[i].cmd;
+                                       got_cmd = 1;
+                               }
+                               break;
+                       }
+               }
+
+               /* On to the next word, if any */
+               while (buf[offset] && !isspace(buf[offset]))
+                       offset++;
+       }
+
+       if (!got_cmd)
+               return -EINVAL;
+
+       msg.command = EC_CMD_REBOOT_EC;
+       msg.outsize = sizeof(param);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+       if (msg.result != EC_RES_SUCCESS) {
+               dev_dbg(ec->dev, "EC result %d\n", msg.result);
+               return -EINVAL;
+       }
+
+       return count;
+}
+
+static ssize_t show_ec_version(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       static const char * const image_names[] = {"unknown", "RO", "RW"};
+       struct ec_response_get_version *r_ver;
+       struct ec_response_get_chip_info *r_chip;
+       struct ec_response_board_version *r_board;
+       struct cros_ec_command msg = { 0 };
+       int ret;
+       int count = 0;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       /* Get versions. RW may change. */
+       msg.command = EC_CMD_GET_VERSION;
+       msg.insize = sizeof(*r_ver);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+       if (msg.result != EC_RES_SUCCESS)
+               return scnprintf(buf, PAGE_SIZE,
+                                "ERROR: EC returned %d\n", msg.result);
+
+       r_ver = (struct ec_response_get_version *)msg.indata;
+       /* Strings should be null-terminated, but let's be sure. */
+       r_ver->version_string_ro[sizeof(r_ver->version_string_ro) - 1] = '\0';
+       r_ver->version_string_rw[sizeof(r_ver->version_string_rw) - 1] = '\0';
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          "RO version:    %s\n", r_ver->version_string_ro);
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          "RW version:    %s\n", r_ver->version_string_rw);
+       count += scnprintf(buf + count, PAGE_SIZE - count,
+                          "Firmware copy: %s\n",
+                          (r_ver->current_image < ARRAY_SIZE(image_names) ?
+                           image_names[r_ver->current_image] : "?"));
+
+       /* Get build info. */
+       msg.command = EC_CMD_GET_BUILD_INFO;
+       msg.insize = sizeof(msg.indata);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Build info:    XFER ERROR %d\n", ret);
+       else if (msg.result != EC_RES_SUCCESS)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Build info:    EC error %d\n", msg.result);
+       else {
+               msg.indata[sizeof(msg.indata) - 1] = '\0';
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Build info:    %s\n", msg.indata);
+       }
+
+       /* Get chip info. */
+       msg.command = EC_CMD_GET_CHIP_INFO;
+       msg.insize = sizeof(*r_chip);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip info:     XFER ERROR %d\n", ret);
+       else if (msg.result != EC_RES_SUCCESS)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip info:     EC error %d\n", msg.result);
+       else {
+               r_chip = (struct ec_response_get_chip_info *)msg.indata;
+
+               r_chip->vendor[sizeof(r_chip->vendor) - 1] = '\0';
+               r_chip->name[sizeof(r_chip->name) - 1] = '\0';
+               r_chip->revision[sizeof(r_chip->revision) - 1] = '\0';
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip vendor:   %s\n", r_chip->vendor);
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip name:     %s\n", r_chip->name);
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Chip revision: %s\n", r_chip->revision);
+       }
+
+       /* Get board version */
+       msg.command = EC_CMD_GET_BOARD_VERSION;
+       msg.insize = sizeof(*r_board);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Board version: XFER ERROR %d\n", ret);
+       else if (msg.result != EC_RES_SUCCESS)
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Board version: EC error %d\n", msg.result);
+       else {
+               r_board = (struct ec_response_board_version *)msg.indata;
+
+               count += scnprintf(buf + count, PAGE_SIZE - count,
+                                  "Board version: %d\n",
+                                  r_board->board_version);
+       }
+
+       return count;
+}
+
+static ssize_t show_ec_flashinfo(struct device *dev,
+                                struct device_attribute *attr, char *buf)
+{
+       struct ec_response_flash_info *resp;
+       struct cros_ec_command msg = { 0 };
+       int ret;
+       struct cros_ec_device *ec = dev_get_drvdata(dev);
+
+       /* The flash info shouldn't ever change, but ask each time anyway. */
+       msg.command = EC_CMD_FLASH_INFO;
+       msg.insize = sizeof(*resp);
+       ret = cros_ec_cmd_xfer(ec, &msg);
+       if (ret < 0)
+               return ret;
+       if (msg.result != EC_RES_SUCCESS)
+               return scnprintf(buf, PAGE_SIZE,
+                                "ERROR: EC returned %d\n", msg.result);
+
+       resp = (struct ec_response_flash_info *)msg.indata;
+
+       return scnprintf(buf, PAGE_SIZE,
+                        "FlashSize %d\nWriteSize %d\n"
+                        "EraseSize %d\nProtectSize %d\n",
+                        resp->flash_size, resp->write_block_size,
+                        resp->erase_block_size, resp->protect_block_size);
+}
+
+/* Module initialization */
+
+static DEVICE_ATTR(reboot, S_IWUSR | S_IRUGO, show_ec_reboot, store_ec_reboot);
+static DEVICE_ATTR(version, S_IRUGO, show_ec_version, NULL);
+static DEVICE_ATTR(flashinfo, S_IRUGO, show_ec_flashinfo, NULL);
+
+static struct attribute *__ec_attrs[] = {
+       &dev_attr_reboot.attr,
+       &dev_attr_version.attr,
+       &dev_attr_flashinfo.attr,
+       NULL,
+};
+
+static struct attribute_group ec_attr_group = {
+       .attrs = __ec_attrs,
+};
+
+void ec_dev_sysfs_init(struct cros_ec_device *ec)
+{
+       int error;
+
+       error = sysfs_create_group(&ec->vdev->kobj, &ec_attr_group);
+       if (error)
+               pr_warn("failed to create group: %d\n", error);
+}
+
+void ec_dev_sysfs_remove(struct cros_ec_device *ec)
+{
+       sysfs_remove_group(&ec->vdev->kobj, &ec_attr_group);
+}
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig

index 9752761..f9f205c 100644 (file)
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -614,6 +614,7 @@ config ACPI_TOSHIBA
         depends on INPUT
         depends on RFKILL || RFKILL = n
         depends on SERIO_I8042 || SERIO_I8042 = n
+       depends on ACPI_VIDEO || ACPI_VIDEO = n
         select INPUT_POLLDEV
         select INPUT_SPARSEKMAP
         ---help---
diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c

index 66d6d22..6808715 100644 (file)
--- a/drivers/platform/x86/apple-gmux.c
+++ b/drivers/platform/x86/apple-gmux.c
@@ -22,6 +22,7 @@
  #include <linux/delay.h>
  #include <linux/pci.h>
  #include <linux/vga_switcheroo.h>
+#include <linux/vgaarb.h>
  #include <acpi/video.h>
  #include <asm/io.h>
  
@@ -31,6 +32,7 @@ struct apple_gmux_data {
         bool indexed;
         struct mutex index_lock;
  
+       struct pci_dev *pdev;
         struct backlight_device *bdev;
  
         /* switcheroo data */
@@ -415,6 +417,23 @@ static int gmux_resume(struct device *dev)
         return 0;
  }
  
+static struct pci_dev *gmux_get_io_pdev(void)
+{
+       struct pci_dev *pdev = NULL;
+
+       while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev))) {
+               u16 cmd;
+
+               pci_read_config_word(pdev, PCI_COMMAND, &cmd);
+               if (!(cmd & PCI_COMMAND_IO))
+                       continue;
+
+               return pdev;
+       }
+
+       return NULL;
+}
+
  static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
  {
         struct apple_gmux_data *gmux_data;
@@ -425,6 +444,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
         int ret = -ENXIO;
         acpi_status status;
         unsigned long long gpe;
+       struct pci_dev *pdev = NULL;
  
         if (apple_gmux_data)
                 return -EBUSY;
@@ -475,7 +495,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
                         ver_minor = (version >> 16) & 0xff;
                         ver_release = (version >> 8) & 0xff;
                 } else {
-                       pr_info("gmux device not present\n");
+                       pr_info("gmux device not present or IO disabled\n");
                         ret = -ENODEV;
                         goto err_release;
                 }
@@ -483,6 +503,23 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
         pr_info("Found gmux version %d.%d.%d [%s]\n", ver_major, ver_minor,
                 ver_release, (gmux_data->indexed ? "indexed" : "classic"));
  
+       /*
+        * Apple systems with gmux are EFI based and normally don't use
+        * VGA. In addition changing IO+MEM ownership between IGP and dGPU
+        * disables IO/MEM used for backlight control on some systems.
+        * Lock IO+MEM to GPU with active IO to prevent switch.
+        */
+       pdev = gmux_get_io_pdev();
+       if (pdev && vga_tryget(pdev,
+                              VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM)) {
+               pr_err("IO+MEM vgaarb-locking for PCI:%s failed\n",
+                       pci_name(pdev));
+               ret = -EBUSY;
+               goto err_release;
+       } else if (pdev)
+               pr_info("locked IO for PCI:%s\n", pci_name(pdev));
+       gmux_data->pdev = pdev;
+
         memset(&props, 0, sizeof(props));
         props.type = BACKLIGHT_PLATFORM;
         props.max_brightness = gmux_read32(gmux_data, GMUX_PORT_MAX_BRIGHTNESS);
@@ -574,6 +611,10 @@ err_enable_gpe:
  err_notify:
         backlight_device_unregister(bdev);
  err_release:
+       if (gmux_data->pdev)
+               vga_put(gmux_data->pdev,
+                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM);
+       pci_dev_put(pdev);
         release_region(gmux_data->iostart, gmux_data->iolen);
  err_free:
         kfree(gmux_data);
@@ -593,6 +634,11 @@ static void gmux_remove(struct pnp_dev *pnp)
                                            &gmux_notify_handler);
         }
  
+       if (gmux_data->pdev) {
+               vga_put(gmux_data->pdev,
+                       VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM);
+               pci_dev_put(gmux_data->pdev);
+       }
         backlight_device_unregister(gmux_data->bdev);
  
         release_region(gmux_data->iostart, gmux_data->iolen);
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c

index 3d21efe..d688d80 100644 (file)
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -2,9 +2,11 @@
   *  Driver for Dell laptop extras
   *
   *  Copyright (c) Red Hat <mjg@redhat.com>
+ *  Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com>
+ *  Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com>
   *
- *  Based on documentation in the libsmbios package, Copyright (C) 2005 Dell
- *  Inc.
+ *  Based on documentation in the libsmbios package:
+ *  Copyright (C) 2005-2014 Dell Inc.
   *
   *  This program is free software; you can redistribute it and/or modify
   *  it under the terms of the GNU General Public License version 2 as
@@ -32,6 +34,13 @@
  #include "../../firmware/dcdbas.h"
  
  #define BRIGHTNESS_TOKEN 0x7d
+#define KBD_LED_OFF_TOKEN 0x01E1
+#define KBD_LED_ON_TOKEN 0x01E2
+#define KBD_LED_AUTO_TOKEN 0x01E3
+#define KBD_LED_AUTO_25_TOKEN 0x02EA
+#define KBD_LED_AUTO_50_TOKEN 0x02EB
+#define KBD_LED_AUTO_75_TOKEN 0x02EC
+#define KBD_LED_AUTO_100_TOKEN 0x02F6
  
  /* This structure will be modified by the firmware when we enter
   * system management mode, hence the volatiles */
@@ -62,6 +71,13 @@ struct calling_interface_structure {
  
  struct quirk_entry {
         u8 touchpad_led;
+
+       int needs_kbd_timeouts;
+       /*
+        * Ordered list of timeouts expressed in seconds.
+        * The list must end with -1
+        */
+       int kbd_timeouts[];
  };
  
  static struct quirk_entry *quirks;
@@ -76,6 +92,15 @@ static int __init dmi_matched(const struct dmi_system_id *dmi)
         return 1;
  }
  
+/*
+ * These values come from Windows utility provided by Dell. If any other value
+ * is used then BIOS silently set timeout to 0 without any error message.
+ */
+static struct quirk_entry quirk_dell_xps13_9333 = {
+       .needs_kbd_timeouts = 1,
+       .kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 },
+};
+
  static int da_command_address;
  static int da_command_code;
  static int da_num_tokens;
@@ -267,6 +292,15 @@ static const struct dmi_system_id dell_quirks[] __initconst = {
                 },
                 .driver_data = &quirk_dell_vostro_v130,
         },
+       {
+               .callback = dmi_matched,
+               .ident = "Dell XPS13 9333",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "XPS13 9333"),
+               },
+               .driver_data = &quirk_dell_xps13_9333,
+       },
         { }
  };
  
@@ -331,17 +365,29 @@ static void __init find_tokens(const struct dmi_header *dm, void *dummy)
         }
  }
  
-static int find_token_location(int tokenid)
+static int find_token_id(int tokenid)
  {
         int i;
+
         for (i = 0; i < da_num_tokens; i++) {
                 if (da_tokens[i].tokenID == tokenid)
-                       return da_tokens[i].location;
+                       return i;
         }
  
         return -1;
  }
  
+static int find_token_location(int tokenid)
+{
+       int id;
+
+       id = find_token_id(tokenid);
+       if (id == -1)
+               return -1;
+
+       return da_tokens[id].location;
+}
+
  static struct calling_interface_buffer *
  dell_send_request(struct calling_interface_buffer *buffer, int class,
                   int select)
@@ -362,6 +408,20 @@ dell_send_request(struct calling_interface_buffer *buffer, int class,
         return buffer;
  }
  
+static inline int dell_smi_error(int value)
+{
+       switch (value) {
+       case 0: /* Completed successfully */
+               return 0;
+       case -1: /* Completed with error */
+               return -EIO;
+       case -2: /* Function not supported */
+               return -ENXIO;
+       default: /* Unknown error */
+               return -EINVAL;
+       }
+}
+
  /* Derived from information in DellWirelessCtl.cpp:
     Class 17, select 11 is radio control. It returns an array of 32-bit values.
  
@@ -716,7 +776,7 @@ static int dell_send_intensity(struct backlight_device *bd)
         else
                 dell_send_request(buffer, 1, 1);
  
-out:
+ out:
         release_buffer();
         return ret;
  }
@@ -740,7 +800,7 @@ static int dell_get_intensity(struct backlight_device *bd)
  
         ret = buffer->output[1];
  
-out:
+ out:
         release_buffer();
         return ret;
  }
@@ -789,6 +849,1018 @@ static void touchpad_led_exit(void)
         led_classdev_unregister(&touchpad_led);
  }
  
+/*
+ * Derived from information in smbios-keyboard-ctl:
+ *
+ * cbClass 4
+ * cbSelect 11
+ * Keyboard illumination
+ * cbArg1 determines the function to be performed
+ *
+ * cbArg1 0x0 = Get Feature Information
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbRES2, word0  Bitmap of user-selectable modes
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *  cbRES2, byte2  Reserved for future use
+ *  cbRES2, byte3  Keyboard illumination type
+ *     0         Reserved
+ *     1         Tasklight
+ *     2         Backlight
+ *     3-255     Reserved for future use
+ *  cbRES3, byte0  Supported auto keyboard illumination trigger bitmap.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbRES3, byte1  Supported timeout unit bitmap
+ *     bit 0     Seconds
+ *     bit 1     Minutes
+ *     bit 2     Hours
+ *     bit 3     Days
+ *     bits 4-7  Reserved for future use
+ *  cbRES3, byte2  Number of keyboard light brightness levels
+ *  cbRES4, byte0  Maximum acceptable seconds value (0 if seconds not supported).
+ *  cbRES4, byte1  Maximum acceptable minutes value (0 if minutes not supported).
+ *  cbRES4, byte2  Maximum acceptable hours value (0 if hours not supported).
+ *  cbRES4, byte3  Maximum acceptable days value (0 if days not supported)
+ *
+ * cbArg1 0x1 = Get Current State
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbRES2, word0  Bitmap of current mode state
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *     Note: Only One bit can be set
+ *  cbRES2, byte2  Currently active auto keyboard illumination triggers.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbRES2, byte3  Current Timeout
+ *     bits 7:6  Timeout units indicator:
+ *     00b       Seconds
+ *     01b       Minutes
+ *     10b       Hours
+ *     11b       Days
+ *     bits 5:0  Timeout value (0-63) in sec/min/hr/day
+ *     NOTE: A value of 0 means always on (no timeout) if any bits of RES3 byte
+ *     are set upon return from the [Get feature information] call.
+ *  cbRES3, byte0  Current setting of ALS value that turns the light on or off.
+ *  cbRES3, byte1  Current ALS reading
+ *  cbRES3, byte2  Current keyboard light level.
+ *
+ * cbArg1 0x2 = Set New State
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbArg2, word0  Bitmap of current mode state
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *     Note: Only One bit can be set
+ *  cbArg2, byte2  Desired auto keyboard illumination triggers. Must remain inactive to allow
+ *                 keyboard to turn off automatically.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbArg2, byte3  Desired Timeout
+ *     bits 7:6  Timeout units indicator:
+ *     00b       Seconds
+ *     01b       Minutes
+ *     10b       Hours
+ *     11b       Days
+ *     bits 5:0  Timeout value (0-63) in sec/min/hr/day
+ *  cbArg3, byte0  Desired setting of ALS value that turns the light on or off.
+ *  cbArg3, byte2  Desired keyboard light level.
+ */
+
+
+enum kbd_timeout_unit {
+       KBD_TIMEOUT_SECONDS = 0,
+       KBD_TIMEOUT_MINUTES,
+       KBD_TIMEOUT_HOURS,
+       KBD_TIMEOUT_DAYS,
+};
+
+enum kbd_mode_bit {
+       KBD_MODE_BIT_OFF = 0,
+       KBD_MODE_BIT_ON,
+       KBD_MODE_BIT_ALS,
+       KBD_MODE_BIT_TRIGGER_ALS,
+       KBD_MODE_BIT_TRIGGER,
+       KBD_MODE_BIT_TRIGGER_25,
+       KBD_MODE_BIT_TRIGGER_50,
+       KBD_MODE_BIT_TRIGGER_75,
+       KBD_MODE_BIT_TRIGGER_100,
+};
+
+#define kbd_is_als_mode_bit(bit) \
+       ((bit) == KBD_MODE_BIT_ALS || (bit) == KBD_MODE_BIT_TRIGGER_ALS)
+#define kbd_is_trigger_mode_bit(bit) \
+       ((bit) >= KBD_MODE_BIT_TRIGGER_ALS && (bit) <= KBD_MODE_BIT_TRIGGER_100)
+#define kbd_is_level_mode_bit(bit) \
+       ((bit) >= KBD_MODE_BIT_TRIGGER_25 && (bit) <= KBD_MODE_BIT_TRIGGER_100)
+
+struct kbd_info {
+       u16 modes;
+       u8 type;
+       u8 triggers;
+       u8 levels;
+       u8 seconds;
+       u8 minutes;
+       u8 hours;
+       u8 days;
+};
+
+struct kbd_state {
+       u8 mode_bit;
+       u8 triggers;
+       u8 timeout_value;
+       u8 timeout_unit;
+       u8 als_setting;
+       u8 als_value;
+       u8 level;
+};
+
+static const int kbd_tokens[] = {
+       KBD_LED_OFF_TOKEN,
+       KBD_LED_AUTO_25_TOKEN,
+       KBD_LED_AUTO_50_TOKEN,
+       KBD_LED_AUTO_75_TOKEN,
+       KBD_LED_AUTO_100_TOKEN,
+       KBD_LED_ON_TOKEN,
+};
+
+static u16 kbd_token_bits;
+
+static struct kbd_info kbd_info;
+static bool kbd_als_supported;
+static bool kbd_triggers_supported;
+
+static u8 kbd_mode_levels[16];
+static int kbd_mode_levels_count;
+
+static u8 kbd_previous_level;
+static u8 kbd_previous_mode_bit;
+
+static bool kbd_led_present;
+
+/*
+ * NOTE: there are three ways to set the keyboard backlight level.
+ * First, via kbd_state.mode_bit (assigning KBD_MODE_BIT_TRIGGER_* value).
+ * Second, via kbd_state.level (assigning numerical value <= kbd_info.levels).
+ * Third, via SMBIOS tokens (KBD_LED_* in kbd_tokens)
+ *
+ * There are laptops which support only one of these methods. If we want to
+ * support as many machines as possible we need to implement all three methods.
+ * The first two methods use the kbd_state structure. The third uses SMBIOS
+ * tokens. If kbd_info.levels == 0, the machine does not support setting the
+ * keyboard backlight level via kbd_state.level.
+ */
+
+static int kbd_get_info(struct kbd_info *info)
+{
+       u8 units;
+       int ret;
+
+       get_buffer();
+
+       buffer->input[0] = 0x0;
+       dell_send_request(buffer, 4, 11);
+       ret = buffer->output[0];
+
+       if (ret) {
+               ret = dell_smi_error(ret);
+               goto out;
+       }
+
+       info->modes = buffer->output[1] & 0xFFFF;
+       info->type = (buffer->output[1] >> 24) & 0xFF;
+       info->triggers = buffer->output[2] & 0xFF;
+       units = (buffer->output[2] >> 8) & 0xFF;
+       info->levels = (buffer->output[2] >> 16) & 0xFF;
+
+       if (units & BIT(0))
+               info->seconds = (buffer->output[3] >> 0) & 0xFF;
+       if (units & BIT(1))
+               info->minutes = (buffer->output[3] >> 8) & 0xFF;
+       if (units & BIT(2))
+               info->hours = (buffer->output[3] >> 16) & 0xFF;
+       if (units & BIT(3))
+               info->days = (buffer->output[3] >> 24) & 0xFF;
+
+ out:
+       release_buffer();
+       return ret;
+}
+
+static unsigned int kbd_get_max_level(void)
+{
+       if (kbd_info.levels != 0)
+               return kbd_info.levels;
+       if (kbd_mode_levels_count > 0)
+               return kbd_mode_levels_count - 1;
+       return 0;
+}
+
+static int kbd_get_level(struct kbd_state *state)
+{
+       int i;
+
+       if (kbd_info.levels != 0)
+               return state->level;
+
+       if (kbd_mode_levels_count > 0) {
+               for (i = 0; i < kbd_mode_levels_count; ++i)
+                       if (kbd_mode_levels[i] == state->mode_bit)
+                               return i;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int kbd_set_level(struct kbd_state *state, u8 level)
+{
+       if (kbd_info.levels != 0) {
+               if (level != 0)
+                       kbd_previous_level = level;
+               if (state->level == level)
+                       return 0;
+               state->level = level;
+               if (level != 0 && state->mode_bit == KBD_MODE_BIT_OFF)
+                       state->mode_bit = kbd_previous_mode_bit;
+               else if (level == 0 && state->mode_bit != KBD_MODE_BIT_OFF) {
+                       kbd_previous_mode_bit = state->mode_bit;
+                       state->mode_bit = KBD_MODE_BIT_OFF;
+               }
+               return 0;
+       }
+
+       if (kbd_mode_levels_count > 0 && level < kbd_mode_levels_count) {
+               if (level != 0)
+                       kbd_previous_level = level;
+               state->mode_bit = kbd_mode_levels[level];
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+static int kbd_get_state(struct kbd_state *state)
+{
+       int ret;
+
+       get_buffer();
+
+       buffer->input[0] = 0x1;
+       dell_send_request(buffer, 4, 11);
+       ret = buffer->output[0];
+
+       if (ret) {
+               ret = dell_smi_error(ret);
+               goto out;
+       }
+
+       state->mode_bit = ffs(buffer->output[1] & 0xFFFF);
+       if (state->mode_bit != 0)
+               state->mode_bit--;
+
+       state->triggers = (buffer->output[1] >> 16) & 0xFF;
+       state->timeout_value = (buffer->output[1] >> 24) & 0x3F;
+       state->timeout_unit = (buffer->output[1] >> 30) & 0x3;
+       state->als_setting = buffer->output[2] & 0xFF;
+       state->als_value = (buffer->output[2] >> 8) & 0xFF;
+       state->level = (buffer->output[2] >> 16) & 0xFF;
+
+ out:
+       release_buffer();
+       return ret;
+}
+
+static int kbd_set_state(struct kbd_state *state)
+{
+       int ret;
+
+       get_buffer();
+       buffer->input[0] = 0x2;
+       buffer->input[1] = BIT(state->mode_bit) & 0xFFFF;
+       buffer->input[1] |= (state->triggers & 0xFF) << 16;
+       buffer->input[1] |= (state->timeout_value & 0x3F) << 24;
+       buffer->input[1] |= (state->timeout_unit & 0x3) << 30;
+       buffer->input[2] = state->als_setting & 0xFF;
+       buffer->input[2] |= (state->level & 0xFF) << 16;
+       dell_send_request(buffer, 4, 11);
+       ret = buffer->output[0];
+       release_buffer();
+
+       return dell_smi_error(ret);
+}
+
+static int kbd_set_state_safe(struct kbd_state *state, struct kbd_state *old)
+{
+       int ret;
+
+       ret = kbd_set_state(state);
+       if (ret == 0)
+               return 0;
+
+       /*
+        * When setting the new state fails,try to restore the previous one.
+        * This is needed on some machines where BIOS sets a default state when
+        * setting a new state fails. This default state could be all off.
+        */
+
+       if (kbd_set_state(old))
+               pr_err("Setting old previous keyboard state failed\n");
+
+       return ret;
+}
+
+static int kbd_set_token_bit(u8 bit)
+{
+       int id;
+       int ret;
+
+       if (bit >= ARRAY_SIZE(kbd_tokens))
+               return -EINVAL;
+
+       id = find_token_id(kbd_tokens[bit]);
+       if (id == -1)
+               return -EINVAL;
+
+       get_buffer();
+       buffer->input[0] = da_tokens[id].location;
+       buffer->input[1] = da_tokens[id].value;
+       dell_send_request(buffer, 1, 0);
+       ret = buffer->output[0];
+       release_buffer();
+
+       return dell_smi_error(ret);
+}
+
+static int kbd_get_token_bit(u8 bit)
+{
+       int id;
+       int ret;
+       int val;
+
+       if (bit >= ARRAY_SIZE(kbd_tokens))
+               return -EINVAL;
+
+       id = find_token_id(kbd_tokens[bit]);
+       if (id == -1)
+               return -EINVAL;
+
+       get_buffer();
+       buffer->input[0] = da_tokens[id].location;
+       dell_send_request(buffer, 0, 0);
+       ret = buffer->output[0];
+       val = buffer->output[1];
+       release_buffer();
+
+       if (ret)
+               return dell_smi_error(ret);
+
+       return (val == da_tokens[id].value);
+}
+
+static int kbd_get_first_active_token_bit(void)
+{
+       int i;
+       int ret;
+
+       for (i = 0; i < ARRAY_SIZE(kbd_tokens); ++i) {
+               ret = kbd_get_token_bit(i);
+               if (ret == 1)
+                       return i;
+       }
+
+       return ret;
+}
+
+static int kbd_get_valid_token_counts(void)
+{
+       return hweight16(kbd_token_bits);
+}
+
+static inline int kbd_init_info(void)
+{
+       struct kbd_state state;
+       int ret;
+       int i;
+
+       ret = kbd_get_info(&kbd_info);
+       if (ret)
+               return ret;
+
+       kbd_get_state(&state);
+
+       /* NOTE: timeout value is stored in 6 bits so max value is 63 */
+       if (kbd_info.seconds > 63)
+               kbd_info.seconds = 63;
+       if (kbd_info.minutes > 63)
+               kbd_info.minutes = 63;
+       if (kbd_info.hours > 63)
+               kbd_info.hours = 63;
+       if (kbd_info.days > 63)
+               kbd_info.days = 63;
+
+       /* NOTE: On tested machines ON mode did not work and caused
+        *       problems (turned backlight off) so do not use it
+        */
+       kbd_info.modes &= ~BIT(KBD_MODE_BIT_ON);
+
+       kbd_previous_level = kbd_get_level(&state);
+       kbd_previous_mode_bit = state.mode_bit;
+
+       if (kbd_previous_level == 0 && kbd_get_max_level() != 0)
+               kbd_previous_level = 1;
+
+       if (kbd_previous_mode_bit == KBD_MODE_BIT_OFF) {
+               kbd_previous_mode_bit =
+                       ffs(kbd_info.modes & ~BIT(KBD_MODE_BIT_OFF));
+               if (kbd_previous_mode_bit != 0)
+                       kbd_previous_mode_bit--;
+       }
+
+       if (kbd_info.modes & (BIT(KBD_MODE_BIT_ALS) |
+                             BIT(KBD_MODE_BIT_TRIGGER_ALS)))
+               kbd_als_supported = true;
+
+       if (kbd_info.modes & (
+           BIT(KBD_MODE_BIT_TRIGGER_ALS) | BIT(KBD_MODE_BIT_TRIGGER) |
+           BIT(KBD_MODE_BIT_TRIGGER_25) | BIT(KBD_MODE_BIT_TRIGGER_50) |
+           BIT(KBD_MODE_BIT_TRIGGER_75) | BIT(KBD_MODE_BIT_TRIGGER_100)
+          ))
+               kbd_triggers_supported = true;
+
+       /* kbd_mode_levels[0] is reserved, see below */
+       for (i = 0; i < 16; ++i)
+               if (kbd_is_level_mode_bit(i) && (BIT(i) & kbd_info.modes))
+                       kbd_mode_levels[1 + kbd_mode_levels_count++] = i;
+
+       /*
+        * Find the first supported mode and assign to kbd_mode_levels[0].
+        * This should be 0 (off), but we cannot depend on the BIOS to
+        * support 0.
+        */
+       if (kbd_mode_levels_count > 0) {
+               for (i = 0; i < 16; ++i) {
+                       if (BIT(i) & kbd_info.modes) {
+                               kbd_mode_levels[0] = i;
+                               break;
+                       }
+               }
+               kbd_mode_levels_count++;
+       }
+
+       return 0;
+
+}
+
+static inline void kbd_init_tokens(void)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(kbd_tokens); ++i)
+               if (find_token_id(kbd_tokens[i]) != -1)
+                       kbd_token_bits |= BIT(i);
+}
+
+static void kbd_init(void)
+{
+       int ret;
+
+       ret = kbd_init_info();
+       kbd_init_tokens();
+
+       if (kbd_token_bits != 0 || ret == 0)
+               kbd_led_present = true;
+}
+
+static ssize_t kbd_led_timeout_store(struct device *dev,
+                                    struct device_attribute *attr,
+                                    const char *buf, size_t count)
+{
+       struct kbd_state new_state;
+       struct kbd_state state;
+       bool convert;
+       int value;
+       int ret;
+       char ch;
+       u8 unit;
+       int i;
+
+       ret = sscanf(buf, "%d %c", &value, &ch);
+       if (ret < 1)
+               return -EINVAL;
+       else if (ret == 1)
+               ch = 's';
+
+       if (value < 0)
+               return -EINVAL;
+
+       convert = false;
+
+       switch (ch) {
+       case 's':
+               if (value > kbd_info.seconds)
+                       convert = true;
+               unit = KBD_TIMEOUT_SECONDS;
+               break;
+       case 'm':
+               if (value > kbd_info.minutes)
+                       convert = true;
+               unit = KBD_TIMEOUT_MINUTES;
+               break;
+       case 'h':
+               if (value > kbd_info.hours)
+                       convert = true;
+               unit = KBD_TIMEOUT_HOURS;
+               break;
+       case 'd':
+               if (value > kbd_info.days)
+                       convert = true;
+               unit = KBD_TIMEOUT_DAYS;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (quirks && quirks->needs_kbd_timeouts)
+               convert = true;
+
+       if (convert) {
+               /* Convert value from current units to seconds */
+               switch (unit) {
+               case KBD_TIMEOUT_DAYS:
+                       value *= 24;
+               case KBD_TIMEOUT_HOURS:
+                       value *= 60;
+               case KBD_TIMEOUT_MINUTES:
+                       value *= 60;
+                       unit = KBD_TIMEOUT_SECONDS;
+               }
+
+               if (quirks && quirks->needs_kbd_timeouts) {
+                       for (i = 0; quirks->kbd_timeouts[i] != -1; i++) {
+                               if (value <= quirks->kbd_timeouts[i]) {
+                                       value = quirks->kbd_timeouts[i];
+                                       break;
+                               }
+                       }
+               }
+
+               if (value <= kbd_info.seconds && kbd_info.seconds) {
+                       unit = KBD_TIMEOUT_SECONDS;
+               } else if (value / 60 <= kbd_info.minutes && kbd_info.minutes) {
+                       value /= 60;
+                       unit = KBD_TIMEOUT_MINUTES;
+               } else if (value / (60 * 60) <= kbd_info.hours && kbd_info.hours) {
+                       value /= (60 * 60);
+                       unit = KBD_TIMEOUT_HOURS;
+               } else if (value / (60 * 60 * 24) <= kbd_info.days && kbd_info.days) {
+                       value /= (60 * 60 * 24);
+                       unit = KBD_TIMEOUT_DAYS;
+               } else {
+                       return -EINVAL;
+               }
+       }
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       new_state = state;
+       new_state.timeout_value = value;
+       new_state.timeout_unit = unit;
+
+       ret = kbd_set_state_safe(&new_state, &state);
+       if (ret)
+               return ret;
+
+       return count;
+}
+
+static ssize_t kbd_led_timeout_show(struct device *dev,
+                                   struct device_attribute *attr, char *buf)
+{
+       struct kbd_state state;
+       int ret;
+       int len;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       len = sprintf(buf, "%d", state.timeout_value);
+
+       switch (state.timeout_unit) {
+       case KBD_TIMEOUT_SECONDS:
+               return len + sprintf(buf+len, "s\n");
+       case KBD_TIMEOUT_MINUTES:
+               return len + sprintf(buf+len, "m\n");
+       case KBD_TIMEOUT_HOURS:
+               return len + sprintf(buf+len, "h\n");
+       case KBD_TIMEOUT_DAYS:
+               return len + sprintf(buf+len, "d\n");
+       default:
+               return -EINVAL;
+       }
+
+       return len;
+}
+
+static DEVICE_ATTR(stop_timeout, S_IRUGO | S_IWUSR,
+                  kbd_led_timeout_show, kbd_led_timeout_store);
+
+static const char * const kbd_led_triggers[] = {
+       "keyboard",
+       "touchpad",
+       /*"trackstick"*/ NULL, /* NOTE: trackstick is just alias for touchpad */
+       "mouse",
+};
+
+static ssize_t kbd_led_triggers_store(struct device *dev,
+                                     struct device_attribute *attr,
+                                     const char *buf, size_t count)
+{
+       struct kbd_state new_state;
+       struct kbd_state state;
+       bool triggers_enabled = false;
+       int trigger_bit = -1;
+       char trigger[21];
+       int i, ret;
+
+       ret = sscanf(buf, "%20s", trigger);
+       if (ret != 1)
+               return -EINVAL;
+
+       if (trigger[0] != '+' && trigger[0] != '-')
+               return -EINVAL;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       if (kbd_triggers_supported)
+               triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+
+       if (kbd_triggers_supported) {
+               for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); ++i) {
+                       if (!(kbd_info.triggers & BIT(i)))
+                               continue;
+                       if (!kbd_led_triggers[i])
+                               continue;
+                       if (strcmp(trigger+1, kbd_led_triggers[i]) != 0)
+                               continue;
+                       if (trigger[0] == '+' &&
+                           triggers_enabled && (state.triggers & BIT(i)))
+                               return count;
+                       if (trigger[0] == '-' &&
+                           (!triggers_enabled || !(state.triggers & BIT(i))))
+                               return count;
+                       trigger_bit = i;
+                       break;
+               }
+       }
+
+       if (trigger_bit != -1) {
+               new_state = state;
+               if (trigger[0] == '+')
+                       new_state.triggers |= BIT(trigger_bit);
+               else {
+                       new_state.triggers &= ~BIT(trigger_bit);
+                       /* NOTE: trackstick bit (2) must be disabled when
+                        *       disabling touchpad bit (1), otherwise touchpad
+                        *       bit (1) will not be disabled */
+                       if (trigger_bit == 1)
+                               new_state.triggers &= ~BIT(2);
+               }
+               if ((kbd_info.triggers & new_state.triggers) !=
+                   new_state.triggers)
+                       return -EINVAL;
+               if (new_state.triggers && !triggers_enabled) {
+                       new_state.mode_bit = KBD_MODE_BIT_TRIGGER;
+                       kbd_set_level(&new_state, kbd_previous_level);
+               } else if (new_state.triggers == 0) {
+                       kbd_set_level(&new_state, 0);
+               }
+               if (!(kbd_info.modes & BIT(new_state.mode_bit)))
+                       return -EINVAL;
+               ret = kbd_set_state_safe(&new_state, &state);
+               if (ret)
+                       return ret;
+               if (new_state.mode_bit != KBD_MODE_BIT_OFF)
+                       kbd_previous_mode_bit = new_state.mode_bit;
+               return count;
+       }
+
+       return -EINVAL;
+}
+
+static ssize_t kbd_led_triggers_show(struct device *dev,
+                                    struct device_attribute *attr, char *buf)
+{
+       struct kbd_state state;
+       bool triggers_enabled;
+       int level, i, ret;
+       int len = 0;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       len = 0;
+
+       if (kbd_triggers_supported) {
+               triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+               level = kbd_get_level(&state);
+               for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); ++i) {
+                       if (!(kbd_info.triggers & BIT(i)))
+                               continue;
+                       if (!kbd_led_triggers[i])
+                               continue;
+                       if ((triggers_enabled || level <= 0) &&
+                           (state.triggers & BIT(i)))
+                               buf[len++] = '+';
+                       else
+                               buf[len++] = '-';
+                       len += sprintf(buf+len, "%s ", kbd_led_triggers[i]);
+               }
+       }
+
+       if (len)
+               buf[len - 1] = '\n';
+
+       return len;
+}
+
+static DEVICE_ATTR(start_triggers, S_IRUGO | S_IWUSR,
+                  kbd_led_triggers_show, kbd_led_triggers_store);
+
+static ssize_t kbd_led_als_enabled_store(struct device *dev,
+                                        struct device_attribute *attr,
+                                        const char *buf, size_t count)
+{
+       struct kbd_state new_state;
+       struct kbd_state state;
+       bool triggers_enabled = false;
+       int enable;
+       int ret;
+
+       ret = kstrtoint(buf, 0, &enable);
+       if (ret)
+               return ret;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       if (enable == kbd_is_als_mode_bit(state.mode_bit))
+               return count;
+
+       new_state = state;
+
+       if (kbd_triggers_supported)
+               triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+
+       if (enable) {
+               if (triggers_enabled)
+                       new_state.mode_bit = KBD_MODE_BIT_TRIGGER_ALS;
+               else
+                       new_state.mode_bit = KBD_MODE_BIT_ALS;
+       } else {
+               if (triggers_enabled) {
+                       new_state.mode_bit = KBD_MODE_BIT_TRIGGER;
+                       kbd_set_level(&new_state, kbd_previous_level);
+               } else {
+                       new_state.mode_bit = KBD_MODE_BIT_ON;
+               }
+       }
+       if (!(kbd_info.modes & BIT(new_state.mode_bit)))
+               return -EINVAL;
+
+       ret = kbd_set_state_safe(&new_state, &state);
+       if (ret)
+               return ret;
+       kbd_previous_mode_bit = new_state.mode_bit;
+
+       return count;
+}
+
+static ssize_t kbd_led_als_enabled_show(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct kbd_state state;
+       bool enabled = false;
+       int ret;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+       enabled = kbd_is_als_mode_bit(state.mode_bit);
+
+       return sprintf(buf, "%d\n", enabled ? 1 : 0);
+}
+
+static DEVICE_ATTR(als_enabled, S_IRUGO | S_IWUSR,
+                  kbd_led_als_enabled_show, kbd_led_als_enabled_store);
+
+static ssize_t kbd_led_als_setting_store(struct device *dev,
+                                        struct device_attribute *attr,
+                                        const char *buf, size_t count)
+{
+       struct kbd_state state;
+       struct kbd_state new_state;
+       u8 setting;
+       int ret;
+
+       ret = kstrtou8(buf, 10, &setting);
+       if (ret)
+               return ret;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       new_state = state;
+       new_state.als_setting = setting;
+
+       ret = kbd_set_state_safe(&new_state, &state);
+       if (ret)
+               return ret;
+
+       return count;
+}
+
+static ssize_t kbd_led_als_setting_show(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *buf)
+{
+       struct kbd_state state;
+       int ret;
+
+       ret = kbd_get_state(&state);
+       if (ret)
+               return ret;
+
+       return sprintf(buf, "%d\n", state.als_setting);
+}
+
+static DEVICE_ATTR(als_setting, S_IRUGO | S_IWUSR,
+                  kbd_led_als_setting_show, kbd_led_als_setting_store);
+
+static struct attribute *kbd_led_attrs[] = {
+       &dev_attr_stop_timeout.attr,
+       &dev_attr_start_triggers.attr,
+       NULL,
+};
+
+static const struct attribute_group kbd_led_group = {
+       .attrs = kbd_led_attrs,
+};
+
+static struct attribute *kbd_led_als_attrs[] = {
+       &dev_attr_als_enabled.attr,
+       &dev_attr_als_setting.attr,
+       NULL,
+};
+
+static const struct attribute_group kbd_led_als_group = {
+       .attrs = kbd_led_als_attrs,
+};
+
+static const struct attribute_group *kbd_led_groups[] = {
+       &kbd_led_group,
+       &kbd_led_als_group,
+       NULL,
+};
+
+static enum led_brightness kbd_led_level_get(struct led_classdev *led_cdev)
+{
+       int ret;
+       u16 num;
+       struct kbd_state state;
+
+       if (kbd_get_max_level()) {
+               ret = kbd_get_state(&state);
+               if (ret)
+                       return 0;
+               ret = kbd_get_level(&state);
+               if (ret < 0)
+                       return 0;
+               return ret;
+       }
+
+       if (kbd_get_valid_token_counts()) {
+               ret = kbd_get_first_active_token_bit();
+               if (ret < 0)
+                       return 0;
+               for (num = kbd_token_bits; num != 0 && ret > 0; --ret)
+                       num &= num - 1; /* clear the first bit set */
+               if (num == 0)
+                       return 0;
+               return ffs(num) - 1;
+       }
+
+       pr_warn("Keyboard brightness level control not supported\n");
+       return 0;
+}
+
+static void kbd_led_level_set(struct led_classdev *led_cdev,
+                             enum led_brightness value)
+{
+       struct kbd_state state;
+       struct kbd_state new_state;
+       u16 num;
+
+       if (kbd_get_max_level()) {
+               if (kbd_get_state(&state))
+                       return;
+               new_state = state;
+               if (kbd_set_level(&new_state, value))
+                       return;
+               kbd_set_state_safe(&new_state, &state);
+               return;
+       }
+
+       if (kbd_get_valid_token_counts()) {
+               for (num = kbd_token_bits; num != 0 && value > 0; --value)
+                       num &= num - 1; /* clear the first bit set */
+               if (num == 0)
+                       return;
+               kbd_set_token_bit(ffs(num) - 1);
+               return;
+       }
+
+       pr_warn("Keyboard brightness level control not supported\n");
+}
+
+static struct led_classdev kbd_led = {
+       .name           = "dell::kbd_backlight",
+       .brightness_set = kbd_led_level_set,
+       .brightness_get = kbd_led_level_get,
+       .groups         = kbd_led_groups,
+};
+
+static int __init kbd_led_init(struct device *dev)
+{
+       kbd_init();
+       if (!kbd_led_present)
+               return -ENODEV;
+       if (!kbd_als_supported)
+               kbd_led_groups[1] = NULL;
+       kbd_led.max_brightness = kbd_get_max_level();
+       if (!kbd_led.max_brightness) {
+               kbd_led.max_brightness = kbd_get_valid_token_counts();
+               if (kbd_led.max_brightness)
+                       kbd_led.max_brightness--;
+       }
+       return led_classdev_register(dev, &kbd_led);
+}
+
+static void brightness_set_exit(struct led_classdev *led_cdev,
+                               enum led_brightness value)
+{
+       /* Don't change backlight level on exit */
+};
+
+static void kbd_led_exit(void)
+{
+       if (!kbd_led_present)
+               return;
+       kbd_led.brightness_set = brightness_set_exit;
+       led_classdev_unregister(&kbd_led);
+}
+
  static int __init dell_init(void)
  {
         int max_intensity = 0;
@@ -841,6 +1913,8 @@ static int __init dell_init(void)
         if (quirks && quirks->touchpad_led)
                 touchpad_led_init(&platform_device->dev);
  
+       kbd_led_init(&platform_device->dev);
+
         dell_laptop_dir = debugfs_create_dir("dell_laptop", NULL);
         if (dell_laptop_dir != NULL)
                 debugfs_create_file("rfkill", 0444, dell_laptop_dir, NULL,
@@ -908,6 +1982,7 @@ static void __exit dell_exit(void)
         debugfs_remove_recursive(dell_laptop_dir);
         if (quirks && quirks->touchpad_led)
                 touchpad_led_exit();
+       kbd_led_exit();
         i8042_remove_filter(dell_laptop_i8042_filter);
         cancel_delayed_work_sync(&dell_rfkill_work);
         backlight_device_unregister(dell_backlight_device);
@@ -924,5 +1999,7 @@ module_init(dell_init);
  module_exit(dell_exit);
  
  MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
+MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
+MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
  MODULE_DESCRIPTION("Dell laptop driver");
  MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c

index a4a4258..8037c8b 100644 (file)
--- a/drivers/platform/x86/intel_oaktrail.c
+++ b/drivers/platform/x86/intel_oaktrail.c
@@ -62,7 +62,7 @@
   * (1 << 1):   Bluetooth enable/disable, RW.
   * (1 << 2):   GPS enable/disable, RW.
   * (1 << 3):   WiFi enable/disable, RW.
- * (1 << 4):   WWAN (3G) enable/disalbe, RW.
+ * (1 << 4):   WWAN (3G) enable/disable, RW.
   * (1 << 5):   Touchscreen enable/disable, Read Only.
   */
  #define OT_EC_DEVICE_STATE_ADDRESS     0xD6
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c

index 3b8ceee..7769575 100644 (file)
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -319,6 +319,7 @@ static struct {
         u32 sensors_pdrv_attrs_registered:1;
         u32 sensors_pdev_attrs_registered:1;
         u32 hotkey_poll_active:1;
+       u32 has_adaptive_kbd:1;
  } tp_features;
  
  static struct {
@@ -1911,6 +1912,27 @@ enum {   /* hot key scan codes (derived from ACPI DSDT) */
         TP_ACPI_HOTKEYSCAN_UNK7,
         TP_ACPI_HOTKEYSCAN_UNK8,
  
+       TP_ACPI_HOTKEYSCAN_MUTE2,
+       TP_ACPI_HOTKEYSCAN_BRIGHTNESS_ZERO,
+       TP_ACPI_HOTKEYSCAN_CLIPPING_TOOL,
+       TP_ACPI_HOTKEYSCAN_CLOUD,
+       TP_ACPI_HOTKEYSCAN_UNK9,
+       TP_ACPI_HOTKEYSCAN_VOICE,
+       TP_ACPI_HOTKEYSCAN_UNK10,
+       TP_ACPI_HOTKEYSCAN_GESTURES,
+       TP_ACPI_HOTKEYSCAN_UNK11,
+       TP_ACPI_HOTKEYSCAN_UNK12,
+       TP_ACPI_HOTKEYSCAN_UNK13,
+       TP_ACPI_HOTKEYSCAN_CONFIG,
+       TP_ACPI_HOTKEYSCAN_NEW_TAB,
+       TP_ACPI_HOTKEYSCAN_RELOAD,
+       TP_ACPI_HOTKEYSCAN_BACK,
+       TP_ACPI_HOTKEYSCAN_MIC_DOWN,
+       TP_ACPI_HOTKEYSCAN_MIC_UP,
+       TP_ACPI_HOTKEYSCAN_MIC_CANCELLATION,
+       TP_ACPI_HOTKEYSCAN_CAMERA_MODE,
+       TP_ACPI_HOTKEYSCAN_ROTATE_DISPLAY,
+
         /* Hotkey keymap size */
         TPACPI_HOTKEY_MAP_LEN
  };
@@ -2647,9 +2669,7 @@ static ssize_t hotkey_enable_store(struct device *dev,
         return count;
  }
  
-static struct device_attribute dev_attr_hotkey_enable =
-       __ATTR(hotkey_enable, S_IWUSR | S_IRUGO,
-               hotkey_enable_show, hotkey_enable_store);
+static DEVICE_ATTR_RW(hotkey_enable);
  
  /* sysfs hotkey mask --------------------------------------------------- */
  static ssize_t hotkey_mask_show(struct device *dev,
@@ -2685,9 +2705,7 @@ static ssize_t hotkey_mask_store(struct device *dev,
         return (res) ? res : count;
  }
  
-static struct device_attribute dev_attr_hotkey_mask =
-       __ATTR(hotkey_mask, S_IWUSR | S_IRUGO,
-               hotkey_mask_show, hotkey_mask_store);
+static DEVICE_ATTR_RW(hotkey_mask);
  
  /* sysfs hotkey bios_enabled ------------------------------------------- */
  static ssize_t hotkey_bios_enabled_show(struct device *dev,
@@ -2697,8 +2715,7 @@ static ssize_t hotkey_bios_enabled_show(struct device *dev,
         return sprintf(buf, "0\n");
  }
  
-static struct device_attribute dev_attr_hotkey_bios_enabled =
-       __ATTR(hotkey_bios_enabled, S_IRUGO, hotkey_bios_enabled_show, NULL);
+static DEVICE_ATTR_RO(hotkey_bios_enabled);
  
  /* sysfs hotkey bios_mask ---------------------------------------------- */
  static ssize_t hotkey_bios_mask_show(struct device *dev,
@@ -2710,8 +2727,7 @@ static ssize_t hotkey_bios_mask_show(struct device *dev,
         return snprintf(buf, PAGE_SIZE, "0x%08x\n", hotkey_orig_mask);
  }
  
-static struct device_attribute dev_attr_hotkey_bios_mask =
-       __ATTR(hotkey_bios_mask, S_IRUGO, hotkey_bios_mask_show, NULL);
+static DEVICE_ATTR_RO(hotkey_bios_mask);
  
  /* sysfs hotkey all_mask ----------------------------------------------- */
  static ssize_t hotkey_all_mask_show(struct device *dev,
@@ -2722,8 +2738,7 @@ static ssize_t hotkey_all_mask_show(struct device *dev,
                                 hotkey_all_mask | hotkey_source_mask);
  }
  
-static struct device_attribute dev_attr_hotkey_all_mask =
-       __ATTR(hotkey_all_mask, S_IRUGO, hotkey_all_mask_show, NULL);
+static DEVICE_ATTR_RO(hotkey_all_mask);
  
  /* sysfs hotkey recommended_mask --------------------------------------- */
  static ssize_t hotkey_recommended_mask_show(struct device *dev,
@@ -2735,9 +2750,7 @@ static ssize_t hotkey_recommended_mask_show(struct device *dev,
                         & ~hotkey_reserved_mask);
  }
  
-static struct device_attribute dev_attr_hotkey_recommended_mask =
-       __ATTR(hotkey_recommended_mask, S_IRUGO,
-               hotkey_recommended_mask_show, NULL);
+static DEVICE_ATTR_RO(hotkey_recommended_mask);
  
  #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
  
@@ -2792,9 +2805,7 @@ static ssize_t hotkey_source_mask_store(struct device *dev,
         return (rc < 0) ? rc : count;
  }
  
-static struct device_attribute dev_attr_hotkey_source_mask =
-       __ATTR(hotkey_source_mask, S_IWUSR | S_IRUGO,
-               hotkey_source_mask_show, hotkey_source_mask_store);
+static DEVICE_ATTR_RW(hotkey_source_mask);
  
  /* sysfs hotkey hotkey_poll_freq --------------------------------------- */
  static ssize_t hotkey_poll_freq_show(struct device *dev,
@@ -2826,9 +2837,7 @@ static ssize_t hotkey_poll_freq_store(struct device *dev,
         return count;
  }
  
-static struct device_attribute dev_attr_hotkey_poll_freq =
-       __ATTR(hotkey_poll_freq, S_IWUSR | S_IRUGO,
-               hotkey_poll_freq_show, hotkey_poll_freq_store);
+static DEVICE_ATTR_RW(hotkey_poll_freq);
  
  #endif /* CONFIG_THINKPAD_ACPI_HOTKEY_POLL */
  
@@ -2849,8 +2858,7 @@ static ssize_t hotkey_radio_sw_show(struct device *dev,
                         (res == TPACPI_RFK_RADIO_OFF) ? 0 : 1);
  }
  
-static struct device_attribute dev_attr_hotkey_radio_sw =
-       __ATTR(hotkey_radio_sw, S_IRUGO, hotkey_radio_sw_show, NULL);
+static DEVICE_ATTR_RO(hotkey_radio_sw);
  
  static void hotkey_radio_sw_notify_change(void)
  {
@@ -2872,8 +2880,7 @@ static ssize_t hotkey_tablet_mode_show(struct device *dev,
         return snprintf(buf, PAGE_SIZE, "%d\n", !!s);
  }
  
-static struct device_attribute dev_attr_hotkey_tablet_mode =
-       __ATTR(hotkey_tablet_mode, S_IRUGO, hotkey_tablet_mode_show, NULL);
+static DEVICE_ATTR_RO(hotkey_tablet_mode);
  
  static void hotkey_tablet_mode_notify_change(void)
  {
@@ -2890,8 +2897,7 @@ static ssize_t hotkey_wakeup_reason_show(struct device *dev,
         return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_wakeup_reason);
  }
  
-static struct device_attribute dev_attr_hotkey_wakeup_reason =
-       __ATTR(wakeup_reason, S_IRUGO, hotkey_wakeup_reason_show, NULL);
+static DEVICE_ATTR_RO(hotkey_wakeup_reason);
  
  static void hotkey_wakeup_reason_notify_change(void)
  {
@@ -2907,9 +2913,7 @@ static ssize_t hotkey_wakeup_hotunplug_complete_show(struct device *dev,
         return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_autosleep_ack);
  }
  
-static struct device_attribute dev_attr_hotkey_wakeup_hotunplug_complete =
-       __ATTR(wakeup_hotunplug_complete, S_IRUGO,
-              hotkey_wakeup_hotunplug_complete_show, NULL);
+static DEVICE_ATTR_RO(hotkey_wakeup_hotunplug_complete);
  
  static void hotkey_wakeup_hotunplug_complete_notify_change(void)
  {
@@ -2917,6 +2921,57 @@ static void hotkey_wakeup_hotunplug_complete_notify_change(void)
                      "wakeup_hotunplug_complete");
  }
  
+/* sysfs adaptive kbd mode --------------------------------------------- */
+
+static int adaptive_keyboard_get_mode(void);
+static int adaptive_keyboard_set_mode(int new_mode);
+
+enum ADAPTIVE_KEY_MODE {
+       HOME_MODE,
+       WEB_BROWSER_MODE,
+       WEB_CONFERENCE_MODE,
+       FUNCTION_MODE,
+       LAYFLAT_MODE
+};
+
+static ssize_t adaptive_kbd_mode_show(struct device *dev,
+                          struct device_attribute *attr,
+                          char *buf)
+{
+       int current_mode;
+
+       current_mode = adaptive_keyboard_get_mode();
+       if (current_mode < 0)
+               return current_mode;
+
+       return snprintf(buf, PAGE_SIZE, "%d\n", current_mode);
+}
+
+static ssize_t adaptive_kbd_mode_store(struct device *dev,
+                           struct device_attribute *attr,
+                           const char *buf, size_t count)
+{
+       unsigned long t;
+       int res;
+
+       if (parse_strtoul(buf, LAYFLAT_MODE, &t))
+               return -EINVAL;
+
+       res = adaptive_keyboard_set_mode(t);
+       return (res < 0) ? res : count;
+}
+
+static DEVICE_ATTR_RW(adaptive_kbd_mode);
+
+static struct attribute *adaptive_kbd_attributes[] = {
+       &dev_attr_adaptive_kbd_mode.attr,
+       NULL
+};
+
+static const struct attribute_group adaptive_kbd_attr_group = {
+       .attrs = adaptive_kbd_attributes,
+};
+
  /* --------------------------------------------------------------------- */
  
  static struct attribute *hotkey_attributes[] __initdata = {
@@ -3118,6 +3173,13 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
                 /* (assignments unknown, please report if found) */
                 KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
                 KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+
+               /* No assignments, only used for Adaptive keyboards. */
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+               KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
                 },
  
         /* Generic keymap for Lenovo ThinkPads */
@@ -3174,6 +3236,35 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
  
                 /* Extra keys in use since the X240 / T440 / T540 */
                 KEY_CONFIG, KEY_SEARCH, KEY_SCALE, KEY_FILE,
+
+               /*
+                * These are the adaptive keyboard keycodes for Carbon X1 2014.
+                * The first item in this list is the Mute button which is
+                * emitted with 0x103 through
+                * adaptive_keyboard_hotkey_notify_hotkey() when the sound
+                * symbol is held.
+                * We'll need to offset those by 0x20.
+                */
+               KEY_RESERVED,        /* Mute held, 0x103 */
+               KEY_BRIGHTNESS_MIN,  /* Backlight off */
+               KEY_RESERVED,        /* Clipping tool */
+               KEY_RESERVED,        /* Cloud */
+               KEY_RESERVED,
+               KEY_VOICECOMMAND,    /* Voice */
+               KEY_RESERVED,
+               KEY_RESERVED,        /* Gestures */
+               KEY_RESERVED,
+               KEY_RESERVED,
+               KEY_RESERVED,
+               KEY_CONFIG,          /* Settings */
+               KEY_RESERVED,        /* New tab */
+               KEY_REFRESH,         /* Reload */
+               KEY_BACK,            /* Back */
+               KEY_RESERVED,        /* Microphone down */
+               KEY_RESERVED,        /* Microphone up */
+               KEY_RESERVED,        /* Microphone cancellation */
+               KEY_RESERVED,        /* Camera mode */
+               KEY_RESERVED,        /* Rotate display, 0x116 */
                 },
         };
  
@@ -3227,6 +3318,20 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
         if (!tp_features.hotkey)
                 return 1;
  
+       /*
+        * Check if we have an adaptive keyboard, like on the
+        * Lenovo Carbon X1 2014 (2nd Gen).
+        */
+       if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
+               if ((hkeyv >> 8) == 2) {
+                       tp_features.has_adaptive_kbd = true;
+                       res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
+                                       &adaptive_kbd_attr_group);
+                       if (res)
+                               goto err_exit;
+               }
+       }
+
         quirks = tpacpi_check_quirks(tpacpi_hotkey_qtable,
                                      ARRAY_SIZE(tpacpi_hotkey_qtable));
  
@@ -3437,6 +3542,9 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
  
  err_exit:
         delete_attr_set(hotkey_dev_attributes, &tpacpi_pdev->dev.kobj);
+       sysfs_remove_group(&tpacpi_pdev->dev.kobj,
+                       &adaptive_kbd_attr_group);
+
         hotkey_dev_attributes = NULL;
  
         return (res < 0) ? res : 1;
@@ -3449,14 +3557,6 @@ err_exit:
   * Will consider support rest of modes in future.
   *
   */
-enum ADAPTIVE_KEY_MODE {
-       HOME_MODE,
-       WEB_BROWSER_MODE,
-       WEB_CONFERENCE_MODE,
-       FUNCTION_MODE,
-       LAYFLAT_MODE
-};
-
  static const int adaptive_keyboard_modes[] = {
         HOME_MODE,
  /*     WEB_BROWSER_MODE = 2,
@@ -3466,6 +3566,8 @@ static const int adaptive_keyboard_modes[] = {
  
  #define DFR_CHANGE_ROW                 0x101
  #define DFR_SHOW_QUICKVIEW_ROW         0x102
+#define FIRST_ADAPTIVE_KEY             0x103
+#define ADAPTIVE_KEY_OFFSET            0x020
  
  /* press Fn key a while second, it will switch to Function Mode. Then
   * release Fn key, previous mode be restored.
@@ -3473,6 +3575,32 @@ static const int adaptive_keyboard_modes[] = {
  static bool adaptive_keyboard_mode_is_saved;
  static int adaptive_keyboard_prev_mode;
  
+static int adaptive_keyboard_get_mode(void)
+{
+       int mode = 0;
+
+       if (!acpi_evalf(hkey_handle, &mode, "GTRW", "dd", 0)) {
+               pr_err("Cannot read adaptive keyboard mode\n");
+               return -EIO;
+       }
+
+       return mode;
+}
+
+static int adaptive_keyboard_set_mode(int new_mode)
+{
+       if (new_mode < 0 ||
+               new_mode > LAYFLAT_MODE)
+               return -EINVAL;
+
+       if (!acpi_evalf(hkey_handle, NULL, "STRW", "vd", new_mode)) {
+               pr_err("Cannot set adaptive keyboard mode\n");
+               return -EIO;
+       }
+
+       return 0;
+}
+
  static int adaptive_keyboard_get_next_mode(int mode)
  {
         size_t i;
@@ -3493,8 +3621,9 @@ static int adaptive_keyboard_get_next_mode(int mode)
  
  static bool adaptive_keyboard_hotkey_notify_hotkey(unsigned int scancode)
  {
-       u32 current_mode = 0;
+       int current_mode = 0;
         int new_mode = 0;
+       int keycode;
  
         switch (scancode) {
         case DFR_CHANGE_ROW:
@@ -3502,43 +3631,51 @@ static bool adaptive_keyboard_hotkey_notify_hotkey(unsigned int scancode)
                         new_mode = adaptive_keyboard_prev_mode;
                         adaptive_keyboard_mode_is_saved = false;
                 } else {
-                       if (!acpi_evalf(
-                                       hkey_handle, &current_mode,
-                                       "GTRW", "dd", 0)) {
-                               pr_err("Cannot read adaptive keyboard mode\n");
+                       current_mode = adaptive_keyboard_get_mode();
+                       if (current_mode < 0)
                                 return false;
-                       } else {
-                               new_mode = adaptive_keyboard_get_next_mode(
-                                               current_mode);
-                       }
+                       new_mode = adaptive_keyboard_get_next_mode(
+                                       current_mode);
                 }
  
-               if (!acpi_evalf(hkey_handle, NULL, "STRW", "vd", new_mode)) {
-                       pr_err("Cannot set adaptive keyboard mode\n");
+               if (adaptive_keyboard_set_mode(new_mode) < 0)
                         return false;
-               }
  
                 return true;
  
         case DFR_SHOW_QUICKVIEW_ROW:
-               if (!acpi_evalf(hkey_handle,
-                               &adaptive_keyboard_prev_mode,
-                               "GTRW", "dd", 0)) {
-                       pr_err("Cannot read adaptive keyboard mode\n");
+               current_mode = adaptive_keyboard_get_mode();
+               if (current_mode < 0)
                         return false;
-               } else {
-                       adaptive_keyboard_mode_is_saved = true;
  
-                       if (!acpi_evalf(hkey_handle,
-                                       NULL, "STRW", "vd", FUNCTION_MODE)) {
-                               pr_err("Cannot set adaptive keyboard mode\n");
-                               return false;
-                       }
-               }
+               adaptive_keyboard_prev_mode = current_mode;
+               adaptive_keyboard_mode_is_saved = true;
+
+               if (adaptive_keyboard_set_mode (FUNCTION_MODE) < 0)
+                       return false;
                 return true;
  
         default:
-               return false;
+               if (scancode < FIRST_ADAPTIVE_KEY ||
+                   scancode >= FIRST_ADAPTIVE_KEY + TPACPI_HOTKEY_MAP_LEN -
+                               ADAPTIVE_KEY_OFFSET) {
+                       pr_info("Unhandled adaptive keyboard key: 0x%x\n",
+                                       scancode);
+                       return false;
+               }
+               keycode = hotkey_keycode_map[scancode - FIRST_ADAPTIVE_KEY + ADAPTIVE_KEY_OFFSET];
+               if (keycode != KEY_RESERVED) {
+                       mutex_lock(&tpacpi_inputdev_send_mutex);
+
+                       input_report_key(tpacpi_inputdev, keycode, 1);
+                       input_sync(tpacpi_inputdev);
+
+                       input_report_key(tpacpi_inputdev, keycode, 0);
+                       input_sync(tpacpi_inputdev);
+
+                       mutex_unlock(&tpacpi_inputdev_send_mutex);
+               }
+               return true;
         }
  }
  
@@ -3836,28 +3973,21 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event)
  
  static void hotkey_suspend(void)
  {
-       int hkeyv;
-
         /* Do these on suspend, we get the events on early resume! */
         hotkey_wakeup_reason = TP_ACPI_WAKEUP_NONE;
         hotkey_autosleep_ack = 0;
  
         /* save previous mode of adaptive keyboard of X1 Carbon */
-       if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-               if ((hkeyv >> 8) == 2) {
-                       if (!acpi_evalf(hkey_handle,
-                                               &adaptive_keyboard_prev_mode,
-                                               "GTRW", "dd", 0)) {
-                               pr_err("Cannot read adaptive keyboard mode.\n");
-                       }
+       if (tp_features.has_adaptive_kbd) {
+               if (!acpi_evalf(hkey_handle, &adaptive_keyboard_prev_mode,
+                                       "GTRW", "dd", 0)) {
+                       pr_err("Cannot read adaptive keyboard mode.\n");
                 }
         }
  }
  
  static void hotkey_resume(void)
  {
-       int hkeyv;
-
         tpacpi_disable_brightness_delay();
  
         if (hotkey_status_set(true) < 0 ||
@@ -3872,14 +4002,10 @@ static void hotkey_resume(void)
         hotkey_poll_setup_safe(false);
  
         /* restore previous mode of adapive keyboard of X1 Carbon */
-       if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-               if ((hkeyv >> 8) == 2) {
-                       if (!acpi_evalf(hkey_handle,
-                                               NULL,
-                                               "STRW", "vd",
-                                               adaptive_keyboard_prev_mode)) {
-                               pr_err("Cannot set adaptive keyboard mode.\n");
-                       }
+       if (tp_features.has_adaptive_kbd) {
+               if (!acpi_evalf(hkey_handle, NULL, "STRW", "vd",
+                                       adaptive_keyboard_prev_mode)) {
+                       pr_err("Cannot set adaptive keyboard mode.\n");
                 }
         }
  }
@@ -4079,9 +4205,7 @@ static ssize_t bluetooth_enable_store(struct device *dev,
                                 attr, buf, count);
  }
  
-static struct device_attribute dev_attr_bluetooth_enable =
-       __ATTR(bluetooth_enable, S_IWUSR | S_IRUGO,
-               bluetooth_enable_show, bluetooth_enable_store);
+static DEVICE_ATTR_RW(bluetooth_enable);
  
  /* --------------------------------------------------------------------- */
  
@@ -4269,9 +4393,7 @@ static ssize_t wan_enable_store(struct device *dev,
                         attr, buf, count);
  }
  
-static struct device_attribute dev_attr_wan_enable =
-       __ATTR(wwan_enable, S_IWUSR | S_IRUGO,
-               wan_enable_show, wan_enable_store);
+static DEVICE_ATTR_RW(wan_enable);
  
  /* --------------------------------------------------------------------- */
  
@@ -5048,8 +5170,7 @@ static ssize_t cmos_command_store(struct device *dev,
         return (res) ? res : count;
  }
  
-static struct device_attribute dev_attr_cmos_command =
-       __ATTR(cmos_command, S_IWUSR, NULL, cmos_command_store);
+static DEVICE_ATTR_WO(cmos_command);
  
  /* --------------------------------------------------------------------- */
  
@@ -8017,9 +8138,7 @@ static ssize_t fan_pwm1_enable_store(struct device *dev,
         return count;
  }
  
-static struct device_attribute dev_attr_fan_pwm1_enable =
-       __ATTR(pwm1_enable, S_IWUSR | S_IRUGO,
-               fan_pwm1_enable_show, fan_pwm1_enable_store);
+static DEVICE_ATTR_RW(fan_pwm1_enable);
  
  /* sysfs fan pwm1 ------------------------------------------------------ */
  static ssize_t fan_pwm1_show(struct device *dev,
@@ -8079,9 +8198,7 @@ static ssize_t fan_pwm1_store(struct device *dev,
         return (rc) ? rc : count;
  }
  
-static struct device_attribute dev_attr_fan_pwm1 =
-       __ATTR(pwm1, S_IWUSR | S_IRUGO,
-               fan_pwm1_show, fan_pwm1_store);
+static DEVICE_ATTR_RW(fan_pwm1);
  
  /* sysfs fan fan1_input ------------------------------------------------ */
  static ssize_t fan_fan1_input_show(struct device *dev,
@@ -8098,9 +8215,7 @@ static ssize_t fan_fan1_input_show(struct device *dev,
         return snprintf(buf, PAGE_SIZE, "%u\n", speed);
  }
  
-static struct device_attribute dev_attr_fan_fan1_input =
-       __ATTR(fan1_input, S_IRUGO,
-               fan_fan1_input_show, NULL);
+static DEVICE_ATTR_RO(fan_fan1_input);
  
  /* sysfs fan fan2_input ------------------------------------------------ */
  static ssize_t fan_fan2_input_show(struct device *dev,
@@ -8117,9 +8232,7 @@ static ssize_t fan_fan2_input_show(struct device *dev,
         return snprintf(buf, PAGE_SIZE, "%u\n", speed);
  }
  
-static struct device_attribute dev_attr_fan_fan2_input =
-       __ATTR(fan2_input, S_IRUGO,
-               fan_fan2_input_show, NULL);
+static DEVICE_ATTR_RO(fan_fan2_input);
  
  /* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */
  static ssize_t fan_fan_watchdog_show(struct device_driver *drv,
@@ -8735,8 +8848,7 @@ static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev,
         return snprintf(buf, PAGE_SIZE, "%s\n", TPACPI_NAME);
  }
  
-static struct device_attribute dev_attr_thinkpad_acpi_pdev_name =
-       __ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL);
+static DEVICE_ATTR_RO(thinkpad_acpi_pdev_name);
  
  /* --------------------------------------------------------------------- */
  
diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c

index dbcb7a8..9956b99 100644 (file)
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c
@@ -51,6 +51,7 @@
  #include <linux/acpi.h>
  #include <linux/dmi.h>
  #include <linux/uaccess.h>
+#include <acpi/video.h>
  
  MODULE_AUTHOR("John Belmonte");
  MODULE_DESCRIPTION("Toshiba Laptop ACPI Extras Driver");
@@ -116,6 +117,7 @@ MODULE_LICENSE("GPL");
  #define HCI_KBD_ILLUMINATION           0x0095
  #define HCI_ECO_MODE                   0x0097
  #define HCI_ACCELEROMETER2             0x00a6
+#define HCI_SYSTEM_INFO                        0xc000
  #define SCI_PANEL_POWER_ON             0x010d
  #define SCI_ILLUMINATION               0x014e
  #define SCI_USB_SLEEP_CHARGE           0x0150
@@ -129,10 +131,13 @@ MODULE_LICENSE("GPL");
  #define HCI_ACCEL_MASK                 0x7fff
  #define HCI_HOTKEY_DISABLE             0x0b
  #define HCI_HOTKEY_ENABLE              0x09
+#define HCI_HOTKEY_SPECIAL_FUNCTIONS   0x10
  #define HCI_LCD_BRIGHTNESS_BITS                3
  #define HCI_LCD_BRIGHTNESS_SHIFT       (16-HCI_LCD_BRIGHTNESS_BITS)
  #define HCI_LCD_BRIGHTNESS_LEVELS      (1 << HCI_LCD_BRIGHTNESS_BITS)
  #define HCI_MISC_SHIFT                 0x10
+#define HCI_SYSTEM_TYPE1               0x10
+#define HCI_SYSTEM_TYPE2               0x11
  #define HCI_VIDEO_OUT_LCD              0x1
  #define HCI_VIDEO_OUT_CRT              0x2
  #define HCI_VIDEO_OUT_TV               0x4
@@ -147,9 +152,10 @@ MODULE_LICENSE("GPL");
  #define SCI_KBD_MODE_OFF               0x10
  #define SCI_KBD_TIME_MAX               0x3c001a
  #define SCI_USB_CHARGE_MODE_MASK       0xff
-#define SCI_USB_CHARGE_DISABLED                0x30000
-#define SCI_USB_CHARGE_ALTERNATE       0x30009
-#define SCI_USB_CHARGE_AUTO            0x30021
+#define SCI_USB_CHARGE_DISABLED                0x00
+#define SCI_USB_CHARGE_ALTERNATE       0x09
+#define SCI_USB_CHARGE_TYPICAL         0x11
+#define SCI_USB_CHARGE_AUTO            0x21
  #define SCI_USB_CHARGE_BAT_MASK                0x7
  #define SCI_USB_CHARGE_BAT_LVL_OFF     0x1
  #define SCI_USB_CHARGE_BAT_LVL_ON      0x4
@@ -174,6 +180,8 @@ struct toshiba_acpi_dev {
         int kbd_mode;
         int kbd_time;
         int usbsc_bat_level;
+       int usbsc_mode_base;
+       int hotkey_event_type;
  
         unsigned int illumination_supported:1;
         unsigned int video_supported:1;
@@ -243,29 +251,6 @@ static const struct key_entry toshiba_acpi_keymap[] = {
         { KE_END, 0 },
  };
  
-/* alternative keymap */
-static const struct dmi_system_id toshiba_alt_keymap_dmi[] = {
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Satellite M840"),
-               },
-       },
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "Qosmio X75-A"),
-               },
-       },
-       {
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "TECRA A50-A"),
-               },
-       },
-       {}
-};
-
  static const struct key_entry toshiba_acpi_alt_keymap[] = {
         { KE_KEY, 0x157, { KEY_MUTE } },
         { KE_KEY, 0x102, { KEY_ZOOMOUT } },
@@ -280,6 +265,14 @@ static const struct key_entry toshiba_acpi_alt_keymap[] = {
         { KE_END, 0 },
  };
  
+/*
+ * List of models which have a broken acpi-video backlight interface and thus
+ * need to use the toshiba (vendor) interface instead.
+ */
+static const struct dmi_system_id toshiba_vendor_backlight_dmi[] = {
+       {}
+};
+
  /*
   * Utility
   */
@@ -819,6 +812,54 @@ static int toshiba_accelerometer_get(struct toshiba_acpi_dev *dev,
  }
  
  /* Sleep (Charge and Music) utilities support */
+static void toshiba_usb_sleep_charge_available(struct toshiba_acpi_dev *dev)
+{
+       u32 in[TCI_WORDS] = { SCI_GET, SCI_USB_SLEEP_CHARGE, 0, 0, 0, 0 };
+       u32 out[TCI_WORDS];
+       acpi_status status;
+
+       /* Set the feature to "not supported" in case of error */
+       dev->usb_sleep_charge_supported = 0;
+
+       if (!sci_open(dev))
+               return;
+
+       status = tci_raw(dev, in, out);
+       if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
+               pr_err("ACPI call to get USB Sleep and Charge mode failed\n");
+               sci_close(dev);
+               return;
+       } else if (out[0] == TOS_NOT_SUPPORTED) {
+               pr_info("USB Sleep and Charge not supported\n");
+               sci_close(dev);
+               return;
+       } else if (out[0] == TOS_SUCCESS) {
+               dev->usbsc_mode_base = out[4];
+       }
+
+       in[5] = SCI_USB_CHARGE_BAT_LVL;
+       status = tci_raw(dev, in, out);
+       if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
+               pr_err("ACPI call to get USB Sleep and Charge mode failed\n");
+               sci_close(dev);
+               return;
+       } else if (out[0] == TOS_NOT_SUPPORTED) {
+               pr_info("USB Sleep and Charge not supported\n");
+               sci_close(dev);
+               return;
+       } else if (out[0] == TOS_SUCCESS) {
+               dev->usbsc_bat_level = out[2];
+               /*
+                * If we reach this point, it means that the laptop has support
+                * for this feature and all values are initialized.
+                * Set it as supported.
+                */
+               dev->usb_sleep_charge_supported = 1;
+       }
+
+       sci_close(dev);
+}
+
  static int toshiba_usb_sleep_charge_get(struct toshiba_acpi_dev *dev,
                                         u32 *mode)
  {
@@ -934,11 +975,11 @@ static int toshiba_usb_rapid_charge_get(struct toshiba_acpi_dev *dev,
         status = tci_raw(dev, in, out);
         sci_close(dev);
         if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
-               pr_err("ACPI call to get USB S&C battery level failed\n");
+               pr_err("ACPI call to get USB Rapid Charge failed\n");
                 return -EIO;
         } else if (out[0] == TOS_NOT_SUPPORTED ||
                    out[0] == TOS_INPUT_DATA_ERROR) {
-               pr_info("USB Sleep and Charge not supported\n");
+               pr_info("USB Rapid Charge not supported\n");
                 return -ENODEV;
         }
  
@@ -962,10 +1003,10 @@ static int toshiba_usb_rapid_charge_set(struct toshiba_acpi_dev *dev,
         status = tci_raw(dev, in, out);
         sci_close(dev);
         if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
-               pr_err("ACPI call to set USB S&C battery level failed\n");
+               pr_err("ACPI call to set USB Rapid Charge failed\n");
                 return -EIO;
         } else if (out[0] == TOS_NOT_SUPPORTED) {
-               pr_info("USB Sleep and Charge not supported\n");
+               pr_info("USB Rapid Charge not supported\n");
                 return -ENODEV;
         } else if (out[0] == TOS_INPUT_DATA_ERROR) {
                 return -EIO;
@@ -984,10 +1025,10 @@ static int toshiba_usb_sleep_music_get(struct toshiba_acpi_dev *dev, u32 *state)
         result = sci_read(dev, SCI_USB_SLEEP_MUSIC, state);
         sci_close(dev);
         if (result == TOS_FAILURE) {
-               pr_err("ACPI call to set USB S&C mode failed\n");
+               pr_err("ACPI call to get Sleep and Music failed\n");
                 return -EIO;
         } else if (result == TOS_NOT_SUPPORTED) {
-               pr_info("USB Sleep and Charge not supported\n");
+               pr_info("Sleep and Music not supported\n");
                 return -ENODEV;
         } else if (result == TOS_INPUT_DATA_ERROR) {
                 return -EIO;
@@ -1006,10 +1047,10 @@ static int toshiba_usb_sleep_music_set(struct toshiba_acpi_dev *dev, u32 state)
         result = sci_write(dev, SCI_USB_SLEEP_MUSIC, state);
         sci_close(dev);
         if (result == TOS_FAILURE) {
-               pr_err("ACPI call to set USB S&C mode failed\n");
+               pr_err("ACPI call to set Sleep and Music failed\n");
                 return -EIO;
         } else if (result == TOS_NOT_SUPPORTED) {
-               pr_info("USB Sleep and Charge not supported\n");
+               pr_info("Sleep and Music not supported\n");
                 return -ENODEV;
         } else if (result == TOS_INPUT_DATA_ERROR) {
                 return -EIO;
@@ -1149,6 +1190,28 @@ static int toshiba_usb_three_set(struct toshiba_acpi_dev *dev, u32 state)
         return 0;
  }
  
+/* Hotkey Event type */
+static int toshiba_hotkey_event_type_get(struct toshiba_acpi_dev *dev,
+                                        u32 *type)
+{
+       u32 val1 = 0x03;
+       u32 val2 = 0;
+       u32 result;
+
+       result = hci_read2(dev, HCI_SYSTEM_INFO, &val1, &val2);
+       if (result == TOS_FAILURE) {
+               pr_err("ACPI call to get System type failed\n");
+               return -EIO;
+       } else if (result == TOS_NOT_SUPPORTED) {
+               pr_info("System type not supported\n");
+               return -ENODEV;
+       }
+
+       *type = val2;
+
+       return 0;
+}
+
  /* Bluetooth rfkill handlers */
  
  static u32 hci_get_bt_present(struct toshiba_acpi_dev *dev, bool *present)
@@ -1973,17 +2036,21 @@ static ssize_t usb_sleep_charge_store(struct device *dev,
          * 0 - Disabled
          * 1 - Alternate (Non USB conformant devices that require more power)
          * 2 - Auto (USB conformant devices)
+        * 3 - Typical
          */
-       if (state != 0 && state != 1 && state != 2)
+       if (state != 0 && state != 1 && state != 2 && state != 3)
                 return -EINVAL;
  
         /* Set the USB charging mode to internal value */
+       mode = toshiba->usbsc_mode_base;
         if (state == 0)
-               mode = SCI_USB_CHARGE_DISABLED;
+               mode |= SCI_USB_CHARGE_DISABLED;
         else if (state == 1)
-               mode = SCI_USB_CHARGE_ALTERNATE;
+               mode |= SCI_USB_CHARGE_ALTERNATE;
         else if (state == 2)
-               mode = SCI_USB_CHARGE_AUTO;
+               mode |= SCI_USB_CHARGE_AUTO;
+       else if (state == 3)
+               mode |= SCI_USB_CHARGE_TYPICAL;
  
         ret = toshiba_usb_sleep_charge_set(toshiba, mode);
         if (ret)
@@ -2333,6 +2400,20 @@ static int toshiba_acpi_enable_hotkeys(struct toshiba_acpi_dev *dev)
         return 0;
  }
  
+static void toshiba_acpi_enable_special_functions(struct toshiba_acpi_dev *dev)
+{
+       u32 result;
+
+       /*
+        * Re-activate the hotkeys, but this time, we are using the
+        * "Special Functions" mode.
+        */
+       result = hci_write1(dev, HCI_HOTKEY_EVENT,
+                           HCI_HOTKEY_SPECIAL_FUNCTIONS);
+       if (result != TOS_SUCCESS)
+               pr_err("Could not enable the Special Function mode\n");
+}
+
  static bool toshiba_acpi_i8042_filter(unsigned char data, unsigned char str,
                                       struct serio *port)
  {
@@ -2434,10 +2515,22 @@ static void toshiba_acpi_process_hotkeys(struct toshiba_acpi_dev *dev)
  
  static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
  {
+       const struct key_entry *keymap = toshiba_acpi_keymap;
         acpi_handle ec_handle;
-       int error;
+       u32 events_type;
         u32 hci_result;
-       const struct key_entry *keymap = toshiba_acpi_keymap;
+       int error;
+
+       error = toshiba_acpi_enable_hotkeys(dev);
+       if (error)
+               return error;
+
+       error = toshiba_hotkey_event_type_get(dev, &events_type);
+       if (error) {
+               pr_err("Unable to query Hotkey Event Type\n");
+               return error;
+       }
+       dev->hotkey_event_type = events_type;
  
         dev->hotkey_dev = input_allocate_device();
         if (!dev->hotkey_dev)
@@ -2447,8 +2540,14 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
         dev->hotkey_dev->phys = "toshiba_acpi/input0";
         dev->hotkey_dev->id.bustype = BUS_HOST;
  
-       if (dmi_check_system(toshiba_alt_keymap_dmi))
+       if (events_type == HCI_SYSTEM_TYPE1 ||
+           !dev->kbd_function_keys_supported)
+               keymap = toshiba_acpi_keymap;
+       else if (events_type == HCI_SYSTEM_TYPE2 ||
+                dev->kbd_function_keys_supported)
                 keymap = toshiba_acpi_alt_keymap;
+       else
+               pr_info("Unknown event type received %x\n", events_type);
         error = sparse_keymap_setup(dev->hotkey_dev, keymap, NULL);
         if (error)
                 goto err_free_dev;
@@ -2490,12 +2589,6 @@ static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
                 goto err_remove_filter;
         }
  
-       error = toshiba_acpi_enable_hotkeys(dev);
-       if (error) {
-               pr_info("Unable to enable hotkeys\n");
-               goto err_remove_filter;
-       }
-
         error = input_register_device(dev->hotkey_dev);
         if (error) {
                 pr_info("Unable to register input device\n");
@@ -2541,6 +2634,20 @@ static int toshiba_acpi_setup_backlight(struct toshiba_acpi_dev *dev)
         ret = get_tr_backlight_status(dev, &enabled);
         dev->tr_backlight_supported = !ret;
  
+       /*
+        * Tell acpi-video-detect code to prefer vendor backlight on all
+        * systems with transflective backlight and on dmi matched systems.
+        */
+       if (dev->tr_backlight_supported ||
+           dmi_check_system(toshiba_vendor_backlight_dmi))
+               acpi_video_dmi_promote_vendor();
+
+       if (acpi_video_backlight_support())
+               return 0;
+
+       /* acpi-video may have loaded before we called dmi_promote_vendor() */
+       acpi_video_unregister_backlight();
+
         memset(&props, 0, sizeof(props));
         props.type = BACKLIGHT_PLATFORM;
         props.max_brightness = HCI_LCD_BRIGHTNESS_LEVELS - 1;
@@ -2624,6 +2731,7 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
  {
         struct toshiba_acpi_dev *dev;
         const char *hci_method;
+       u32 special_functions;
         u32 dummy;
         bool bt_present;
         int ret = 0;
@@ -2648,6 +2756,16 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
         acpi_dev->driver_data = dev;
         dev_set_drvdata(&acpi_dev->dev, dev);
  
+       /* Query the BIOS for supported features */
+
+       /*
+        * The "Special Functions" are always supported by the laptops
+        * with the new keyboard layout, query for its presence to help
+        * determine the keymap layout to use.
+        */
+       ret = toshiba_function_keys_get(dev, &special_functions);
+       dev->kbd_function_keys_supported = !ret;
+
         if (toshiba_acpi_setup_keyboard(dev))
                 pr_info("Unable to activate hotkeys\n");
  
@@ -2716,8 +2834,7 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
         ret = toshiba_accelerometer_supported(dev);
         dev->accelerometer_supported = !ret;
  
-       ret = toshiba_usb_sleep_charge_get(dev, &dummy);
-       dev->usb_sleep_charge_supported = !ret;
+       toshiba_usb_sleep_charge_available(dev);
  
         ret = toshiba_usb_rapid_charge_get(dev, &dummy);
         dev->usb_rapid_charge_supported = !ret;
@@ -2725,23 +2842,25 @@ static int toshiba_acpi_add(struct acpi_device *acpi_dev)
         ret = toshiba_usb_sleep_music_get(dev, &dummy);
         dev->usb_sleep_music_supported = !ret;
  
-       ret = toshiba_function_keys_get(dev, &dummy);
-       dev->kbd_function_keys_supported = !ret;
-
         ret = toshiba_panel_power_on_get(dev, &dummy);
         dev->panel_power_on_supported = !ret;
  
         ret = toshiba_usb_three_get(dev, &dummy);
         dev->usb_three_supported = !ret;
  
-       /* Determine whether or not BIOS supports fan and video interfaces */
-
         ret = get_video_status(dev, &dummy);
         dev->video_supported = !ret;
  
         ret = get_fan_status(dev, &dummy);
         dev->fan_supported = !ret;
  
+       /*
+        * Enable the "Special Functions" mode only if they are
+        * supported and if they are activated.
+        */
+       if (dev->kbd_function_keys_supported && special_functions)
+               toshiba_acpi_enable_special_functions(dev);
+
         ret = sysfs_create_group(&dev->acpi_dev->dev.kobj,
                                  &toshiba_attr_group);
         if (ret) {
@@ -2770,6 +2889,21 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
         case 0x80: /* Hotkeys and some system events */
                 toshiba_acpi_process_hotkeys(dev);
                 break;
+       case 0x81: /* Dock events */
+       case 0x82:
+       case 0x83:
+               pr_info("Dock event received %x\n", event);
+               break;
+       case 0x88: /* Thermal events */
+               pr_info("Thermal event received\n");
+               break;
+       case 0x8f: /* LID closed */
+       case 0x90: /* LID is closed and Dock has been ejected */
+               break;
+       case 0x8c: /* SATA power events */
+       case 0x8b:
+               pr_info("SATA power event received %x\n", event);
+               break;
         case 0x92: /* Keyboard backlight mode changed */
                 /* Update sysfs entries */
                 ret = sysfs_update_group(&acpi_dev->dev.kobj,
@@ -2777,17 +2911,19 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
                 if (ret)
                         pr_err("Unable to update sysfs entries\n");
                 break;
-       case 0x81: /* Unknown */
-       case 0x82: /* Unknown */
-       case 0x83: /* Unknown */
-       case 0x8c: /* Unknown */
+       case 0x85: /* Unknown */
+       case 0x8d: /* Unknown */
         case 0x8e: /* Unknown */
-       case 0x8f: /* Unknown */
-       case 0x90: /* Unknown */
+       case 0x94: /* Unknown */
+       case 0x95: /* Unknown */
         default:
                 pr_info("Unknown event received %x\n", event);
                 break;
         }
+
+       acpi_bus_generate_netlink_event(acpi_dev->pnp.device_class,
+                                       dev_name(&acpi_dev->dev),
+                                       event, 0);
  }
  
  #ifdef CONFIG_PM_SLEEP
diff --git a/drivers/platform/x86/toshiba_bluetooth.c b/drivers/platform/x86/toshiba_bluetooth.c

index 2cb1ea6..2498007 100644 (file)
--- a/drivers/platform/x86/toshiba_bluetooth.c
+++ b/drivers/platform/x86/toshiba_bluetooth.c
@@ -2,6 +2,7 @@
   * Toshiba Bluetooth Enable Driver
   *
   * Copyright (C) 2009 Jes Sorensen <Jes.Sorensen@gmail.com>
+ * Copyright (C) 2015 Azael Avalos <coproscefalo@gmail.com>
   *
   * Thanks to Matthew Garrett for background info on ACPI innards which
   * normal people aren't meant to understand :-)
@@ -25,6 +26,10 @@
  #include <linux/types.h>
  #include <linux/acpi.h>
  
+#define BT_KILLSWITCH_MASK     0x01
+#define BT_PLUGGED_MASK                0x40
+#define BT_POWER_MASK          0x80
+
  MODULE_AUTHOR("Jes Sorensen <Jes.Sorensen@gmail.com>");
  MODULE_DESCRIPTION("Toshiba Laptop ACPI Bluetooth Enable Driver");
  MODULE_LICENSE("GPL");
@@ -57,32 +62,107 @@ static struct acpi_driver toshiba_bt_rfkill_driver = {
         .drv.pm =       &toshiba_bt_pm,
  };
  
+static int toshiba_bluetooth_present(acpi_handle handle)
+{
+       acpi_status result;
+       u64 bt_present;
+
+       /*
+        * Some Toshiba laptops may have a fake TOS6205 device in
+        * their ACPI BIOS, so query the _STA method to see if there
+        * is really anything there.
+        */
+       result = acpi_evaluate_integer(handle, "_STA", NULL, &bt_present);
+       if (ACPI_FAILURE(result)) {
+               pr_err("ACPI call to query Bluetooth presence failed");
+               return -ENXIO;
+       } else if (!bt_present) {
+               pr_info("Bluetooth device not present\n");
+               return -ENODEV;
+       }
+
+       return 0;
+}
+
+static int toshiba_bluetooth_status(acpi_handle handle)
+{
+       acpi_status result;
+       u64 status;
+
+       result = acpi_evaluate_integer(handle, "BTST", NULL, &status);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not get Bluetooth device status\n");
+               return -ENXIO;
+       }
+
+       pr_info("Bluetooth status %llu\n", status);
+
+       return status;
+}
  
  static int toshiba_bluetooth_enable(acpi_handle handle)
  {
-       acpi_status res1, res2;
-       u64 result;
+       acpi_status result;
+       bool killswitch;
+       bool powered;
+       bool plugged;
+       int status;
  
         /*
          * Query ACPI to verify RFKill switch is set to 'on'.
          * If not, we return silently, no need to report it as
          * an error.
          */
-       res1 = acpi_evaluate_integer(handle, "BTST", NULL, &result);
-       if (ACPI_FAILURE(res1))
-               return res1;
-       if (!(result & 0x01))
-               return 0;
+       status = toshiba_bluetooth_status(handle);
+       if (status < 0)
+               return status;
+
+       killswitch = (status & BT_KILLSWITCH_MASK) ? true : false;
+       powered = (status & BT_POWER_MASK) ? true : false;
+       plugged = (status & BT_PLUGGED_MASK) ? true : false;
  
-       pr_info("Re-enabling Toshiba Bluetooth\n");
-       res1 = acpi_evaluate_object(handle, "AUSB", NULL, NULL);
-       res2 = acpi_evaluate_object(handle, "BTPO", NULL, NULL);
-       if (!ACPI_FAILURE(res1) || !ACPI_FAILURE(res2))
+       if (!killswitch)
                 return 0;
+       /*
+        * This check ensures to only enable the device if it is powered
+        * off or detached, as some recent devices somehow pass the killswitch
+        * test, causing a loop enabling/disabling the device, see bug 93911.
+        */
+       if (powered || plugged)
+               return 0;
+
+       result = acpi_evaluate_object(handle, "AUSB", NULL, NULL);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not attach USB Bluetooth device\n");
+               return -ENXIO;
+       }
+
+       result = acpi_evaluate_object(handle, "BTPO", NULL, NULL);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not power ON Bluetooth device\n");
+               return -ENXIO;
+       }
+
+       return 0;
+}
+
+static int toshiba_bluetooth_disable(acpi_handle handle)
+{
+       acpi_status result;
+
+       result = acpi_evaluate_object(handle, "BTPF", NULL, NULL);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not power OFF Bluetooth device\n");
+               return -ENXIO;
+       }
  
-       pr_warn("Failed to re-enable Toshiba Bluetooth\n");
+       result = acpi_evaluate_object(handle, "DUSB", NULL, NULL);
+       if (ACPI_FAILURE(result)) {
+               pr_err("Could not detach USB Bluetooth device\n");
+               return -ENXIO;
+       }
  
-       return -ENODEV;
+       return 0;
  }
  
  static void toshiba_bt_rfkill_notify(struct acpi_device *device, u32 event)
@@ -99,23 +179,18 @@ static int toshiba_bt_resume(struct device *dev)
  
  static int toshiba_bt_rfkill_add(struct acpi_device *device)
  {
-       acpi_status status;
-       u64 bt_present;
-       int result = -ENODEV;
+       int result;
  
-       /*
-        * Some Toshiba laptops may have a fake TOS6205 device in
-        * their ACPI BIOS, so query the _STA method to see if there
-        * is really anything there, before trying to enable it.
-        */
-       status = acpi_evaluate_integer(device->handle, "_STA", NULL,
-                                      &bt_present);
+       result = toshiba_bluetooth_present(device->handle);
+       if (result)
+               return result;
  
-       if (!ACPI_FAILURE(status) && bt_present) {
-               pr_info("Detected Toshiba ACPI Bluetooth device - "
-                       "installing RFKill handler\n");
-               result = toshiba_bluetooth_enable(device->handle);
-       }
+       pr_info("Toshiba ACPI Bluetooth device driver\n");
+
+       /* Enable the BT device */
+       result = toshiba_bluetooth_enable(device->handle);
+       if (result)
+               return result;
  
         return result;
  }
@@ -123,7 +198,7 @@ static int toshiba_bt_rfkill_add(struct acpi_device *device)
  static int toshiba_bt_rfkill_remove(struct acpi_device *device)
  {
         /* clean up */
-       return 0;
+       return toshiba_bluetooth_disable(device->handle);
  }
  
  module_acpi_driver(toshiba_bt_rfkill_driver);
diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c

index 737e56d..aac4757 100644 (file)
--- a/drivers/platform/x86/wmi.c
+++ b/drivers/platform/x86/wmi.c
@@ -45,7 +45,6 @@ MODULE_LICENSE("GPL");
  
  #define ACPI_WMI_CLASS "wmi"
  
-static DEFINE_MUTEX(wmi_data_lock);
  static LIST_HEAD(wmi_block_list);
  
  struct guid_block {
@@ -240,10 +239,10 @@ static bool find_guid(const char *guid_string, struct wmi_block **out)
                 if (memcmp(block->guid, guid_input, 16) == 0) {
                         if (out)
                                 *out = wblock;
-                       return 1;
+                       return true;
                 }
         }
-       return 0;
+       return false;
  }
  
  static acpi_status wmi_method_enable(struct wmi_block *wblock, int enable)
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c

index e03877c..fd24323 100644 (file)
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1064,6 +1064,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
         RAPL_CPU(0x3f, rapl_defaults_hsw_server),/* Haswell servers */
         RAPL_CPU(0x4f, rapl_defaults_hsw_server),/* Broadwell servers */
         RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */
+       RAPL_CPU(0x4E, rapl_defaults_core),/* Skylake */
         RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */
         RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */
         RAPL_CPU(0x56, rapl_defaults_core),/* Future Xeon */
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c

index 810aef3..ba34c7d 100644 (file)
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -573,7 +573,7 @@ EXPORT_SYMBOL_GPL(of_pwm_get);
   * @table: array of consumers to register
   * @num: number of consumers in table
   */
-void __init pwm_add_table(struct pwm_lookup *table, size_t num)
+void pwm_add_table(struct pwm_lookup *table, size_t num)
  {
         mutex_lock(&pwm_lookup_lock);
  
diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c

index 522f707..fa5feab 100644 (file)
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -225,6 +225,10 @@ static const struct of_device_id atmel_hlcdc_dt_ids[] = {
                 .compatible = "atmel,sama5d3-hlcdc",
                 .data = &atmel_hlcdc_pwm_sama5d3_errata,
         },
+       {
+               .compatible = "atmel,sama5d4-hlcdc",
+               .data = &atmel_hlcdc_pwm_sama5d3_errata,
+       },
         { /* sentinel */ },
  };
  
diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c

index f75ecb0..b430811 100644 (file)
--- a/drivers/pwm/pwm-mxs.c
+++ b/drivers/pwm/pwm-mxs.c
@@ -35,6 +35,10 @@
  #define  PERIOD_CDIV(div)      (((div) & 0x7) << 20)
  #define  PERIOD_CDIV_MAX       8
  
+static const unsigned int cdiv[PERIOD_CDIV_MAX] = {
+       1, 2, 4, 8, 16, 64, 256, 1024
+};
+
  struct mxs_pwm_chip {
         struct pwm_chip chip;
         struct clk *clk;
@@ -54,13 +58,13 @@ static int mxs_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
  
         rate = clk_get_rate(mxs->clk);
         while (1) {
-               c = rate / (1 << div);
+               c = rate / cdiv[div];
                 c = c * period_ns;
                 do_div(c, 1000000000);
                 if (c < PERIOD_PERIOD_MAX)
                         break;
                 div++;
-               if (div > PERIOD_CDIV_MAX)
+               if (div >= PERIOD_CDIV_MAX)
                         return -EINVAL;
         }
  
diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c

index 3fb775d..34b5c27 100644 (file)
--- a/drivers/pwm/pwm-pca9685.c
+++ b/drivers/pwm/pwm-pca9685.c
@@ -202,7 +202,7 @@ static const struct pwm_ops pca9685_pwm_ops = {
         .owner = THIS_MODULE,
  };
  
-static struct regmap_config pca9685_regmap_i2c_config = {
+static const struct regmap_config pca9685_regmap_i2c_config = {
         .reg_bits = 8,
         .val_bits = 8,
         .max_register = PCA9685_NUMREGS,
diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c

index 3e9b583..ff201e1 100644 (file)
--- a/drivers/pwm/pwm-samsung.c
+++ b/drivers/pwm/pwm-samsung.c
@@ -269,12 +269,31 @@ static void pwm_samsung_disable(struct pwm_chip *chip, struct pwm_device *pwm)
         spin_unlock_irqrestore(&samsung_pwm_lock, flags);
  }
  
+static void pwm_samsung_manual_update(struct samsung_pwm_chip *chip,
+                                     struct pwm_device *pwm)
+{
+       unsigned int tcon_chan = to_tcon_channel(pwm->hwpwm);
+       u32 tcon;
+       unsigned long flags;
+
+       spin_lock_irqsave(&samsung_pwm_lock, flags);
+
+       tcon = readl(chip->base + REG_TCON);
+       tcon |= TCON_MANUALUPDATE(tcon_chan);
+       writel(tcon, chip->base + REG_TCON);
+
+       tcon &= ~TCON_MANUALUPDATE(tcon_chan);
+       writel(tcon, chip->base + REG_TCON);
+
+       spin_unlock_irqrestore(&samsung_pwm_lock, flags);
+}
+
  static int pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm,
                               int duty_ns, int period_ns)
  {
         struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip);
         struct samsung_pwm_channel *chan = pwm_get_chip_data(pwm);
-       u32 tin_ns = chan->tin_ns, tcnt, tcmp;
+       u32 tin_ns = chan->tin_ns, tcnt, tcmp, oldtcmp;
  
         /*
          * We currently avoid using 64bit arithmetic by using the
@@ -288,6 +307,7 @@ static int pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm,
                 return 0;
  
         tcnt = readl(our_chip->base + REG_TCNTB(pwm->hwpwm));
+       oldtcmp = readl(our_chip->base + REG_TCMPB(pwm->hwpwm));
  
         /* We need tick count for calculation, not last tick. */
         ++tcnt;
@@ -335,6 +355,16 @@ static int pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm,
         writel(tcnt, our_chip->base + REG_TCNTB(pwm->hwpwm));
         writel(tcmp, our_chip->base + REG_TCMPB(pwm->hwpwm));
  
+       /*
+        * In case the PWM is currently at 100% duty cycle, force a manual
+        * update to prevent the signal staying high if the PWM is disabled
+        * shortly afer this update (before it autoreloaded the new values).
+        */
+       if (oldtcmp == (u32) -1) {
+               dev_dbg(our_chip->chip.dev, "Forcing manual update");
+               pwm_samsung_manual_update(our_chip, pwm);
+       }
+
         chan->period_ns = period_ns;
         chan->tin_ns = tin_ns;
         chan->duty_ns = duty_ns;
diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/kvm/virtio_ccw.c

index 71d7802..6f1fa17 100644 (file)
--- a/drivers/s390/kvm/virtio_ccw.c
+++ b/drivers/s390/kvm/virtio_ccw.c
@@ -1201,13 +1201,9 @@ static int virtio_ccw_online(struct ccw_device *cdev)
         vcdev->vdev.id.vendor = cdev->id.cu_type;
         vcdev->vdev.id.device = cdev->id.cu_model;
  
-       if (virtio_device_is_legacy_only(vcdev->vdev.id)) {
-               vcdev->revision = 0;
-       } else {
-               ret = virtio_ccw_set_transport_rev(vcdev);
-               if (ret)
-                       goto out_free;
-       }
+       ret = virtio_ccw_set_transport_rev(vcdev);
+       if (ret)
+               goto out_free;
  
         ret = register_virtio_device(&vcdev->vdev);
         if (ret) {
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c

index 5741825..fe8a8d1 100644 (file)
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -3065,7 +3065,7 @@ static void qlt_do_ctio_completion(struct scsi_qla_host *vha, uint32_t handle,
  {
         struct qla_hw_data *ha = vha->hw;
         struct se_cmd *se_cmd;
-       struct target_core_fabric_ops *tfo;
+       const struct target_core_fabric_ops *tfo;
         struct qla_tgt_cmd *cmd;
  
         if (handle & CTIO_INTERMEDIATE_HANDLE_MARK) {
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c

index ab4879e..68c2002 100644 (file)
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -53,9 +53,8 @@
  static struct workqueue_struct *tcm_qla2xxx_free_wq;
  static struct workqueue_struct *tcm_qla2xxx_cmd_wq;
  
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *tcm_qla2xxx_fabric_configfs;
-static struct target_fabric_configfs *tcm_qla2xxx_npiv_fabric_configfs;
+static const struct target_core_fabric_ops tcm_qla2xxx_ops;
+static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops;
  
  /*
   * Parse WWN.
@@ -336,6 +335,14 @@ static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg
         return tpg->tpg_attrib.demo_mode_login_only;
  }
  
+static int tcm_qla2xxx_check_prot_fabric_only(struct se_portal_group *se_tpg)
+{
+       struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
+                               struct tcm_qla2xxx_tpg, se_tpg);
+
+       return tpg->tpg_attrib.fabric_prot_type;
+}
+
  static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl(
         struct se_portal_group *se_tpg)
  {
@@ -1082,8 +1089,53 @@ static ssize_t tcm_qla2xxx_tpg_store_enable(
  
  TF_TPG_BASE_ATTR(tcm_qla2xxx, enable, S_IRUGO | S_IWUSR);
  
+static ssize_t tcm_qla2xxx_tpg_show_dynamic_sessions(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       return target_show_dynamic_sessions(se_tpg, page);
+}
+
+TF_TPG_BASE_ATTR_RO(tcm_qla2xxx, dynamic_sessions);
+
+static ssize_t tcm_qla2xxx_tpg_store_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       const char *page,
+       size_t count)
+{
+       struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
+                               struct tcm_qla2xxx_tpg, se_tpg);
+       unsigned long val;
+       int ret = kstrtoul(page, 0, &val);
+
+       if (ret) {
+               pr_err("kstrtoul() returned %d for fabric_prot_type\n", ret);
+               return ret;
+       }
+       if (val != 0 && val != 1 && val != 3) {
+               pr_err("Invalid qla2xxx fabric_prot_type: %lu\n", val);
+               return -EINVAL;
+       }
+       tpg->tpg_attrib.fabric_prot_type = val;
+
+       return count;
+}
+
+static ssize_t tcm_qla2xxx_tpg_show_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
+                               struct tcm_qla2xxx_tpg, se_tpg);
+
+       return sprintf(page, "%d\n", tpg->tpg_attrib.fabric_prot_type);
+}
+TF_TPG_BASE_ATTR(tcm_qla2xxx, fabric_prot_type, S_IRUGO | S_IWUSR);
+
  static struct configfs_attribute *tcm_qla2xxx_tpg_attrs[] = {
         &tcm_qla2xxx_tpg_enable.attr,
+       &tcm_qla2xxx_tpg_dynamic_sessions.attr,
+       &tcm_qla2xxx_tpg_fabric_prot_type.attr,
         NULL,
  };
  
@@ -1124,7 +1176,7 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg(
         tpg->tpg_attrib.cache_dynamic_acls = 1;
         tpg->tpg_attrib.demo_mode_login_only = 1;
  
-       ret = core_tpg_register(&tcm_qla2xxx_fabric_configfs->tf_ops, wwn,
+       ret = core_tpg_register(&tcm_qla2xxx_ops, wwn,
                                 &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
         if (ret < 0) {
                 kfree(tpg);
@@ -1244,7 +1296,7 @@ static struct se_portal_group *tcm_qla2xxx_npiv_make_tpg(
         tpg->tpg_attrib.cache_dynamic_acls = 1;
         tpg->tpg_attrib.demo_mode_login_only = 1;
  
-       ret = core_tpg_register(&tcm_qla2xxx_npiv_fabric_configfs->tf_ops, wwn,
+       ret = core_tpg_register(&tcm_qla2xxx_npiv_ops, wwn,
                                 &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
         if (ret < 0) {
                 kfree(tpg);
@@ -1560,7 +1612,7 @@ static int tcm_qla2xxx_check_initiator_node_acl(
  
         se_sess = transport_init_session_tags(num_tags,
                                               sizeof(struct qla_tgt_cmd),
-                                             TARGET_PROT_NORMAL);
+                                             TARGET_PROT_ALL);
         if (IS_ERR(se_sess)) {
                 pr_err("Unable to initialize struct se_session\n");
                 return PTR_ERR(se_sess);
@@ -1934,7 +1986,9 @@ static struct configfs_attribute *tcm_qla2xxx_wwn_attrs[] = {
         NULL,
  };
  
-static struct target_core_fabric_ops tcm_qla2xxx_ops = {
+static const struct target_core_fabric_ops tcm_qla2xxx_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "qla2xxx",
         .get_fabric_name                = tcm_qla2xxx_get_fabric_name,
         .get_fabric_proto_ident         = tcm_qla2xxx_get_fabric_proto_ident,
         .tpg_get_wwn                    = tcm_qla2xxx_get_fabric_wwn,
@@ -1949,6 +2003,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = {
                                         tcm_qla2xxx_check_demo_write_protect,
         .tpg_check_prod_mode_write_protect =
                                         tcm_qla2xxx_check_prod_write_protect,
+       .tpg_check_prot_fabric_only     = tcm_qla2xxx_check_prot_fabric_only,
         .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only,
         .tpg_alloc_fabric_acl           = tcm_qla2xxx_alloc_fabric_acl,
         .tpg_release_fabric_acl         = tcm_qla2xxx_release_fabric_acl,
@@ -1983,9 +2038,15 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = {
         .fabric_drop_np                 = NULL,
         .fabric_make_nodeacl            = tcm_qla2xxx_make_nodeacl,
         .fabric_drop_nodeacl            = tcm_qla2xxx_drop_nodeacl,
+
+       .tfc_wwn_attrs                  = tcm_qla2xxx_wwn_attrs,
+       .tfc_tpg_base_attrs             = tcm_qla2xxx_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = tcm_qla2xxx_tpg_attrib_attrs,
  };
  
-static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
+static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "qla2xxx_npiv",
         .get_fabric_name                = tcm_qla2xxx_npiv_get_fabric_name,
         .get_fabric_proto_ident         = tcm_qla2xxx_get_fabric_proto_ident,
         .tpg_get_wwn                    = tcm_qla2xxx_get_fabric_wwn,
@@ -2033,94 +2094,26 @@ static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
         .fabric_drop_np                 = NULL,
         .fabric_make_nodeacl            = tcm_qla2xxx_make_nodeacl,
         .fabric_drop_nodeacl            = tcm_qla2xxx_drop_nodeacl,
+
+       .tfc_wwn_attrs                  = tcm_qla2xxx_wwn_attrs,
+       .tfc_tpg_base_attrs             = tcm_qla2xxx_npiv_tpg_attrs,
  };
  
  static int tcm_qla2xxx_register_configfs(void)
  {
-       struct target_fabric_configfs *fabric, *npiv_fabric;
         int ret;
  
         pr_debug("TCM QLOGIC QLA2XXX fabric module %s on %s/%s on "
             UTS_RELEASE"\n", TCM_QLA2XXX_VERSION, utsname()->sysname,
             utsname()->machine);
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "qla2xxx");
-       if (IS_ERR(fabric)) {
-               pr_err("target_fabric_configfs_init() failed\n");
-               return PTR_ERR(fabric);
-       }
-       /*
-        * Setup fabric->tf_ops from our local tcm_qla2xxx_ops
-        */
-       fabric->tf_ops = tcm_qla2xxx_ops;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_qla2xxx_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs =
-                                               tcm_qla2xxx_tpg_attrib_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * Register the fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() failed for TCM_QLA2XXX\n");
+
+       ret = target_register_template(&tcm_qla2xxx_ops);
+       if (ret)
                 return ret;
-       }
-       /*
-        * Setup our local pointer to *fabric
-        */
-       tcm_qla2xxx_fabric_configfs = fabric;
-       pr_debug("TCM_QLA2XXX[0] - Set fabric -> tcm_qla2xxx_fabric_configfs\n");
  
-       /*
-        * Register the top level struct config_item_type for NPIV with TCM core
-        */
-       npiv_fabric = target_fabric_configfs_init(THIS_MODULE, "qla2xxx_npiv");
-       if (IS_ERR(npiv_fabric)) {
-               pr_err("target_fabric_configfs_init() failed\n");
-               ret = PTR_ERR(npiv_fabric);
-               goto out_fabric;
-       }
-       /*
-        * Setup fabric->tf_ops from our local tcm_qla2xxx_npiv_ops
-        */
-       npiv_fabric->tf_ops = tcm_qla2xxx_npiv_ops;
-       /*
-        * Setup default attribute lists for various npiv_fabric->tf_cit_tmpl
-        */
-       npiv_fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_qla2xxx_wwn_attrs;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs =
-           tcm_qla2xxx_npiv_tpg_attrs;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       npiv_fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * Register the npiv_fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(npiv_fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() failed for TCM_QLA2XXX\n");
+       ret = target_register_template(&tcm_qla2xxx_npiv_ops);
+       if (ret)
                 goto out_fabric;
-       }
-       /*
-        * Setup our local pointer to *npiv_fabric
-        */
-       tcm_qla2xxx_npiv_fabric_configfs = npiv_fabric;
-       pr_debug("TCM_QLA2XXX[0] - Set fabric -> tcm_qla2xxx_npiv_fabric_configfs\n");
  
         tcm_qla2xxx_free_wq = alloc_workqueue("tcm_qla2xxx_free",
                                                 WQ_MEM_RECLAIM, 0);
@@ -2140,9 +2133,9 @@ static int tcm_qla2xxx_register_configfs(void)
  out_free_wq:
         destroy_workqueue(tcm_qla2xxx_free_wq);
  out_fabric_npiv:
-       target_fabric_configfs_deregister(tcm_qla2xxx_npiv_fabric_configfs);
+       target_unregister_template(&tcm_qla2xxx_npiv_ops);
  out_fabric:
-       target_fabric_configfs_deregister(tcm_qla2xxx_fabric_configfs);
+       target_unregister_template(&tcm_qla2xxx_ops);
         return ret;
  }
  
@@ -2151,13 +2144,8 @@ static void tcm_qla2xxx_deregister_configfs(void)
         destroy_workqueue(tcm_qla2xxx_cmd_wq);
         destroy_workqueue(tcm_qla2xxx_free_wq);
  
-       target_fabric_configfs_deregister(tcm_qla2xxx_fabric_configfs);
-       tcm_qla2xxx_fabric_configfs = NULL;
-       pr_debug("TCM_QLA2XXX[0] - Cleared tcm_qla2xxx_fabric_configfs\n");
-
-       target_fabric_configfs_deregister(tcm_qla2xxx_npiv_fabric_configfs);
-       tcm_qla2xxx_npiv_fabric_configfs = NULL;
-       pr_debug("TCM_QLA2XXX[0] - Cleared tcm_qla2xxx_npiv_fabric_configfs\n");
+       target_unregister_template(&tcm_qla2xxx_ops);
+       target_unregister_template(&tcm_qla2xxx_npiv_ops);
  }
  
  static int __init tcm_qla2xxx_init(void)
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h

index 10c0021..2329511 100644 (file)
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h
@@ -33,6 +33,7 @@ struct tcm_qla2xxx_tpg_attrib {
         int demo_mode_write_protect;
         int prod_mode_write_protect;
         int demo_mode_login_only;
+       int fabric_prot_type;
  };
  
  struct tcm_qla2xxx_tpg {
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c

index 186924a..f6bac9e 100644 (file)
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -1023,7 +1023,6 @@ static struct dma_chan *rspi_request_dma_chan(struct device *dev,
         }
  
         memset(&cfg, 0, sizeof(cfg));
-       cfg.slave_id = id;
         cfg.direction = dir;
         if (dir == DMA_MEM_TO_DEV) {
                 cfg.dst_addr = port_addr;
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c

index e57eec0..bcc7c63 100644 (file)
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -1030,7 +1030,6 @@ static struct dma_chan *sh_msiof_request_dma_chan(struct device *dev,
         }
  
         memset(&cfg, 0, sizeof(cfg));
-       cfg.slave_id = id;
         cfg.direction = dir;
         if (dir == DMA_MEM_TO_DEV) {
                 cfg.dst_addr = port_addr;
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c

index 0e3d8c7..b0b96ab 100644 (file)
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -1106,6 +1106,7 @@ struct dma_buf *ion_share_dma_buf(struct ion_client *client,
         struct ion_buffer *buffer;
         struct dma_buf *dmabuf;
         bool valid_handle;
+       DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
  
         mutex_lock(&client->lock);
         valid_handle = ion_handle_validate(client, handle);
@@ -1118,8 +1119,12 @@ struct dma_buf *ion_share_dma_buf(struct ion_client *client,
         ion_buffer_get(buffer);
         mutex_unlock(&client->lock);
  
-       dmabuf = dma_buf_export(buffer, &dma_buf_ops, buffer->size, O_RDWR,
-                               NULL);
+       exp_info.ops = &dma_buf_ops;
+       exp_info.size = buffer->size;
+       exp_info.flags = O_RDWR;
+       exp_info.priv = buffer;
+
+       dmabuf = dma_buf_export(&exp_info);
         if (IS_ERR(dmabuf)) {
                 ion_buffer_put(buffer);
                 return dmabuf;
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c

index fe1fd05..5af0135 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -153,7 +153,7 @@ static int ll_ddelete(const struct dentry *de)
  
         CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n",
                d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping",
-              de, de, de->d_parent, de->d_inode,
+              de, de, de->d_parent, d_inode(de),
                d_unhashed(de) ? "" : "hashed,",
                list_empty(&de->d_subdirs) ? "" : "subdirs");
  
@@ -167,8 +167,8 @@ static int ll_ddelete(const struct dentry *de)
  #if 0
         /* if not ldlm lock for this inode, set i_nlink to 0 so that
          * this inode can be recycled later b=20433 */
-       if (de->d_inode && !find_cbdata(de->d_inode))
-               clear_nlink(de->d_inode);
+       if (d_really_is_positive(de) && !find_cbdata(d_inode(de)))
+               clear_nlink(d_inode(de));
  #endif
  
         if (d_lustre_invalid((struct dentry *)de))
@@ -181,7 +181,7 @@ int ll_d_init(struct dentry *de)
         LASSERT(de != NULL);
  
         CDEBUG(D_DENTRY, "ldd on dentry %pd (%p) parent %p inode %p refc %d\n",
-               de, de, de->d_parent, de->d_inode,
+               de, de, de->d_parent, d_inode(de),
                 d_count(de));
  
         if (de->d_fsdata == NULL) {
@@ -261,7 +261,7 @@ void ll_invalidate_aliases(struct inode *inode)
         ll_d_hlist_for_each_entry(dentry, p, &inode->i_dentry, d_u.d_alias) {
                 CDEBUG(D_DENTRY, "dentry in drop %pd (%p) parent %p inode %p flags %d\n",
                        dentry, dentry, dentry->d_parent,
-                      dentry->d_inode, dentry->d_flags);
+                      d_inode(dentry), dentry->d_flags);
  
                 d_lustre_invalidate(dentry, 0);
         }
@@ -309,7 +309,7 @@ void ll_lookup_finish_locks(struct lookup_intent *it, struct inode *inode)
  static int ll_revalidate_dentry(struct dentry *dentry,
                                 unsigned int lookup_flags)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
  
         /*
          * if open&create is set, talk to MDS to make sure file is created if
@@ -329,7 +329,7 @@ static int ll_revalidate_dentry(struct dentry *dentry,
         if (lookup_flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       do_statahead_enter(dir, &dentry, dentry->d_inode == NULL);
+       do_statahead_enter(dir, &dentry, d_inode(dentry) == NULL);
         ll_statahead_mark(dir, dentry);
         return 1;
  }
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c

index 529062e..4b44c63 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -388,7 +388,7 @@ int ll_file_release(struct inode *inode, struct file *file)
  static int ll_intent_file_open(struct dentry *dentry, void *lmm,
                                int lmmsize, struct lookup_intent *itp)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct dentry *parent = dentry->d_parent;
         const char *name = dentry->d_name.name;
@@ -413,7 +413,7 @@ static int ll_intent_file_open(struct dentry *dentry, void *lmm,
                         opc = LUSTRE_OPC_CREATE;
         }
  
-       op_data  = ll_prep_md_op_data(NULL, parent->d_inode,
+       op_data  = ll_prep_md_op_data(NULL, d_inode(parent),
                                       inode, name, len,
                                       O_RDWR, opc, NULL);
         if (IS_ERR(op_data))
@@ -2896,7 +2896,7 @@ static int ll_inode_revalidate_fini(struct inode *inode, int rc)
  
  static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ptlrpc_request *req = NULL;
         struct obd_export *exp;
         int rc = 0;
@@ -2948,12 +2948,12 @@ static int __ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
                    do_lookup() -> ll_revalidate_it(). We cannot use d_drop
                    here to preserve get_cwd functionality on 2.6.
                    Bug 10503 */
-               if (!dentry->d_inode->i_nlink)
+               if (!d_inode(dentry)->i_nlink)
                         d_lustre_invalidate(dentry, 0);
  
                 ll_lookup_finish_locks(&oit, inode);
-       } else if (!ll_have_md_lock(dentry->d_inode, &ibits, LCK_MINMODE)) {
-               struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
+       } else if (!ll_have_md_lock(d_inode(dentry), &ibits, LCK_MINMODE)) {
+               struct ll_sb_info *sbi = ll_i2sbi(d_inode(dentry));
                 u64 valid = OBD_MD_FLGETATTR;
                 struct md_op_data *op_data;
                 int ealen = 0;
@@ -2991,7 +2991,7 @@ out:
  
  static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int rc;
  
         rc = __ll_inode_revalidate(dentry, ibits);
@@ -3019,7 +3019,7 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits)
  
  int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat)
  {
-       struct inode *inode = de->d_inode;
+       struct inode *inode = d_inode(de);
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct ll_inode_info *lli = ll_i2info(inode);
         int res = 0;
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h

index e7422f5..5f918e3 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -1488,7 +1488,7 @@ static inline void d_lustre_invalidate(struct dentry *dentry, int nested)
  {
         CDEBUG(D_DENTRY, "invalidate dentry %pd (%p) parent %p inode %p refc %d\n",
                dentry, dentry,
-              dentry->d_parent, dentry->d_inode, d_count(dentry));
+              dentry->d_parent, d_inode(dentry), d_count(dentry));
  
         spin_lock_nested(&dentry->d_lock,
                          nested ? DENTRY_D_LOCK_NESTED : DENTRY_D_LOCK_NORMAL);
diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c

index bf1ec27..a27af78 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c
@@ -1166,7 +1166,7 @@ static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
                   struct md_open_data **mod)
  {
         struct lustre_md md;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ll_sb_info *sbi = ll_i2sbi(inode);
         struct ptlrpc_request *request = NULL;
         int rc, ia_valid;
@@ -1290,7 +1290,7 @@ static int ll_setattr_ost(struct inode *inode, struct iattr *attr)
   */
  int ll_setattr_raw(struct dentry *dentry, struct iattr *attr, bool hsm_import)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ll_inode_info *lli = ll_i2info(inode);
         struct md_op_data *op_data = NULL;
         struct md_open_data *mod = NULL;
@@ -1465,7 +1465,7 @@ out:
  
  int ll_setattr(struct dentry *de, struct iattr *attr)
  {
-       int mode = de->d_inode->i_mode;
+       int mode = d_inode(de)->i_mode;
  
         if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
                               (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c

index 243a784..db43b81 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/llite_nfs.c
+++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c
@@ -230,11 +230,11 @@ static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name,
  static int ll_get_name(struct dentry *dentry, char *name,
                        struct dentry *child)
  {
-       struct inode *dir = dentry->d_inode;
+       struct inode *dir = d_inode(dentry);
         int rc;
         struct ll_getname_data lgd = {
                 .lgd_name = name,
-               .lgd_fid = ll_i2info(child->d_inode)->lli_fid,
+               .lgd_fid = ll_i2info(d_inode(child))->lli_fid,
                 .ctx.actor = ll_nfs_get_name_filldir,
         };
  
@@ -282,7 +282,7 @@ static struct dentry *ll_fh_to_parent(struct super_block *sb, struct fid *fid,
  static struct dentry *ll_get_parent(struct dentry *dchild)
  {
         struct ptlrpc_request *req = NULL;
-       struct inode      *dir = dchild->d_inode;
+       struct inode      *dir = d_inode(dchild);
         struct ll_sb_info     *sbi;
         struct dentry    *result = NULL;
         struct mdt_body       *body;
diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c

index 49f1cb0..5a25dcd 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c
@@ -155,7 +155,7 @@ static void ll_invalidate_negative_children(struct inode *dir)
                         list_for_each_entry_safe(child, tmp_subdir,
                                                  &dentry->d_subdirs,
                                                  d_child) {
-                               if (child->d_inode == NULL)
+                               if (d_really_is_negative(child))
                                         d_lustre_invalidate(child, 1);
                         }
                 }
@@ -392,7 +392,7 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
                         iput(inode);
                         CDEBUG(D_DENTRY,
                                "Reuse dentry %p inode %p refc %d flags %#x\n",
-                             new, new->d_inode, d_count(new), new->d_flags);
+                             new, d_inode(new), d_count(new), new->d_flags);
                         return new;
                 }
         }
@@ -401,7 +401,7 @@ struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de)
                 return ERR_PTR(rc);
         d_add(de, inode);
         CDEBUG(D_DENTRY, "Add dentry %p inode %p refc %d flags %#x\n",
-              de, de->d_inode, d_count(de), de->d_flags);
+              de, d_inode(de), d_count(de), de->d_flags);
         return de;
  }
  
@@ -448,7 +448,7 @@ static int ll_lookup_it_finish(struct ptlrpc_request *request,
                    !it_disposition(it, DISP_OPEN_CREATE)) {
                 /* With DISP_OPEN_CREATE dentry will
                    instantiated in ll_create_it. */
-               LASSERT((*de)->d_inode == NULL);
+               LASSERT(d_inode(*de) == NULL);
                 d_instantiate(*de, inode);
         }
  
@@ -541,7 +541,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
                 goto out;
         }
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         if ((it->it_op & IT_OPEN) && inode &&
             !S_ISREG(inode->i_mode) &&
             !S_ISDIR(inode->i_mode)) {
@@ -638,9 +638,9 @@ static int ll_atomic_open(struct inode *dir, struct dentry *dentry,
  
                         *opened |= FILE_CREATED;
                 }
-               if (dentry->d_inode && it_disposition(it, DISP_OPEN_OPEN)) {
+               if (d_really_is_positive(dentry) && it_disposition(it, DISP_OPEN_OPEN)) {
                         /* Open dentry. */
-                       if (S_ISFIFO(dentry->d_inode->i_mode)) {
+                       if (S_ISFIFO(d_inode(dentry)->i_mode)) {
                                 /* We cannot call open here as it would
                                  * deadlock.
                                  */
@@ -862,8 +862,8 @@ static int ll_create_nd(struct inode *dir, struct dentry *dentry,
  
  static inline void ll_get_child_fid(struct dentry *child, struct lu_fid *fid)
  {
-       if (child->d_inode)
-               *fid = *ll_inode2fid(child->d_inode);
+       if (d_really_is_positive(child))
+               *fid = *ll_inode2fid(d_inode(child));
  }
  
  /**
@@ -1076,7 +1076,7 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry,
  static int ll_link(struct dentry *old_dentry, struct inode *dir,
                    struct dentry *new_dentry)
  {
-       struct inode *src = old_dentry->d_inode;
+       struct inode *src = d_inode(old_dentry);
         struct ll_sb_info *sbi = ll_i2sbi(dir);
         struct ptlrpc_request *request = NULL;
         struct md_op_data *op_data;
diff --git a/drivers/staging/lustre/lustre/llite/statahead.c b/drivers/staging/lustre/lustre/llite/statahead.c

index b75562c..7f80712 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/statahead.c
+++ b/drivers/staging/lustre/lustre/llite/statahead.c
@@ -880,7 +880,7 @@ static int do_sa_lookup(struct inode *dir, struct ll_sa_entry *entry)
  static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
                             struct dentry *dentry)
  {
-       struct inode         *inode = dentry->d_inode;
+       struct inode         *inode = d_inode(dentry);
         struct lookup_intent      it = { .it_op = IT_GETATTR,
                                          .d.lustre.it_lock_handle = 0 };
         struct md_enqueue_info   *minfo;
@@ -926,7 +926,7 @@ static int do_sa_revalidate(struct inode *dir, struct ll_sa_entry *entry,
  static void ll_statahead_one(struct dentry *parent, const char *entry_name,
                              int entry_name_len)
  {
-       struct inode         *dir    = parent->d_inode;
+       struct inode         *dir    = d_inode(parent);
         struct ll_inode_info     *lli    = ll_i2info(dir);
         struct ll_statahead_info *sai    = lli->lli_sai;
         struct dentry       *dentry = NULL;
@@ -944,8 +944,8 @@ static void ll_statahead_one(struct dentry *parent, const char *entry_name,
                 rc = do_sa_lookup(dir, entry);
         } else {
                 rc = do_sa_revalidate(dir, entry, dentry);
-               if (rc == 1 && agl_should_run(sai, dentry->d_inode))
-                       ll_agl_add(sai, dentry->d_inode, entry->se_index);
+               if (rc == 1 && agl_should_run(sai, d_inode(dentry)))
+                       ll_agl_add(sai, d_inode(dentry), entry->se_index);
         }
  
         if (dentry != NULL)
@@ -968,7 +968,7 @@ static void ll_statahead_one(struct dentry *parent, const char *entry_name,
  static int ll_agl_thread(void *arg)
  {
         struct dentry       *parent = (struct dentry *)arg;
-       struct inode         *dir    = parent->d_inode;
+       struct inode         *dir    = d_inode(parent);
         struct ll_inode_info     *plli   = ll_i2info(dir);
         struct ll_inode_info     *clli;
         struct ll_sb_info       *sbi    = ll_i2sbi(dir);
@@ -1042,7 +1042,7 @@ static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
         CDEBUG(D_READA, "start agl thread: sai %p, parent %pd\n",
                sai, parent);
  
-       plli = ll_i2info(parent->d_inode);
+       plli = ll_i2info(d_inode(parent));
         task = kthread_run(ll_agl_thread, parent,
                                "ll_agl_%u", plli->lli_opendir_pid);
         if (IS_ERR(task)) {
@@ -1059,7 +1059,7 @@ static void ll_start_agl(struct dentry *parent, struct ll_statahead_info *sai)
  static int ll_statahead_thread(void *arg)
  {
         struct dentry       *parent = (struct dentry *)arg;
-       struct inode         *dir    = parent->d_inode;
+       struct inode         *dir    = d_inode(parent);
         struct ll_inode_info     *plli   = ll_i2info(dir);
         struct ll_inode_info     *clli;
         struct ll_sb_info       *sbi    = ll_i2sbi(dir);
@@ -1604,7 +1604,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
                         rc = md_revalidate_lock(ll_i2mdexp(dir), &it,
                                                 ll_inode2fid(inode), &bits);
                         if (rc == 1) {
-                               if ((*dentryp)->d_inode == NULL) {
+                               if (d_inode(*dentryp) == NULL) {
                                         struct dentry *alias;
  
                                         alias = ll_splice_alias(inode,
@@ -1614,13 +1614,13 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
                                                 return PTR_ERR(alias);
                                         }
                                         *dentryp = alias;
-                               } else if ((*dentryp)->d_inode != inode) {
+                               } else if (d_inode(*dentryp) != inode) {
                                         /* revalidate, but inode is recreated */
                                         CDEBUG(D_READA,
                                               "stale dentry %pd inode %lu/%u, statahead inode %lu/%u\n",
                                               *dentryp,
-                                             (*dentryp)->d_inode->i_ino,
-                                             (*dentryp)->d_inode->i_generation,
+                                             d_inode(*dentryp)->i_ino,
+                                             d_inode(*dentryp)->i_generation,
                                               inode->i_ino,
                                               inode->i_generation);
                                         ll_sai_unplug(sai, entry);
@@ -1666,8 +1666,8 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
  
         /* get parent reference count here, and put it in ll_statahead_thread */
         parent = dget((*dentryp)->d_parent);
-       if (unlikely(sai->sai_inode != parent->d_inode)) {
-               struct ll_inode_info *nlli = ll_i2info(parent->d_inode);
+       if (unlikely(sai->sai_inode != d_inode(parent))) {
+               struct ll_inode_info *nlli = ll_i2info(d_inode(parent));
  
                 CWARN("Race condition, someone changed %pd just now: old parent "DFID", new parent "DFID"\n",
                       *dentryp,
@@ -1689,7 +1689,7 @@ int do_statahead_enter(struct inode *dir, struct dentry **dentryp,
         ll_sai_get(sai);
         lli->lli_sai = sai;
  
-       plli = ll_i2info(parent->d_inode);
+       plli = ll_i2info(d_inode(parent));
         rc = PTR_ERR(kthread_run(ll_statahead_thread, parent,
                                  "ll_sa_%u", plli->lli_opendir_pid));
         thread = &sai->sai_thread;
diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c

index 686b6a5..3711e67 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/symlink.c
+++ b/drivers/staging/lustre/lustre/llite/symlink.c
@@ -120,7 +120,7 @@ failed:
  
  static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ptlrpc_request *request = NULL;
         int rc;
         char *symname = NULL;
diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c

index b439936..e0fcbe1 100644 (file)
--- a/drivers/staging/lustre/lustre/llite/xattr.c
+++ b/drivers/staging/lustre/lustre/llite/xattr.c
@@ -214,7 +214,7 @@ int ll_setxattr_common(struct inode *inode, const char *name,
  int ll_setxattr(struct dentry *dentry, const char *name,
                 const void *value, size_t size, int flags)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         LASSERT(inode);
         LASSERT(name);
@@ -267,7 +267,7 @@ int ll_setxattr(struct dentry *dentry, const char *name,
  
  int ll_removexattr(struct dentry *dentry, const char *name)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         LASSERT(inode);
         LASSERT(name);
@@ -457,7 +457,7 @@ out:
  ssize_t ll_getxattr(struct dentry *dentry, const char *name,
                     void *buffer, size_t size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         LASSERT(inode);
         LASSERT(name);
@@ -545,7 +545,7 @@ out:
  
  ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int rc = 0, rc2 = 0;
         struct lov_mds_md *lmm = NULL;
         struct ptlrpc_request *request = NULL;
diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig

index 81d44c4..2573612 100644 (file)
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -31,12 +31,13 @@ config TCM_PSCSI
         Say Y here to enable the TCM/pSCSI subsystem plugin for non-buffered
         passthrough access to Linux/SCSI device
  
-config TCM_USER
+config TCM_USER2
         tristate "TCM/USER Subsystem Plugin for Linux"
         depends on UIO && NET
         help
         Say Y here to enable the TCM/USER subsystem plugin for a userspace
-       process to handle requests
+       process to handle requests. This is version 2 of the ABI; version 1
+       is obsolete.
  
  source "drivers/target/loopback/Kconfig"
  source "drivers/target/tcm_fc/Kconfig"
diff --git a/drivers/target/Makefile b/drivers/target/Makefile

index bbb4a7d..e619c02 100644 (file)
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -22,7 +22,7 @@ obj-$(CONFIG_TARGET_CORE)     += target_core_mod.o
  obj-$(CONFIG_TCM_IBLOCK)       += target_core_iblock.o
  obj-$(CONFIG_TCM_FILEIO)       += target_core_file.o
  obj-$(CONFIG_TCM_PSCSI)                += target_core_pscsi.o
-obj-$(CONFIG_TCM_USER)         += target_core_user.o
+obj-$(CONFIG_TCM_USER2)                += target_core_user.o
  
  # Fabric modules
  obj-$(CONFIG_LOOPBACK_TARGET)  += loopback/
diff --git a/drivers/target/iscsi/Makefile b/drivers/target/iscsi/Makefile

index 13a9240..0f43be9 100644 (file)
--- a/drivers/target/iscsi/Makefile
+++ b/drivers/target/iscsi/Makefile
@@ -1,6 +1,5 @@
  iscsi_target_mod-y +=          iscsi_target_parameters.o \
                                 iscsi_target_seq_pdu_list.o \
-                               iscsi_target_tq.o \
                                 iscsi_target_auth.o \
                                 iscsi_target_datain_values.o \
                                 iscsi_target_device.o \
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c

index 77d6425..34871a6 100644 (file)
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -33,8 +33,6 @@
  #include <target/iscsi/iscsi_target_core.h>
  #include "iscsi_target_parameters.h"
  #include "iscsi_target_seq_pdu_list.h"
-#include "iscsi_target_tq.h"
-#include "iscsi_target_configfs.h"
  #include "iscsi_target_datain_values.h"
  #include "iscsi_target_erl0.h"
  #include "iscsi_target_erl1.h"
@@ -537,7 +535,7 @@ static struct iscsit_transport iscsi_target_transport = {
  
  static int __init iscsi_target_init_module(void)
  {
-       int ret = 0;
+       int ret = 0, size;
  
         pr_debug("iSCSI-Target "ISCSIT_VERSION"\n");
  
@@ -546,24 +544,21 @@ static int __init iscsi_target_init_module(void)
                 pr_err("Unable to allocate memory for iscsit_global\n");
                 return -1;
         }
+       spin_lock_init(&iscsit_global->ts_bitmap_lock);
         mutex_init(&auth_id_lock);
         spin_lock_init(&sess_idr_lock);
         idr_init(&tiqn_idr);
         idr_init(&sess_idr);
  
-       ret = iscsi_target_register_configfs();
-       if (ret < 0)
+       ret = target_register_template(&iscsi_ops);
+       if (ret)
                 goto out;
  
-       ret = iscsi_thread_set_init();
-       if (ret < 0)
+       size = BITS_TO_LONGS(ISCSIT_BITMAP_BITS) * sizeof(long);
+       iscsit_global->ts_bitmap = vzalloc(size);
+       if (!iscsit_global->ts_bitmap) {
+               pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
                 goto configfs_out;
-
-       if (iscsi_allocate_thread_sets(TARGET_THREAD_SET_COUNT) !=
-                       TARGET_THREAD_SET_COUNT) {
-               pr_err("iscsi_allocate_thread_sets() returned"
-                       " unexpected value!\n");
-               goto ts_out1;
         }
  
         lio_qr_cache = kmem_cache_create("lio_qr_cache",
@@ -572,7 +567,7 @@ static int __init iscsi_target_init_module(void)
         if (!lio_qr_cache) {
                 pr_err("nable to kmem_cache_create() for"
                                 " lio_qr_cache\n");
-               goto ts_out2;
+               goto bitmap_out;
         }
  
         lio_dr_cache = kmem_cache_create("lio_dr_cache",
@@ -617,12 +612,13 @@ dr_out:
         kmem_cache_destroy(lio_dr_cache);
  qr_out:
         kmem_cache_destroy(lio_qr_cache);
-ts_out2:
-       iscsi_deallocate_thread_sets();
-ts_out1:
-       iscsi_thread_set_free();
+bitmap_out:
+       vfree(iscsit_global->ts_bitmap);
  configfs_out:
-       iscsi_target_deregister_configfs();
+       /* XXX: this probably wants it to be it's own unwind step.. */
+       if (iscsit_global->discovery_tpg)
+               iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1);
+       target_unregister_template(&iscsi_ops);
  out:
         kfree(iscsit_global);
         return -ENOMEM;
@@ -630,8 +626,6 @@ out:
  
  static void __exit iscsi_target_cleanup_module(void)
  {
-       iscsi_deallocate_thread_sets();
-       iscsi_thread_set_free();
         iscsit_release_discovery_tpg();
         iscsit_unregister_transport(&iscsi_target_transport);
         kmem_cache_destroy(lio_qr_cache);
@@ -639,8 +633,15 @@ static void __exit iscsi_target_cleanup_module(void)
         kmem_cache_destroy(lio_ooo_cache);
         kmem_cache_destroy(lio_r2t_cache);
  
-       iscsi_target_deregister_configfs();
+       /*
+        * Shutdown discovery sessions and disable discovery TPG
+        */
+       if (iscsit_global->discovery_tpg)
+               iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1);
  
+       target_unregister_template(&iscsi_ops);
+
+       vfree(iscsit_global->ts_bitmap);
         kfree(iscsit_global);
  }
  
@@ -990,7 +991,7 @@ int iscsit_setup_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
         /*
          * Initialize struct se_cmd descriptor from target_core_mod infrastructure
          */
-       transport_init_se_cmd(&cmd->se_cmd, &lio_target_fabric_configfs->tf_ops,
+       transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
                         conn->sess->se_sess, be32_to_cpu(hdr->data_length),
                         cmd->data_direction, sam_task_attr,
                         cmd->sense_buffer + 2);
@@ -1805,8 +1806,7 @@ iscsit_handle_task_mgt_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
                 u8 tcm_function;
                 int ret;
  
-               transport_init_se_cmd(&cmd->se_cmd,
-                                     &lio_target_fabric_configfs->tf_ops,
+               transport_init_se_cmd(&cmd->se_cmd, &iscsi_ops,
                                       conn->sess->se_sess, 0, DMA_NONE,
                                       TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
  
@@ -2155,7 +2155,6 @@ reject:
         cmd->text_in_ptr = NULL;
         return iscsit_reject_cmd(cmd, ISCSI_REASON_PROTOCOL_ERROR, buf);
  }
-EXPORT_SYMBOL(iscsit_handle_text_cmd);
  
  int iscsit_logout_closesession(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
  {
@@ -3715,17 +3714,16 @@ static int iscsit_send_reject(
  
  void iscsit_thread_get_cpumask(struct iscsi_conn *conn)
  {
-       struct iscsi_thread_set *ts = conn->thread_set;
         int ord, cpu;
         /*
-        * thread_id is assigned from iscsit_global->ts_bitmap from
-        * within iscsi_thread_set.c:iscsi_allocate_thread_sets()
+        * bitmap_id is assigned from iscsit_global->ts_bitmap from
+        * within iscsit_start_kthreads()
          *
-        * Here we use thread_id to determine which CPU that this
-        * iSCSI connection's iscsi_thread_set will be scheduled to
+        * Here we use bitmap_id to determine which CPU that this
+        * iSCSI connection's RX/TX threads will be scheduled to
          * execute upon.
          */
-       ord = ts->thread_id % cpumask_weight(cpu_online_mask);
+       ord = conn->bitmap_id % cpumask_weight(cpu_online_mask);
         for_each_online_cpu(cpu) {
                 if (ord-- == 0) {
                         cpumask_set_cpu(cpu, conn->conn_cpumask);
@@ -3914,7 +3912,7 @@ check_rsp_state:
         switch (state) {
         case ISTATE_SEND_LOGOUTRSP:
                 if (!iscsit_logout_post_handler(cmd, conn))
-                       goto restart;
+                       return -ECONNRESET;
                 /* fall through */
         case ISTATE_SEND_STATUS:
         case ISTATE_SEND_ASYNCMSG:
@@ -3942,8 +3940,6 @@ check_rsp_state:
  
  err:
         return -1;
-restart:
-       return -EAGAIN;
  }
  
  static int iscsit_handle_response_queue(struct iscsi_conn *conn)
@@ -3970,21 +3966,13 @@ static int iscsit_handle_response_queue(struct iscsi_conn *conn)
  int iscsi_target_tx_thread(void *arg)
  {
         int ret = 0;
-       struct iscsi_conn *conn;
-       struct iscsi_thread_set *ts = arg;
+       struct iscsi_conn *conn = arg;
         /*
          * Allow ourselves to be interrupted by SIGINT so that a
          * connection recovery / failure event can be triggered externally.
          */
         allow_signal(SIGINT);
  
-restart:
-       conn = iscsi_tx_thread_pre_handler(ts);
-       if (!conn)
-               goto out;
-
-       ret = 0;
-
         while (!kthread_should_stop()) {
                 /*
                  * Ensure that both TX and RX per connection kthreads
@@ -3993,11 +3981,9 @@ restart:
                 iscsit_thread_check_cpumask(conn, current, 1);
  
                 wait_event_interruptible(conn->queues_wq,
-                                        !iscsit_conn_all_queues_empty(conn) ||
-                                        ts->status == ISCSI_THREAD_SET_RESET);
+                                        !iscsit_conn_all_queues_empty(conn));
  
-               if ((ts->status == ISCSI_THREAD_SET_RESET) ||
-                    signal_pending(current))
+               if (signal_pending(current))
                         goto transport_err;
  
  get_immediate:
@@ -4008,15 +3994,14 @@ get_immediate:
                 ret = iscsit_handle_response_queue(conn);
                 if (ret == 1)
                         goto get_immediate;
-               else if (ret == -EAGAIN)
-                       goto restart;
+               else if (ret == -ECONNRESET)
+                       goto out;
                 else if (ret < 0)
                         goto transport_err;
         }
  
  transport_err:
         iscsit_take_action_for_connection_exit(conn);
-       goto restart;
  out:
         return 0;
  }
@@ -4111,8 +4096,7 @@ int iscsi_target_rx_thread(void *arg)
         int ret;
         u8 buffer[ISCSI_HDR_LEN], opcode;
         u32 checksum = 0, digest = 0;
-       struct iscsi_conn *conn = NULL;
-       struct iscsi_thread_set *ts = arg;
+       struct iscsi_conn *conn = arg;
         struct kvec iov;
         /*
          * Allow ourselves to be interrupted by SIGINT so that a
@@ -4120,11 +4104,6 @@ int iscsi_target_rx_thread(void *arg)
          */
         allow_signal(SIGINT);
  
-restart:
-       conn = iscsi_rx_thread_pre_handler(ts);
-       if (!conn)
-               goto out;
-
         if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) {
                 struct completion comp;
                 int rc;
@@ -4134,7 +4113,7 @@ restart:
                 if (rc < 0)
                         goto transport_err;
  
-               goto out;
+               goto transport_err;
         }
  
         while (!kthread_should_stop()) {
@@ -4210,8 +4189,6 @@ transport_err:
         if (!signal_pending(current))
                 atomic_set(&conn->transport_failed, 1);
         iscsit_take_action_for_connection_exit(conn);
-       goto restart;
-out:
         return 0;
  }
  
@@ -4273,7 +4250,24 @@ int iscsit_close_connection(
         if (conn->conn_transport->transport_type == ISCSI_TCP)
                 complete(&conn->conn_logout_comp);
  
-       iscsi_release_thread_set(conn);
+       if (!strcmp(current->comm, ISCSI_RX_THREAD_NAME)) {
+               if (conn->tx_thread &&
+                   cmpxchg(&conn->tx_thread_active, true, false)) {
+                       send_sig(SIGINT, conn->tx_thread, 1);
+                       kthread_stop(conn->tx_thread);
+               }
+       } else if (!strcmp(current->comm, ISCSI_TX_THREAD_NAME)) {
+               if (conn->rx_thread &&
+                   cmpxchg(&conn->rx_thread_active, true, false)) {
+                       send_sig(SIGINT, conn->rx_thread, 1);
+                       kthread_stop(conn->rx_thread);
+               }
+       }
+
+       spin_lock(&iscsit_global->ts_bitmap_lock);
+       bitmap_release_region(iscsit_global->ts_bitmap, conn->bitmap_id,
+                             get_order(1));
+       spin_unlock(&iscsit_global->ts_bitmap_lock);
  
         iscsit_stop_timers_for_cmds(conn);
         iscsit_stop_nopin_response_timer(conn);
@@ -4383,8 +4377,6 @@ int iscsit_close_connection(
  
         iscsit_put_transport(conn->conn_transport);
  
-       conn->thread_set = NULL;
-
         pr_debug("Moving to TARG_CONN_STATE_FREE.\n");
         conn->conn_state = TARG_CONN_STATE_FREE;
         kfree(conn);
@@ -4551,15 +4543,13 @@ static void iscsit_logout_post_handler_closesession(
         struct iscsi_conn *conn)
  {
         struct iscsi_session *sess = conn->sess;
-
-       iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
-       iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+       int sleep = cmpxchg(&conn->tx_thread_active, true, false);
  
         atomic_set(&conn->conn_logout_remove, 0);
         complete(&conn->conn_logout_comp);
  
         iscsit_dec_conn_usage_count(conn);
-       iscsit_stop_session(sess, 1, 1);
+       iscsit_stop_session(sess, sleep, sleep);
         iscsit_dec_session_usage_count(sess);
         target_put_session(sess->se_sess);
  }
@@ -4567,13 +4557,12 @@ static void iscsit_logout_post_handler_closesession(
  static void iscsit_logout_post_handler_samecid(
         struct iscsi_conn *conn)
  {
-       iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD);
-       iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD);
+       int sleep = cmpxchg(&conn->tx_thread_active, true, false);
  
         atomic_set(&conn->conn_logout_remove, 0);
         complete(&conn->conn_logout_comp);
  
-       iscsit_cause_connection_reinstatement(conn, 1);
+       iscsit_cause_connection_reinstatement(conn, sleep);
         iscsit_dec_conn_usage_count(conn);
  }
  
diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h

index e936d56..7d0f9c0 100644 (file)
--- a/drivers/target/iscsi/iscsi_target.h
+++ b/drivers/target/iscsi/iscsi_target.h
@@ -35,7 +35,7 @@ extern void iscsit_stop_session(struct iscsi_session *, int, int);
  extern int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *, int);
  
  extern struct iscsit_global *iscsit_global;
-extern struct target_fabric_configfs *lio_target_fabric_configfs;
+extern const struct target_core_fabric_ops iscsi_ops;
  
  extern struct kmem_cache *lio_dr_cache;
  extern struct kmem_cache *lio_ooo_cache;
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c

index 48384b6..469fce4 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -37,9 +37,6 @@
  #include "iscsi_target_util.h"
  #include "iscsi_target.h"
  #include <target/iscsi/iscsi_target_stat.h>
-#include "iscsi_target_configfs.h"
-
-struct target_fabric_configfs *lio_target_fabric_configfs;
  
  struct lio_target_configfs_attribute {
         struct configfs_attribute attr;
@@ -1052,6 +1049,11 @@ TPG_ATTR(default_erl, S_IRUGO | S_IWUSR);
   */
  DEF_TPG_ATTRIB(t10_pi);
  TPG_ATTR(t10_pi, S_IRUGO | S_IWUSR);
+/*
+ * Define iscsi_tpg_attrib_s_fabric_prot_type
+ */
+DEF_TPG_ATTRIB(fabric_prot_type);
+TPG_ATTR(fabric_prot_type, S_IRUGO | S_IWUSR);
  
  static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
         &iscsi_tpg_attrib_authentication.attr,
@@ -1065,6 +1067,7 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = {
         &iscsi_tpg_attrib_demo_mode_discovery.attr,
         &iscsi_tpg_attrib_default_erl.attr,
         &iscsi_tpg_attrib_t10_pi.attr,
+       &iscsi_tpg_attrib_fabric_prot_type.attr,
         NULL,
  };
  
@@ -1410,8 +1413,18 @@ out:
  
  TF_TPG_BASE_ATTR(lio_target, enable, S_IRUGO | S_IWUSR);
  
+static ssize_t lio_target_tpg_show_dynamic_sessions(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       return target_show_dynamic_sessions(se_tpg, page);
+}
+
+TF_TPG_BASE_ATTR_RO(lio_target, dynamic_sessions);
+
  static struct configfs_attribute *lio_target_tpg_attrs[] = {
         &lio_target_tpg_enable.attr,
+       &lio_target_tpg_dynamic_sessions.attr,
         NULL,
  };
  
@@ -1450,10 +1463,8 @@ static struct se_portal_group *lio_target_tiqn_addtpg(
         if (!tpg)
                 return NULL;
  
-       ret = core_tpg_register(
-                       &lio_target_fabric_configfs->tf_ops,
-                       wwn, &tpg->tpg_se_tpg, tpg,
-                       TRANSPORT_TPG_TYPE_NORMAL);
+       ret = core_tpg_register(&iscsi_ops, wwn, &tpg->tpg_se_tpg,
+                               tpg, TRANSPORT_TPG_TYPE_NORMAL);
         if (ret < 0)
                 return NULL;
  
@@ -1872,6 +1883,20 @@ static int lio_tpg_check_prod_mode_write_protect(
         return tpg->tpg_attrib.prod_mode_write_protect;
  }
  
+static int lio_tpg_check_prot_fabric_only(
+       struct se_portal_group *se_tpg)
+{
+       struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
+       /*
+        * Only report fabric_prot_type if t10_pi has also been enabled
+        * for incoming ib_isert sessions.
+        */
+       if (!tpg->tpg_attrib.t10_pi)
+               return 0;
+
+       return tpg->tpg_attrib.fabric_prot_type;
+}
+
  static void lio_tpg_release_fabric_acl(
         struct se_portal_group *se_tpg,
         struct se_node_acl *se_acl)
@@ -1953,115 +1978,60 @@ static void lio_release_cmd(struct se_cmd *se_cmd)
         iscsit_release_cmd(cmd);
  }
  
-/* End functions for target_core_fabric_ops */
-
-int iscsi_target_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       lio_target_fabric_configfs = NULL;
-       fabric = target_fabric_configfs_init(THIS_MODULE, "iscsi");
-       if (IS_ERR(fabric)) {
-               pr_err("target_fabric_configfs_init() for"
-                               " LIO-Target failed!\n");
-               return PTR_ERR(fabric);
-       }
-       /*
-        * Setup the fabric API of function pointers used by target_core_mod..
-        */
-       fabric->tf_ops.get_fabric_name = &iscsi_get_fabric_name;
-       fabric->tf_ops.get_fabric_proto_ident = &iscsi_get_fabric_proto_ident;
-       fabric->tf_ops.tpg_get_wwn = &lio_tpg_get_endpoint_wwn;
-       fabric->tf_ops.tpg_get_tag = &lio_tpg_get_tag;
-       fabric->tf_ops.tpg_get_default_depth = &lio_tpg_get_default_depth;
-       fabric->tf_ops.tpg_get_pr_transport_id = &iscsi_get_pr_transport_id;
-       fabric->tf_ops.tpg_get_pr_transport_id_len =
-                               &iscsi_get_pr_transport_id_len;
-       fabric->tf_ops.tpg_parse_pr_out_transport_id =
-                               &iscsi_parse_pr_out_transport_id;
-       fabric->tf_ops.tpg_check_demo_mode = &lio_tpg_check_demo_mode;
-       fabric->tf_ops.tpg_check_demo_mode_cache =
-                               &lio_tpg_check_demo_mode_cache;
-       fabric->tf_ops.tpg_check_demo_mode_write_protect =
-                               &lio_tpg_check_demo_mode_write_protect;
-       fabric->tf_ops.tpg_check_prod_mode_write_protect =
-                               &lio_tpg_check_prod_mode_write_protect;
-       fabric->tf_ops.tpg_alloc_fabric_acl = &lio_tpg_alloc_fabric_acl;
-       fabric->tf_ops.tpg_release_fabric_acl = &lio_tpg_release_fabric_acl;
-       fabric->tf_ops.tpg_get_inst_index = &lio_tpg_get_inst_index;
-       fabric->tf_ops.check_stop_free = &lio_check_stop_free,
-       fabric->tf_ops.release_cmd = &lio_release_cmd;
-       fabric->tf_ops.shutdown_session = &lio_tpg_shutdown_session;
-       fabric->tf_ops.close_session = &lio_tpg_close_session;
-       fabric->tf_ops.sess_get_index = &lio_sess_get_index;
-       fabric->tf_ops.sess_get_initiator_sid = &lio_sess_get_initiator_sid;
-       fabric->tf_ops.write_pending = &lio_write_pending;
-       fabric->tf_ops.write_pending_status = &lio_write_pending_status;
-       fabric->tf_ops.set_default_node_attributes =
-                               &lio_set_default_node_attributes;
-       fabric->tf_ops.get_task_tag = &iscsi_get_task_tag;
-       fabric->tf_ops.get_cmd_state = &iscsi_get_cmd_state;
-       fabric->tf_ops.queue_data_in = &lio_queue_data_in;
-       fabric->tf_ops.queue_status = &lio_queue_status;
-       fabric->tf_ops.queue_tm_rsp = &lio_queue_tm_rsp;
-       fabric->tf_ops.aborted_task = &lio_aborted_task;
-       /*
-        * Setup function pointers for generic logic in target_core_fabric_configfs.c
-        */
-       fabric->tf_ops.fabric_make_wwn = &lio_target_call_coreaddtiqn;
-       fabric->tf_ops.fabric_drop_wwn = &lio_target_call_coredeltiqn;
-       fabric->tf_ops.fabric_make_tpg = &lio_target_tiqn_addtpg;
-       fabric->tf_ops.fabric_drop_tpg = &lio_target_tiqn_deltpg;
-       fabric->tf_ops.fabric_post_link = NULL;
-       fabric->tf_ops.fabric_pre_unlink = NULL;
-       fabric->tf_ops.fabric_make_np = &lio_target_call_addnptotpg;
-       fabric->tf_ops.fabric_drop_np = &lio_target_call_delnpfromtpg;
-       fabric->tf_ops.fabric_make_nodeacl = &lio_target_make_nodeacl;
-       fabric->tf_ops.fabric_drop_nodeacl = &lio_target_drop_nodeacl;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        * sturct config_item_type's
-        */
-       fabric->tf_cit_tmpl.tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs;
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_auth_cit.ct_attrs = lio_target_tpg_auth_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs;
-
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() for"
-                               " LIO-Target failed!\n");
-               target_fabric_configfs_free(fabric);
-               return ret;
-       }
-
-       lio_target_fabric_configfs = fabric;
-       pr_debug("LIO_TARGET[0] - Set fabric ->"
-                       " lio_target_fabric_configfs\n");
-       return 0;
-}
-
-
-void iscsi_target_deregister_configfs(void)
-{
-       if (!lio_target_fabric_configfs)
-               return;
-       /*
-        * Shutdown discovery sessions and disable discovery TPG
-        */
-       if (iscsit_global->discovery_tpg)
-               iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1);
-
-       target_fabric_configfs_deregister(lio_target_fabric_configfs);
-       lio_target_fabric_configfs = NULL;
-       pr_debug("LIO_TARGET[0] - Cleared"
-                               " lio_target_fabric_configfs\n");
-}
+const struct target_core_fabric_ops iscsi_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "iscsi",
+       .get_fabric_name                = iscsi_get_fabric_name,
+       .get_fabric_proto_ident         = iscsi_get_fabric_proto_ident,
+       .tpg_get_wwn                    = lio_tpg_get_endpoint_wwn,
+       .tpg_get_tag                    = lio_tpg_get_tag,
+       .tpg_get_default_depth          = lio_tpg_get_default_depth,
+       .tpg_get_pr_transport_id        = iscsi_get_pr_transport_id,
+       .tpg_get_pr_transport_id_len    = iscsi_get_pr_transport_id_len,
+       .tpg_parse_pr_out_transport_id  = iscsi_parse_pr_out_transport_id,
+       .tpg_check_demo_mode            = lio_tpg_check_demo_mode,
+       .tpg_check_demo_mode_cache      = lio_tpg_check_demo_mode_cache,
+       .tpg_check_demo_mode_write_protect =
+                       lio_tpg_check_demo_mode_write_protect,
+       .tpg_check_prod_mode_write_protect =
+                       lio_tpg_check_prod_mode_write_protect,
+       .tpg_check_prot_fabric_only     = &lio_tpg_check_prot_fabric_only,
+       .tpg_alloc_fabric_acl           = lio_tpg_alloc_fabric_acl,
+       .tpg_release_fabric_acl         = lio_tpg_release_fabric_acl,
+       .tpg_get_inst_index             = lio_tpg_get_inst_index,
+       .check_stop_free                = lio_check_stop_free,
+       .release_cmd                    = lio_release_cmd,
+       .shutdown_session               = lio_tpg_shutdown_session,
+       .close_session                  = lio_tpg_close_session,
+       .sess_get_index                 = lio_sess_get_index,
+       .sess_get_initiator_sid         = lio_sess_get_initiator_sid,
+       .write_pending                  = lio_write_pending,
+       .write_pending_status           = lio_write_pending_status,
+       .set_default_node_attributes    = lio_set_default_node_attributes,
+       .get_task_tag                   = iscsi_get_task_tag,
+       .get_cmd_state                  = iscsi_get_cmd_state,
+       .queue_data_in                  = lio_queue_data_in,
+       .queue_status                   = lio_queue_status,
+       .queue_tm_rsp                   = lio_queue_tm_rsp,
+       .aborted_task                   = lio_aborted_task,
+       .fabric_make_wwn                = lio_target_call_coreaddtiqn,
+       .fabric_drop_wwn                = lio_target_call_coredeltiqn,
+       .fabric_make_tpg                = lio_target_tiqn_addtpg,
+       .fabric_drop_tpg                = lio_target_tiqn_deltpg,
+       .fabric_make_np                 = lio_target_call_addnptotpg,
+       .fabric_drop_np                 = lio_target_call_delnpfromtpg,
+       .fabric_make_nodeacl            = lio_target_make_nodeacl,
+       .fabric_drop_nodeacl            = lio_target_drop_nodeacl,
+
+       .tfc_discovery_attrs            = lio_target_discovery_auth_attrs,
+       .tfc_wwn_attrs                  = lio_target_wwn_attrs,
+       .tfc_tpg_base_attrs             = lio_target_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = lio_target_tpg_attrib_attrs,
+       .tfc_tpg_auth_attrs             = lio_target_tpg_auth_attrs,
+       .tfc_tpg_param_attrs            = lio_target_tpg_param_attrs,
+       .tfc_tpg_np_base_attrs          = lio_target_portal_attrs,
+       .tfc_tpg_nacl_base_attrs        = lio_target_initiator_attrs,
+       .tfc_tpg_nacl_attrib_attrs      = lio_target_nacl_attrib_attrs,
+       .tfc_tpg_nacl_auth_attrs        = lio_target_nacl_auth_attrs,
+       .tfc_tpg_nacl_param_attrs       = lio_target_nacl_param_attrs,
+};
diff --git a/drivers/target/iscsi/iscsi_target_configfs.h b/drivers/target/iscsi/iscsi_target_configfs.h

deleted file mode 100644 (file)

index 8cd5a63..0000000
--- a/drivers/target/iscsi/iscsi_target_configfs.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef ISCSI_TARGET_CONFIGFS_H
-#define ISCSI_TARGET_CONFIGFS_H
-
-extern int iscsi_target_register_configfs(void);
-extern void iscsi_target_deregister_configfs(void);
-
-#endif /* ISCSI_TARGET_CONFIGFS_H */
diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c

index bdd8731..959a14c 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_erl0.c
+++ b/drivers/target/iscsi/iscsi_target_erl0.c
@@ -23,7 +23,6 @@
  
  #include <target/iscsi/iscsi_target_core.h>
  #include "iscsi_target_seq_pdu_list.h"
-#include "iscsi_target_tq.h"
  #include "iscsi_target_erl0.h"
  #include "iscsi_target_erl1.h"
  #include "iscsi_target_erl2.h"
@@ -860,7 +859,10 @@ void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *conn)
         }
         spin_unlock_bh(&conn->state_lock);
  
-       iscsi_thread_set_force_reinstatement(conn);
+       if (conn->tx_thread && conn->tx_thread_active)
+               send_sig(SIGINT, conn->tx_thread, 1);
+       if (conn->rx_thread && conn->rx_thread_active)
+               send_sig(SIGINT, conn->rx_thread, 1);
  
  sleep:
         wait_for_completion(&conn->conn_wait_rcfr_comp);
@@ -885,10 +887,10 @@ void iscsit_cause_connection_reinstatement(struct iscsi_conn *conn, int sleep)
                 return;
         }
  
-       if (iscsi_thread_set_force_reinstatement(conn) < 0) {
-               spin_unlock_bh(&conn->state_lock);
-               return;
-       }
+       if (conn->tx_thread && conn->tx_thread_active)
+               send_sig(SIGINT, conn->tx_thread, 1);
+       if (conn->rx_thread && conn->rx_thread_active)
+               send_sig(SIGINT, conn->rx_thread, 1);
  
         atomic_set(&conn->connection_reinstatement, 1);
         if (!sleep) {
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c

index 153fb66..8ce94ff 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -26,7 +26,6 @@
  
  #include <target/iscsi/iscsi_target_core.h>
  #include <target/iscsi/iscsi_target_stat.h>
-#include "iscsi_target_tq.h"
  #include "iscsi_target_device.h"
  #include "iscsi_target_nego.h"
  #include "iscsi_target_erl0.h"
@@ -699,6 +698,51 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn)
                 iscsit_start_nopin_timer(conn);
  }
  
+static int iscsit_start_kthreads(struct iscsi_conn *conn)
+{
+       int ret = 0;
+
+       spin_lock(&iscsit_global->ts_bitmap_lock);
+       conn->bitmap_id = bitmap_find_free_region(iscsit_global->ts_bitmap,
+                                       ISCSIT_BITMAP_BITS, get_order(1));
+       spin_unlock(&iscsit_global->ts_bitmap_lock);
+
+       if (conn->bitmap_id < 0) {
+               pr_err("bitmap_find_free_region() failed for"
+                      " iscsit_start_kthreads()\n");
+               return -ENOMEM;
+       }
+
+       conn->tx_thread = kthread_run(iscsi_target_tx_thread, conn,
+                                     "%s", ISCSI_TX_THREAD_NAME);
+       if (IS_ERR(conn->tx_thread)) {
+               pr_err("Unable to start iscsi_target_tx_thread\n");
+               ret = PTR_ERR(conn->tx_thread);
+               goto out_bitmap;
+       }
+       conn->tx_thread_active = true;
+
+       conn->rx_thread = kthread_run(iscsi_target_rx_thread, conn,
+                                     "%s", ISCSI_RX_THREAD_NAME);
+       if (IS_ERR(conn->rx_thread)) {
+               pr_err("Unable to start iscsi_target_rx_thread\n");
+               ret = PTR_ERR(conn->rx_thread);
+               goto out_tx;
+       }
+       conn->rx_thread_active = true;
+
+       return 0;
+out_tx:
+       kthread_stop(conn->tx_thread);
+       conn->tx_thread_active = false;
+out_bitmap:
+       spin_lock(&iscsit_global->ts_bitmap_lock);
+       bitmap_release_region(iscsit_global->ts_bitmap, conn->bitmap_id,
+                             get_order(1));
+       spin_unlock(&iscsit_global->ts_bitmap_lock);
+       return ret;
+}
+
  int iscsi_post_login_handler(
         struct iscsi_np *np,
         struct iscsi_conn *conn,
@@ -709,7 +753,7 @@ int iscsi_post_login_handler(
         struct se_session *se_sess = sess->se_sess;
         struct iscsi_portal_group *tpg = sess->tpg;
         struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
-       struct iscsi_thread_set *ts;
+       int rc;
  
         iscsit_inc_conn_usage_count(conn);
  
@@ -724,7 +768,6 @@ int iscsi_post_login_handler(
         /*
          * SCSI Initiator -> SCSI Target Port Mapping
          */
-       ts = iscsi_get_thread_set();
         if (!zero_tsih) {
                 iscsi_set_session_parameters(sess->sess_ops,
                                 conn->param_list, 0);
@@ -751,9 +794,11 @@ int iscsi_post_login_handler(
                         sess->sess_ops->InitiatorName);
                 spin_unlock_bh(&sess->conn_lock);
  
-               iscsi_post_login_start_timers(conn);
+               rc = iscsit_start_kthreads(conn);
+               if (rc)
+                       return rc;
  
-               iscsi_activate_thread_set(conn, ts);
+               iscsi_post_login_start_timers(conn);
                 /*
                  * Determine CPU mask to ensure connection's RX and TX kthreads
                  * are scheduled on the same CPU.
@@ -810,8 +855,11 @@ int iscsi_post_login_handler(
                 " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt);
         spin_unlock_bh(&se_tpg->session_lock);
  
+       rc = iscsit_start_kthreads(conn);
+       if (rc)
+               return rc;
+
         iscsi_post_login_start_timers(conn);
-       iscsi_activate_thread_set(conn, ts);
         /*
          * Determine CPU mask to ensure connection's RX and TX kthreads
          * are scheduled on the same CPU.
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c

index bdd127c..e8a2408 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -68,10 +68,8 @@ int iscsit_load_discovery_tpg(void)
                 return -1;
         }
  
-       ret = core_tpg_register(
-                       &lio_target_fabric_configfs->tf_ops,
-                       NULL, &tpg->tpg_se_tpg, tpg,
-                       TRANSPORT_TPG_TYPE_DISCOVERY);
+       ret = core_tpg_register(&iscsi_ops, NULL, &tpg->tpg_se_tpg,
+                               tpg, TRANSPORT_TPG_TYPE_DISCOVERY);
         if (ret < 0) {
                 kfree(tpg);
                 return -1;
@@ -228,6 +226,7 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg)
         a->demo_mode_discovery = TA_DEMO_MODE_DISCOVERY;
         a->default_erl = TA_DEFAULT_ERL;
         a->t10_pi = TA_DEFAULT_T10_PI;
+       a->fabric_prot_type = TA_DEFAULT_FABRIC_PROT_TYPE;
  }
  
  int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg)
@@ -878,3 +877,21 @@ int iscsit_ta_t10_pi(
  
         return 0;
  }
+
+int iscsit_ta_fabric_prot_type(
+       struct iscsi_portal_group *tpg,
+       u32 prot_type)
+{
+       struct iscsi_tpg_attrib *a = &tpg->tpg_attrib;
+
+       if ((prot_type != 0) && (prot_type != 1) && (prot_type != 3)) {
+               pr_err("Illegal value for fabric_prot_type: %u\n", prot_type);
+               return -EINVAL;
+       }
+
+       a->fabric_prot_type = prot_type;
+       pr_debug("iSCSI_TPG[%hu] - T10 Fabric Protection Type: %u\n",
+                tpg->tpgt, prot_type);
+
+       return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h

index e726533..95ff5bd 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_tpg.h
+++ b/drivers/target/iscsi/iscsi_target_tpg.h
@@ -39,5 +39,6 @@ extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32);
  extern int iscsit_ta_demo_mode_discovery(struct iscsi_portal_group *, u32);
  extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32);
  extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32);
+extern int iscsit_ta_fabric_prot_type(struct iscsi_portal_group *, u32);
  
  #endif /* ISCSI_TARGET_TPG_H */
diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c

deleted file mode 100644 (file)

index 26aa509..0000000
--- a/drivers/target/iscsi/iscsi_target_tq.c
+++ /dev/null
@@ -1,495 +0,0 @@
-/*******************************************************************************
- * This file contains the iSCSI Login Thread and Thread Queue functions.
- *
- * (c) Copyright 2007-2013 Datera, Inc.
- *
- * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- ******************************************************************************/
-
-#include <linux/kthread.h>
-#include <linux/list.h>
-#include <linux/bitmap.h>
-
-#include <target/iscsi/iscsi_target_core.h>
-#include "iscsi_target_tq.h"
-#include "iscsi_target.h"
-
-static LIST_HEAD(inactive_ts_list);
-static DEFINE_SPINLOCK(inactive_ts_lock);
-static DEFINE_SPINLOCK(ts_bitmap_lock);
-
-static void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *ts)
-{
-       if (!list_empty(&ts->ts_list)) {
-               WARN_ON(1);
-               return;
-       }
-       spin_lock(&inactive_ts_lock);
-       list_add_tail(&ts->ts_list, &inactive_ts_list);
-       iscsit_global->inactive_ts++;
-       spin_unlock(&inactive_ts_lock);
-}
-
-static struct iscsi_thread_set *iscsi_get_ts_from_inactive_list(void)
-{
-       struct iscsi_thread_set *ts;
-
-       spin_lock(&inactive_ts_lock);
-       if (list_empty(&inactive_ts_list)) {
-               spin_unlock(&inactive_ts_lock);
-               return NULL;
-       }
-
-       ts = list_first_entry(&inactive_ts_list, struct iscsi_thread_set, ts_list);
-
-       list_del_init(&ts->ts_list);
-       iscsit_global->inactive_ts--;
-       spin_unlock(&inactive_ts_lock);
-
-       return ts;
-}
-
-int iscsi_allocate_thread_sets(u32 thread_pair_count)
-{
-       int allocated_thread_pair_count = 0, i, thread_id;
-       struct iscsi_thread_set *ts = NULL;
-
-       for (i = 0; i < thread_pair_count; i++) {
-               ts = kzalloc(sizeof(struct iscsi_thread_set), GFP_KERNEL);
-               if (!ts) {
-                       pr_err("Unable to allocate memory for"
-                                       " thread set.\n");
-                       return allocated_thread_pair_count;
-               }
-               /*
-                * Locate the next available regision in the thread_set_bitmap
-                */
-               spin_lock(&ts_bitmap_lock);
-               thread_id = bitmap_find_free_region(iscsit_global->ts_bitmap,
-                               iscsit_global->ts_bitmap_count, get_order(1));
-               spin_unlock(&ts_bitmap_lock);
-               if (thread_id < 0) {
-                       pr_err("bitmap_find_free_region() failed for"
-                               " thread_set_bitmap\n");
-                       kfree(ts);
-                       return allocated_thread_pair_count;
-               }
-
-               ts->thread_id = thread_id;
-               ts->status = ISCSI_THREAD_SET_FREE;
-               INIT_LIST_HEAD(&ts->ts_list);
-               spin_lock_init(&ts->ts_state_lock);
-               init_completion(&ts->rx_restart_comp);
-               init_completion(&ts->tx_restart_comp);
-               init_completion(&ts->rx_start_comp);
-               init_completion(&ts->tx_start_comp);
-               sema_init(&ts->ts_activate_sem, 0);
-
-               ts->create_threads = 1;
-               ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s",
-                                       ISCSI_TX_THREAD_NAME);
-               if (IS_ERR(ts->tx_thread)) {
-                       dump_stack();
-                       pr_err("Unable to start iscsi_target_tx_thread\n");
-                       break;
-               }
-
-               ts->rx_thread = kthread_run(iscsi_target_rx_thread, ts, "%s",
-                                       ISCSI_RX_THREAD_NAME);
-               if (IS_ERR(ts->rx_thread)) {
-                       kthread_stop(ts->tx_thread);
-                       pr_err("Unable to start iscsi_target_rx_thread\n");
-                       break;
-               }
-               ts->create_threads = 0;
-
-               iscsi_add_ts_to_inactive_list(ts);
-               allocated_thread_pair_count++;
-       }
-
-       pr_debug("Spawned %d thread set(s) (%d total threads).\n",
-               allocated_thread_pair_count, allocated_thread_pair_count * 2);
-       return allocated_thread_pair_count;
-}
-
-static void iscsi_deallocate_thread_one(struct iscsi_thread_set *ts)
-{
-       spin_lock_bh(&ts->ts_state_lock);
-       ts->status = ISCSI_THREAD_SET_DIE;
-
-       if (ts->rx_thread) {
-               complete(&ts->rx_start_comp);
-               spin_unlock_bh(&ts->ts_state_lock);
-               kthread_stop(ts->rx_thread);
-               spin_lock_bh(&ts->ts_state_lock);
-       }
-       if (ts->tx_thread) {
-               complete(&ts->tx_start_comp);
-               spin_unlock_bh(&ts->ts_state_lock);
-               kthread_stop(ts->tx_thread);
-               spin_lock_bh(&ts->ts_state_lock);
-       }
-       spin_unlock_bh(&ts->ts_state_lock);
-       /*
-        * Release this thread_id in the thread_set_bitmap
-        */
-       spin_lock(&ts_bitmap_lock);
-       bitmap_release_region(iscsit_global->ts_bitmap,
-                       ts->thread_id, get_order(1));
-       spin_unlock(&ts_bitmap_lock);
-
-       kfree(ts);
-}
-
-void iscsi_deallocate_thread_sets(void)
-{
-       struct iscsi_thread_set *ts = NULL;
-       u32 released_count = 0;
-
-       while ((ts = iscsi_get_ts_from_inactive_list())) {
-
-               iscsi_deallocate_thread_one(ts);
-               released_count++;
-       }
-
-       if (released_count)
-               pr_debug("Stopped %d thread set(s) (%d total threads)."
-                       "\n", released_count, released_count * 2);
-}
-
-static void iscsi_deallocate_extra_thread_sets(void)
-{
-       u32 orig_count, released_count = 0;
-       struct iscsi_thread_set *ts = NULL;
-
-       orig_count = TARGET_THREAD_SET_COUNT;
-
-       while ((iscsit_global->inactive_ts + 1) > orig_count) {
-               ts = iscsi_get_ts_from_inactive_list();
-               if (!ts)
-                       break;
-
-               iscsi_deallocate_thread_one(ts);
-               released_count++;
-       }
-
-       if (released_count)
-               pr_debug("Stopped %d thread set(s) (%d total threads)."
-                       "\n", released_count, released_count * 2);
-}
-
-void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts)
-{
-       spin_lock_bh(&ts->ts_state_lock);
-       conn->thread_set = ts;
-       ts->conn = conn;
-       ts->status = ISCSI_THREAD_SET_ACTIVE;
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       complete(&ts->rx_start_comp);
-       complete(&ts->tx_start_comp);
-
-       down(&ts->ts_activate_sem);
-}
-
-struct iscsi_thread_set *iscsi_get_thread_set(void)
-{
-       struct iscsi_thread_set *ts;
-
-get_set:
-       ts = iscsi_get_ts_from_inactive_list();
-       if (!ts) {
-               iscsi_allocate_thread_sets(1);
-               goto get_set;
-       }
-
-       ts->delay_inactive = 1;
-       ts->signal_sent = 0;
-       ts->thread_count = 2;
-       init_completion(&ts->rx_restart_comp);
-       init_completion(&ts->tx_restart_comp);
-       sema_init(&ts->ts_activate_sem, 0);
-
-       return ts;
-}
-
-void iscsi_set_thread_clear(struct iscsi_conn *conn, u8 thread_clear)
-{
-       struct iscsi_thread_set *ts = NULL;
-
-       if (!conn->thread_set) {
-               pr_err("struct iscsi_conn->thread_set is NULL\n");
-               return;
-       }
-       ts = conn->thread_set;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       ts->thread_clear &= ~thread_clear;
-
-       if ((thread_clear & ISCSI_CLEAR_RX_THREAD) &&
-           (ts->blocked_threads & ISCSI_BLOCK_RX_THREAD))
-               complete(&ts->rx_restart_comp);
-       else if ((thread_clear & ISCSI_CLEAR_TX_THREAD) &&
-                (ts->blocked_threads & ISCSI_BLOCK_TX_THREAD))
-               complete(&ts->tx_restart_comp);
-       spin_unlock_bh(&ts->ts_state_lock);
-}
-
-void iscsi_set_thread_set_signal(struct iscsi_conn *conn, u8 signal_sent)
-{
-       struct iscsi_thread_set *ts = NULL;
-
-       if (!conn->thread_set) {
-               pr_err("struct iscsi_conn->thread_set is NULL\n");
-               return;
-       }
-       ts = conn->thread_set;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       ts->signal_sent |= signal_sent;
-       spin_unlock_bh(&ts->ts_state_lock);
-}
-
-int iscsi_release_thread_set(struct iscsi_conn *conn)
-{
-       int thread_called = 0;
-       struct iscsi_thread_set *ts = NULL;
-
-       if (!conn || !conn->thread_set) {
-               pr_err("connection or thread set pointer is NULL\n");
-               BUG();
-       }
-       ts = conn->thread_set;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       ts->status = ISCSI_THREAD_SET_RESET;
-
-       if (!strncmp(current->comm, ISCSI_RX_THREAD_NAME,
-                       strlen(ISCSI_RX_THREAD_NAME)))
-               thread_called = ISCSI_RX_THREAD;
-       else if (!strncmp(current->comm, ISCSI_TX_THREAD_NAME,
-                       strlen(ISCSI_TX_THREAD_NAME)))
-               thread_called = ISCSI_TX_THREAD;
-
-       if (ts->rx_thread && (thread_called == ISCSI_TX_THREAD) &&
-          (ts->thread_clear & ISCSI_CLEAR_RX_THREAD)) {
-
-               if (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD)) {
-                       send_sig(SIGINT, ts->rx_thread, 1);
-                       ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
-               }
-               ts->blocked_threads |= ISCSI_BLOCK_RX_THREAD;
-               spin_unlock_bh(&ts->ts_state_lock);
-               wait_for_completion(&ts->rx_restart_comp);
-               spin_lock_bh(&ts->ts_state_lock);
-               ts->blocked_threads &= ~ISCSI_BLOCK_RX_THREAD;
-       }
-       if (ts->tx_thread && (thread_called == ISCSI_RX_THREAD) &&
-          (ts->thread_clear & ISCSI_CLEAR_TX_THREAD)) {
-
-               if (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD)) {
-                       send_sig(SIGINT, ts->tx_thread, 1);
-                       ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
-               }
-               ts->blocked_threads |= ISCSI_BLOCK_TX_THREAD;
-               spin_unlock_bh(&ts->ts_state_lock);
-               wait_for_completion(&ts->tx_restart_comp);
-               spin_lock_bh(&ts->ts_state_lock);
-               ts->blocked_threads &= ~ISCSI_BLOCK_TX_THREAD;
-       }
-
-       ts->conn = NULL;
-       ts->status = ISCSI_THREAD_SET_FREE;
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       return 0;
-}
-
-int iscsi_thread_set_force_reinstatement(struct iscsi_conn *conn)
-{
-       struct iscsi_thread_set *ts;
-
-       if (!conn->thread_set)
-               return -1;
-       ts = conn->thread_set;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (ts->status != ISCSI_THREAD_SET_ACTIVE) {
-               spin_unlock_bh(&ts->ts_state_lock);
-               return -1;
-       }
-
-       if (ts->tx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD))) {
-               send_sig(SIGINT, ts->tx_thread, 1);
-               ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD;
-       }
-       if (ts->rx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD))) {
-               send_sig(SIGINT, ts->rx_thread, 1);
-               ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD;
-       }
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       return 0;
-}
-
-static void iscsi_check_to_add_additional_sets(void)
-{
-       int thread_sets_add;
-
-       spin_lock(&inactive_ts_lock);
-       thread_sets_add = iscsit_global->inactive_ts;
-       spin_unlock(&inactive_ts_lock);
-       if (thread_sets_add == 1)
-               iscsi_allocate_thread_sets(1);
-}
-
-static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts)
-{
-       spin_lock_bh(&ts->ts_state_lock);
-       if (ts->status == ISCSI_THREAD_SET_DIE || kthread_should_stop() ||
-           signal_pending(current)) {
-               spin_unlock_bh(&ts->ts_state_lock);
-               return -1;
-       }
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       return 0;
-}
-
-struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts)
-{
-       int ret;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (ts->create_threads) {
-               spin_unlock_bh(&ts->ts_state_lock);
-               goto sleep;
-       }
-
-       if (ts->status != ISCSI_THREAD_SET_DIE)
-               flush_signals(current);
-
-       if (ts->delay_inactive && (--ts->thread_count == 0)) {
-               spin_unlock_bh(&ts->ts_state_lock);
-
-               if (!iscsit_global->in_shutdown)
-                       iscsi_deallocate_extra_thread_sets();
-
-               iscsi_add_ts_to_inactive_list(ts);
-               spin_lock_bh(&ts->ts_state_lock);
-       }
-
-       if ((ts->status == ISCSI_THREAD_SET_RESET) &&
-           (ts->thread_clear & ISCSI_CLEAR_RX_THREAD))
-               complete(&ts->rx_restart_comp);
-
-       ts->thread_clear &= ~ISCSI_CLEAR_RX_THREAD;
-       spin_unlock_bh(&ts->ts_state_lock);
-sleep:
-       ret = wait_for_completion_interruptible(&ts->rx_start_comp);
-       if (ret != 0)
-               return NULL;
-
-       if (iscsi_signal_thread_pre_handler(ts) < 0)
-               return NULL;
-
-       iscsi_check_to_add_additional_sets();
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (!ts->conn) {
-               pr_err("struct iscsi_thread_set->conn is NULL for"
-                       " RX thread_id: %s/%d\n", current->comm, current->pid);
-               spin_unlock_bh(&ts->ts_state_lock);
-               return NULL;
-       }
-       ts->thread_clear |= ISCSI_CLEAR_RX_THREAD;
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       up(&ts->ts_activate_sem);
-
-       return ts->conn;
-}
-
-struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts)
-{
-       int ret;
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (ts->create_threads) {
-               spin_unlock_bh(&ts->ts_state_lock);
-               goto sleep;
-       }
-
-       if (ts->status != ISCSI_THREAD_SET_DIE)
-               flush_signals(current);
-
-       if (ts->delay_inactive && (--ts->thread_count == 0)) {
-               spin_unlock_bh(&ts->ts_state_lock);
-
-               if (!iscsit_global->in_shutdown)
-                       iscsi_deallocate_extra_thread_sets();
-
-               iscsi_add_ts_to_inactive_list(ts);
-               spin_lock_bh(&ts->ts_state_lock);
-       }
-       if ((ts->status == ISCSI_THREAD_SET_RESET) &&
-           (ts->thread_clear & ISCSI_CLEAR_TX_THREAD))
-               complete(&ts->tx_restart_comp);
-
-       ts->thread_clear &= ~ISCSI_CLEAR_TX_THREAD;
-       spin_unlock_bh(&ts->ts_state_lock);
-sleep:
-       ret = wait_for_completion_interruptible(&ts->tx_start_comp);
-       if (ret != 0)
-               return NULL;
-
-       if (iscsi_signal_thread_pre_handler(ts) < 0)
-               return NULL;
-
-       iscsi_check_to_add_additional_sets();
-
-       spin_lock_bh(&ts->ts_state_lock);
-       if (!ts->conn) {
-               pr_err("struct iscsi_thread_set->conn is NULL for"
-                       " TX thread_id: %s/%d\n", current->comm, current->pid);
-               spin_unlock_bh(&ts->ts_state_lock);
-               return NULL;
-       }
-       ts->thread_clear |= ISCSI_CLEAR_TX_THREAD;
-       spin_unlock_bh(&ts->ts_state_lock);
-
-       up(&ts->ts_activate_sem);
-
-       return ts->conn;
-}
-
-int iscsi_thread_set_init(void)
-{
-       int size;
-
-       iscsit_global->ts_bitmap_count = ISCSI_TS_BITMAP_BITS;
-
-       size = BITS_TO_LONGS(iscsit_global->ts_bitmap_count) * sizeof(long);
-       iscsit_global->ts_bitmap = kzalloc(size, GFP_KERNEL);
-       if (!iscsit_global->ts_bitmap) {
-               pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
-               return -ENOMEM;
-       }
-
-       return 0;
-}
-
-void iscsi_thread_set_free(void)
-{
-       kfree(iscsit_global->ts_bitmap);
-}
diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h

deleted file mode 100644 (file)

index cc1eede..0000000
--- a/drivers/target/iscsi/iscsi_target_tq.h
+++ /dev/null
@@ -1,84 +0,0 @@
-#ifndef ISCSI_THREAD_QUEUE_H
-#define ISCSI_THREAD_QUEUE_H
-
-/*
- * Defines for thread sets.
- */
-extern int iscsi_thread_set_force_reinstatement(struct iscsi_conn *);
-extern int iscsi_allocate_thread_sets(u32);
-extern void iscsi_deallocate_thread_sets(void);
-extern void iscsi_activate_thread_set(struct iscsi_conn *, struct iscsi_thread_set *);
-extern struct iscsi_thread_set *iscsi_get_thread_set(void);
-extern void iscsi_set_thread_clear(struct iscsi_conn *, u8);
-extern void iscsi_set_thread_set_signal(struct iscsi_conn *, u8);
-extern int iscsi_release_thread_set(struct iscsi_conn *);
-extern struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *);
-extern struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *);
-extern int iscsi_thread_set_init(void);
-extern void iscsi_thread_set_free(void);
-
-extern int iscsi_target_tx_thread(void *);
-extern int iscsi_target_rx_thread(void *);
-
-#define TARGET_THREAD_SET_COUNT                        4
-
-#define ISCSI_RX_THREAD                         1
-#define ISCSI_TX_THREAD                         2
-#define ISCSI_RX_THREAD_NAME                   "iscsi_trx"
-#define ISCSI_TX_THREAD_NAME                   "iscsi_ttx"
-#define ISCSI_BLOCK_RX_THREAD                  0x1
-#define ISCSI_BLOCK_TX_THREAD                  0x2
-#define ISCSI_CLEAR_RX_THREAD                  0x1
-#define ISCSI_CLEAR_TX_THREAD                  0x2
-#define ISCSI_SIGNAL_RX_THREAD                 0x1
-#define ISCSI_SIGNAL_TX_THREAD                 0x2
-
-/* struct iscsi_thread_set->status */
-#define ISCSI_THREAD_SET_FREE                  1
-#define ISCSI_THREAD_SET_ACTIVE                        2
-#define ISCSI_THREAD_SET_DIE                   3
-#define ISCSI_THREAD_SET_RESET                 4
-#define ISCSI_THREAD_SET_DEALLOCATE_THREADS    5
-
-/* By default allow a maximum of 32K iSCSI connections */
-#define ISCSI_TS_BITMAP_BITS                   32768
-
-struct iscsi_thread_set {
-       /* flags used for blocking and restarting sets */
-       int     blocked_threads;
-       /* flag for creating threads */
-       int     create_threads;
-       /* flag for delaying readding to inactive list */
-       int     delay_inactive;
-       /* status for thread set */
-       int     status;
-       /* which threads have had signals sent */
-       int     signal_sent;
-       /* flag for which threads exited first */
-       int     thread_clear;
-       /* Active threads in the thread set */
-       int     thread_count;
-       /* Unique thread ID */
-       u32     thread_id;
-       /* pointer to connection if set is active */
-       struct iscsi_conn       *conn;
-       /* used for controlling ts state accesses */
-       spinlock_t      ts_state_lock;
-       /* used for restarting thread queue */
-       struct completion       rx_restart_comp;
-       /* used for restarting thread queue */
-       struct completion       tx_restart_comp;
-       /* used for normal unused blocking */
-       struct completion       rx_start_comp;
-       /* used for normal unused blocking */
-       struct completion       tx_start_comp;
-       /* OS descriptor for rx thread */
-       struct task_struct      *rx_thread;
-       /* OS descriptor for tx thread */
-       struct task_struct      *tx_thread;
-       /* struct iscsi_thread_set in list list head*/
-       struct list_head        ts_list;
-       struct semaphore        ts_activate_sem;
-};
-
-#endif   /*** ISCSI_THREAD_QUEUE_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c

index 390df8e..b18edda 100644 (file)
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -33,7 +33,6 @@
  #include "iscsi_target_erl1.h"
  #include "iscsi_target_erl2.h"
  #include "iscsi_target_tpg.h"
-#include "iscsi_target_tq.h"
  #include "iscsi_target_util.h"
  #include "iscsi_target.h"
  
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c

index c36bd7c..51f0c89 100644 (file)
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -41,8 +41,7 @@
  
  #define to_tcm_loop_hba(hba)   container_of(hba, struct tcm_loop_hba, dev)
  
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *tcm_loop_fabric_configfs;
+static const struct target_core_fabric_ops loop_ops;
  
  static struct workqueue_struct *tcm_loop_workqueue;
  static struct kmem_cache *tcm_loop_cmd_cache;
@@ -108,7 +107,7 @@ static struct device_driver tcm_loop_driverfs = {
  /*
   * Used with root_device_register() in tcm_loop_alloc_core_bus() below
   */
-struct device *tcm_loop_primary;
+static struct device *tcm_loop_primary;
  
  static void tcm_loop_submission_work(struct work_struct *work)
  {
@@ -697,6 +696,13 @@ static int tcm_loop_check_prod_mode_write_protect(struct se_portal_group *se_tpg
         return 0;
  }
  
+static int tcm_loop_check_prot_fabric_only(struct se_portal_group *se_tpg)
+{
+       struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, struct tcm_loop_tpg,
+                                                  tl_se_tpg);
+       return tl_tpg->tl_fabric_prot_type;
+}
+
  static struct se_node_acl *tcm_loop_tpg_alloc_fabric_acl(
         struct se_portal_group *se_tpg)
  {
@@ -912,6 +918,46 @@ static void tcm_loop_port_unlink(
  
  /* End items for tcm_loop_port_cit */
  
+static ssize_t tcm_loop_tpg_attrib_show_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, struct tcm_loop_tpg,
+                                                  tl_se_tpg);
+
+       return sprintf(page, "%d\n", tl_tpg->tl_fabric_prot_type);
+}
+
+static ssize_t tcm_loop_tpg_attrib_store_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       const char *page,
+       size_t count)
+{
+       struct tcm_loop_tpg *tl_tpg = container_of(se_tpg, struct tcm_loop_tpg,
+                                                  tl_se_tpg);
+       unsigned long val;
+       int ret = kstrtoul(page, 0, &val);
+
+       if (ret) {
+               pr_err("kstrtoul() returned %d for fabric_prot_type\n", ret);
+               return ret;
+       }
+       if (val != 0 && val != 1 && val != 3) {
+               pr_err("Invalid qla2xxx fabric_prot_type: %lu\n", val);
+               return -EINVAL;
+       }
+       tl_tpg->tl_fabric_prot_type = val;
+
+       return count;
+}
+
+TF_TPG_ATTRIB_ATTR(tcm_loop, fabric_prot_type, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *tcm_loop_tpg_attrib_attrs[] = {
+       &tcm_loop_tpg_attrib_fabric_prot_type.attr,
+       NULL,
+};
+
  /* Start items for tcm_loop_nexus_cit */
  
  static int tcm_loop_make_nexus(
@@ -937,7 +983,8 @@ static int tcm_loop_make_nexus(
         /*
          * Initialize the struct se_session pointer
          */
-       tl_nexus->se_sess = transport_init_session(TARGET_PROT_ALL);
+       tl_nexus->se_sess = transport_init_session(
+                               TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS);
         if (IS_ERR(tl_nexus->se_sess)) {
                 ret = PTR_ERR(tl_nexus->se_sess);
                 goto out;
@@ -1165,21 +1212,19 @@ static struct se_portal_group *tcm_loop_make_naa_tpg(
         struct tcm_loop_hba *tl_hba = container_of(wwn,
                         struct tcm_loop_hba, tl_hba_wwn);
         struct tcm_loop_tpg *tl_tpg;
-       char *tpgt_str, *end_ptr;
         int ret;
-       unsigned short int tpgt;
+       unsigned long tpgt;
  
-       tpgt_str = strstr(name, "tpgt_");
-       if (!tpgt_str) {
+       if (strstr(name, "tpgt_") != name) {
                 pr_err("Unable to locate \"tpgt_#\" directory"
                                 " group\n");
                 return ERR_PTR(-EINVAL);
         }
-       tpgt_str += 5; /* Skip ahead of "tpgt_" */
-       tpgt = (unsigned short int) simple_strtoul(tpgt_str, &end_ptr, 0);
+       if (kstrtoul(name+5, 10, &tpgt))
+               return ERR_PTR(-EINVAL);
  
         if (tpgt >= TL_TPGS_PER_HBA) {
-               pr_err("Passed tpgt: %hu exceeds TL_TPGS_PER_HBA:"
+               pr_err("Passed tpgt: %lu exceeds TL_TPGS_PER_HBA:"
                                 " %u\n", tpgt, TL_TPGS_PER_HBA);
                 return ERR_PTR(-EINVAL);
         }
@@ -1189,14 +1234,13 @@ static struct se_portal_group *tcm_loop_make_naa_tpg(
         /*
          * Register the tl_tpg as a emulated SAS TCM Target Endpoint
          */
-       ret = core_tpg_register(&tcm_loop_fabric_configfs->tf_ops,
-                       wwn, &tl_tpg->tl_se_tpg, tl_tpg,
+       ret = core_tpg_register(&loop_ops, wwn, &tl_tpg->tl_se_tpg, tl_tpg,
                         TRANSPORT_TPG_TYPE_NORMAL);
         if (ret < 0)
                 return ERR_PTR(-ENOMEM);
  
         pr_debug("TCM_Loop_ConfigFS: Allocated Emulated %s"
-               " Target Port %s,t,0x%04x\n", tcm_loop_dump_proto_id(tl_hba),
+               " Target Port %s,t,0x%04lx\n", tcm_loop_dump_proto_id(tl_hba),
                 config_item_name(&wwn->wwn_group.cg_item), tpgt);
  
         return &tl_tpg->tl_se_tpg;
@@ -1338,127 +1382,51 @@ static struct configfs_attribute *tcm_loop_wwn_attrs[] = {
  
  /* End items for tcm_loop_cit */
  
-static int tcm_loop_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-       /*
-        * Set the TCM Loop HBA counter to zero
-        */
-       tcm_loop_hba_no_cnt = 0;
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "loopback");
-       if (IS_ERR(fabric)) {
-               pr_err("tcm_loop_register_configfs() failed!\n");
-               return PTR_ERR(fabric);
-       }
-       /*
-        * Setup the fabric API of function pointers used by target_core_mod
-        */
-       fabric->tf_ops.get_fabric_name = &tcm_loop_get_fabric_name;
-       fabric->tf_ops.get_fabric_proto_ident = &tcm_loop_get_fabric_proto_ident;
-       fabric->tf_ops.tpg_get_wwn = &tcm_loop_get_endpoint_wwn;
-       fabric->tf_ops.tpg_get_tag = &tcm_loop_get_tag;
-       fabric->tf_ops.tpg_get_default_depth = &tcm_loop_get_default_depth;
-       fabric->tf_ops.tpg_get_pr_transport_id = &tcm_loop_get_pr_transport_id;
-       fabric->tf_ops.tpg_get_pr_transport_id_len =
-                                       &tcm_loop_get_pr_transport_id_len;
-       fabric->tf_ops.tpg_parse_pr_out_transport_id =
-                                       &tcm_loop_parse_pr_out_transport_id;
-       fabric->tf_ops.tpg_check_demo_mode = &tcm_loop_check_demo_mode;
-       fabric->tf_ops.tpg_check_demo_mode_cache =
-                                       &tcm_loop_check_demo_mode_cache;
-       fabric->tf_ops.tpg_check_demo_mode_write_protect =
-                                       &tcm_loop_check_demo_mode_write_protect;
-       fabric->tf_ops.tpg_check_prod_mode_write_protect =
-                                       &tcm_loop_check_prod_mode_write_protect;
-       /*
-        * The TCM loopback fabric module runs in demo-mode to a local
-        * virtual SCSI device, so fabric dependent initator ACLs are
-        * not required.
-        */
-       fabric->tf_ops.tpg_alloc_fabric_acl = &tcm_loop_tpg_alloc_fabric_acl;
-       fabric->tf_ops.tpg_release_fabric_acl =
-                                       &tcm_loop_tpg_release_fabric_acl;
-       fabric->tf_ops.tpg_get_inst_index = &tcm_loop_get_inst_index;
-       /*
-        * Used for setting up remaining TCM resources in process context
-        */
-       fabric->tf_ops.check_stop_free = &tcm_loop_check_stop_free;
-       fabric->tf_ops.release_cmd = &tcm_loop_release_cmd;
-       fabric->tf_ops.shutdown_session = &tcm_loop_shutdown_session;
-       fabric->tf_ops.close_session = &tcm_loop_close_session;
-       fabric->tf_ops.sess_get_index = &tcm_loop_sess_get_index;
-       fabric->tf_ops.sess_get_initiator_sid = NULL;
-       fabric->tf_ops.write_pending = &tcm_loop_write_pending;
-       fabric->tf_ops.write_pending_status = &tcm_loop_write_pending_status;
-       /*
-        * Not used for TCM loopback
-        */
-       fabric->tf_ops.set_default_node_attributes =
-                                       &tcm_loop_set_default_node_attributes;
-       fabric->tf_ops.get_task_tag = &tcm_loop_get_task_tag;
-       fabric->tf_ops.get_cmd_state = &tcm_loop_get_cmd_state;
-       fabric->tf_ops.queue_data_in = &tcm_loop_queue_data_in;
-       fabric->tf_ops.queue_status = &tcm_loop_queue_status;
-       fabric->tf_ops.queue_tm_rsp = &tcm_loop_queue_tm_rsp;
-       fabric->tf_ops.aborted_task = &tcm_loop_aborted_task;
-
-       /*
-        * Setup function pointers for generic logic in target_core_fabric_configfs.c
-        */
-       fabric->tf_ops.fabric_make_wwn = &tcm_loop_make_scsi_hba;
-       fabric->tf_ops.fabric_drop_wwn = &tcm_loop_drop_scsi_hba;
-       fabric->tf_ops.fabric_make_tpg = &tcm_loop_make_naa_tpg;
-       fabric->tf_ops.fabric_drop_tpg = &tcm_loop_drop_naa_tpg;
-       /*
-        * fabric_post_link() and fabric_pre_unlink() are used for
-        * registration and release of TCM Loop Virtual SCSI LUNs.
-        */
-       fabric->tf_ops.fabric_post_link = &tcm_loop_port_link;
-       fabric->tf_ops.fabric_pre_unlink = &tcm_loop_port_unlink;
-       fabric->tf_ops.fabric_make_np = NULL;
-       fabric->tf_ops.fabric_drop_np = NULL;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = tcm_loop_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = tcm_loop_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       /*
-        * Once fabric->tf_ops has been setup, now register the fabric for
-        * use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() for"
-                               " TCM_Loop failed!\n");
-               target_fabric_configfs_free(fabric);
-               return -1;
-       }
-       /*
-        * Setup our local pointer to *fabric.
-        */
-       tcm_loop_fabric_configfs = fabric;
-       pr_debug("TCM_LOOP[0] - Set fabric ->"
-                       " tcm_loop_fabric_configfs\n");
-       return 0;
-}
-
-static void tcm_loop_deregister_configfs(void)
-{
-       if (!tcm_loop_fabric_configfs)
-               return;
-
-       target_fabric_configfs_deregister(tcm_loop_fabric_configfs);
-       tcm_loop_fabric_configfs = NULL;
-       pr_debug("TCM_LOOP[0] - Cleared"
-                               " tcm_loop_fabric_configfs\n");
-}
+static const struct target_core_fabric_ops loop_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "loopback",
+       .get_fabric_name                = tcm_loop_get_fabric_name,
+       .get_fabric_proto_ident         = tcm_loop_get_fabric_proto_ident,
+       .tpg_get_wwn                    = tcm_loop_get_endpoint_wwn,
+       .tpg_get_tag                    = tcm_loop_get_tag,
+       .tpg_get_default_depth          = tcm_loop_get_default_depth,
+       .tpg_get_pr_transport_id        = tcm_loop_get_pr_transport_id,
+       .tpg_get_pr_transport_id_len    = tcm_loop_get_pr_transport_id_len,
+       .tpg_parse_pr_out_transport_id  = tcm_loop_parse_pr_out_transport_id,
+       .tpg_check_demo_mode            = tcm_loop_check_demo_mode,
+       .tpg_check_demo_mode_cache      = tcm_loop_check_demo_mode_cache,
+       .tpg_check_demo_mode_write_protect =
+                               tcm_loop_check_demo_mode_write_protect,
+       .tpg_check_prod_mode_write_protect =
+                               tcm_loop_check_prod_mode_write_protect,
+       .tpg_check_prot_fabric_only     = tcm_loop_check_prot_fabric_only,
+       .tpg_alloc_fabric_acl           = tcm_loop_tpg_alloc_fabric_acl,
+       .tpg_release_fabric_acl         = tcm_loop_tpg_release_fabric_acl,
+       .tpg_get_inst_index             = tcm_loop_get_inst_index,
+       .check_stop_free                = tcm_loop_check_stop_free,
+       .release_cmd                    = tcm_loop_release_cmd,
+       .shutdown_session               = tcm_loop_shutdown_session,
+       .close_session                  = tcm_loop_close_session,
+       .sess_get_index                 = tcm_loop_sess_get_index,
+       .write_pending                  = tcm_loop_write_pending,
+       .write_pending_status           = tcm_loop_write_pending_status,
+       .set_default_node_attributes    = tcm_loop_set_default_node_attributes,
+       .get_task_tag                   = tcm_loop_get_task_tag,
+       .get_cmd_state                  = tcm_loop_get_cmd_state,
+       .queue_data_in                  = tcm_loop_queue_data_in,
+       .queue_status                   = tcm_loop_queue_status,
+       .queue_tm_rsp                   = tcm_loop_queue_tm_rsp,
+       .aborted_task                   = tcm_loop_aborted_task,
+       .fabric_make_wwn                = tcm_loop_make_scsi_hba,
+       .fabric_drop_wwn                = tcm_loop_drop_scsi_hba,
+       .fabric_make_tpg                = tcm_loop_make_naa_tpg,
+       .fabric_drop_tpg                = tcm_loop_drop_naa_tpg,
+       .fabric_post_link               = tcm_loop_port_link,
+       .fabric_pre_unlink              = tcm_loop_port_unlink,
+       .tfc_wwn_attrs                  = tcm_loop_wwn_attrs,
+       .tfc_tpg_base_attrs             = tcm_loop_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = tcm_loop_tpg_attrib_attrs,
+};
  
  static int __init tcm_loop_fabric_init(void)
  {
@@ -1482,7 +1450,7 @@ static int __init tcm_loop_fabric_init(void)
         if (ret)
                 goto out_destroy_cache;
  
-       ret = tcm_loop_register_configfs();
+       ret = target_register_template(&loop_ops);
         if (ret)
                 goto out_release_core_bus;
  
@@ -1500,7 +1468,7 @@ out:
  
  static void __exit tcm_loop_fabric_exit(void)
  {
-       tcm_loop_deregister_configfs();
+       target_unregister_template(&loop_ops);
         tcm_loop_release_core_bus();
         kmem_cache_destroy(tcm_loop_cmd_cache);
         destroy_workqueue(tcm_loop_workqueue);
diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h

index 6ae49f2..1e72ff7 100644 (file)
--- a/drivers/target/loopback/tcm_loop.h
+++ b/drivers/target/loopback/tcm_loop.h
@@ -43,6 +43,7 @@ struct tcm_loop_nacl {
  struct tcm_loop_tpg {
         unsigned short tl_tpgt;
         unsigned short tl_transport_status;
+       enum target_prot_type tl_fabric_prot_type;
         atomic_t tl_tpg_port_count;
         struct se_portal_group tl_se_tpg;
         struct tcm_loop_hba *tl_hba;
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c

index 9512af6..18b0f97 100644 (file)
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -42,8 +42,7 @@
  
  #include "sbp_target.h"
  
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *sbp_fabric_configfs;
+static const struct target_core_fabric_ops sbp_ops;
  
  /* FireWire address region for management and command block address handlers */
  static const struct fw_address_region sbp_register_region = {
@@ -2215,8 +2214,7 @@ static struct se_portal_group *sbp_make_tpg(
                 goto out_free_tpg;
         }
  
-       ret = core_tpg_register(&sbp_fabric_configfs->tf_ops, wwn,
-                       &tpg->se_tpg, (void *)tpg,
+       ret = core_tpg_register(&sbp_ops, wwn, &tpg->se_tpg, tpg,
                         TRANSPORT_TPG_TYPE_NORMAL);
         if (ret < 0)
                 goto out_unreg_mgt_agt;
@@ -2503,7 +2501,9 @@ static struct configfs_attribute *sbp_tpg_attrib_attrs[] = {
         NULL,
  };
  
-static struct target_core_fabric_ops sbp_ops = {
+static const struct target_core_fabric_ops sbp_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "sbp",
         .get_fabric_name                = sbp_get_fabric_name,
         .get_fabric_proto_ident         = sbp_get_fabric_proto_ident,
         .tpg_get_wwn                    = sbp_get_fabric_wwn,
@@ -2544,68 +2544,20 @@ static struct target_core_fabric_ops sbp_ops = {
         .fabric_drop_np                 = NULL,
         .fabric_make_nodeacl            = sbp_make_nodeacl,
         .fabric_drop_nodeacl            = sbp_drop_nodeacl,
-};
-
-static int sbp_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       fabric = target_fabric_configfs_init(THIS_MODULE, "sbp");
-       if (IS_ERR(fabric)) {
-               pr_err("target_fabric_configfs_init() failed\n");
-               return PTR_ERR(fabric);
-       }
-
-       fabric->tf_ops = sbp_ops;
-
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = sbp_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = sbp_tpg_base_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = sbp_tpg_attrib_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() failed for SBP\n");
-               return ret;
-       }
  
-       sbp_fabric_configfs = fabric;
-
-       return 0;
-};
-
-static void sbp_deregister_configfs(void)
-{
-       if (!sbp_fabric_configfs)
-               return;
-
-       target_fabric_configfs_deregister(sbp_fabric_configfs);
-       sbp_fabric_configfs = NULL;
+       .tfc_wwn_attrs                  = sbp_wwn_attrs,
+       .tfc_tpg_base_attrs             = sbp_tpg_base_attrs,
+       .tfc_tpg_attrib_attrs           = sbp_tpg_attrib_attrs,
  };
  
  static int __init sbp_init(void)
  {
-       int ret;
-
-       ret = sbp_register_configfs();
-       if (ret < 0)
-               return ret;
-
-       return 0;
+       return target_register_template(&sbp_ops);
  };
  
  static void __exit sbp_exit(void)
  {
-       sbp_deregister_configfs();
+       target_unregister_template(&sbp_ops);
  };
  
  MODULE_DESCRIPTION("FireWire SBP fabric driver");
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c

index 75d89ad..ddaf76a 100644 (file)
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -142,8 +142,8 @@ static struct config_group *target_core_register_fabric(
  
         tf = target_core_get_fabric(name);
         if (!tf) {
-               pr_err("target_core_register_fabric() trying autoload for %s\n",
-                       name);
+               pr_debug("target_core_register_fabric() trying autoload for %s\n",
+                        name);
  
                 /*
                  * Below are some hardcoded request_module() calls to automatically
@@ -165,8 +165,8 @@ static struct config_group *target_core_register_fabric(
                          */
                         ret = request_module("iscsi_target_mod");
                         if (ret < 0) {
-                               pr_err("request_module() failed for"
-                                      " iscsi_target_mod.ko: %d\n", ret);
+                               pr_debug("request_module() failed for"
+                                        " iscsi_target_mod.ko: %d\n", ret);
                                 return ERR_PTR(-EINVAL);
                         }
                 } else if (!strncmp(name, "loopback", 8)) {
@@ -178,8 +178,8 @@ static struct config_group *target_core_register_fabric(
                          */
                         ret = request_module("tcm_loop");
                         if (ret < 0) {
-                               pr_err("request_module() failed for"
-                                      " tcm_loop.ko: %d\n", ret);
+                               pr_debug("request_module() failed for"
+                                        " tcm_loop.ko: %d\n", ret);
                                 return ERR_PTR(-EINVAL);
                         }
                 }
@@ -188,8 +188,8 @@ static struct config_group *target_core_register_fabric(
         }
  
         if (!tf) {
-               pr_err("target_core_get_fabric() failed for %s\n",
-                      name);
+               pr_debug("target_core_get_fabric() failed for %s\n",
+                        name);
                 return ERR_PTR(-EINVAL);
         }
         pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:"
@@ -300,81 +300,17 @@ struct configfs_subsystem *target_core_subsystem[] = {
  // Start functions called by external Target Fabrics Modules
  //############################################################################*/
  
-/*
- * First function called by fabric modules to:
- *
- * 1) Allocate a struct target_fabric_configfs and save the *fabric_cit pointer.
- * 2) Add struct target_fabric_configfs to g_tf_list
- * 3) Return struct target_fabric_configfs to fabric module to be passed
- *    into target_fabric_configfs_register().
- */
-struct target_fabric_configfs *target_fabric_configfs_init(
-       struct module *fabric_mod,
-       const char *name)
+static int target_fabric_tf_ops_check(const struct target_core_fabric_ops *tfo)
  {
-       struct target_fabric_configfs *tf;
-
-       if (!(name)) {
-               pr_err("Unable to locate passed fabric name\n");
-               return ERR_PTR(-EINVAL);
+       if (!tfo->name) {
+               pr_err("Missing tfo->name\n");
+               return -EINVAL;
         }
-       if (strlen(name) >= TARGET_FABRIC_NAME_SIZE) {
+       if (strlen(tfo->name) >= TARGET_FABRIC_NAME_SIZE) {
                 pr_err("Passed name: %s exceeds TARGET_FABRIC"
-                       "_NAME_SIZE\n", name);
-               return ERR_PTR(-EINVAL);
+                       "_NAME_SIZE\n", tfo->name);
+               return -EINVAL;
         }
-
-       tf = kzalloc(sizeof(struct target_fabric_configfs), GFP_KERNEL);
-       if (!tf)
-               return ERR_PTR(-ENOMEM);
-
-       INIT_LIST_HEAD(&tf->tf_list);
-       atomic_set(&tf->tf_access_cnt, 0);
-       /*
-        * Setup the default generic struct config_item_type's (cits) in
-        * struct target_fabric_configfs->tf_cit_tmpl
-        */
-       tf->tf_module = fabric_mod;
-       target_fabric_setup_cits(tf);
-
-       tf->tf_subsys = target_core_subsystem[0];
-       snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", name);
-
-       mutex_lock(&g_tf_lock);
-       list_add_tail(&tf->tf_list, &g_tf_list);
-       mutex_unlock(&g_tf_lock);
-
-       pr_debug("<<<<<<<<<<<<<<<<<<<<<< BEGIN FABRIC API >>>>>>>>"
-                       ">>>>>>>>>>>>>>\n");
-       pr_debug("Initialized struct target_fabric_configfs: %p for"
-                       " %s\n", tf, tf->tf_name);
-       return tf;
-}
-EXPORT_SYMBOL(target_fabric_configfs_init);
-
-/*
- * Called by fabric plugins after FAILED target_fabric_configfs_register() call.
- */
-void target_fabric_configfs_free(
-       struct target_fabric_configfs *tf)
-{
-       mutex_lock(&g_tf_lock);
-       list_del(&tf->tf_list);
-       mutex_unlock(&g_tf_lock);
-
-       kfree(tf);
-}
-EXPORT_SYMBOL(target_fabric_configfs_free);
-
-/*
- * Perform a sanity check of the passed tf->tf_ops before completing
- * TCM fabric module registration.
- */
-static int target_fabric_tf_ops_check(
-       struct target_fabric_configfs *tf)
-{
-       struct target_core_fabric_ops *tfo = &tf->tf_ops;
-
         if (!tfo->get_fabric_name) {
                 pr_err("Missing tfo->get_fabric_name()\n");
                 return -EINVAL;
@@ -508,77 +444,59 @@ static int target_fabric_tf_ops_check(
         return 0;
  }
  
-/*
- * Called 2nd from fabric module with returned parameter of
- * struct target_fabric_configfs * from target_fabric_configfs_init().
- *
- * Upon a successful registration, the new fabric's struct config_item is
- * return.  Also, a pointer to this struct is set in the passed
- * struct target_fabric_configfs.
- */
-int target_fabric_configfs_register(
-       struct target_fabric_configfs *tf)
+int target_register_template(const struct target_core_fabric_ops *fo)
  {
+       struct target_fabric_configfs *tf;
         int ret;
  
+       ret = target_fabric_tf_ops_check(fo);
+       if (ret)
+               return ret;
+
+       tf = kzalloc(sizeof(struct target_fabric_configfs), GFP_KERNEL);
         if (!tf) {
-               pr_err("Unable to locate target_fabric_configfs"
-                       " pointer\n");
-               return -EINVAL;
-       }
-       if (!tf->tf_subsys) {
-               pr_err("Unable to target struct config_subsystem"
-                       " pointer\n");
-               return -EINVAL;
+               pr_err("%s: could not allocate memory!\n", __func__);
+               return -ENOMEM;
         }
-       ret = target_fabric_tf_ops_check(tf);
-       if (ret < 0)
-               return ret;
  
-       pr_debug("<<<<<<<<<<<<<<<<<<<<<< END FABRIC API >>>>>>>>>>>>"
-               ">>>>>>>>>>\n");
+       INIT_LIST_HEAD(&tf->tf_list);
+       atomic_set(&tf->tf_access_cnt, 0);
+
+       /*
+        * Setup the default generic struct config_item_type's (cits) in
+        * struct target_fabric_configfs->tf_cit_tmpl
+        */
+       tf->tf_module = fo->module;
+       tf->tf_subsys = target_core_subsystem[0];
+       snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name);
+
+       tf->tf_ops = *fo;
+       target_fabric_setup_cits(tf);
+
+       mutex_lock(&g_tf_lock);
+       list_add_tail(&tf->tf_list, &g_tf_list);
+       mutex_unlock(&g_tf_lock);
+
         return 0;
  }
-EXPORT_SYMBOL(target_fabric_configfs_register);
+EXPORT_SYMBOL(target_register_template);
  
-void target_fabric_configfs_deregister(
-       struct target_fabric_configfs *tf)
+void target_unregister_template(const struct target_core_fabric_ops *fo)
  {
-       struct configfs_subsystem *su;
+       struct target_fabric_configfs *t;
  
-       if (!tf) {
-               pr_err("Unable to locate passed target_fabric_"
-                       "configfs\n");
-               return;
-       }
-       su = tf->tf_subsys;
-       if (!su) {
-               pr_err("Unable to locate passed tf->tf_subsys"
-                       " pointer\n");
-               return;
-       }
-       pr_debug("<<<<<<<<<<<<<<<<<<<<<< BEGIN FABRIC API >>>>>>>>>>"
-                       ">>>>>>>>>>>>\n");
         mutex_lock(&g_tf_lock);
-       if (atomic_read(&tf->tf_access_cnt)) {
-               mutex_unlock(&g_tf_lock);
-               pr_err("Non zero tf->tf_access_cnt for fabric %s\n",
-                       tf->tf_name);
-               BUG();
+       list_for_each_entry(t, &g_tf_list, tf_list) {
+               if (!strcmp(t->tf_name, fo->name)) {
+                       BUG_ON(atomic_read(&t->tf_access_cnt));
+                       list_del(&t->tf_list);
+                       kfree(t);
+                       break;
+               }
         }
-       list_del(&tf->tf_list);
         mutex_unlock(&g_tf_lock);
-
-       pr_debug("Target_Core_ConfigFS: DEREGISTER -> Releasing tf:"
-                       " %s\n", tf->tf_name);
-       tf->tf_module = NULL;
-       tf->tf_subsys = NULL;
-       kfree(tf);
-
-       pr_debug("<<<<<<<<<<<<<<<<<<<<<< END FABRIC API >>>>>>>>>>>>>>>>>"
-                       ">>>>>\n");
  }
-EXPORT_SYMBOL(target_fabric_configfs_deregister);
+EXPORT_SYMBOL(target_unregister_template);
  
  /*##############################################################################
  // Stop functions called by external Target Fabrics Modules
@@ -945,7 +863,7 @@ static ssize_t target_core_dev_pr_show_attr_res_pr_holder_tg_port(
         struct se_lun *lun;
         struct se_portal_group *se_tpg;
         struct t10_pr_registration *pr_reg;
-       struct target_core_fabric_ops *tfo;
+       const struct target_core_fabric_ops *tfo;
         ssize_t len = 0;
  
         spin_lock(&dev->dev_reservation_lock);
@@ -979,7 +897,7 @@ SE_DEV_PR_ATTR_RO(res_pr_holder_tg_port);
  static ssize_t target_core_dev_pr_show_attr_res_pr_registered_i_pts(
                 struct se_device *dev, char *page)
  {
-       struct target_core_fabric_ops *tfo;
+       const struct target_core_fabric_ops *tfo;
         struct t10_pr_registration *pr_reg;
         unsigned char buf[384];
         char i_buf[PR_REG_ISID_ID_LEN];
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c

index 0c3f901..1f7886b 100644 (file)
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -56,6 +56,20 @@ static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf)
         pr_debug("Setup generic %s\n", __stringify(_name));             \
  }
  
+#define TF_CIT_SETUP_DRV(_name, _item_ops, _group_ops)         \
+static void target_fabric_setup_##_name##_cit(struct target_fabric_configfs *tf) \
+{                                                                      \
+       struct target_fabric_configfs_template *tfc = &tf->tf_cit_tmpl; \
+       struct config_item_type *cit = &tfc->tfc_##_name##_cit;         \
+       struct configfs_attribute **attrs = tf->tf_ops.tfc_##_name##_attrs; \
+                                                                       \
+       cit->ct_item_ops = _item_ops;                                   \
+       cit->ct_group_ops = _group_ops;                                 \
+       cit->ct_attrs = attrs;                                          \
+       cit->ct_owner = tf->tf_module;                                  \
+       pr_debug("Setup generic %s\n", __stringify(_name));             \
+}
+
  /* Start of tfc_tpg_mappedlun_cit */
  
  static int target_fabric_mappedlun_link(
@@ -278,7 +292,7 @@ static struct configfs_item_operations target_fabric_nacl_attrib_item_ops = {
         .store_attribute        = target_fabric_nacl_attrib_attr_store,
  };
  
-TF_CIT_SETUP(tpg_nacl_attrib, &target_fabric_nacl_attrib_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_nacl_attrib, &target_fabric_nacl_attrib_item_ops, NULL);
  
  /* End of tfc_tpg_nacl_attrib_cit */
  
@@ -291,7 +305,7 @@ static struct configfs_item_operations target_fabric_nacl_auth_item_ops = {
         .store_attribute        = target_fabric_nacl_auth_attr_store,
  };
  
-TF_CIT_SETUP(tpg_nacl_auth, &target_fabric_nacl_auth_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_nacl_auth, &target_fabric_nacl_auth_item_ops, NULL);
  
  /* End of tfc_tpg_nacl_auth_cit */
  
@@ -304,7 +318,7 @@ static struct configfs_item_operations target_fabric_nacl_param_item_ops = {
         .store_attribute        = target_fabric_nacl_param_attr_store,
  };
  
-TF_CIT_SETUP(tpg_nacl_param, &target_fabric_nacl_param_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_nacl_param, &target_fabric_nacl_param_item_ops, NULL);
  
  /* End of tfc_tpg_nacl_param_cit */
  
@@ -461,8 +475,8 @@ static struct configfs_group_operations target_fabric_nacl_base_group_ops = {
         .drop_item              = target_fabric_drop_mappedlun,
  };
  
-TF_CIT_SETUP(tpg_nacl_base, &target_fabric_nacl_base_item_ops,
-               &target_fabric_nacl_base_group_ops, NULL);
+TF_CIT_SETUP_DRV(tpg_nacl_base, &target_fabric_nacl_base_item_ops,
+               &target_fabric_nacl_base_group_ops);
  
  /* End of tfc_tpg_nacl_base_cit */
  
@@ -570,7 +584,7 @@ static struct configfs_item_operations target_fabric_np_base_item_ops = {
         .store_attribute        = target_fabric_np_base_attr_store,
  };
  
-TF_CIT_SETUP(tpg_np_base, &target_fabric_np_base_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_np_base, &target_fabric_np_base_item_ops, NULL);
  
  /* End of tfc_tpg_np_base_cit */
  
@@ -966,7 +980,7 @@ static struct configfs_item_operations target_fabric_tpg_attrib_item_ops = {
         .store_attribute        = target_fabric_tpg_attrib_attr_store,
  };
  
-TF_CIT_SETUP(tpg_attrib, &target_fabric_tpg_attrib_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_attrib, &target_fabric_tpg_attrib_item_ops, NULL);
  
  /* End of tfc_tpg_attrib_cit */
  
@@ -979,7 +993,7 @@ static struct configfs_item_operations target_fabric_tpg_auth_item_ops = {
         .store_attribute        = target_fabric_tpg_auth_attr_store,
  };
  
-TF_CIT_SETUP(tpg_auth, &target_fabric_tpg_auth_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_auth, &target_fabric_tpg_auth_item_ops, NULL);
  
  /* End of tfc_tpg_attrib_cit */
  
@@ -992,7 +1006,7 @@ static struct configfs_item_operations target_fabric_tpg_param_item_ops = {
         .store_attribute        = target_fabric_tpg_param_attr_store,
  };
  
-TF_CIT_SETUP(tpg_param, &target_fabric_tpg_param_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_param, &target_fabric_tpg_param_item_ops, NULL);
  
  /* End of tfc_tpg_param_cit */
  
@@ -1018,7 +1032,7 @@ static struct configfs_item_operations target_fabric_tpg_base_item_ops = {
         .store_attribute        = target_fabric_tpg_attr_store,
  };
  
-TF_CIT_SETUP(tpg_base, &target_fabric_tpg_base_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(tpg_base, &target_fabric_tpg_base_item_ops, NULL);
  
  /* End of tfc_tpg_base_cit */
  
@@ -1192,7 +1206,7 @@ static struct configfs_item_operations target_fabric_wwn_item_ops = {
         .store_attribute        = target_fabric_wwn_attr_store,
  };
  
-TF_CIT_SETUP(wwn, &target_fabric_wwn_item_ops, &target_fabric_wwn_group_ops, NULL);
+TF_CIT_SETUP_DRV(wwn, &target_fabric_wwn_item_ops, &target_fabric_wwn_group_ops);
  
  /* End of tfc_wwn_cit */
  
@@ -1206,7 +1220,7 @@ static struct configfs_item_operations target_fabric_discovery_item_ops = {
         .store_attribute        = target_fabric_discovery_attr_store,
  };
  
-TF_CIT_SETUP(discovery, &target_fabric_discovery_item_ops, NULL, NULL);
+TF_CIT_SETUP_DRV(discovery, &target_fabric_discovery_item_ops, NULL);
  
  /* End of tfc_discovery_cit */
  
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c

index 44620fb..f7e6e51 100644 (file)
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -264,40 +264,32 @@ static int fd_do_prot_rw(struct se_cmd *cmd, struct fd_prot *fd_prot,
         struct se_device *se_dev = cmd->se_dev;
         struct fd_dev *dev = FD_DEV(se_dev);
         struct file *prot_fd = dev->fd_prot_file;
-       struct scatterlist *sg;
         loff_t pos = (cmd->t_task_lba * se_dev->prot_length);
         unsigned char *buf;
-       u32 prot_size, len, size;
-       int rc, ret = 1, i;
+       u32 prot_size;
+       int rc, ret = 1;
  
         prot_size = (cmd->data_length / se_dev->dev_attrib.block_size) *
                      se_dev->prot_length;
  
         if (!is_write) {
-               fd_prot->prot_buf = vzalloc(prot_size);
+               fd_prot->prot_buf = kzalloc(prot_size, GFP_KERNEL);
                 if (!fd_prot->prot_buf) {
                         pr_err("Unable to allocate fd_prot->prot_buf\n");
                         return -ENOMEM;
                 }
                 buf = fd_prot->prot_buf;
  
-               fd_prot->prot_sg_nents = cmd->t_prot_nents;
-               fd_prot->prot_sg = kzalloc(sizeof(struct scatterlist) *
-                                          fd_prot->prot_sg_nents, GFP_KERNEL);
+               fd_prot->prot_sg_nents = 1;
+               fd_prot->prot_sg = kzalloc(sizeof(struct scatterlist),
+                                          GFP_KERNEL);
                 if (!fd_prot->prot_sg) {
                         pr_err("Unable to allocate fd_prot->prot_sg\n");
-                       vfree(fd_prot->prot_buf);
+                       kfree(fd_prot->prot_buf);
                         return -ENOMEM;
                 }
-               size = prot_size;
-
-               for_each_sg(fd_prot->prot_sg, sg, fd_prot->prot_sg_nents, i) {
-
-                       len = min_t(u32, PAGE_SIZE, size);
-                       sg_set_buf(sg, buf, len);
-                       size -= len;
-                       buf += len;
-               }
+               sg_init_table(fd_prot->prot_sg, fd_prot->prot_sg_nents);
+               sg_set_buf(fd_prot->prot_sg, buf, prot_size);
         }
  
         if (is_write) {
@@ -318,7 +310,7 @@ static int fd_do_prot_rw(struct se_cmd *cmd, struct fd_prot *fd_prot,
  
         if (is_write || ret < 0) {
                 kfree(fd_prot->prot_sg);
-               vfree(fd_prot->prot_buf);
+               kfree(fd_prot->prot_buf);
         }
  
         return ret;
@@ -331,36 +323,33 @@ static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl,
         struct fd_dev *dev = FD_DEV(se_dev);
         struct file *fd = dev->fd_file;
         struct scatterlist *sg;
-       struct iovec *iov;
-       mm_segment_t old_fs;
+       struct iov_iter iter;
+       struct bio_vec *bvec;
+       ssize_t len = 0;
         loff_t pos = (cmd->t_task_lba * se_dev->dev_attrib.block_size);
         int ret = 0, i;
  
-       iov = kzalloc(sizeof(struct iovec) * sgl_nents, GFP_KERNEL);
-       if (!iov) {
+       bvec = kcalloc(sgl_nents, sizeof(struct bio_vec), GFP_KERNEL);
+       if (!bvec) {
                 pr_err("Unable to allocate fd_do_readv iov[]\n");
                 return -ENOMEM;
         }
  
         for_each_sg(sgl, sg, sgl_nents, i) {
-               iov[i].iov_len = sg->length;
-               iov[i].iov_base = kmap(sg_page(sg)) + sg->offset;
-       }
+               bvec[i].bv_page = sg_page(sg);
+               bvec[i].bv_len = sg->length;
+               bvec[i].bv_offset = sg->offset;
  
-       old_fs = get_fs();
-       set_fs(get_ds());
+               len += sg->length;
+       }
  
+       iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len);
         if (is_write)
-               ret = vfs_writev(fd, &iov[0], sgl_nents, &pos);
+               ret = vfs_iter_write(fd, &iter, &pos);
         else
-               ret = vfs_readv(fd, &iov[0], sgl_nents, &pos);
-
-       set_fs(old_fs);
-
-       for_each_sg(sgl, sg, sgl_nents, i)
-               kunmap(sg_page(sg));
+               ret = vfs_iter_read(fd, &iter, &pos);
  
-       kfree(iov);
+       kfree(bvec);
  
         if (is_write) {
                 if (ret < 0 || ret != cmd->data_length) {
@@ -436,59 +425,17 @@ fd_execute_sync_cache(struct se_cmd *cmd)
         return 0;
  }
  
-static unsigned char *
-fd_setup_write_same_buf(struct se_cmd *cmd, struct scatterlist *sg,
-                   unsigned int len)
-{
-       struct se_device *se_dev = cmd->se_dev;
-       unsigned int block_size = se_dev->dev_attrib.block_size;
-       unsigned int i = 0, end;
-       unsigned char *buf, *p, *kmap_buf;
-
-       buf = kzalloc(min_t(unsigned int, len, PAGE_SIZE), GFP_KERNEL);
-       if (!buf) {
-               pr_err("Unable to allocate fd_execute_write_same buf\n");
-               return NULL;
-       }
-
-       kmap_buf = kmap(sg_page(sg)) + sg->offset;
-       if (!kmap_buf) {
-               pr_err("kmap() failed in fd_setup_write_same\n");
-               kfree(buf);
-               return NULL;
-       }
-       /*
-        * Fill local *buf to contain multiple WRITE_SAME blocks up to
-        * min(len, PAGE_SIZE)
-        */
-       p = buf;
-       end = min_t(unsigned int, len, PAGE_SIZE);
-
-       while (i < end) {
-               memcpy(p, kmap_buf, block_size);
-
-               i += block_size;
-               p += block_size;
-       }
-       kunmap(sg_page(sg));
-
-       return buf;
-}
-
  static sense_reason_t
  fd_execute_write_same(struct se_cmd *cmd)
  {
         struct se_device *se_dev = cmd->se_dev;
         struct fd_dev *fd_dev = FD_DEV(se_dev);
-       struct file *f = fd_dev->fd_file;
-       struct scatterlist *sg;
-       struct iovec *iov;
-       mm_segment_t old_fs;
-       sector_t nolb = sbc_get_write_same_sectors(cmd);
         loff_t pos = cmd->t_task_lba * se_dev->dev_attrib.block_size;
-       unsigned int len, len_tmp, iov_num;
-       int i, rc;
-       unsigned char *buf;
+       sector_t nolb = sbc_get_write_same_sectors(cmd);
+       struct iov_iter iter;
+       struct bio_vec *bvec;
+       unsigned int len = 0, i;
+       ssize_t ret;
  
         if (!nolb) {
                 target_complete_cmd(cmd, SAM_STAT_GOOD);
@@ -499,56 +446,92 @@ fd_execute_write_same(struct se_cmd *cmd)
                        " backends not supported\n");
                 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
         }
-       sg = &cmd->t_data_sg[0];
  
         if (cmd->t_data_nents > 1 ||
-           sg->length != cmd->se_dev->dev_attrib.block_size) {
+           cmd->t_data_sg[0].length != cmd->se_dev->dev_attrib.block_size) {
                 pr_err("WRITE_SAME: Illegal SGL t_data_nents: %u length: %u"
-                       " block_size: %u\n", cmd->t_data_nents, sg->length,
+                       " block_size: %u\n",
+                       cmd->t_data_nents,
+                       cmd->t_data_sg[0].length,
                         cmd->se_dev->dev_attrib.block_size);
                 return TCM_INVALID_CDB_FIELD;
         }
  
-       len = len_tmp = nolb * se_dev->dev_attrib.block_size;
-       iov_num = DIV_ROUND_UP(len, PAGE_SIZE);
-
-       buf = fd_setup_write_same_buf(cmd, sg, len);
-       if (!buf)
+       bvec = kcalloc(nolb, sizeof(struct bio_vec), GFP_KERNEL);
+       if (!bvec)
                 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
  
-       iov = vzalloc(sizeof(struct iovec) * iov_num);
-       if (!iov) {
-               pr_err("Unable to allocate fd_execute_write_same iovecs\n");
-               kfree(buf);
+       for (i = 0; i < nolb; i++) {
+               bvec[i].bv_page = sg_page(&cmd->t_data_sg[0]);
+               bvec[i].bv_len = cmd->t_data_sg[0].length;
+               bvec[i].bv_offset = cmd->t_data_sg[0].offset;
+
+               len += se_dev->dev_attrib.block_size;
+       }
+
+       iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len);
+       ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos);
+
+       kfree(bvec);
+       if (ret < 0 || ret != len) {
+               pr_err("vfs_iter_write() returned %zd for write same\n", ret);
                 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
         }
-       /*
-        * Map the single fabric received scatterlist block now populated
-        * in *buf into each iovec for I/O submission.
-        */
-       for (i = 0; i < iov_num; i++) {
-               iov[i].iov_base = buf;
-               iov[i].iov_len = min_t(unsigned int, len_tmp, PAGE_SIZE);
-               len_tmp -= iov[i].iov_len;
+
+       target_complete_cmd(cmd, SAM_STAT_GOOD);
+       return 0;
+}
+
+static int
+fd_do_prot_fill(struct se_device *se_dev, sector_t lba, sector_t nolb,
+               void *buf, size_t bufsize)
+{
+       struct fd_dev *fd_dev = FD_DEV(se_dev);
+       struct file *prot_fd = fd_dev->fd_prot_file;
+       sector_t prot_length, prot;
+       loff_t pos = lba * se_dev->prot_length;
+
+       if (!prot_fd) {
+               pr_err("Unable to locate fd_dev->fd_prot_file\n");
+               return -ENODEV;
         }
  
-       old_fs = get_fs();
-       set_fs(get_ds());
-       rc = vfs_writev(f, &iov[0], iov_num, &pos);
-       set_fs(old_fs);
+       prot_length = nolb * se_dev->prot_length;
  
-       vfree(iov);
-       kfree(buf);
+       for (prot = 0; prot < prot_length;) {
+               sector_t len = min_t(sector_t, bufsize, prot_length - prot);
+               ssize_t ret = kernel_write(prot_fd, buf, len, pos + prot);
  
-       if (rc < 0 || rc != len) {
-               pr_err("vfs_writev() returned %d for write same\n", rc);
-               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+               if (ret != len) {
+                       pr_err("vfs_write to prot file failed: %zd\n", ret);
+                       return ret < 0 ? ret : -ENODEV;
+               }
+               prot += ret;
         }
  
-       target_complete_cmd(cmd, SAM_STAT_GOOD);
         return 0;
  }
  
+static int
+fd_do_prot_unmap(struct se_cmd *cmd, sector_t lba, sector_t nolb)
+{
+       void *buf;
+       int rc;
+
+       buf = (void *)__get_free_page(GFP_KERNEL);
+       if (!buf) {
+               pr_err("Unable to allocate FILEIO prot buf\n");
+               return -ENOMEM;
+       }
+       memset(buf, 0xff, PAGE_SIZE);
+
+       rc = fd_do_prot_fill(cmd->se_dev, lba, nolb, buf, PAGE_SIZE);
+
+       free_page((unsigned long)buf);
+
+       return rc;
+}
+
  static sense_reason_t
  fd_do_unmap(struct se_cmd *cmd, void *priv, sector_t lba, sector_t nolb)
  {
@@ -556,6 +539,12 @@ fd_do_unmap(struct se_cmd *cmd, void *priv, sector_t lba, sector_t nolb)
         struct inode *inode = file->f_mapping->host;
         int ret;
  
+       if (cmd->se_dev->dev_attrib.pi_prot_type) {
+               ret = fd_do_prot_unmap(cmd, lba, nolb);
+               if (ret)
+                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+       }
+
         if (S_ISBLK(inode->i_mode)) {
                 /* The backend is block device, use discard */
                 struct block_device *bdev = inode->i_bdev;
@@ -595,7 +584,7 @@ fd_execute_write_same_unmap(struct se_cmd *cmd)
         struct file *file = fd_dev->fd_file;
         sector_t lba = cmd->t_task_lba;
         sector_t nolb = sbc_get_write_same_sectors(cmd);
-       int ret;
+       sense_reason_t ret;
  
         if (!nolb) {
                 target_complete_cmd(cmd, SAM_STAT_GOOD);
@@ -643,7 +632,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
         if (data_direction == DMA_FROM_DEVICE) {
                 memset(&fd_prot, 0, sizeof(struct fd_prot));
  
-               if (cmd->prot_type) {
+               if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                         ret = fd_do_prot_rw(cmd, &fd_prot, false);
                         if (ret < 0)
                                 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -651,23 +640,23 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
  
                 ret = fd_do_rw(cmd, sgl, sgl_nents, 0);
  
-               if (ret > 0 && cmd->prot_type) {
+               if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                         u32 sectors = cmd->data_length / dev->dev_attrib.block_size;
  
                         rc = sbc_dif_verify_read(cmd, cmd->t_task_lba, sectors,
                                                  0, fd_prot.prot_sg, 0);
                         if (rc) {
                                 kfree(fd_prot.prot_sg);
-                               vfree(fd_prot.prot_buf);
+                               kfree(fd_prot.prot_buf);
                                 return rc;
                         }
                         kfree(fd_prot.prot_sg);
-                       vfree(fd_prot.prot_buf);
+                       kfree(fd_prot.prot_buf);
                 }
         } else {
                 memset(&fd_prot, 0, sizeof(struct fd_prot));
  
-               if (cmd->prot_type) {
+               if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                         u32 sectors = cmd->data_length / dev->dev_attrib.block_size;
  
                         ret = fd_do_prot_rw(cmd, &fd_prot, false);
@@ -678,7 +667,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                                                   0, fd_prot.prot_sg, 0);
                         if (rc) {
                                 kfree(fd_prot.prot_sg);
-                               vfree(fd_prot.prot_buf);
+                               kfree(fd_prot.prot_buf);
                                 return rc;
                         }
                 }
@@ -705,7 +694,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                         vfs_fsync_range(fd_dev->fd_file, start, end, 1);
                 }
  
-               if (ret > 0 && cmd->prot_type) {
+               if (ret > 0 && cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                         ret = fd_do_prot_rw(cmd, &fd_prot, true);
                         if (ret < 0)
                                 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -714,7 +703,7 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
  
         if (ret < 0) {
                 kfree(fd_prot.prot_sg);
-               vfree(fd_prot.prot_buf);
+               kfree(fd_prot.prot_buf);
                 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
         }
  
@@ -878,48 +867,28 @@ static int fd_init_prot(struct se_device *dev)
  
  static int fd_format_prot(struct se_device *dev)
  {
-       struct fd_dev *fd_dev = FD_DEV(dev);
-       struct file *prot_fd = fd_dev->fd_prot_file;
-       sector_t prot_length, prot;
         unsigned char *buf;
-       loff_t pos = 0;
         int unit_size = FDBD_FORMAT_UNIT_SIZE * dev->dev_attrib.block_size;
-       int rc, ret = 0, size, len;
+       int ret;
  
         if (!dev->dev_attrib.pi_prot_type) {
                 pr_err("Unable to format_prot while pi_prot_type == 0\n");
                 return -ENODEV;
         }
-       if (!prot_fd) {
-               pr_err("Unable to locate fd_dev->fd_prot_file\n");
-               return -ENODEV;
-       }
  
         buf = vzalloc(unit_size);
         if (!buf) {
                 pr_err("Unable to allocate FILEIO prot buf\n");
                 return -ENOMEM;
         }
-       prot_length = (dev->transport->get_blocks(dev) + 1) * dev->prot_length;
-       size = prot_length;
  
         pr_debug("Using FILEIO prot_length: %llu\n",
-                (unsigned long long)prot_length);
+                (unsigned long long)(dev->transport->get_blocks(dev) + 1) *
+                                       dev->prot_length);
  
         memset(buf, 0xff, unit_size);
-       for (prot = 0; prot < prot_length; prot += unit_size) {
-               len = min(unit_size, size);
-               rc = kernel_write(prot_fd, buf, len, pos);
-               if (rc != len) {
-                       pr_err("vfs_write to prot file failed: %d\n", rc);
-                       ret = -ENODEV;
-                       goto out;
-               }
-               pos += len;
-               size -= len;
-       }
-
-out:
+       ret = fd_do_prot_fill(dev, 0, dev->transport->get_blocks(dev) + 1,
+                             buf, unit_size);
         vfree(buf);
         return ret;
  }
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c

index d4a4b0f..1b7947c 100644 (file)
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -444,7 +444,7 @@ iblock_execute_write_same_unmap(struct se_cmd *cmd)
         struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd;
         sector_t lba = cmd->t_task_lba;
         sector_t nolb = sbc_get_write_same_sectors(cmd);
-       int ret;
+       sense_reason_t ret;
  
         ret = iblock_do_unmap(cmd, bdev, lba, nolb);
         if (ret)
@@ -774,7 +774,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                 sg_num--;
         }
  
-       if (cmd->prot_type) {
+       if (cmd->prot_type && dev->dev_attrib.pi_prot_type) {
                 int rc = iblock_alloc_bip(cmd, bio_start);
                 if (rc)
                         goto fail_put_bios;
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h

index 60381db..874a9bc 100644 (file)
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -4,7 +4,13 @@
  /* target_core_alua.c */
  extern struct t10_alua_lu_gp *default_lu_gp;
  
+/* target_core_configfs.c */
+extern struct configfs_subsystem *target_core_subsystem[];
+
  /* target_core_device.c */
+extern struct mutex g_device_mutex;
+extern struct list_head g_device_list;
+
  struct se_dev_entry *core_get_se_deve_from_rtpi(struct se_node_acl *, u16);
  int    core_free_device_list_for_node(struct se_node_acl *,
                 struct se_portal_group *);
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c

index 2de6fb8..c1aa965 100644 (file)
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -78,6 +78,22 @@ enum preempt_type {
  static void __core_scsi3_complete_pro_release(struct se_device *, struct se_node_acl *,
                                               struct t10_pr_registration *, int, int);
  
+static int is_reservation_holder(
+       struct t10_pr_registration *pr_res_holder,
+       struct t10_pr_registration *pr_reg)
+{
+       int pr_res_type;
+
+       if (pr_res_holder) {
+               pr_res_type = pr_res_holder->pr_res_type;
+
+               return pr_res_holder == pr_reg ||
+                      pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG ||
+                      pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG;
+       }
+       return 0;
+}
+
  static sense_reason_t
  target_scsi2_reservation_check(struct se_cmd *cmd)
  {
@@ -664,7 +680,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
         struct se_dev_entry *deve_tmp;
         struct se_node_acl *nacl_tmp;
         struct se_port *port, *port_tmp;
-       struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
+       const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
         struct t10_pr_registration *pr_reg, *pr_reg_atp, *pr_reg_tmp, *pr_reg_tmp_safe;
         int ret;
         /*
@@ -963,7 +979,7 @@ int core_scsi3_check_aptpl_registration(
  }
  
  static void __core_scsi3_dump_registration(
-       struct target_core_fabric_ops *tfo,
+       const struct target_core_fabric_ops *tfo,
         struct se_device *dev,
         struct se_node_acl *nacl,
         struct t10_pr_registration *pr_reg,
@@ -1004,7 +1020,7 @@ static void __core_scsi3_add_registration(
         enum register_type register_type,
         int register_move)
  {
-       struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
+       const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
         struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe;
         struct t10_reservation *pr_tmpl = &dev->t10_pr;
  
@@ -1220,8 +1236,10 @@ static void __core_scsi3_free_registration(
         struct t10_pr_registration *pr_reg,
         struct list_head *preempt_and_abort_list,
         int dec_holders)
+       __releases(&pr_tmpl->registration_lock)
+       __acquires(&pr_tmpl->registration_lock)
  {
-       struct target_core_fabric_ops *tfo =
+       const struct target_core_fabric_ops *tfo =
                         pr_reg->pr_reg_nacl->se_tpg->se_tpg_tfo;
         struct t10_reservation *pr_tmpl = &dev->t10_pr;
         char i_buf[PR_REG_ISID_ID_LEN];
@@ -1445,7 +1463,7 @@ core_scsi3_decode_spec_i_port(
         struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe;
         LIST_HEAD(tid_dest_list);
         struct pr_transport_id_holder *tidh_new, *tidh, *tidh_tmp;
-       struct target_core_fabric_ops *tmp_tf_ops;
+       const struct target_core_fabric_ops *tmp_tf_ops;
         unsigned char *buf;
         unsigned char *ptr, *i_str = NULL, proto_ident, tmp_proto_ident;
         char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN];
@@ -2287,7 +2305,6 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key)
         spin_lock(&dev->dev_reservation_lock);
         pr_res_holder = dev->dev_pr_res_holder;
         if (pr_res_holder) {
-               int pr_res_type = pr_res_holder->pr_res_type;
                 /*
                  * From spc4r17 Section 5.7.9: Reserving:
                  *
@@ -2298,9 +2315,7 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key)
                  * the logical unit, then the command shall be completed with
                  * RESERVATION CONFLICT status.
                  */
-               if ((pr_res_holder != pr_reg) &&
-                   (pr_res_type != PR_TYPE_WRITE_EXCLUSIVE_ALLREG) &&
-                   (pr_res_type != PR_TYPE_EXCLUSIVE_ACCESS_ALLREG)) {
+               if (!is_reservation_holder(pr_res_holder, pr_reg)) {
                         struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
                         pr_err("SPC-3 PR: Attempted RESERVE from"
                                 " [%s]: %s while reservation already held by"
@@ -2409,7 +2424,7 @@ static void __core_scsi3_complete_pro_release(
         int explicit,
         int unreg)
  {
-       struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo;
+       const struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo;
         char i_buf[PR_REG_ISID_ID_LEN];
         int pr_res_type = 0, pr_res_scope = 0;
  
@@ -2477,7 +2492,6 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope,
         struct se_lun *se_lun = cmd->se_lun;
         struct t10_pr_registration *pr_reg, *pr_reg_p, *pr_res_holder;
         struct t10_reservation *pr_tmpl = &dev->t10_pr;
-       int all_reg = 0;
         sense_reason_t ret = 0;
  
         if (!se_sess || !se_lun) {
@@ -2514,13 +2528,9 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope,
                 spin_unlock(&dev->dev_reservation_lock);
                 goto out_put_pr_reg;
         }
-       if ((pr_res_holder->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG) ||
-           (pr_res_holder->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG))
-               all_reg = 1;
  
-       if ((all_reg == 0) && (pr_res_holder != pr_reg)) {
+       if (!is_reservation_holder(pr_res_holder, pr_reg)) {
                 /*
-                * Non 'All Registrants' PR Type cases..
                  * Release request from a registered I_T nexus that is not a
                  * persistent reservation holder. return GOOD status.
                  */
@@ -2726,7 +2736,7 @@ static void __core_scsi3_complete_pro_preempt(
         enum preempt_type preempt_type)
  {
         struct se_node_acl *nacl = pr_reg->pr_reg_nacl;
-       struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
+       const struct target_core_fabric_ops *tfo = nacl->se_tpg->se_tpg_tfo;
         char i_buf[PR_REG_ISID_ID_LEN];
  
         memset(i_buf, 0, PR_REG_ISID_ID_LEN);
@@ -3111,7 +3121,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
         struct se_node_acl *pr_res_nacl, *pr_reg_nacl, *dest_node_acl = NULL;
         struct se_port *se_port;
         struct se_portal_group *se_tpg, *dest_se_tpg = NULL;
-       struct target_core_fabric_ops *dest_tf_ops = NULL, *tf_ops;
+       const struct target_core_fabric_ops *dest_tf_ops = NULL, *tf_ops;
         struct t10_pr_registration *pr_reg, *pr_res_holder, *dest_pr_reg;
         struct t10_reservation *pr_tmpl = &dev->t10_pr;
         unsigned char *buf;
@@ -3375,7 +3385,7 @@ after_iport_check:
          * From spc4r17 section 5.7.8  Table 50 --
          *      Register behaviors for a REGISTER AND MOVE service action
          */
-       if (pr_res_holder != pr_reg) {
+       if (!is_reservation_holder(pr_res_holder, pr_reg)) {
                 pr_warn("SPC-3 PR REGISTER_AND_MOVE: Calling I_T"
                         " Nexus is not reservation holder\n");
                 spin_unlock(&dev->dev_reservation_lock);
diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c

index 98e83ac..a263bf5 100644 (file)
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c
@@ -139,10 +139,22 @@ static int rd_allocate_sgl_table(struct rd_dev *rd_dev, struct rd_dev_sg_table *
         unsigned char *p;
  
         while (total_sg_needed) {
+               unsigned int chain_entry = 0;
+
                 sg_per_table = (total_sg_needed > max_sg_per_table) ?
                         max_sg_per_table : total_sg_needed;
  
-               sg = kzalloc(sg_per_table * sizeof(struct scatterlist),
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
+
+               /*
+                * Reserve extra element for chain entry
+                */
+               if (sg_per_table < total_sg_needed)
+                       chain_entry = 1;
+
+#endif /* CONFIG_ARCH_HAS_SG_CHAIN */
+
+               sg = kcalloc(sg_per_table + chain_entry, sizeof(*sg),
                                 GFP_KERNEL);
                 if (!sg) {
                         pr_err("Unable to allocate scatterlist array"
@@ -150,7 +162,16 @@ static int rd_allocate_sgl_table(struct rd_dev *rd_dev, struct rd_dev_sg_table *
                         return -ENOMEM;
                 }
  
-               sg_init_table(sg, sg_per_table);
+               sg_init_table(sg, sg_per_table + chain_entry);
+
+#ifdef CONFIG_ARCH_HAS_SG_CHAIN
+
+               if (i > 0) {
+                       sg_chain(sg_table[i - 1].sg_table,
+                                max_sg_per_table + 1, sg);
+               }
+
+#endif /* CONFIG_ARCH_HAS_SG_CHAIN */
  
                 sg_table[i].sg_table = sg;
                 sg_table[i].rd_sg_count = sg_per_table;
@@ -382,6 +403,76 @@ static struct rd_dev_sg_table *rd_get_prot_table(struct rd_dev *rd_dev, u32 page
         return NULL;
  }
  
+typedef sense_reason_t (*dif_verify)(struct se_cmd *, sector_t, unsigned int,
+                                    unsigned int, struct scatterlist *, int);
+
+static sense_reason_t rd_do_prot_rw(struct se_cmd *cmd, dif_verify dif_verify)
+{
+       struct se_device *se_dev = cmd->se_dev;
+       struct rd_dev *dev = RD_DEV(se_dev);
+       struct rd_dev_sg_table *prot_table;
+       bool need_to_release = false;
+       struct scatterlist *prot_sg;
+       u32 sectors = cmd->data_length / se_dev->dev_attrib.block_size;
+       u32 prot_offset, prot_page;
+       u32 prot_npages __maybe_unused;
+       u64 tmp;
+       sense_reason_t rc = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+       tmp = cmd->t_task_lba * se_dev->prot_length;
+       prot_offset = do_div(tmp, PAGE_SIZE);
+       prot_page = tmp;
+
+       prot_table = rd_get_prot_table(dev, prot_page);
+       if (!prot_table)
+               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+       prot_sg = &prot_table->sg_table[prot_page -
+                                       prot_table->page_start_offset];
+
+#ifndef CONFIG_ARCH_HAS_SG_CHAIN
+
+       prot_npages = DIV_ROUND_UP(prot_offset + sectors * se_dev->prot_length,
+                                  PAGE_SIZE);
+
+       /*
+        * Allocate temporaly contiguous scatterlist entries if prot pages
+        * straddles multiple scatterlist tables.
+        */
+       if (prot_table->page_end_offset < prot_page + prot_npages - 1) {
+               int i;
+
+               prot_sg = kcalloc(prot_npages, sizeof(*prot_sg), GFP_KERNEL);
+               if (!prot_sg)
+                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+
+               need_to_release = true;
+               sg_init_table(prot_sg, prot_npages);
+
+               for (i = 0; i < prot_npages; i++) {
+                       if (prot_page + i > prot_table->page_end_offset) {
+                               prot_table = rd_get_prot_table(dev,
+                                                               prot_page + i);
+                               if (!prot_table) {
+                                       kfree(prot_sg);
+                                       return rc;
+                               }
+                               sg_unmark_end(&prot_sg[i - 1]);
+                       }
+                       prot_sg[i] = prot_table->sg_table[prot_page + i -
+                                               prot_table->page_start_offset];
+               }
+       }
+
+#endif /* !CONFIG_ARCH_HAS_SG_CHAIN */
+
+       rc = dif_verify(cmd, cmd->t_task_lba, sectors, 0, prot_sg, prot_offset);
+       if (need_to_release)
+               kfree(prot_sg);
+
+       return rc;
+}
+
  static sense_reason_t
  rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
               enum dma_data_direction data_direction)
@@ -419,24 +510,9 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
                         data_direction == DMA_FROM_DEVICE ? "Read" : "Write",
                         cmd->t_task_lba, rd_size, rd_page, rd_offset);
  
-       if (cmd->prot_type && data_direction == DMA_TO_DEVICE) {
-               struct rd_dev_sg_table *prot_table;
-               struct scatterlist *prot_sg;
-               u32 sectors = cmd->data_length / se_dev->dev_attrib.block_size;
-               u32 prot_offset, prot_page;
-
-               tmp = cmd->t_task_lba * se_dev->prot_length;
-               prot_offset = do_div(tmp, PAGE_SIZE);
-               prot_page = tmp;
-
-               prot_table = rd_get_prot_table(dev, prot_page);
-               if (!prot_table)
-                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-
-               prot_sg = &prot_table->sg_table[prot_page - prot_table->page_start_offset];
-
-               rc = sbc_dif_verify_write(cmd, cmd->t_task_lba, sectors, 0,
-                                         prot_sg, prot_offset);
+       if (cmd->prot_type && se_dev->dev_attrib.pi_prot_type &&
+           data_direction == DMA_TO_DEVICE) {
+               rc = rd_do_prot_rw(cmd, sbc_dif_verify_write);
                 if (rc)
                         return rc;
         }
@@ -502,24 +578,9 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
         }
         sg_miter_stop(&m);
  
-       if (cmd->prot_type && data_direction == DMA_FROM_DEVICE) {
-               struct rd_dev_sg_table *prot_table;
-               struct scatterlist *prot_sg;
-               u32 sectors = cmd->data_length / se_dev->dev_attrib.block_size;
-               u32 prot_offset, prot_page;
-
-               tmp = cmd->t_task_lba * se_dev->prot_length;
-               prot_offset = do_div(tmp, PAGE_SIZE);
-               prot_page = tmp;
-
-               prot_table = rd_get_prot_table(dev, prot_page);
-               if (!prot_table)
-                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-
-               prot_sg = &prot_table->sg_table[prot_page - prot_table->page_start_offset];
-
-               rc = sbc_dif_verify_read(cmd, cmd->t_task_lba, sectors, 0,
-                                        prot_sg, prot_offset);
+       if (cmd->prot_type && se_dev->dev_attrib.pi_prot_type &&
+           data_direction == DMA_FROM_DEVICE) {
+               rc = rd_do_prot_rw(cmd, sbc_dif_verify_read);
                 if (rc)
                         return rc;
         }
diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c

index 3e72974..8855781 100644 (file)
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -93,6 +93,8 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd)
  {
         struct se_device *dev = cmd->se_dev;
         struct se_session *sess = cmd->se_sess;
+       int pi_prot_type = dev->dev_attrib.pi_prot_type;
+
         unsigned char *rbuf;
         unsigned char buf[32];
         unsigned long long blocks = dev->transport->get_blocks(dev);
@@ -114,8 +116,15 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd)
          * Set P_TYPE and PROT_EN bits for DIF support
          */
         if (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) {
-               if (dev->dev_attrib.pi_prot_type)
-                       buf[12] = (dev->dev_attrib.pi_prot_type - 1) << 1 | 0x1;
+               /*
+                * Only override a device's pi_prot_type if no T10-PI is
+                * available, and sess_prot_type has been explicitly enabled.
+                */
+               if (!pi_prot_type)
+                       pi_prot_type = sess->sess_prot_type;
+
+               if (pi_prot_type)
+                       buf[12] = (pi_prot_type - 1) << 1 | 0x1;
         }
  
         if (dev->transport->get_lbppbe)
@@ -312,7 +321,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
         return 0;
  }
  
-static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd)
+static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd, bool success)
  {
         unsigned char *buf, *addr;
         struct scatterlist *sg;
@@ -376,7 +385,7 @@ sbc_execute_rw(struct se_cmd *cmd)
                                cmd->data_direction);
  }
  
-static sense_reason_t compare_and_write_post(struct se_cmd *cmd)
+static sense_reason_t compare_and_write_post(struct se_cmd *cmd, bool success)
  {
         struct se_device *dev = cmd->se_dev;
  
@@ -399,7 +408,7 @@ static sense_reason_t compare_and_write_post(struct se_cmd *cmd)
         return TCM_NO_SENSE;
  }
  
-static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
+static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool success)
  {
         struct se_device *dev = cmd->se_dev;
         struct scatterlist *write_sg = NULL, *sg;
@@ -414,10 +423,15 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
  
         /*
          * Handle early failure in transport_generic_request_failure(),
-        * which will not have taken ->caw_mutex yet..
+        * which will not have taken ->caw_sem yet..
          */
-       if (!cmd->t_data_sg || !cmd->t_bidi_data_sg)
+       if (!success && (!cmd->t_data_sg || !cmd->t_bidi_data_sg))
                 return TCM_NO_SENSE;
+       /*
+        * Handle special case for zero-length COMPARE_AND_WRITE
+        */
+       if (!cmd->data_length)
+               goto out;
         /*
          * Immediately exit + release dev->caw_sem if command has already
          * been failed with a non-zero SCSI status.
@@ -581,12 +595,13 @@ sbc_compare_and_write(struct se_cmd *cmd)
  }
  
  static int
-sbc_set_prot_op_checks(u8 protect, enum target_prot_type prot_type,
+sbc_set_prot_op_checks(u8 protect, bool fabric_prot, enum target_prot_type prot_type,
                        bool is_write, struct se_cmd *cmd)
  {
         if (is_write) {
-               cmd->prot_op = protect ? TARGET_PROT_DOUT_PASS :
-                                        TARGET_PROT_DOUT_INSERT;
+               cmd->prot_op = fabric_prot ? TARGET_PROT_DOUT_STRIP :
+                              protect ? TARGET_PROT_DOUT_PASS :
+                              TARGET_PROT_DOUT_INSERT;
                 switch (protect) {
                 case 0x0:
                 case 0x3:
@@ -610,8 +625,9 @@ sbc_set_prot_op_checks(u8 protect, enum target_prot_type prot_type,
                         return -EINVAL;
                 }
         } else {
-               cmd->prot_op = protect ? TARGET_PROT_DIN_PASS :
-                                        TARGET_PROT_DIN_STRIP;
+               cmd->prot_op = fabric_prot ? TARGET_PROT_DIN_INSERT :
+                              protect ? TARGET_PROT_DIN_PASS :
+                              TARGET_PROT_DIN_STRIP;
                 switch (protect) {
                 case 0x0:
                 case 0x1:
@@ -644,11 +660,15 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb,
                u32 sectors, bool is_write)
  {
         u8 protect = cdb[1] >> 5;
+       int sp_ops = cmd->se_sess->sup_prot_ops;
+       int pi_prot_type = dev->dev_attrib.pi_prot_type;
+       bool fabric_prot = false;
  
         if (!cmd->t_prot_sg || !cmd->t_prot_nents) {
-               if (protect && !dev->dev_attrib.pi_prot_type) {
-                       pr_err("CDB contains protect bit, but device does not"
-                              " advertise PROTECT=1 feature bit\n");
+               if (unlikely(protect &&
+                   !dev->dev_attrib.pi_prot_type && !cmd->se_sess->sess_prot_type)) {
+                       pr_err("CDB contains protect bit, but device + fabric does"
+                              " not advertise PROTECT=1 feature bit\n");
                         return TCM_INVALID_CDB_FIELD;
                 }
                 if (cmd->prot_pto)
@@ -669,15 +689,32 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb,
                 cmd->reftag_seed = cmd->t_task_lba;
                 break;
         case TARGET_DIF_TYPE0_PROT:
+               /*
+                * See if the fabric supports T10-PI, and the session has been
+                * configured to allow export PROTECT=1 feature bit with backend
+                * devices that don't support T10-PI.
+                */
+               fabric_prot = is_write ?
+                             !!(sp_ops & (TARGET_PROT_DOUT_PASS | TARGET_PROT_DOUT_STRIP)) :
+                             !!(sp_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DIN_INSERT));
+
+               if (fabric_prot && cmd->se_sess->sess_prot_type) {
+                       pi_prot_type = cmd->se_sess->sess_prot_type;
+                       break;
+               }
+               if (!protect)
+                       return TCM_NO_SENSE;
+               /* Fallthrough */
         default:
-               return TCM_NO_SENSE;
+               pr_err("Unable to determine pi_prot_type for CDB: 0x%02x "
+                      "PROTECT: 0x%02x\n", cdb[0], protect);
+               return TCM_INVALID_CDB_FIELD;
         }
  
-       if (sbc_set_prot_op_checks(protect, dev->dev_attrib.pi_prot_type,
-                                  is_write, cmd))
+       if (sbc_set_prot_op_checks(protect, fabric_prot, pi_prot_type, is_write, cmd))
                 return TCM_INVALID_CDB_FIELD;
  
-       cmd->prot_type = dev->dev_attrib.pi_prot_type;
+       cmd->prot_type = pi_prot_type;
         cmd->prot_length = dev->prot_length * sectors;
  
         /**
@@ -1166,14 +1203,16 @@ sbc_dif_generate(struct se_cmd *cmd)
                         sdt = paddr + offset;
                         sdt->guard_tag = cpu_to_be16(crc_t10dif(daddr + j,
                                                 dev->dev_attrib.block_size));
-                       if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE1_PROT)
+                       if (cmd->prot_type == TARGET_DIF_TYPE1_PROT)
                                 sdt->ref_tag = cpu_to_be32(sector & 0xffffffff);
                         sdt->app_tag = 0;
  
-                       pr_debug("DIF WRITE INSERT sector: %llu guard_tag: 0x%04x"
+                       pr_debug("DIF %s INSERT sector: %llu guard_tag: 0x%04x"
                                  " app_tag: 0x%04x ref_tag: %u\n",
-                                (unsigned long long)sector, sdt->guard_tag,
-                                sdt->app_tag, be32_to_cpu(sdt->ref_tag));
+                                (cmd->data_direction == DMA_TO_DEVICE) ?
+                                "WRITE" : "READ", (unsigned long long)sector,
+                                sdt->guard_tag, sdt->app_tag,
+                                be32_to_cpu(sdt->ref_tag));
  
                         sector++;
                         offset += sizeof(struct se_dif_v1_tuple);
@@ -1185,12 +1224,16 @@ sbc_dif_generate(struct se_cmd *cmd)
  }
  
  static sense_reason_t
-sbc_dif_v1_verify(struct se_device *dev, struct se_dif_v1_tuple *sdt,
+sbc_dif_v1_verify(struct se_cmd *cmd, struct se_dif_v1_tuple *sdt,
                   const void *p, sector_t sector, unsigned int ei_lba)
  {
+       struct se_device *dev = cmd->se_dev;
         int block_size = dev->dev_attrib.block_size;
         __be16 csum;
  
+       if (!(cmd->prot_checks & TARGET_DIF_CHECK_GUARD))
+               goto check_ref;
+
         csum = cpu_to_be16(crc_t10dif(p, block_size));
  
         if (sdt->guard_tag != csum) {
@@ -1200,7 +1243,11 @@ sbc_dif_v1_verify(struct se_device *dev, struct se_dif_v1_tuple *sdt,
                 return TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED;
         }
  
-       if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE1_PROT &&
+check_ref:
+       if (!(cmd->prot_checks & TARGET_DIF_CHECK_REFTAG))
+               return 0;
+
+       if (cmd->prot_type == TARGET_DIF_TYPE1_PROT &&
             be32_to_cpu(sdt->ref_tag) != (sector & 0xffffffff)) {
                 pr_err("DIFv1 Type 1 reference failed on sector: %llu tag: 0x%08x"
                        " sector MSB: 0x%08x\n", (unsigned long long)sector,
@@ -1208,7 +1255,7 @@ sbc_dif_v1_verify(struct se_device *dev, struct se_dif_v1_tuple *sdt,
                 return TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED;
         }
  
-       if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE2_PROT &&
+       if (cmd->prot_type == TARGET_DIF_TYPE2_PROT &&
             be32_to_cpu(sdt->ref_tag) != ei_lba) {
                 pr_err("DIFv1 Type 2 reference failed on sector: %llu tag: 0x%08x"
                        " ei_lba: 0x%08x\n", (unsigned long long)sector,
@@ -1229,6 +1276,9 @@ sbc_dif_copy_prot(struct se_cmd *cmd, unsigned int sectors, bool read,
         unsigned int i, len, left;
         unsigned int offset = sg_off;
  
+       if (!sg)
+               return;
+
         left = sectors * dev->prot_length;
  
         for_each_sg(cmd->t_prot_sg, psg, cmd->t_prot_nents, i) {
@@ -1292,7 +1342,7 @@ sbc_dif_verify_write(struct se_cmd *cmd, sector_t start, unsigned int sectors,
                                  (unsigned long long)sector, sdt->guard_tag,
                                  sdt->app_tag, be32_to_cpu(sdt->ref_tag));
  
-                       rc = sbc_dif_v1_verify(dev, sdt, daddr + j, sector,
+                       rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector,
                                                ei_lba);
                         if (rc) {
                                 kunmap_atomic(paddr);
@@ -1309,6 +1359,9 @@ sbc_dif_verify_write(struct se_cmd *cmd, sector_t start, unsigned int sectors,
                 kunmap_atomic(paddr);
                 kunmap_atomic(daddr);
         }
+       if (!sg)
+               return 0;
+
         sbc_dif_copy_prot(cmd, sectors, false, sg, sg_off);
  
         return 0;
@@ -1353,7 +1406,7 @@ __sbc_dif_verify_read(struct se_cmd *cmd, sector_t start, unsigned int sectors,
                                 continue;
                         }
  
-                       rc = sbc_dif_v1_verify(dev, sdt, daddr + j, sector,
+                       rc = sbc_dif_v1_verify(cmd, sdt, daddr + j, sector,
                                                ei_lba);
                         if (rc) {
                                 kunmap_atomic(paddr);
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c

index 6c8bd6b..7912aa1 100644 (file)
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -103,10 +103,12 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf)
                 buf[5] |= 0x8;
         /*
          * Set Protection (PROTECT) bit when DIF has been enabled on the
-        * device, and the transport supports VERIFY + PASS.
+        * device, and the fabric supports VERIFY + PASS.  Also report
+        * PROTECT=1 if sess_prot_type has been configured to allow T10-PI
+        * to unprotected devices.
          */
         if (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) {
-               if (dev->dev_attrib.pi_prot_type)
+               if (dev->dev_attrib.pi_prot_type || cmd->se_sess->sess_prot_type)
                         buf[5] |= 0x1;
         }
  
@@ -467,9 +469,11 @@ spc_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf)
          * only for TYPE3 protection.
          */
         if (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) {
-               if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE1_PROT)
+               if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE1_PROT ||
+                   cmd->se_sess->sess_prot_type == TARGET_DIF_TYPE1_PROT)
                         buf[4] = 0x5;
-               else if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE3_PROT)
+               else if (dev->dev_attrib.pi_prot_type == TARGET_DIF_TYPE3_PROT ||
+                       cmd->se_sess->sess_prot_type == TARGET_DIF_TYPE3_PROT)
                         buf[4] = 0x4;
         }
  
@@ -861,7 +865,7 @@ static int spc_modesense_control(struct se_cmd *cmd, u8 pc, u8 *p)
          * TAG field.
          */
         if (sess->sup_prot_ops & (TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS)) {
-               if (dev->dev_attrib.pi_prot_type)
+               if (dev->dev_attrib.pi_prot_type || sess->sess_prot_type)
                         p[5] |= 0x80;
         }
  
@@ -1099,7 +1103,7 @@ static sense_reason_t spc_emulate_modeselect(struct se_cmd *cmd)
         unsigned char *buf;
         unsigned char tbuf[SE_MODE_PAGE_BUF];
         int length;
-       int ret = 0;
+       sense_reason_t ret = 0;
         int i;
  
         if (!cmd->data_length) {
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c

index fa5e157..315ec34 100644 (file)
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -125,8 +125,8 @@ void core_tmr_abort_task(
                 if (dev != se_cmd->se_dev)
                         continue;
  
-               /* skip se_cmd associated with tmr */
-               if (tmr->task_cmd == se_cmd)
+               /* skip task management functions, including tmr->task_cmd */
+               if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
                         continue;
  
                 ref_tag = se_cmd->se_tfo->get_task_tag(se_cmd);
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c

index 0696de9..47f0644 100644 (file)
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -672,7 +672,7 @@ static int core_tpg_setup_virtual_lun0(struct se_portal_group *se_tpg)
  }
  
  int core_tpg_register(
-       struct target_core_fabric_ops *tfo,
+       const struct target_core_fabric_ops *tfo,
         struct se_wwn *se_wwn,
         struct se_portal_group *se_tpg,
         void *tpg_fabric_ptr,
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c

index ac3cbab..3fe5cb2 100644 (file)
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -322,6 +322,7 @@ void __transport_register_session(
         struct se_session *se_sess,
         void *fabric_sess_ptr)
  {
+       const struct target_core_fabric_ops *tfo = se_tpg->se_tpg_tfo;
         unsigned char buf[PR_REG_ISID_LEN];
  
         se_sess->se_tpg = se_tpg;
@@ -333,6 +334,21 @@ void __transport_register_session(
          * eg: *NOT* discovery sessions.
          */
         if (se_nacl) {
+               /*
+                *
+                * Determine if fabric allows for T10-PI feature bits exposed to
+                * initiators for device backends with !dev->dev_attrib.pi_prot_type.
+                *
+                * If so, then always save prot_type on a per se_node_acl node
+                * basis and re-instate the previous sess_prot_type to avoid
+                * disabling PI from below any previously initiator side
+                * registered LUNs.
+                */
+               if (se_nacl->saved_prot_type)
+                       se_sess->sess_prot_type = se_nacl->saved_prot_type;
+               else if (tfo->tpg_check_prot_fabric_only)
+                       se_sess->sess_prot_type = se_nacl->saved_prot_type =
+                                       tfo->tpg_check_prot_fabric_only(se_tpg);
                 /*
                  * If the fabric module supports an ISID based TransportID,
                  * save this value in binary from the fabric I_T Nexus now.
@@ -404,6 +420,30 @@ void target_put_session(struct se_session *se_sess)
  }
  EXPORT_SYMBOL(target_put_session);
  
+ssize_t target_show_dynamic_sessions(struct se_portal_group *se_tpg, char *page)
+{
+       struct se_session *se_sess;
+       ssize_t len = 0;
+
+       spin_lock_bh(&se_tpg->session_lock);
+       list_for_each_entry(se_sess, &se_tpg->tpg_sess_list, sess_list) {
+               if (!se_sess->se_node_acl)
+                       continue;
+               if (!se_sess->se_node_acl->dynamic_node_acl)
+                       continue;
+               if (strlen(se_sess->se_node_acl->initiatorname) + 1 + len > PAGE_SIZE)
+                       break;
+
+               len += snprintf(page + len, PAGE_SIZE - len, "%s\n",
+                               se_sess->se_node_acl->initiatorname);
+               len += 1; /* Include NULL terminator */
+       }
+       spin_unlock_bh(&se_tpg->session_lock);
+
+       return len;
+}
+EXPORT_SYMBOL(target_show_dynamic_sessions);
+
  static void target_complete_nacl(struct kref *kref)
  {
         struct se_node_acl *nacl = container_of(kref,
@@ -462,7 +502,7 @@ EXPORT_SYMBOL(transport_free_session);
  void transport_deregister_session(struct se_session *se_sess)
  {
         struct se_portal_group *se_tpg = se_sess->se_tpg;
-       struct target_core_fabric_ops *se_tfo;
+       const struct target_core_fabric_ops *se_tfo;
         struct se_node_acl *se_nacl;
         unsigned long flags;
         bool comp_nacl = true;
@@ -1118,7 +1158,7 @@ target_cmd_size_check(struct se_cmd *cmd, unsigned int size)
   */
  void transport_init_se_cmd(
         struct se_cmd *cmd,
-       struct target_core_fabric_ops *tfo,
+       const struct target_core_fabric_ops *tfo,
         struct se_session *se_sess,
         u32 data_length,
         int data_direction,
@@ -1570,6 +1610,8 @@ EXPORT_SYMBOL(target_submit_tmr);
   * has completed.
   */
  bool target_stop_cmd(struct se_cmd *cmd, unsigned long *flags)
+       __releases(&cmd->t_state_lock)
+       __acquires(&cmd->t_state_lock)
  {
         bool was_active = false;
  
@@ -1615,11 +1657,11 @@ void transport_generic_request_failure(struct se_cmd *cmd,
         transport_complete_task_attr(cmd);
         /*
          * Handle special case for COMPARE_AND_WRITE failure, where the
-        * callback is expected to drop the per device ->caw_mutex.
+        * callback is expected to drop the per device ->caw_sem.
          */
         if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
              cmd->transport_complete_callback)
-               cmd->transport_complete_callback(cmd);
+               cmd->transport_complete_callback(cmd, false);
  
         switch (sense_reason) {
         case TCM_NON_EXISTENT_LUN:
@@ -1706,6 +1748,41 @@ void __target_execute_cmd(struct se_cmd *cmd)
         }
  }
  
+static int target_write_prot_action(struct se_cmd *cmd)
+{
+       u32 sectors;
+       /*
+        * Perform WRITE_INSERT of PI using software emulation when backend
+        * device has PI enabled, if the transport has not already generated
+        * PI using hardware WRITE_INSERT offload.
+        */
+       switch (cmd->prot_op) {
+       case TARGET_PROT_DOUT_INSERT:
+               if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DOUT_INSERT))
+                       sbc_dif_generate(cmd);
+               break;
+       case TARGET_PROT_DOUT_STRIP:
+               if (cmd->se_sess->sup_prot_ops & TARGET_PROT_DOUT_STRIP)
+                       break;
+
+               sectors = cmd->data_length >> ilog2(cmd->se_dev->dev_attrib.block_size);
+               cmd->pi_err = sbc_dif_verify_write(cmd, cmd->t_task_lba,
+                                                  sectors, 0, NULL, 0);
+               if (unlikely(cmd->pi_err)) {
+                       spin_lock_irq(&cmd->t_state_lock);
+                       cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT;
+                       spin_unlock_irq(&cmd->t_state_lock);
+                       transport_generic_request_failure(cmd, cmd->pi_err);
+                       return -1;
+               }
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
  static bool target_handle_task_attr(struct se_cmd *cmd)
  {
         struct se_device *dev = cmd->se_dev;
@@ -1785,15 +1862,9 @@ void target_execute_cmd(struct se_cmd *cmd)
         cmd->t_state = TRANSPORT_PROCESSING;
         cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
         spin_unlock_irq(&cmd->t_state_lock);
-       /*
-        * Perform WRITE_INSERT of PI using software emulation when backend
-        * device has PI enabled, if the transport has not already generated
-        * PI using hardware WRITE_INSERT offload.
-        */
-       if (cmd->prot_op == TARGET_PROT_DOUT_INSERT) {
-               if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DOUT_INSERT))
-                       sbc_dif_generate(cmd);
-       }
+
+       if (target_write_prot_action(cmd))
+               return;
  
         if (target_handle_task_attr(cmd)) {
                 spin_lock_irq(&cmd->t_state_lock);
@@ -1919,16 +1990,28 @@ static void transport_handle_queue_full(
         schedule_work(&cmd->se_dev->qf_work_queue);
  }
  
-static bool target_check_read_strip(struct se_cmd *cmd)
+static bool target_read_prot_action(struct se_cmd *cmd)
  {
         sense_reason_t rc;
  
-       if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DIN_STRIP)) {
-               rc = sbc_dif_read_strip(cmd);
-               if (rc) {
-                       cmd->pi_err = rc;
-                       return true;
+       switch (cmd->prot_op) {
+       case TARGET_PROT_DIN_STRIP:
+               if (!(cmd->se_sess->sup_prot_ops & TARGET_PROT_DIN_STRIP)) {
+                       rc = sbc_dif_read_strip(cmd);
+                       if (rc) {
+                               cmd->pi_err = rc;
+                               return true;
+                       }
                 }
+               break;
+       case TARGET_PROT_DIN_INSERT:
+               if (cmd->se_sess->sup_prot_ops & TARGET_PROT_DIN_INSERT)
+                       break;
+
+               sbc_dif_generate(cmd);
+               break;
+       default:
+               break;
         }
  
         return false;
@@ -1975,8 +2058,12 @@ static void target_complete_ok_work(struct work_struct *work)
         if (cmd->transport_complete_callback) {
                 sense_reason_t rc;
  
-               rc = cmd->transport_complete_callback(cmd);
+               rc = cmd->transport_complete_callback(cmd, true);
                 if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) {
+                       if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
+                           !cmd->data_length)
+                               goto queue_rsp;
+
                         return;
                 } else if (rc) {
                         ret = transport_send_check_condition_and_sense(cmd,
@@ -1990,6 +2077,7 @@ static void target_complete_ok_work(struct work_struct *work)
                 }
         }
  
+queue_rsp:
         switch (cmd->data_direction) {
         case DMA_FROM_DEVICE:
                 spin_lock(&cmd->se_lun->lun_sep_lock);
@@ -2003,8 +2091,7 @@ static void target_complete_ok_work(struct work_struct *work)
                  * backend had PI enabled, if the transport will not be
                  * performing hardware READ_STRIP offload.
                  */
-               if (cmd->prot_op == TARGET_PROT_DIN_STRIP &&
-                   target_check_read_strip(cmd)) {
+               if (target_read_prot_action(cmd)) {
                         ret = transport_send_check_condition_and_sense(cmd,
                                                 cmd->pi_err, 0);
                         if (ret == -EAGAIN || ret == -ENOMEM)
@@ -2094,6 +2181,16 @@ static inline void transport_reset_sgl_orig(struct se_cmd *cmd)
  static inline void transport_free_pages(struct se_cmd *cmd)
  {
         if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) {
+               /*
+                * Release special case READ buffer payload required for
+                * SG_TO_MEM_NOALLOC to function with COMPARE_AND_WRITE
+                */
+               if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) {
+                       transport_free_sgl(cmd->t_bidi_data_sg,
+                                          cmd->t_bidi_data_nents);
+                       cmd->t_bidi_data_sg = NULL;
+                       cmd->t_bidi_data_nents = 0;
+               }
                 transport_reset_sgl_orig(cmd);
                 return;
         }
@@ -2246,6 +2343,7 @@ sense_reason_t
  transport_generic_new_cmd(struct se_cmd *cmd)
  {
         int ret = 0;
+       bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB);
  
         /*
          * Determine is the TCM fabric module has already allocated physical
@@ -2254,7 +2352,6 @@ transport_generic_new_cmd(struct se_cmd *cmd)
          */
         if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) &&
             cmd->data_length) {
-               bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB);
  
                 if ((cmd->se_cmd_flags & SCF_BIDI) ||
                     (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)) {
@@ -2285,6 +2382,20 @@ transport_generic_new_cmd(struct se_cmd *cmd)
                                        cmd->data_length, zero_flag);
                 if (ret < 0)
                         return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+       } else if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
+                   cmd->data_length) {
+               /*
+                * Special case for COMPARE_AND_WRITE with fabrics
+                * using SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC.
+                */
+               u32 caw_length = cmd->t_task_nolb *
+                                cmd->se_dev->dev_attrib.block_size;
+
+               ret = target_alloc_sgl(&cmd->t_bidi_data_sg,
+                                      &cmd->t_bidi_data_nents,
+                                      caw_length, zero_flag);
+               if (ret < 0)
+                       return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
         }
         /*
          * If this command is not a write we can execute it right here,
@@ -2376,10 +2487,8 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd,
          * fabric acknowledgement that requires two target_put_sess_cmd()
          * invocations before se_cmd descriptor release.
          */
-       if (ack_kref) {
+       if (ack_kref)
                 kref_get(&se_cmd->cmd_kref);
-               se_cmd->se_cmd_flags |= SCF_ACK_KREF;
-       }
  
         spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
         if (se_sess->sess_tearing_down) {
@@ -2398,6 +2507,7 @@ out:
  EXPORT_SYMBOL(target_get_sess_cmd);
  
  static void target_release_cmd_kref(struct kref *kref)
+               __releases(&se_cmd->se_sess->sess_cmd_lock)
  {
         struct se_cmd *se_cmd = container_of(kref, struct se_cmd, cmd_kref);
         struct se_session *se_sess = se_cmd->se_sess;
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c

index 1a1bcf7..dbc872a 100644 (file)
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -344,8 +344,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
  
                 entry = (void *) mb + CMDR_OFF + cmd_head;
                 tcmu_flush_dcache_range(entry, sizeof(*entry));
-               tcmu_hdr_set_op(&entry->hdr, TCMU_OP_PAD);
-               tcmu_hdr_set_len(&entry->hdr, pad_size);
+               tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_PAD);
+               tcmu_hdr_set_len(&entry->hdr.len_op, pad_size);
+               entry->hdr.cmd_id = 0; /* not used for PAD */
+               entry->hdr.kflags = 0;
+               entry->hdr.uflags = 0;
  
                 UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
  
@@ -355,9 +358,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
  
         entry = (void *) mb + CMDR_OFF + cmd_head;
         tcmu_flush_dcache_range(entry, sizeof(*entry));
-       tcmu_hdr_set_op(&entry->hdr, TCMU_OP_CMD);
-       tcmu_hdr_set_len(&entry->hdr, command_size);
-       entry->cmd_id = tcmu_cmd->cmd_id;
+       tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
+       tcmu_hdr_set_len(&entry->hdr.len_op, command_size);
+       entry->hdr.cmd_id = tcmu_cmd->cmd_id;
+       entry->hdr.kflags = 0;
+       entry->hdr.uflags = 0;
  
         /*
          * Fix up iovecs, and handle if allocation in data ring wrapped.
@@ -376,7 +381,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
  
                 /* Even iov_base is relative to mb_addr */
                 iov->iov_len = copy_bytes;
-               iov->iov_base = (void *) udev->data_off + udev->data_head;
+               iov->iov_base = (void __user *) udev->data_off +
+                                               udev->data_head;
                 iov_cnt++;
                 iov++;
  
@@ -388,7 +394,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
                         copy_bytes = sg->length - copy_bytes;
  
                         iov->iov_len = copy_bytes;
-                       iov->iov_base = (void *) udev->data_off + udev->data_head;
+                       iov->iov_base = (void __user *) udev->data_off +
+                                                       udev->data_head;
  
                         if (se_cmd->data_direction == DMA_TO_DEVICE) {
                                 to = (void *) mb + udev->data_off + udev->data_head;
@@ -405,6 +412,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
                 kunmap_atomic(from);
         }
         entry->req.iov_cnt = iov_cnt;
+       entry->req.iov_bidi_cnt = 0;
+       entry->req.iov_dif_cnt = 0;
  
         /* All offsets relative to mb_addr, not start of entry! */
         cdb_off = CMDR_OFF + cmd_head + base_command_size;
@@ -462,6 +471,17 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
                 return;
         }
  
+       if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) {
+               UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+               pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
+                       cmd->se_cmd);
+               transport_generic_request_failure(cmd->se_cmd,
+                       TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+               cmd->se_cmd = NULL;
+               kmem_cache_free(tcmu_cmd_cache, cmd);
+               return;
+       }
+
         if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
                 memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
                                se_cmd->scsi_sense_length);
@@ -540,14 +560,16 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
  
                 tcmu_flush_dcache_range(entry, sizeof(*entry));
  
-               if (tcmu_hdr_get_op(&entry->hdr) == TCMU_OP_PAD) {
-                       UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+               if (tcmu_hdr_get_op(entry->hdr.len_op) == TCMU_OP_PAD) {
+                       UPDATE_HEAD(udev->cmdr_last_cleaned,
+                                   tcmu_hdr_get_len(entry->hdr.len_op),
+                                   udev->cmdr_size);
                         continue;
                 }
-               WARN_ON(tcmu_hdr_get_op(&entry->hdr) != TCMU_OP_CMD);
+               WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD);
  
                 spin_lock(&udev->commands_lock);
-               cmd = idr_find(&udev->commands, entry->cmd_id);
+               cmd = idr_find(&udev->commands, entry->hdr.cmd_id);
                 if (cmd)
                         idr_remove(&udev->commands, cmd->cmd_id);
                 spin_unlock(&udev->commands_lock);
@@ -560,7 +582,9 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
  
                 tcmu_handle_completion(cmd, entry);
  
-               UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+               UPDATE_HEAD(udev->cmdr_last_cleaned,
+                           tcmu_hdr_get_len(entry->hdr.len_op),
+                           udev->cmdr_size);
  
                 handled++;
         }
@@ -838,14 +862,14 @@ static int tcmu_configure_device(struct se_device *dev)
         udev->data_size = TCMU_RING_SIZE - CMDR_SIZE;
  
         mb = udev->mb_addr;
-       mb->version = 1;
+       mb->version = TCMU_MAILBOX_VERSION;
         mb->cmdr_off = CMDR_OFF;
         mb->cmdr_size = udev->cmdr_size;
  
         WARN_ON(!PAGE_ALIGNED(udev->data_off));
         WARN_ON(udev->data_size % PAGE_SIZE);
  
-       info->version = "1";
+       info->version = xstr(TCMU_MAILBOX_VERSION);
  
         info->mem[0].name = "tcm-user command & data buffer";
         info->mem[0].addr = (phys_addr_t) udev->mb_addr;
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c

index 33ac39b..a600ff1 100644 (file)
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -34,20 +34,12 @@
  #include <target/target_core_fabric.h>
  #include <target/target_core_configfs.h>
  
+#include "target_core_internal.h"
  #include "target_core_pr.h"
  #include "target_core_ua.h"
  #include "target_core_xcopy.h"
  
  static struct workqueue_struct *xcopy_wq = NULL;
-/*
- * From target_core_device.c
- */
-extern struct mutex g_device_mutex;
-extern struct list_head g_device_list;
-/*
- * From target_core_configfs.c
- */
-extern struct configfs_subsystem *target_core_subsystem[];
  
  static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
  {
@@ -433,7 +425,7 @@ static int xcopy_pt_queue_status(struct se_cmd *se_cmd)
         return 0;
  }
  
-static struct target_core_fabric_ops xcopy_pt_tfo = {
+static const struct target_core_fabric_ops xcopy_pt_tfo = {
         .get_fabric_name        = xcopy_pt_get_fabric_name,
         .get_task_tag           = xcopy_pt_get_tag,
         .get_cmd_state          = xcopy_pt_get_cmd_state,
@@ -548,33 +540,22 @@ static void target_xcopy_setup_pt_port(
         }
  }
  
-static int target_xcopy_init_pt_lun(
-       struct xcopy_pt_cmd *xpt_cmd,
-       struct xcopy_op *xop,
-       struct se_device *se_dev,
-       struct se_cmd *pt_cmd,
-       bool remote_port)
+static void target_xcopy_init_pt_lun(struct se_device *se_dev,
+               struct se_cmd *pt_cmd, bool remote_port)
  {
         /*
          * Don't allocate + init an pt_cmd->se_lun if honoring local port for
          * reservations.  The pt_cmd->se_lun pointer will be setup from within
          * target_xcopy_setup_pt_port()
          */
-       if (!remote_port) {
-               pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
-               return 0;
+       if (remote_port) {
+               pr_debug("Setup emulated se_dev: %p from se_dev\n",
+                       pt_cmd->se_dev);
+               pt_cmd->se_lun = &se_dev->xcopy_lun;
+               pt_cmd->se_dev = se_dev;
         }
  
-       pt_cmd->se_lun = &se_dev->xcopy_lun;
-       pt_cmd->se_dev = se_dev;
-
-       pr_debug("Setup emulated se_dev: %p from se_dev\n", pt_cmd->se_dev);
-       pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
-
-       pr_debug("Setup emulated se_dev: %p to pt_cmd->se_lun->lun_se_dev\n",
-               pt_cmd->se_lun->lun_se_dev);
-
-       return 0;
+       pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
  }
  
  static int target_xcopy_setup_pt_cmd(
@@ -592,11 +573,8 @@ static int target_xcopy_setup_pt_cmd(
          * Setup LUN+port to honor reservations based upon xop->op_origin for
          * X-COPY PUSH or X-COPY PULL based upon where the CDB was received.
          */
-       rc = target_xcopy_init_pt_lun(xpt_cmd, xop, se_dev, cmd, remote_port);
-       if (rc < 0) {
-               ret = rc;
-               goto out;
-       }
+       target_xcopy_init_pt_lun(se_dev, cmd, remote_port);
+
         xpt_cmd->xcopy_op = xop;
         target_xcopy_setup_pt_port(xpt_cmd, xop, remote_port);
  
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h

index a0bcfd3..881deb3 100644 (file)
--- a/drivers/target/tcm_fc/tcm_fc.h
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -129,7 +129,6 @@ struct ft_cmd {
  
  extern struct mutex ft_lport_lock;
  extern struct fc4_prov ft_prov;
-extern struct target_fabric_configfs *ft_configfs;
  extern unsigned int ft_debug_logging;
  
  /*
diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c

index efdcb96..65dce13 100644 (file)
--- a/drivers/target/tcm_fc/tfc_conf.c
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -48,7 +48,7 @@
  
  #include "tcm_fc.h"
  
-struct target_fabric_configfs *ft_configfs;
+static const struct target_core_fabric_ops ft_fabric_ops;
  
  static LIST_HEAD(ft_wwn_list);
  DEFINE_MUTEX(ft_lport_lock);
@@ -337,7 +337,7 @@ static struct se_portal_group *ft_add_tpg(
                 return NULL;
         }
  
-       ret = core_tpg_register(&ft_configfs->tf_ops, wwn, &tpg->se_tpg,
+       ret = core_tpg_register(&ft_fabric_ops, wwn, &tpg->se_tpg,
                                 tpg, TRANSPORT_TPG_TYPE_NORMAL);
         if (ret < 0) {
                 destroy_workqueue(wq);
@@ -507,7 +507,9 @@ static u32 ft_tpg_get_inst_index(struct se_portal_group *se_tpg)
         return tpg->index;
  }
  
-static struct target_core_fabric_ops ft_fabric_ops = {
+static const struct target_core_fabric_ops ft_fabric_ops = {
+       .module =                       THIS_MODULE,
+       .name =                         "fc",
         .get_fabric_name =              ft_get_fabric_name,
         .get_fabric_proto_ident =       fc_get_fabric_proto_ident,
         .tpg_get_wwn =                  ft_get_fabric_wwn,
@@ -552,62 +554,10 @@ static struct target_core_fabric_ops ft_fabric_ops = {
         .fabric_drop_np =               NULL,
         .fabric_make_nodeacl =          &ft_add_acl,
         .fabric_drop_nodeacl =          &ft_del_acl,
-};
-
-static int ft_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "fc");
-       if (IS_ERR(fabric)) {
-               pr_err("%s: target_fabric_configfs_init() failed!\n",
-                      __func__);
-               return PTR_ERR(fabric);
-       }
-       fabric->tf_ops = ft_fabric_ops;
-
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = ft_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs =
-                                                   ft_nacl_base_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * register the fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_debug("target_fabric_configfs_register() for"
-                           " FC Target failed!\n");
-               target_fabric_configfs_free(fabric);
-               return -1;
-       }
-
-       /*
-        * Setup our local pointer to *fabric.
-        */
-       ft_configfs = fabric;
-       return 0;
-}
  
-static void ft_deregister_configfs(void)
-{
-       if (!ft_configfs)
-               return;
-       target_fabric_configfs_deregister(ft_configfs);
-       ft_configfs = NULL;
-}
+       .tfc_wwn_attrs                  = ft_wwn_attrs,
+       .tfc_tpg_nacl_base_attrs        = ft_nacl_base_attrs,
+};
  
  static struct notifier_block ft_notifier = {
         .notifier_call = ft_lport_notify
@@ -615,15 +565,24 @@ static struct notifier_block ft_notifier = {
  
  static int __init ft_init(void)
  {
-       if (ft_register_configfs())
-               return -1;
-       if (fc_fc4_register_provider(FC_TYPE_FCP, &ft_prov)) {
-               ft_deregister_configfs();
-               return -1;
-       }
+       int ret;
+
+       ret = target_register_template(&ft_fabric_ops);
+       if (ret)
+               goto out;
+
+       ret = fc_fc4_register_provider(FC_TYPE_FCP, &ft_prov);
+       if (ret)
+               goto out_unregister_template;
+
         blocking_notifier_chain_register(&fc_lport_notifier_head, &ft_notifier);
         fc_lport_iterate(ft_lport_add, NULL);
         return 0;
+
+out_unregister_template:
+       target_unregister_template(&ft_fabric_ops);
+out:
+       return ret;
  }
  
  static void __exit ft_exit(void)
@@ -632,7 +591,7 @@ static void __exit ft_exit(void)
                                            &ft_notifier);
         fc_fc4_deregister_provider(FC_TYPE_FCP, &ft_prov);
         fc_lport_iterate(ft_lport_del, NULL);
-       ft_deregister_configfs();
+       target_unregister_template(&ft_fabric_ops);
         synchronize_rcu();
  }
  
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c

index 422ebea..4506e40 100644 (file)
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -450,6 +450,18 @@ static unsigned int mem32_serial_in(struct uart_port *p, int offset)
         return readl(p->membase + offset);
  }
  
+static void mem32be_serial_out(struct uart_port *p, int offset, int value)
+{
+       offset = offset << p->regshift;
+       iowrite32be(value, p->membase + offset);
+}
+
+static unsigned int mem32be_serial_in(struct uart_port *p, int offset)
+{
+       offset = offset << p->regshift;
+       return ioread32be(p->membase + offset);
+}
+
  static unsigned int io_serial_in(struct uart_port *p, int offset)
  {
         offset = offset << p->regshift;
@@ -488,6 +500,11 @@ static void set_io_from_upio(struct uart_port *p)
                 p->serial_out = mem32_serial_out;
                 break;
  
+       case UPIO_MEM32BE:
+               p->serial_in = mem32be_serial_in;
+               p->serial_out = mem32be_serial_out;
+               break;
+
  #if defined(CONFIG_MIPS_ALCHEMY) || defined(CONFIG_SERIAL_8250_RT288X)
         case UPIO_AU:
                 p->serial_in = au_serial_in;
@@ -513,6 +530,7 @@ serial_port_out_sync(struct uart_port *p, int offset, int value)
         switch (p->iotype) {
         case UPIO_MEM:
         case UPIO_MEM32:
+       case UPIO_MEM32BE:
         case UPIO_AU:
                 p->serial_out(p, offset, value);
                 p->serial_in(p, UART_LCR);      /* safe, no side-effects */
@@ -2748,6 +2766,7 @@ static int serial8250_request_std_resource(struct uart_8250_port *up)
         case UPIO_AU:
         case UPIO_TSI:
         case UPIO_MEM32:
+       case UPIO_MEM32BE:
         case UPIO_MEM:
                 if (!port->mapbase)
                         break;
@@ -2784,6 +2803,7 @@ static void serial8250_release_std_resource(struct uart_8250_port *up)
         case UPIO_AU:
         case UPIO_TSI:
         case UPIO_MEM32:
+       case UPIO_MEM32BE:
         case UPIO_MEM:
                 if (!port->mapbase)
                         break;
diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c

index 8e11968..6c0fd8b 100644 (file)
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -42,6 +42,8 @@ unsigned int __weak __init serial8250_early_in(struct uart_port *port, int offse
                 return readb(port->membase + offset);
         case UPIO_MEM32:
                 return readl(port->membase + (offset << 2));
+       case UPIO_MEM32BE:
+               return ioread32be(port->membase + (offset << 2));
         case UPIO_PORT:
                 return inb(port->iobase + offset);
         default:
@@ -58,6 +60,9 @@ void __weak __init serial8250_early_out(struct uart_port *port, int offset, int
         case UPIO_MEM32:
                 writel(value, port->membase + (offset << 2));
                 break;
+       case UPIO_MEM32BE:
+               iowrite32be(value, port->membase + (offset << 2));
+               break;
         case UPIO_PORT:
                 outb(value, port->iobase + offset);
                 break;
diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c

index aa00154..5b73afb 100644 (file)
--- a/drivers/tty/serial/of_serial.c
+++ b/drivers/tty/serial/of_serial.c
@@ -116,7 +116,8 @@ static int of_platform_serial_setup(struct platform_device *ofdev,
                         port->iotype = UPIO_MEM;
                         break;
                 case 4:
-                       port->iotype = UPIO_MEM32;
+                       port->iotype = of_device_is_big_endian(np) ?
+                                      UPIO_MEM32BE : UPIO_MEM32;
                         break;
                 default:
                         dev_warn(&ofdev->dev, "unsupported reg-io-width (%d)\n",
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c

index 6af58c6..2030565 100644 (file)
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1505,7 +1505,7 @@ static void destroy_ep_files (struct dev_data *dev)
                 list_del_init (&ep->epfiles);
                 dentry = ep->dentry;
                 ep->dentry = NULL;
-               parent = dentry->d_parent->d_inode;
+               parent = d_inode(dentry->d_parent);
  
                 /* break link to controller */
                 if (ep->state == STATE_EP_ENABLED)
diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c

index 6e0a019..8b80add 100644 (file)
--- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c
+++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c
@@ -29,7 +29,7 @@
  
  USB_GADGET_COMPOSITE_OPTIONS();
  
-static struct target_fabric_configfs *usbg_fabric_configfs;
+static const struct target_core_fabric_ops usbg_ops;
  
  static inline struct f_uas *to_f_uas(struct usb_function *f)
  {
@@ -1572,8 +1572,7 @@ static struct se_portal_group *usbg_make_tpg(
         tpg->tport = tport;
         tpg->tport_tpgt = tpgt;
  
-       ret = core_tpg_register(&usbg_fabric_configfs->tf_ops, wwn,
-                               &tpg->se_tpg, tpg,
+       ret = core_tpg_register(&usbg_ops, wwn, &tpg->se_tpg, tpg,
                                 TRANSPORT_TPG_TYPE_NORMAL);
         if (ret < 0) {
                 destroy_workqueue(tpg->workqueue);
@@ -1864,7 +1863,9 @@ static int usbg_check_stop_free(struct se_cmd *se_cmd)
         return 1;
  }
  
-static struct target_core_fabric_ops usbg_ops = {
+static const struct target_core_fabric_ops usbg_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "usb_gadget",
         .get_fabric_name                = usbg_get_fabric_name,
         .get_fabric_proto_ident         = usbg_get_fabric_proto_ident,
         .tpg_get_wwn                    = usbg_get_fabric_wwn,
@@ -1906,46 +1907,9 @@ static struct target_core_fabric_ops usbg_ops = {
         .fabric_drop_np                 = NULL,
         .fabric_make_nodeacl            = usbg_make_nodeacl,
         .fabric_drop_nodeacl            = usbg_drop_nodeacl,
-};
-
-static int usbg_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       fabric = target_fabric_configfs_init(THIS_MODULE, "usb_gadget");
-       if (IS_ERR(fabric)) {
-               printk(KERN_ERR "target_fabric_configfs_init() failed\n");
-               return PTR_ERR(fabric);
-       }
-
-       fabric->tf_ops = usbg_ops;
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = usbg_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = usbg_base_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               printk(KERN_ERR "target_fabric_configfs_register() failed"
-                               " for usb-gadget\n");
-               return ret;
-       }
-       usbg_fabric_configfs = fabric;
-       return 0;
-};
  
-static void usbg_deregister_configfs(void)
-{
-       if (!(usbg_fabric_configfs))
-               return;
-
-       target_fabric_configfs_deregister(usbg_fabric_configfs);
-       usbg_fabric_configfs = NULL;
+       .tfc_wwn_attrs                  = usbg_wwn_attrs,
+       .tfc_tpg_base_attrs             = usbg_base_attrs,
  };
  
  /* Start gadget.c code */
@@ -2454,16 +2418,13 @@ static void usbg_detach(struct usbg_tpg *tpg)
  
  static int __init usb_target_gadget_init(void)
  {
-       int ret;
-
-       ret = usbg_register_configfs();
-       return ret;
+       return target_register_template(&usbg_ops);
  }
  module_init(usb_target_gadget_init);
  
  static void __exit usb_target_gadget_exit(void)
  {
-       usbg_deregister_configfs();
+       target_unregister_template(&usbg_ops);
  }
  module_exit(usb_target_gadget_exit);
  
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c

index 71df240..5e19bb5 100644 (file)
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -131,6 +131,8 @@ struct vhost_scsi_tpg {
         int tv_tpg_port_count;
         /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
         int tv_tpg_vhost_count;
+       /* Used for enabling T10-PI with legacy devices */
+       int tv_fabric_prot_type;
         /* list for vhost_scsi_list */
         struct list_head tv_tpg_list;
         /* Used to protect access for tpg_nexus */
@@ -214,9 +216,7 @@ struct vhost_scsi {
         int vs_events_nr; /* num of pending events, protected by vq->mutex */
  };
  
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *vhost_scsi_fabric_configfs;
-
+static struct target_core_fabric_ops vhost_scsi_ops;
  static struct workqueue_struct *vhost_scsi_workqueue;
  
  /* Global spinlock to protect vhost_scsi TPG list for vhost IOCTL access */
@@ -431,6 +431,14 @@ vhost_scsi_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
                         port_nexus_ptr);
  }
  
+static int vhost_scsi_check_prot_fabric_only(struct se_portal_group *se_tpg)
+{
+       struct vhost_scsi_tpg *tpg = container_of(se_tpg,
+                               struct vhost_scsi_tpg, se_tpg);
+
+       return tpg->tv_fabric_prot_type;
+}
+
  static struct se_node_acl *
  vhost_scsi_alloc_fabric_acl(struct se_portal_group *se_tpg)
  {
@@ -1878,6 +1886,45 @@ static void vhost_scsi_free_cmd_map_res(struct vhost_scsi_nexus *nexus,
         }
  }
  
+static ssize_t vhost_scsi_tpg_attrib_store_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       const char *page,
+       size_t count)
+{
+       struct vhost_scsi_tpg *tpg = container_of(se_tpg,
+                               struct vhost_scsi_tpg, se_tpg);
+       unsigned long val;
+       int ret = kstrtoul(page, 0, &val);
+
+       if (ret) {
+               pr_err("kstrtoul() returned %d for fabric_prot_type\n", ret);
+               return ret;
+       }
+       if (val != 0 && val != 1 && val != 3) {
+               pr_err("Invalid vhost_scsi fabric_prot_type: %lu\n", val);
+               return -EINVAL;
+       }
+       tpg->tv_fabric_prot_type = val;
+
+       return count;
+}
+
+static ssize_t vhost_scsi_tpg_attrib_show_fabric_prot_type(
+       struct se_portal_group *se_tpg,
+       char *page)
+{
+       struct vhost_scsi_tpg *tpg = container_of(se_tpg,
+                               struct vhost_scsi_tpg, se_tpg);
+
+       return sprintf(page, "%d\n", tpg->tv_fabric_prot_type);
+}
+TF_TPG_ATTRIB_ATTR(vhost_scsi, fabric_prot_type, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *vhost_scsi_tpg_attrib_attrs[] = {
+       &vhost_scsi_tpg_attrib_fabric_prot_type.attr,
+       NULL,
+};
+
  static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
                                 const char *name)
  {
@@ -2155,7 +2202,7 @@ vhost_scsi_make_tpg(struct se_wwn *wwn,
         tpg->tport = tport;
         tpg->tport_tpgt = tpgt;
  
-       ret = core_tpg_register(&vhost_scsi_fabric_configfs->tf_ops, wwn,
+       ret = core_tpg_register(&vhost_scsi_ops, wwn,
                                 &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
         if (ret < 0) {
                 kfree(tpg);
@@ -2277,6 +2324,8 @@ static struct configfs_attribute *vhost_scsi_wwn_attrs[] = {
  };
  
  static struct target_core_fabric_ops vhost_scsi_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "vhost",
         .get_fabric_name                = vhost_scsi_get_fabric_name,
         .get_fabric_proto_ident         = vhost_scsi_get_fabric_proto_ident,
         .tpg_get_wwn                    = vhost_scsi_get_fabric_wwn,
@@ -2289,6 +2338,7 @@ static struct target_core_fabric_ops vhost_scsi_ops = {
         .tpg_check_demo_mode_cache      = vhost_scsi_check_true,
         .tpg_check_demo_mode_write_protect = vhost_scsi_check_false,
         .tpg_check_prod_mode_write_protect = vhost_scsi_check_false,
+       .tpg_check_prot_fabric_only     = vhost_scsi_check_prot_fabric_only,
         .tpg_alloc_fabric_acl           = vhost_scsi_alloc_fabric_acl,
         .tpg_release_fabric_acl         = vhost_scsi_release_fabric_acl,
         .tpg_get_inst_index             = vhost_scsi_tpg_get_inst_index,
@@ -2320,70 +2370,20 @@ static struct target_core_fabric_ops vhost_scsi_ops = {
         .fabric_drop_np                 = NULL,
         .fabric_make_nodeacl            = vhost_scsi_make_nodeacl,
         .fabric_drop_nodeacl            = vhost_scsi_drop_nodeacl,
+
+       .tfc_wwn_attrs                  = vhost_scsi_wwn_attrs,
+       .tfc_tpg_base_attrs             = vhost_scsi_tpg_attrs,
+       .tfc_tpg_attrib_attrs           = vhost_scsi_tpg_attrib_attrs,
  };
  
-static int vhost_scsi_register_configfs(void)
+static int __init vhost_scsi_init(void)
  {
-       struct target_fabric_configfs *fabric;
-       int ret;
+       int ret = -ENOMEM;
  
-       pr_debug("vhost-scsi fabric module %s on %s/%s"
+       pr_debug("TCM_VHOST fabric module %s on %s/%s"
                 " on "UTS_RELEASE"\n", VHOST_SCSI_VERSION, utsname()->sysname,
                 utsname()->machine);
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "vhost");
-       if (IS_ERR(fabric)) {
-               pr_err("target_fabric_configfs_init() failed\n");
-               return PTR_ERR(fabric);
-       }
-       /*
-        * Setup fabric->tf_ops from our local vhost_scsi_ops
-        */
-       fabric->tf_ops = vhost_scsi_ops;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = vhost_scsi_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = vhost_scsi_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * Register the fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               pr_err("target_fabric_configfs_register() failed"
-                               " for TCM_VHOST\n");
-               return ret;
-       }
-       /*
-        * Setup our local pointer to *fabric
-        */
-       vhost_scsi_fabric_configfs = fabric;
-       pr_debug("TCM_VHOST[0] - Set fabric -> vhost_scsi_fabric_configfs\n");
-       return 0;
-};
-
-static void vhost_scsi_deregister_configfs(void)
-{
-       if (!vhost_scsi_fabric_configfs)
-               return;
-
-       target_fabric_configfs_deregister(vhost_scsi_fabric_configfs);
-       vhost_scsi_fabric_configfs = NULL;
-       pr_debug("TCM_VHOST[0] - Cleared vhost_scsi_fabric_configfs\n");
-};
  
-static int __init vhost_scsi_init(void)
-{
-       int ret = -ENOMEM;
         /*
          * Use our own dedicated workqueue for submitting I/O into
          * target core to avoid contention within system_wq.
@@ -2396,7 +2396,7 @@ static int __init vhost_scsi_init(void)
         if (ret < 0)
                 goto out_destroy_workqueue;
  
-       ret = vhost_scsi_register_configfs();
+       ret = target_register_template(&vhost_scsi_ops);
         if (ret < 0)
                 goto out_vhost_scsi_deregister;
  
@@ -2412,7 +2412,7 @@ out:
  
  static void vhost_scsi_exit(void)
  {
-       vhost_scsi_deregister_configfs();
+       target_unregister_template(&vhost_scsi_ops);
         vhost_scsi_deregister();
         destroy_workqueue(vhost_scsi_workqueue);
  };
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig

index b546da5..cab9f3f 100644 (file)
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -48,6 +48,16 @@ config VIRTIO_BALLOON
  
          If unsure, say M.
  
+config VIRTIO_INPUT
+       tristate "Virtio input driver"
+       depends on VIRTIO
+       depends on INPUT
+       ---help---
+        This driver supports virtio input devices such as
+        keyboards, mice and tablets.
+
+        If unsure, say M.
+
   config VIRTIO_MMIO
         tristate "Platform bus driver for memory mapped virtio devices"
         depends on HAS_IOMEM
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile

index d85565b..41e30e3 100644 (file)
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
  virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
  virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
  obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
+obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c

index 5ce2aa4..b1877d7 100644 (file)
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -278,12 +278,6 @@ static struct bus_type virtio_bus = {
         .remove = virtio_dev_remove,
  };
  
-bool virtio_device_is_legacy_only(struct virtio_device_id id)
-{
-       return id.device == VIRTIO_ID_BALLOON;
-}
-EXPORT_SYMBOL_GPL(virtio_device_is_legacy_only);
-
  int register_virtio_driver(struct virtio_driver *driver)
  {
         /* Catch this early. */
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c

index 6a356e3..82e80e0 100644 (file)
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -214,8 +214,8 @@ static inline void update_stat(struct virtio_balloon *vb, int idx,
                                u16 tag, u64 val)
  {
         BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
-       vb->stats[idx].tag = tag;
-       vb->stats[idx].val = val;
+       vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag);
+       vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val);
  }
  
  #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
@@ -283,18 +283,27 @@ static void virtballoon_changed(struct virtio_device *vdev)
  
  static inline s64 towards_target(struct virtio_balloon *vb)
  {
-       __le32 v;
         s64 target;
+       u32 num_pages;
  
-       virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, &v);
+       virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages,
+                    &num_pages);
  
-       target = le32_to_cpu(v);
+       /* Legacy balloon config space is LE, unlike all other devices. */
+       if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
+               num_pages = le32_to_cpu((__force __le32)num_pages);
+
+       target = num_pages;
         return target - vb->num_pages;
  }
  
  static void update_balloon_size(struct virtio_balloon *vb)
  {
-       __le32 actual = cpu_to_le32(vb->num_pages);
+       u32 actual = vb->num_pages;
+
+       /* Legacy balloon config space is LE, unlike all other devices. */
+       if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
+               actual = (__force u32)cpu_to_le32(actual);
  
         virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
                       &actual);
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c

new file mode 100644 (file)

index 0000000..60e2a16
--- /dev/null
+++ b/drivers/virtio/virtio_input.c
@@ -0,0 +1,384 @@
+#include <linux/module.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/input.h>
+
+#include <uapi/linux/virtio_ids.h>
+#include <uapi/linux/virtio_input.h>
+
+struct virtio_input {
+       struct virtio_device       *vdev;
+       struct input_dev           *idev;
+       char                       name[64];
+       char                       serial[64];
+       char                       phys[64];
+       struct virtqueue           *evt, *sts;
+       struct virtio_input_event  evts[64];
+       spinlock_t                 lock;
+       bool                       ready;
+};
+
+static void virtinput_queue_evtbuf(struct virtio_input *vi,
+                                  struct virtio_input_event *evtbuf)
+{
+       struct scatterlist sg[1];
+
+       sg_init_one(sg, evtbuf, sizeof(*evtbuf));
+       virtqueue_add_inbuf(vi->evt, sg, 1, evtbuf, GFP_ATOMIC);
+}
+
+static void virtinput_recv_events(struct virtqueue *vq)
+{
+       struct virtio_input *vi = vq->vdev->priv;
+       struct virtio_input_event *event;
+       unsigned long flags;
+       unsigned int len;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       if (vi->ready) {
+               while ((event = virtqueue_get_buf(vi->evt, &len)) != NULL) {
+                       spin_unlock_irqrestore(&vi->lock, flags);
+                       input_event(vi->idev,
+                                   le16_to_cpu(event->type),
+                                   le16_to_cpu(event->code),
+                                   le32_to_cpu(event->value));
+                       spin_lock_irqsave(&vi->lock, flags);
+                       virtinput_queue_evtbuf(vi, event);
+               }
+               virtqueue_kick(vq);
+       }
+       spin_unlock_irqrestore(&vi->lock, flags);
+}
+
+/*
+ * On error we are losing the status update, which isn't critical as
+ * this is typically used for stuff like keyboard leds.
+ */
+static int virtinput_send_status(struct virtio_input *vi,
+                                u16 type, u16 code, s32 value)
+{
+       struct virtio_input_event *stsbuf;
+       struct scatterlist sg[1];
+       unsigned long flags;
+       int rc;
+
+       stsbuf = kzalloc(sizeof(*stsbuf), GFP_ATOMIC);
+       if (!stsbuf)
+               return -ENOMEM;
+
+       stsbuf->type  = cpu_to_le16(type);
+       stsbuf->code  = cpu_to_le16(code);
+       stsbuf->value = cpu_to_le32(value);
+       sg_init_one(sg, stsbuf, sizeof(*stsbuf));
+
+       spin_lock_irqsave(&vi->lock, flags);
+       if (vi->ready) {
+               rc = virtqueue_add_outbuf(vi->sts, sg, 1, stsbuf, GFP_ATOMIC);
+               virtqueue_kick(vi->sts);
+       } else {
+               rc = -ENODEV;
+       }
+       spin_unlock_irqrestore(&vi->lock, flags);
+
+       if (rc != 0)
+               kfree(stsbuf);
+       return rc;
+}
+
+static void virtinput_recv_status(struct virtqueue *vq)
+{
+       struct virtio_input *vi = vq->vdev->priv;
+       struct virtio_input_event *stsbuf;
+       unsigned long flags;
+       unsigned int len;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       while ((stsbuf = virtqueue_get_buf(vi->sts, &len)) != NULL)
+               kfree(stsbuf);
+       spin_unlock_irqrestore(&vi->lock, flags);
+}
+
+static int virtinput_status(struct input_dev *idev, unsigned int type,
+                           unsigned int code, int value)
+{
+       struct virtio_input *vi = input_get_drvdata(idev);
+
+       return virtinput_send_status(vi, type, code, value);
+}
+
+static u8 virtinput_cfg_select(struct virtio_input *vi,
+                              u8 select, u8 subsel)
+{
+       u8 size;
+
+       virtio_cwrite(vi->vdev, struct virtio_input_config, select, &select);
+       virtio_cwrite(vi->vdev, struct virtio_input_config, subsel, &subsel);
+       virtio_cread(vi->vdev, struct virtio_input_config, size, &size);
+       return size;
+}
+
+static void virtinput_cfg_bits(struct virtio_input *vi, int select, int subsel,
+                              unsigned long *bits, unsigned int bitcount)
+{
+       unsigned int bit;
+       u8 *virtio_bits;
+       u8 bytes;
+
+       bytes = virtinput_cfg_select(vi, select, subsel);
+       if (!bytes)
+               return;
+       if (bitcount > bytes * 8)
+               bitcount = bytes * 8;
+
+       /*
+        * Bitmap in virtio config space is a simple stream of bytes,
+        * with the first byte carrying bits 0-7, second bits 8-15 and
+        * so on.
+        */
+       virtio_bits = kzalloc(bytes, GFP_KERNEL);
+       if (!virtio_bits)
+               return;
+       virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config,
+                                             u.bitmap),
+                          virtio_bits, bytes);
+       for (bit = 0; bit < bitcount; bit++) {
+               if (virtio_bits[bit / 8] & (1 << (bit % 8)))
+                       __set_bit(bit, bits);
+       }
+       kfree(virtio_bits);
+
+       if (select == VIRTIO_INPUT_CFG_EV_BITS)
+               __set_bit(subsel, vi->idev->evbit);
+}
+
+static void virtinput_cfg_abs(struct virtio_input *vi, int abs)
+{
+       u32 mi, ma, re, fu, fl;
+
+       virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ABS_INFO, abs);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.min, &mi);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.max, &ma);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.res, &re);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu);
+       virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl);
+       input_set_abs_params(vi->idev, abs, mi, ma, fu, fl);
+       input_abs_set_res(vi->idev, abs, re);
+}
+
+static int virtinput_init_vqs(struct virtio_input *vi)
+{
+       struct virtqueue *vqs[2];
+       vq_callback_t *cbs[] = { virtinput_recv_events,
+                                virtinput_recv_status };
+       static const char *names[] = { "events", "status" };
+       int err;
+
+       err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names);
+       if (err)
+               return err;
+       vi->evt = vqs[0];
+       vi->sts = vqs[1];
+
+       return 0;
+}
+
+static void virtinput_fill_evt(struct virtio_input *vi)
+{
+       unsigned long flags;
+       int i, size;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       size = virtqueue_get_vring_size(vi->evt);
+       if (size > ARRAY_SIZE(vi->evts))
+               size = ARRAY_SIZE(vi->evts);
+       for (i = 0; i < size; i++)
+               virtinput_queue_evtbuf(vi, &vi->evts[i]);
+       virtqueue_kick(vi->evt);
+       spin_unlock_irqrestore(&vi->lock, flags);
+}
+
+static int virtinput_probe(struct virtio_device *vdev)
+{
+       struct virtio_input *vi;
+       unsigned long flags;
+       size_t size;
+       int abs, err;
+
+       if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
+               return -ENODEV;
+
+       vi = kzalloc(sizeof(*vi), GFP_KERNEL);
+       if (!vi)
+               return -ENOMEM;
+
+       vdev->priv = vi;
+       vi->vdev = vdev;
+       spin_lock_init(&vi->lock);
+
+       err = virtinput_init_vqs(vi);
+       if (err)
+               goto err_init_vq;
+
+       vi->idev = input_allocate_device();
+       if (!vi->idev) {
+               err = -ENOMEM;
+               goto err_input_alloc;
+       }
+       input_set_drvdata(vi->idev, vi);
+
+       size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_NAME, 0);
+       virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config,
+                                             u.string),
+                          vi->name, min(size, sizeof(vi->name)));
+       size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_SERIAL, 0);
+       virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config,
+                                             u.string),
+                          vi->serial, min(size, sizeof(vi->serial)));
+       snprintf(vi->phys, sizeof(vi->phys),
+                "virtio%d/input0", vdev->index);
+       vi->idev->name = vi->name;
+       vi->idev->phys = vi->phys;
+       vi->idev->uniq = vi->serial;
+
+       size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_DEVIDS, 0);
+       if (size >= sizeof(struct virtio_input_devids)) {
+               virtio_cread(vi->vdev, struct virtio_input_config,
+                            u.ids.bustype, &vi->idev->id.bustype);
+               virtio_cread(vi->vdev, struct virtio_input_config,
+                            u.ids.vendor, &vi->idev->id.vendor);
+               virtio_cread(vi->vdev, struct virtio_input_config,
+                            u.ids.product, &vi->idev->id.product);
+               virtio_cread(vi->vdev, struct virtio_input_config,
+                            u.ids.version, &vi->idev->id.version);
+       } else {
+               vi->idev->id.bustype = BUS_VIRTUAL;
+       }
+
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_PROP_BITS, 0,
+                          vi->idev->propbit, INPUT_PROP_CNT);
+       size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_REP);
+       if (size)
+               __set_bit(EV_REP, vi->idev->evbit);
+
+       vi->idev->dev.parent = &vdev->dev;
+       vi->idev->event = virtinput_status;
+
+       /* device -> kernel */
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_KEY,
+                          vi->idev->keybit, KEY_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_REL,
+                          vi->idev->relbit, REL_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_ABS,
+                          vi->idev->absbit, ABS_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_MSC,
+                          vi->idev->mscbit, MSC_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SW,
+                          vi->idev->swbit,  SW_CNT);
+
+       /* kernel -> device */
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_LED,
+                          vi->idev->ledbit, LED_CNT);
+       virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SND,
+                          vi->idev->sndbit, SND_CNT);
+
+       if (test_bit(EV_ABS, vi->idev->evbit)) {
+               for (abs = 0; abs < ABS_CNT; abs++) {
+                       if (!test_bit(abs, vi->idev->absbit))
+                               continue;
+                       virtinput_cfg_abs(vi, abs);
+               }
+       }
+
+       virtio_device_ready(vdev);
+       vi->ready = true;
+       err = input_register_device(vi->idev);
+       if (err)
+               goto err_input_register;
+
+       virtinput_fill_evt(vi);
+       return 0;
+
+err_input_register:
+       spin_lock_irqsave(&vi->lock, flags);
+       vi->ready = false;
+       spin_unlock_irqrestore(&vi->lock, flags);
+       input_free_device(vi->idev);
+err_input_alloc:
+       vdev->config->del_vqs(vdev);
+err_init_vq:
+       kfree(vi);
+       return err;
+}
+
+static void virtinput_remove(struct virtio_device *vdev)
+{
+       struct virtio_input *vi = vdev->priv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       vi->ready = false;
+       spin_unlock_irqrestore(&vi->lock, flags);
+
+       input_unregister_device(vi->idev);
+       vdev->config->del_vqs(vdev);
+       kfree(vi);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int virtinput_freeze(struct virtio_device *vdev)
+{
+       struct virtio_input *vi = vdev->priv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&vi->lock, flags);
+       vi->ready = false;
+       spin_unlock_irqrestore(&vi->lock, flags);
+
+       vdev->config->del_vqs(vdev);
+       return 0;
+}
+
+static int virtinput_restore(struct virtio_device *vdev)
+{
+       struct virtio_input *vi = vdev->priv;
+       int err;
+
+       err = virtinput_init_vqs(vi);
+       if (err)
+               return err;
+
+       virtio_device_ready(vdev);
+       vi->ready = true;
+       virtinput_fill_evt(vi);
+       return 0;
+}
+#endif
+
+static unsigned int features[] = {
+       /* none */
+};
+static struct virtio_device_id id_table[] = {
+       { VIRTIO_ID_INPUT, VIRTIO_DEV_ANY_ID },
+       { 0 },
+};
+
+static struct virtio_driver virtio_input_driver = {
+       .driver.name         = KBUILD_MODNAME,
+       .driver.owner        = THIS_MODULE,
+       .feature_table       = features,
+       .feature_table_size  = ARRAY_SIZE(features),
+       .id_table            = id_table,
+       .probe               = virtinput_probe,
+       .remove              = virtinput_remove,
+#ifdef CONFIG_PM_SLEEP
+       .freeze              = virtinput_freeze,
+       .restore             = virtinput_restore,
+#endif
+};
+
+module_virtio_driver(virtio_input_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Virtio input device driver");
+MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c

index 6010d7e..7a5e60d 100644 (file)
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -581,14 +581,6 @@ static int virtio_mmio_probe(struct platform_device *pdev)
         }
         vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID);
  
-       /* Reject legacy-only IDs for version 2 devices */
-       if (vm_dev->version == 2 &&
-                       virtio_device_is_legacy_only(vm_dev->vdev.id)) {
-               dev_err(&pdev->dev, "Version 2 not supported for devices %u!\n",
-                               vm_dev->vdev.id.device);
-               return -ENODEV;
-       }
-
         if (vm_dev->version == 1)
                 writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE);
  
diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c

index 2aa38e5..e88e099 100644 (file)
--- a/drivers/virtio/virtio_pci_modern.c
+++ b/drivers/virtio/virtio_pci_modern.c
@@ -20,6 +20,50 @@
  #define VIRTIO_PCI_NO_LEGACY
  #include "virtio_pci_common.h"
  
+/*
+ * Type-safe wrappers for io accesses.
+ * Use these to enforce at compile time the following spec requirement:
+ *
+ * The driver MUST access each field using the “natural” access
+ * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
+ * for 16-bit fields and 8-bit accesses for 8-bit fields.
+ */
+static inline u8 vp_ioread8(u8 __iomem *addr)
+{
+       return ioread8(addr);
+}
+static inline u16 vp_ioread16 (u16 __iomem *addr)
+{
+       return ioread16(addr);
+}
+
+static inline u32 vp_ioread32(u32 __iomem *addr)
+{
+       return ioread32(addr);
+}
+
+static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
+{
+       iowrite8(value, addr);
+}
+
+static inline void vp_iowrite16(u16 value, u16 __iomem *addr)
+{
+       iowrite16(value, addr);
+}
+
+static inline void vp_iowrite32(u32 value, u32 __iomem *addr)
+{
+       iowrite32(value, addr);
+}
+
+static void vp_iowrite64_twopart(u64 val,
+                                __le32 __iomem *lo, __le32 __iomem *hi)
+{
+       vp_iowrite32((u32)val, lo);
+       vp_iowrite32(val >> 32, hi);
+}
+
  static void __iomem *map_capability(struct pci_dev *dev, int off,
                                     size_t minlen,
                                     u32 align,
@@ -94,22 +138,16 @@ static void __iomem *map_capability(struct pci_dev *dev, int off,
         return p;
  }
  
-static void iowrite64_twopart(u64 val, __le32 __iomem *lo, __le32 __iomem *hi)
-{
-       iowrite32((u32)val, lo);
-       iowrite32(val >> 32, hi);
-}
-
  /* virtio config->get_features() implementation */
  static u64 vp_get_features(struct virtio_device *vdev)
  {
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
         u64 features;
  
-       iowrite32(0, &vp_dev->common->device_feature_select);
-       features = ioread32(&vp_dev->common->device_feature);
-       iowrite32(1, &vp_dev->common->device_feature_select);
-       features |= ((u64)ioread32(&vp_dev->common->device_feature) << 32);
+       vp_iowrite32(0, &vp_dev->common->device_feature_select);
+       features = vp_ioread32(&vp_dev->common->device_feature);
+       vp_iowrite32(1, &vp_dev->common->device_feature_select);
+       features |= ((u64)vp_ioread32(&vp_dev->common->device_feature) << 32);
  
         return features;
  }
@@ -128,10 +166,10 @@ static int vp_finalize_features(struct virtio_device *vdev)
                 return -EINVAL;
         }
  
-       iowrite32(0, &vp_dev->common->guest_feature_select);
-       iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
-       iowrite32(1, &vp_dev->common->guest_feature_select);
-       iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
+       vp_iowrite32(0, &vp_dev->common->guest_feature_select);
+       vp_iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
+       vp_iowrite32(1, &vp_dev->common->guest_feature_select);
+       vp_iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
  
         return 0;
  }
@@ -210,14 +248,14 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
  static u32 vp_generation(struct virtio_device *vdev)
  {
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       return ioread8(&vp_dev->common->config_generation);
+       return vp_ioread8(&vp_dev->common->config_generation);
  }
  
  /* config->{get,set}_status() implementations */
  static u8 vp_get_status(struct virtio_device *vdev)
  {
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       return ioread8(&vp_dev->common->device_status);
+       return vp_ioread8(&vp_dev->common->device_status);
  }
  
  static void vp_set_status(struct virtio_device *vdev, u8 status)
@@ -225,17 +263,17 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
         /* We should never be setting status to 0. */
         BUG_ON(status == 0);
-       iowrite8(status, &vp_dev->common->device_status);
+       vp_iowrite8(status, &vp_dev->common->device_status);
  }
  
  static void vp_reset(struct virtio_device *vdev)
  {
         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
         /* 0 status means a reset. */
-       iowrite8(0, &vp_dev->common->device_status);
+       vp_iowrite8(0, &vp_dev->common->device_status);
         /* Flush out the status write, and flush in device writes,
          * including MSI-X interrupts, if any. */
-       ioread8(&vp_dev->common->device_status);
+       vp_ioread8(&vp_dev->common->device_status);
         /* Flush pending VQ/configuration callbacks. */
         vp_synchronize_vectors(vdev);
  }
@@ -243,10 +281,10 @@ static void vp_reset(struct virtio_device *vdev)
  static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
  {
         /* Setup the vector used for configuration events */
-       iowrite16(vector, &vp_dev->common->msix_config);
+       vp_iowrite16(vector, &vp_dev->common->msix_config);
         /* Verify we had enough resources to assign the vector */
         /* Will also flush the write out to device */
-       return ioread16(&vp_dev->common->msix_config);
+       return vp_ioread16(&vp_dev->common->msix_config);
  }
  
  static size_t vring_pci_size(u16 num)
@@ -286,15 +324,15 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
         u16 num, off;
         int err;
  
-       if (index >= ioread16(&cfg->num_queues))
+       if (index >= vp_ioread16(&cfg->num_queues))
                 return ERR_PTR(-ENOENT);
  
         /* Select the queue we're interested in */
-       iowrite16(index, &cfg->queue_select);
+       vp_iowrite16(index, &cfg->queue_select);
  
         /* Check if queue is either not available or already active. */
-       num = ioread16(&cfg->queue_size);
-       if (!num || ioread16(&cfg->queue_enable))
+       num = vp_ioread16(&cfg->queue_size);
+       if (!num || vp_ioread16(&cfg->queue_enable))
                 return ERR_PTR(-ENOENT);
  
         if (num & (num - 1)) {
@@ -303,7 +341,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
         }
  
         /* get offset of notification word for this vq */
-       off = ioread16(&cfg->queue_notify_off);
+       off = vp_ioread16(&cfg->queue_notify_off);
  
         info->num = num;
         info->msix_vector = msix_vec;
@@ -322,13 +360,13 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
         }
  
         /* activate the queue */
-       iowrite16(num, &cfg->queue_size);
-       iowrite64_twopart(virt_to_phys(info->queue),
-                         &cfg->queue_desc_lo, &cfg->queue_desc_hi);
-       iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
-                         &cfg->queue_avail_lo, &cfg->queue_avail_hi);
-       iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
-                         &cfg->queue_used_lo, &cfg->queue_used_hi);
+       vp_iowrite16(num, &cfg->queue_size);
+       vp_iowrite64_twopart(virt_to_phys(info->queue),
+                            &cfg->queue_desc_lo, &cfg->queue_desc_hi);
+       vp_iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
+                            &cfg->queue_avail_lo, &cfg->queue_avail_hi);
+       vp_iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
+                            &cfg->queue_used_lo, &cfg->queue_used_hi);
  
         if (vp_dev->notify_base) {
                 /* offset should not wrap */
@@ -357,8 +395,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
         }
  
         if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
-               iowrite16(msix_vec, &cfg->queue_msix_vector);
-               msix_vec = ioread16(&cfg->queue_msix_vector);
+               vp_iowrite16(msix_vec, &cfg->queue_msix_vector);
+               msix_vec = vp_ioread16(&cfg->queue_msix_vector);
                 if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
                         err = -EBUSY;
                         goto err_assign_vector;
@@ -393,8 +431,8 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
          * this, there's no way to go back except reset.
          */
         list_for_each_entry(vq, &vdev->vqs, list) {
-               iowrite16(vq->index, &vp_dev->common->queue_select);
-               iowrite16(1, &vp_dev->common->queue_enable);
+               vp_iowrite16(vq->index, &vp_dev->common->queue_select);
+               vp_iowrite16(1, &vp_dev->common->queue_enable);
         }
  
         return 0;
@@ -405,13 +443,13 @@ static void del_vq(struct virtio_pci_vq_info *info)
         struct virtqueue *vq = info->vq;
         struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
  
-       iowrite16(vq->index, &vp_dev->common->queue_select);
+       vp_iowrite16(vq->index, &vp_dev->common->queue_select);
  
         if (vp_dev->msix_enabled) {
-               iowrite16(VIRTIO_MSI_NO_VECTOR,
-                         &vp_dev->common->queue_msix_vector);
+               vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
+                            &vp_dev->common->queue_msix_vector);
                 /* Flush the write out to device */
-               ioread16(&vp_dev->common->queue_msix_vector);
+               vp_ioread16(&vp_dev->common->queue_msix_vector);
         }
  
         if (!vp_dev->notify_base)
@@ -577,9 +615,6 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
         }
         vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
  
-       if (virtio_device_is_legacy_only(vp_dev->vdev.id))
-               return -ENODEV;
-
         /* check for a common config: if not, use legacy mode (bar 0). */
         common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
                                             IORESOURCE_IO | IORESOURCE_MEM);
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig

index ce4f3a7..e5e7c55 100644 (file)
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -169,7 +169,6 @@ config AT91SAM9X_WATCHDOG
  
  config CADENCE_WATCHDOG
         tristate "Cadence Watchdog Timer"
-       depends on ARM
         select WATCHDOG_CORE
         help
           Say Y here if you want to include support for the watchdog
@@ -1190,6 +1189,7 @@ config OCTEON_WDT
         tristate "Cavium OCTEON SOC family Watchdog Timer"
         depends on CAVIUM_OCTEON_SOC
         default y
+       select WATCHDOG_CORE
         select EXPORT_UASM if OCTEON_WDT = m
         help
           Hardware driver for OCTEON's on chip watchdog timer.
diff --git a/drivers/watchdog/bcm_kona_wdt.c b/drivers/watchdog/bcm_kona_wdt.c

index 4e37db3..22d8ae6 100644 (file)
--- a/drivers/watchdog/bcm_kona_wdt.c
+++ b/drivers/watchdog/bcm_kona_wdt.c
@@ -99,12 +99,14 @@ static int secure_register_read(struct bcm_kona_wdt *wdt, uint32_t offset)
  
  static int bcm_kona_wdt_dbg_show(struct seq_file *s, void *data)
  {
-       int ctl_val, cur_val, ret;
+       int ctl_val, cur_val;
         unsigned long flags;
         struct bcm_kona_wdt *wdt = s->private;
  
-       if (!wdt)
-               return seq_puts(s, "No device pointer\n");
+       if (!wdt) {
+               seq_puts(s, "No device pointer\n");
+               return 0;
+       }
  
         spin_lock_irqsave(&wdt->lock, flags);
         ctl_val = secure_register_read(wdt, SECWDOG_CTRL_REG);
@@ -112,7 +114,7 @@ static int bcm_kona_wdt_dbg_show(struct seq_file *s, void *data)
         spin_unlock_irqrestore(&wdt->lock, flags);
  
         if (ctl_val < 0 || cur_val < 0) {
-               ret = seq_puts(s, "Error accessing hardware\n");
+               seq_puts(s, "Error accessing hardware\n");
         } else {
                 int ctl, cur, ctl_sec, cur_sec, res;
  
@@ -121,15 +123,18 @@ static int bcm_kona_wdt_dbg_show(struct seq_file *s, void *data)
                 cur = cur_val & SECWDOG_COUNT_MASK;
                 ctl_sec = TICKS_TO_SECS(ctl, wdt);
                 cur_sec = TICKS_TO_SECS(cur, wdt);
-               ret = seq_printf(s, "Resolution: %d / %d\n"
-                               "Control: %d s / %d (%#x) ticks\n"
-                               "Current: %d s / %d (%#x) ticks\n"
-                               "Busy count: %lu\n", res,
-                               wdt->resolution, ctl_sec, ctl, ctl, cur_sec,
-                               cur, cur, wdt->busy_count);
+               seq_printf(s,
+                          "Resolution: %d / %d\n"
+                          "Control: %d s / %d (%#x) ticks\n"
+                          "Current: %d s / %d (%#x) ticks\n"
+                          "Busy count: %lu\n",
+                          res, wdt->resolution,
+                          ctl_sec, ctl, ctl,
+                          cur_sec, cur, cur,
+                          wdt->busy_count);
         }
  
-       return ret;
+       return 0;
  }
  
  static int bcm_kona_dbg_open(struct inode *inode, struct file *file)
diff --git a/drivers/watchdog/octeon-wdt-main.c b/drivers/watchdog/octeon-wdt-main.c

index 8453531..14521c8 100644 (file)
--- a/drivers/watchdog/octeon-wdt-main.c
+++ b/drivers/watchdog/octeon-wdt-main.c
@@ -3,6 +3,8 @@
   *
   * Copyright (C) 2007, 2008, 2009, 2010 Cavium Networks
   *
+ * Converted to use WATCHDOG_CORE by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
   * Some parts derived from wdt.c
   *
   *     (c) Copyright 1996-1997 Alan Cox <alan@lxorguk.ukuu.org.uk>,
@@ -103,13 +105,10 @@ MODULE_PARM_DESC(nowayout,
         "Watchdog cannot be stopped once started (default="
                                 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
  
-static unsigned long octeon_wdt_is_open;
-static char expect_close;
-
-static u32 __initdata nmi_stage1_insns[64];
+static u32 nmi_stage1_insns[64] __initdata;
  /* We need one branch and therefore one relocation per target label. */
-static struct uasm_label __initdata labels[5];
-static struct uasm_reloc __initdata relocs[5];
+static struct uasm_label labels[5] __initdata;
+static struct uasm_reloc relocs[5] __initdata;
  
  enum lable_id {
         label_enter_bootloader = 1
@@ -218,7 +217,8 @@ static void __init octeon_wdt_build_stage1(void)
         pr_debug("\t.set pop\n");
  
         if (len > 32)
-               panic("NMI stage 1 handler exceeds 32 instructions, was %d\n", len);
+               panic("NMI stage 1 handler exceeds 32 instructions, was %d\n",
+                     len);
  }
  
  static int cpu2core(int cpu)
@@ -294,6 +294,7 @@ static void octeon_wdt_write_hex(u64 value, int digits)
  {
         int d;
         int v;
+
         for (d = 0; d < digits; d++) {
                 v = (value >> ((digits - d - 1) * 4)) & 0xf;
                 if (v >= 10)
@@ -303,7 +304,7 @@ static void octeon_wdt_write_hex(u64 value, int digits)
         }
  }
  
-const char *reg_name[] = {
+static const char reg_name[][3] = {
         "$0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
         "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3",
         "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
@@ -444,7 +445,7 @@ static int octeon_wdt_cpu_callback(struct notifier_block *nfb,
         return NOTIFY_OK;
  }
  
-static void octeon_wdt_ping(void)
+static int octeon_wdt_ping(struct watchdog_device __always_unused *wdog)
  {
         int cpu;
         int coreid;
@@ -457,10 +458,12 @@ static void octeon_wdt_ping(void)
                     !cpumask_test_cpu(cpu, &irq_enabled_cpus)) {
                         /* We have to enable the irq */
                         int irq = OCTEON_IRQ_WDOG0 + coreid;
+
                         enable_irq(irq);
                         cpumask_set_cpu(cpu, &irq_enabled_cpus);
                 }
         }
+       return 0;
  }
  
  static void octeon_wdt_calc_parameters(int t)
@@ -489,7 +492,8 @@ static void octeon_wdt_calc_parameters(int t)
         timeout_cnt = ((octeon_get_io_clock_rate() >> 8) * timeout_sec) >> 8;
  }
  
-static int octeon_wdt_set_heartbeat(int t)
+static int octeon_wdt_set_timeout(struct watchdog_device *wdog,
+                                 unsigned int t)
  {
         int cpu;
         int coreid;
@@ -509,158 +513,45 @@ static int octeon_wdt_set_heartbeat(int t)
                 cvmx_write_csr(CVMX_CIU_WDOGX(coreid), ciu_wdog.u64);
                 cvmx_write_csr(CVMX_CIU_PP_POKEX(coreid), 1);
         }
-       octeon_wdt_ping(); /* Get the irqs back on. */
+       octeon_wdt_ping(wdog); /* Get the irqs back on. */
         return 0;
  }
  
-/**
- *     octeon_wdt_write:
- *     @file: file handle to the watchdog
- *     @buf: buffer to write (unused as data does not matter here
- *     @count: count of bytes
- *     @ppos: pointer to the position to write. No seeks allowed
- *
- *     A write to a watchdog device is defined as a keepalive signal. Any
- *     write of data will do, as we we don't define content meaning.
- */
-
-static ssize_t octeon_wdt_write(struct file *file, const char __user *buf,
-                               size_t count, loff_t *ppos)
-{
-       if (count) {
-               if (!nowayout) {
-                       size_t i;
-
-                       /* In case it was set long ago */
-                       expect_close = 0;
-
-                       for (i = 0; i != count; i++) {
-                               char c;
-                               if (get_user(c, buf + i))
-                                       return -EFAULT;
-                               if (c == 'V')
-                                       expect_close = 1;
-                       }
-               }
-               octeon_wdt_ping();
-       }
-       return count;
-}
-
-/**
- *     octeon_wdt_ioctl:
- *     @file: file handle to the device
- *     @cmd: watchdog command
- *     @arg: argument pointer
- *
- *     The watchdog API defines a common set of functions for all
- *     watchdogs according to their available features. We only
- *     actually usefully support querying capabilities and setting
- *     the timeout.
- */
-
-static long octeon_wdt_ioctl(struct file *file, unsigned int cmd,
-                            unsigned long arg)
-{
-       void __user *argp = (void __user *)arg;
-       int __user *p = argp;
-       int new_heartbeat;
-
-       static struct watchdog_info ident = {
-               .options =              WDIOF_SETTIMEOUT|
-                                       WDIOF_MAGICCLOSE|
-                                       WDIOF_KEEPALIVEPING,
-               .firmware_version =     1,
-               .identity =             "OCTEON",
-       };
-
-       switch (cmd) {
-       case WDIOC_GETSUPPORT:
-               return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0;
-       case WDIOC_GETSTATUS:
-       case WDIOC_GETBOOTSTATUS:
-               return put_user(0, p);
-       case WDIOC_KEEPALIVE:
-               octeon_wdt_ping();
-               return 0;
-       case WDIOC_SETTIMEOUT:
-               if (get_user(new_heartbeat, p))
-                       return -EFAULT;
-               if (octeon_wdt_set_heartbeat(new_heartbeat))
-                       return -EINVAL;
-               /* Fall through. */
-       case WDIOC_GETTIMEOUT:
-               return put_user(heartbeat, p);
-       default:
-               return -ENOTTY;
-       }
-}
-
-/**
- *     octeon_wdt_open:
- *     @inode: inode of device
- *     @file: file handle to device
- *
- *     The watchdog device has been opened. The watchdog device is single
- *     open and on opening we do a ping to reset the counters.
- */
-
-static int octeon_wdt_open(struct inode *inode, struct file *file)
+static int octeon_wdt_start(struct watchdog_device *wdog)
  {
-       if (test_and_set_bit(0, &octeon_wdt_is_open))
-               return -EBUSY;
-       /*
-        *      Activate
-        */
-       octeon_wdt_ping();
+       octeon_wdt_ping(wdog);
         do_coundown = 1;
-       return nonseekable_open(inode, file);
+       return 0;
  }
  
-/**
- *     octeon_wdt_release:
- *     @inode: inode to board
- *     @file: file handle to board
- *
- *     The watchdog has a configurable API. There is a religious dispute
- *     between people who want their watchdog to be able to shut down and
- *     those who want to be sure if the watchdog manager dies the machine
- *     reboots. In the former case we disable the counters, in the latter
- *     case you have to open it again very soon.
- */
-
-static int octeon_wdt_release(struct inode *inode, struct file *file)
+static int octeon_wdt_stop(struct watchdog_device *wdog)
  {
-       if (expect_close) {
-               do_coundown = 0;
-               octeon_wdt_ping();
-       } else {
-               pr_crit("WDT device closed unexpectedly.  WDT will not stop!\n");
-       }
-       clear_bit(0, &octeon_wdt_is_open);
-       expect_close = 0;
+       do_coundown = 0;
+       octeon_wdt_ping(wdog);
         return 0;
  }
  
-static const struct file_operations octeon_wdt_fops = {
-       .owner          = THIS_MODULE,
-       .llseek         = no_llseek,
-       .write          = octeon_wdt_write,
-       .unlocked_ioctl = octeon_wdt_ioctl,
-       .open           = octeon_wdt_open,
-       .release        = octeon_wdt_release,
+static struct notifier_block octeon_wdt_cpu_notifier = {
+       .notifier_call = octeon_wdt_cpu_callback,
  };
  
-static struct miscdevice octeon_wdt_miscdev = {
-       .minor  = WATCHDOG_MINOR,
-       .name   = "watchdog",
-       .fops   = &octeon_wdt_fops,
+static const struct watchdog_info octeon_wdt_info = {
+       .options = WDIOF_SETTIMEOUT | WDIOF_MAGICCLOSE | WDIOF_KEEPALIVEPING,
+       .identity = "OCTEON",
  };
  
-static struct notifier_block octeon_wdt_cpu_notifier = {
-       .notifier_call = octeon_wdt_cpu_callback,
+static const struct watchdog_ops octeon_wdt_ops = {
+       .owner          = THIS_MODULE,
+       .start          = octeon_wdt_start,
+       .stop           = octeon_wdt_stop,
+       .ping           = octeon_wdt_ping,
+       .set_timeout    = octeon_wdt_set_timeout,
  };
  
+static struct watchdog_device octeon_wdt = {
+       .info   = &octeon_wdt_info,
+       .ops    = &octeon_wdt_ops,
+};
  
  /**
   * Module/ driver initialization.
@@ -685,7 +576,8 @@ static int __init octeon_wdt_init(void)
         max_timeout_sec = 6;
         do {
                 max_timeout_sec--;
-               timeout_cnt = ((octeon_get_io_clock_rate() >> 8) * max_timeout_sec) >> 8;
+               timeout_cnt = ((octeon_get_io_clock_rate() >> 8) *
+                             max_timeout_sec) >> 8;
         } while (timeout_cnt > 65535);
  
         BUG_ON(timeout_cnt == 0);
@@ -694,11 +586,15 @@ static int __init octeon_wdt_init(void)
  
         pr_info("Initial granularity %d Sec\n", timeout_sec);
  
-       ret = misc_register(&octeon_wdt_miscdev);
+       octeon_wdt.timeout      = timeout_sec;
+       octeon_wdt.max_timeout  = UINT_MAX;
+
+       watchdog_set_nowayout(&octeon_wdt, nowayout);
+
+       ret = watchdog_register_device(&octeon_wdt);
         if (ret) {
-               pr_err("cannot register miscdev on minor=%d (err=%d)\n",
-                      WATCHDOG_MINOR, ret);
-               goto out;
+               pr_err("watchdog_register_device() failed: %d\n", ret);
+               return ret;
         }
  
         /* Build the NMI handler ... */
@@ -721,8 +617,7 @@ static int __init octeon_wdt_init(void)
         __register_hotcpu_notifier(&octeon_wdt_cpu_notifier);
         cpu_notifier_register_done();
  
-out:
-       return ret;
+       return 0;
  }
  
  /**
@@ -732,7 +627,7 @@ static void __exit octeon_wdt_cleanup(void)
  {
         int cpu;
  
-       misc_deregister(&octeon_wdt_miscdev);
+       watchdog_unregister_device(&octeon_wdt);
  
         cpu_notifier_register_begin();
         __unregister_hotcpu_notifier(&octeon_wdt_cpu_notifier);
diff --git a/drivers/watchdog/pnx4008_wdt.c b/drivers/watchdog/pnx4008_wdt.c

index 55e2201..b9c6049 100644 (file)
--- a/drivers/watchdog/pnx4008_wdt.c
+++ b/drivers/watchdog/pnx4008_wdt.c
@@ -216,7 +216,7 @@ static struct platform_driver platform_wdt_driver = {
  module_platform_driver(platform_wdt_driver);
  
  MODULE_AUTHOR("MontaVista Software, Inc. <source@mvista.com>");
-MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_AUTHOR("Wolfram Sang <kernel@pengutronix.de>");
  MODULE_DESCRIPTION("PNX4008 Watchdog Driver");
  
  module_param(heartbeat, uint, 0);
diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c

index aa85618..aa03ca8 100644 (file)
--- a/drivers/watchdog/qcom-wdt.c
+++ b/drivers/watchdog/qcom-wdt.c
@@ -20,9 +20,9 @@
  #include <linux/reboot.h>
  #include <linux/watchdog.h>
  
-#define WDT_RST                0x0
-#define WDT_EN         0x8
-#define WDT_BITE_TIME  0x24
+#define WDT_RST                0x38
+#define WDT_EN         0x40
+#define WDT_BITE_TIME  0x5C
  
  struct qcom_wdt {
         struct watchdog_device  wdd;
@@ -117,6 +117,8 @@ static int qcom_wdt_probe(struct platform_device *pdev)
  {
         struct qcom_wdt *wdt;
         struct resource *res;
+       struct device_node *np = pdev->dev.of_node;
+       u32 percpu_offset;
         int ret;
  
         wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
@@ -124,6 +126,14 @@ static int qcom_wdt_probe(struct platform_device *pdev)
                 return -ENOMEM;
  
         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+       /* We use CPU0's DGT for the watchdog */
+       if (of_property_read_u32(np, "cpu-offset", &percpu_offset))
+               percpu_offset = 0;
+
+       res->start += percpu_offset;
+       res->end += percpu_offset;
+
         wdt->base = devm_ioremap_resource(&pdev->dev, res);
         if (IS_ERR(wdt->base))
                 return PTR_ERR(wdt->base);
@@ -203,9 +213,8 @@ static int qcom_wdt_remove(struct platform_device *pdev)
  }
  
  static const struct of_device_id qcom_wdt_of_table[] = {
-       { .compatible = "qcom,kpss-wdt-msm8960", },
-       { .compatible = "qcom,kpss-wdt-apq8064", },
-       { .compatible = "qcom,kpss-wdt-ipq8064", },
+       { .compatible = "qcom,kpss-timer" },
+       { .compatible = "qcom,scss-timer" },
         { },
  };
  MODULE_DEVICE_TABLE(of, qcom_wdt_of_table);
diff --git a/drivers/watchdog/stmp3xxx_rtc_wdt.c b/drivers/watchdog/stmp3xxx_rtc_wdt.c

index a62b1b6..e7f0d5b 100644 (file)
--- a/drivers/watchdog/stmp3xxx_rtc_wdt.c
+++ b/drivers/watchdog/stmp3xxx_rtc_wdt.c
@@ -1,7 +1,7 @@
  /*
   * Watchdog driver for the RTC based watchdog in STMP3xxx and i.MX23/28
   *
- * Author: Wolfram Sang <w.sang@pengutronix.de>
+ * Author: Wolfram Sang <kernel@pengutronix.de>
   *
   * Copyright (C) 2011-12 Wolfram Sang, Pengutronix
   *
@@ -129,4 +129,4 @@ module_platform_driver(stmp3xxx_wdt_driver);
  
  MODULE_DESCRIPTION("STMP3XXX RTC Watchdog Driver");
  MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Wolfram Sang <w.sang@pengutronix.de>");
+MODULE_AUTHOR("Wolfram Sang <kernel@pengutronix.de>");
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig

index a270004..7cd226d 100644 (file)
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -276,4 +276,8 @@ config XEN_AUTO_XLATE
         help
           Support for auto-translated physmap guests.
  
+config XEN_ACPI
+       def_bool y
+       depends on X86 && ACPI
+
  endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile

index 40edd1c..e293bc5 100644 (file)
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -13,7 +13,7 @@ CFLAGS_efi.o                          += -fshort-wchar
  
  dom0-$(CONFIG_PCI) += pci.o
  dom0-$(CONFIG_USB_SUPPORT) += dbgp.o
-dom0-$(CONFIG_ACPI) += acpi.o $(xen-pad-y)
+dom0-$(CONFIG_XEN_ACPI) += acpi.o $(xen-pad-y)
  xen-pad-$(CONFIG_X86) += xen-acpi-pad.o
  dom0-$(CONFIG_X86) += pcpu.o
  obj-$(CONFIG_XEN_DOM0)                 += $(dom0-y)
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c

index 07ef383..b7f5150 100644 (file)
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -204,8 +204,7 @@ static LIST_HEAD(scsiback_free_pages);
  static DEFINE_MUTEX(scsiback_mutex);
  static LIST_HEAD(scsiback_list);
  
-/* Local pointer to allocated TCM configfs fabric module */
-static struct target_fabric_configfs *scsiback_fabric_configfs;
+static const struct target_core_fabric_ops scsiback_ops;
  
  static void scsiback_get(struct vscsibk_info *info)
  {
@@ -1902,7 +1901,7 @@ scsiback_make_tpg(struct se_wwn *wwn,
         tpg->tport = tport;
         tpg->tport_tpgt = tpgt;
  
-       ret = core_tpg_register(&scsiback_fabric_configfs->tf_ops, wwn,
+       ret = core_tpg_register(&scsiback_ops, wwn,
                                 &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
         if (ret < 0) {
                 kfree(tpg);
@@ -1944,7 +1943,9 @@ static int scsiback_check_false(struct se_portal_group *se_tpg)
         return 0;
  }
  
-static struct target_core_fabric_ops scsiback_ops = {
+static const struct target_core_fabric_ops scsiback_ops = {
+       .module                         = THIS_MODULE,
+       .name                           = "xen-pvscsi",
         .get_fabric_name                = scsiback_get_fabric_name,
         .get_fabric_proto_ident         = scsiback_get_fabric_proto_ident,
         .tpg_get_wwn                    = scsiback_get_fabric_wwn,
@@ -1991,62 +1992,10 @@ static struct target_core_fabric_ops scsiback_ops = {
         .fabric_make_nodeacl            = scsiback_make_nodeacl,
         .fabric_drop_nodeacl            = scsiback_drop_nodeacl,
  #endif
-};
-
-static int scsiback_register_configfs(void)
-{
-       struct target_fabric_configfs *fabric;
-       int ret;
-
-       pr_debug("fabric module %s on %s/%s on "UTS_RELEASE"\n",
-                VSCSI_VERSION, utsname()->sysname, utsname()->machine);
-       /*
-        * Register the top level struct config_item_type with TCM core
-        */
-       fabric = target_fabric_configfs_init(THIS_MODULE, "xen-pvscsi");
-       if (IS_ERR(fabric))
-               return PTR_ERR(fabric);
  
-       /*
-        * Setup fabric->tf_ops from our local scsiback_ops
-        */
-       fabric->tf_ops = scsiback_ops;
-       /*
-        * Setup default attribute lists for various fabric->tf_cit_tmpl
-        */
-       fabric->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = scsiback_wwn_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = scsiback_tpg_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = scsiback_param_attrs;
-       fabric->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
-       fabric->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
-       /*
-        * Register the fabric for use within TCM
-        */
-       ret = target_fabric_configfs_register(fabric);
-       if (ret < 0) {
-               target_fabric_configfs_free(fabric);
-               return ret;
-       }
-       /*
-        * Setup our local pointer to *fabric
-        */
-       scsiback_fabric_configfs = fabric;
-       pr_debug("Set fabric -> scsiback_fabric_configfs\n");
-       return 0;
-};
-
-static void scsiback_deregister_configfs(void)
-{
-       if (!scsiback_fabric_configfs)
-               return;
-
-       target_fabric_configfs_deregister(scsiback_fabric_configfs);
-       scsiback_fabric_configfs = NULL;
-       pr_debug("Cleared scsiback_fabric_configfs\n");
+       .tfc_wwn_attrs                  = scsiback_wwn_attrs,
+       .tfc_tpg_base_attrs             = scsiback_tpg_attrs,
+       .tfc_tpg_param_attrs            = scsiback_param_attrs,
  };
  
  static const struct xenbus_device_id scsiback_ids[] = {
@@ -2078,6 +2027,9 @@ static int __init scsiback_init(void)
         if (!xen_domain())
                 return -ENODEV;
  
+       pr_debug("xen-pvscsi: fabric module %s on %s/%s on "UTS_RELEASE"\n",
+                VSCSI_VERSION, utsname()->sysname, utsname()->machine);
+
         scsiback_cachep = kmem_cache_create("vscsiif_cache",
                 sizeof(struct vscsibk_pend), 0, 0, scsiback_init_pend);
         if (!scsiback_cachep)
@@ -2087,7 +2039,7 @@ static int __init scsiback_init(void)
         if (ret)
                 goto out_cache_destroy;
  
-       ret = scsiback_register_configfs();
+       ret = target_register_template(&scsiback_ops);
         if (ret)
                 goto out_unregister_xenbus;
  
@@ -2110,7 +2062,7 @@ static void __exit scsiback_exit(void)
                         BUG();
                 gnttab_free_pages(1, &page);
         }
-       scsiback_deregister_configfs();
+       target_unregister_template(&scsiback_ops);
         xenbus_unregister_driver(&scsiback_driver);
         kmem_cache_destroy(scsiback_cachep);
  }
diff --git a/fs/9p/acl.c b/fs/9p/acl.c

index 8482f2d..31c0103 100644 (file)
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -247,7 +247,7 @@ static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
         if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
                 return v9fs_remote_get_acl(dentry, name, buffer, size, type);
  
-       acl = v9fs_get_cached_acl(dentry->d_inode, type);
+       acl = v9fs_get_cached_acl(d_inode(dentry), type);
         if (IS_ERR(acl))
                 return PTR_ERR(acl);
         if (acl == NULL)
@@ -285,7 +285,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
         int retval;
         struct posix_acl *acl;
         struct v9fs_session_info *v9ses;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (strcmp(name, "") != 0)
                 return -EINVAL;
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c

index a345b2d..bd456c6 100644 (file)
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -53,7 +53,7 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry)
                  dentry, dentry);
  
         /* Don't cache negative dentries */
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 return 1;
         return 0;
  }
@@ -83,7 +83,7 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         if (!inode)
                 goto out_valid;
  
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c

index 76c3b1a..5cc00e5 100644 (file)
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -138,6 +138,8 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
                                            &err);
                         if (err)
                                 return err;
+                       if (n == 0)
+                               return 0;
  
                         rdir->head = 0;
                         rdir->tail = n;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c

index 3662f1d..703342e 100644 (file)
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -595,7 +595,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
                  dir, dentry, flags);
  
         v9ses = v9fs_inode2v9ses(dir);
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         dfid = v9fs_fid_lookup(dentry->d_parent);
         if (IS_ERR(dfid)) {
                 retval = PTR_ERR(dfid);
@@ -864,7 +864,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
         }
  
         /* Only creates */
-       if (!(flags & O_CREAT) || dentry->d_inode)
+       if (!(flags & O_CREAT) || d_really_is_positive(dentry))
                 return finish_no_open(file, res);
  
         err = 0;
@@ -881,7 +881,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
         }
  
         v9fs_invalidate_inode_attr(dir);
-       v9inode = V9FS_I(dentry->d_inode);
+       v9inode = V9FS_I(d_inode(dentry));
         mutex_lock(&v9inode->v_mutex);
         if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
             !v9inode->writeback_fid &&
@@ -908,7 +908,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
  
         file->private_data = fid;
         if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
-               v9fs_cache_inode_set_cookie(dentry->d_inode, file);
+               v9fs_cache_inode_set_cookie(d_inode(dentry), file);
  
         *opened |= FILE_CREATED;
  out:
@@ -969,8 +969,8 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
  
         p9_debug(P9_DEBUG_VFS, "\n");
         retval = 0;
-       old_inode = old_dentry->d_inode;
-       new_inode = new_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
+       new_inode = d_inode(new_dentry);
         v9ses = v9fs_inode2v9ses(old_inode);
         oldfid = v9fs_fid_lookup(old_dentry);
         if (IS_ERR(oldfid))
@@ -1061,7 +1061,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
         p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
         v9ses = v9fs_dentry2v9ses(dentry);
         if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
-               generic_fillattr(dentry->d_inode, stat);
+               generic_fillattr(d_inode(dentry), stat);
                 return 0;
         }
         fid = v9fs_fid_lookup(dentry);
@@ -1072,8 +1072,8 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
         if (IS_ERR(st))
                 return PTR_ERR(st);
  
-       v9fs_stat2inode(st, dentry->d_inode, dentry->d_inode->i_sb);
-       generic_fillattr(dentry->d_inode, stat);
+       v9fs_stat2inode(st, d_inode(dentry), d_inode(dentry)->i_sb);
+       generic_fillattr(d_inode(dentry), stat);
  
         p9stat_free(st);
         kfree(st);
@@ -1095,7 +1095,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
         struct p9_wstat wstat;
  
         p9_debug(P9_DEBUG_VFS, "\n");
-       retval = inode_change_ok(dentry->d_inode, iattr);
+       retval = inode_change_ok(d_inode(dentry), iattr);
         if (retval)
                 return retval;
  
@@ -1128,20 +1128,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
  
         /* Write all dirty data */
         if (d_is_reg(dentry))
-               filemap_write_and_wait(dentry->d_inode->i_mapping);
+               filemap_write_and_wait(d_inode(dentry)->i_mapping);
  
         retval = p9_client_wstat(fid, &wstat);
         if (retval < 0)
                 return retval;
  
         if ((iattr->ia_valid & ATTR_SIZE) &&
-           iattr->ia_size != i_size_read(dentry->d_inode))
-               truncate_setsize(dentry->d_inode, iattr->ia_size);
+           iattr->ia_size != i_size_read(d_inode(dentry)))
+               truncate_setsize(d_inode(dentry), iattr->ia_size);
  
-       v9fs_invalidate_inode_attr(dentry->d_inode);
+       v9fs_invalidate_inode_attr(d_inode(dentry));
  
-       setattr_copy(dentry->d_inode, iattr);
-       mark_inode_dirty(dentry->d_inode);
+       setattr_copy(d_inode(dentry), iattr);
+       mark_inode_dirty(d_inode(dentry));
         return 0;
  }
  
@@ -1403,7 +1403,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
         retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name);
         __putname(name);
         if (!retval) {
-               v9fs_refresh_inode(oldfid, old_dentry->d_inode);
+               v9fs_refresh_inode(oldfid, d_inode(old_dentry));
                 v9fs_invalidate_inode_attr(dir);
         }
  clunk_fid:
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c

index 6054c16..9861c7c 100644 (file)
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -265,7 +265,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
         }
  
         /* Only creates */
-       if (!(flags & O_CREAT) || dentry->d_inode)
+       if (!(flags & O_CREAT) || d_really_is_positive(dentry))
                 return  finish_no_open(file, res);
  
         v9ses = v9fs_inode2v9ses(dir);
@@ -481,7 +481,7 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
         p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
         v9ses = v9fs_dentry2v9ses(dentry);
         if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
-               generic_fillattr(dentry->d_inode, stat);
+               generic_fillattr(d_inode(dentry), stat);
                 return 0;
         }
         fid = v9fs_fid_lookup(dentry);
@@ -496,8 +496,8 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry,
         if (IS_ERR(st))
                 return PTR_ERR(st);
  
-       v9fs_stat2inode_dotl(st, dentry->d_inode);
-       generic_fillattr(dentry->d_inode, stat);
+       v9fs_stat2inode_dotl(st, d_inode(dentry));
+       generic_fillattr(d_inode(dentry), stat);
         /* Change block size to what the server returned */
         stat->blksize = st->st_blksize;
  
@@ -557,7 +557,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
         int retval;
         struct p9_fid *fid;
         struct p9_iattr_dotl p9attr;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         p9_debug(P9_DEBUG_VFS, "\n");
  
@@ -795,10 +795,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
                 if (IS_ERR(fid))
                         return PTR_ERR(fid);
  
-               v9fs_refresh_inode_dotl(fid, old_dentry->d_inode);
+               v9fs_refresh_inode_dotl(fid, d_inode(old_dentry));
         }
-       ihold(old_dentry->d_inode);
-       d_instantiate(dentry, old_dentry->d_inode);
+       ihold(d_inode(old_dentry));
+       d_instantiate(dentry, d_inode(old_dentry));
  
         return err;
  }
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c

index 0afd038..e99a338 100644 (file)
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -168,8 +168,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                         retval = PTR_ERR(st);
                         goto release_sb;
                 }
-               root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
-               v9fs_stat2inode_dotl(st, root->d_inode);
+               d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
+               v9fs_stat2inode_dotl(st, d_inode(root));
                 kfree(st);
         } else {
                 struct p9_wstat *st = NULL;
@@ -179,8 +179,8 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
                         goto release_sb;
                 }
  
-               root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
-               v9fs_stat2inode(st, root->d_inode, sb);
+               d_inode(root)->i_ino = v9fs_qid2ino(&st->qid);
+               v9fs_stat2inode(st, d_inode(root), sb);
  
                 p9stat_free(st);
                 kfree(st);
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c

index b9acada..335055d 100644 (file)
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -298,7 +298,7 @@ out:
  int
  adfs_notify_change(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct super_block *sb = inode->i_sb;
         unsigned int ia_valid = attr->ia_valid;
         int error;
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c

index 5022ac9..a8f463c 100644 (file)
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -138,7 +138,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino)
  static int
  affs_remove_link(struct dentry *dentry)
  {
-       struct inode *dir, *inode = dentry->d_inode;
+       struct inode *dir, *inode = d_inode(dentry);
         struct super_block *sb = inode->i_sb;
         struct buffer_head *bh = NULL, *link_bh = NULL;
         u32 link_ino, ino;
@@ -268,11 +268,11 @@ affs_remove_header(struct dentry *dentry)
         struct buffer_head *bh = NULL;
         int retval;
  
-       dir = dentry->d_parent->d_inode;
+       dir = d_inode(dentry->d_parent);
         sb = dir->i_sb;
  
         retval = -ENOENT;
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         if (!inode)
                 goto done;
  
@@ -471,10 +471,9 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...)
  bool
  affs_nofilenametruncate(const struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         return affs_test_opt(AFFS_SB(inode->i_sb)->s_flags, SF_NO_TRUNCATE);
-
  }
  
  /* Check if the name is valid for a affs object. */
diff --git a/fs/affs/inode.c b/fs/affs/inode.c

index 9628003..a022f4a 100644 (file)
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -213,7 +213,7 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
  int
  affs_notify_change(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c

index ec8ca0e..181e05b 100644 (file)
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -251,7 +251,7 @@ int
  affs_unlink(struct inode *dir, struct dentry *dentry)
  {
         pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
-                dentry->d_inode->i_ino, dentry);
+                d_inode(dentry)->i_ino, dentry);
  
         return affs_remove_header(dentry);
  }
@@ -320,7 +320,7 @@ int
  affs_rmdir(struct inode *dir, struct dentry *dentry)
  {
         pr_debug("%s(dir=%lu, %lu \"%pd\")\n", __func__, dir->i_ino,
-                dentry->d_inode->i_ino, dentry);
+                d_inode(dentry)->i_ino, dentry);
  
         return affs_remove_header(dentry);
  }
@@ -403,7 +403,7 @@ err:
  int
  affs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
  
         pr_debug("%s(%lu, %lu, \"%pd\")\n", __func__, inode->i_ino, dir->i_ino,
                  dentry);
@@ -430,13 +430,13 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 return retval;
  
         /* Unlink destination if it already exists */
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                 retval = affs_remove_header(new_dentry);
                 if (retval)
                         return retval;
         }
  
-       bh = affs_bread(sb, old_dentry->d_inode->i_ino);
+       bh = affs_bread(sb, d_inode(old_dentry)->i_ino);
         if (!bh)
                 return -EIO;
  
diff --git a/fs/afs/dir.c b/fs/afs/dir.c

index 4ec35e9..e10e177 100644 (file)
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -505,7 +505,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
         _enter("{%x:%u},%p{%pd},",
                vnode->fid.vid, vnode->fid.vnode, dentry, dentry);
  
-       ASSERTCMP(dentry->d_inode, ==, NULL);
+       ASSERTCMP(d_inode(dentry), ==, NULL);
  
         if (dentry->d_name.len >= AFSNAMEMAX) {
                 _leave(" = -ENAMETOOLONG");
@@ -563,8 +563,8 @@ success:
         _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%u }",
                fid.vnode,
                fid.unique,
-              dentry->d_inode->i_ino,
-              dentry->d_inode->i_generation);
+              d_inode(dentry)->i_ino,
+              d_inode(dentry)->i_generation);
  
         return NULL;
  }
@@ -586,9 +586,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       vnode = AFS_FS_I(dentry->d_inode);
+       vnode = AFS_FS_I(d_inode(dentry));
  
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 _enter("{v={%x:%u} n=%pd fl=%lx},",
                        vnode->fid.vid, vnode->fid.vnode, dentry,
                        vnode->flags);
@@ -601,7 +601,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
  
         /* lock down the parent dentry so we can peer at it */
         parent = dget_parent(dentry);
-       dir = AFS_FS_I(parent->d_inode);
+       dir = AFS_FS_I(d_inode(parent));
  
         /* validate the parent directory */
         if (test_bit(AFS_VNODE_MODIFIED, &dir->flags))
@@ -623,9 +623,9 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
         switch (ret) {
         case 0:
                 /* the filename maps to something */
-               if (!dentry->d_inode)
+               if (d_really_is_negative(dentry))
                         goto out_bad;
-               if (is_bad_inode(dentry->d_inode)) {
+               if (is_bad_inode(d_inode(dentry))) {
                         printk("kAFS: afs_d_revalidate: %pd2 has bad inode\n",
                                dentry);
                         goto out_bad;
@@ -647,7 +647,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
                         _debug("%pd: file deleted (uq %u -> %u I:%u)",
                                dentry, fid.unique,
                                vnode->fid.unique,
-                              dentry->d_inode->i_generation);
+                              d_inode(dentry)->i_generation);
                         spin_lock(&vnode->lock);
                         set_bit(AFS_VNODE_DELETED, &vnode->flags);
                         spin_unlock(&vnode->lock);
@@ -658,7 +658,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
         case -ENOENT:
                 /* the filename is unknown */
                 _debug("%pd: dirent not found", dentry);
-               if (dentry->d_inode)
+               if (d_really_is_positive(dentry))
                         goto not_found;
                 goto out_valid;
  
@@ -703,9 +703,9 @@ static int afs_d_delete(const struct dentry *dentry)
         if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
                 goto zap;
  
-       if (dentry->d_inode &&
-           (test_bit(AFS_VNODE_DELETED,   &AFS_FS_I(dentry->d_inode)->flags) ||
-            test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(dentry->d_inode)->flags)))
+       if (d_really_is_positive(dentry) &&
+           (test_bit(AFS_VNODE_DELETED,   &AFS_FS_I(d_inode(dentry))->flags) ||
+            test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(d_inode(dentry))->flags)))
                 goto zap;
  
         _leave(" = 0 [keep]");
@@ -814,8 +814,8 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
         if (ret < 0)
                 goto rmdir_error;
  
-       if (dentry->d_inode) {
-               vnode = AFS_FS_I(dentry->d_inode);
+       if (d_really_is_positive(dentry)) {
+               vnode = AFS_FS_I(d_inode(dentry));
                 clear_nlink(&vnode->vfs_inode);
                 set_bit(AFS_VNODE_DELETED, &vnode->flags);
                 afs_discard_callback_on_delete(vnode);
@@ -856,8 +856,8 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
                 goto error;
         }
  
-       if (dentry->d_inode) {
-               vnode = AFS_FS_I(dentry->d_inode);
+       if (d_really_is_positive(dentry)) {
+               vnode = AFS_FS_I(d_inode(dentry));
  
                 /* make sure we have a callback promise on the victim */
                 ret = afs_validate(vnode, key);
@@ -869,7 +869,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
         if (ret < 0)
                 goto remove_error;
  
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                 /* if the file wasn't deleted due to excess hard links, the
                  * fileserver will break the callback promise on the file - if
                  * it had one - before it returns to us, and if it was deleted,
@@ -879,7 +879,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
                  * or it was outstanding on a different server, then it won't
                  * break it either...
                  */
-               vnode = AFS_FS_I(dentry->d_inode);
+               vnode = AFS_FS_I(d_inode(dentry));
                 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
                         _debug("AFS_VNODE_DELETED");
                 if (test_bit(AFS_VNODE_CB_BROKEN, &vnode->flags))
@@ -977,7 +977,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
         struct key *key;
         int ret;
  
-       vnode = AFS_FS_I(from->d_inode);
+       vnode = AFS_FS_I(d_inode(from));
         dvnode = AFS_FS_I(dir);
  
         _enter("{%x:%u},{%x:%u},{%pd}",
@@ -1089,7 +1089,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
         struct key *key;
         int ret;
  
-       vnode = AFS_FS_I(old_dentry->d_inode);
+       vnode = AFS_FS_I(d_inode(old_dentry));
         orig_dvnode = AFS_FS_I(old_dir);
         new_dvnode = AFS_FS_I(new_dir);
  
diff --git a/fs/afs/inode.c b/fs/afs/inode.c

index 8a1d38e..e06f5a2 100644 (file)
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -379,7 +379,7 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
  {
         struct inode *inode;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
  
@@ -458,7 +458,7 @@ void afs_evict_inode(struct inode *inode)
   */
  int afs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+       struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
         struct key *key;
         int ret;
  
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c

index 938c5ab..ccd0b21 100644 (file)
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -134,7 +134,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
  
         _enter("{%pd}", mntpt);
  
-       BUG_ON(!mntpt->d_inode);
+       BUG_ON(!d_inode(mntpt));
  
         ret = -ENOMEM;
         devname = (char *) get_zeroed_page(GFP_KERNEL);
@@ -145,7 +145,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
         if (!options)
                 goto error_no_options;
  
-       vnode = AFS_FS_I(mntpt->d_inode);
+       vnode = AFS_FS_I(d_inode(mntpt));
         if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
                 /* if the directory is a pseudo directory, use the d_name */
                 static const char afs_root_cell[] = ":root.cell.";
@@ -169,14 +169,14 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
                 }
         } else {
                 /* read the contents of the AFS special symlink */
-               loff_t size = i_size_read(mntpt->d_inode);
+               loff_t size = i_size_read(d_inode(mntpt));
                 char *buf;
  
                 ret = -EINVAL;
                 if (size > PAGE_SIZE - 1)
                         goto error_no_page;
  
-               page = read_mapping_page(mntpt->d_inode->i_mapping, 0, NULL);
+               page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
                 if (IS_ERR(page)) {
                         ret = PTR_ERR(page);
                         goto error_no_page;
diff --git a/fs/afs/super.c b/fs/afs/super.c

index c486155..1fb4a51 100644 (file)
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -529,7 +529,7 @@ static void afs_destroy_inode(struct inode *inode)
  static int afs_statfs(struct dentry *dentry, struct kstatfs *buf)
  {
         struct afs_volume_status vs;
-       struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+       struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
         struct key *key;
         int ret;
  
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h

index d10e619..5b700ef 100644 (file)
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -235,12 +235,12 @@ static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
  
  static inline u64 autofs4_get_ino(struct autofs_sb_info *sbi)
  {
-       return sbi->sb->s_root->d_inode->i_ino;
+       return d_inode(sbi->sb->s_root)->i_ino;
  }
  
  static inline int simple_positive(struct dentry *dentry)
  {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
  }
  
  static inline void __autofs4_add_expiring(struct dentry *dentry)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c

index 11dd118..1cebc3c 100644 (file)
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -374,7 +374,7 @@ static struct dentry *should_expire(struct dentry *dentry,
                 return NULL;
         }
  
-       if (dentry->d_inode && d_is_symlink(dentry)) {
+       if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
                 DPRINTK("checking symlink %p %pd", dentry, dentry);
                 /*
                  * A symlink can't be "busy" in the usual sense so
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c

index 1c55388..a3ae0b2 100644 (file)
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -71,7 +71,7 @@ void autofs4_kill_sb(struct super_block *sb)
  static int autofs4_show_options(struct seq_file *m, struct dentry *root)
  {
         struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
-       struct inode *root_inode = root->d_sb->s_root->d_inode;
+       struct inode *root_inode = d_inode(root->d_sb->s_root);
  
         if (!sbi)
                 return 0;
@@ -352,8 +352,8 @@ struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
  
         inode->i_mode = mode;
         if (sb->s_root) {
-               inode->i_uid = sb->s_root->d_inode->i_uid;
-               inode->i_gid = sb->s_root->d_inode->i_gid;
+               inode->i_uid = d_inode(sb->s_root)->i_uid;
+               inode->i_gid = d_inode(sb->s_root)->i_gid;
         }
         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
         inode->i_ino = get_next_ino();
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c

index 7e44fdd..c6d7d3d 100644 (file)
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -240,7 +240,7 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry,
                 spin_lock(&expiring->d_lock);
  
                 /* We've already been dentry_iput or unlinked */
-               if (!expiring->d_inode)
+               if (d_really_is_negative(expiring))
                         goto next;
  
                 qstr = &expiring->d_name;
@@ -371,7 +371,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
          * having d_mountpoint() true, so there's no need to call back
          * to the daemon.
          */
-       if (dentry->d_inode && d_is_symlink(dentry)) {
+       if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
                 spin_unlock(&sbi->fs_lock);
                 goto done;
         }
@@ -459,7 +459,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
                         return 0;
                 if (d_mountpoint(dentry))
                         return 0;
-               inode = ACCESS_ONCE(dentry->d_inode);
+               inode = d_inode_rcu(dentry);
                 if (inode && S_ISLNK(inode->i_mode))
                         return -EISDIR;
                 if (list_empty(&dentry->d_subdirs))
@@ -485,7 +485,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
                  * an incorrect ELOOP error return.
                  */
                 if ((!d_mountpoint(dentry) && !simple_empty(dentry)) ||
-                   (dentry->d_inode && d_is_symlink(dentry)))
+                   (d_really_is_positive(dentry) && d_is_symlink(dentry)))
                         status = -EISDIR;
         }
         spin_unlock(&sbi->fs_lock);
@@ -625,8 +625,8 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
         }
         dput(ino->dentry);
  
-       dentry->d_inode->i_size = 0;
-       clear_nlink(dentry->d_inode);
+       d_inode(dentry)->i_size = 0;
+       clear_nlink(d_inode(dentry));
  
         dir->i_mtime = CURRENT_TIME;
  
@@ -719,8 +719,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
                         atomic_dec(&p_ino->count);
         }
         dput(ino->dentry);
-       dentry->d_inode->i_size = 0;
-       clear_nlink(dentry->d_inode);
+       d_inode(dentry)->i_size = 0;
+       clear_nlink(d_inode(dentry));
  
         if (dir->i_nlink)
                 drop_nlink(dir);
@@ -839,7 +839,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
  */
  int is_autofs4_dentry(struct dentry *dentry)
  {
-       return dentry && dentry->d_inode &&
+       return dentry && d_really_is_positive(dentry) &&
                 dentry->d_op == &autofs4_dentry_operations &&
                 dentry->d_fsdata != NULL;
  }
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c

index 1e8ea19..de58cc7 100644 (file)
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -18,7 +18,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
         struct autofs_info *ino = autofs4_dentry_ino(dentry);
         if (ino && !autofs4_oz_mode(sbi))
                 ino->last_used = jiffies;
-       nd_set_link(nd, dentry->d_inode->i_private);
+       nd_set_link(nd, d_inode(dentry)->i_private);
         return NULL;
  }
  
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c

index 2ad05ab..35b755e 100644 (file)
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -322,7 +322,7 @@ static int validate_request(struct autofs_wait_queue **wait,
                  * continue on and create a new request.
                  */
                 if (!IS_ROOT(dentry)) {
-                       if (dentry->d_inode && d_unhashed(dentry)) {
+                       if (d_really_is_positive(dentry) && d_unhashed(dentry)) {
                                 struct dentry *parent = dentry->d_parent;
                                 new = d_lookup(parent, &dentry->d_name);
                                 if (new)
@@ -364,7 +364,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
         if (pid == 0 || tgid == 0)
                 return -ENOENT;
  
-       if (!dentry->d_inode) {
+       if (d_really_is_negative(dentry)) {
                 /*
                  * A wait for a negative dentry is invalid for certain
                  * cases. A direct or offset mount "always" has its mount
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c

index 16e0a48..7943533 100644 (file)
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -471,7 +471,7 @@ static void *
  befs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct super_block *sb = dentry->d_sb;
-       struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+       struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
         befs_data_stream *data = &befs_ino->i_data.ds;
         befs_off_t len = data->size;
         char *link;
@@ -501,7 +501,7 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd)
  static void *
  befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct befs_inode_info *befs_ino = BEFS_I(dentry->d_inode);
+       struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry));
  
         nd_set_link(nd, befs_ino->i_data.symlink);
         return NULL;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c

index 7a81827..3ec6113 100644 (file)
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -153,7 +153,7 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
  static int bfs_link(struct dentry *old, struct inode *dir,
                                                 struct dentry *new)
  {
-       struct inode *inode = old->d_inode;
+       struct inode *inode = d_inode(old);
         struct bfs_sb_info *info = BFS_SB(inode->i_sb);
         int err;
  
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
  static int bfs_unlink(struct inode *dir, struct dentry *dentry)
  {
         int error = -ENOENT;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct buffer_head *bh;
         struct bfs_dirent *de;
         struct bfs_sb_info *info = BFS_SB(inode->i_sb);
@@ -216,7 +216,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         int error = -ENOENT;
  
         old_bh = new_bh = NULL;
-       old_inode = old_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
         if (S_ISDIR(old_inode->i_mode))
                 return -EINVAL;
  
@@ -231,7 +231,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 goto end_rename;
  
         error = -EPERM;
-       new_inode = new_dentry->d_inode;
+       new_inode = d_inode(new_dentry);
         new_bh = bfs_find_entry(new_dir, 
                                 new_dentry->d_name.name, 
                                 new_dentry->d_name.len, &new_de);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c

index 9dcb054..78f005f 100644 (file)
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -591,7 +591,7 @@ static void kill_node(Node *e)
         write_unlock(&entries_lock);
  
         if (dentry) {
-               drop_nlink(dentry->d_inode);
+               drop_nlink(d_inode(dentry));
                 d_drop(dentry);
                 dput(dentry);
                 simple_release_fs(&bm_mnt, &entry_count);
@@ -638,11 +638,11 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
         case 3:
                 /* Delete this handler. */
                 root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&root->d_inode->i_mutex);
+               mutex_lock(&d_inode(root)->i_mutex);
  
                 kill_node(e);
  
-               mutex_unlock(&root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(root)->i_mutex);
                 dput(root);
                 break;
         default:
@@ -675,14 +675,14 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
                 return PTR_ERR(e);
  
         root = dget(sb->s_root);
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
         dentry = lookup_one_len(e->name, root, strlen(e->name));
         err = PTR_ERR(dentry);
         if (IS_ERR(dentry))
                 goto out;
  
         err = -EEXIST;
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 goto out2;
  
         inode = bm_get_inode(sb, S_IFREG | 0644);
@@ -711,7 +711,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
  out2:
         dput(dentry);
  out:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
         dput(root);
  
         if (err) {
@@ -754,12 +754,12 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
         case 3:
                 /* Delete all handlers. */
                 root = dget(file->f_path.dentry->d_sb->s_root);
-               mutex_lock(&root->d_inode->i_mutex);
+               mutex_lock(&d_inode(root)->i_mutex);
  
                 while (!list_empty(&entries))
                         kill_node(list_entry(entries.next, Node, list));
  
-               mutex_unlock(&root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(root)->i_mutex);
                 dput(root);
                 break;
         default:
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 897ee05..c7e4163 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -152,7 +152,8 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
         struct inode *inode = file->f_mapping->host;
  
         return __blockdev_direct_IO(iocb, inode, I_BDEV(inode), iter, offset,
-                                   blkdev_get_block, NULL, NULL, 0);
+                                   blkdev_get_block, NULL, NULL,
+                                   DIO_SKIP_DIO_COUNT);
  }
  
  int __sync_blockdev(struct block_device *bdev, int wait)
@@ -1716,7 +1717,7 @@ struct block_device *lookup_bdev(const char *pathname)
         if (error)
                 return ERR_PTR(error);
  
-       inode = path.dentry->d_inode;
+       inode = d_backing_inode(path.dentry);
         error = -ENOTBLK;
         if (!S_ISBLK(inode->i_mode))
                 goto fail;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c

index 4dabeb8..df9932b 100644 (file)
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -87,7 +87,7 @@ BTRFS_WORK_HELPER(scrubwrc_helper);
  BTRFS_WORK_HELPER(scrubnc_helper);
  
  static struct __btrfs_workqueue *
-__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
+__btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
                          int thresh)
  {
         struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -132,7 +132,7 @@ static inline void
  __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq);
  
  struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
-                                             int flags,
+                                             unsigned int flags,
                                               int max_active,
                                               int thresh)
  {
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h

index e386c29..ec2ee47 100644 (file)
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -66,7 +66,7 @@ BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
  BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
  
  struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
-                                             int flags,
+                                             unsigned int flags,
                                               int max_active,
                                               int thresh);
  void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t helper,
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c

index f55721f..9de772e 100644 (file)
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1206,7 +1206,7 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans,
         struct ulist *roots = NULL;
         struct ulist_iterator uiter;
         struct ulist_node *node;
-       struct seq_list elem = {};
+       struct seq_list elem = SEQ_LIST_INIT(elem);
         int ret = 0;
  
         tmp = ulist_alloc(GFP_NOFS);
@@ -1610,7 +1610,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
         struct ulist *roots = NULL;
         struct ulist_node *ref_node = NULL;
         struct ulist_node *root_node = NULL;
-       struct seq_list tree_mod_seq_elem = {};
+       struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
         struct ulist_iterator ref_uiter;
         struct ulist_iterator root_uiter;
  
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h

index de5e4f2..0ef5cc1 100644 (file)
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -66,7 +66,11 @@ struct btrfs_inode {
          */
         struct btrfs_key location;
  
-       /* Lock for counters */
+       /*
+        * Lock for counters and all fields used to determine if the inode is in
+        * the log or not (last_trans, last_sub_trans, last_log_commit,
+        * logged_trans).
+        */
         spinlock_t lock;
  
         /* the extent_tree has caches of all the extent mappings to disk */
@@ -250,6 +254,9 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
  
  static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
  {
+       int ret = 0;
+
+       spin_lock(&BTRFS_I(inode)->lock);
         if (BTRFS_I(inode)->logged_trans == generation &&
             BTRFS_I(inode)->last_sub_trans <=
             BTRFS_I(inode)->last_log_commit &&
@@ -263,9 +270,10 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
                  */
                 smp_mb();
                 if (list_empty(&BTRFS_I(inode)->extent_tree.modified_extents))
-                       return 1;
+                       ret = 1;
         }
-       return 0;
+       spin_unlock(&BTRFS_I(inode)->lock);
+       return ret;
  }
  
  #define BTRFS_DIO_ORIG_BIO_SUBMITTED   0x1
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c

index d897ef8..ce7dec8 100644 (file)
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -2990,8 +2990,8 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
                                (unsigned long long)bio->bi_iter.bi_sector,
                                dev_bytenr, bio->bi_bdev);
  
-               mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
-                                      GFP_NOFS);
+               mapped_datav = kmalloc_array(bio->bi_vcnt,
+                                            sizeof(*mapped_datav), GFP_NOFS);
                 if (!mapped_datav)
                         goto leave;
                 cur_bytenr = dev_bytenr;
@@ -3241,8 +3241,5 @@ void btrfsic_unmount(struct btrfs_root *root,
  
         mutex_unlock(&btrfsic_mutex);
  
-       if (is_vmalloc_addr(state))
-               vfree(state);
-       else
-               kfree(state);
+       kvfree(state);
  }
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c

index e9df886..ce62324 100644 (file)
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -622,7 +622,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
         cb->orig_bio = bio;
  
         nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
-       cb->compressed_pages = kzalloc(sizeof(struct page *) * nr_pages,
+       cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
                                        GFP_NOFS);
         if (!cb->compressed_pages)
                 goto fail1;
@@ -750,7 +750,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
  static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
  static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
  
-static struct btrfs_compress_op *btrfs_compress_op[] = {
+static const struct btrfs_compress_op * const btrfs_compress_op[] = {
         &btrfs_zlib_compress,
         &btrfs_lzo_compress,
  };
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h

index d181f70..13a4dc0 100644 (file)
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -77,7 +77,7 @@ struct btrfs_compress_op {
                           size_t srclen, size_t destlen);
  };
  
-extern struct btrfs_compress_op btrfs_zlib_compress;
-extern struct btrfs_compress_op btrfs_lzo_compress;
+extern const struct btrfs_compress_op btrfs_zlib_compress;
+extern const struct btrfs_compress_op btrfs_lzo_compress;
  
  #endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c

index 6d67f32..0f11ebc 100644 (file)
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -578,7 +578,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
         if (!tree_mod_need_log(fs_info, eb))
                 return 0;
  
-       tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+       tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), flags);
         if (!tm_list)
                 return -ENOMEM;
  
@@ -677,7 +677,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
  
         if (log_removal && btrfs_header_level(old_root) > 0) {
                 nritems = btrfs_header_nritems(old_root);
-               tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+               tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
                                   flags);
                 if (!tm_list) {
                         ret = -ENOMEM;
@@ -814,7 +814,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
         if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
                 return 0;
  
-       tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+       tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
                           GFP_NOFS);
         if (!tm_list)
                 return -ENOMEM;
@@ -905,8 +905,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
                 return 0;
  
         nritems = btrfs_header_nritems(eb);
-       tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
-                         GFP_NOFS);
+       tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
         if (!tm_list)
                 return -ENOMEM;
  
@@ -1073,7 +1072,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                         ret = btrfs_dec_ref(trans, root, buf, 1);
                         BUG_ON(ret); /* -ENOMEM */
                 }
-               clean_tree_block(trans, root, buf);
+               clean_tree_block(trans, root->fs_info, buf);
                 *last_ref = 1;
         }
         return 0;
@@ -1678,7 +1677,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                         continue;
                 }
  
-               cur = btrfs_find_tree_block(root, blocknr);
+               cur = btrfs_find_tree_block(root->fs_info, blocknr);
                 if (cur)
                         uptodate = btrfs_buffer_uptodate(cur, gen, 0);
                 else
@@ -1943,7 +1942,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
  
                 path->locks[level] = 0;
                 path->nodes[level] = NULL;
-               clean_tree_block(trans, root, mid);
+               clean_tree_block(trans, root->fs_info, mid);
                 btrfs_tree_unlock(mid);
                 /* once for the path */
                 free_extent_buffer(mid);
@@ -1997,7 +1996,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                 if (wret < 0 && wret != -ENOSPC)
                         ret = wret;
                 if (btrfs_header_nritems(right) == 0) {
-                       clean_tree_block(trans, root, right);
+                       clean_tree_block(trans, root->fs_info, right);
                         btrfs_tree_unlock(right);
                         del_ptr(root, path, level + 1, pslot + 1);
                         root_sub_used(root, right->len);
@@ -2041,7 +2040,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                 BUG_ON(wret == 1);
         }
         if (btrfs_header_nritems(mid) == 0) {
-               clean_tree_block(trans, root, mid);
+               clean_tree_block(trans, root->fs_info, mid);
                 btrfs_tree_unlock(mid);
                 del_ptr(root, path, level + 1, pslot);
                 root_sub_used(root, mid->len);
@@ -2259,7 +2258,7 @@ static void reada_for_search(struct btrfs_root *root,
  
         search = btrfs_node_blockptr(node, slot);
         blocksize = root->nodesize;
-       eb = btrfs_find_tree_block(root, search);
+       eb = btrfs_find_tree_block(root->fs_info, search);
         if (eb) {
                 free_extent_buffer(eb);
                 return;
@@ -2319,7 +2318,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
         if (slot > 0) {
                 block1 = btrfs_node_blockptr(parent, slot - 1);
                 gen = btrfs_node_ptr_generation(parent, slot - 1);
-               eb = btrfs_find_tree_block(root, block1);
+               eb = btrfs_find_tree_block(root->fs_info, block1);
                 /*
                  * if we get -eagain from btrfs_buffer_uptodate, we
                  * don't want to return eagain here.  That will loop
@@ -2332,7 +2331,7 @@ static noinline void reada_for_balance(struct btrfs_root *root,
         if (slot + 1 < nritems) {
                 block2 = btrfs_node_blockptr(parent, slot + 1);
                 gen = btrfs_node_ptr_generation(parent, slot + 1);
-               eb = btrfs_find_tree_block(root, block2);
+               eb = btrfs_find_tree_block(root->fs_info, block2);
                 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
                         block2 = 0;
                 free_extent_buffer(eb);
@@ -2450,7 +2449,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
         blocknr = btrfs_node_blockptr(b, slot);
         gen = btrfs_node_ptr_generation(b, slot);
  
-       tmp = btrfs_find_tree_block(root, blocknr);
+       tmp = btrfs_find_tree_block(root->fs_info, blocknr);
         if (tmp) {
                 /* first we do an atomic uptodate check */
                 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
@@ -3126,7 +3125,8 @@ again:
   * higher levels
   *
   */
-static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_fs_info *fs_info,
+                          struct btrfs_path *path,
                            struct btrfs_disk_key *key, int level)
  {
         int i;
@@ -3137,7 +3137,7 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
                 if (!path->nodes[i])
                         break;
                 t = path->nodes[i];
-               tree_mod_log_set_node_key(root->fs_info, t, tslot, 1);
+               tree_mod_log_set_node_key(fs_info, t, tslot, 1);
                 btrfs_set_node_key(t, key, tslot);
                 btrfs_mark_buffer_dirty(path->nodes[i]);
                 if (tslot != 0)
@@ -3151,7 +3151,8 @@ static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
   * This function isn't completely safe. It's the caller's responsibility
   * that the new key won't break the order
   */
-void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
+                            struct btrfs_path *path,
                              struct btrfs_key *new_key)
  {
         struct btrfs_disk_key disk_key;
@@ -3173,7 +3174,7 @@ void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
         btrfs_set_item_key(eb, &disk_key, slot);
         btrfs_mark_buffer_dirty(eb);
         if (slot == 0)
-               fixup_low_keys(root, path, &disk_key, 1);
+               fixup_low_keys(fs_info, path, &disk_key, 1);
  }
  
  /*
@@ -3692,7 +3693,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
         if (left_nritems)
                 btrfs_mark_buffer_dirty(left);
         else
-               clean_tree_block(trans, root, left);
+               clean_tree_block(trans, root->fs_info, left);
  
         btrfs_mark_buffer_dirty(right);
  
@@ -3704,7 +3705,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
         if (path->slots[0] >= left_nritems) {
                 path->slots[0] -= left_nritems;
                 if (btrfs_header_nritems(path->nodes[0]) == 0)
-                       clean_tree_block(trans, root, path->nodes[0]);
+                       clean_tree_block(trans, root->fs_info, path->nodes[0]);
                 btrfs_tree_unlock(path->nodes[0]);
                 free_extent_buffer(path->nodes[0]);
                 path->nodes[0] = right;
@@ -3928,10 +3929,10 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
         if (right_nritems)
                 btrfs_mark_buffer_dirty(right);
         else
-               clean_tree_block(trans, root, right);
+               clean_tree_block(trans, root->fs_info, right);
  
         btrfs_item_key(right, &disk_key, 0);
-       fixup_low_keys(root, path, &disk_key, 1);
+       fixup_low_keys(root->fs_info, path, &disk_key, 1);
  
         /* then fixup the leaf pointer in the path */
         if (path->slots[0] < push_items) {
@@ -4168,6 +4169,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
         int mid;
         int slot;
         struct extent_buffer *right;
+       struct btrfs_fs_info *fs_info = root->fs_info;
         int ret = 0;
         int wret;
         int split;
@@ -4271,10 +4273,10 @@ again:
         btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
         btrfs_set_header_owner(right, root->root_key.objectid);
         btrfs_set_header_level(right, 0);
-       write_extent_buffer(right, root->fs_info->fsid,
+       write_extent_buffer(right, fs_info->fsid,
                             btrfs_header_fsid(), BTRFS_FSID_SIZE);
  
-       write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
+       write_extent_buffer(right, fs_info->chunk_tree_uuid,
                             btrfs_header_chunk_tree_uuid(right),
                             BTRFS_UUID_SIZE);
  
@@ -4297,7 +4299,7 @@ again:
                         path->nodes[0] = right;
                         path->slots[0] = 0;
                         if (path->slots[1] == 0)
-                               fixup_low_keys(root, path, &disk_key, 1);
+                               fixup_low_keys(fs_info, path, &disk_key, 1);
                 }
                 btrfs_mark_buffer_dirty(right);
                 return ret;
@@ -4615,7 +4617,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
                 btrfs_set_item_key(leaf, &disk_key, slot);
                 if (slot == 0)
-                       fixup_low_keys(root, path, &disk_key, 1);
+                       fixup_low_keys(root->fs_info, path, &disk_key, 1);
         }
  
         item = btrfs_item_nr(slot);
@@ -4716,7 +4718,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
  
         if (path->slots[0] == 0) {
                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
-               fixup_low_keys(root, path, &disk_key, 1);
+               fixup_low_keys(root->fs_info, path, &disk_key, 1);
         }
         btrfs_unlock_up_safe(path, 1);
  
@@ -4888,7 +4890,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
                 struct btrfs_disk_key disk_key;
  
                 btrfs_node_key(parent, &disk_key, 0);
-               fixup_low_keys(root, path, &disk_key, level + 1);
+               fixup_low_keys(root->fs_info, path, &disk_key, level + 1);
         }
         btrfs_mark_buffer_dirty(parent);
  }
@@ -4981,7 +4983,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                         btrfs_set_header_level(leaf, 0);
                 } else {
                         btrfs_set_path_blocking(path);
-                       clean_tree_block(trans, root, leaf);
+                       clean_tree_block(trans, root->fs_info, leaf);
                         btrfs_del_leaf(trans, root, path, leaf);
                 }
         } else {
@@ -4990,7 +4992,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                         struct btrfs_disk_key disk_key;
  
                         btrfs_item_key(leaf, &disk_key, 0);
-                       fixup_low_keys(root, path, &disk_key, 1);
+                       fixup_low_keys(root->fs_info, path, &disk_key, 1);
                 }
  
                 /* delete the leaf if it is mostly empty */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index f9c89ca..6f364e1 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1061,6 +1061,12 @@ struct btrfs_block_group_item {
         __le64 flags;
  } __attribute__ ((__packed__));
  
+#define BTRFS_QGROUP_LEVEL_SHIFT               48
+static inline u64 btrfs_qgroup_level(u64 qgroupid)
+{
+       return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+}
+
  /*
   * is subvolume quota turned on?
   */
@@ -1256,6 +1262,20 @@ struct btrfs_caching_control {
         atomic_t count;
  };
  
+struct btrfs_io_ctl {
+       void *cur, *orig;
+       struct page *page;
+       struct page **pages;
+       struct btrfs_root *root;
+       struct inode *inode;
+       unsigned long size;
+       int index;
+       int num_pages;
+       int entries;
+       int bitmaps;
+       unsigned check_crcs:1;
+};
+
  struct btrfs_block_group_cache {
         struct btrfs_key key;
         struct btrfs_block_group_item item;
@@ -1321,6 +1341,9 @@ struct btrfs_block_group_cache {
  
         /* For dirty block groups */
         struct list_head dirty_list;
+       struct list_head io_list;
+
+       struct btrfs_io_ctl io_ctl;
  };
  
  /* delayed seq elem */
@@ -1329,6 +1352,8 @@ struct seq_list {
         u64 seq;
  };
  
+#define SEQ_LIST_INIT(name)    { .list = LIST_HEAD_INIT((name).list), .seq = 0 }
+
  enum btrfs_orphan_cleanup_state {
         ORPHAN_CLEANUP_STARTED  = 1,
         ORPHAN_CLEANUP_DONE     = 2,
@@ -1472,6 +1497,12 @@ struct btrfs_fs_info {
         struct mutex chunk_mutex;
         struct mutex volume_mutex;
  
+       /*
+        * this is taken to make sure we don't set block groups ro after
+        * the free space cache has been allocated on them
+        */
+       struct mutex ro_block_group_mutex;
+
         /* this is used during read/modify/write to make sure
          * no two ios are trying to mod the same stripe at the same
          * time
@@ -1513,6 +1544,7 @@ struct btrfs_fs_info {
  
         spinlock_t delayed_iput_lock;
         struct list_head delayed_iputs;
+       struct rw_semaphore delayed_iput_sem;
  
         /* this protects tree_mod_seq_list */
         spinlock_t tree_mod_seq_lock;
@@ -3295,6 +3327,9 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
  }
  
  /* extent-tree.c */
+
+u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
+
  static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
                                                  unsigned num_items)
  {
@@ -3385,6 +3420,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                          u64 bytenr, u64 num_bytes, u64 parent,
                          u64 root_objectid, u64 owner, u64 offset, int no_quota);
  
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root);
  int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root);
  int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
@@ -3417,7 +3454,7 @@ enum btrfs_reserve_flush_enum {
         BTRFS_RESERVE_FLUSH_ALL,
  };
  
-int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes);
  void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
  void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root);
@@ -3440,6 +3477,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
                                               unsigned short type);
  void btrfs_free_block_rsv(struct btrfs_root *root,
                           struct btrfs_block_rsv *rsv);
+void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv);
  int btrfs_block_rsv_add(struct btrfs_root *root,
                         struct btrfs_block_rsv *block_rsv, u64 num_bytes,
                         enum btrfs_reserve_flush_enum flush);
@@ -3486,7 +3524,8 @@ int btrfs_previous_item(struct btrfs_root *root,
                         int type);
  int btrfs_previous_extent_item(struct btrfs_root *root,
                         struct btrfs_path *path, u64 min_objectid);
-void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
+                            struct btrfs_path *path,
                              struct btrfs_key *new_key);
  struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
  struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
@@ -4180,7 +4219,8 @@ int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
  static inline int is_fstree(u64 rootid)
  {
         if (rootid == BTRFS_FS_TREE_OBJECTID ||
-           (s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
+           ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID &&
+             !btrfs_qgroup_level(rootid)))
                 return 1;
         return 0;
  }
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c

index 82f0c7c..cde698a 100644 (file)
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1383,7 +1383,7 @@ out:
  
  
  static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
-                                    struct btrfs_root *root, int nr)
+                                    struct btrfs_fs_info *fs_info, int nr)
  {
         struct btrfs_async_delayed_work *async_work;
  
@@ -1399,7 +1399,7 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
                         btrfs_async_run_delayed_root, NULL, NULL);
         async_work->nr = nr;
  
-       btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work);
+       btrfs_queue_work(fs_info->delayed_workers, &async_work->work);
         return 0;
  }
  
@@ -1426,6 +1426,7 @@ static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
  void btrfs_balance_delayed_items(struct btrfs_root *root)
  {
         struct btrfs_delayed_root *delayed_root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
  
         delayed_root = btrfs_get_delayed_root(root);
  
@@ -1438,7 +1439,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
  
                 seq = atomic_read(&delayed_root->items_seq);
  
-               ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
+               ret = btrfs_wq_run_delayed_node(delayed_root, fs_info, 0);
                 if (ret)
                         return;
  
@@ -1447,7 +1448,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
                 return;
         }
  
-       btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
+       btrfs_wq_run_delayed_node(delayed_root, fs_info, BTRFS_DELAYED_BATCH);
  }
  
  /* Will return 0 or -ENOMEM */
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c

index 6d16bea..8f8ed7d 100644 (file)
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -489,11 +489,13 @@ update_existing_ref(struct btrfs_trans_handle *trans,
   * existing and update must have the same bytenr
   */
  static noinline void
-update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
+update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
+                        struct btrfs_delayed_ref_node *existing,
                          struct btrfs_delayed_ref_node *update)
  {
         struct btrfs_delayed_ref_head *existing_ref;
         struct btrfs_delayed_ref_head *ref;
+       int old_ref_mod;
  
         existing_ref = btrfs_delayed_node_to_head(existing);
         ref = btrfs_delayed_node_to_head(update);
@@ -541,7 +543,20 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
          * only need the lock for this case cause we could be processing it
          * currently, for refs we just added we know we're a-ok.
          */
+       old_ref_mod = existing_ref->total_ref_mod;
         existing->ref_mod += update->ref_mod;
+       existing_ref->total_ref_mod += update->ref_mod;
+
+       /*
+        * If we are going to from a positive ref mod to a negative or vice
+        * versa we need to make sure to adjust pending_csums accordingly.
+        */
+       if (existing_ref->is_data) {
+               if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0)
+                       delayed_refs->pending_csums -= existing->num_bytes;
+               if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0)
+                       delayed_refs->pending_csums += existing->num_bytes;
+       }
         spin_unlock(&existing_ref->lock);
  }
  
@@ -605,6 +620,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
         head_ref->is_data = is_data;
         head_ref->ref_root = RB_ROOT;
         head_ref->processing = 0;
+       head_ref->total_ref_mod = count_mod;
  
         spin_lock_init(&head_ref->lock);
         mutex_init(&head_ref->mutex);
@@ -614,7 +630,7 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
         existing = htree_insert(&delayed_refs->href_root,
                                 &head_ref->href_node);
         if (existing) {
-               update_existing_head_ref(&existing->node, ref);
+               update_existing_head_ref(delayed_refs, &existing->node, ref);
                 /*
                  * we've updated the existing ref, free the newly
                  * allocated ref
@@ -622,6 +638,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                 kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                 head_ref = existing;
         } else {
+               if (is_data && count_mod < 0)
+                       delayed_refs->pending_csums += num_bytes;
                 delayed_refs->num_heads++;
                 delayed_refs->num_heads_ready++;
                 atomic_inc(&delayed_refs->num_entries);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h

index a764e23..5eb0892 100644 (file)
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -88,6 +88,14 @@ struct btrfs_delayed_ref_head {
         struct rb_node href_node;
  
         struct btrfs_delayed_extent_op *extent_op;
+
+       /*
+        * This is used to track the final ref_mod from all the refs associated
+        * with this head ref, this is not adjusted as delayed refs are run,
+        * this is meant to track if we need to do the csum accounting or not.
+        */
+       int total_ref_mod;
+
         /*
          * when a new extent is allocated, it is just reserved in memory
          * The actual extent isn't inserted into the extent allocation tree
@@ -138,6 +146,8 @@ struct btrfs_delayed_ref_root {
         /* total number of head nodes ready for processing */
         unsigned long num_heads_ready;
  
+       u64 pending_csums;
+
         /*
          * set when the tree is flushing before a transaction commit,
          * used by the throttling code to decide if new updates need
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c

index 5ec03d9..0573848 100644 (file)
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -670,8 +670,8 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
         case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
         case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
                 srcdev = dev_replace->srcdev;
-               args->status.progress_1000 = div64_u64(dev_replace->cursor_left,
-                       div64_u64(btrfs_device_get_total_bytes(srcdev), 1000));
+               args->status.progress_1000 = div_u64(dev_replace->cursor_left,
+                       div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
                 break;
         }
         btrfs_dev_replace_unlock(dev_replace);
@@ -806,7 +806,7 @@ static int btrfs_dev_replace_kthread(void *data)
                 btrfs_dev_replace_status(fs_info, status_args);
                 progress = status_args->status.progress_1000;
                 kfree(status_args);
-               do_div(progress, 10);
+               progress = div_u64(progress, 10);
                 printk_in_rcu(KERN_INFO
                         "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
                         dev_replace->srcdev->missing ? "<missing disk>" :
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c

index 639f266..2ef9a4b 100644 (file)
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -54,7 +54,7 @@
  #include <asm/cpufeature.h>
  #endif
  
-static struct extent_io_ops btree_extent_io_ops;
+static const struct extent_io_ops btree_extent_io_ops;
  static void end_workqueue_fn(struct btrfs_work *work);
  static void free_fs_root(struct btrfs_root *root);
  static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
@@ -274,10 +274,11 @@ void btrfs_csum_final(u32 crc, char *result)
   * compute the csum for a btree block, and either verify it or write it
   * into the csum field of the block.
   */
-static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
+static int csum_tree_block(struct btrfs_fs_info *fs_info,
+                          struct extent_buffer *buf,
                            int verify)
  {
-       u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+       u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
         char *result = NULL;
         unsigned long len;
         unsigned long cur_len;
@@ -302,7 +303,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                 offset += cur_len;
         }
         if (csum_size > sizeof(inline_result)) {
-               result = kzalloc(csum_size * sizeof(char), GFP_NOFS);
+               result = kzalloc(csum_size, GFP_NOFS);
                 if (!result)
                         return 1;
         } else {
@@ -321,7 +322,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                         printk_ratelimited(KERN_WARNING
                                 "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
                                 "level %d\n",
-                               root->fs_info->sb->s_id, buf->start,
+                               fs_info->sb->s_id, buf->start,
                                 val, found, btrfs_header_level(buf));
                         if (result != (char *)&inline_result)
                                 kfree(result);
@@ -418,12 +419,6 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
  
                 if (memcmp(raw_disk_sb, result, csum_size))
                         ret = 1;
-
-               if (ret && btrfs_super_generation(disk_sb) < 10) {
-                       printk(KERN_WARNING
-                               "BTRFS: super block crcs don't match, older mkfs detected\n");
-                       ret = 0;
-               }
         }
  
         if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
@@ -501,7 +496,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
   * we only fill in the checksum field in the first page of a multi-page block
   */
  
-static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
+static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
  {
         u64 start = page_offset(page);
         u64 found_start;
@@ -513,14 +508,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
         found_start = btrfs_header_bytenr(eb);
         if (WARN_ON(found_start != start || !PageUptodate(page)))
                 return 0;
-       csum_tree_block(root, eb, 0);
+       csum_tree_block(fs_info, eb, 0);
         return 0;
  }
  
-static int check_tree_block_fsid(struct btrfs_root *root,
+static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
                                  struct extent_buffer *eb)
  {
-       struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
         u8 fsid[BTRFS_UUID_SIZE];
         int ret = 1;
  
@@ -640,7 +635,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                 ret = -EIO;
                 goto err;
         }
-       if (check_tree_block_fsid(root, eb)) {
+       if (check_tree_block_fsid(root->fs_info, eb)) {
                 printk_ratelimited(KERN_ERR "BTRFS (device %s): bad fsid on block %llu\n",
                                eb->fs_info->sb->s_id, eb->start);
                 ret = -EIO;
@@ -657,7 +652,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
         btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
                                        eb, found_level);
  
-       ret = csum_tree_block(root, eb, 1);
+       ret = csum_tree_block(root->fs_info, eb, 1);
         if (ret) {
                 ret = -EIO;
                 goto err;
@@ -882,7 +877,7 @@ static int btree_csum_one_bio(struct bio *bio)
  
         bio_for_each_segment_all(bvec, bio, i) {
                 root = BTRFS_I(bvec->bv_page->mapping->host)->root;
-               ret = csum_dirty_buffer(root, bvec->bv_page);
+               ret = csum_dirty_buffer(root->fs_info, bvec->bv_page);
                 if (ret)
                         break;
         }
@@ -1119,10 +1114,10 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
         return 0;
  }
  
-struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
                                             u64 bytenr)
  {
-       return find_extent_buffer(root->fs_info, bytenr);
+       return find_extent_buffer(fs_info, bytenr);
  }
  
  struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
@@ -1165,11 +1160,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
  
  }
  
-void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+void clean_tree_block(struct btrfs_trans_handle *trans,
+                     struct btrfs_fs_info *fs_info,
                       struct extent_buffer *buf)
  {
-       struct btrfs_fs_info *fs_info = root->fs_info;
-
         if (btrfs_header_generation(buf) ==
             fs_info->running_transaction->transid) {
                 btrfs_assert_tree_locked(buf);
@@ -2146,6 +2140,267 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
         }
  }
  
+static void btrfs_init_scrub(struct btrfs_fs_info *fs_info)
+{
+       mutex_init(&fs_info->scrub_lock);
+       atomic_set(&fs_info->scrubs_running, 0);
+       atomic_set(&fs_info->scrub_pause_req, 0);
+       atomic_set(&fs_info->scrubs_paused, 0);
+       atomic_set(&fs_info->scrub_cancel_req, 0);
+       init_waitqueue_head(&fs_info->scrub_pause_wait);
+       fs_info->scrub_workers_refcnt = 0;
+}
+
+static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
+{
+       spin_lock_init(&fs_info->balance_lock);
+       mutex_init(&fs_info->balance_mutex);
+       atomic_set(&fs_info->balance_running, 0);
+       atomic_set(&fs_info->balance_pause_req, 0);
+       atomic_set(&fs_info->balance_cancel_req, 0);
+       fs_info->balance_ctl = NULL;
+       init_waitqueue_head(&fs_info->balance_wait_q);
+}
+
+static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info,
+                                  struct btrfs_root *tree_root)
+{
+       fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
+       set_nlink(fs_info->btree_inode, 1);
+       /*
+        * we set the i_size on the btree inode to the max possible int.
+        * the real end of the address space is determined by all of
+        * the devices in the system
+        */
+       fs_info->btree_inode->i_size = OFFSET_MAX;
+       fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
+
+       RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
+       extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
+                            fs_info->btree_inode->i_mapping);
+       BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
+       extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
+
+       BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
+
+       BTRFS_I(fs_info->btree_inode)->root = tree_root;
+       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
+              sizeof(struct btrfs_key));
+       set_bit(BTRFS_INODE_DUMMY,
+               &BTRFS_I(fs_info->btree_inode)->runtime_flags);
+       btrfs_insert_inode_hash(fs_info->btree_inode);
+}
+
+static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
+{
+       fs_info->dev_replace.lock_owner = 0;
+       atomic_set(&fs_info->dev_replace.nesting_level, 0);
+       mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
+       mutex_init(&fs_info->dev_replace.lock_management_lock);
+       mutex_init(&fs_info->dev_replace.lock);
+       init_waitqueue_head(&fs_info->replace_wait);
+}
+
+static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
+{
+       spin_lock_init(&fs_info->qgroup_lock);
+       mutex_init(&fs_info->qgroup_ioctl_lock);
+       fs_info->qgroup_tree = RB_ROOT;
+       fs_info->qgroup_op_tree = RB_ROOT;
+       INIT_LIST_HEAD(&fs_info->dirty_qgroups);
+       fs_info->qgroup_seq = 1;
+       fs_info->quota_enabled = 0;
+       fs_info->pending_quota_state = 0;
+       fs_info->qgroup_ulist = NULL;
+       mutex_init(&fs_info->qgroup_rescan_lock);
+}
+
+static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
+               struct btrfs_fs_devices *fs_devices)
+{
+       int max_active = fs_info->thread_pool_size;
+       unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
+
+       fs_info->workers =
+               btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
+                                     max_active, 16);
+
+       fs_info->delalloc_workers =
+               btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
+
+       fs_info->flush_workers =
+               btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
+
+       fs_info->caching_workers =
+               btrfs_alloc_workqueue("cache", flags, max_active, 0);
+
+       /*
+        * a higher idle thresh on the submit workers makes it much more
+        * likely that bios will be send down in a sane order to the
+        * devices
+        */
+       fs_info->submit_workers =
+               btrfs_alloc_workqueue("submit", flags,
+                                     min_t(u64, fs_devices->num_devices,
+                                           max_active), 64);
+
+       fs_info->fixup_workers =
+               btrfs_alloc_workqueue("fixup", flags, 1, 0);
+
+       /*
+        * endios are largely parallel and should have a very
+        * low idle thresh
+        */
+       fs_info->endio_workers =
+               btrfs_alloc_workqueue("endio", flags, max_active, 4);
+       fs_info->endio_meta_workers =
+               btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
+       fs_info->endio_meta_write_workers =
+               btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
+       fs_info->endio_raid56_workers =
+               btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+       fs_info->endio_repair_workers =
+               btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
+       fs_info->rmw_workers =
+               btrfs_alloc_workqueue("rmw", flags, max_active, 2);
+       fs_info->endio_write_workers =
+               btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
+       fs_info->endio_freespace_worker =
+               btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
+       fs_info->delayed_workers =
+               btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
+       fs_info->readahead_workers =
+               btrfs_alloc_workqueue("readahead", flags, max_active, 2);
+       fs_info->qgroup_rescan_workers =
+               btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+       fs_info->extent_workers =
+               btrfs_alloc_workqueue("extent-refs", flags,
+                                     min_t(u64, fs_devices->num_devices,
+                                           max_active), 8);
+
+       if (!(fs_info->workers && fs_info->delalloc_workers &&
+             fs_info->submit_workers && fs_info->flush_workers &&
+             fs_info->endio_workers && fs_info->endio_meta_workers &&
+             fs_info->endio_meta_write_workers &&
+             fs_info->endio_repair_workers &&
+             fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
+             fs_info->endio_freespace_worker && fs_info->rmw_workers &&
+             fs_info->caching_workers && fs_info->readahead_workers &&
+             fs_info->fixup_workers && fs_info->delayed_workers &&
+             fs_info->extent_workers &&
+             fs_info->qgroup_rescan_workers)) {
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
+                           struct btrfs_fs_devices *fs_devices)
+{
+       int ret;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *log_tree_root;
+       struct btrfs_super_block *disk_super = fs_info->super_copy;
+       u64 bytenr = btrfs_super_log_root(disk_super);
+
+       if (fs_devices->rw_devices == 0) {
+               printk(KERN_WARNING "BTRFS: log replay required "
+                      "on RO media\n");
+               return -EIO;
+       }
+
+       log_tree_root = btrfs_alloc_root(fs_info);
+       if (!log_tree_root)
+               return -ENOMEM;
+
+       __setup_root(tree_root->nodesize, tree_root->sectorsize,
+                       tree_root->stripesize, log_tree_root, fs_info,
+                       BTRFS_TREE_LOG_OBJECTID);
+
+       log_tree_root->node = read_tree_block(tree_root, bytenr,
+                       fs_info->generation + 1);
+       if (!log_tree_root->node ||
+           !extent_buffer_uptodate(log_tree_root->node)) {
+               printk(KERN_ERR "BTRFS: failed to read log tree\n");
+               free_extent_buffer(log_tree_root->node);
+               kfree(log_tree_root);
+               return -EIO;
+       }
+       /* returns with log_tree_root freed on success */
+       ret = btrfs_recover_log_trees(log_tree_root);
+       if (ret) {
+               btrfs_error(tree_root->fs_info, ret,
+                           "Failed to recover log tree");
+               free_extent_buffer(log_tree_root->node);
+               kfree(log_tree_root);
+               return ret;
+       }
+
+       if (fs_info->sb->s_flags & MS_RDONLY) {
+               ret = btrfs_commit_super(tree_root);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
+                           struct btrfs_root *tree_root)
+{
+       struct btrfs_root *root;
+       struct btrfs_key location;
+       int ret;
+
+       location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
+       location.type = BTRFS_ROOT_ITEM_KEY;
+       location.offset = 0;
+
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(root))
+               return PTR_ERR(root);
+       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+       fs_info->extent_root = root;
+
+       location.objectid = BTRFS_DEV_TREE_OBJECTID;
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(root))
+               return PTR_ERR(root);
+       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+       fs_info->dev_root = root;
+       btrfs_init_devices_late(fs_info);
+
+       location.objectid = BTRFS_CSUM_TREE_OBJECTID;
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(root))
+               return PTR_ERR(root);
+       set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+       fs_info->csum_root = root;
+
+       location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (!IS_ERR(root)) {
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->quota_enabled = 1;
+               fs_info->pending_quota_state = 1;
+               fs_info->quota_root = root;
+       }
+
+       location.objectid = BTRFS_UUID_TREE_OBJECTID;
+       root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(root)) {
+               ret = PTR_ERR(root);
+               if (ret != -ENOENT)
+                       return ret;
+       } else {
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->uuid_root = root;
+       }
+
+       return 0;
+}
+
  int open_ctree(struct super_block *sb,
                struct btrfs_fs_devices *fs_devices,
                char *options)
@@ -2160,21 +2415,12 @@ int open_ctree(struct super_block *sb,
         struct btrfs_super_block *disk_super;
         struct btrfs_fs_info *fs_info = btrfs_sb(sb);
         struct btrfs_root *tree_root;
-       struct btrfs_root *extent_root;
-       struct btrfs_root *csum_root;
         struct btrfs_root *chunk_root;
-       struct btrfs_root *dev_root;
-       struct btrfs_root *quota_root;
-       struct btrfs_root *uuid_root;
-       struct btrfs_root *log_tree_root;
         int ret;
         int err = -EINVAL;
         int num_backups_tried = 0;
         int backup_index = 0;
         int max_active;
-       int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
-       bool create_uuid_tree;
-       bool check_uuid_tree;
  
         tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
         chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2241,11 +2487,12 @@ int open_ctree(struct super_block *sb,
         spin_lock_init(&fs_info->qgroup_op_lock);
         spin_lock_init(&fs_info->buffer_lock);
         spin_lock_init(&fs_info->unused_bgs_lock);
-       mutex_init(&fs_info->unused_bg_unpin_mutex);
         rwlock_init(&fs_info->tree_mod_log_lock);
+       mutex_init(&fs_info->unused_bg_unpin_mutex);
         mutex_init(&fs_info->reloc_mutex);
         mutex_init(&fs_info->delalloc_root_mutex);
         seqlock_init(&fs_info->profiles_lock);
+       init_rwsem(&fs_info->delayed_iput_sem);
  
         init_completion(&fs_info->kobj_unregister);
         INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2276,7 +2523,7 @@ int open_ctree(struct super_block *sb,
         fs_info->free_chunk_space = 0;
         fs_info->tree_mod_log = RB_ROOT;
         fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
-       fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
+       fs_info->avg_delayed_ref_runtime = NSEC_PER_SEC >> 6; /* div by 64 */
         /* readahead state */
         INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
         spin_lock_init(&fs_info->reada_lock);
@@ -2294,55 +2541,18 @@ int open_ctree(struct super_block *sb,
         }
         btrfs_init_delayed_root(fs_info->delayed_root);
  
-       mutex_init(&fs_info->scrub_lock);
-       atomic_set(&fs_info->scrubs_running, 0);
-       atomic_set(&fs_info->scrub_pause_req, 0);
-       atomic_set(&fs_info->scrubs_paused, 0);
-       atomic_set(&fs_info->scrub_cancel_req, 0);
-       init_waitqueue_head(&fs_info->replace_wait);
-       init_waitqueue_head(&fs_info->scrub_pause_wait);
-       fs_info->scrub_workers_refcnt = 0;
+       btrfs_init_scrub(fs_info);
  #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
         fs_info->check_integrity_print_mask = 0;
  #endif
-
-       spin_lock_init(&fs_info->balance_lock);
-       mutex_init(&fs_info->balance_mutex);
-       atomic_set(&fs_info->balance_running, 0);
-       atomic_set(&fs_info->balance_pause_req, 0);
-       atomic_set(&fs_info->balance_cancel_req, 0);
-       fs_info->balance_ctl = NULL;
-       init_waitqueue_head(&fs_info->balance_wait_q);
+       btrfs_init_balance(fs_info);
         btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
  
         sb->s_blocksize = 4096;
         sb->s_blocksize_bits = blksize_bits(4096);
         sb->s_bdi = &fs_info->bdi;
  
-       fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
-       set_nlink(fs_info->btree_inode, 1);
-       /*
-        * we set the i_size on the btree inode to the max possible int.
-        * the real end of the address space is determined by all of
-        * the devices in the system
-        */
-       fs_info->btree_inode->i_size = OFFSET_MAX;
-       fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
-
-       RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
-       extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
-                            fs_info->btree_inode->i_mapping);
-       BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0;
-       extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree);
-
-       BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops;
-
-       BTRFS_I(fs_info->btree_inode)->root = tree_root;
-       memset(&BTRFS_I(fs_info->btree_inode)->location, 0,
-              sizeof(struct btrfs_key));
-       set_bit(BTRFS_INODE_DUMMY,
-               &BTRFS_I(fs_info->btree_inode)->runtime_flags);
-       btrfs_insert_inode_hash(fs_info->btree_inode);
+       btrfs_init_btree_inode(fs_info, tree_root);
  
         spin_lock_init(&fs_info->block_group_cache_lock);
         fs_info->block_group_cache_tree = RB_ROOT;
@@ -2363,26 +2573,14 @@ int open_ctree(struct super_block *sb,
         mutex_init(&fs_info->transaction_kthread_mutex);
         mutex_init(&fs_info->cleaner_mutex);
         mutex_init(&fs_info->volume_mutex);
+       mutex_init(&fs_info->ro_block_group_mutex);
         init_rwsem(&fs_info->commit_root_sem);
         init_rwsem(&fs_info->cleanup_work_sem);
         init_rwsem(&fs_info->subvol_sem);
         sema_init(&fs_info->uuid_tree_rescan_sem, 1);
-       fs_info->dev_replace.lock_owner = 0;
-       atomic_set(&fs_info->dev_replace.nesting_level, 0);
-       mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
-       mutex_init(&fs_info->dev_replace.lock_management_lock);
-       mutex_init(&fs_info->dev_replace.lock);
  
-       spin_lock_init(&fs_info->qgroup_lock);
-       mutex_init(&fs_info->qgroup_ioctl_lock);
-       fs_info->qgroup_tree = RB_ROOT;
-       fs_info->qgroup_op_tree = RB_ROOT;
-       INIT_LIST_HEAD(&fs_info->dirty_qgroups);
-       fs_info->qgroup_seq = 1;
-       fs_info->quota_enabled = 0;
-       fs_info->pending_quota_state = 0;
-       fs_info->qgroup_ulist = NULL;
-       mutex_init(&fs_info->qgroup_rescan_lock);
+       btrfs_init_dev_replace_locks(fs_info);
+       btrfs_init_qgroup(fs_info);
  
         btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
         btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2554,75 +2752,9 @@ int open_ctree(struct super_block *sb,
  
         max_active = fs_info->thread_pool_size;
  
-       fs_info->workers =
-               btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
-                                     max_active, 16);
-
-       fs_info->delalloc_workers =
-               btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
-
-       fs_info->flush_workers =
-               btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
-
-       fs_info->caching_workers =
-               btrfs_alloc_workqueue("cache", flags, max_active, 0);
-
-       /*
-        * a higher idle thresh on the submit workers makes it much more
-        * likely that bios will be send down in a sane order to the
-        * devices
-        */
-       fs_info->submit_workers =
-               btrfs_alloc_workqueue("submit", flags,
-                                     min_t(u64, fs_devices->num_devices,
-                                           max_active), 64);
-
-       fs_info->fixup_workers =
-               btrfs_alloc_workqueue("fixup", flags, 1, 0);
-
-       /*
-        * endios are largely parallel and should have a very
-        * low idle thresh
-        */
-       fs_info->endio_workers =
-               btrfs_alloc_workqueue("endio", flags, max_active, 4);
-       fs_info->endio_meta_workers =
-               btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
-       fs_info->endio_meta_write_workers =
-               btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
-       fs_info->endio_raid56_workers =
-               btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
-       fs_info->endio_repair_workers =
-               btrfs_alloc_workqueue("endio-repair", flags, 1, 0);
-       fs_info->rmw_workers =
-               btrfs_alloc_workqueue("rmw", flags, max_active, 2);
-       fs_info->endio_write_workers =
-               btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
-       fs_info->endio_freespace_worker =
-               btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
-       fs_info->delayed_workers =
-               btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
-       fs_info->readahead_workers =
-               btrfs_alloc_workqueue("readahead", flags, max_active, 2);
-       fs_info->qgroup_rescan_workers =
-               btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
-       fs_info->extent_workers =
-               btrfs_alloc_workqueue("extent-refs", flags,
-                                     min_t(u64, fs_devices->num_devices,
-                                           max_active), 8);
-
-       if (!(fs_info->workers && fs_info->delalloc_workers &&
-             fs_info->submit_workers && fs_info->flush_workers &&
-             fs_info->endio_workers && fs_info->endio_meta_workers &&
-             fs_info->endio_meta_write_workers &&
-             fs_info->endio_repair_workers &&
-             fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
-             fs_info->endio_freespace_worker && fs_info->rmw_workers &&
-             fs_info->caching_workers && fs_info->readahead_workers &&
-             fs_info->fixup_workers && fs_info->delayed_workers &&
-             fs_info->extent_workers &&
-             fs_info->qgroup_rescan_workers)) {
-               err = -ENOMEM;
+       ret = btrfs_init_workqueues(fs_info, fs_devices);
+       if (ret) {
+               err = ret;
                 goto fail_sb_buffer;
         }
  
@@ -2688,7 +2820,7 @@ int open_ctree(struct super_block *sb,
          * keep the device that is marked to be the target device for the
          * dev_replace procedure
          */
-       btrfs_close_extra_devices(fs_info, fs_devices, 0);
+       btrfs_close_extra_devices(fs_devices, 0);
  
         if (!fs_devices->latest_bdev) {
                 printk(KERN_ERR "BTRFS: failed to read devices on %s\n",
@@ -2714,61 +2846,9 @@ retry_root_backup:
         tree_root->commit_root = btrfs_root_node(tree_root);
         btrfs_set_root_refs(&tree_root->root_item, 1);
  
-       location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
-       location.type = BTRFS_ROOT_ITEM_KEY;
-       location.offset = 0;
-
-       extent_root = btrfs_read_tree_root(tree_root, &location);
-       if (IS_ERR(extent_root)) {
-               ret = PTR_ERR(extent_root);
-               goto recovery_tree_root;
-       }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state);
-       fs_info->extent_root = extent_root;
-
-       location.objectid = BTRFS_DEV_TREE_OBJECTID;
-       dev_root = btrfs_read_tree_root(tree_root, &location);
-       if (IS_ERR(dev_root)) {
-               ret = PTR_ERR(dev_root);
-               goto recovery_tree_root;
-       }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state);
-       fs_info->dev_root = dev_root;
-       btrfs_init_devices_late(fs_info);
-
-       location.objectid = BTRFS_CSUM_TREE_OBJECTID;
-       csum_root = btrfs_read_tree_root(tree_root, &location);
-       if (IS_ERR(csum_root)) {
-               ret = PTR_ERR(csum_root);
+       ret = btrfs_read_roots(fs_info, tree_root);
+       if (ret)
                 goto recovery_tree_root;
-       }
-       set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state);
-       fs_info->csum_root = csum_root;
-
-       location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
-       quota_root = btrfs_read_tree_root(tree_root, &location);
-       if (!IS_ERR(quota_root)) {
-               set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state);
-               fs_info->quota_enabled = 1;
-               fs_info->pending_quota_state = 1;
-               fs_info->quota_root = quota_root;
-       }
-
-       location.objectid = BTRFS_UUID_TREE_OBJECTID;
-       uuid_root = btrfs_read_tree_root(tree_root, &location);
-       if (IS_ERR(uuid_root)) {
-               ret = PTR_ERR(uuid_root);
-               if (ret != -ENOENT)
-                       goto recovery_tree_root;
-               create_uuid_tree = true;
-               check_uuid_tree = false;
-       } else {
-               set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state);
-               fs_info->uuid_root = uuid_root;
-               create_uuid_tree = false;
-               check_uuid_tree =
-                   generation != btrfs_super_uuid_tree_generation(disk_super);
-       }
  
         fs_info->generation = generation;
         fs_info->last_trans_committed = generation;
@@ -2792,7 +2872,7 @@ retry_root_backup:
                 goto fail_block_groups;
         }
  
-       btrfs_close_extra_devices(fs_info, fs_devices, 1);
+       btrfs_close_extra_devices(fs_devices, 1);
  
         ret = btrfs_sysfs_add_one(fs_info);
         if (ret) {
@@ -2806,7 +2886,7 @@ retry_root_backup:
                 goto fail_sysfs;
         }
  
-       ret = btrfs_read_block_groups(extent_root);
+       ret = btrfs_read_block_groups(fs_info->extent_root);
         if (ret) {
                 printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
                 goto fail_sysfs;
@@ -2864,48 +2944,11 @@ retry_root_backup:
  
         /* do not make disk changes in broken FS */
         if (btrfs_super_log_root(disk_super) != 0) {
-               u64 bytenr = btrfs_super_log_root(disk_super);
-
-               if (fs_devices->rw_devices == 0) {
-                       printk(KERN_WARNING "BTRFS: log replay required "
-                              "on RO media\n");
-                       err = -EIO;
-                       goto fail_qgroup;
-               }
-
-               log_tree_root = btrfs_alloc_root(fs_info);
-               if (!log_tree_root) {
-                       err = -ENOMEM;
-                       goto fail_qgroup;
-               }
-
-               __setup_root(nodesize, sectorsize, stripesize,
-                            log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
-
-               log_tree_root->node = read_tree_block(tree_root, bytenr,
-                                                     generation + 1);
-               if (!log_tree_root->node ||
-                   !extent_buffer_uptodate(log_tree_root->node)) {
-                       printk(KERN_ERR "BTRFS: failed to read log tree\n");
-                       free_extent_buffer(log_tree_root->node);
-                       kfree(log_tree_root);
-                       goto fail_qgroup;
-               }
-               /* returns with log_tree_root freed on success */
-               ret = btrfs_recover_log_trees(log_tree_root);
+               ret = btrfs_replay_log(fs_info, fs_devices);
                 if (ret) {
-                       btrfs_error(tree_root->fs_info, ret,
-                                   "Failed to recover log tree");
-                       free_extent_buffer(log_tree_root->node);
-                       kfree(log_tree_root);
+                       err = ret;
                         goto fail_qgroup;
                 }
-
-               if (sb->s_flags & MS_RDONLY) {
-                       ret = btrfs_commit_super(tree_root);
-                       if (ret)
-                               goto fail_qgroup;
-               }
         }
  
         ret = btrfs_find_orphan_roots(tree_root);
@@ -2966,7 +3009,7 @@ retry_root_backup:
  
         btrfs_qgroup_rescan_resume(fs_info);
  
-       if (create_uuid_tree) {
+       if (!fs_info->uuid_root) {
                 pr_info("BTRFS: creating UUID tree\n");
                 ret = btrfs_create_uuid_tree(fs_info);
                 if (ret) {
@@ -2975,8 +3018,9 @@ retry_root_backup:
                         close_ctree(tree_root);
                         return ret;
                 }
-       } else if (check_uuid_tree ||
-                  btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
+       } else if (btrfs_test_opt(tree_root, RESCAN_UUID_TREE) ||
+                  fs_info->generation !=
+                               btrfs_super_uuid_tree_generation(disk_super)) {
                 pr_info("BTRFS: checking UUID tree\n");
                 ret = btrfs_check_uuid_tree(fs_info);
                 if (ret) {
@@ -3668,7 +3712,7 @@ void close_ctree(struct btrfs_root *root)
         if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                 ret = btrfs_commit_super(root);
                 if (ret)
-                       btrfs_err(root->fs_info, "commit super ret %d", ret);
+                       btrfs_err(fs_info, "commit super ret %d", ret);
         }
  
         if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
@@ -3680,10 +3724,10 @@ void close_ctree(struct btrfs_root *root)
         fs_info->closing = 2;
         smp_mb();
  
-       btrfs_free_qgroup_config(root->fs_info);
+       btrfs_free_qgroup_config(fs_info);
  
         if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
-               btrfs_info(root->fs_info, "at unmount delalloc count %lld",
+               btrfs_info(fs_info, "at unmount delalloc count %lld",
                        percpu_counter_sum(&fs_info->delalloc_bytes));
         }
  
@@ -3723,7 +3767,7 @@ void close_ctree(struct btrfs_root *root)
  
         btrfs_free_stripe_hash_table(fs_info);
  
-       btrfs_free_block_rsv(root, root->orphan_block_rsv);
+       __btrfs_free_block_rsv(root->orphan_block_rsv);
         root->orphan_block_rsv = NULL;
  
         lock_chunks(root);
@@ -4134,7 +4178,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
  
                 clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
                 while (start <= end) {
-                       eb = btrfs_find_tree_block(root, start);
+                       eb = btrfs_find_tree_block(root->fs_info, start);
                         start += root->nodesize;
                         if (!eb)
                                 continue;
@@ -4285,7 +4329,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
         return 0;
  }
  
-static struct extent_io_ops btree_extent_io_ops = {
+static const struct extent_io_ops btree_extent_io_ops = {
         .readpage_end_io_hook = btree_readpage_end_io_hook,
         .readpage_io_failed_hook = btree_io_failed_hook,
         .submit_bio_hook = btree_submit_bio_hook,
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h

index 27d44c0..d4cbfee 100644 (file)
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -52,7 +52,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
  struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
                                                    u64 bytenr);
  void clean_tree_block(struct btrfs_trans_handle *trans,
-                     struct btrfs_root *root, struct extent_buffer *buf);
+                     struct btrfs_fs_info *fs_info, struct extent_buffer *buf);
  int open_ctree(struct super_block *sb,
                struct btrfs_fs_devices *fs_devices,
                char *options);
@@ -61,7 +61,7 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
                       struct btrfs_root *root, int max_mirrors);
  struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
  int btrfs_commit_super(struct btrfs_root *root);
-struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
                                             u64 bytenr);
  struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
                                       struct btrfs_key *location);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c

index 37d1645..8d05220 100644 (file)
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -152,7 +152,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
  
  static struct dentry *btrfs_get_parent(struct dentry *child)
  {
-       struct inode *dir = child->d_inode;
+       struct inode *dir = d_inode(child);
         struct btrfs_root *root = BTRFS_I(dir)->root;
         struct btrfs_path *path;
         struct extent_buffer *leaf;
@@ -220,8 +220,8 @@ fail:
  static int btrfs_get_name(struct dentry *parent, char *name,
                           struct dentry *child)
  {
-       struct inode *inode = child->d_inode;
-       struct inode *dir = parent->d_inode;
+       struct inode *inode = d_inode(child);
+       struct inode *dir = d_inode(parent);
         struct btrfs_path *path;
         struct btrfs_root *root = BTRFS_I(dir)->root;
         struct btrfs_inode_ref *iref;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 8b353ad..1eef4ee 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2538,6 +2538,12 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                  * list before we release it.
                  */
                 if (btrfs_delayed_ref_is_head(ref)) {
+                       if (locked_ref->is_data &&
+                           locked_ref->total_ref_mod < 0) {
+                               spin_lock(&delayed_refs->lock);
+                               delayed_refs->pending_csums -= ref->num_bytes;
+                               spin_unlock(&delayed_refs->lock);
+                       }
                         btrfs_delayed_ref_unlock(locked_ref);
                         locked_ref = NULL;
                 }
@@ -2561,8 +2567,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                  */
                 spin_lock(&delayed_refs->lock);
                 avg = fs_info->avg_delayed_ref_runtime * 3 + runtime;
-               avg = div64_u64(avg, 4);
-               fs_info->avg_delayed_ref_runtime = avg;
+               fs_info->avg_delayed_ref_runtime = avg >> 2;    /* div by 4 */
                 spin_unlock(&delayed_refs->lock);
         }
         return 0;
@@ -2624,7 +2629,26 @@ static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
          * We don't ever fill up leaves all the way so multiply by 2 just to be
          * closer to what we're really going to want to ouse.
          */
-       return div64_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
+       return div_u64(num_bytes, BTRFS_LEAF_DATA_SIZE(root));
+}
+
+/*
+ * Takes the number of bytes to be csumm'ed and figures out how many leaves it
+ * would require to store the csums for that many bytes.
+ */
+u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes)
+{
+       u64 csum_size;
+       u64 num_csums_per_leaf;
+       u64 num_csums;
+
+       csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
+       num_csums_per_leaf = div64_u64(csum_size,
+                       (u64)btrfs_super_csum_size(root->fs_info->super_copy));
+       num_csums = div64_u64(csum_bytes, root->sectorsize);
+       num_csums += num_csums_per_leaf - 1;
+       num_csums = div64_u64(num_csums, num_csums_per_leaf);
+       return num_csums;
  }
  
  int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2632,7 +2656,9 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
  {
         struct btrfs_block_rsv *global_rsv;
         u64 num_heads = trans->transaction->delayed_refs.num_heads_ready;
-       u64 num_bytes;
+       u64 csum_bytes = trans->transaction->delayed_refs.pending_csums;
+       u64 num_dirty_bgs = trans->transaction->num_dirty_bgs;
+       u64 num_bytes, num_dirty_bgs_bytes;
         int ret = 0;
  
         num_bytes = btrfs_calc_trans_metadata_size(root, 1);
@@ -2640,17 +2666,22 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
         if (num_heads > 1)
                 num_bytes += (num_heads - 1) * root->nodesize;
         num_bytes <<= 1;
+       num_bytes += btrfs_csum_bytes_to_leaves(root, csum_bytes) * root->nodesize;
+       num_dirty_bgs_bytes = btrfs_calc_trans_metadata_size(root,
+                                                            num_dirty_bgs);
         global_rsv = &root->fs_info->global_block_rsv;
  
         /*
          * If we can't allocate any more chunks lets make sure we have _lots_ of
          * wiggle room since running delayed refs can create more delayed refs.
          */
-       if (global_rsv->space_info->full)
+       if (global_rsv->space_info->full) {
+               num_dirty_bgs_bytes <<= 1;
                 num_bytes <<= 1;
+       }
  
         spin_lock(&global_rsv->lock);
-       if (global_rsv->reserved <= num_bytes)
+       if (global_rsv->reserved <= num_bytes + num_dirty_bgs_bytes)
                 ret = 1;
         spin_unlock(&global_rsv->lock);
         return ret;
@@ -3193,7 +3224,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
         struct inode *inode = NULL;
         u64 alloc_hint = 0;
         int dcs = BTRFS_DC_ERROR;
-       int num_pages = 0;
+       u64 num_pages = 0;
         int retries = 0;
         int ret = 0;
  
@@ -3267,7 +3298,7 @@ again:
                 if (ret)
                         goto out_put;
  
-               ret = btrfs_truncate_free_space_cache(root, trans, inode);
+               ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
                 if (ret)
                         goto out_put;
         }
@@ -3293,14 +3324,14 @@ again:
          * taking up quite a bit since it's not folded into the other space
          * cache.
          */
-       num_pages = (int)div64_u64(block_group->key.offset, 256 * 1024 * 1024);
+       num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024);
         if (!num_pages)
                 num_pages = 1;
  
         num_pages *= 16;
         num_pages *= PAGE_CACHE_SIZE;
  
-       ret = btrfs_check_data_free_space(inode, num_pages);
+       ret = btrfs_check_data_free_space(inode, num_pages, num_pages);
         if (ret)
                 goto out_put;
  
@@ -3351,16 +3382,156 @@ int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
         return 0;
  }
  
-int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+/*
+ * transaction commit does final block group cache writeback during a
+ * critical section where nothing is allowed to change the FS.  This is
+ * required in order for the cache to actually match the block group,
+ * but can introduce a lot of latency into the commit.
+ *
+ * So, btrfs_start_dirty_block_groups is here to kick off block group
+ * cache IO.  There's a chance we'll have to redo some of it if the
+ * block group changes again during the commit, but it greatly reduces
+ * the commit latency by getting rid of the easy block groups while
+ * we're still allowing others to join the commit.
+ */
+int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
  {
         struct btrfs_block_group_cache *cache;
         struct btrfs_transaction *cur_trans = trans->transaction;
         int ret = 0;
-       struct btrfs_path *path;
+       int should_put;
+       struct btrfs_path *path = NULL;
+       LIST_HEAD(dirty);
+       struct list_head *io = &cur_trans->io_bgs;
+       int num_started = 0;
+       int loops = 0;
+
+       spin_lock(&cur_trans->dirty_bgs_lock);
+       if (!list_empty(&cur_trans->dirty_bgs)) {
+               list_splice_init(&cur_trans->dirty_bgs, &dirty);
+       }
+       spin_unlock(&cur_trans->dirty_bgs_lock);
  
-       if (list_empty(&cur_trans->dirty_bgs))
+again:
+       if (list_empty(&dirty)) {
+               btrfs_free_path(path);
                 return 0;
+       }
+
+       /*
+        * make sure all the block groups on our dirty list actually
+        * exist
+        */
+       btrfs_create_pending_block_groups(trans, root);
+
+       if (!path) {
+               path = btrfs_alloc_path();
+               if (!path)
+                       return -ENOMEM;
+       }
+
+       while (!list_empty(&dirty)) {
+               cache = list_first_entry(&dirty,
+                                        struct btrfs_block_group_cache,
+                                        dirty_list);
+
+               /*
+                * cache_write_mutex is here only to save us from balance
+                * deleting this block group while we are writing out the
+                * cache
+                */
+               mutex_lock(&trans->transaction->cache_write_mutex);
+
+               /*
+                * this can happen if something re-dirties a block
+                * group that is already under IO.  Just wait for it to
+                * finish and then do it all again
+                */
+               if (!list_empty(&cache->io_list)) {
+                       list_del_init(&cache->io_list);
+                       btrfs_wait_cache_io(root, trans, cache,
+                                           &cache->io_ctl, path,
+                                           cache->key.objectid);
+                       btrfs_put_block_group(cache);
+               }
+
+
+               /*
+                * btrfs_wait_cache_io uses the cache->dirty_list to decide
+                * if it should update the cache_state.  Don't delete
+                * until after we wait.
+                *
+                * Since we're not running in the commit critical section
+                * we need the dirty_bgs_lock to protect from update_block_group
+                */
+               spin_lock(&cur_trans->dirty_bgs_lock);
+               list_del_init(&cache->dirty_list);
+               spin_unlock(&cur_trans->dirty_bgs_lock);
+
+               should_put = 1;
+
+               cache_save_setup(cache, trans, path);
+
+               if (cache->disk_cache_state == BTRFS_DC_SETUP) {
+                       cache->io_ctl.inode = NULL;
+                       ret = btrfs_write_out_cache(root, trans, cache, path);
+                       if (ret == 0 && cache->io_ctl.inode) {
+                               num_started++;
+                               should_put = 0;
+
+                               /*
+                                * the cache_write_mutex is protecting
+                                * the io_list
+                                */
+                               list_add_tail(&cache->io_list, io);
+                       } else {
+                               /*
+                                * if we failed to write the cache, the
+                                * generation will be bad and life goes on
+                                */
+                               ret = 0;
+                       }
+               }
+               if (!ret)
+                       ret = write_one_cache_group(trans, root, path, cache);
+               mutex_unlock(&trans->transaction->cache_write_mutex);
+
+               /* if its not on the io list, we need to put the block group */
+               if (should_put)
+                       btrfs_put_block_group(cache);
+
+               if (ret)
+                       break;
+       }
+
+       /*
+        * go through delayed refs for all the stuff we've just kicked off
+        * and then loop back (just once)
+        */
+       ret = btrfs_run_delayed_refs(trans, root, 0);
+       if (!ret && loops == 0) {
+               loops++;
+               spin_lock(&cur_trans->dirty_bgs_lock);
+               list_splice_init(&cur_trans->dirty_bgs, &dirty);
+               spin_unlock(&cur_trans->dirty_bgs_lock);
+               goto again;
+       }
+
+       btrfs_free_path(path);
+       return ret;
+}
+
+int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root)
+{
+       struct btrfs_block_group_cache *cache;
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       int ret = 0;
+       int should_put;
+       struct btrfs_path *path;
+       struct list_head *io = &cur_trans->io_bgs;
+       int num_started = 0;
  
         path = btrfs_alloc_path();
         if (!path)
@@ -3376,16 +3547,61 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                 cache = list_first_entry(&cur_trans->dirty_bgs,
                                          struct btrfs_block_group_cache,
                                          dirty_list);
+
+               /*
+                * this can happen if cache_save_setup re-dirties a block
+                * group that is already under IO.  Just wait for it to
+                * finish and then do it all again
+                */
+               if (!list_empty(&cache->io_list)) {
+                       list_del_init(&cache->io_list);
+                       btrfs_wait_cache_io(root, trans, cache,
+                                           &cache->io_ctl, path,
+                                           cache->key.objectid);
+                       btrfs_put_block_group(cache);
+               }
+
+               /*
+                * don't remove from the dirty list until after we've waited
+                * on any pending IO
+                */
                 list_del_init(&cache->dirty_list);
-               if (cache->disk_cache_state == BTRFS_DC_CLEAR)
-                       cache_save_setup(cache, trans, path);
+               should_put = 1;
+
+               cache_save_setup(cache, trans, path);
+
                 if (!ret)
-                       ret = btrfs_run_delayed_refs(trans, root,
-                                                    (unsigned long) -1);
-               if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP)
-                       btrfs_write_out_cache(root, trans, cache, path);
+                       ret = btrfs_run_delayed_refs(trans, root, (unsigned long) -1);
+
+               if (!ret && cache->disk_cache_state == BTRFS_DC_SETUP) {
+                       cache->io_ctl.inode = NULL;
+                       ret = btrfs_write_out_cache(root, trans, cache, path);
+                       if (ret == 0 && cache->io_ctl.inode) {
+                               num_started++;
+                               should_put = 0;
+                               list_add_tail(&cache->io_list, io);
+                       } else {
+                               /*
+                                * if we failed to write the cache, the
+                                * generation will be bad and life goes on
+                                */
+                               ret = 0;
+                       }
+               }
                 if (!ret)
                         ret = write_one_cache_group(trans, root, path, cache);
+
+               /* if its not on the io list, we need to put the block group */
+               if (should_put)
+                       btrfs_put_block_group(cache);
+       }
+
+       while (!list_empty(io)) {
+               cache = list_first_entry(io, struct btrfs_block_group_cache,
+                                        io_list);
+               list_del_init(&cache->io_list);
+               btrfs_wait_cache_io(root, trans, cache,
+                                   &cache->io_ctl, path, cache->key.objectid);
                 btrfs_put_block_group(cache);
         }
  
@@ -3635,19 +3851,21 @@ u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
   * This will check the space that the inode allocates from to make sure we have
   * enough space for bytes.
   */
-int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
+int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
  {
         struct btrfs_space_info *data_sinfo;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_fs_info *fs_info = root->fs_info;
         u64 used;
-       int ret = 0, committed = 0, alloc_chunk = 1;
+       int ret = 0;
+       int need_commit = 2;
+       int have_pinned_space;
  
         /* make sure bytes are sectorsize aligned */
         bytes = ALIGN(bytes, root->sectorsize);
  
         if (btrfs_is_free_space_inode(inode)) {
-               committed = 1;
+               need_commit = 0;
                 ASSERT(current->journal_info);
         }
  
@@ -3669,7 +3887,7 @@ again:
                  * if we don't have enough free bytes in this space then we need
                  * to alloc a new chunk.
                  */
-               if (!data_sinfo->full && alloc_chunk) {
+               if (!data_sinfo->full) {
                         u64 alloc_target;
  
                         data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
@@ -3697,8 +3915,10 @@ alloc:
                         if (ret < 0) {
                                 if (ret != -ENOSPC)
                                         return ret;
-                               else
+                               else {
+                                       have_pinned_space = 1;
                                         goto commit_trans;
+                               }
                         }
  
                         if (!data_sinfo)
@@ -3709,26 +3929,39 @@ alloc:
  
                 /*
                  * If we don't have enough pinned space to deal with this
-                * allocation don't bother committing the transaction.
+                * allocation, and no removed chunk in current transaction,
+                * don't bother committing the transaction.
                  */
-               if (percpu_counter_compare(&data_sinfo->total_bytes_pinned,
-                                          bytes) < 0)
-                       committed = 1;
+               have_pinned_space = percpu_counter_compare(
+                       &data_sinfo->total_bytes_pinned,
+                       used + bytes - data_sinfo->total_bytes);
                 spin_unlock(&data_sinfo->lock);
  
                 /* commit the current transaction and try again */
  commit_trans:
-               if (!committed &&
+               if (need_commit &&
                     !atomic_read(&root->fs_info->open_ioctl_trans)) {
-                       committed = 1;
+                       need_commit--;
  
                         trans = btrfs_join_transaction(root);
                         if (IS_ERR(trans))
                                 return PTR_ERR(trans);
-                       ret = btrfs_commit_transaction(trans, root);
-                       if (ret)
-                               return ret;
-                       goto again;
+                       if (have_pinned_space >= 0 ||
+                           trans->transaction->have_free_bgs ||
+                           need_commit > 0) {
+                               ret = btrfs_commit_transaction(trans, root);
+                               if (ret)
+                                       return ret;
+                               /*
+                                * make sure that all running delayed iput are
+                                * done
+                                */
+                               down_write(&root->fs_info->delayed_iput_sem);
+                               up_write(&root->fs_info->delayed_iput_sem);
+                               goto again;
+                       } else {
+                               btrfs_end_transaction(trans, root);
+                       }
                 }
  
                 trace_btrfs_space_reservation(root->fs_info,
@@ -3736,12 +3969,16 @@ commit_trans:
                                               data_sinfo->flags, bytes, 1);
                 return -ENOSPC;
         }
+       ret = btrfs_qgroup_reserve(root, write_bytes);
+       if (ret)
+               goto out;
         data_sinfo->bytes_may_use += bytes;
         trace_btrfs_space_reservation(root->fs_info, "space_info",
                                       data_sinfo->flags, bytes, 1);
+out:
         spin_unlock(&data_sinfo->lock);
  
-       return 0;
+       return ret;
  }
  
  /*
@@ -4298,8 +4535,13 @@ out:
  static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
                                         struct btrfs_fs_info *fs_info, u64 used)
  {
-       return (used >= div_factor_fine(space_info->total_bytes, 98) &&
-               !btrfs_fs_closing(fs_info) &&
+       u64 thresh = div_factor_fine(space_info->total_bytes, 98);
+
+       /* If we're just plain full then async reclaim just slows us down. */
+       if (space_info->bytes_used >= thresh)
+               return 0;
+
+       return (used >= thresh && !btrfs_fs_closing(fs_info) &&
                 !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
  }
  
@@ -4354,10 +4596,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
                 if (!btrfs_need_do_async_reclaim(space_info, fs_info,
                                                  flush_state))
                         return;
-       } while (flush_state <= COMMIT_TRANS);
-
-       if (btrfs_need_do_async_reclaim(space_info, fs_info, flush_state))
-               queue_work(system_unbound_wq, work);
+       } while (flush_state < COMMIT_TRANS);
  }
  
  void btrfs_init_async_reclaim_work(struct work_struct *work)
@@ -4700,6 +4939,11 @@ void btrfs_free_block_rsv(struct btrfs_root *root,
         kfree(rsv);
  }
  
+void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv)
+{
+       kfree(rsv);
+}
+
  int btrfs_block_rsv_add(struct btrfs_root *root,
                         struct btrfs_block_rsv *block_rsv, u64 num_bytes,
                         enum btrfs_reserve_flush_enum flush)
@@ -4812,10 +5056,10 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
  
         num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
                     csum_size * 2;
-       num_bytes += div64_u64(data_used + meta_used, 50);
+       num_bytes += div_u64(data_used + meta_used, 50);
  
         if (num_bytes * 3 > meta_used)
-               num_bytes = div64_u64(meta_used, 3);
+               num_bytes = div_u64(meta_used, 3);
  
         return ALIGN(num_bytes, fs_info->extent_root->nodesize << 10);
  }
@@ -4998,8 +5242,6 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
                                       u64 qgroup_reserved)
  {
         btrfs_block_rsv_release(root, rsv, (u64)-1);
-       if (qgroup_reserved)
-               btrfs_qgroup_free(root, qgroup_reserved);
  }
  
  /**
@@ -5066,30 +5308,18 @@ static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes,
                                    int reserve)
  {
         struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 csum_size;
-       int num_csums_per_leaf;
-       int num_csums;
-       int old_csums;
+       u64 old_csums, num_csums;
  
         if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM &&
             BTRFS_I(inode)->csum_bytes == 0)
                 return 0;
  
-       old_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
+       old_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
         if (reserve)
                 BTRFS_I(inode)->csum_bytes += num_bytes;
         else
                 BTRFS_I(inode)->csum_bytes -= num_bytes;
-       csum_size = BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item);
-       num_csums_per_leaf = (int)div64_u64(csum_size,
-                                           sizeof(struct btrfs_csum_item) +
-                                           sizeof(struct btrfs_disk_key));
-       num_csums = (int)div64_u64(BTRFS_I(inode)->csum_bytes, root->sectorsize);
-       num_csums = num_csums + num_csums_per_leaf - 1;
-       num_csums = num_csums / num_csums_per_leaf;
-
-       old_csums = old_csums + num_csums_per_leaf - 1;
-       old_csums = old_csums / num_csums_per_leaf;
+       num_csums = btrfs_csum_bytes_to_leaves(root, BTRFS_I(inode)->csum_bytes);
  
         /* No change, no need to reserve more */
         if (old_csums == num_csums)
@@ -5163,8 +5393,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
         spin_unlock(&BTRFS_I(inode)->lock);
  
         if (root->fs_info->quota_enabled) {
-               ret = btrfs_qgroup_reserve(root, num_bytes +
-                                          nr_extents * root->nodesize);
+               ret = btrfs_qgroup_reserve(root, nr_extents * root->nodesize);
                 if (ret)
                         goto out_fail;
         }
@@ -5172,8 +5401,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
         ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
         if (unlikely(ret)) {
                 if (root->fs_info->quota_enabled)
-                       btrfs_qgroup_free(root, num_bytes +
-                                               nr_extents * root->nodesize);
+                       btrfs_qgroup_free(root, nr_extents * root->nodesize);
                 goto out_fail;
         }
  
@@ -5290,10 +5518,6 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
  
         trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                       btrfs_ino(inode), to_free, 0);
-       if (root->fs_info->quota_enabled) {
-               btrfs_qgroup_free(root, num_bytes +
-                                       dropped * root->nodesize);
-       }
  
         btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
                                 to_free);
@@ -5318,7 +5542,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes)
  {
         int ret;
  
-       ret = btrfs_check_data_free_space(inode, num_bytes);
+       ret = btrfs_check_data_free_space(inode, num_bytes, num_bytes);
         if (ret)
                 return ret;
  
@@ -5390,14 +5614,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                 if (!alloc && cache->cached == BTRFS_CACHE_NO)
                         cache_block_group(cache, 1);
  
-               spin_lock(&trans->transaction->dirty_bgs_lock);
-               if (list_empty(&cache->dirty_list)) {
-                       list_add_tail(&cache->dirty_list,
-                                     &trans->transaction->dirty_bgs);
-                       btrfs_get_block_group(cache);
-               }
-               spin_unlock(&trans->transaction->dirty_bgs_lock);
-
                 byte_in_group = bytenr - cache->key.objectid;
                 WARN_ON(byte_in_group > cache->key.offset);
  
@@ -5446,6 +5662,16 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                                 spin_unlock(&info->unused_bgs_lock);
                         }
                 }
+
+               spin_lock(&trans->transaction->dirty_bgs_lock);
+               if (list_empty(&cache->dirty_list)) {
+                       list_add_tail(&cache->dirty_list,
+                                     &trans->transaction->dirty_bgs);
+                               trans->transaction->num_dirty_bgs++;
+                       btrfs_get_block_group(cache);
+               }
+               spin_unlock(&trans->transaction->dirty_bgs_lock);
+
                 btrfs_put_block_group(cache);
                 total -= num_bytes;
                 bytenr += num_bytes;
@@ -6956,15 +7182,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
                 return -ENOSPC;
         }
  
-       if (btrfs_test_opt(root, DISCARD))
-               ret = btrfs_discard_extent(root, start, len, NULL);
-
         if (pin)
                 pin_down_extent(root, cache, start, len, 1);
         else {
+               if (btrfs_test_opt(root, DISCARD))
+                       ret = btrfs_discard_extent(root, start, len, NULL);
                 btrfs_add_free_space(cache, start, len);
                 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
         }
+
         btrfs_put_block_group(cache);
  
         trace_btrfs_reserved_extent_free(root, start, len);
@@ -7095,9 +7321,9 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
         ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
                                       ins, size);
         if (ret) {
+               btrfs_free_path(path);
                 btrfs_free_and_pin_reserved_extent(root, ins->objectid,
                                                    root->nodesize);
-               btrfs_free_path(path);
                 return ret;
         }
  
@@ -7217,7 +7443,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
         btrfs_set_header_generation(buf, trans->transid);
         btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
         btrfs_tree_lock(buf);
-       clean_tree_block(trans, root, buf);
+       clean_tree_block(trans, root->fs_info, buf);
         clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
  
         btrfs_set_lock_blocking(buf);
@@ -7815,7 +8041,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
         bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
         blocksize = root->nodesize;
  
-       next = btrfs_find_tree_block(root, bytenr);
+       next = btrfs_find_tree_block(root->fs_info, bytenr);
         if (!next) {
                 next = btrfs_find_create_tree_block(root, bytenr);
                 if (!next)
@@ -8016,7 +8242,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                         btrfs_set_lock_blocking(eb);
                         path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                 }
-               clean_tree_block(trans, root, eb);
+               clean_tree_block(trans, root->fs_info, eb);
         }
  
         if (eb == root->node) {
@@ -8533,10 +8759,30 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
  
         BUG_ON(cache->ro);
  
+again:
         trans = btrfs_join_transaction(root);
         if (IS_ERR(trans))
                 return PTR_ERR(trans);
  
+       /*
+        * we're not allowed to set block groups readonly after the dirty
+        * block groups cache has started writing.  If it already started,
+        * back off and let this transaction commit
+        */
+       mutex_lock(&root->fs_info->ro_block_group_mutex);
+       if (trans->transaction->dirty_bg_run) {
+               u64 transid = trans->transid;
+
+               mutex_unlock(&root->fs_info->ro_block_group_mutex);
+               btrfs_end_transaction(trans, root);
+
+               ret = btrfs_wait_for_commit(root, transid);
+               if (ret)
+                       return ret;
+               goto again;
+       }
+
+
         ret = set_block_group_ro(cache, 0);
         if (!ret)
                 goto out;
@@ -8551,6 +8797,7 @@ out:
                 alloc_flags = update_block_group_flags(root, cache->flags);
                 check_system_chunk(trans, root, alloc_flags);
         }
+       mutex_unlock(&root->fs_info->ro_block_group_mutex);
  
         btrfs_end_transaction(trans, root);
         return ret;
@@ -8720,7 +8967,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
                 min_free <<= 1;
         } else if (index == BTRFS_RAID_RAID0) {
                 dev_min = fs_devices->rw_devices;
-               do_div(min_free, dev_min);
+               min_free = div64_u64(min_free, dev_min);
         }
  
         /* We need to do this so that we can look at pending chunks */
@@ -8992,6 +9239,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
         INIT_LIST_HEAD(&cache->bg_list);
         INIT_LIST_HEAD(&cache->ro_list);
         INIT_LIST_HEAD(&cache->dirty_list);
+       INIT_LIST_HEAD(&cache->io_list);
         btrfs_init_free_space_ctl(cache);
         atomic_set(&cache->trimming, 0);
  
@@ -9355,7 +9603,38 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                 goto out;
         }
  
+       /*
+        * get the inode first so any iput calls done for the io_list
+        * aren't the final iput (no unlinks allowed now)
+        */
         inode = lookup_free_space_inode(tree_root, block_group, path);
+
+       mutex_lock(&trans->transaction->cache_write_mutex);
+       /*
+        * make sure our free spache cache IO is done before remove the
+        * free space inode
+        */
+       spin_lock(&trans->transaction->dirty_bgs_lock);
+       if (!list_empty(&block_group->io_list)) {
+               list_del_init(&block_group->io_list);
+
+               WARN_ON(!IS_ERR(inode) && inode != block_group->io_ctl.inode);
+
+               spin_unlock(&trans->transaction->dirty_bgs_lock);
+               btrfs_wait_cache_io(root, trans, block_group,
+                                   &block_group->io_ctl, path,
+                                   block_group->key.objectid);
+               btrfs_put_block_group(block_group);
+               spin_lock(&trans->transaction->dirty_bgs_lock);
+       }
+
+       if (!list_empty(&block_group->dirty_list)) {
+               list_del_init(&block_group->dirty_list);
+               btrfs_put_block_group(block_group);
+       }
+       spin_unlock(&trans->transaction->dirty_bgs_lock);
+       mutex_unlock(&trans->transaction->cache_write_mutex);
+
         if (!IS_ERR(inode)) {
                 ret = btrfs_orphan_add(trans, inode);
                 if (ret) {
@@ -9448,18 +9727,29 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
  
         spin_lock(&trans->transaction->dirty_bgs_lock);
         if (!list_empty(&block_group->dirty_list)) {
-               list_del_init(&block_group->dirty_list);
-               btrfs_put_block_group(block_group);
+               WARN_ON(1);
+       }
+       if (!list_empty(&block_group->io_list)) {
+               WARN_ON(1);
         }
         spin_unlock(&trans->transaction->dirty_bgs_lock);
-
         btrfs_remove_free_space_cache(block_group);
  
         spin_lock(&block_group->space_info->lock);
         list_del_init(&block_group->ro_list);
+
+       if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+               WARN_ON(block_group->space_info->total_bytes
+                       < block_group->key.offset);
+               WARN_ON(block_group->space_info->bytes_readonly
+                       < block_group->key.offset);
+               WARN_ON(block_group->space_info->disk_total
+                       < block_group->key.offset * factor);
+       }
         block_group->space_info->total_bytes -= block_group->key.offset;
         block_group->space_info->bytes_readonly -= block_group->key.offset;
         block_group->space_info->disk_total -= block_group->key.offset * factor;
+
         spin_unlock(&block_group->space_info->lock);
  
         memcpy(&key, &block_group->key, sizeof(key));
@@ -9647,8 +9937,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                 mutex_unlock(&fs_info->unused_bg_unpin_mutex);
  
                 /* Reset pinned so btrfs_put_block_group doesn't complain */
+               spin_lock(&space_info->lock);
+               spin_lock(&block_group->lock);
+
+               space_info->bytes_pinned -= block_group->pinned;
+               space_info->bytes_readonly += block_group->pinned;
+               percpu_counter_add(&space_info->total_bytes_pinned,
+                                  -block_group->pinned);
                 block_group->pinned = 0;
  
+               spin_unlock(&block_group->lock);
+               spin_unlock(&space_info->lock);
+
                 /*
                  * Btrfs_remove_chunk will abort the transaction if things go
                  * horribly wrong.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c

index d688cfe..782f3bc 100644 (file)
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4514,8 +4514,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                 }
                 ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
                                               em_len, flags);
-               if (ret)
+               if (ret) {
+                       if (ret == 1)
+                               ret = 0;
                         goto out_free;
+               }
         }
  out_free:
         free_extent_map(em);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h

index 695b0cc..c668f36 100644 (file)
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -97,7 +97,7 @@ struct extent_io_tree {
         u64 dirty_bytes;
         int track_uptodate;
         spinlock_t lock;
-       struct extent_io_ops *ops;
+       const struct extent_io_ops *ops;
  };
  
  struct extent_state {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c

index 84a2d18..58ece65 100644 (file)
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -185,8 +185,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
         nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
         if (!dst) {
                 if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
-                       btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
-                                                           GFP_NOFS);
+                       btrfs_bio->csum_allocated = kmalloc_array(nblocks,
+                                       csum_size, GFP_NOFS);
                         if (!btrfs_bio->csum_allocated) {
                                 btrfs_free_path(path);
                                 return -ENOMEM;
@@ -553,7 +553,7 @@ static noinline void truncate_one_csum(struct btrfs_root *root,
                 btrfs_truncate_item(root, path, new_size, 0);
  
                 key->offset = end_byte;
-               btrfs_set_item_key_safe(root, path, key);
+               btrfs_set_item_key_safe(root->fs_info, path, key);
         } else {
                 BUG();
         }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c

index faa7d39..b072e17 100644 (file)
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -273,11 +273,7 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info)
                 defrag = rb_entry(node, struct inode_defrag, rb_node);
                 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
  
-               if (need_resched()) {
-                       spin_unlock(&fs_info->defrag_inodes_lock);
-                       cond_resched();
-                       spin_lock(&fs_info->defrag_inodes_lock);
-               }
+               cond_resched_lock(&fs_info->defrag_inodes_lock);
  
                 node = rb_first(&fs_info->defrag_inodes);
         }
@@ -868,7 +864,7 @@ next_slot:
  
                         memcpy(&new_key, &key, sizeof(new_key));
                         new_key.offset = end;
-                       btrfs_set_item_key_safe(root, path, &new_key);
+                       btrfs_set_item_key_safe(root->fs_info, path, &new_key);
  
                         extent_offset += end - key.offset;
                         btrfs_set_file_extent_offset(leaf, fi, extent_offset);
@@ -1126,7 +1122,7 @@ again:
                                      ino, bytenr, orig_offset,
                                      &other_start, &other_end)) {
                         new_key.offset = end;
-                       btrfs_set_item_key_safe(root, path, &new_key);
+                       btrfs_set_item_key_safe(root->fs_info, path, &new_key);
                         fi = btrfs_item_ptr(leaf, path->slots[0],
                                             struct btrfs_file_extent_item);
                         btrfs_set_file_extent_generation(leaf, fi,
@@ -1160,7 +1156,7 @@ again:
                                                          trans->transid);
                         path->slots[0]++;
                         new_key.offset = start;
-                       btrfs_set_item_key_safe(root, path, &new_key);
+                       btrfs_set_item_key_safe(root->fs_info, path, &new_key);
  
                         fi = btrfs_item_ptr(leaf, path->slots[0],
                                             struct btrfs_file_extent_item);
@@ -1485,7 +1481,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                         PAGE_CACHE_SIZE / (sizeof(struct page *)));
         nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
         nrptrs = max(nrptrs, 8);
-       pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
+       pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
         if (!pages)
                 return -ENOMEM;
  
@@ -1514,7 +1510,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                 }
  
                 reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
-               ret = btrfs_check_data_free_space(inode, reserve_bytes);
+               ret = btrfs_check_data_free_space(inode, reserve_bytes, write_bytes);
                 if (ret == -ENOSPC &&
                     (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
                                               BTRFS_INODE_PREALLOC))) {
@@ -1635,8 +1631,8 @@ again:
                         btrfs_end_write_no_snapshoting(root);
  
                 if (only_release_metadata && copied > 0) {
-                       u64 lockstart = round_down(pos, root->sectorsize);
-                       u64 lockend = lockstart +
+                       lockstart = round_down(pos, root->sectorsize);
+                       lockend = lockstart +
                                 (dirty_pages << PAGE_CACHE_SHIFT) - 1;
  
                         set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
@@ -1809,7 +1805,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
          * otherwise subsequent syncs to a file that's been synced in this
          * transaction will appear to have already occured.
          */
+       spin_lock(&BTRFS_I(inode)->lock);
         BTRFS_I(inode)->last_sub_trans = root->log_transid;
+       spin_unlock(&BTRFS_I(inode)->lock);
         if (num_written > 0) {
                 err = generic_write_sync(file, pos, num_written);
                 if (err < 0)
@@ -1864,7 +1862,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
  int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
  {
         struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_trans_handle *trans;
         struct btrfs_log_ctx ctx;
@@ -2162,7 +2160,7 @@ static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
                 u64 num_bytes;
  
                 key.offset = offset;
-               btrfs_set_item_key_safe(root, path, &key);
+               btrfs_set_item_key_safe(root->fs_info, path, &key);
                 fi = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_file_extent_item);
                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
@@ -2545,7 +2543,6 @@ static long btrfs_fallocate(struct file *file, int mode,
  {
         struct inode *inode = file_inode(file);
         struct extent_state *cached_state = NULL;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
         u64 cur_offset;
         u64 last_byte;
         u64 alloc_start;
@@ -2570,14 +2567,9 @@ static long btrfs_fallocate(struct file *file, int mode,
          * Make sure we have enough space before we do the
          * allocation.
          */
-       ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+       ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start, alloc_end - alloc_start);
         if (ret)
                 return ret;
-       if (root->fs_info->quota_enabled) {
-               ret = btrfs_qgroup_reserve(root, alloc_end - alloc_start);
-               if (ret)
-                       goto out_reserve_fail;
-       }
  
         mutex_lock(&inode->i_mutex);
         ret = inode_newsize_ok(inode, alloc_end);
@@ -2667,23 +2659,35 @@ static long btrfs_fallocate(struct file *file, int mode,
                                                         1 << inode->i_blkbits,
                                                         offset + len,
                                                         &alloc_hint);
-
-                       if (ret < 0) {
-                               free_extent_map(em);
-                               break;
-                       }
                 } else if (actual_end > inode->i_size &&
                            !(mode & FALLOC_FL_KEEP_SIZE)) {
+                       struct btrfs_trans_handle *trans;
+                       struct btrfs_root *root = BTRFS_I(inode)->root;
+
                         /*
                          * We didn't need to allocate any more space, but we
                          * still extended the size of the file so we need to
-                        * update i_size.
+                        * update i_size and the inode item.
                          */
-                       inode->i_ctime = CURRENT_TIME;
-                       i_size_write(inode, actual_end);
-                       btrfs_ordered_update_i_size(inode, actual_end, NULL);
+                       trans = btrfs_start_transaction(root, 1);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                       } else {
+                               inode->i_ctime = CURRENT_TIME;
+                               i_size_write(inode, actual_end);
+                               btrfs_ordered_update_i_size(inode, actual_end,
+                                                           NULL);
+                               ret = btrfs_update_inode(trans, root, inode);
+                               if (ret)
+                                       btrfs_end_transaction(trans, root);
+                               else
+                                       ret = btrfs_end_transaction(trans,
+                                                                   root);
+                       }
                 }
                 free_extent_map(em);
+               if (ret < 0)
+                       break;
  
                 cur_offset = last_byte;
                 if (cur_offset >= alloc_end) {
@@ -2695,9 +2699,6 @@ static long btrfs_fallocate(struct file *file, int mode,
                              &cached_state, GFP_NOFS);
  out:
         mutex_unlock(&inode->i_mutex);
-       if (root->fs_info->quota_enabled)
-               btrfs_qgroup_free(root, alloc_end - alloc_start);
-out_reserve_fail:
         /* Let go of our reservation. */
         btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
         return ret;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c

index a719785..81fa75a 100644 (file)
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -85,7 +85,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
         }
  
         mapping_set_gfp_mask(inode->i_mapping,
-                       mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
+                       mapping_gfp_mask(inode->i_mapping) &
+                       ~(GFP_NOFS & ~__GFP_HIGHMEM));
  
         return inode;
  }
@@ -170,13 +171,13 @@ static int __create_free_space_inode(struct btrfs_root *root,
         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
         key.offset = offset;
         key.type = 0;
-
         ret = btrfs_insert_empty_item(trans, root, path, &key,
                                       sizeof(struct btrfs_free_space_header));
         if (ret < 0) {
                 btrfs_release_path(path);
                 return ret;
         }
+
         leaf = path->nodes[0];
         header = btrfs_item_ptr(leaf, path->slots[0],
                                 struct btrfs_free_space_header);
@@ -225,9 +226,37 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
  
  int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                                     struct btrfs_trans_handle *trans,
+                                   struct btrfs_block_group_cache *block_group,
                                     struct inode *inode)
  {
         int ret = 0;
+       struct btrfs_path *path = btrfs_alloc_path();
+
+       if (!path) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       if (block_group) {
+               mutex_lock(&trans->transaction->cache_write_mutex);
+               if (!list_empty(&block_group->io_list)) {
+                       list_del_init(&block_group->io_list);
+
+                       btrfs_wait_cache_io(root, trans, block_group,
+                                           &block_group->io_ctl, path,
+                                           block_group->key.objectid);
+                       btrfs_put_block_group(block_group);
+               }
+
+               /*
+                * now that we've truncated the cache away, its no longer
+                * setup or written
+                */
+               spin_lock(&block_group->lock);
+               block_group->disk_cache_state = BTRFS_DC_CLEAR;
+               spin_unlock(&block_group->lock);
+       }
+       btrfs_free_path(path);
  
         btrfs_i_size_write(inode, 0);
         truncate_pagecache(inode, 0);
@@ -235,15 +264,23 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
         /*
          * We don't need an orphan item because truncating the free space cache
          * will never be split across transactions.
+        * We don't need to check for -EAGAIN because we're a free space
+        * cache inode
          */
         ret = btrfs_truncate_inode_items(trans, root, inode,
                                          0, BTRFS_EXTENT_DATA_KEY);
         if (ret) {
+               mutex_unlock(&trans->transaction->cache_write_mutex);
                 btrfs_abort_transaction(trans, root, ret);
                 return ret;
         }
  
         ret = btrfs_update_inode(trans, root, inode);
+
+       if (block_group)
+               mutex_unlock(&trans->transaction->cache_write_mutex);
+
+fail:
         if (ret)
                 btrfs_abort_transaction(trans, root, ret);
  
@@ -269,18 +306,7 @@ static int readahead_cache(struct inode *inode)
         return 0;
  }
  
-struct io_ctl {
-       void *cur, *orig;
-       struct page *page;
-       struct page **pages;
-       struct btrfs_root *root;
-       unsigned long size;
-       int index;
-       int num_pages;
-       unsigned check_crcs:1;
-};
-
-static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
+static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
                        struct btrfs_root *root, int write)
  {
         int num_pages;
@@ -296,45 +322,46 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
             (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
                 return -ENOSPC;
  
-       memset(io_ctl, 0, sizeof(struct io_ctl));
+       memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
  
-       io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
+       io_ctl->pages = kcalloc(num_pages, sizeof(struct page *), GFP_NOFS);
         if (!io_ctl->pages)
                 return -ENOMEM;
  
         io_ctl->num_pages = num_pages;
         io_ctl->root = root;
         io_ctl->check_crcs = check_crcs;
+       io_ctl->inode = inode;
  
         return 0;
  }
  
-static void io_ctl_free(struct io_ctl *io_ctl)
+static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
  {
         kfree(io_ctl->pages);
+       io_ctl->pages = NULL;
  }
  
-static void io_ctl_unmap_page(struct io_ctl *io_ctl)
+static void io_ctl_unmap_page(struct btrfs_io_ctl *io_ctl)
  {
         if (io_ctl->cur) {
-               kunmap(io_ctl->page);
                 io_ctl->cur = NULL;
                 io_ctl->orig = NULL;
         }
  }
  
-static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
+static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
  {
         ASSERT(io_ctl->index < io_ctl->num_pages);
         io_ctl->page = io_ctl->pages[io_ctl->index++];
-       io_ctl->cur = kmap(io_ctl->page);
+       io_ctl->cur = page_address(io_ctl->page);
         io_ctl->orig = io_ctl->cur;
         io_ctl->size = PAGE_CACHE_SIZE;
         if (clear)
                 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
  }
  
-static void io_ctl_drop_pages(struct io_ctl *io_ctl)
+static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
  {
         int i;
  
@@ -349,7 +376,7 @@ static void io_ctl_drop_pages(struct io_ctl *io_ctl)
         }
  }
  
-static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
+static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
                                 int uptodate)
  {
         struct page *page;
@@ -383,7 +410,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
         return 0;
  }
  
-static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
+static void io_ctl_set_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
  {
         __le64 *val;
  
@@ -406,7 +433,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
         io_ctl->cur += sizeof(u64);
  }
  
-static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
+static int io_ctl_check_generation(struct btrfs_io_ctl *io_ctl, u64 generation)
  {
         __le64 *gen;
  
@@ -435,7 +462,7 @@ static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
         return 0;
  }
  
-static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
+static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
  {
         u32 *tmp;
         u32 crc = ~(u32)0;
@@ -453,13 +480,12 @@ static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
                               PAGE_CACHE_SIZE - offset);
         btrfs_csum_final(crc, (char *)&crc);
         io_ctl_unmap_page(io_ctl);
-       tmp = kmap(io_ctl->pages[0]);
+       tmp = page_address(io_ctl->pages[0]);
         tmp += index;
         *tmp = crc;
-       kunmap(io_ctl->pages[0]);
  }
  
-static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
+static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
  {
         u32 *tmp, val;
         u32 crc = ~(u32)0;
@@ -473,10 +499,9 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
         if (index == 0)
                 offset = sizeof(u32) * io_ctl->num_pages;
  
-       tmp = kmap(io_ctl->pages[0]);
+       tmp = page_address(io_ctl->pages[0]);
         tmp += index;
         val = *tmp;
-       kunmap(io_ctl->pages[0]);
  
         io_ctl_map_page(io_ctl, 0);
         crc = btrfs_csum_data(io_ctl->orig + offset, crc,
@@ -492,7 +517,7 @@ static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
         return 0;
  }
  
-static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
+static int io_ctl_add_entry(struct btrfs_io_ctl *io_ctl, u64 offset, u64 bytes,
                             void *bitmap)
  {
         struct btrfs_free_space_entry *entry;
@@ -522,7 +547,7 @@ static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
         return 0;
  }
  
-static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
+static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
  {
         if (!io_ctl->cur)
                 return -ENOSPC;
@@ -545,7 +570,7 @@ static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
         return 0;
  }
  
-static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
+static void io_ctl_zero_remaining_pages(struct btrfs_io_ctl *io_ctl)
  {
         /*
          * If we're not on the boundary we know we've modified the page and we
@@ -562,7 +587,7 @@ static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
         }
  }
  
-static int io_ctl_read_entry(struct io_ctl *io_ctl,
+static int io_ctl_read_entry(struct btrfs_io_ctl *io_ctl,
                             struct btrfs_free_space *entry, u8 *type)
  {
         struct btrfs_free_space_entry *e;
@@ -589,7 +614,7 @@ static int io_ctl_read_entry(struct io_ctl *io_ctl,
         return 0;
  }
  
-static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
+static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
                               struct btrfs_free_space *entry)
  {
         int ret;
@@ -648,7 +673,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
  {
         struct btrfs_free_space_header *header;
         struct extent_buffer *leaf;
-       struct io_ctl io_ctl;
+       struct btrfs_io_ctl io_ctl;
         struct btrfs_key key;
         struct btrfs_free_space *e, *n;
         LIST_HEAD(bitmaps);
@@ -877,7 +902,7 @@ out:
  }
  
  static noinline_for_stack
-int write_cache_extent_entries(struct io_ctl *io_ctl,
+int write_cache_extent_entries(struct btrfs_io_ctl *io_ctl,
                               struct btrfs_free_space_ctl *ctl,
                               struct btrfs_block_group_cache *block_group,
                               int *entries, int *bitmaps,
@@ -885,6 +910,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
  {
         int ret;
         struct btrfs_free_cluster *cluster = NULL;
+       struct btrfs_free_cluster *cluster_locked = NULL;
         struct rb_node *node = rb_first(&ctl->free_space_offset);
         struct btrfs_trim_range *trim_entry;
  
@@ -896,6 +922,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
         }
  
         if (!node && cluster) {
+               cluster_locked = cluster;
+               spin_lock(&cluster_locked->lock);
                 node = rb_first(&cluster->root);
                 cluster = NULL;
         }
@@ -919,9 +947,15 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
                 node = rb_next(node);
                 if (!node && cluster) {
                         node = rb_first(&cluster->root);
+                       cluster_locked = cluster;
+                       spin_lock(&cluster_locked->lock);
                         cluster = NULL;
                 }
         }
+       if (cluster_locked) {
+               spin_unlock(&cluster_locked->lock);
+               cluster_locked = NULL;
+       }
  
         /*
          * Make sure we don't miss any range that was removed from our rbtree
@@ -939,6 +973,8 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
  
         return 0;
  fail:
+       if (cluster_locked)
+               spin_unlock(&cluster_locked->lock);
         return -ENOSPC;
  }
  
@@ -1000,7 +1036,7 @@ fail:
  static noinline_for_stack int
  write_pinned_extent_entries(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group,
-                           struct io_ctl *io_ctl,
+                           struct btrfs_io_ctl *io_ctl,
                             int *entries)
  {
         u64 start, extent_start, extent_end, len;
@@ -1050,7 +1086,7 @@ write_pinned_extent_entries(struct btrfs_root *root,
  }
  
  static noinline_for_stack int
-write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list)
+write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list)
  {
         struct list_head *pos, *n;
         int ret;
@@ -1083,10 +1119,7 @@ static int flush_dirty_cache(struct inode *inode)
  }
  
  static void noinline_for_stack
-cleanup_write_cache_enospc(struct inode *inode,
-                          struct io_ctl *io_ctl,
-                          struct extent_state **cached_state,
-                          struct list_head *bitmap_list)
+cleanup_bitmap_list(struct list_head *bitmap_list)
  {
         struct list_head *pos, *n;
  
@@ -1095,12 +1128,85 @@ cleanup_write_cache_enospc(struct inode *inode,
                         list_entry(pos, struct btrfs_free_space, list);
                 list_del_init(&entry->list);
         }
+}
+
+static void noinline_for_stack
+cleanup_write_cache_enospc(struct inode *inode,
+                          struct btrfs_io_ctl *io_ctl,
+                          struct extent_state **cached_state,
+                          struct list_head *bitmap_list)
+{
         io_ctl_drop_pages(io_ctl);
         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
                              i_size_read(inode) - 1, cached_state,
                              GFP_NOFS);
  }
  
+int btrfs_wait_cache_io(struct btrfs_root *root,
+                       struct btrfs_trans_handle *trans,
+                       struct btrfs_block_group_cache *block_group,
+                       struct btrfs_io_ctl *io_ctl,
+                       struct btrfs_path *path, u64 offset)
+{
+       int ret;
+       struct inode *inode = io_ctl->inode;
+
+       if (!inode)
+               return 0;
+
+       if (block_group)
+               root = root->fs_info->tree_root;
+
+       /* Flush the dirty pages in the cache file. */
+       ret = flush_dirty_cache(inode);
+       if (ret)
+               goto out;
+
+       /* Update the cache item to tell everyone this cache file is valid. */
+       ret = update_cache_item(trans, root, inode, path, offset,
+                               io_ctl->entries, io_ctl->bitmaps);
+out:
+       io_ctl_free(io_ctl);
+       if (ret) {
+               invalidate_inode_pages2(inode->i_mapping);
+               BTRFS_I(inode)->generation = 0;
+               if (block_group) {
+#ifdef DEBUG
+                       btrfs_err(root->fs_info,
+                               "failed to write free space cache for block group %llu",
+                               block_group->key.objectid);
+#endif
+               }
+       }
+       btrfs_update_inode(trans, root, inode);
+
+       if (block_group) {
+               /* the dirty list is protected by the dirty_bgs_lock */
+               spin_lock(&trans->transaction->dirty_bgs_lock);
+
+               /* the disk_cache_state is protected by the block group lock */
+               spin_lock(&block_group->lock);
+
+               /*
+                * only mark this as written if we didn't get put back on
+                * the dirty list while waiting for IO.   Otherwise our
+                * cache state won't be right, and we won't get written again
+                */
+               if (!ret && list_empty(&block_group->dirty_list))
+                       block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+               else if (ret)
+                       block_group->disk_cache_state = BTRFS_DC_ERROR;
+
+               spin_unlock(&block_group->lock);
+               spin_unlock(&trans->transaction->dirty_bgs_lock);
+               io_ctl->inode = NULL;
+               iput(inode);
+       }
+
+       return ret;
+
+}
+
  /**
   * __btrfs_write_out_cache - write out cached info to an inode
   * @root - the root the inode belongs to
@@ -1117,20 +1223,22 @@ cleanup_write_cache_enospc(struct inode *inode,
  static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                                    struct btrfs_free_space_ctl *ctl,
                                    struct btrfs_block_group_cache *block_group,
+                                  struct btrfs_io_ctl *io_ctl,
                                    struct btrfs_trans_handle *trans,
                                    struct btrfs_path *path, u64 offset)
  {
         struct extent_state *cached_state = NULL;
-       struct io_ctl io_ctl;
         LIST_HEAD(bitmap_list);
         int entries = 0;
         int bitmaps = 0;
         int ret;
+       int must_iput = 0;
  
         if (!i_size_read(inode))
                 return -1;
  
-       ret = io_ctl_init(&io_ctl, inode, root, 1);
+       WARN_ON(io_ctl->pages);
+       ret = io_ctl_init(io_ctl, inode, root, 1);
         if (ret)
                 return -1;
  
@@ -1143,55 +1251,57 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                         up_write(&block_group->data_rwsem);
                         BTRFS_I(inode)->generation = 0;
                         ret = 0;
+                       must_iput = 1;
                         goto out;
                 }
                 spin_unlock(&block_group->lock);
         }
  
         /* Lock all pages first so we can lock the extent safely. */
-       io_ctl_prepare_pages(&io_ctl, inode, 0);
+       io_ctl_prepare_pages(io_ctl, inode, 0);
  
         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
                          0, &cached_state);
  
-       io_ctl_set_generation(&io_ctl, trans->transid);
+       io_ctl_set_generation(io_ctl, trans->transid);
  
         mutex_lock(&ctl->cache_writeout_mutex);
         /* Write out the extent entries in the free space cache */
-       ret = write_cache_extent_entries(&io_ctl, ctl,
+       spin_lock(&ctl->tree_lock);
+       ret = write_cache_extent_entries(io_ctl, ctl,
                                          block_group, &entries, &bitmaps,
                                          &bitmap_list);
-       if (ret) {
-               mutex_unlock(&ctl->cache_writeout_mutex);
-               goto out_nospc;
-       }
+       if (ret)
+               goto out_nospc_locked;
  
         /*
          * Some spaces that are freed in the current transaction are pinned,
          * they will be added into free space cache after the transaction is
          * committed, we shouldn't lose them.
+        *
+        * If this changes while we are working we'll get added back to
+        * the dirty list and redo it.  No locking needed
          */
-       ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries);
-       if (ret) {
-               mutex_unlock(&ctl->cache_writeout_mutex);
-               goto out_nospc;
-       }
+       ret = write_pinned_extent_entries(root, block_group, io_ctl, &entries);
+       if (ret)
+               goto out_nospc_locked;
  
         /*
          * At last, we write out all the bitmaps and keep cache_writeout_mutex
          * locked while doing it because a concurrent trim can be manipulating
          * or freeing the bitmap.
          */
-       ret = write_bitmap_entries(&io_ctl, &bitmap_list);
+       ret = write_bitmap_entries(io_ctl, &bitmap_list);
+       spin_unlock(&ctl->tree_lock);
         mutex_unlock(&ctl->cache_writeout_mutex);
         if (ret)
                 goto out_nospc;
  
         /* Zero out the rest of the pages just to make sure */
-       io_ctl_zero_remaining_pages(&io_ctl);
+       io_ctl_zero_remaining_pages(io_ctl);
  
         /* Everything is written out, now we dirty the pages in the file. */
-       ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
+       ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages,
                                 0, i_size_read(inode), &cached_state);
         if (ret)
                 goto out_nospc;
@@ -1202,30 +1312,44 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
          * Release the pages and unlock the extent, we will flush
          * them out later
          */
-       io_ctl_drop_pages(&io_ctl);
+       io_ctl_drop_pages(io_ctl);
  
         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
  
-       /* Flush the dirty pages in the cache file. */
-       ret = flush_dirty_cache(inode);
+       /*
+        * at this point the pages are under IO and we're happy,
+        * The caller is responsible for waiting on them and updating the
+        * the cache and the inode
+        */
+       io_ctl->entries = entries;
+       io_ctl->bitmaps = bitmaps;
+
+       ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
         if (ret)
                 goto out;
  
-       /* Update the cache item to tell everyone this cache file is valid. */
-       ret = update_cache_item(trans, root, inode, path, offset,
-                               entries, bitmaps);
+       return 0;
+
  out:
-       io_ctl_free(&io_ctl);
+       io_ctl->inode = NULL;
+       io_ctl_free(io_ctl);
         if (ret) {
                 invalidate_inode_pages2(inode->i_mapping);
                 BTRFS_I(inode)->generation = 0;
         }
         btrfs_update_inode(trans, root, inode);
+       if (must_iput)
+               iput(inode);
         return ret;
  
+out_nospc_locked:
+       cleanup_bitmap_list(&bitmap_list);
+       spin_unlock(&ctl->tree_lock);
+       mutex_unlock(&ctl->cache_writeout_mutex);
+
  out_nospc:
-       cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list);
+       cleanup_write_cache_enospc(inode, io_ctl, &cached_state, &bitmap_list);
  
         if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
                 up_write(&block_group->data_rwsem);
@@ -1241,7 +1365,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
         struct inode *inode;
         int ret = 0;
-       enum btrfs_disk_cache_state dcs = BTRFS_DC_WRITTEN;
  
         root = root->fs_info->tree_root;
  
@@ -1250,34 +1373,34 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                 spin_unlock(&block_group->lock);
                 return 0;
         }
-
-       if (block_group->delalloc_bytes) {
-               block_group->disk_cache_state = BTRFS_DC_WRITTEN;
-               spin_unlock(&block_group->lock);
-               return 0;
-       }
         spin_unlock(&block_group->lock);
  
         inode = lookup_free_space_inode(root, block_group, path);
         if (IS_ERR(inode))
                 return 0;
  
-       ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
+       ret = __btrfs_write_out_cache(root, inode, ctl, block_group,
+                                     &block_group->io_ctl, trans,
                                       path, block_group->key.objectid);
         if (ret) {
-               dcs = BTRFS_DC_ERROR;
-               ret = 0;
  #ifdef DEBUG
                 btrfs_err(root->fs_info,
                         "failed to write free space cache for block group %llu",
                         block_group->key.objectid);
  #endif
+               spin_lock(&block_group->lock);
+               block_group->disk_cache_state = BTRFS_DC_ERROR;
+               spin_unlock(&block_group->lock);
+
+               block_group->io_ctl.inode = NULL;
+               iput(inode);
         }
  
-       spin_lock(&block_group->lock);
-       block_group->disk_cache_state = dcs;
-       spin_unlock(&block_group->lock);
-       iput(inode);
+       /*
+        * if ret == 0 the caller is expected to call btrfs_wait_cache_io
+        * to wait for IO and put the inode
+        */
+
         return ret;
  }
  
@@ -1298,11 +1421,11 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
                                    u64 offset)
  {
         u64 bitmap_start;
-       u64 bytes_per_bitmap;
+       u32 bytes_per_bitmap;
  
         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
         bitmap_start = offset - ctl->start;
-       bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
+       bitmap_start = div_u64(bitmap_start, bytes_per_bitmap);
         bitmap_start *= bytes_per_bitmap;
         bitmap_start += ctl->start;
  
@@ -1521,10 +1644,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
         u64 bitmap_bytes;
         u64 extent_bytes;
         u64 size = block_group->key.offset;
-       u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
-       int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
+       u32 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
+       u32 max_bitmaps = div_u64(size + bytes_per_bg - 1, bytes_per_bg);
  
-       max_bitmaps = max(max_bitmaps, 1);
+       max_bitmaps = max_t(u32, max_bitmaps, 1);
  
         ASSERT(ctl->total_bitmaps <= max_bitmaps);
  
@@ -1537,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
         else
                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
-                       div64_u64(size, 1024 * 1024 * 1024);
+                       div_u64(size, 1024 * 1024 * 1024);
  
         /*
          * we want to account for 1 more bitmap than what we have so we can make
@@ -1552,14 +1675,14 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
         }
  
         /*
-        * we want the extent entry threshold to always be at most 1/2 the maxw
+        * we want the extent entry threshold to always be at most 1/2 the max
          * bytes we can have, or whatever is less than that.
          */
         extent_bytes = max_bytes - bitmap_bytes;
-       extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
+       extent_bytes = min_t(u64, extent_bytes, max_bytes >> 1);
  
         ctl->extents_thresh =
-               div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
+               div_u64(extent_bytes, sizeof(struct btrfs_free_space));
  }
  
  static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
@@ -1673,7 +1796,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
                  */
                 if (*bytes >= align) {
                         tmp = entry->offset - ctl->start + align - 1;
-                       do_div(tmp, align);
+                       tmp = div64_u64(tmp, align);
                         tmp = tmp * align + ctl->start;
                         align_off = tmp - entry->offset;
                 } else {
@@ -2402,11 +2525,8 @@ static void __btrfs_remove_free_space_cache_locked(
                 } else {
                         free_bitmap(ctl, info);
                 }
-               if (need_resched()) {
-                       spin_unlock(&ctl->tree_lock);
-                       cond_resched();
-                       spin_lock(&ctl->tree_lock);
-               }
+
+               cond_resched_lock(&ctl->tree_lock);
         }
  }
  
@@ -2431,11 +2551,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
  
                 WARN_ON(cluster->block_group != block_group);
                 __btrfs_return_cluster_to_free_space(block_group, cluster);
-               if (need_resched()) {
-                       spin_unlock(&ctl->tree_lock);
-                       cond_resched();
-                       spin_lock(&ctl->tree_lock);
-               }
+
+               cond_resched_lock(&ctl->tree_lock);
         }
         __btrfs_remove_free_space_cache_locked(ctl);
         spin_unlock(&ctl->tree_lock);
@@ -3346,11 +3463,17 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
  {
         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
         int ret;
+       struct btrfs_io_ctl io_ctl;
  
         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
                 return 0;
  
-       ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
+       memset(&io_ctl, 0, sizeof(io_ctl));
+       ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl,
+                                     trans, path, 0);
+       if (!ret)
+               ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0);
+
         if (ret) {
                 btrfs_delalloc_release_metadata(inode, inode->i_size);
  #ifdef DEBUG
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h

index 88b2238..a16a029 100644 (file)
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -48,6 +48,8 @@ struct btrfs_free_space_op {
                            struct btrfs_free_space *info);
  };
  
+struct btrfs_io_ctl;
+
  struct inode *lookup_free_space_inode(struct btrfs_root *root,
                                       struct btrfs_block_group_cache
                                       *block_group, struct btrfs_path *path);
@@ -60,14 +62,19 @@ int btrfs_check_trunc_cache_free_space(struct btrfs_root *root,
                                        struct btrfs_block_rsv *rsv);
  int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                                     struct btrfs_trans_handle *trans,
+                                   struct btrfs_block_group_cache *block_group,
                                     struct inode *inode);
  int load_free_space_cache(struct btrfs_fs_info *fs_info,
                           struct btrfs_block_group_cache *block_group);
+int btrfs_wait_cache_io(struct btrfs_root *root,
+                       struct btrfs_trans_handle *trans,
+                       struct btrfs_block_group_cache *block_group,
+                       struct btrfs_io_ctl *io_ctl,
+                       struct btrfs_path *path, u64 offset);
  int btrfs_write_out_cache(struct btrfs_root *root,
                           struct btrfs_trans_handle *trans,
                           struct btrfs_block_group_cache *block_group,
                           struct btrfs_path *path);
-
  struct inode *lookup_free_ino_inode(struct btrfs_root *root,
                                     struct btrfs_path *path);
  int create_free_ino_inode(struct btrfs_root *root,
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c

index 74faea3..f6a596d 100644 (file)
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -456,7 +456,7 @@ again:
         }
  
         if (i_size_read(inode) > 0) {
-               ret = btrfs_truncate_free_space_cache(root, trans, inode);
+               ret = btrfs_truncate_free_space_cache(root, trans, NULL, inode);
                 if (ret) {
                         if (ret != -ENOSPC)
                                 btrfs_abort_transaction(trans, root, ret);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 43192e1..ada4d24 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -59,6 +59,7 @@
  #include "backref.h"
  #include "hash.h"
  #include "props.h"
+#include "qgroup.h"
  
  struct btrfs_iget_args {
         struct btrfs_key *location;
@@ -470,7 +471,7 @@ again:
          */
         if (inode_need_compress(inode)) {
                 WARN_ON(pages);
-               pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+               pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
                 if (!pages) {
                         /* just bail out to the uncompressed code */
                         goto cont;
@@ -752,7 +753,6 @@ retry:
                         }
                         goto out_free;
                 }
-
                 /*
                  * here we're doing allocation and writeback of the
                  * compressed pages
@@ -3110,6 +3110,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
         if (empty)
                 return;
  
+       down_read(&fs_info->delayed_iput_sem);
+
         spin_lock(&fs_info->delayed_iput_lock);
         list_splice_init(&fs_info->delayed_iputs, &list);
         spin_unlock(&fs_info->delayed_iput_lock);
@@ -3120,6 +3122,8 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
                 iput(delayed->inode);
                 kfree(delayed);
         }
+
+       up_read(&root->fs_info->delayed_iput_sem);
  }
  
  /*
@@ -4016,16 +4020,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
  {
         struct btrfs_root *root = BTRFS_I(dir)->root;
         struct btrfs_trans_handle *trans;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int ret;
  
         trans = __unlink_start_trans(dir);
         if (IS_ERR(trans))
                 return PTR_ERR(trans);
  
-       btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
+       btrfs_record_unlink_dir(trans, dir, d_inode(dentry), 0);
  
-       ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
+       ret = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
                                  dentry->d_name.name, dentry->d_name.len);
         if (ret)
                 goto out;
@@ -4124,7 +4128,7 @@ out:
  
  static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int err = 0;
         struct btrfs_root *root = BTRFS_I(dir)->root;
         struct btrfs_trans_handle *trans;
@@ -4151,7 +4155,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
                 goto out;
  
         /* now the directory is empty */
-       err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
+       err = btrfs_unlink_inode(trans, root, dir, d_inode(dentry),
                                  dentry->d_name.name, dentry->d_name.len);
         if (!err)
                 btrfs_i_size_write(inode, 0);
@@ -4162,6 +4166,21 @@ out:
         return err;
  }
  
+static int truncate_space_check(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               u64 bytes_deleted)
+{
+       int ret;
+
+       bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted);
+       ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
+                                 bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
+       if (!ret)
+               trans->bytes_reserved += bytes_deleted;
+       return ret;
+
+}
+
  /*
   * this can truncate away extent items, csum items and directory items.
   * It starts at a high offset and removes keys until it can't find
@@ -4197,9 +4216,21 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
         int ret;
         int err = 0;
         u64 ino = btrfs_ino(inode);
+       u64 bytes_deleted = 0;
+       bool be_nice = 0;
+       bool should_throttle = 0;
+       bool should_end = 0;
  
         BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
  
+       /*
+        * for non-free space inodes and ref cows, we want to back off from
+        * time to time
+        */
+       if (!btrfs_is_free_space_inode(inode) &&
+           test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+               be_nice = 1;
+
         path = btrfs_alloc_path();
         if (!path)
                 return -ENOMEM;
@@ -4229,6 +4260,19 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
         key.type = (u8)-1;
  
  search_again:
+       /*
+        * with a 16K leaf size and 128MB extents, you can actually queue
+        * up a huge file in a single leaf.  Most of the time that
+        * bytes_deleted is > 0, it will be huge by the time we get here
+        */
+       if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+               if (btrfs_should_end_transaction(trans, root)) {
+                       err = -EAGAIN;
+                       goto error;
+               }
+       }
+
+
         path->leave_spinning = 1;
         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
         if (ret < 0) {
@@ -4371,22 +4415,39 @@ delete:
                 } else {
                         break;
                 }
+               should_throttle = 0;
+
                 if (found_extent &&
                     (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
                      root == root->fs_info->tree_root)) {
                         btrfs_set_path_blocking(path);
+                       bytes_deleted += extent_num_bytes;
                         ret = btrfs_free_extent(trans, root, extent_start,
                                                 extent_num_bytes, 0,
                                                 btrfs_header_owner(leaf),
                                                 ino, extent_offset, 0);
                         BUG_ON(ret);
+                       if (btrfs_should_throttle_delayed_refs(trans, root))
+                               btrfs_async_run_delayed_refs(root,
+                                       trans->delayed_ref_updates * 2, 0);
+                       if (be_nice) {
+                               if (truncate_space_check(trans, root,
+                                                        extent_num_bytes)) {
+                                       should_end = 1;
+                               }
+                               if (btrfs_should_throttle_delayed_refs(trans,
+                                                                      root)) {
+                                       should_throttle = 1;
+                               }
+                       }
                 }
  
                 if (found_type == BTRFS_INODE_ITEM_KEY)
                         break;
  
                 if (path->slots[0] == 0 ||
-                   path->slots[0] != pending_del_slot) {
+                   path->slots[0] != pending_del_slot ||
+                   should_throttle || should_end) {
                         if (pending_del_nr) {
                                 ret = btrfs_del_items(trans, root, path,
                                                 pending_del_slot,
@@ -4399,6 +4460,23 @@ delete:
                                 pending_del_nr = 0;
                         }
                         btrfs_release_path(path);
+                       if (should_throttle) {
+                               unsigned long updates = trans->delayed_ref_updates;
+                               if (updates) {
+                                       trans->delayed_ref_updates = 0;
+                                       ret = btrfs_run_delayed_refs(trans, root, updates * 2);
+                                       if (ret && !err)
+                                               err = ret;
+                               }
+                       }
+                       /*
+                        * if we failed to refill our space rsv, bail out
+                        * and let the transaction restart
+                        */
+                       if (should_end) {
+                               err = -EAGAIN;
+                               goto error;
+                       }
                         goto search_again;
                 } else {
                         path->slots[0]--;
@@ -4415,7 +4493,18 @@ error:
         if (last_size != (u64)-1 &&
             root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
                 btrfs_ordered_update_i_size(inode, last_size, NULL);
+
         btrfs_free_path(path);
+
+       if (be_nice && bytes_deleted > 32 * 1024 * 1024) {
+               unsigned long updates = trans->delayed_ref_updates;
+               if (updates) {
+                       trans->delayed_ref_updates = 0;
+                       ret = btrfs_run_delayed_refs(trans, root, updates * 2);
+                       if (ret && !err)
+                               err = ret;
+               }
+       }
         return err;
  }
  
@@ -4826,7 +4915,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
  
  static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct btrfs_root *root = BTRFS_I(inode)->root;
         int err;
  
@@ -4924,6 +5013,7 @@ void btrfs_evict_inode(struct inode *inode)
         struct btrfs_trans_handle *trans;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_block_rsv *rsv, *global_rsv;
+       int steal_from_global = 0;
         u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
         int ret;
  
@@ -4991,9 +5081,20 @@ void btrfs_evict_inode(struct inode *inode)
                  * hard as possible to get this to work.
                  */
                 if (ret)
-                       ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
+                       steal_from_global++;
+               else
+                       steal_from_global = 0;
+               ret = 0;
  
-               if (ret) {
+               /*
+                * steal_from_global == 0: we reserved stuff, hooray!
+                * steal_from_global == 1: we didn't reserve stuff, boo!
+                * steal_from_global == 2: we've committed, still not a lot of
+                * room but maybe we'll have room in the global reserve this
+                * time.
+                * steal_from_global == 3: abandon all hope!
+                */
+               if (steal_from_global > 2) {
                         btrfs_warn(root->fs_info,
                                 "Could not get space for a delete, will truncate on mount %d",
                                 ret);
@@ -5009,10 +5110,40 @@ void btrfs_evict_inode(struct inode *inode)
                         goto no_delete;
                 }
  
+               /*
+                * We can't just steal from the global reserve, we need tomake
+                * sure there is room to do it, if not we need to commit and try
+                * again.
+                */
+               if (steal_from_global) {
+                       if (!btrfs_check_space_for_delayed_refs(trans, root))
+                               ret = btrfs_block_rsv_migrate(global_rsv, rsv,
+                                                             min_size);
+                       else
+                               ret = -ENOSPC;
+               }
+
+               /*
+                * Couldn't steal from the global reserve, we have too much
+                * pending stuff built up, commit the transaction and try it
+                * again.
+                */
+               if (ret) {
+                       ret = btrfs_commit_transaction(trans, root);
+                       if (ret) {
+                               btrfs_orphan_del(NULL, inode);
+                               btrfs_free_block_rsv(root, rsv);
+                               goto no_delete;
+                       }
+                       continue;
+               } else {
+                       steal_from_global = 0;
+               }
+
                 trans->block_rsv = rsv;
  
                 ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
-               if (ret != -ENOSPC)
+               if (ret != -ENOSPC && ret != -EAGAIN)
                         break;
  
                 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -5416,10 +5547,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
  static int btrfs_dentry_delete(const struct dentry *dentry)
  {
         struct btrfs_root *root;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (!inode && !IS_ROOT(dentry))
-               inode = dentry->d_parent->d_inode;
+               inode = d_inode(dentry->d_parent);
  
         if (inode) {
                 root = BTRFS_I(inode)->root;
@@ -6226,7 +6357,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
  {
         struct btrfs_trans_handle *trans;
         struct btrfs_root *root = BTRFS_I(dir)->root;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         u64 index;
         int err;
         int drop_inode = 0;
@@ -8129,7 +8260,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
         if (check_direct_IO(BTRFS_I(inode)->root, iocb, iter, offset))
                 return 0;
  
-       atomic_inc(&inode->i_dio_count);
+       inode_dio_begin(inode);
         smp_mb__after_atomic();
  
         /*
@@ -8169,7 +8300,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                 current->journal_info = &outstanding_extents;
         } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                      &BTRFS_I(inode)->runtime_flags)) {
-               inode_dio_done(inode);
+               inode_dio_end(inode);
                 flags = DIO_LOCKING | DIO_SKIP_HOLES;
                 wakeup = false;
         }
@@ -8188,7 +8319,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
         }
  out:
         if (wakeup)
-               inode_dio_done(inode);
+               inode_dio_end(inode);
         if (relock)
                 mutex_lock(&inode->i_mutex);
  
@@ -8581,7 +8712,7 @@ static int btrfs_truncate(struct inode *inode)
                 ret = btrfs_truncate_inode_items(trans, root, inode,
                                                  inode->i_size,
                                                  BTRFS_EXTENT_DATA_KEY);
-               if (ret != -ENOSPC) {
+               if (ret != -ENOSPC && ret != -EAGAIN) {
                         err = ret;
                         break;
                 }
@@ -8875,7 +9006,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
                          struct dentry *dentry, struct kstat *stat)
  {
         u64 delalloc_bytes;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         u32 blocksize = inode->i_sb->s_blocksize;
  
         generic_fillattr(inode, stat);
@@ -8896,8 +9027,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         struct btrfs_trans_handle *trans;
         struct btrfs_root *root = BTRFS_I(old_dir)->root;
         struct btrfs_root *dest = BTRFS_I(new_dir)->root;
-       struct inode *new_inode = new_dentry->d_inode;
-       struct inode *old_inode = old_dentry->d_inode;
+       struct inode *new_inode = d_inode(new_dentry);
+       struct inode *old_inode = d_inode(old_dentry);
         struct timespec ctime = CURRENT_TIME;
         u64 index = 0;
         u64 root_objectid;
@@ -9009,7 +9140,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                         old_dentry->d_name.len);
         } else {
                 ret = __btrfs_unlink_inode(trans, root, old_dir,
-                                       old_dentry->d_inode,
+                                       d_inode(old_dentry),
                                         old_dentry->d_name.name,
                                         old_dentry->d_name.len);
                 if (!ret)
@@ -9033,12 +9164,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                         BUG_ON(new_inode->i_nlink == 0);
                 } else {
                         ret = btrfs_unlink_inode(trans, dest, new_dir,
-                                                new_dentry->d_inode,
+                                                d_inode(new_dentry),
                                                  new_dentry->d_name.name,
                                                  new_dentry->d_name.len);
                 }
                 if (!ret && new_inode->i_nlink == 0)
-                       ret = btrfs_orphan_add(trans, new_dentry->d_inode);
+                       ret = btrfs_orphan_add(trans, d_inode(new_dentry));
                 if (ret) {
                         btrfs_abort_transaction(trans, root, ret);
                         goto out_fail;
@@ -9451,6 +9582,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
                                 btrfs_end_transaction(trans, root);
                         break;
                 }
+
                 btrfs_drop_extent_cache(inode, cur_offset,
                                         cur_offset + ins.offset -1, 0);
  
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index 74609b9..b05653f 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -456,6 +456,13 @@ static noinline int create_subvol(struct inode *dir,
         if (ret)
                 return ret;
  
+       /*
+        * Don't create subvolume whose level is not zero. Or qgroup will be
+        * screwed up since it assume subvolme qgroup's level to be 0.
+        */
+       if (btrfs_qgroup_level(objectid))
+               return -ENOSPC;
+
         btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
         /*
          * The same as the snapshot creation, please see the comment
@@ -717,7 +724,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
         if (ret)
                 goto fail;
  
-       inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+       inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
         if (IS_ERR(inode)) {
                 ret = PTR_ERR(inode);
                 goto fail;
@@ -761,10 +768,10 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
  {
         int error;
  
-       if (!victim->d_inode)
+       if (d_really_is_negative(victim))
                 return -ENOENT;
  
-       BUG_ON(victim->d_parent->d_inode != dir);
+       BUG_ON(d_inode(victim->d_parent) != dir);
         audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
  
         error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
@@ -772,8 +779,8 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
                 return error;
         if (IS_APPEND(dir))
                 return -EPERM;
-       if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) ||
-           IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+       if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) ||
+           IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim)))
                 return -EPERM;
         if (isdir) {
                 if (!d_is_dir(victim))
@@ -792,7 +799,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
  /* copy of may_create in fs/namei.c() */
  static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
  {
-       if (child->d_inode)
+       if (d_really_is_positive(child))
                 return -EEXIST;
         if (IS_DEADDIR(dir))
                 return -ENOENT;
@@ -810,7 +817,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
                                    u64 *async_transid, bool readonly,
                                    struct btrfs_qgroup_inherit *inherit)
  {
-       struct inode *dir  = parent->dentry->d_inode;
+       struct inode *dir  = d_inode(parent->dentry);
         struct dentry *dentry;
         int error;
  
@@ -824,7 +831,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
                 goto out_unlock;
  
         error = -EEXIST;
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 goto out_dput;
  
         error = btrfs_may_create(dir, dentry);
@@ -1564,7 +1571,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
                 goto out_free;
         }
  
-       do_div(new_size, root->sectorsize);
+       new_size = div_u64(new_size, root->sectorsize);
         new_size *= root->sectorsize;
  
         printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
@@ -2294,7 +2301,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
  {
         struct dentry *parent = file->f_path.dentry;
         struct dentry *dentry;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         struct inode *inode;
         struct btrfs_root *root = BTRFS_I(dir)->root;
         struct btrfs_root *dest = NULL;
@@ -2333,12 +2340,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                 goto out_unlock_dir;
         }
  
-       if (!dentry->d_inode) {
+       if (d_really_is_negative(dentry)) {
                 err = -ENOENT;
                 goto out_dput;
         }
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         dest = BTRFS_I(inode)->root;
         if (!capable(CAP_SYS_ADMIN)) {
                 /*
@@ -2897,6 +2904,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
         if (src == dst)
                 return -EINVAL;
  
+       if (len == 0)
+               return 0;
+
         btrfs_double_lock(src, loff, dst, dst_loff, len);
  
         ret = extent_same_check_offsets(src, loff, len);
@@ -3039,7 +3049,7 @@ out:
  static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                      u64 disko)
  {
-       struct seq_list tree_mod_seq_elem = {};
+       struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
         struct ulist *roots;
         struct ulist_iterator uiter;
         struct ulist_node *root_node = NULL;
@@ -3202,6 +3212,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
         key.offset = off;
  
         while (1) {
+               u64 next_key_min_offset = key.offset + 1;
+
                 /*
                  * note the key will change type as we walk through the
                  * tree.
@@ -3282,7 +3294,7 @@ process_slot:
                         } else if (key.offset >= off + len) {
                                 break;
                         }
-
+                       next_key_min_offset = key.offset + datal;
                         size = btrfs_item_size_nr(leaf, slot);
                         read_extent_buffer(leaf, buf,
                                            btrfs_item_ptr_offset(leaf, slot),
@@ -3497,7 +3509,7 @@ process_slot:
                                 break;
                 }
                 btrfs_release_path(path);
-               key.offset++;
+               key.offset = next_key_min_offset;
         }
         ret = 0;
  
@@ -3626,6 +3638,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
         if (off + len == src->i_size)
                 len = ALIGN(src->i_size, bs) - off;
  
+       if (len == 0) {
+               ret = 0;
+               goto out_unlock;
+       }
+
         /* verify the end result is block aligned */
         if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
             !IS_ALIGNED(destoff, bs))
@@ -4624,6 +4641,11 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
                                                 sa->src, sa->dst);
         }
  
+       /* update qgroup status and info */
+       err = btrfs_run_qgroups(trans, root->fs_info);
+       if (err < 0)
+               btrfs_error(root->fs_info, ret,
+                           "failed to update qgroup status and info\n");
         err = btrfs_end_transaction(trans, root);
         if (err && !ret)
                 ret = err;
@@ -4669,8 +4691,7 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
  
         /* FIXME: check if the IDs really exist */
         if (sa->create) {
-               ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
-                                         NULL);
+               ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid);
         } else {
                 ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
         }
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c

index 617553c..a2f0513 100644 (file)
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -434,7 +434,7 @@ out:
         return ret;
  }
  
-struct btrfs_compress_op btrfs_lzo_compress = {
+const struct btrfs_compress_op btrfs_lzo_compress = {
         .alloc_workspace        = lzo_alloc_workspace,
         .free_workspace         = lzo_free_workspace,
         .compress_pages         = lzo_compress_pages,
diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h

index b7816ce..1b10a3c 100644 (file)
--- a/fs/btrfs/math.h
+++ b/fs/btrfs/math.h
@@ -28,8 +28,7 @@ static inline u64 div_factor(u64 num, int factor)
         if (factor == 10)
                 return num;
         num *= factor;
-       do_div(num, 10);
-       return num;
+       return div_u64(num, 10);
  }
  
  static inline u64 div_factor_fine(u64 num, int factor)
@@ -37,8 +36,7 @@ static inline u64 div_factor_fine(u64 num, int factor)
         if (factor == 100)
                 return num;
         num *= factor;
-       do_div(num, 100);
-       return num;
+       return div_u64(num, 100);
  }
  
  #endif
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c

index 129b1dd..dca137b 100644 (file)
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -425,3 +425,5 @@ static const char *prop_compression_extract(struct inode *inode)
  
         return NULL;
  }
+
+
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c

index 058c79e..3d65465 100644 (file)
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -644,9 +644,8 @@ out:
  }
  
  static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
-                                   struct btrfs_root *root, u64 qgroupid,
-                                   u64 flags, u64 max_rfer, u64 max_excl,
-                                   u64 rsv_rfer, u64 rsv_excl)
+                                   struct btrfs_root *root,
+                                   struct btrfs_qgroup *qgroup)
  {
         struct btrfs_path *path;
         struct btrfs_key key;
@@ -657,7 +656,7 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
  
         key.objectid = 0;
         key.type = BTRFS_QGROUP_LIMIT_KEY;
-       key.offset = qgroupid;
+       key.offset = qgroup->qgroupid;
  
         path = btrfs_alloc_path();
         if (!path)
@@ -673,11 +672,11 @@ static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
         l = path->nodes[0];
         slot = path->slots[0];
         qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
-       btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
-       btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
-       btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
-       btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
-       btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
+       btrfs_set_qgroup_limit_flags(l, qgroup_limit, qgroup->lim_flags);
+       btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, qgroup->max_rfer);
+       btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, qgroup->max_excl);
+       btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, qgroup->rsv_rfer);
+       btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, qgroup->rsv_excl);
  
         btrfs_mark_buffer_dirty(l);
  
@@ -967,6 +966,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
         fs_info->pending_quota_state = 0;
         quota_root = fs_info->quota_root;
         fs_info->quota_root = NULL;
+       fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
         spin_unlock(&fs_info->qgroup_lock);
  
         btrfs_free_qgroup_config(fs_info);
@@ -982,7 +982,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
         list_del(&quota_root->dirty_list);
  
         btrfs_tree_lock(quota_root->node);
-       clean_tree_block(trans, tree_root, quota_root->node);
+       clean_tree_block(trans, tree_root->fs_info, quota_root->node);
         btrfs_tree_unlock(quota_root->node);
         btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
  
@@ -1001,6 +1001,110 @@ static void qgroup_dirty(struct btrfs_fs_info *fs_info,
                 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
  }
  
+/*
+ * The easy accounting, if we are adding/removing the only ref for an extent
+ * then this qgroup and all of the parent qgroups get their refrence and
+ * exclusive counts adjusted.
+ *
+ * Caller should hold fs_info->qgroup_lock.
+ */
+static int __qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
+                                   struct ulist *tmp, u64 ref_root,
+                                   u64 num_bytes, int sign)
+{
+       struct btrfs_qgroup *qgroup;
+       struct btrfs_qgroup_list *glist;
+       struct ulist_node *unode;
+       struct ulist_iterator uiter;
+       int ret = 0;
+
+       qgroup = find_qgroup_rb(fs_info, ref_root);
+       if (!qgroup)
+               goto out;
+
+       qgroup->rfer += sign * num_bytes;
+       qgroup->rfer_cmpr += sign * num_bytes;
+
+       WARN_ON(sign < 0 && qgroup->excl < num_bytes);
+       qgroup->excl += sign * num_bytes;
+       qgroup->excl_cmpr += sign * num_bytes;
+       if (sign > 0)
+               qgroup->reserved -= num_bytes;
+
+       qgroup_dirty(fs_info, qgroup);
+
+       /* Get all of the parent groups that contain this qgroup */
+       list_for_each_entry(glist, &qgroup->groups, next_group) {
+               ret = ulist_add(tmp, glist->group->qgroupid,
+                               ptr_to_u64(glist->group), GFP_ATOMIC);
+               if (ret < 0)
+                       goto out;
+       }
+
+       /* Iterate all of the parents and adjust their reference counts */
+       ULIST_ITER_INIT(&uiter);
+       while ((unode = ulist_next(tmp, &uiter))) {
+               qgroup = u64_to_ptr(unode->aux);
+               qgroup->rfer += sign * num_bytes;
+               qgroup->rfer_cmpr += sign * num_bytes;
+               WARN_ON(sign < 0 && qgroup->excl < num_bytes);
+               qgroup->excl += sign * num_bytes;
+               if (sign > 0)
+                       qgroup->reserved -= num_bytes;
+               qgroup->excl_cmpr += sign * num_bytes;
+               qgroup_dirty(fs_info, qgroup);
+
+               /* Add any parents of the parents */
+               list_for_each_entry(glist, &qgroup->groups, next_group) {
+                       ret = ulist_add(tmp, glist->group->qgroupid,
+                                       ptr_to_u64(glist->group), GFP_ATOMIC);
+                       if (ret < 0)
+                               goto out;
+               }
+       }
+       ret = 0;
+out:
+       return ret;
+}
+
+
+/*
+ * Quick path for updating qgroup with only excl refs.
+ *
+ * In that case, just update all parent will be enough.
+ * Or we needs to do a full rescan.
+ * Caller should also hold fs_info->qgroup_lock.
+ *
+ * Return 0 for quick update, return >0 for need to full rescan
+ * and mark INCONSISTENT flag.
+ * Return < 0 for other error.
+ */
+static int quick_update_accounting(struct btrfs_fs_info *fs_info,
+                                  struct ulist *tmp, u64 src, u64 dst,
+                                  int sign)
+{
+       struct btrfs_qgroup *qgroup;
+       int ret = 1;
+       int err = 0;
+
+       qgroup = find_qgroup_rb(fs_info, src);
+       if (!qgroup)
+               goto out;
+       if (qgroup->excl == qgroup->rfer) {
+               ret = 0;
+               err = __qgroup_excl_accounting(fs_info, tmp, dst,
+                                              qgroup->excl, sign);
+               if (err < 0) {
+                       ret = err;
+                       goto out;
+               }
+       }
+out:
+       if (ret)
+               fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+       return ret;
+}
+
  int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
  {
@@ -1008,8 +1112,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
         struct btrfs_qgroup *parent;
         struct btrfs_qgroup *member;
         struct btrfs_qgroup_list *list;
+       struct ulist *tmp;
         int ret = 0;
  
+       tmp = ulist_alloc(GFP_NOFS);
+       if (!tmp)
+               return -ENOMEM;
+
+       /* Check the level of src and dst first */
+       if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
+               return -EINVAL;
+
         mutex_lock(&fs_info->qgroup_ioctl_lock);
         quota_root = fs_info->quota_root;
         if (!quota_root) {
@@ -1043,23 +1156,33 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
  
         spin_lock(&fs_info->qgroup_lock);
         ret = add_relation_rb(quota_root->fs_info, src, dst);
+       if (ret < 0) {
+               spin_unlock(&fs_info->qgroup_lock);
+               goto out;
+       }
+       ret = quick_update_accounting(fs_info, tmp, src, dst, 1);
         spin_unlock(&fs_info->qgroup_lock);
  out:
         mutex_unlock(&fs_info->qgroup_ioctl_lock);
+       ulist_free(tmp);
         return ret;
  }
  
-int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+int __del_qgroup_relation(struct btrfs_trans_handle *trans,
                               struct btrfs_fs_info *fs_info, u64 src, u64 dst)
  {
         struct btrfs_root *quota_root;
         struct btrfs_qgroup *parent;
         struct btrfs_qgroup *member;
         struct btrfs_qgroup_list *list;
+       struct ulist *tmp;
         int ret = 0;
         int err;
  
-       mutex_lock(&fs_info->qgroup_ioctl_lock);
+       tmp = ulist_alloc(GFP_NOFS);
+       if (!tmp)
+               return -ENOMEM;
+
         quota_root = fs_info->quota_root;
         if (!quota_root) {
                 ret = -EINVAL;
@@ -1088,14 +1211,27 @@ exist:
  
         spin_lock(&fs_info->qgroup_lock);
         del_relation_rb(fs_info, src, dst);
+       ret = quick_update_accounting(fs_info, tmp, src, dst, -1);
         spin_unlock(&fs_info->qgroup_lock);
  out:
+       ulist_free(tmp);
+       return ret;
+}
+
+int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
+                             struct btrfs_fs_info *fs_info, u64 src, u64 dst)
+{
+       int ret = 0;
+
+       mutex_lock(&fs_info->qgroup_ioctl_lock);
+       ret = __del_qgroup_relation(trans, fs_info, src, dst);
         mutex_unlock(&fs_info->qgroup_ioctl_lock);
+
         return ret;
  }
  
  int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-                       struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
+                       struct btrfs_fs_info *fs_info, u64 qgroupid)
  {
         struct btrfs_root *quota_root;
         struct btrfs_qgroup *qgroup;
@@ -1133,6 +1269,7 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
  {
         struct btrfs_root *quota_root;
         struct btrfs_qgroup *qgroup;
+       struct btrfs_qgroup_list *list;
         int ret = 0;
  
         mutex_lock(&fs_info->qgroup_ioctl_lock);
@@ -1147,15 +1284,24 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
                 ret = -ENOENT;
                 goto out;
         } else {
-               /* check if there are no relations to this qgroup */
-               if (!list_empty(&qgroup->groups) ||
-                   !list_empty(&qgroup->members)) {
+               /* check if there are no children of this qgroup */
+               if (!list_empty(&qgroup->members)) {
                         ret = -EBUSY;
                         goto out;
                 }
         }
         ret = del_qgroup_item(trans, quota_root, qgroupid);
  
+       while (!list_empty(&qgroup->groups)) {
+               list = list_first_entry(&qgroup->groups,
+                                       struct btrfs_qgroup_list, next_group);
+               ret = __del_qgroup_relation(trans, fs_info,
+                                          qgroupid,
+                                          list->group->qgroupid);
+               if (ret)
+                       goto out;
+       }
+
         spin_lock(&fs_info->qgroup_lock);
         del_qgroup_rb(quota_root->fs_info, qgroupid);
         spin_unlock(&fs_info->qgroup_lock);
@@ -1184,23 +1330,27 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
                 ret = -ENOENT;
                 goto out;
         }
-       ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
-                                      limit->flags, limit->max_rfer,
-                                      limit->max_excl, limit->rsv_rfer,
-                                      limit->rsv_excl);
+
+       spin_lock(&fs_info->qgroup_lock);
+       if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER)
+               qgroup->max_rfer = limit->max_rfer;
+       if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL)
+               qgroup->max_excl = limit->max_excl;
+       if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER)
+               qgroup->rsv_rfer = limit->rsv_rfer;
+       if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL)
+               qgroup->rsv_excl = limit->rsv_excl;
+       qgroup->lim_flags |= limit->flags;
+
+       spin_unlock(&fs_info->qgroup_lock);
+
+       ret = update_qgroup_limit_item(trans, quota_root, qgroup);
         if (ret) {
                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                 btrfs_info(fs_info, "unable to update quota limit for %llu",
                        qgroupid);
         }
  
-       spin_lock(&fs_info->qgroup_lock);
-       qgroup->lim_flags = limit->flags;
-       qgroup->max_rfer = limit->max_rfer;
-       qgroup->max_excl = limit->max_excl;
-       qgroup->rsv_rfer = limit->rsv_rfer;
-       qgroup->rsv_excl = limit->rsv_excl;
-       spin_unlock(&fs_info->qgroup_lock);
  out:
         mutex_unlock(&fs_info->qgroup_ioctl_lock);
         return ret;
@@ -1256,14 +1406,14 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
                 return -1;
         if (oper1->bytenr > oper2->bytenr)
                 return 1;
-       if (oper1->seq < oper2->seq)
-               return -1;
-       if (oper1->seq > oper2->seq)
-               return 1;
         if (oper1->ref_root < oper2->ref_root)
                 return -1;
         if (oper1->ref_root > oper2->ref_root)
                 return 1;
+       if (oper1->seq < oper2->seq)
+               return -1;
+       if (oper1->seq > oper2->seq)
+               return 1;
         if (oper1->type < oper2->type)
                 return -1;
         if (oper1->type > oper2->type)
@@ -1372,19 +1522,10 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
         return 0;
  }
  
-/*
- * The easy accounting, if we are adding/removing the only ref for an extent
- * then this qgroup and all of the parent qgroups get their refrence and
- * exclusive counts adjusted.
- */
  static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
                                   struct btrfs_qgroup_operation *oper)
  {
-       struct btrfs_qgroup *qgroup;
         struct ulist *tmp;
-       struct btrfs_qgroup_list *glist;
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
         int sign = 0;
         int ret = 0;
  
@@ -1395,9 +1536,7 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
         spin_lock(&fs_info->qgroup_lock);
         if (!fs_info->quota_root)
                 goto out;
-       qgroup = find_qgroup_rb(fs_info, oper->ref_root);
-       if (!qgroup)
-               goto out;
+
         switch (oper->type) {
         case BTRFS_QGROUP_OPER_ADD_EXCL:
                 sign = 1;
@@ -1408,43 +1547,8 @@ static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
         default:
                 ASSERT(0);
         }
-       qgroup->rfer += sign * oper->num_bytes;
-       qgroup->rfer_cmpr += sign * oper->num_bytes;
-
-       WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
-       qgroup->excl += sign * oper->num_bytes;
-       qgroup->excl_cmpr += sign * oper->num_bytes;
-
-       qgroup_dirty(fs_info, qgroup);
-
-       /* Get all of the parent groups that contain this qgroup */
-       list_for_each_entry(glist, &qgroup->groups, next_group) {
-               ret = ulist_add(tmp, glist->group->qgroupid,
-                               ptr_to_u64(glist->group), GFP_ATOMIC);
-               if (ret < 0)
-                       goto out;
-       }
-
-       /* Iterate all of the parents and adjust their reference counts */
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(tmp, &uiter))) {
-               qgroup = u64_to_ptr(unode->aux);
-               qgroup->rfer += sign * oper->num_bytes;
-               qgroup->rfer_cmpr += sign * oper->num_bytes;
-               WARN_ON(sign < 0 && qgroup->excl < oper->num_bytes);
-               qgroup->excl += sign * oper->num_bytes;
-               qgroup->excl_cmpr += sign * oper->num_bytes;
-               qgroup_dirty(fs_info, qgroup);
-
-               /* Add any parents of the parents */
-               list_for_each_entry(glist, &qgroup->groups, next_group) {
-                       ret = ulist_add(tmp, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               goto out;
-               }
-       }
-       ret = 0;
+       ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
+                                      oper->num_bytes, sign);
  out:
         spin_unlock(&fs_info->qgroup_lock);
         ulist_free(tmp);
@@ -1845,7 +1949,7 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
         struct ulist *roots = NULL;
         struct ulist *qgroups, *tmp;
         struct btrfs_qgroup *qgroup;
-       struct seq_list elem = {};
+       struct seq_list elem = SEQ_LIST_INIT(elem);
         u64 seq;
         int old_roots = 0;
         int new_roots = 0;
@@ -1967,7 +2071,7 @@ static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
         int err;
         struct btrfs_qgroup *qg;
         u64 root_obj = 0;
-       struct seq_list elem = {};
+       struct seq_list elem = SEQ_LIST_INIT(elem);
  
         parents = ulist_alloc(GFP_NOFS);
         if (!parents)
@@ -2153,6 +2257,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
                 list_del_init(&qgroup->dirty);
                 spin_unlock(&fs_info->qgroup_lock);
                 ret = update_qgroup_info_item(trans, quota_root, qgroup);
+               if (ret)
+                       fs_info->qgroup_flags |=
+                                       BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+               ret = update_qgroup_limit_item(trans, quota_root, qgroup);
                 if (ret)
                         fs_info->qgroup_flags |=
                                         BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -2219,6 +2327,11 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                                 ret = -EINVAL;
                                 goto out;
                         }
+
+                       if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
                         ++i_qgroups;
                 }
         }
@@ -2230,17 +2343,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
         if (ret)
                 goto out;
  
-       if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
-               ret = update_qgroup_limit_item(trans, quota_root, objectid,
-                                              inherit->lim.flags,
-                                              inherit->lim.max_rfer,
-                                              inherit->lim.max_excl,
-                                              inherit->lim.rsv_rfer,
-                                              inherit->lim.rsv_excl);
-               if (ret)
-                       goto out;
-       }
-
         if (srcid) {
                 struct btrfs_root *srcroot;
                 struct btrfs_key srckey;
@@ -2286,6 +2388,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                 goto unlock;
         }
  
+       if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
+               dstgroup->lim_flags = inherit->lim.flags;
+               dstgroup->max_rfer = inherit->lim.max_rfer;
+               dstgroup->max_excl = inherit->lim.max_excl;
+               dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
+               dstgroup->rsv_excl = inherit->lim.rsv_excl;
+
+               ret = update_qgroup_limit_item(trans, quota_root, dstgroup);
+               if (ret) {
+                       fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+                       btrfs_info(fs_info, "unable to update quota limit for %llu",
+                              dstgroup->qgroupid);
+                       goto unlock;
+               }
+       }
+
         if (srcid) {
                 srcgroup = find_qgroup_rb(fs_info, srcid);
                 if (!srcgroup)
@@ -2302,6 +2420,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
                 dstgroup->excl_cmpr = level_size;
                 srcgroup->excl = level_size;
                 srcgroup->excl_cmpr = level_size;
+
+               /* inherit the limit info */
+               dstgroup->lim_flags = srcgroup->lim_flags;
+               dstgroup->max_rfer = srcgroup->max_rfer;
+               dstgroup->max_excl = srcgroup->max_excl;
+               dstgroup->rsv_rfer = srcgroup->rsv_rfer;
+               dstgroup->rsv_excl = srcgroup->rsv_excl;
+
                 qgroup_dirty(fs_info, dstgroup);
                 qgroup_dirty(fs_info, srcgroup);
         }
@@ -2358,12 +2484,6 @@ out:
         return ret;
  }
  
-/*
- * reserve some space for a qgroup and all its parents. The reservation takes
- * place with start_transaction or dealloc_reserve, similar to ENOSPC
- * accounting. If not enough space is available, EDQUOT is returned.
- * We assume that the requested space is new for all qgroups.
- */
  int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
  {
         struct btrfs_root *quota_root;
@@ -2513,7 +2633,7 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
  
  /*
   * returns < 0 on error, 0 when more leafs are to be scanned.
- * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ * returns 1 when done.
   */
  static int
  qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
@@ -2522,7 +2642,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
  {
         struct btrfs_key found;
         struct ulist *roots = NULL;
-       struct seq_list tree_mod_seq_elem = {};
+       struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
         u64 num_bytes;
         u64 seq;
         int new_roots;
@@ -2618,6 +2738,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
         struct ulist *tmp = NULL, *qgroups = NULL;
         struct extent_buffer *scratch_leaf = NULL;
         int err = -ENOMEM;
+       int ret = 0;
  
         path = btrfs_alloc_path();
         if (!path)
@@ -2660,7 +2781,7 @@ out:
         mutex_lock(&fs_info->qgroup_rescan_lock);
         fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
  
-       if (err == 2 &&
+       if (err > 0 &&
             fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
                 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
         } else if (err < 0) {
@@ -2668,13 +2789,33 @@ out:
         }
         mutex_unlock(&fs_info->qgroup_rescan_lock);
  
+       /*
+        * only update status, since the previous part has alreay updated the
+        * qgroup info.
+        */
+       trans = btrfs_start_transaction(fs_info->quota_root, 1);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               btrfs_err(fs_info,
+                         "fail to start transaction for status update: %d\n",
+                         err);
+               goto done;
+       }
+       ret = update_qgroup_status_item(trans, fs_info, fs_info->quota_root);
+       if (ret < 0) {
+               err = ret;
+               btrfs_err(fs_info, "fail to update qgroup status: %d\n", err);
+       }
+       btrfs_end_transaction(trans, fs_info->quota_root);
+
         if (err >= 0) {
                 btrfs_info(fs_info, "qgroup scan completed%s",
-                       err == 2 ? " (inconsistency flag cleared)" : "");
+                       err > 0 ? " (inconsistency flag cleared)" : "");
         } else {
                 btrfs_err(fs_info, "qgroup scan failed with %d", err);
         }
  
+done:
         complete_all(&fs_info->qgroup_rescan_completion);
  }
  
@@ -2709,7 +2850,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
                         mutex_unlock(&fs_info->qgroup_rescan_lock);
                         goto err;
                 }
-
                 fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
         }
  
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h

index 18cc68c..c5242aa 100644 (file)
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -70,8 +70,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
  int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
                               struct btrfs_fs_info *fs_info, u64 src, u64 dst);
  int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
-                       struct btrfs_fs_info *fs_info, u64 qgroupid,
-                       char *name);
+                       struct btrfs_fs_info *fs_info, u64 qgroupid);
  int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
                               struct btrfs_fs_info *fs_info, u64 qgroupid);
  int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c

index 5264858..fa72068 100644 (file)
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -237,12 +237,8 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
         }
  
         x = cmpxchg(&info->stripe_hash_table, NULL, table);
-       if (x) {
-               if (is_vmalloc_addr(x))
-                       vfree(x);
-               else
-                       kfree(x);
-       }
+       if (x)
+               kvfree(x);
         return 0;
  }
  
@@ -453,10 +449,7 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
         if (!info->stripe_hash_table)
                 return;
         btrfs_clear_rbio_cache(info);
-       if (is_vmalloc_addr(info->stripe_hash_table))
-               vfree(info->stripe_hash_table);
-       else
-               kfree(info->stripe_hash_table);
+       kvfree(info->stripe_hash_table);
         info->stripe_hash_table = NULL;
  }
  
@@ -1807,8 +1800,7 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
         int err;
         int i;
  
-       pointers = kzalloc(rbio->real_stripes * sizeof(void *),
-                          GFP_NOFS);
+       pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
         if (!pointers) {
                 err = -ENOMEM;
                 goto cleanup_io;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c

index d830853..74b24b0 100644 (file)
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3027,7 +3027,7 @@ int prealloc_file_extent_cluster(struct inode *inode,
         mutex_lock(&inode->i_mutex);
  
         ret = btrfs_check_data_free_space(inode, cluster->end +
-                                         1 - cluster->start);
+                                         1 - cluster->start, 0);
         if (ret)
                 goto out;
  
@@ -3430,7 +3430,9 @@ static int block_use_full_backref(struct reloc_control *rc,
  }
  
  static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
-                                   struct inode *inode, u64 ino)
+                                   struct btrfs_block_group_cache *block_group,
+                                   struct inode *inode,
+                                   u64 ino)
  {
         struct btrfs_key key;
         struct btrfs_root *root = fs_info->tree_root;
@@ -3463,7 +3465,7 @@ truncate:
                 goto out;
         }
  
-       ret = btrfs_truncate_free_space_cache(root, trans, inode);
+       ret = btrfs_truncate_free_space_cache(root, trans, block_group, inode);
  
         btrfs_end_transaction(trans, root);
         btrfs_btree_balance_dirty(root);
@@ -3509,6 +3511,7 @@ static int find_data_references(struct reloc_control *rc,
          */
         if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
                 ret = delete_block_group_cache(rc->extent_root->fs_info,
+                                              rc->block_group,
                                                NULL, ref_objectid);
                 if (ret != -ENOENT)
                         return ret;
@@ -4223,7 +4226,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
         btrfs_free_path(path);
  
         if (!IS_ERR(inode))
-               ret = delete_block_group_cache(fs_info, inode, 0);
+               ret = delete_block_group_cache(fs_info, rc->block_group, inode, 0);
         else
                 ret = PTR_ERR(inode);
  
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c

index ec57687..ab58115 100644 (file)
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -964,9 +964,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
          * the statistics.
          */
  
-       sblocks_for_recheck = kzalloc(BTRFS_MAX_MIRRORS *
-                                    sizeof(*sblocks_for_recheck),
-                                    GFP_NOFS);
+       sblocks_for_recheck = kcalloc(BTRFS_MAX_MIRRORS,
+                                     sizeof(*sblocks_for_recheck), GFP_NOFS);
         if (!sblocks_for_recheck) {
                 spin_lock(&sctx->stat_lock);
                 sctx->stat.malloc_errors++;
@@ -2319,7 +2318,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
                                        unsigned long *bitmap,
                                        u64 start, u64 len)
  {
-       int offset;
+       u32 offset;
         int nsectors;
         int sectorsize = sparity->sctx->dev_root->sectorsize;
  
@@ -2329,7 +2328,7 @@ static inline void __scrub_mark_bitmap(struct scrub_parity *sparity,
         }
  
         start -= sparity->logic_start;
-       offset = (int)do_div(start, sparity->stripe_len);
+       start = div_u64_rem(start, sparity->stripe_len, &offset);
         offset /= sectorsize;
         nsectors = (int)len / sectorsize;
  
@@ -2612,8 +2611,8 @@ static int get_raid56_logic_offset(u64 physical, int num,
         int j = 0;
         u64 stripe_nr;
         u64 last_offset;
-       int stripe_index;
-       int rot;
+       u32 stripe_index;
+       u32 rot;
  
         last_offset = (physical - map->stripes[num].physical) *
                       nr_data_stripes(map);
@@ -2624,12 +2623,11 @@ static int get_raid56_logic_offset(u64 physical, int num,
         for (i = 0; i < nr_data_stripes(map); i++) {
                 *offset = last_offset + i * map->stripe_len;
  
-               stripe_nr = *offset;
-               do_div(stripe_nr, map->stripe_len);
-               do_div(stripe_nr, nr_data_stripes(map));
+               stripe_nr = div_u64(*offset, map->stripe_len);
+               stripe_nr = div_u64(stripe_nr, nr_data_stripes(map));
  
                 /* Work out the disk rotation on this stripe-set */
-               rot = do_div(stripe_nr, map->num_stripes);
+               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &rot);
                 /* calculate which stripe this data locates */
                 rot += i;
                 stripe_index = rot % map->num_stripes;
@@ -2995,10 +2993,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         int extent_mirror_num;
         int stop_loop = 0;
  
-       nstripes = length;
         physical = map->stripes[num].physical;
         offset = 0;
-       do_div(nstripes, map->stripe_len);
+       nstripes = div_u64(length, map->stripe_len);
         if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
                 offset = map->stripe_len * num;
                 increment = map->stripe_len * map->num_stripes;
@@ -3563,7 +3560,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
                                                 int is_dev_replace)
  {
         int ret = 0;
-       int flags = WQ_FREEZABLE | WQ_UNBOUND;
+       unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
         int max_active = fs_info->thread_pool_size;
  
         if (fs_info->scrub_workers_refcnt == 0) {
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c

index d6033f5..a1216f9 100644 (file)
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3067,48 +3067,6 @@ static struct pending_dir_move *get_pending_dir_moves(struct send_ctx *sctx,
         return NULL;
  }
  
-static int path_loop(struct send_ctx *sctx, struct fs_path *name,
-                    u64 ino, u64 gen, u64 *ancestor_ino)
-{
-       int ret = 0;
-       u64 parent_inode = 0;
-       u64 parent_gen = 0;
-       u64 start_ino = ino;
-
-       *ancestor_ino = 0;
-       while (ino != BTRFS_FIRST_FREE_OBJECTID) {
-               fs_path_reset(name);
-
-               if (is_waiting_for_rm(sctx, ino))
-                       break;
-               if (is_waiting_for_move(sctx, ino)) {
-                       if (*ancestor_ino == 0)
-                               *ancestor_ino = ino;
-                       ret = get_first_ref(sctx->parent_root, ino,
-                                           &parent_inode, &parent_gen, name);
-               } else {
-                       ret = __get_cur_name_and_parent(sctx, ino, gen,
-                                                       &parent_inode,
-                                                       &parent_gen, name);
-                       if (ret > 0) {
-                               ret = 0;
-                               break;
-                       }
-               }
-               if (ret < 0)
-                       break;
-               if (parent_inode == start_ino) {
-                       ret = 1;
-                       if (*ancestor_ino == 0)
-                               *ancestor_ino = ino;
-                       break;
-               }
-               ino = parent_inode;
-               gen = parent_gen;
-       }
-       return ret;
-}
-
  static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
  {
         struct fs_path *from_path = NULL;
@@ -3120,7 +3078,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
         struct waiting_dir_move *dm = NULL;
         u64 rmdir_ino = 0;
         int ret;
-       u64 ancestor = 0;
  
         name = fs_path_alloc();
         from_path = fs_path_alloc();
@@ -3152,22 +3109,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
                 goto out;
  
         sctx->send_progress = sctx->cur_ino + 1;
-       ret = path_loop(sctx, name, pm->ino, pm->gen, &ancestor);
-       if (ret) {
-               LIST_HEAD(deleted_refs);
-               ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
-               ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
-                                          &pm->update_refs, &deleted_refs,
-                                          pm->is_orphan);
-               if (ret < 0)
-                       goto out;
-               if (rmdir_ino) {
-                       dm = get_waiting_dir_move(sctx, pm->ino);
-                       ASSERT(dm);
-                       dm->rmdir_ino = rmdir_ino;
-               }
-               goto out;
-       }
         fs_path_reset(name);
         to_path = name;
         name = NULL;
@@ -3610,10 +3551,27 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                         if (ret < 0)
                                 goto out;
                         if (ret) {
+                               struct name_cache_entry *nce;
+
                                 ret = orphanize_inode(sctx, ow_inode, ow_gen,
                                                 cur->full_path);
                                 if (ret < 0)
                                         goto out;
+                               /*
+                                * Make sure we clear our orphanized inode's
+                                * name from the name cache. This is because the
+                                * inode ow_inode might be an ancestor of some
+                                * other inode that will be orphanized as well
+                                * later and has an inode number greater than
+                                * sctx->send_progress. We need to prevent
+                                * future name lookups from using the old name
+                                * and get instead the orphan name.
+                                */
+                               nce = name_cache_search(sctx, ow_inode, ow_gen);
+                               if (nce) {
+                                       name_cache_delete(sctx, nce);
+                                       kfree(nce);
+                               }
                         } else {
                                 ret = send_unlink(sctx, cur->full_path);
                                 if (ret < 0)
@@ -5852,19 +5810,20 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
                                 ret = PTR_ERR(clone_root);
                                 goto out;
                         }
-                       clone_sources_to_rollback = i + 1;
                         spin_lock(&clone_root->root_item_lock);
-                       clone_root->send_in_progress++;
-                       if (!btrfs_root_readonly(clone_root)) {
+                       if (!btrfs_root_readonly(clone_root) ||
+                           btrfs_root_dead(clone_root)) {
                                 spin_unlock(&clone_root->root_item_lock);
                                 srcu_read_unlock(&fs_info->subvol_srcu, index);
                                 ret = -EPERM;
                                 goto out;
                         }
+                       clone_root->send_in_progress++;
                         spin_unlock(&clone_root->root_item_lock);
                         srcu_read_unlock(&fs_info->subvol_srcu, index);
  
                         sctx->clone_roots[i].root = clone_root;
+                       clone_sources_to_rollback = i + 1;
                 }
                 vfree(clone_sources_tmp);
                 clone_sources_tmp = NULL;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c

index 05fef19..9e66f5e 100644 (file)
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -901,6 +901,15 @@ find_root:
         if (IS_ERR(new_root))
                 return ERR_CAST(new_root);
  
+       if (!(sb->s_flags & MS_RDONLY)) {
+               int ret;
+               down_read(&fs_info->cleanup_work_sem);
+               ret = btrfs_orphan_cleanup(new_root);
+               up_read(&fs_info->cleanup_work_sem);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
+
         dir_id = btrfs_root_dirid(&new_root->root_item);
  setup_root:
         location.objectid = dir_id;
@@ -916,7 +925,7 @@ setup_root:
          * a reference to the dentry.  We will have already gotten a reference
          * to the inode in btrfs_fill_super so we're good to go.
          */
-       if (!new && sb->s_root->d_inode == inode) {
+       if (!new && d_inode(sb->s_root) == inode) {
                 iput(inode);
                 return dget(sb->s_root);
         }
@@ -1221,7 +1230,7 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
  
         root = mount_subtree(mnt, subvol_name);
  
-       if (!IS_ERR(root) && !is_subvolume_inode(root->d_inode)) {
+       if (!IS_ERR(root) && !is_subvolume_inode(d_inode(root))) {
                 struct super_block *s = root->d_sb;
                 dput(root);
                 root = ERR_PTR(-EINVAL);
@@ -1714,7 +1723,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
                 avail_space = device->total_bytes - device->bytes_used;
  
                 /* align with stripe_len */
-               do_div(avail_space, BTRFS_STRIPE_LEN);
+               avail_space = div_u64(avail_space, BTRFS_STRIPE_LEN);
                 avail_space *= BTRFS_STRIPE_LEN;
  
                 /*
@@ -1886,8 +1895,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
         buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
         buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
         /* Mask in the root object ID too, to disambiguate subvols */
-       buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
-       buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
+       buf->f_fsid.val[0] ^= BTRFS_I(d_inode(dentry))->root->objectid >> 32;
+       buf->f_fsid.val[1] ^= BTRFS_I(d_inode(dentry))->root->objectid;
  
         return 0;
  }
@@ -1908,6 +1917,17 @@ static struct file_system_type btrfs_fs_type = {
  };
  MODULE_ALIAS_FS("btrfs");
  
+static int btrfs_control_open(struct inode *inode, struct file *file)
+{
+       /*
+        * The control file's private_data is used to hold the
+        * transaction when it is started and is used to keep
+        * track of whether a transaction is already in progress.
+        */
+       file->private_data = NULL;
+       return 0;
+}
+
  /*
   * used by btrfsctl to scan devices when no FS is mounted
   */
@@ -2009,6 +2029,7 @@ static const struct super_operations btrfs_super_ops = {
  };
  
  static const struct file_operations btrfs_ctl_fops = {
+       .open = btrfs_control_open,
         .unlocked_ioctl  = btrfs_control_ioctl,
         .compat_ioctl = btrfs_control_ioctl,
         .owner   = THIS_MODULE,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c

index 94edb0a..e8a4c86 100644 (file)
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -459,7 +459,7 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
  static char btrfs_unknown_feature_names[3][NUM_FEATURE_BITS][13];
  static struct btrfs_feature_attr btrfs_feature_attrs[3][NUM_FEATURE_BITS];
  
-static u64 supported_feature_masks[3] = {
+static const u64 supported_feature_masks[3] = {
         [FEAT_COMPAT]    = BTRFS_FEATURE_COMPAT_SUPP,
         [FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
         [FEAT_INCOMPAT]  = BTRFS_FEATURE_INCOMPAT_SUPP,
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h

index f7dd298..3a4bbed 100644 (file)
--- a/fs/btrfs/sysfs.h
+++ b/fs/btrfs/sysfs.h
@@ -61,11 +61,23 @@ static struct btrfs_feature_attr btrfs_attr_##_name = {                          \
         BTRFS_FEAT_ATTR(name, FEAT_INCOMPAT, BTRFS_FEATURE_INCOMPAT, feature)
  
  /* convert from attribute */
-#define to_btrfs_feature_attr(a) \
-                       container_of(a, struct btrfs_feature_attr, kobj_attr)
-#define attr_to_btrfs_attr(a) container_of(a, struct kobj_attribute, attr)
-#define attr_to_btrfs_feature_attr(a) \
-                       to_btrfs_feature_attr(attr_to_btrfs_attr(a))
+static inline struct btrfs_feature_attr *
+to_btrfs_feature_attr(struct kobj_attribute *a)
+{
+       return container_of(a, struct btrfs_feature_attr, kobj_attr);
+}
+
+static inline struct kobj_attribute *attr_to_btrfs_attr(struct attribute *attr)
+{
+       return container_of(attr, struct kobj_attribute, attr);
+}
+
+static inline struct btrfs_feature_attr *
+attr_to_btrfs_feature_attr(struct attribute *attr)
+{
+       return to_btrfs_feature_attr(attr_to_btrfs_attr(attr));
+}
+
  char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
  extern const char * const btrfs_feature_set_names[3];
  extern struct kobj_type space_info_ktype;
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c

index 73f299e..c32a7ba 100644 (file)
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -232,7 +232,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
         init_dummy_trans(&trans);
  
         test_msg("Qgroup basic add\n");
-       ret = btrfs_create_qgroup(NULL, fs_info, 5, NULL);
+       ret = btrfs_create_qgroup(NULL, fs_info, 5);
         if (ret) {
                 test_msg("Couldn't create a qgroup %d\n", ret);
                 return ret;
@@ -301,7 +301,7 @@ static int test_multiple_refs(struct btrfs_root *root)
         test_msg("Qgroup multiple refs test\n");
  
         /* We have 5 created already from the previous test */
-       ret = btrfs_create_qgroup(NULL, fs_info, 256, NULL);
+       ret = btrfs_create_qgroup(NULL, fs_info, 256);
         if (ret) {
                 test_msg("Couldn't create a qgroup %d\n", ret);
                 return ret;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 8be4278..5628e25 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -35,7 +35,7 @@
  
  #define BTRFS_ROOT_TRANS_TAG 0
  
-static unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
+static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
         [TRANS_STATE_RUNNING]           = 0U,
         [TRANS_STATE_BLOCKED]           = (__TRANS_USERSPACE |
                                            __TRANS_START),
@@ -64,6 +64,9 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
         if (atomic_dec_and_test(&transaction->use_count)) {
                 BUG_ON(!list_empty(&transaction->list));
                 WARN_ON(!RB_EMPTY_ROOT(&transaction->delayed_refs.href_root));
+               if (transaction->delayed_refs.pending_csums)
+                       printk(KERN_ERR "pending csums is %llu\n",
+                              transaction->delayed_refs.pending_csums);
                 while (!list_empty(&transaction->pending_chunks)) {
                         struct extent_map *em;
  
@@ -93,11 +96,8 @@ static void clear_btree_io_tree(struct extent_io_tree *tree)
                  */
                 ASSERT(!waitqueue_active(&state->wq));
                 free_extent_state(state);
-               if (need_resched()) {
-                       spin_unlock(&tree->lock);
-                       cond_resched();
-                       spin_lock(&tree->lock);
-               }
+
+               cond_resched_lock(&tree->lock);
         }
         spin_unlock(&tree->lock);
  }
@@ -222,10 +222,12 @@ loop:
         atomic_set(&cur_trans->use_count, 2);
         cur_trans->have_free_bgs = 0;
         cur_trans->start_time = get_seconds();
+       cur_trans->dirty_bg_run = 0;
  
         cur_trans->delayed_refs.href_root = RB_ROOT;
         atomic_set(&cur_trans->delayed_refs.num_entries, 0);
         cur_trans->delayed_refs.num_heads_ready = 0;
+       cur_trans->delayed_refs.pending_csums = 0;
         cur_trans->delayed_refs.num_heads = 0;
         cur_trans->delayed_refs.flushing = 0;
         cur_trans->delayed_refs.run_delayed_start = 0;
@@ -250,6 +252,9 @@ loop:
         INIT_LIST_HEAD(&cur_trans->switch_commits);
         INIT_LIST_HEAD(&cur_trans->pending_ordered);
         INIT_LIST_HEAD(&cur_trans->dirty_bgs);
+       INIT_LIST_HEAD(&cur_trans->io_bgs);
+       mutex_init(&cur_trans->cache_write_mutex);
+       cur_trans->num_dirty_bgs = 0;
         spin_lock_init(&cur_trans->dirty_bgs_lock);
         list_add_tail(&cur_trans->list, &fs_info->trans_list);
         extent_io_tree_init(&cur_trans->dirty_pages,
@@ -721,7 +726,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
         updates = trans->delayed_ref_updates;
         trans->delayed_ref_updates = 0;
         if (updates) {
-               err = btrfs_run_delayed_refs(trans, root, updates);
+               err = btrfs_run_delayed_refs(trans, root, updates * 2);
                 if (err) /* Error code will also eval true */
                         return err;
         }
@@ -1057,6 +1062,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
  {
         struct btrfs_fs_info *fs_info = root->fs_info;
         struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
+       struct list_head *io_bgs = &trans->transaction->io_bgs;
         struct list_head *next;
         struct extent_buffer *eb;
         int ret;
@@ -1110,7 +1116,7 @@ again:
                         return ret;
         }
  
-       while (!list_empty(dirty_bgs)) {
+       while (!list_empty(dirty_bgs) || !list_empty(io_bgs)) {
                 ret = btrfs_write_dirty_block_groups(trans, root);
                 if (ret)
                         return ret;
@@ -1810,6 +1816,37 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                 return ret;
         }
  
+       if (!cur_trans->dirty_bg_run) {
+               int run_it = 0;
+
+               /* this mutex is also taken before trying to set
+                * block groups readonly.  We need to make sure
+                * that nobody has set a block group readonly
+                * after a extents from that block group have been
+                * allocated for cache files.  btrfs_set_block_group_ro
+                * will wait for the transaction to commit if it
+                * finds dirty_bg_run = 1
+                *
+                * The dirty_bg_run flag is also used to make sure only
+                * one process starts all the block group IO.  It wouldn't
+                * hurt to have more than one go through, but there's no
+                * real advantage to it either.
+                */
+               mutex_lock(&root->fs_info->ro_block_group_mutex);
+               if (!cur_trans->dirty_bg_run) {
+                       run_it = 1;
+                       cur_trans->dirty_bg_run = 1;
+               }
+               mutex_unlock(&root->fs_info->ro_block_group_mutex);
+
+               if (run_it)
+                       ret = btrfs_start_dirty_block_groups(trans, root);
+       }
+       if (ret) {
+               btrfs_end_transaction(trans, root);
+               return ret;
+       }
+
         spin_lock(&root->fs_info->trans_lock);
         list_splice(&trans->ordered, &cur_trans->pending_ordered);
         if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
@@ -2003,6 +2040,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
  
         assert_qgroups_uptodate(trans);
         ASSERT(list_empty(&cur_trans->dirty_bgs));
+       ASSERT(list_empty(&cur_trans->io_bgs));
         update_super_roots(root);
  
         btrfs_set_super_log_root(root->fs_info->super_copy, 0);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h

index 937050a..0b24755 100644 (file)
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -64,9 +64,19 @@ struct btrfs_transaction {
         struct list_head pending_ordered;
         struct list_head switch_commits;
         struct list_head dirty_bgs;
+       struct list_head io_bgs;
+       u64 num_dirty_bgs;
+
+       /*
+        * we need to make sure block group deletion doesn't race with
+        * free space cache writeout.  This mutex keeps them from stomping
+        * on each other
+        */
+       struct mutex cache_write_mutex;
         spinlock_t dirty_bgs_lock;
         struct btrfs_delayed_ref_root delayed_refs;
         int aborted;
+       int dirty_bg_run;
  };
  
  #define __TRANS_FREEZABLE      (1U << 0)
@@ -136,9 +146,11 @@ struct btrfs_pending_snapshot {
  static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
                                               struct inode *inode)
  {
+       spin_lock(&BTRFS_I(inode)->lock);
         BTRFS_I(inode)->last_trans = trans->transaction->transid;
         BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
         BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
+       spin_unlock(&BTRFS_I(inode)->lock);
  }
  
  int btrfs_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index c5b8ba3..d049683 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -492,11 +492,19 @@ insert:
  
                 if (btrfs_inode_generation(eb, src_item) == 0) {
                         struct extent_buffer *dst_eb = path->nodes[0];
+                       const u64 ino_size = btrfs_inode_size(eb, src_item);
  
+                       /*
+                        * For regular files an ino_size == 0 is used only when
+                        * logging that an inode exists, as part of a directory
+                        * fsync, and the inode wasn't fsynced before. In this
+                        * case don't set the size of the inode in the fs/subvol
+                        * tree, otherwise we would be throwing valid data away.
+                        */
                         if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
-                           S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
+                           S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
+                           ino_size != 0) {
                                 struct btrfs_map_token token;
-                               u64 ino_size = btrfs_inode_size(eb, src_item);
  
                                 btrfs_init_map_token(&token);
                                 btrfs_set_token_inode_size(dst_eb, dst_item,
@@ -1951,6 +1959,104 @@ out:
         return ret;
  }
  
+static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
+                             struct btrfs_root *root,
+                             struct btrfs_root *log,
+                             struct btrfs_path *path,
+                             const u64 ino)
+{
+       struct btrfs_key search_key;
+       struct btrfs_path *log_path;
+       int i;
+       int nritems;
+       int ret;
+
+       log_path = btrfs_alloc_path();
+       if (!log_path)
+               return -ENOMEM;
+
+       search_key.objectid = ino;
+       search_key.type = BTRFS_XATTR_ITEM_KEY;
+       search_key.offset = 0;
+again:
+       ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+process_leaf:
+       nritems = btrfs_header_nritems(path->nodes[0]);
+       for (i = path->slots[0]; i < nritems; i++) {
+               struct btrfs_key key;
+               struct btrfs_dir_item *di;
+               struct btrfs_dir_item *log_di;
+               u32 total_size;
+               u32 cur;
+
+               btrfs_item_key_to_cpu(path->nodes[0], &key, i);
+               if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
+                       ret = 0;
+                       goto out;
+               }
+
+               di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
+               total_size = btrfs_item_size_nr(path->nodes[0], i);
+               cur = 0;
+               while (cur < total_size) {
+                       u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
+                       u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
+                       u32 this_len = sizeof(*di) + name_len + data_len;
+                       char *name;
+
+                       name = kmalloc(name_len, GFP_NOFS);
+                       if (!name) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+                       read_extent_buffer(path->nodes[0], name,
+                                          (unsigned long)(di + 1), name_len);
+
+                       log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
+                                                   name, name_len, 0);
+                       btrfs_release_path(log_path);
+                       if (!log_di) {
+                               /* Doesn't exist in log tree, so delete it. */
+                               btrfs_release_path(path);
+                               di = btrfs_lookup_xattr(trans, root, path, ino,
+                                                       name, name_len, -1);
+                               kfree(name);
+                               if (IS_ERR(di)) {
+                                       ret = PTR_ERR(di);
+                                       goto out;
+                               }
+                               ASSERT(di);
+                               ret = btrfs_delete_one_dir_name(trans, root,
+                                                               path, di);
+                               if (ret)
+                                       goto out;
+                               btrfs_release_path(path);
+                               search_key = key;
+                               goto again;
+                       }
+                       kfree(name);
+                       if (IS_ERR(log_di)) {
+                               ret = PTR_ERR(log_di);
+                               goto out;
+                       }
+                       cur += this_len;
+                       di = (struct btrfs_dir_item *)((char *)di + this_len);
+               }
+       }
+       ret = btrfs_next_leaf(root, path);
+       if (ret > 0)
+               ret = 0;
+       else if (ret == 0)
+               goto process_leaf;
+out:
+       btrfs_free_path(log_path);
+       btrfs_release_path(path);
+       return ret;
+}
+
+
  /*
   * deletion replay happens before we copy any new directory items
   * out of the log or out of backreferences from inodes.  It
@@ -2104,6 +2210,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
  
                         inode_item = btrfs_item_ptr(eb, i,
                                             struct btrfs_inode_item);
+                       ret = replay_xattr_deletes(wc->trans, root, log,
+                                                  path, key.objectid);
+                       if (ret)
+                               break;
                         mode = btrfs_inode_mode(eb, inode_item);
                         if (S_ISDIR(mode)) {
                                 ret = replay_dir_deletes(wc->trans,
@@ -2230,7 +2340,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
                                 if (trans) {
                                         btrfs_tree_lock(next);
                                         btrfs_set_lock_blocking(next);
-                                       clean_tree_block(trans, root, next);
+                                       clean_tree_block(trans, root->fs_info,
+                                                       next);
                                         btrfs_wait_tree_block_writeback(next);
                                         btrfs_tree_unlock(next);
                                 }
@@ -2308,7 +2419,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
                                 if (trans) {
                                         btrfs_tree_lock(next);
                                         btrfs_set_lock_blocking(next);
-                                       clean_tree_block(trans, root, next);
+                                       clean_tree_block(trans, root->fs_info,
+                                                       next);
                                         btrfs_wait_tree_block_writeback(next);
                                         btrfs_tree_unlock(next);
                                 }
@@ -2384,7 +2496,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
                         if (trans) {
                                 btrfs_tree_lock(next);
                                 btrfs_set_lock_blocking(next);
-                               clean_tree_block(trans, log, next);
+                               clean_tree_block(trans, log->fs_info, next);
                                 btrfs_wait_tree_block_writeback(next);
                                 btrfs_tree_unlock(next);
                         }
@@ -3020,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, struct inode *inode,
                           struct btrfs_path *path,
                           struct btrfs_path *dst_path, int key_type,
+                         struct btrfs_log_ctx *ctx,
                           u64 min_offset, u64 *last_offset_ret)
  {
         struct btrfs_key min_key;
@@ -3104,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                 src = path->nodes[0];
                 nritems = btrfs_header_nritems(src);
                 for (i = path->slots[0]; i < nritems; i++) {
+                       struct btrfs_dir_item *di;
+
                         btrfs_item_key_to_cpu(src, &min_key, i);
  
                         if (min_key.objectid != ino || min_key.type != key_type)
@@ -3114,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
                                 err = ret;
                                 goto done;
                         }
+
+                       /*
+                        * We must make sure that when we log a directory entry,
+                        * the corresponding inode, after log replay, has a
+                        * matching link count. For example:
+                        *
+                        * touch foo
+                        * mkdir mydir
+                        * sync
+                        * ln foo mydir/bar
+                        * xfs_io -c "fsync" mydir
+                        * <crash>
+                        * <mount fs and log replay>
+                        *
+                        * Would result in a fsync log that when replayed, our
+                        * file inode would have a link count of 1, but we get
+                        * two directory entries pointing to the same inode.
+                        * After removing one of the names, it would not be
+                        * possible to remove the other name, which resulted
+                        * always in stale file handle errors, and would not
+                        * be possible to rmdir the parent directory, since
+                        * its i_size could never decrement to the value
+                        * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
+                        */
+                       di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
+                       btrfs_dir_item_key_to_cpu(src, di, &tmp);
+                       if (ctx &&
+                           (btrfs_dir_transid(src, di) == trans->transid ||
+                            btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
+                           tmp.type != BTRFS_ROOT_ITEM_KEY)
+                               ctx->log_new_dentries = true;
                 }
                 path->slots[0] = nritems;
  
@@ -3175,7 +3321,8 @@ done:
  static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, struct inode *inode,
                           struct btrfs_path *path,
-                         struct btrfs_path *dst_path)
+                         struct btrfs_path *dst_path,
+                         struct btrfs_log_ctx *ctx)
  {
         u64 min_key;
         u64 max_key;
@@ -3187,7 +3334,7 @@ again:
         max_key = 0;
         while (1) {
                 ret = log_dir_items(trans, root, inode, path,
-                                   dst_path, key_type, min_key,
+                                   dst_path, key_type, ctx, min_key,
                                     &max_key);
                 if (ret)
                         return ret;
@@ -3963,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
         if (ret < 0) {
                 return ret;
         } else if (ret > 0) {
-               *size_ret = i_size_read(inode);
+               *size_ret = 0;
         } else {
                 struct btrfs_inode_item *item;
  
@@ -4070,10 +4217,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
         if (S_ISDIR(inode->i_mode)) {
                 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
  
-               if (inode_only == LOG_INODE_EXISTS) {
-                       max_key_type = BTRFS_INODE_EXTREF_KEY;
-                       max_key.type = max_key_type;
-               }
+               if (inode_only == LOG_INODE_EXISTS)
+                       max_key_type = BTRFS_XATTR_ITEM_KEY;
                 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
         } else {
                 if (inode_only == LOG_INODE_EXISTS) {
@@ -4098,7 +4243,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
                              &BTRFS_I(inode)->runtime_flags)) {
                         if (inode_only == LOG_INODE_EXISTS) {
-                               max_key.type = BTRFS_INODE_EXTREF_KEY;
+                               max_key.type = BTRFS_XATTR_ITEM_KEY;
                                 ret = drop_objectid_items(trans, log, path, ino,
                                                           max_key.type);
                         } else {
@@ -4106,20 +4251,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
                                           &BTRFS_I(inode)->runtime_flags);
                                 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
                                           &BTRFS_I(inode)->runtime_flags);
-                               ret = btrfs_truncate_inode_items(trans, log,
-                                                                inode, 0, 0);
+                               while(1) {
+                                       ret = btrfs_truncate_inode_items(trans,
+                                                        log, inode, 0, 0);
+                                       if (ret != -EAGAIN)
+                                               break;
+                               }
                         }
-               } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                   &BTRFS_I(inode)->runtime_flags) ||
+               } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
+                                             &BTRFS_I(inode)->runtime_flags) ||
                            inode_only == LOG_INODE_EXISTS) {
-                       if (inode_only == LOG_INODE_ALL) {
-                               clear_bit(BTRFS_INODE_COPY_EVERYTHING,
-                                         &BTRFS_I(inode)->runtime_flags);
+                       if (inode_only == LOG_INODE_ALL)
                                 fast_search = true;
-                               max_key.type = BTRFS_XATTR_ITEM_KEY;
-                       } else {
-                               max_key.type = BTRFS_INODE_EXTREF_KEY;
-                       }
+                       max_key.type = BTRFS_XATTR_ITEM_KEY;
                         ret = drop_objectid_items(trans, log, path, ino,
                                                   max_key.type);
                 } else {
@@ -4277,15 +4421,18 @@ log_extents:
         }
  
         if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
-               ret = log_directory_changes(trans, root, inode, path, dst_path);
+               ret = log_directory_changes(trans, root, inode, path, dst_path,
+                                           ctx);
                 if (ret) {
                         err = ret;
                         goto out_unlock;
                 }
         }
  
+       spin_lock(&BTRFS_I(inode)->lock);
         BTRFS_I(inode)->logged_trans = trans->transid;
         BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
+       spin_unlock(&BTRFS_I(inode)->lock);
  out_unlock:
         if (unlikely(err))
                 btrfs_put_logged_extents(&logged_list);
@@ -4327,9 +4474,9 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                         goto out;
  
         if (!S_ISDIR(inode->i_mode)) {
-               if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
+               if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
                         goto out;
-               inode = parent->d_inode;
+               inode = d_inode(parent);
         }
  
         while (1) {
@@ -4355,7 +4502,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                         break;
                 }
  
-               if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
+               if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
                         break;
  
                 if (IS_ROOT(parent))
@@ -4364,7 +4511,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
                 parent = dget_parent(parent);
                 dput(old_parent);
                 old_parent = parent;
-               inode = parent->d_inode;
+               inode = d_inode(parent);
  
         }
         dput(old_parent);
@@ -4372,6 +4519,181 @@ out:
         return ret;
  }
  
+struct btrfs_dir_list {
+       u64 ino;
+       struct list_head list;
+};
+
+/*
+ * Log the inodes of the new dentries of a directory. See log_dir_items() for
+ * details about the why it is needed.
+ * This is a recursive operation - if an existing dentry corresponds to a
+ * directory, that directory's new entries are logged too (same behaviour as
+ * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
+ * the dentries point to we do not lock their i_mutex, otherwise lockdep
+ * complains about the following circular lock dependency / possible deadlock:
+ *
+ *        CPU0                                        CPU1
+ *        ----                                        ----
+ * lock(&type->i_mutex_dir_key#3/2);
+ *                                            lock(sb_internal#2);
+ *                                            lock(&type->i_mutex_dir_key#3/2);
+ * lock(&sb->s_type->i_mutex_key#14);
+ *
+ * Where sb_internal is the lock (a counter that works as a lock) acquired by
+ * sb_start_intwrite() in btrfs_start_transaction().
+ * Not locking i_mutex of the inodes is still safe because:
+ *
+ * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
+ *    that while logging the inode new references (names) are added or removed
+ *    from the inode, leaving the logged inode item with a link count that does
+ *    not match the number of logged inode reference items. This is fine because
+ *    at log replay time we compute the real number of links and correct the
+ *    link count in the inode item (see replay_one_buffer() and
+ *    link_to_fixup_dir());
+ *
+ * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
+ *    while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
+ *    BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
+ *    has a size that doesn't match the sum of the lengths of all the logged
+ *    names. This does not result in a problem because if a dir_item key is
+ *    logged but its matching dir_index key is not logged, at log replay time we
+ *    don't use it to replay the respective name (see replay_one_name()). On the
+ *    other hand if only the dir_index key ends up being logged, the respective
+ *    name is added to the fs/subvol tree with both the dir_item and dir_index
+ *    keys created (see replay_one_name()).
+ *    The directory's inode item with a wrong i_size is not a problem as well,
+ *    since we don't use it at log replay time to set the i_size in the inode
+ *    item of the fs/subvol tree (see overwrite_item()).
+ */
+static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct inode *start_inode,
+                               struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_root *log = root->log_root;
+       struct btrfs_path *path;
+       LIST_HEAD(dir_list);
+       struct btrfs_dir_list *dir_elem;
+       int ret = 0;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
+       if (!dir_elem) {
+               btrfs_free_path(path);
+               return -ENOMEM;
+       }
+       dir_elem->ino = btrfs_ino(start_inode);
+       list_add_tail(&dir_elem->list, &dir_list);
+
+       while (!list_empty(&dir_list)) {
+               struct extent_buffer *leaf;
+               struct btrfs_key min_key;
+               int nritems;
+               int i;
+
+               dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
+                                           list);
+               if (ret)
+                       goto next_dir_inode;
+
+               min_key.objectid = dir_elem->ino;
+               min_key.type = BTRFS_DIR_ITEM_KEY;
+               min_key.offset = 0;
+again:
+               btrfs_release_path(path);
+               ret = btrfs_search_forward(log, &min_key, path, trans->transid);
+               if (ret < 0) {
+                       goto next_dir_inode;
+               } else if (ret > 0) {
+                       ret = 0;
+                       goto next_dir_inode;
+               }
+
+process_leaf:
+               leaf = path->nodes[0];
+               nritems = btrfs_header_nritems(leaf);
+               for (i = path->slots[0]; i < nritems; i++) {
+                       struct btrfs_dir_item *di;
+                       struct btrfs_key di_key;
+                       struct inode *di_inode;
+                       struct btrfs_dir_list *new_dir_elem;
+                       int log_mode = LOG_INODE_EXISTS;
+                       int type;
+
+                       btrfs_item_key_to_cpu(leaf, &min_key, i);
+                       if (min_key.objectid != dir_elem->ino ||
+                           min_key.type != BTRFS_DIR_ITEM_KEY)
+                               goto next_dir_inode;
+
+                       di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
+                       type = btrfs_dir_type(leaf, di);
+                       if (btrfs_dir_transid(leaf, di) < trans->transid &&
+                           type != BTRFS_FT_DIR)
+                               continue;
+                       btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
+                       if (di_key.type == BTRFS_ROOT_ITEM_KEY)
+                               continue;
+
+                       di_inode = btrfs_iget(root->fs_info->sb, &di_key,
+                                             root, NULL);
+                       if (IS_ERR(di_inode)) {
+                               ret = PTR_ERR(di_inode);
+                               goto next_dir_inode;
+                       }
+
+                       if (btrfs_inode_in_log(di_inode, trans->transid)) {
+                               iput(di_inode);
+                               continue;
+                       }
+
+                       ctx->log_new_dentries = false;
+                       if (type == BTRFS_FT_DIR)
+                               log_mode = LOG_INODE_ALL;
+                       btrfs_release_path(path);
+                       ret = btrfs_log_inode(trans, root, di_inode,
+                                             log_mode, 0, LLONG_MAX, ctx);
+                       iput(di_inode);
+                       if (ret)
+                               goto next_dir_inode;
+                       if (ctx->log_new_dentries) {
+                               new_dir_elem = kmalloc(sizeof(*new_dir_elem),
+                                                      GFP_NOFS);
+                               if (!new_dir_elem) {
+                                       ret = -ENOMEM;
+                                       goto next_dir_inode;
+                               }
+                               new_dir_elem->ino = di_key.objectid;
+                               list_add_tail(&new_dir_elem->list, &dir_list);
+                       }
+                       break;
+               }
+               if (i == nritems) {
+                       ret = btrfs_next_leaf(log, path);
+                       if (ret < 0) {
+                               goto next_dir_inode;
+                       } else if (ret > 0) {
+                               ret = 0;
+                               goto next_dir_inode;
+                       }
+                       goto process_leaf;
+               }
+               if (min_key.offset < (u64)-1) {
+                       min_key.offset++;
+                       goto again;
+               }
+next_dir_inode:
+               list_del(&dir_elem->list);
+               kfree(dir_elem);
+       }
+
+       btrfs_free_path(path);
+       return ret;
+}
+
  /*
   * helper function around btrfs_log_inode to make sure newly created
   * parent directories also end up in the log.  A minimal inode and backref
@@ -4394,6 +4716,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
         const struct dentry * const first_parent = parent;
         const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
                                  last_committed);
+       bool log_dentries = false;
+       struct inode *orig_inode = inode;
  
         sb = inode->i_sb;
  
@@ -4449,11 +4773,14 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                 goto end_trans;
         }
  
+       if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
+               log_dentries = true;
+
         while (1) {
-               if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
+               if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
                         break;
  
-               inode = parent->d_inode;
+               inode = d_inode(parent);
                 if (root != BTRFS_I(inode)->root)
                         break;
  
@@ -4485,7 +4812,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
                 dput(old_parent);
                 old_parent = parent;
         }
-       ret = 0;
+       if (log_dentries)
+               ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
+       else
+               ret = 0;
  end_trans:
         dput(old_parent);
         if (ret < 0) {
@@ -4515,7 +4845,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
         struct dentry *parent = dget_parent(dentry);
         int ret;
  
-       ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
+       ret = btrfs_log_inode_parent(trans, root, d_inode(dentry), parent,
                                      start, end, 0, ctx);
         dput(parent);
  
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h

index 154990c..6916a78 100644 (file)
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -29,6 +29,7 @@ struct btrfs_log_ctx {
         int log_ret;
         int log_transid;
         int io_err;
+       bool log_new_dentries;
         struct list_head list;
  };
  
@@ -37,6 +38,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
         ctx->log_ret = 0;
         ctx->log_transid = 0;
         ctx->io_err = 0;
+       ctx->log_new_dentries = false;
         INIT_LIST_HEAD(&ctx->list);
  }
  
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c

index 8222f6f..8bcd2a0 100644 (file)
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -366,8 +366,8 @@ loop_lock:
                 btrfsic_submit_bio(cur->bi_rw, cur);
                 num_run++;
                 batch_run++;
-               if (need_resched())
-                       cond_resched();
+
+               cond_resched();
  
                 /*
                  * we made progress, there is more work to do and the bdi
@@ -400,8 +400,7 @@ loop_lock:
                                  * against it before looping
                                  */
                                 last_waited = ioc->last_waited;
-                               if (need_resched())
-                                       cond_resched();
+                               cond_resched();
                                 continue;
                         }
                         spin_lock(&device->io_lock);
@@ -609,8 +608,7 @@ error:
         return ERR_PTR(-ENOMEM);
  }
  
-void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
-                              struct btrfs_fs_devices *fs_devices, int step)
+void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step)
  {
         struct btrfs_device *device, *next;
         struct btrfs_device *latest_dev = NULL;
@@ -1136,11 +1134,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
         path = btrfs_alloc_path();
         if (!path)
                 return -ENOMEM;
-again:
+
         max_hole_start = search_start;
         max_hole_size = 0;
-       hole_size = 0;
  
+again:
         if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
                 ret = -ENOSPC;
                 goto out;
@@ -1233,21 +1231,23 @@ next:
          * allocated dev extents, and when shrinking the device,
          * search_end may be smaller than search_start.
          */
-       if (search_end > search_start)
+       if (search_end > search_start) {
                 hole_size = search_end - search_start;
  
-       if (hole_size > max_hole_size) {
-               max_hole_start = search_start;
-               max_hole_size = hole_size;
-       }
+               if (contains_pending_extent(trans, device, &search_start,
+                                           hole_size)) {
+                       btrfs_release_path(path);
+                       goto again;
+               }
  
-       if (contains_pending_extent(trans, device, &search_start, hole_size)) {
-               btrfs_release_path(path);
-               goto again;
+               if (hole_size > max_hole_size) {
+                       max_hole_start = search_start;
+                       max_hole_size = hole_size;
+               }
         }
  
         /* See above. */
-       if (hole_size < num_bytes)
+       if (max_hole_size < num_bytes)
                 ret = -ENOSPC;
         else
                 ret = 0;
@@ -2487,8 +2487,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans,
  }
  
  static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root,
-                           u64 chunk_tree, u64 chunk_objectid,
+                           struct btrfs_root *root, u64 chunk_objectid,
                             u64 chunk_offset)
  {
         int ret;
@@ -2580,7 +2579,6 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
         struct map_lookup *map;
         u64 dev_extent_len = 0;
         u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
-       u64 chunk_tree = root->fs_info->chunk_root->objectid;
         int i, ret = 0;
  
         /* Just in case */
@@ -2634,8 +2632,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                         }
                 }
         }
-       ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
-                              chunk_offset);
+       ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
         if (ret) {
                 btrfs_abort_transaction(trans, root, ret);
                 goto out;
@@ -2664,8 +2661,8 @@ out:
  }
  
  static int btrfs_relocate_chunk(struct btrfs_root *root,
-                        u64 chunk_tree, u64 chunk_objectid,
-                        u64 chunk_offset)
+                               u64 chunk_objectid,
+                               u64 chunk_offset)
  {
         struct btrfs_root *extent_root;
         struct btrfs_trans_handle *trans;
@@ -2707,7 +2704,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root)
         struct btrfs_chunk *chunk;
         struct btrfs_key key;
         struct btrfs_key found_key;
-       u64 chunk_tree = chunk_root->root_key.objectid;
         u64 chunk_type;
         bool retried = false;
         int failed = 0;
@@ -2744,7 +2740,7 @@ again:
                 btrfs_release_path(path);
  
                 if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
-                       ret = btrfs_relocate_chunk(chunk_root, chunk_tree,
+                       ret = btrfs_relocate_chunk(chunk_root,
                                                    found_key.objectid,
                                                    found_key.offset);
                         if (ret == -ENOSPC)
@@ -3022,7 +3018,7 @@ static int chunk_drange_filter(struct extent_buffer *leaf,
  
                 stripe_offset = btrfs_stripe_offset(leaf, stripe);
                 stripe_length = btrfs_chunk_length(leaf, chunk);
-               do_div(stripe_length, factor);
+               stripe_length = div_u64(stripe_length, factor);
  
                 if (stripe_offset < bargs->pend &&
                     stripe_offset + stripe_length > bargs->pstart)
@@ -3255,7 +3251,6 @@ again:
                 }
  
                 ret = btrfs_relocate_chunk(chunk_root,
-                                          chunk_root->root_key.objectid,
                                            found_key.objectid,
                                            found_key.offset);
                 if (ret && ret != -ENOSPC)
@@ -3957,7 +3952,6 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
         struct btrfs_dev_extent *dev_extent = NULL;
         struct btrfs_path *path;
         u64 length;
-       u64 chunk_tree;
         u64 chunk_objectid;
         u64 chunk_offset;
         int ret;
@@ -4027,13 +4021,11 @@ again:
                         break;
                 }
  
-               chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
                 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
                 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
                 btrfs_release_path(path);
  
-               ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
-                                          chunk_offset);
+               ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset);
                 if (ret && ret != -ENOSPC)
                         goto done;
                 if (ret == -ENOSPC)
@@ -4131,7 +4123,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
         return 0;
  }
  
-static struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+static const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
         [BTRFS_RAID_RAID10] = {
                 .sub_stripes    = 2,
                 .dev_stripes    = 1,
@@ -4289,7 +4281,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
         max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
                              max_chunk_size);
  
-       devices_info = kzalloc(sizeof(*devices_info) * fs_devices->rw_devices,
+       devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
                                GFP_NOFS);
         if (!devices_info)
                 return -ENOMEM;
@@ -4400,8 +4392,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
          */
         if (stripe_size * data_stripes > max_chunk_size) {
                 u64 mask = (1ULL << 24) - 1;
-               stripe_size = max_chunk_size;
-               do_div(stripe_size, data_stripes);
+
+               stripe_size = div_u64(max_chunk_size, data_stripes);
  
                 /* bump the answer up to a 16MB boundary */
                 stripe_size = (stripe_size + mask) & ~mask;
@@ -4413,10 +4405,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                         stripe_size = devices_info[ndevs-1].max_avail;
         }
  
-       do_div(stripe_size, dev_stripes);
+       stripe_size = div_u64(stripe_size, dev_stripes);
  
         /* align to BTRFS_STRIPE_LEN */
-       do_div(stripe_size, raid_stripe_len);
+       stripe_size = div_u64(stripe_size, raid_stripe_len);
         stripe_size *= raid_stripe_len;
  
         map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -4954,7 +4946,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
         u64 stripe_nr_orig;
         u64 stripe_nr_end;
         u64 stripe_len;
-       int stripe_index;
+       u32 stripe_index;
         int i;
         int ret = 0;
         int num_stripes;
@@ -4995,7 +4987,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
          * stripe_nr counts the total number of stripes we have to stride
          * to get to this block
          */
-       do_div(stripe_nr, stripe_len);
+       stripe_nr = div64_u64(stripe_nr, stripe_len);
  
         stripe_offset = stripe_nr * stripe_len;
         BUG_ON(offset < stripe_offset);
@@ -5011,7 +5003,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                 /* allow a write of a full stripe, but make sure we don't
                  * allow straddling of stripes
                  */
-               do_div(raid56_full_stripe_start, full_stripe_len);
+               raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
+                               full_stripe_len);
                 raid56_full_stripe_start *= full_stripe_len;
         }
  
@@ -5136,7 +5129,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
         stripe_index = 0;
         stripe_nr_orig = stripe_nr;
         stripe_nr_end = ALIGN(offset + *length, map->stripe_len);
-       do_div(stripe_nr_end, map->stripe_len);
+       stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len);
         stripe_end_offset = stripe_nr_end * map->stripe_len -
                             (offset + *length);
  
@@ -5144,7 +5137,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                 if (rw & REQ_DISCARD)
                         num_stripes = min_t(u64, map->num_stripes,
                                             stripe_nr_end - stripe_nr_orig);
-               stripe_index = do_div(stripe_nr, map->num_stripes);
+               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+                               &stripe_index);
                 if (!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)))
                         mirror_num = 1;
         } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
@@ -5170,9 +5164,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                 }
  
         } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
-               int factor = map->num_stripes / map->sub_stripes;
+               u32 factor = map->num_stripes / map->sub_stripes;
  
-               stripe_index = do_div(stripe_nr, factor);
+               stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
                 stripe_index *= map->sub_stripes;
  
                 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
@@ -5198,8 +5192,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                     ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
                      mirror_num > 1)) {
                         /* push stripe_nr back to the start of the full stripe */
-                       stripe_nr = raid56_full_stripe_start;
-                       do_div(stripe_nr, stripe_len * nr_data_stripes(map));
+                       stripe_nr = div_u64(raid56_full_stripe_start,
+                                       stripe_len * nr_data_stripes(map));
  
                         /* RAID[56] write or recovery. Return all stripes */
                         num_stripes = map->num_stripes;
@@ -5209,32 +5203,32 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                         stripe_index = 0;
                         stripe_offset = 0;
                 } else {
-                       u64 tmp;
-
                         /*
                          * Mirror #0 or #1 means the original data block.
                          * Mirror #2 is RAID5 parity block.
                          * Mirror #3 is RAID6 Q block.
                          */
-                       stripe_index = do_div(stripe_nr, nr_data_stripes(map));
+                       stripe_nr = div_u64_rem(stripe_nr,
+                                       nr_data_stripes(map), &stripe_index);
                         if (mirror_num > 1)
                                 stripe_index = nr_data_stripes(map) +
                                                 mirror_num - 2;
  
                         /* We distribute the parity blocks across stripes */
-                       tmp = stripe_nr + stripe_index;
-                       stripe_index = do_div(tmp, map->num_stripes);
+                       div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
+                                       &stripe_index);
                         if (!(rw & (REQ_WRITE | REQ_DISCARD |
                                     REQ_GET_READ_MIRRORS)) && mirror_num <= 1)
                                 mirror_num = 1;
                 }
         } else {
                 /*
-                * after this do_div call, stripe_nr is the number of stripes
-                * on this device we have to walk to find the data, and
-                * stripe_index is the number of our device in the stripe array
+                * after this, stripe_nr is the number of stripes on this
+                * device we have to walk to find the data, and stripe_index is
+                * the number of our device in the stripe array
                  */
-               stripe_index = do_div(stripe_nr, map->num_stripes);
+               stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
+                               &stripe_index);
                 mirror_num = stripe_index + 1;
         }
         BUG_ON(stripe_index >= map->num_stripes);
@@ -5261,7 +5255,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
             need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) ||
             mirror_num > 1)) {
                 u64 tmp;
-               int i, rot;
+               unsigned rot;
  
                 bbio->raid_map = (u64 *)((void *)bbio->stripes +
                                  sizeof(struct btrfs_bio_stripe) *
@@ -5269,8 +5263,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                                  sizeof(int) * tgtdev_indexes);
  
                 /* Work out the disk rotation on this stripe-set */
-               tmp = stripe_nr;
-               rot = do_div(tmp, num_stripes);
+               div_u64_rem(stripe_nr, num_stripes, &rot);
  
                 /* Fill in the logical address of each stripe */
                 tmp = stripe_nr * nr_data_stripes(map);
@@ -5285,8 +5278,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
         }
  
         if (rw & REQ_DISCARD) {
-               int factor = 0;
-               int sub_stripes = 0;
+               u32 factor = 0;
+               u32 sub_stripes = 0;
                 u64 stripes_per_dev = 0;
                 u32 remaining_stripes = 0;
                 u32 last_stripe = 0;
@@ -5437,9 +5430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                         }
                 }
                 if (found) {
-                       u64 length = map->stripe_len;
-
-                       if (physical_of_found + length <=
+                       if (physical_of_found + map->stripe_len <=
                             dev_replace->cursor_left) {
                                 struct btrfs_bio_stripe *tgtdev_stripe =
                                         bbio->stripes + num_stripes;
@@ -5535,15 +5526,15 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
         rmap_len = map->stripe_len;
  
         if (map->type & BTRFS_BLOCK_GROUP_RAID10)
-               do_div(length, map->num_stripes / map->sub_stripes);
+               length = div_u64(length, map->num_stripes / map->sub_stripes);
         else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
-               do_div(length, map->num_stripes);
+               length = div_u64(length, map->num_stripes);
         else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-               do_div(length, nr_data_stripes(map));
+               length = div_u64(length, nr_data_stripes(map));
                 rmap_len = map->stripe_len * nr_data_stripes(map);
         }
  
-       buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
+       buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
         BUG_ON(!buf); /* -ENOMEM */
  
         for (i = 0; i < map->num_stripes; i++) {
@@ -5554,11 +5545,11 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
                         continue;
  
                 stripe_nr = physical - map->stripes[i].physical;
-               do_div(stripe_nr, map->stripe_len);
+               stripe_nr = div_u64(stripe_nr, map->stripe_len);
  
                 if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
                         stripe_nr = stripe_nr * map->num_stripes + i;
-                       do_div(stripe_nr, map->sub_stripes);
+                       stripe_nr = div_u64(stripe_nr, map->sub_stripes);
                 } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
                         stripe_nr = stripe_nr * map->num_stripes + i;
                 } /* else if RAID[56], multiply by nr_data_stripes().
@@ -5835,8 +5826,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
         u64 length = 0;
         u64 map_length;
         int ret;
-       int dev_nr = 0;
-       int total_devs = 1;
+       int dev_nr;
+       int total_devs;
         struct btrfs_bio *bbio = NULL;
  
         length = bio->bi_iter.bi_size;
@@ -5877,11 +5868,10 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                 BUG();
         }
  
-       while (dev_nr < total_devs) {
+       for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
                 dev = bbio->stripes[dev_nr].dev;
                 if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
                         bbio_error(bbio, first_bio, logical);
-                       dev_nr++;
                         continue;
                 }
  
@@ -5894,7 +5884,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                         ret = breakup_stripe_bio(root, bbio, first_bio, dev,
                                                  dev_nr, rw, async_submit);
                         BUG_ON(ret);
-                       dev_nr++;
                         continue;
                 }
  
@@ -5909,7 +5898,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                 submit_stripe_bio(root, bbio, bio,
                                   bbio->stripes[dev_nr].physical, dev_nr, rw,
                                   async_submit);
-               dev_nr++;
         }
         btrfs_bio_counter_dec(root->fs_info);
         return 0;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h

index 83069de..ebc3133 100644 (file)
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -421,8 +421,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
  int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
                           struct btrfs_fs_devices **fs_devices_ret);
  int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
-void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
-                              struct btrfs_fs_devices *fs_devices, int step);
+void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step);
  int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
                                          char *device_path,
                                          struct btrfs_device **device);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c

index 883b936..6f518c9 100644 (file)
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -261,7 +261,7 @@ out:
  ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
         struct btrfs_key key, found_key;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_path *path;
         struct extent_buffer *leaf;
@@ -364,22 +364,42 @@ const struct xattr_handler *btrfs_xattr_handlers[] = {
  /*
   * Check if the attribute is in a supported namespace.
   *
- * This applied after the check for the synthetic attributes in the system
+ * This is applied after the check for the synthetic attributes in the system
   * namespace.
   */
-static bool btrfs_is_valid_xattr(const char *name)
+static int btrfs_is_valid_xattr(const char *name)
  {
-       return !strncmp(name, XATTR_SECURITY_PREFIX,
-                       XATTR_SECURITY_PREFIX_LEN) ||
-              !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
-              !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
-              !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) ||
-               !strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN);
+       int len = strlen(name);
+       int prefixlen = 0;
+
+       if (!strncmp(name, XATTR_SECURITY_PREFIX,
+                       XATTR_SECURITY_PREFIX_LEN))
+               prefixlen = XATTR_SECURITY_PREFIX_LEN;
+       else if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+               prefixlen = XATTR_SYSTEM_PREFIX_LEN;
+       else if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+               prefixlen = XATTR_TRUSTED_PREFIX_LEN;
+       else if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+               prefixlen = XATTR_USER_PREFIX_LEN;
+       else if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
+               prefixlen = XATTR_BTRFS_PREFIX_LEN;
+       else
+               return -EOPNOTSUPP;
+
+       /*
+        * The name cannot consist of just prefix
+        */
+       if (len <= prefixlen)
+               return -EINVAL;
+
+       return 0;
  }
  
  ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
                        void *buffer, size_t size)
  {
+       int ret;
+
         /*
          * If this is a request for a synthetic attribute in the system.*
          * namespace use the generic infrastructure to resolve a handler
@@ -388,15 +408,17 @@ ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
         if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
                 return generic_getxattr(dentry, name, buffer, size);
  
-       if (!btrfs_is_valid_xattr(name))
-               return -EOPNOTSUPP;
-       return __btrfs_getxattr(dentry->d_inode, name, buffer, size);
+       ret = btrfs_is_valid_xattr(name);
+       if (ret)
+               return ret;
+       return __btrfs_getxattr(d_inode(dentry), name, buffer, size);
  }
  
  int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                    size_t size, int flags)
  {
-       struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+       struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
+       int ret;
  
         /*
          * The permission on security.* and system.* is not checked
@@ -413,23 +435,25 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
         if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
                 return generic_setxattr(dentry, name, value, size, flags);
  
-       if (!btrfs_is_valid_xattr(name))
-               return -EOPNOTSUPP;
+       ret = btrfs_is_valid_xattr(name);
+       if (ret)
+               return ret;
  
         if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
-               return btrfs_set_prop(dentry->d_inode, name,
+               return btrfs_set_prop(d_inode(dentry), name,
                                       value, size, flags);
  
         if (size == 0)
                 value = "";  /* empty EA, do not remove */
  
-       return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
+       return __btrfs_setxattr(NULL, d_inode(dentry), name, value, size,
                                 flags);
  }
  
  int btrfs_removexattr(struct dentry *dentry, const char *name)
  {
-       struct btrfs_root *root = BTRFS_I(dentry->d_inode)->root;
+       struct btrfs_root *root = BTRFS_I(d_inode(dentry))->root;
+       int ret;
  
         /*
          * The permission on security.* and system.* is not checked
@@ -446,14 +470,15 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
         if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
                 return generic_removexattr(dentry, name);
  
-       if (!btrfs_is_valid_xattr(name))
-               return -EOPNOTSUPP;
+       ret = btrfs_is_valid_xattr(name);
+       if (ret)
+               return ret;
  
         if (!strncmp(name, XATTR_BTRFS_PREFIX, XATTR_BTRFS_PREFIX_LEN))
-               return btrfs_set_prop(dentry->d_inode, name,
+               return btrfs_set_prop(d_inode(dentry), name,
                                       NULL, 0, XATTR_REPLACE);
  
-       return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
+       return __btrfs_setxattr(NULL, d_inode(dentry), name, NULL, 0,
                                 XATTR_REPLACE);
  }
  
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c

index fb22fd8..82990b8 100644 (file)
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -403,7 +403,7 @@ next:
         return ret;
  }
  
-struct btrfs_compress_op btrfs_zlib_compress = {
+const struct btrfs_compress_op btrfs_zlib_compress = {
         .alloc_workspace        = zlib_alloc_workspace,
         .free_workspace         = zlib_free_workspace,
         .compress_pages         = zlib_compress_pages,
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c

index fbb08e9..6af790f 100644 (file)
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -123,11 +123,11 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
  
         /* check parameters */
         ret = -EOPNOTSUPP;
-       if (!root->d_inode ||
-           !root->d_inode->i_op->lookup ||
-           !root->d_inode->i_op->mkdir ||
-           !root->d_inode->i_op->setxattr ||
-           !root->d_inode->i_op->getxattr ||
+       if (d_is_negative(root) ||
+           !d_backing_inode(root)->i_op->lookup ||
+           !d_backing_inode(root)->i_op->mkdir ||
+           !d_backing_inode(root)->i_op->setxattr ||
+           !d_backing_inode(root)->i_op->getxattr ||
             !root->d_sb->s_op->statfs ||
             !root->d_sb->s_op->sync_fs)
                 goto error_unsupported;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c

index 2324262..afa023d 100644 (file)
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -441,12 +441,12 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
  
         fscache_set_store_limit(&object->fscache, ni_size);
  
-       oi_size = i_size_read(object->backer->d_inode);
+       oi_size = i_size_read(d_backing_inode(object->backer));
         if (oi_size == ni_size)
                 return 0;
  
         cachefiles_begin_secure(cache, &saved_cred);
-       mutex_lock(&object->backer->d_inode->i_mutex);
+       mutex_lock(&d_inode(object->backer)->i_mutex);
  
         /* if there's an extension to a partial page at the end of the backing
          * file, we need to discard the partial page so that we pick up new
@@ -465,7 +465,7 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
         ret = notify_change(object->backer, &newattrs, NULL);
  
  truncate_failed:
-       mutex_unlock(&object->backer->d_inode->i_mutex);
+       mutex_unlock(&d_inode(object->backer)->i_mutex);
         cachefiles_end_secure(cache, saved_cred);
  
         if (ret == -EIO) {
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c

index 1e51714..ab857ab 100644 (file)
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -286,13 +286,13 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
                 if (ret < 0) {
                         cachefiles_io_error(cache, "Unlink security error");
                 } else {
-                       ret = vfs_unlink(dir->d_inode, rep, NULL);
+                       ret = vfs_unlink(d_inode(dir), rep, NULL);
  
                         if (preemptive)
                                 cachefiles_mark_object_buried(cache, rep);
                 }
  
-               mutex_unlock(&dir->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dir)->i_mutex);
  
                 if (ret == -EIO)
                         cachefiles_io_error(cache, "Unlink failed");
@@ -303,7 +303,7 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
  
         /* directories have to be moved to the graveyard */
         _debug("move stale object to graveyard");
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
  
  try_again:
         /* first step is to make up a grave dentry in the graveyard */
@@ -355,7 +355,7 @@ try_again:
                 return -EIO;
         }
  
-       if (grave->d_inode) {
+       if (d_is_positive(grave)) {
                 unlock_rename(cache->graveyard, dir);
                 dput(grave);
                 grave = NULL;
@@ -387,8 +387,8 @@ try_again:
         if (ret < 0) {
                 cachefiles_io_error(cache, "Rename security error %d", ret);
         } else {
-               ret = vfs_rename(dir->d_inode, rep,
-                                cache->graveyard->d_inode, grave, NULL, 0);
+               ret = vfs_rename(d_inode(dir), rep,
+                                d_inode(cache->graveyard), grave, NULL, 0);
                 if (ret != 0 && ret != -ENOMEM)
                         cachefiles_io_error(cache,
                                             "Rename failed with error %d", ret);
@@ -415,18 +415,18 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
         _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
  
         ASSERT(object->dentry);
-       ASSERT(object->dentry->d_inode);
+       ASSERT(d_backing_inode(object->dentry));
         ASSERT(object->dentry->d_parent);
  
         dir = dget_parent(object->dentry);
  
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
  
         if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
                 /* object allocation for the same key preemptively deleted this
                  * object's file so that it could create its own file */
                 _debug("object preemptively buried");
-               mutex_unlock(&dir->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dir)->i_mutex);
                 ret = 0;
         } else {
                 /* we need to check that our parent is _still_ our parent - it
@@ -438,7 +438,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
                         /* it got moved, presumably by cachefilesd culling it,
                          * so it's no longer in the key path and we can ignore
                          * it */
-                       mutex_unlock(&dir->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dir)->i_mutex);
                         ret = 0;
                 }
         }
@@ -473,7 +473,7 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
         path.mnt = cache->mnt;
  
         ASSERT(parent->dentry);
-       ASSERT(parent->dentry->d_inode);
+       ASSERT(d_backing_inode(parent->dentry));
  
         if (!(d_is_dir(parent->dentry))) {
                 // TODO: convert file to dir
@@ -497,7 +497,7 @@ lookup_again:
         /* search the current directory for the element name */
         _debug("lookup '%s'", name);
  
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
  
         start = jiffies;
         next = lookup_one_len(name, dir, nlen);
@@ -505,21 +505,21 @@ lookup_again:
         if (IS_ERR(next))
                 goto lookup_error;
  
-       _debug("next -> %p %s", next, next->d_inode ? "positive" : "negative");
+       _debug("next -> %p %s", next, d_backing_inode(next) ? "positive" : "negative");
  
         if (!key)
-               object->new = !next->d_inode;
+               object->new = !d_backing_inode(next);
  
         /* if this element of the path doesn't exist, then the lookup phase
          * failed, and we can release any readers in the certain knowledge that
          * there's nothing for them to actually read */
-       if (!next->d_inode)
+       if (d_is_negative(next))
                 fscache_object_lookup_negative(&object->fscache);
  
         /* we need to create the object if it's negative */
         if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) {
                 /* index objects and intervening tree levels must be subdirs */
-               if (!next->d_inode) {
+               if (d_is_negative(next)) {
                         ret = cachefiles_has_space(cache, 1, 0);
                         if (ret < 0)
                                 goto create_error;
@@ -529,26 +529,26 @@ lookup_again:
                         if (ret < 0)
                                 goto create_error;
                         start = jiffies;
-                       ret = vfs_mkdir(dir->d_inode, next, 0);
+                       ret = vfs_mkdir(d_inode(dir), next, 0);
                         cachefiles_hist(cachefiles_mkdir_histogram, start);
                         if (ret < 0)
                                 goto create_error;
  
-                       ASSERT(next->d_inode);
+                       ASSERT(d_backing_inode(next));
  
                         _debug("mkdir -> %p{%p{ino=%lu}}",
-                              next, next->d_inode, next->d_inode->i_ino);
+                              next, d_backing_inode(next), d_backing_inode(next)->i_ino);
  
                 } else if (!d_can_lookup(next)) {
                         pr_err("inode %lu is not a directory\n",
-                              next->d_inode->i_ino);
+                              d_backing_inode(next)->i_ino);
                         ret = -ENOBUFS;
                         goto error;
                 }
  
         } else {
                 /* non-index objects start out life as files */
-               if (!next->d_inode) {
+               if (d_is_negative(next)) {
                         ret = cachefiles_has_space(cache, 1, 0);
                         if (ret < 0)
                                 goto create_error;
@@ -558,21 +558,21 @@ lookup_again:
                         if (ret < 0)
                                 goto create_error;
                         start = jiffies;
-                       ret = vfs_create(dir->d_inode, next, S_IFREG, true);
+                       ret = vfs_create(d_inode(dir), next, S_IFREG, true);
                         cachefiles_hist(cachefiles_create_histogram, start);
                         if (ret < 0)
                                 goto create_error;
  
-                       ASSERT(next->d_inode);
+                       ASSERT(d_backing_inode(next));
  
                         _debug("create -> %p{%p{ino=%lu}}",
-                              next, next->d_inode, next->d_inode->i_ino);
+                              next, d_backing_inode(next), d_backing_inode(next)->i_ino);
  
                 } else if (!d_can_lookup(next) &&
                            !d_is_reg(next)
                            ) {
                         pr_err("inode %lu is not a file or directory\n",
-                              next->d_inode->i_ino);
+                              d_backing_inode(next)->i_ino);
                         ret = -ENOBUFS;
                         goto error;
                 }
@@ -581,7 +581,7 @@ lookup_again:
         /* process the next component */
         if (key) {
                 _debug("advance");
-               mutex_unlock(&dir->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dir)->i_mutex);
                 dput(dir);
                 dir = next;
                 next = NULL;
@@ -617,7 +617,7 @@ lookup_again:
         /* note that we're now using this object */
         ret = cachefiles_mark_object_active(cache, object);
  
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         dput(dir);
         dir = NULL;
  
@@ -646,7 +646,7 @@ lookup_again:
                         const struct address_space_operations *aops;
  
                         ret = -EPERM;
-                       aops = object->dentry->d_inode->i_mapping->a_ops;
+                       aops = d_backing_inode(object->dentry)->i_mapping->a_ops;
                         if (!aops->bmap)
                                 goto check_error;
  
@@ -659,7 +659,7 @@ lookup_again:
         object->new = 0;
         fscache_obtained_object(&object->fscache);
  
-       _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino);
+       _leave(" = 0 [%lu]", d_backing_inode(object->dentry)->i_ino);
         return 0;
  
  create_error:
@@ -695,7 +695,7 @@ lookup_error:
                 cachefiles_io_error(cache, "Lookup failed");
         next = NULL;
  error:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         dput(next);
  error_out2:
         dput(dir);
@@ -719,7 +719,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
         _enter(",,%s", dirname);
  
         /* search the current directory for the element name */
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
  
         start = jiffies;
         subdir = lookup_one_len(dirname, dir, strlen(dirname));
@@ -731,10 +731,10 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
         }
  
         _debug("subdir -> %p %s",
-              subdir, subdir->d_inode ? "positive" : "negative");
+              subdir, d_backing_inode(subdir) ? "positive" : "negative");
  
         /* we need to create the subdir if it doesn't exist yet */
-       if (!subdir->d_inode) {
+       if (d_is_negative(subdir)) {
                 ret = cachefiles_has_space(cache, 1, 0);
                 if (ret < 0)
                         goto mkdir_error;
@@ -746,22 +746,22 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
                 ret = security_path_mkdir(&path, subdir, 0700);
                 if (ret < 0)
                         goto mkdir_error;
-               ret = vfs_mkdir(dir->d_inode, subdir, 0700);
+               ret = vfs_mkdir(d_inode(dir), subdir, 0700);
                 if (ret < 0)
                         goto mkdir_error;
  
-               ASSERT(subdir->d_inode);
+               ASSERT(d_backing_inode(subdir));
  
                 _debug("mkdir -> %p{%p{ino=%lu}}",
                        subdir,
-                      subdir->d_inode,
-                      subdir->d_inode->i_ino);
+                      d_backing_inode(subdir),
+                      d_backing_inode(subdir)->i_ino);
         }
  
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
  
         /* we need to make sure the subdir is a directory */
-       ASSERT(subdir->d_inode);
+       ASSERT(d_backing_inode(subdir));
  
         if (!d_can_lookup(subdir)) {
                 pr_err("%s is not a directory\n", dirname);
@@ -770,18 +770,18 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
         }
  
         ret = -EPERM;
-       if (!subdir->d_inode->i_op->setxattr ||
-           !subdir->d_inode->i_op->getxattr ||
-           !subdir->d_inode->i_op->lookup ||
-           !subdir->d_inode->i_op->mkdir ||
-           !subdir->d_inode->i_op->create ||
-           (!subdir->d_inode->i_op->rename &&
-            !subdir->d_inode->i_op->rename2) ||
-           !subdir->d_inode->i_op->rmdir ||
-           !subdir->d_inode->i_op->unlink)
+       if (!d_backing_inode(subdir)->i_op->setxattr ||
+           !d_backing_inode(subdir)->i_op->getxattr ||
+           !d_backing_inode(subdir)->i_op->lookup ||
+           !d_backing_inode(subdir)->i_op->mkdir ||
+           !d_backing_inode(subdir)->i_op->create ||
+           (!d_backing_inode(subdir)->i_op->rename &&
+            !d_backing_inode(subdir)->i_op->rename2) ||
+           !d_backing_inode(subdir)->i_op->rmdir ||
+           !d_backing_inode(subdir)->i_op->unlink)
                 goto check_error;
  
-       _leave(" = [%lu]", subdir->d_inode->i_ino);
+       _leave(" = [%lu]", d_backing_inode(subdir)->i_ino);
         return subdir;
  
  check_error:
@@ -790,19 +790,19 @@ check_error:
         return ERR_PTR(ret);
  
  mkdir_error:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         dput(subdir);
         pr_err("mkdir %s failed with error %d\n", dirname, ret);
         return ERR_PTR(ret);
  
  lookup_error:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         ret = PTR_ERR(subdir);
         pr_err("Lookup %s failed with error %d\n", dirname, ret);
         return ERR_PTR(ret);
  
  nomem_d_alloc:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         _leave(" = -ENOMEM");
         return ERR_PTR(-ENOMEM);
  }
@@ -827,7 +827,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
         //       dir, filename);
  
         /* look up the victim */
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
  
         start = jiffies;
         victim = lookup_one_len(filename, dir, strlen(filename));
@@ -836,13 +836,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
                 goto lookup_error;
  
         //_debug("victim -> %p %s",
-       //       victim, victim->d_inode ? "positive" : "negative");
+       //       victim, d_backing_inode(victim) ? "positive" : "negative");
  
         /* if the object is no longer there then we probably retired the object
          * at the netfs's request whilst the cull was in progress
          */
-       if (!victim->d_inode) {
-               mutex_unlock(&dir->d_inode->i_mutex);
+       if (d_is_negative(victim)) {
+               mutex_unlock(&d_inode(dir)->i_mutex);
                 dput(victim);
                 _leave(" = -ENOENT [absent]");
                 return ERR_PTR(-ENOENT);
@@ -871,13 +871,13 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
  
  object_in_use:
         read_unlock(&cache->active_lock);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         dput(victim);
         //_leave(" = -EBUSY [in use]");
         return ERR_PTR(-EBUSY);
  
  lookup_error:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         ret = PTR_ERR(victim);
         if (ret == -ENOENT) {
                 /* file or dir now absent - probably retired by netfs */
@@ -913,7 +913,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
                 return PTR_ERR(victim);
  
         _debug("victim -> %p %s",
-              victim, victim->d_inode ? "positive" : "negative");
+              victim, d_backing_inode(victim) ? "positive" : "negative");
  
         /* okay... the victim is not being used so we can cull it
          * - start by marking it as stale
@@ -936,7 +936,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
         return 0;
  
  error_unlock:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
  error:
         dput(victim);
         if (ret == -ENOENT) {
@@ -971,7 +971,7 @@ int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
         if (IS_ERR(victim))
                 return PTR_ERR(victim);
  
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         dput(victim);
         //_leave(" = 0");
         return 0;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c

index c6cd8d7..3cbb0e8 100644 (file)
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -74,12 +74,12 @@ static int cachefiles_read_waiter(wait_queue_t *wait, unsigned mode,
  static int cachefiles_read_reissue(struct cachefiles_object *object,
                                    struct cachefiles_one_read *monitor)
  {
-       struct address_space *bmapping = object->backer->d_inode->i_mapping;
+       struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping;
         struct page *backpage = monitor->back_page, *backpage2;
         int ret;
  
         _enter("{ino=%lx},{%lx,%lx}",
-              object->backer->d_inode->i_ino,
+              d_backing_inode(object->backer)->i_ino,
                backpage->index, backpage->flags);
  
         /* skip if the page was truncated away completely */
@@ -157,7 +157,7 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
         object = container_of(op->op.object,
                               struct cachefiles_object, fscache);
  
-       _enter("{ino=%lu}", object->backer->d_inode->i_ino);
+       _enter("{ino=%lu}", d_backing_inode(object->backer)->i_ino);
  
         max = 8;
         spin_lock_irq(&object->work_lock);
@@ -247,7 +247,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
         init_waitqueue_func_entry(&monitor->monitor, cachefiles_read_waiter);
  
         /* attempt to get hold of the backing page */
-       bmapping = object->backer->d_inode->i_mapping;
+       bmapping = d_backing_inode(object->backer)->i_mapping;
         newpage = NULL;
  
         for (;;) {
@@ -408,7 +408,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op,
         if (!object->backer)
                 goto enobufs;
  
-       inode = object->backer->d_inode;
+       inode = d_backing_inode(object->backer);
         ASSERT(S_ISREG(inode->i_mode));
         ASSERT(inode->i_mapping->a_ops->bmap);
         ASSERT(inode->i_mapping->a_ops->readpages);
@@ -468,7 +468,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
                                         struct list_head *list)
  {
         struct cachefiles_one_read *monitor = NULL;
-       struct address_space *bmapping = object->backer->d_inode->i_mapping;
+       struct address_space *bmapping = d_backing_inode(object->backer)->i_mapping;
         struct page *newpage = NULL, *netpage, *_n, *backpage = NULL;
         int ret = 0;
  
@@ -705,7 +705,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
         if (cachefiles_has_space(cache, 0, *nr_pages) < 0)
                 space = 0;
  
-       inode = object->backer->d_inode;
+       inode = d_backing_inode(object->backer);
         ASSERT(S_ISREG(inode->i_mode));
         ASSERT(inode->i_mapping->a_ops->bmap);
         ASSERT(inode->i_mapping->a_ops->readpages);
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c

index 396c18e..31bbc05 100644 (file)
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -55,14 +55,14 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
  {
         int ret;
  
-       ret = security_inode_mkdir(root->d_inode, root, 0);
+       ret = security_inode_mkdir(d_backing_inode(root), root, 0);
         if (ret < 0) {
                 pr_err("Security denies permission to make dirs: error %d",
                        ret);
                 return ret;
         }
  
-       ret = security_inode_create(root->d_inode, root, 0);
+       ret = security_inode_create(d_backing_inode(root), root, 0);
         if (ret < 0)
                 pr_err("Security denies permission to create files: error %d",
                        ret);
@@ -95,7 +95,7 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
  
         /* use the cache root dir's security context as the basis with
          * which create files */
-       ret = set_create_files_as(new, root->d_inode);
+       ret = set_create_files_as(new, d_backing_inode(root));
         if (ret < 0) {
                 abort_creds(new);
                 cachefiles_begin_secure(cache, _saved_cred);
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c

index a8a6874..d31c1a7 100644 (file)
--- a/fs/cachefiles/xattr.c
+++ b/fs/cachefiles/xattr.c
@@ -33,7 +33,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
         int ret;
  
         ASSERT(dentry);
-       ASSERT(dentry->d_inode);
+       ASSERT(d_backing_inode(dentry));
  
         if (!object->fscache.cookie)
                 strcpy(type, "C3");
@@ -52,7 +52,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
  
         if (ret != -EEXIST) {
                 pr_err("Can't set xattr on %pd [%lu] (err %d)\n",
-                      dentry, dentry->d_inode->i_ino,
+                      dentry, d_backing_inode(dentry)->i_ino,
                        -ret);
                 goto error;
         }
@@ -64,7 +64,7 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
                         goto bad_type_length;
  
                 pr_err("Can't read xattr on %pd [%lu] (err %d)\n",
-                      dentry, dentry->d_inode->i_ino,
+                      dentry, d_backing_inode(dentry)->i_ino,
                        -ret);
                 goto error;
         }
@@ -84,14 +84,14 @@ error:
  
  bad_type_length:
         pr_err("Cache object %lu type xattr length incorrect\n",
-              dentry->d_inode->i_ino);
+              d_backing_inode(dentry)->i_ino);
         ret = -EIO;
         goto error;
  
  bad_type:
         xtype[2] = 0;
         pr_err("Cache object %pd [%lu] type %s not %s\n",
-              dentry, dentry->d_inode->i_ino,
+              dentry, d_backing_inode(dentry)->i_ino,
                xtype, type);
         ret = -EIO;
         goto error;
@@ -165,7 +165,7 @@ int cachefiles_check_auxdata(struct cachefiles_object *object)
         int ret;
  
         ASSERT(dentry);
-       ASSERT(dentry->d_inode);
+       ASSERT(d_backing_inode(dentry));
         ASSERT(object->fscache.cookie->def->check_aux);
  
         auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, GFP_KERNEL);
@@ -204,7 +204,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
         _enter("%p,#%d", object, auxdata->len);
  
         ASSERT(dentry);
-       ASSERT(dentry->d_inode);
+       ASSERT(d_backing_inode(dentry));
  
         auxbuf = kmalloc(sizeof(struct cachefiles_xattr) + 512, cachefiles_gfp);
         if (!auxbuf) {
@@ -225,7 +225,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
  
                 cachefiles_io_error_obj(object,
                                         "Can't read xattr on %lu (err %d)",
-                                       dentry->d_inode->i_ino, -ret);
+                                       d_backing_inode(dentry)->i_ino, -ret);
                 goto error;
         }
  
@@ -276,7 +276,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
                         cachefiles_io_error_obj(object,
                                                 "Can't update xattr on %lu"
                                                 " (error %d)",
-                                               dentry->d_inode->i_ino, -ret);
+                                               d_backing_inode(dentry)->i_ino, -ret);
                         goto error;
                 }
         }
@@ -291,7 +291,7 @@ error:
  
  bad_type_length:
         pr_err("Cache object %lu xattr length incorrect\n",
-              dentry->d_inode->i_ino);
+              d_backing_inode(dentry)->i_ino);
         ret = -EIO;
         goto error;
  
@@ -316,7 +316,7 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
                         cachefiles_io_error(cache,
                                             "Can't remove xattr from %lu"
                                             " (error %d)",
-                                           dentry->d_inode->i_ino, -ret);
+                                           d_backing_inode(dentry)->i_ino, -ret);
         }
  
         _leave(" = %d", ret);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c

index 155ab9c..e162bcd 100644 (file)
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1146,6 +1146,10 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
                      inode, page, (int)pos, (int)len);
  
                 r = ceph_update_writeable_page(file, pos, len, page);
+               if (r < 0)
+                       page_cache_release(page);
+               else
+                       *pagep = page;
         } while (r == -EAGAIN);
  
         return r;
@@ -1534,19 +1538,27 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
  
         osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
  
-       err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
-                                   "inline_version", &inline_version,
-                                   sizeof(inline_version),
-                                   CEPH_OSD_CMPXATTR_OP_GT,
-                                   CEPH_OSD_CMPXATTR_MODE_U64);
-       if (err)
-               goto out_put;
-
-       err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
-                                   "inline_version", &inline_version,
-                                   sizeof(inline_version), 0, 0);
-       if (err)
-               goto out_put;
+       {
+               __le64 xattr_buf = cpu_to_le64(inline_version);
+               err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
+                                           "inline_version", &xattr_buf,
+                                           sizeof(xattr_buf),
+                                           CEPH_OSD_CMPXATTR_OP_GT,
+                                           CEPH_OSD_CMPXATTR_MODE_U64);
+               if (err)
+                       goto out_put;
+       }
+
+       {
+               char xattr_buf[32];
+               int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
+                                        "%llu", inline_version);
+               err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
+                                           "inline_version",
+                                           xattr_buf, xattr_len, 0, 0);
+               if (err)
+                       goto out_put;
+       }
  
         ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
         err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c

index 8172775..be5ea6a 100644 (file)
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -896,6 +896,18 @@ int ceph_is_any_caps(struct inode *inode)
         return ret;
  }
  
+static void drop_inode_snap_realm(struct ceph_inode_info *ci)
+{
+       struct ceph_snap_realm *realm = ci->i_snap_realm;
+       spin_lock(&realm->inodes_with_caps_lock);
+       list_del_init(&ci->i_snap_realm_item);
+       ci->i_snap_realm_counter++;
+       ci->i_snap_realm = NULL;
+       spin_unlock(&realm->inodes_with_caps_lock);
+       ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
+                           realm);
+}
+
  /*
   * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
   *
@@ -946,15 +958,13 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
         if (removed)
                 ceph_put_cap(mdsc, cap);
  
-       if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
-               struct ceph_snap_realm *realm = ci->i_snap_realm;
-               spin_lock(&realm->inodes_with_caps_lock);
-               list_del_init(&ci->i_snap_realm_item);
-               ci->i_snap_realm_counter++;
-               ci->i_snap_realm = NULL;
-               spin_unlock(&realm->inodes_with_caps_lock);
-               ceph_put_snap_realm(mdsc, realm);
-       }
+       /* when reconnect denied, we remove session caps forcibly,
+        * i_wr_ref can be non-zero. If there are ongoing write,
+        * keep i_snap_realm.
+        */
+       if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
+               drop_inode_snap_realm(ci);
+
         if (!__ceph_is_any_real_caps(ci))
                 __cap_delay_cancel(mdsc, ci);
  }
@@ -1394,6 +1404,13 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
         int was = ci->i_dirty_caps;
         int dirty = 0;
  
+       if (!ci->i_auth_cap) {
+               pr_warn("__mark_dirty_caps %p %llx mask %s, "
+                       "but no auth cap (session was closed?)\n",
+                       inode, ceph_ino(inode), ceph_cap_string(mask));
+               return 0;
+       }
+
         dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
              ceph_cap_string(mask), ceph_cap_string(was),
              ceph_cap_string(was | mask));
@@ -1404,7 +1421,6 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
                                 ci->i_snap_realm->cached_context);
                 dout(" inode %p now dirty snapc %p auth cap %p\n",
                      &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
-               WARN_ON(!ci->i_auth_cap);
                 BUG_ON(!list_empty(&ci->i_dirty_item));
                 spin_lock(&mdsc->cap_dirty_lock);
                 list_add(&ci->i_dirty_item, &mdsc->cap_dirty);
@@ -1545,7 +1561,19 @@ retry_locked:
         if (!mdsc->stopping && inode->i_nlink > 0) {
                 if (want) {
                         retain |= CEPH_CAP_ANY;       /* be greedy */
+               } else if (S_ISDIR(inode->i_mode) &&
+                          (issued & CEPH_CAP_FILE_SHARED) &&
+                           __ceph_dir_is_complete(ci)) {
+                       /*
+                        * If a directory is complete, we want to keep
+                        * the exclusive cap. So that MDS does not end up
+                        * revoking the shared cap on every create/unlink
+                        * operation.
+                        */
+                       want = CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
+                       retain |= want;
                 } else {
+
                         retain |= CEPH_CAP_ANY_SHARED;
                         /*
                          * keep RD only if we didn't have the file open RW,
@@ -2309,6 +2337,9 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
                                         wake = 1;
                                 }
                         }
+                       /* see comment in __ceph_remove_cap() */
+                       if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
+                               drop_inode_snap_realm(ci);
                 }
         spin_unlock(&ci->i_ceph_lock);
  
@@ -3391,7 +3422,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
  int ceph_encode_dentry_release(void **p, struct dentry *dentry,
                                int mds, int drop, int unless)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         struct ceph_mds_request_release *rel = *p;
         struct ceph_dentry_info *di = ceph_dentry(dentry);
         int force = 0;
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c

index 1b23551..31f8314 100644 (file)
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -84,7 +84,7 @@ static int mdsc_show(struct seq_file *s, void *p)
                                 path = NULL;
                         spin_lock(&req->r_dentry->d_lock);
                         seq_printf(s, " #%llx/%pd (%s)",
-                                  ceph_ino(req->r_dentry->d_parent->d_inode),
+                                  ceph_ino(d_inode(req->r_dentry->d_parent)),
                                    req->r_dentry,
                                    path ? path : "");
                         spin_unlock(&req->r_dentry->d_lock);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c

index 83e9976..4248307 100644 (file)
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -49,9 +49,9 @@ int ceph_init_dentry(struct dentry *dentry)
                 goto out_unlock;
         }
  
-       if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+       if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP)
                 d_set_d_op(dentry, &ceph_dentry_ops);
-       else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
+       else if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_SNAPDIR)
                 d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
         else
                 d_set_d_op(dentry, &ceph_snap_dentry_ops);
@@ -77,7 +77,7 @@ struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
  
         spin_lock(&dentry->d_lock);
         if (!IS_ROOT(dentry)) {
-               inode = dentry->d_parent->d_inode;
+               inode = d_inode(dentry->d_parent);
                 ihold(inode);
         }
         spin_unlock(&dentry->d_lock);
@@ -122,7 +122,7 @@ static int __dcache_readdir(struct file *file,  struct dir_context *ctx,
  {
         struct ceph_file_info *fi = file->private_data;
         struct dentry *parent = file->f_path.dentry;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         struct list_head *p;
         struct dentry *dentry, *last;
         struct ceph_dentry_info *di;
@@ -161,15 +161,15 @@ more:
                 }
                 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
                 if (di->lease_shared_gen == shared_gen &&
-                   !d_unhashed(dentry) && dentry->d_inode &&
-                   ceph_snap(dentry->d_inode) != CEPH_SNAPDIR &&
-                   ceph_ino(dentry->d_inode) != CEPH_INO_CEPH &&
+                   !d_unhashed(dentry) && d_really_is_positive(dentry) &&
+                   ceph_snap(d_inode(dentry)) != CEPH_SNAPDIR &&
+                   ceph_ino(d_inode(dentry)) != CEPH_INO_CEPH &&
                     fpos_cmp(ctx->pos, di->offset) <= 0)
                         break;
                 dout(" skipping %p %pd at %llu (%llu)%s%s\n", dentry,
                      dentry, di->offset,
                      ctx->pos, d_unhashed(dentry) ? " unhashed" : "",
-                    !dentry->d_inode ? " null" : "");
+                    !d_inode(dentry) ? " null" : "");
                 spin_unlock(&dentry->d_lock);
                 p = p->prev;
                 dentry = list_entry(p, struct dentry, d_child);
@@ -189,11 +189,11 @@ more:
         }
  
         dout(" %llu (%llu) dentry %p %pd %p\n", di->offset, ctx->pos,
-            dentry, dentry, dentry->d_inode);
+            dentry, dentry, d_inode(dentry));
         if (!dir_emit(ctx, dentry->d_name.name,
                       dentry->d_name.len,
-                     ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
-                     dentry->d_inode->i_mode >> 12)) {
+                     ceph_translate_ino(dentry->d_sb, d_inode(dentry)->i_ino),
+                     d_inode(dentry)->i_mode >> 12)) {
                 if (last) {
                         /* remember our position */
                         fi->dentry = last;
@@ -281,6 +281,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
         /* can we use the dcache? */
         spin_lock(&ci->i_ceph_lock);
         if ((ctx->pos == 2 || fi->dentry) &&
+           ceph_test_mount_opt(fsc, DCACHE) &&
             !ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
             ceph_snap(inode) != CEPH_SNAPDIR &&
             __ceph_dir_is_complete_ordered(ci) &&
@@ -336,16 +337,23 @@ more:
                         ceph_mdsc_put_request(req);
                         return err;
                 }
-               req->r_inode = inode;
-               ihold(inode);
-               req->r_dentry = dget(file->f_path.dentry);
                 /* hints to request -> mds selection code */
                 req->r_direct_mode = USE_AUTH_MDS;
                 req->r_direct_hash = ceph_frag_value(frag);
                 req->r_direct_is_hash = true;
-               req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+               if (fi->last_name) {
+                       req->r_path2 = kstrdup(fi->last_name, GFP_NOFS);
+                       if (!req->r_path2) {
+                               ceph_mdsc_put_request(req);
+                               return -ENOMEM;
+                       }
+               }
                 req->r_readdir_offset = fi->next_offset;
                 req->r_args.readdir.frag = cpu_to_le32(frag);
+
+               req->r_inode = inode;
+               ihold(inode);
+               req->r_dentry = dget(file->f_path.dentry);
                 err = ceph_mdsc_do_request(mdsc, NULL, req);
                 if (err < 0) {
                         ceph_mdsc_put_request(req);
@@ -535,7 +543,7 @@ int ceph_handle_snapdir(struct ceph_mds_request *req,
                         struct dentry *dentry, int err)
  {
         struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
-       struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
+       struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */
  
         /* .snap dir? */
         if (err == -ENOENT &&
@@ -571,8 +579,8 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
                 err = 0;
                 if (!req->r_reply_info.head->is_dentry) {
                         dout("ENOENT and no trace, dentry %p inode %p\n",
-                            dentry, dentry->d_inode);
-                       if (dentry->d_inode) {
+                            dentry, d_inode(dentry));
+                       if (d_really_is_positive(dentry)) {
                                 d_drop(dentry);
                                 err = -ENOENT;
                         } else {
@@ -619,7 +627,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                 return ERR_PTR(err);
  
         /* can we conclude ENOENT locally? */
-       if (dentry->d_inode == NULL) {
+       if (d_really_is_negative(dentry)) {
                 struct ceph_inode_info *ci = ceph_inode(dir);
                 struct ceph_dentry_info *di = ceph_dentry(dentry);
  
@@ -629,6 +637,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
                             fsc->mount_options->snapdir_name,
                             dentry->d_name.len) &&
                     !is_root_ceph_dentry(dir, dentry) &&
+                   ceph_test_mount_opt(fsc, DCACHE) &&
                     __ceph_dir_is_complete(ci) &&
                     (__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
                         spin_unlock(&ci->i_ceph_lock);
@@ -725,7 +734,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
         ceph_mdsc_put_request(req);
  out:
         if (!err)
-               ceph_init_inode_acls(dentry->d_inode, &acls);
+               ceph_init_inode_acls(d_inode(dentry), &acls);
         else
                 d_drop(dentry);
         ceph_release_acls_info(&acls);
@@ -755,10 +764,15 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
                 err = PTR_ERR(req);
                 goto out;
         }
-       req->r_dentry = dget(dentry);
-       req->r_num_caps = 2;
         req->r_path2 = kstrdup(dest, GFP_NOFS);
+       if (!req->r_path2) {
+               err = -ENOMEM;
+               ceph_mdsc_put_request(req);
+               goto out;
+       }
         req->r_locked_dir = dir;
+       req->r_dentry = dget(dentry);
+       req->r_num_caps = 2;
         req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
         req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
         err = ceph_mdsc_do_request(mdsc, dir, req);
@@ -821,7 +835,7 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
         ceph_mdsc_put_request(req);
  out:
         if (!err)
-               ceph_init_inode_acls(dentry->d_inode, &acls);
+               ceph_init_inode_acls(d_inode(dentry), &acls);
         else
                 d_drop(dentry);
         ceph_release_acls_info(&acls);
@@ -858,8 +872,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
         if (err) {
                 d_drop(dentry);
         } else if (!req->r_reply_info.head->is_dentry) {
-               ihold(old_dentry->d_inode);
-               d_instantiate(dentry, old_dentry->d_inode);
+               ihold(d_inode(old_dentry));
+               d_instantiate(dentry, d_inode(old_dentry));
         }
         ceph_mdsc_put_request(req);
         return err;
@@ -892,7 +906,7 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
  {
         struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
         struct ceph_mds_client *mdsc = fsc->mdsc;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ceph_mds_request *req;
         int err = -EROFS;
         int op;
@@ -933,16 +947,20 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
         struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb);
         struct ceph_mds_client *mdsc = fsc->mdsc;
         struct ceph_mds_request *req;
+       int op = CEPH_MDS_OP_RENAME;
         int err;
  
         if (ceph_snap(old_dir) != ceph_snap(new_dir))
                 return -EXDEV;
-       if (ceph_snap(old_dir) != CEPH_NOSNAP ||
-           ceph_snap(new_dir) != CEPH_NOSNAP)
-               return -EROFS;
+       if (ceph_snap(old_dir) != CEPH_NOSNAP) {
+               if (old_dir == new_dir && ceph_snap(old_dir) == CEPH_SNAPDIR)
+                       op = CEPH_MDS_OP_RENAMESNAP;
+               else
+                       return -EROFS;
+       }
         dout("rename dir %p dentry %p to dir %p dentry %p\n",
              old_dir, old_dentry, new_dir, new_dentry);
-       req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS);
+       req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
         if (IS_ERR(req))
                 return PTR_ERR(req);
         ihold(old_dir);
@@ -957,8 +975,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
         req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
         /* release LINK_RDCACHE on source inode (mds will lock it) */
         req->r_old_inode_drop = CEPH_CAP_LINK_SHARED;
-       if (new_dentry->d_inode)
-               req->r_inode_drop = drop_caps_for_unlink(new_dentry->d_inode);
+       if (d_really_is_positive(new_dentry))
+               req->r_inode_drop = drop_caps_for_unlink(d_inode(new_dentry));
         err = ceph_mdsc_do_request(mdsc, old_dir, req);
         if (!err && !req->r_reply_info.head->is_dentry) {
                 /*
@@ -1024,7 +1042,7 @@ static int dentry_lease_is_valid(struct dentry *dentry)
                         if (di->lease_renew_after &&
                             time_after(jiffies, di->lease_renew_after)) {
                                 /* we should renew */
-                               dir = dentry->d_parent->d_inode;
+                               dir = d_inode(dentry->d_parent);
                                 session = ceph_get_mds_session(s);
                                 seq = di->lease_seq;
                                 di->lease_renew_after = 0;
@@ -1074,22 +1092,22 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
                 return -ECHILD;
  
         dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
-            dentry, dentry->d_inode, ceph_dentry(dentry)->offset);
+            dentry, d_inode(dentry), ceph_dentry(dentry)->offset);
  
         dir = ceph_get_dentry_parent_inode(dentry);
  
         /* always trust cached snapped dentries, snapdir dentry */
         if (ceph_snap(dir) != CEPH_NOSNAP) {
                 dout("d_revalidate %p '%pd' inode %p is SNAPPED\n", dentry,
-                    dentry, dentry->d_inode);
+                    dentry, d_inode(dentry));
                 valid = 1;
-       } else if (dentry->d_inode &&
-                  ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
+       } else if (d_really_is_positive(dentry) &&
+                  ceph_snap(d_inode(dentry)) == CEPH_SNAPDIR) {
                 valid = 1;
         } else if (dentry_lease_is_valid(dentry) ||
                    dir_lease_is_valid(dir, dentry)) {
-               if (dentry->d_inode)
-                       valid = ceph_is_any_caps(dentry->d_inode);
+               if (d_really_is_positive(dentry))
+                       valid = ceph_is_any_caps(d_inode(dentry));
                 else
                         valid = 1;
         }
@@ -1151,7 +1169,7 @@ static void ceph_d_prune(struct dentry *dentry)
          * we hold d_lock, so d_parent is stable, and d_fsdata is never
          * cleared until d_release
          */
-       ceph_dir_clear_complete(dentry->d_parent->d_inode);
+       ceph_dir_clear_complete(d_inode(dentry->d_parent));
  }
  
  /*
@@ -1240,11 +1258,12 @@ static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
                 dout("dir_fsync %p wait on tid %llu (until %llu)\n",
                      inode, req->r_tid, last_tid);
                 if (req->r_timeout) {
-                       ret = wait_for_completion_timeout(
-                               &req->r_safe_completion, req->r_timeout);
-                       if (ret > 0)
+                       unsigned long time_left = wait_for_completion_timeout(
+                                                       &req->r_safe_completion,
+                                                       req->r_timeout);
+                       if (time_left > 0)
                                 ret = 0;
-                       else if (ret == 0)
+                       else
                                 ret = -EIO;  /* timed out */
                 } else {
                         wait_for_completion(&req->r_safe_completion);
@@ -1372,6 +1391,7 @@ const struct inode_operations ceph_snapdir_iops = {
         .getattr = ceph_getattr,
         .mkdir = ceph_mkdir,
         .rmdir = ceph_unlink,
+       .rename = ceph_rename,
  };
  
  const struct dentry_operations ceph_dentry_ops = {
diff --git a/fs/ceph/export.c b/fs/ceph/export.c

index 8d7d782..fe02ae7 100644 (file)
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -136,8 +136,8 @@ static struct dentry *__get_parent(struct super_block *sb,
                 return ERR_CAST(req);
  
         if (child) {
-               req->r_inode = child->d_inode;
-               ihold(child->d_inode);
+               req->r_inode = d_inode(child);
+               ihold(d_inode(child));
         } else {
                 req->r_ino1 = (struct ceph_vino) {
                         .ino = ino,
@@ -164,7 +164,7 @@ static struct dentry *__get_parent(struct super_block *sb,
                 return ERR_PTR(err);
         }
         dout("__get_parent ino %llx parent %p ino %llx.%llx\n",
-            child ? ceph_ino(child->d_inode) : ino,
+            child ? ceph_ino(d_inode(child)) : ino,
              dentry, ceph_vinop(inode));
         return dentry;
  }
@@ -172,11 +172,11 @@ static struct dentry *__get_parent(struct super_block *sb,
  static struct dentry *ceph_get_parent(struct dentry *child)
  {
         /* don't re-export snaps */
-       if (ceph_snap(child->d_inode) != CEPH_NOSNAP)
+       if (ceph_snap(d_inode(child)) != CEPH_NOSNAP)
                 return ERR_PTR(-EINVAL);
  
         dout("get_parent %p ino %llx.%llx\n",
-            child, ceph_vinop(child->d_inode));
+            child, ceph_vinop(d_inode(child)));
         return __get_parent(child->d_sb, child, 0);
  }
  
@@ -209,32 +209,32 @@ static int ceph_get_name(struct dentry *parent, char *name,
         struct ceph_mds_request *req;
         int err;
  
-       mdsc = ceph_inode_to_client(child->d_inode)->mdsc;
+       mdsc = ceph_inode_to_client(d_inode(child))->mdsc;
         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
                                        USE_ANY_MDS);
         if (IS_ERR(req))
                 return PTR_ERR(req);
  
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
  
-       req->r_inode = child->d_inode;
-       ihold(child->d_inode);
-       req->r_ino2 = ceph_vino(parent->d_inode);
-       req->r_locked_dir = parent->d_inode;
+       req->r_inode = d_inode(child);
+       ihold(d_inode(child));
+       req->r_ino2 = ceph_vino(d_inode(parent));
+       req->r_locked_dir = d_inode(parent);
         req->r_num_caps = 2;
         err = ceph_mdsc_do_request(mdsc, NULL, req);
  
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
  
         if (!err) {
                 struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
                 memcpy(name, rinfo->dname, rinfo->dname_len);
                 name[rinfo->dname_len] = 0;
                 dout("get_name %p ino %llx.%llx name %s\n",
-                    child, ceph_vinop(child->d_inode), name);
+                    child, ceph_vinop(d_inode(child)), name);
         } else {
                 dout("get_name %p ino %llx.%llx err %d\n",
-                    child, ceph_vinop(child->d_inode), err);
+                    child, ceph_vinop(d_inode(child)), err);
         }
  
         ceph_mdsc_put_request(req);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c

index b9b8eb2..3b6b522 100644 (file)
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -291,14 +291,14 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
         }
         if (err)
                 goto out_req;
-       if (dn || dentry->d_inode == NULL || d_is_symlink(dentry)) {
+       if (dn || d_really_is_negative(dentry) || d_is_symlink(dentry)) {
                 /* make vfs retry on splice, ENOENT, or symlink */
                 dout("atomic_open finish_no_open on dn %p\n", dn);
                 err = finish_no_open(file, dn);
         } else {
                 dout("atomic_open finish_open on dn %p\n", dn);
                 if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
-                       ceph_init_inode_acls(dentry->d_inode, &acls);
+                       ceph_init_inode_acls(d_inode(dentry), &acls);
                         *opened |= FILE_CREATED;
                 }
                 err = finish_open(file, dentry, ceph_open, opened);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c

index 119c43c..e876e19 100644 (file)
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -940,7 +940,7 @@ static void update_dentry_lease(struct dentry *dentry,
              dentry, duration, ttl);
  
         /* make lease_rdcache_gen match directory */
-       dir = dentry->d_parent->d_inode;
+       dir = d_inode(dentry->d_parent);
         di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
  
         if (duration == 0)
@@ -980,7 +980,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
  {
         struct dentry *realdn;
  
-       BUG_ON(dn->d_inode);
+       BUG_ON(d_inode(dn));
  
         /* dn must be unhashed */
         if (!d_unhashed(dn))
@@ -998,13 +998,13 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
                      "inode %p ino %llx.%llx\n",
                      dn, d_count(dn),
                      realdn, d_count(realdn),
-                    realdn->d_inode, ceph_vinop(realdn->d_inode));
+                    d_inode(realdn), ceph_vinop(d_inode(realdn)));
                 dput(dn);
                 dn = realdn;
         } else {
                 BUG_ON(!ceph_dentry(dn));
                 dout("dn %p attached to %p ino %llx.%llx\n",
-                    dn, dn->d_inode, ceph_vinop(dn->d_inode));
+                    dn, d_inode(dn), ceph_vinop(d_inode(dn)));
         }
         if ((!prehash || *prehash) && d_unhashed(dn))
                 d_rehash(dn);
@@ -1125,11 +1125,11 @@ retry_lookup:
                                         dput(parent);
                                         goto done;
                                 }
-                       } else if (dn->d_inode &&
-                                  (ceph_ino(dn->d_inode) != vino.ino ||
-                                   ceph_snap(dn->d_inode) != vino.snap)) {
+                       } else if (d_really_is_positive(dn) &&
+                                  (ceph_ino(d_inode(dn)) != vino.ino ||
+                                   ceph_snap(d_inode(dn)) != vino.snap)) {
                                 dout(" dn %p points to wrong inode %p\n",
-                                    dn, dn->d_inode);
+                                    dn, d_inode(dn));
                                 d_delete(dn);
                                 dput(dn);
                                 goto retry_lookup;
@@ -1183,7 +1183,7 @@ retry_lookup:
  
                 BUG_ON(!dn);
                 BUG_ON(!dir);
-               BUG_ON(dn->d_parent->d_inode != dir);
+               BUG_ON(d_inode(dn->d_parent) != dir);
                 BUG_ON(ceph_ino(dir) !=
                        le64_to_cpu(rinfo->diri.in->ino));
                 BUG_ON(ceph_snap(dir) !=
@@ -1235,7 +1235,7 @@ retry_lookup:
                 /* null dentry? */
                 if (!rinfo->head->is_target) {
                         dout("fill_trace null dentry\n");
-                       if (dn->d_inode) {
+                       if (d_really_is_positive(dn)) {
                                 ceph_dir_clear_ordered(dir);
                                 dout("d_delete %p\n", dn);
                                 d_delete(dn);
@@ -1252,7 +1252,7 @@ retry_lookup:
                 }
  
                 /* attach proper inode */
-               if (!dn->d_inode) {
+               if (d_really_is_negative(dn)) {
                         ceph_dir_clear_ordered(dir);
                         ihold(in);
                         dn = splice_dentry(dn, in, &have_lease);
@@ -1261,9 +1261,9 @@ retry_lookup:
                                 goto done;
                         }
                         req->r_dentry = dn;  /* may have spliced */
-               } else if (dn->d_inode && dn->d_inode != in) {
+               } else if (d_really_is_positive(dn) && d_inode(dn) != in) {
                         dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
-                            dn, dn->d_inode, ceph_vinop(dn->d_inode),
+                            dn, d_inode(dn), ceph_vinop(d_inode(dn)),
                              ceph_vinop(in));
                         have_lease = false;
                 }
@@ -1363,7 +1363,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                 return readdir_prepopulate_inodes_only(req, session);
  
         if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) {
-               snapdir = ceph_get_snapdir(parent->d_inode);
+               snapdir = ceph_get_snapdir(d_inode(parent));
                 parent = d_find_alias(snapdir);
                 dout("readdir_prepopulate %d items under SNAPDIR dn %p\n",
                      rinfo->dir_nr, parent);
@@ -1371,7 +1371,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
                 dout("readdir_prepopulate %d items under dn %p\n",
                      rinfo->dir_nr, parent);
                 if (rinfo->dir_dir)
-                       ceph_fill_dirfrag(parent->d_inode, rinfo->dir_dir);
+                       ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir);
         }
  
         /* FIXME: release caps/leases if error occurs */
@@ -1405,11 +1405,11 @@ retry_lookup:
                                 err = ret;
                                 goto out;
                         }
-               } else if (dn->d_inode &&
-                          (ceph_ino(dn->d_inode) != vino.ino ||
-                           ceph_snap(dn->d_inode) != vino.snap)) {
+               } else if (d_really_is_positive(dn) &&
+                          (ceph_ino(d_inode(dn)) != vino.ino ||
+                           ceph_snap(d_inode(dn)) != vino.snap)) {
                         dout(" dn %p points to wrong inode %p\n",
-                            dn, dn->d_inode);
+                            dn, d_inode(dn));
                         d_delete(dn);
                         dput(dn);
                         goto retry_lookup;
@@ -1423,8 +1423,8 @@ retry_lookup:
                 }
  
                 /* inode */
-               if (dn->d_inode) {
-                       in = dn->d_inode;
+               if (d_really_is_positive(dn)) {
+                       in = d_inode(dn);
                 } else {
                         in = ceph_get_inode(parent->d_sb, vino);
                         if (IS_ERR(in)) {
@@ -1440,13 +1440,13 @@ retry_lookup:
                                req->r_request_started, -1,
                                &req->r_caps_reservation) < 0) {
                         pr_err("fill_inode badness on %p\n", in);
-                       if (!dn->d_inode)
+                       if (d_really_is_negative(dn))
                                 iput(in);
                         d_drop(dn);
                         goto next_item;
                 }
  
-               if (!dn->d_inode) {
+               if (d_really_is_negative(dn)) {
                         struct dentry *realdn = splice_dentry(dn, in, NULL);
                         if (IS_ERR(realdn)) {
                                 err = PTR_ERR(realdn);
@@ -1693,7 +1693,7 @@ retry:
   */
  static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct ceph_inode_info *ci = ceph_inode(dentry->d_inode);
+       struct ceph_inode_info *ci = ceph_inode(d_inode(dentry));
         nd_set_link(nd, ci->i_symlink);
         return NULL;
  }
@@ -1714,7 +1714,7 @@ static const struct inode_operations ceph_symlink_iops = {
   */
  int ceph_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ceph_inode_info *ci = ceph_inode(inode);
         const unsigned int ia_valid = attr->ia_valid;
         struct ceph_mds_request *req;
@@ -1990,7 +1990,7 @@ int ceph_permission(struct inode *inode, int mask)
  int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
                  struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ceph_inode_info *ci = ceph_inode(inode);
         int err;
  
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c

index 71c073f..84f37f3 100644 (file)
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -679,7 +679,7 @@ static struct dentry *get_nonsnap_parent(struct dentry *dentry)
          * except to resplice to another snapdir, and either the old or new
          * result is a valid result.
          */
-       while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+       while (!IS_ROOT(dentry) && ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
                 dentry = dentry->d_parent;
         return dentry;
  }
@@ -716,20 +716,20 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
         } else if (req->r_dentry) {
                 /* ignore race with rename; old or new d_parent is okay */
                 struct dentry *parent = req->r_dentry->d_parent;
-               struct inode *dir = parent->d_inode;
+               struct inode *dir = d_inode(parent);
  
                 if (dir->i_sb != mdsc->fsc->sb) {
                         /* not this fs! */
-                       inode = req->r_dentry->d_inode;
+                       inode = d_inode(req->r_dentry);
                 } else if (ceph_snap(dir) != CEPH_NOSNAP) {
                         /* direct snapped/virtual snapdir requests
                          * based on parent dir inode */
                         struct dentry *dn = get_nonsnap_parent(parent);
-                       inode = dn->d_inode;
+                       inode = d_inode(dn);
                         dout("__choose_mds using nonsnap parent %p\n", inode);
                 } else {
                         /* dentry target */
-                       inode = req->r_dentry->d_inode;
+                       inode = d_inode(req->r_dentry);
                         if (!inode || mode == USE_AUTH_MDS) {
                                 /* dir + name */
                                 inode = dir;
@@ -1021,6 +1021,33 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
         spin_unlock(&session->s_cap_lock);
  }
  
+static void cleanup_session_requests(struct ceph_mds_client *mdsc,
+                                    struct ceph_mds_session *session)
+{
+       struct ceph_mds_request *req;
+       struct rb_node *p;
+
+       dout("cleanup_session_requests mds%d\n", session->s_mds);
+       mutex_lock(&mdsc->mutex);
+       while (!list_empty(&session->s_unsafe)) {
+               req = list_first_entry(&session->s_unsafe,
+                                      struct ceph_mds_request, r_unsafe_item);
+               list_del_init(&req->r_unsafe_item);
+               pr_info(" dropping unsafe request %llu\n", req->r_tid);
+               __unregister_request(mdsc, req);
+       }
+       /* zero r_attempts, so kick_requests() will re-send requests */
+       p = rb_first(&mdsc->request_tree);
+       while (p) {
+               req = rb_entry(p, struct ceph_mds_request, r_node);
+               p = rb_next(p);
+               if (req->r_session &&
+                   req->r_session->s_mds == session->s_mds)
+                       req->r_attempts = 0;
+       }
+       mutex_unlock(&mdsc->mutex);
+}
+
  /*
   * Helper to safely iterate over all caps associated with a session, with
   * special care taken to handle a racing __ceph_remove_cap().
@@ -1098,7 +1125,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
              cap, ci, &ci->vfs_inode);
         spin_lock(&ci->i_ceph_lock);
         __ceph_remove_cap(cap, false);
-       if (!__ceph_is_any_real_caps(ci)) {
+       if (!ci->i_auth_cap) {
                 struct ceph_mds_client *mdsc =
                         ceph_sb_to_client(inode->i_sb)->mdsc;
  
@@ -1120,13 +1147,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                         mdsc->num_cap_flushing--;
                         drop = 1;
                 }
-               if (drop && ci->i_wrbuffer_ref) {
-                       pr_info(" dropping dirty data for %p %lld\n",
-                               inode, ceph_ino(inode));
-                       ci->i_wrbuffer_ref = 0;
-                       ci->i_wrbuffer_ref_head = 0;
-                       drop++;
-               }
                 spin_unlock(&mdsc->cap_dirty_lock);
         }
         spin_unlock(&ci->i_ceph_lock);
@@ -1712,7 +1732,7 @@ retry:
         seq = read_seqbegin(&rename_lock);
         rcu_read_lock();
         for (temp = dentry; !IS_ROOT(temp);) {
-               struct inode *inode = temp->d_inode;
+               struct inode *inode = d_inode(temp);
                 if (inode && ceph_snap(inode) == CEPH_SNAPDIR)
                         len++;  /* slash only */
                 else if (stop_on_nosnap && inode &&
@@ -1736,7 +1756,7 @@ retry:
                 struct inode *inode;
  
                 spin_lock(&temp->d_lock);
-               inode = temp->d_inode;
+               inode = d_inode(temp);
                 if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
                         dout("build_path path+%d: %p SNAPDIR\n",
                              pos, temp);
@@ -1770,7 +1790,7 @@ retry:
                 goto retry;
         }
  
-       *base = ceph_ino(temp->d_inode);
+       *base = ceph_ino(d_inode(temp));
         *plen = len;
         dout("build_path on %p %d built %llx '%.*s'\n",
              dentry, d_count(dentry), *base, len, path);
@@ -1783,8 +1803,8 @@ static int build_dentry_path(struct dentry *dentry,
  {
         char *path;
  
-       if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) {
-               *pino = ceph_ino(dentry->d_parent->d_inode);
+       if (ceph_snap(d_inode(dentry->d_parent)) == CEPH_NOSNAP) {
+               *pino = ceph_ino(d_inode(dentry->d_parent));
                 *ppath = dentry->d_name.name;
                 *ppathlen = dentry->d_name.len;
                 return 0;
@@ -1853,7 +1873,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
   */
  static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
                                                struct ceph_mds_request *req,
-                                              int mds)
+                                              int mds, bool drop_cap_releases)
  {
         struct ceph_msg *msg;
         struct ceph_mds_request_head *head;
@@ -1925,7 +1945,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
         releases = 0;
         if (req->r_inode_drop)
                 releases += ceph_encode_inode_release(&p,
-                     req->r_inode ? req->r_inode : req->r_dentry->d_inode,
+                     req->r_inode ? req->r_inode : d_inode(req->r_dentry),
                       mds, req->r_inode_drop, req->r_inode_unless, 0);
         if (req->r_dentry_drop)
                 releases += ceph_encode_dentry_release(&p, req->r_dentry,
@@ -1935,8 +1955,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
                        mds, req->r_old_dentry_drop, req->r_old_dentry_unless);
         if (req->r_old_inode_drop)
                 releases += ceph_encode_inode_release(&p,
-                     req->r_old_dentry->d_inode,
+                     d_inode(req->r_old_dentry),
                       mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
+
+       if (drop_cap_releases) {
+               releases = 0;
+               p = msg->front.iov_base + req->r_request_release_offset;
+       }
+
         head->num_releases = cpu_to_le16(releases);
  
         /* time stamp */
@@ -1989,7 +2015,7 @@ static void complete_request(struct ceph_mds_client *mdsc,
   */
  static int __prepare_send_request(struct ceph_mds_client *mdsc,
                                   struct ceph_mds_request *req,
-                                 int mds)
+                                 int mds, bool drop_cap_releases)
  {
         struct ceph_mds_request_head *rhead;
         struct ceph_msg *msg;
@@ -2048,7 +2074,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
                 ceph_msg_put(req->r_request);
                 req->r_request = NULL;
         }
-       msg = create_request_message(mdsc, req, mds);
+       msg = create_request_message(mdsc, req, mds, drop_cap_releases);
         if (IS_ERR(msg)) {
                 req->r_err = PTR_ERR(msg);
                 complete_request(mdsc, req);
@@ -2132,7 +2158,7 @@ static int __do_request(struct ceph_mds_client *mdsc,
         if (req->r_request_started == 0)   /* note request start time */
                 req->r_request_started = jiffies;
  
-       err = __prepare_send_request(mdsc, req, mds);
+       err = __prepare_send_request(mdsc, req, mds, false);
         if (!err) {
                 ceph_msg_get(req->r_request);
                 ceph_con_send(&session->s_con, req->r_request);
@@ -2590,6 +2616,7 @@ static void handle_session(struct ceph_mds_session *session,
         case CEPH_SESSION_CLOSE:
                 if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
                         pr_info("mds%d reconnect denied\n", session->s_mds);
+               cleanup_session_requests(mdsc, session);
                 remove_session_caps(session);
                 wake = 2; /* for good measure */
                 wake_up_all(&mdsc->session_close_wq);
@@ -2658,7 +2685,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
  
         mutex_lock(&mdsc->mutex);
         list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) {
-               err = __prepare_send_request(mdsc, req, session->s_mds);
+               err = __prepare_send_request(mdsc, req, session->s_mds, true);
                 if (!err) {
                         ceph_msg_get(req->r_request);
                         ceph_con_send(&session->s_con, req->r_request);
@@ -2679,7 +2706,8 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
                         continue; /* only old requests */
                 if (req->r_session &&
                     req->r_session->s_mds == session->s_mds) {
-                       err = __prepare_send_request(mdsc, req, session->s_mds);
+                       err = __prepare_send_request(mdsc, req,
+                                                    session->s_mds, true);
                         if (!err) {
                                 ceph_msg_get(req->r_request);
                                 ceph_con_send(&session->s_con, req->r_request);
@@ -2864,7 +2892,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
         spin_unlock(&session->s_cap_lock);
  
         /* trim unused caps to reduce MDS's cache rejoin time */
-       shrink_dcache_parent(mdsc->fsc->sb->s_root);
+       if (mdsc->fsc->sb->s_root)
+               shrink_dcache_parent(mdsc->fsc->sb->s_root);
  
         ceph_con_close(&session->s_con);
         ceph_con_open(&session->s_con,
@@ -3133,7 +3162,7 @@ static void handle_lease(struct ceph_mds_client *mdsc,
                     di->lease_renew_from &&
                     di->lease_renew_after == 0) {
                         unsigned long duration =
-                               le32_to_cpu(h->duration_ms) * HZ / 1000;
+                               msecs_to_jiffies(le32_to_cpu(h->duration_ms));
  
                         di->lease_seq = seq;
                         dentry->d_time = di->lease_renew_from + duration;
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c

index 51cc23e..89e6bc3 100644 (file)
--- a/fs/ceph/strings.c
+++ b/fs/ceph/strings.c
@@ -75,6 +75,7 @@ const char *ceph_mds_op_name(int op)
         case CEPH_MDS_OP_LSSNAP: return "lssnap";
         case CEPH_MDS_OP_MKSNAP: return "mksnap";
         case CEPH_MDS_OP_RMSNAP: return "rmsnap";
+       case CEPH_MDS_OP_RENAMESNAP: return "renamesnap";
         case CEPH_MDS_OP_SETFILELOCK: return "setfilelock";
         case CEPH_MDS_OP_GETFILELOCK: return "getfilelock";
         }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c

index a63997b..4e99053 100644 (file)
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -44,7 +44,7 @@ static void ceph_put_super(struct super_block *s)
  
  static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
  {
-       struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode);
+       struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
         struct ceph_monmap *monmap = fsc->client->monc.monmap;
         struct ceph_statfs st;
         u64 fsid;
@@ -345,6 +345,11 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
         fsopt->rsize = CEPH_RSIZE_DEFAULT;
         fsopt->rasize = CEPH_RASIZE_DEFAULT;
         fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+       if (!fsopt->snapdir_name) {
+               err = -ENOMEM;
+               goto out;
+       }
+
         fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
         fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
         fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
@@ -406,31 +411,20 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
  {
         struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
         struct ceph_mount_options *fsopt = fsc->mount_options;
-       struct ceph_options *opt = fsc->client->options;
-
-       if (opt->flags & CEPH_OPT_FSID)
-               seq_printf(m, ",fsid=%pU", &opt->fsid);
-       if (opt->flags & CEPH_OPT_NOSHARE)
-               seq_puts(m, ",noshare");
-       if (opt->flags & CEPH_OPT_NOCRC)
-               seq_puts(m, ",nocrc");
-       if (opt->flags & CEPH_OPT_NOMSGAUTH)
-               seq_puts(m, ",nocephx_require_signatures");
-       if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
-               seq_puts(m, ",notcp_nodelay");
-
-       if (opt->name)
-               seq_printf(m, ",name=%s", opt->name);
-       if (opt->key)
-               seq_puts(m, ",secret=<hidden>");
-
-       if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
-               seq_printf(m, ",mount_timeout=%d", opt->mount_timeout);
-       if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
-               seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl);
-       if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
-               seq_printf(m, ",osdkeepalivetimeout=%d",
-                          opt->osd_keepalive_timeout);
+       size_t pos;
+       int ret;
+
+       /* a comma between MNT/MS and client options */
+       seq_putc(m, ',');
+       pos = m->count;
+
+       ret = ceph_print_client_options(m, fsc->client);
+       if (ret)
+               return ret;
+
+       /* retract our comma if no client options */
+       if (m->count == pos)
+               m->count--;
  
         if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
                 seq_puts(m, ",dirstat");
@@ -438,14 +432,10 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
                 seq_puts(m, ",norbytes");
         if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
                 seq_puts(m, ",noasyncreaddir");
-       if (fsopt->flags & CEPH_MOUNT_OPT_DCACHE)
-               seq_puts(m, ",dcache");
-       else
+       if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
                 seq_puts(m, ",nodcache");
         if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
                 seq_puts(m, ",fsc");
-       else
-               seq_puts(m, ",nofsc");
  
  #ifdef CONFIG_CEPH_FS_POSIX_ACL
         if (fsopt->sb_flags & MS_POSIXACL)
@@ -477,6 +467,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
                 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
         if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
                 seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+
         return 0;
  }
  
@@ -730,6 +721,11 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
         if (IS_ERR(req))
                 return ERR_CAST(req);
         req->r_path1 = kstrdup(path, GFP_NOFS);
+       if (!req->r_path1) {
+               root = ERR_PTR(-ENOMEM);
+               goto out;
+       }
+
         req->r_ino1.ino = CEPH_INO_ROOT;
         req->r_ino1.snap = CEPH_NOSNAP;
         req->r_started = started;
@@ -976,7 +972,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
         if (IS_ERR(res))
                 goto out_splat;
         dout("root %p inode %p ino %llx.%llx\n", res,
-            res->d_inode, ceph_vinop(res->d_inode));
+            d_inode(res), ceph_vinop(d_inode(res)));
         return res;
  
  out_splat:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h

index 04c8124..fa20e13 100644 (file)
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,7 +36,8 @@
  #define CEPH_MOUNT_OPT_DCACHE          (1<<9) /* use dcache for readdir etc */
  #define CEPH_MOUNT_OPT_FSCACHE         (1<<10) /* use fscache */
  
-#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES)
+#define CEPH_MOUNT_OPT_DEFAULT    (CEPH_MOUNT_OPT_RBYTES | \
+                                  CEPH_MOUNT_OPT_DCACHE)
  
  #define ceph_set_mount_opt(fsc, opt) \
         (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -881,7 +882,6 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
  
  /* file.c */
  extern const struct file_operations ceph_file_fops;
-extern const struct address_space_operations ceph_aops;
  
  extern int ceph_open(struct inode *inode, struct file *file);
  extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c

index 5a492ca..cd7ffad 100644 (file)
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -776,12 +776,12 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
         if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
                 return generic_getxattr(dentry, name, value, size);
  
-       return __ceph_getxattr(dentry->d_inode, name, value, size);
+       return __ceph_getxattr(d_inode(dentry), name, value, size);
  }
  
  ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ceph_inode_info *ci = ceph_inode(inode);
         struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
         u32 vir_namelen = 0;
@@ -847,7 +847,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
                               const char *value, size_t size, int flags)
  {
         struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ceph_inode_info *ci = ceph_inode(inode);
         struct ceph_mds_request *req;
         struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -877,16 +877,23 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
                 err = PTR_ERR(req);
                 goto out;
         }
-       req->r_inode = inode;
-       ihold(inode);
-       req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
-       req->r_num_caps = 1;
+
         req->r_args.setxattr.flags = cpu_to_le32(flags);
         req->r_path2 = kstrdup(name, GFP_NOFS);
+       if (!req->r_path2) {
+               ceph_mdsc_put_request(req);
+               err = -ENOMEM;
+               goto out;
+       }
  
         req->r_pagelist = pagelist;
         pagelist = NULL;
  
+       req->r_inode = inode;
+       ihold(inode);
+       req->r_num_caps = 1;
+       req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
+
         dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
         err = ceph_mdsc_do_request(mdsc, NULL, req);
         ceph_mdsc_put_request(req);
@@ -901,7 +908,7 @@ out:
  int __ceph_setxattr(struct dentry *dentry, const char *name,
                         const void *value, size_t size, int flags)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ceph_vxattr *vxattr;
         struct ceph_inode_info *ci = ceph_inode(inode);
         int issued;
@@ -995,7 +1002,7 @@ out:
  int ceph_setxattr(struct dentry *dentry, const char *name,
                   const void *value, size_t size, int flags)
  {
-       if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+       if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
                 return -EROFS;
  
         if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
@@ -1011,7 +1018,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
  {
         struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
         struct ceph_mds_client *mdsc = fsc->mdsc;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ceph_mds_request *req;
         int err;
  
@@ -1019,12 +1026,14 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
                                        USE_AUTH_MDS);
         if (IS_ERR(req))
                 return PTR_ERR(req);
+       req->r_path2 = kstrdup(name, GFP_NOFS);
+       if (!req->r_path2)
+               return -ENOMEM;
+
         req->r_inode = inode;
         ihold(inode);
-       req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
         req->r_num_caps = 1;
-       req->r_path2 = kstrdup(name, GFP_NOFS);
-
+       req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
         err = ceph_mdsc_do_request(mdsc, NULL, req);
         ceph_mdsc_put_request(req);
         return err;
@@ -1032,7 +1041,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
  
  int __ceph_removexattr(struct dentry *dentry, const char *name)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ceph_vxattr *vxattr;
         struct ceph_inode_info *ci = ceph_inode(inode);
         int issued;
@@ -1098,7 +1107,7 @@ out:
  
  int ceph_removexattr(struct dentry *dentry, const char *name)
  {
-       if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
+       if (ceph_snap(d_inode(dentry)) != CEPH_NOSNAP)
                 return -EROFS;
  
         if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c

index b8602f1..430e034 100644 (file)
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -301,7 +301,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
         if (full_path == NULL)
                 goto cdda_exit;
  
-       cifs_sb = CIFS_SB(mntpt->d_inode->i_sb);
+       cifs_sb = CIFS_SB(d_inode(mntpt)->i_sb);
         tlink = cifs_sb_tlink(cifs_sb);
         if (IS_ERR(tlink)) {
                 mnt = ERR_CAST(tlink);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c

index eaab4b2..f5089bd 100644 (file)
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -607,7 +607,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
         p = s = full_path;
  
         do {
-               struct inode *dir = dentry->d_inode;
+               struct inode *dir = d_inode(dentry);
                 struct dentry *child;
  
                 if (!dir) {
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c

index fa13d5e..84650a5 100644 (file)
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1898,7 +1898,7 @@ static void
  cifs_writev_requeue(struct cifs_writedata *wdata)
  {
         int i, rc = 0;
-       struct inode *inode = wdata->cfile->dentry->d_inode;
+       struct inode *inode = d_inode(wdata->cfile->dentry);
         struct TCP_Server_Info *server;
         unsigned int rest_len;
  
@@ -1981,7 +1981,7 @@ cifs_writev_complete(struct work_struct *work)
  {
         struct cifs_writedata *wdata = container_of(work,
                                                 struct cifs_writedata, work);
-       struct inode *inode = wdata->cfile->dentry->d_inode;
+       struct inode *inode = d_inode(wdata->cfile->dentry);
         int i = 0;
  
         if (wdata->result == 0) {
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c

index b72bc29..338d569 100644 (file)
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -745,13 +745,13 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
                 goto lookup_out;
         }
  
-       if (direntry->d_inode != NULL) {
+       if (d_really_is_positive(direntry)) {
                 cifs_dbg(FYI, "non-NULL inode in lookup\n");
         } else {
                 cifs_dbg(FYI, "NULL inode in lookup\n");
         }
         cifs_dbg(FYI, "Full path: %s inode = 0x%p\n",
-                full_path, direntry->d_inode);
+                full_path, d_inode(direntry));
  
         if (pTcon->unix_ext) {
                 rc = cifs_get_inode_info_unix(&newInode, full_path,
@@ -792,7 +792,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       if (direntry->d_inode) {
+       if (d_really_is_positive(direntry)) {
                 if (cifs_revalidate_dentry(direntry))
                         return 0;
                 else {
@@ -803,7 +803,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags)
                          * attributes will have been updated by
                          * cifs_revalidate_dentry().
                          */
-                       if (IS_AUTOMOUNT(direntry->d_inode) &&
+                       if (IS_AUTOMOUNT(d_inode(direntry)) &&
                            !(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) {
                                 spin_lock(&direntry->d_lock);
                                 direntry->d_flags |= DCACHE_NEED_AUTOMOUNT;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c

index ca2bc54..cafbf10 100644 (file)
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -273,7 +273,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                   struct tcon_link *tlink, __u32 oplock)
  {
         struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct cifsInodeInfo *cinode = CIFS_I(inode);
         struct cifsFileInfo *cfile;
         struct cifs_fid_locks *fdlocks;
@@ -357,7 +357,7 @@ cifsFileInfo_get(struct cifsFileInfo *cifs_file)
   */
  void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
  {
-       struct inode *inode = cifs_file->dentry->d_inode;
+       struct inode *inode = d_inode(cifs_file->dentry);
         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
         struct TCP_Server_Info *server = tcon->ses->server;
         struct cifsInodeInfo *cifsi = CIFS_I(inode);
@@ -386,7 +386,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
  
         if (list_empty(&cifsi->openFileList)) {
                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
-                        cifs_file->dentry->d_inode);
+                        d_inode(cifs_file->dentry));
                 /*
                  * In strict cache mode we need invalidate mapping on the last
                  * close  because it may cause a error when we open this file
@@ -572,7 +572,7 @@ static int
  cifs_relock_file(struct cifsFileInfo *cfile)
  {
         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
         int rc = 0;
  
@@ -620,7 +620,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
                 return rc;
         }
  
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
         cifs_sb = CIFS_SB(inode->i_sb);
         tcon = tlink_tcon(cfile->tlink);
         server = tcon->ses->server;
@@ -874,7 +874,7 @@ cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
  {
         bool rc = false;
         struct cifs_fid_locks *cur;
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
  
         list_for_each_entry(cur, &cinode->llist, llist) {
                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
@@ -899,7 +899,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
  {
         int rc = 0;
         struct cifsLockInfo *conf_lock;
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
         bool exist;
  
@@ -927,7 +927,7 @@ cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
  static void
  cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
  {
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         down_write(&cinode->lock_sem);
         list_add_tail(&lock->llist, &cfile->llist->locks);
         up_write(&cinode->lock_sem);
@@ -944,7 +944,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                  bool wait)
  {
         struct cifsLockInfo *conf_lock;
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         bool exist;
         int rc = 0;
  
@@ -1125,7 +1125,7 @@ struct lock_to_push {
  static int
  cifs_push_posix_locks(struct cifsFileInfo *cfile)
  {
-       struct inode *inode = cfile->dentry->d_inode;
+       struct inode *inode = d_inode(cfile->dentry);
         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
         struct file_lock *flock;
         struct file_lock_context *flctx = inode->i_flctx;
@@ -1214,7 +1214,7 @@ static int
  cifs_push_locks(struct cifsFileInfo *cfile)
  {
         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
         int rc = 0;
  
@@ -1382,7 +1382,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
         unsigned int max_num, num, max_buf;
         LOCKING_ANDX_RANGE *buf, *cur;
         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         struct cifsLockInfo *li, *tmp;
         __u64 length = 1 + flock->fl_end - flock->fl_start;
         struct list_head tmp_llist;
@@ -1488,7 +1488,7 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
         struct TCP_Server_Info *server = tcon->ses->server;
-       struct inode *inode = cfile->dentry->d_inode;
+       struct inode *inode = d_inode(cfile->dentry);
  
         if (posix_lck) {
                 int posix_lock_type;
@@ -1643,7 +1643,7 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
         struct TCP_Server_Info *server;
         unsigned int xid;
         struct dentry *dentry = open_file->dentry;
-       struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
+       struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
         struct cifs_io_parms io_parms;
  
         cifs_sb = CIFS_SB(dentry->d_sb);
@@ -1676,7 +1676,7 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
                                         break;
                         }
  
-                       len = min(server->ops->wp_retry_size(dentry->d_inode),
+                       len = min(server->ops->wp_retry_size(d_inode(dentry)),
                                   (unsigned int)write_size - total_written);
                         /* iov[0] is reserved for smb header */
                         iov[1].iov_base = (char *)write_data + total_written;
@@ -1696,9 +1696,9 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
                                 return rc;
                         }
                 } else {
-                       spin_lock(&dentry->d_inode->i_lock);
+                       spin_lock(&d_inode(dentry)->i_lock);
                         cifs_update_eof(cifsi, *offset, bytes_written);
-                       spin_unlock(&dentry->d_inode->i_lock);
+                       spin_unlock(&d_inode(dentry)->i_lock);
                         *offset += bytes_written;
                 }
         }
@@ -1706,12 +1706,12 @@ cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
         cifs_stats_bytes_written(tcon, total_written);
  
         if (total_written > 0) {
-               spin_lock(&dentry->d_inode->i_lock);
-               if (*offset > dentry->d_inode->i_size)
-                       i_size_write(dentry->d_inode, *offset);
-               spin_unlock(&dentry->d_inode->i_lock);
+               spin_lock(&d_inode(dentry)->i_lock);
+               if (*offset > d_inode(dentry)->i_size)
+                       i_size_write(d_inode(dentry), *offset);
+               spin_unlock(&d_inode(dentry)->i_lock);
         }
-       mark_inode_dirty_sync(dentry->d_inode);
+       mark_inode_dirty_sync(d_inode(dentry));
         free_xid(xid);
         return total_written;
  }
@@ -2406,7 +2406,7 @@ cifs_uncached_writev_complete(struct work_struct *work)
  {
         struct cifs_writedata *wdata = container_of(work,
                                         struct cifs_writedata, work);
-       struct inode *inode = wdata->cfile->dentry->d_inode;
+       struct inode *inode = d_inode(wdata->cfile->dentry);
         struct cifsInodeInfo *cifsi = CIFS_I(inode);
  
         spin_lock(&inode->i_lock);
@@ -3794,7 +3794,7 @@ void cifs_oplock_break(struct work_struct *work)
  {
         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
                                                   oplock_break);
-       struct inode *inode = cfile->dentry->d_inode;
+       struct inode *inode = d_inode(cfile->dentry);
         struct cifsInodeInfo *cinode = CIFS_I(inode);
         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
         struct TCP_Server_Info *server = tcon->ses->server;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c

index 3e126d7..55b5811 100644 (file)
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1067,7 +1067,7 @@ cifs_rename_pending_delete(const char *full_path, struct dentry *dentry,
         int rc;
         struct cifs_fid fid;
         struct cifs_open_parms oparms;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct cifsInodeInfo *cifsInode = CIFS_I(inode);
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
         struct tcon_link *tlink;
@@ -1196,7 +1196,7 @@ cifs_drop_nlink(struct inode *inode)
  }
  
  /*
- * If dentry->d_inode is null (usually meaning the cached dentry
+ * If d_inode(dentry) is null (usually meaning the cached dentry
   * is a negative dentry) then we would attempt a standard SMB delete, but
   * if that fails we can not attempt the fall back mechanisms on EACCESS
   * but will return the EACCESS to the caller. Note that the VFS does not call
@@ -1207,7 +1207,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry)
         int rc = 0;
         unsigned int xid;
         char *full_path = NULL;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct cifsInodeInfo *cifs_inode;
         struct super_block *sb = dir->i_sb;
         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -1551,13 +1551,13 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
         cifs_put_tlink(tlink);
  
         if (!rc) {
-               spin_lock(&direntry->d_inode->i_lock);
-               i_size_write(direntry->d_inode, 0);
-               clear_nlink(direntry->d_inode);
-               spin_unlock(&direntry->d_inode->i_lock);
+               spin_lock(&d_inode(direntry)->i_lock);
+               i_size_write(d_inode(direntry), 0);
+               clear_nlink(d_inode(direntry));
+               spin_unlock(&d_inode(direntry)->i_lock);
         }
  
-       cifsInode = CIFS_I(direntry->d_inode);
+       cifsInode = CIFS_I(d_inode(direntry));
         /* force revalidate to go get info when needed */
         cifsInode->time = 0;
  
@@ -1568,7 +1568,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
          */
         cifsInode->time = 0;
  
-       direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime =
+       d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime =
                 current_fs_time(inode->i_sb);
  
  rmdir_exit:
@@ -1727,7 +1727,7 @@ cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
  
  unlink_target:
         /* Try unlinking the target dentry if it's not negative */
-       if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
+       if (d_really_is_positive(target_dentry) && (rc == -EACCES || rc == -EEXIST)) {
                 if (d_is_dir(target_dentry))
                         tmprc = cifs_rmdir(target_dir, target_dentry);
                 else
@@ -1867,7 +1867,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry)
  {
         unsigned int xid;
         int rc = 0;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct super_block *sb = dentry->d_sb;
         char *full_path = NULL;
  
@@ -1919,7 +1919,7 @@ int cifs_revalidate_file(struct file *filp)
  int cifs_revalidate_dentry(struct dentry *dentry)
  {
         int rc;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         rc = cifs_revalidate_dentry_attr(dentry);
         if (rc)
@@ -1933,7 +1933,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
  {
         struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int rc;
  
         /*
@@ -2110,7 +2110,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
         int rc;
         unsigned int xid;
         char *full_path = NULL;
-       struct inode *inode = direntry->d_inode;
+       struct inode *inode = d_inode(direntry);
         struct cifsInodeInfo *cifsInode = CIFS_I(inode);
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
         struct tcon_link *tlink;
@@ -2251,7 +2251,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
         unsigned int xid;
         kuid_t uid = INVALID_UID;
         kgid_t gid = INVALID_GID;
-       struct inode *inode = direntry->d_inode;
+       struct inode *inode = d_inode(direntry);
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
         struct cifsInodeInfo *cifsInode = CIFS_I(inode);
         char *full_path = NULL;
@@ -2409,7 +2409,7 @@ cifs_setattr_exit:
  int
  cifs_setattr(struct dentry *direntry, struct iattr *attrs)
  {
-       struct inode *inode = direntry->d_inode;
+       struct inode *inode = d_inode(direntry);
         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
         struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
  
diff --git a/fs/cifs/link.c b/fs/cifs/link.c

index 2ec6037..252e672 100644 (file)
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -586,12 +586,12 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode,
          * if source file is cached (oplocked) revalidate will not go to server
          * until the file is closed or oplock broken so update nlinks locally
          */
-       if (old_file->d_inode) {
-               cifsInode = CIFS_I(old_file->d_inode);
+       if (d_really_is_positive(old_file)) {
+               cifsInode = CIFS_I(d_inode(old_file));
                 if (rc == 0) {
-                       spin_lock(&old_file->d_inode->i_lock);
-                       inc_nlink(old_file->d_inode);
-                       spin_unlock(&old_file->d_inode->i_lock);
+                       spin_lock(&d_inode(old_file)->i_lock);
+                       inc_nlink(d_inode(old_file));
+                       spin_unlock(&d_inode(old_file)->i_lock);
  
                         /*
                          * parent dir timestamps will update from srv within a
@@ -629,7 +629,7 @@ cifs_hl_exit:
  void *
  cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
  {
-       struct inode *inode = direntry->d_inode;
+       struct inode *inode = d_inode(direntry);
         int rc = -ENOMEM;
         unsigned int xid;
         char *full_path = NULL;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c

index 3379463..8442b8b 100644 (file)
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -473,7 +473,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
                                         continue;
  
                                 cifs_dbg(FYI, "file id match, oplock break\n");
-                               pCifsInode = CIFS_I(netfile->dentry->d_inode);
+                               pCifsInode = CIFS_I(d_inode(netfile->dentry));
  
                                 set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
                                         &pCifsInode->flags);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c

index c295338..b4a4723 100644 (file)
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -78,7 +78,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
  {
         struct dentry *dentry, *alias;
         struct inode *inode;
-       struct super_block *sb = parent->d_inode->i_sb;
+       struct super_block *sb = d_inode(parent)->i_sb;
         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
  
         cifs_dbg(FYI, "%s: for %s\n", __func__, name->name);
@@ -88,7 +88,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
                 return;
  
         if (dentry) {
-               inode = dentry->d_inode;
+               inode = d_inode(dentry);
                 if (inode) {
                         /*
                          * If we're generating inode numbers, then we don't
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c

index d297903..7bfdd60 100644 (file)
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -722,7 +722,7 @@ cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
  static void
  cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
  {
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         cfile->fid.netfid = fid->netfid;
         cifs_set_oplock_level(cinode, oplock);
         cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c

index 7198eac..2ab297d 100644 (file)
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -95,7 +95,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
         unsigned int max_num, num = 0, max_buf;
         struct smb2_lock_element *buf, *cur;
         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         struct cifsLockInfo *li, *tmp;
         __u64 length = 1 + flock->fl_end - flock->fl_start;
         struct list_head tmp_llist;
@@ -231,7 +231,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
         unsigned int xid;
         unsigned int max_num, max_buf;
         struct smb2_lock_element *buf;
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         struct cifs_fid_locks *fdlocks;
  
         xid = get_xid();
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c

index 22dfdf1..1c59070 100644 (file)
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -453,7 +453,7 @@ smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
  
         list_for_each(tmp, &tcon->openFileList) {
                 cfile = list_entry(tmp, struct cifsFileInfo, tlist);
-               cinode = CIFS_I(cfile->dentry->d_inode);
+               cinode = CIFS_I(d_inode(cfile->dentry));
  
                 if (memcmp(cinode->lease_key, rsp->LeaseKey,
                                                         SMB2_LEASE_KEY_SIZE))
@@ -590,7 +590,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
                                         continue;
  
                                 cifs_dbg(FYI, "file id match, oplock break\n");
-                               cinode = CIFS_I(cfile->dentry->d_inode);
+                               cinode = CIFS_I(d_inode(cfile->dentry));
  
                                 if (!CIFS_CACHE_WRITE(cinode) &&
                                     rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c

index eab05e1..54daee5 100644 (file)
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -524,7 +524,7 @@ smb2_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
  static void
  smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
  {
-       struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
  
         cfile->fid.persistent_fid = fid->persistent_fid;
@@ -793,7 +793,7 @@ smb2_set_file_size(const unsigned int xid, struct cifs_tcon *tcon,
          * If extending file more than one page make sparse. Many Linux fs
          * make files sparse by default when extending via ftruncate
          */
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
  
         if (!set_alloc && (size > inode->i_size + 8192)) {
                 __u8 set_sparse = 1;
@@ -1032,7 +1032,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon,
  
         xid = get_xid();
  
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
         cifsi = CIFS_I(inode);
  
         /* if file not oplocked can't be sure whether asking to extend size */
@@ -1083,7 +1083,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
  
         xid = get_xid();
  
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
         cifsi = CIFS_I(inode);
  
         /* Need to make file sparse, if not already, before freeing range. */
@@ -1115,7 +1115,7 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon,
  
         xid = get_xid();
  
-       inode = cfile->dentry->d_inode;
+       inode = d_inode(cfile->dentry);
         cifsi = CIFS_I(inode);
  
         /* if file not oplocked can't be sure whether asking to extend size */
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c

index 72a4d10..ff9e1f8 100644 (file)
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -50,9 +50,9 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
  
         if (direntry == NULL)
                 return -EIO;
-       if (direntry->d_inode == NULL)
+       if (d_really_is_negative(direntry))
                 return -EIO;
-       sb = direntry->d_inode->i_sb;
+       sb = d_inode(direntry)->i_sb;
         if (sb == NULL)
                 return -EIO;
  
@@ -111,9 +111,9 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
  
         if (direntry == NULL)
                 return -EIO;
-       if (direntry->d_inode == NULL)
+       if (d_really_is_negative(direntry))
                 return -EIO;
-       sb = direntry->d_inode->i_sb;
+       sb = d_inode(direntry)->i_sb;
         if (sb == NULL)
                 return -EIO;
  
@@ -177,12 +177,12 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
                         memcpy(pacl, ea_value, value_size);
                         if (pTcon->ses->server->ops->set_acl)
                                 rc = pTcon->ses->server->ops->set_acl(pacl,
-                                               value_size, direntry->d_inode,
+                                               value_size, d_inode(direntry),
                                                 full_path, CIFS_ACL_DACL);
                         else
                                 rc = -EOPNOTSUPP;
                         if (rc == 0) /* force revalidate of the inode */
-                               CIFS_I(direntry->d_inode)->time = 0;
+                               CIFS_I(d_inode(direntry))->time = 0;
                         kfree(pacl);
                 }
  #else
@@ -246,9 +246,9 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
  
         if (direntry == NULL)
                 return -EIO;
-       if (direntry->d_inode == NULL)
+       if (d_really_is_negative(direntry))
                 return -EIO;
-       sb = direntry->d_inode->i_sb;
+       sb = d_inode(direntry)->i_sb;
         if (sb == NULL)
                 return -EIO;
  
@@ -324,7 +324,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
                                 goto get_ea_exit; /* rc already EOPNOTSUPP */
  
                         pacl = pTcon->ses->server->ops->get_acl(cifs_sb,
-                                       direntry->d_inode, full_path, &acllen);
+                                       d_inode(direntry), full_path, &acllen);
                         if (IS_ERR(pacl)) {
                                 rc = PTR_ERR(pacl);
                                 cifs_dbg(VFS, "%s: error %zd getting sec desc\n",
@@ -382,9 +382,9 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
  
         if (direntry == NULL)
                 return -EIO;
-       if (direntry->d_inode == NULL)
+       if (d_really_is_negative(direntry))
                 return -EIO;
-       sb = direntry->d_inode->i_sb;
+       sb = d_inode(direntry)->i_sb;
         if (sb == NULL)
                 return -EIO;
  
diff --git a/fs/coda/cache.c b/fs/coda/cache.c

index 46ee6f2..5bb630a 100644 (file)
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -94,8 +94,8 @@ static void coda_flag_children(struct dentry *parent, int flag)
         spin_lock(&parent->d_lock);
         list_for_each_entry(de, &parent->d_subdirs, d_child) {
                 /* don't know what to do with negative dentries */
-               if (de->d_inode ) 
-                       coda_flag_inode(de->d_inode, flag);
+               if (d_inode(de) ) 
+                       coda_flag_inode(d_inode(de), flag);
         }
         spin_unlock(&parent->d_lock);
         return; 
diff --git a/fs/coda/dir.c b/fs/coda/dir.c

index 60cb88c..fda9f43 100644 (file)
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -201,7 +201,7 @@ err_out:
  static int coda_link(struct dentry *source_de, struct inode *dir_inode, 
           struct dentry *de)
  {
-       struct inode *inode = source_de->d_inode;
+       struct inode *inode = d_inode(source_de);
          const char * name = de->d_name.name;
         int len = de->d_name.len;
         int error;
@@ -266,7 +266,7 @@ static int coda_unlink(struct inode *dir, struct dentry *de)
                 return error;
  
         coda_dir_update_mtime(dir);
-       drop_nlink(de->d_inode);
+       drop_nlink(d_inode(de));
         return 0;
  }
  
@@ -279,8 +279,8 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
         error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len);
         if (!error) {
                 /* VFS may delete the child */
-               if (de->d_inode)
-                       clear_nlink(de->d_inode);
+               if (d_really_is_positive(de))
+                       clear_nlink(d_inode(de));
  
                 /* fix the link count of the parent */
                 coda_dir_drop_nlink(dir);
@@ -303,14 +303,14 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
                              coda_i2f(new_dir), old_length, new_length,
                              (const char *) old_name, (const char *)new_name);
         if (!error) {
-               if (new_dentry->d_inode) {
+               if (d_really_is_positive(new_dentry)) {
                         if (d_is_dir(new_dentry)) {
                                 coda_dir_drop_nlink(old_dir);
                                 coda_dir_inc_nlink(new_dir);
                         }
                         coda_dir_update_mtime(old_dir);
                         coda_dir_update_mtime(new_dir);
-                       coda_flag_inode(new_dentry->d_inode, C_VATTR);
+                       coda_flag_inode(d_inode(new_dentry), C_VATTR);
                 } else {
                         coda_flag_inode(old_dir, C_VATTR);
                         coda_flag_inode(new_dir, C_VATTR);
@@ -449,13 +449,13 @@ static int coda_dentry_revalidate(struct dentry *de, unsigned int flags)
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       inode = de->d_inode;
+       inode = d_inode(de);
         if (!inode || is_root_inode(inode))
                 goto out;
         if (is_bad_inode(inode))
                 goto bad;
  
-       cii = ITOC(de->d_inode);
+       cii = ITOC(d_inode(de));
         if (!(cii->c_flags & (C_PURGE | C_FLUSH)))
                 goto out;
  
@@ -487,11 +487,11 @@ static int coda_dentry_delete(const struct dentry * dentry)
  {
         int flags;
  
-       if (!dentry->d_inode) 
+       if (d_really_is_negative(dentry)) 
                 return 0;
  
-       flags = (ITOC(dentry->d_inode)->c_flags) & C_PURGE;
-       if (is_bad_inode(dentry->d_inode) || flags) {
+       flags = (ITOC(d_inode(dentry))->c_flags) & C_PURGE;
+       if (is_bad_inode(d_inode(dentry)) || flags) {
                 return 1;
         }
         return 0;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c

index 82ec68b..cac1390 100644 (file)
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -257,15 +257,15 @@ static void coda_evict_inode(struct inode *inode)
  
  int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
-       int err = coda_revalidate_inode(dentry->d_inode);
+       int err = coda_revalidate_inode(d_inode(dentry));
         if (!err)
-               generic_fillattr(dentry->d_inode, stat);
+               generic_fillattr(d_inode(dentry), stat);
         return err;
  }
  
  int coda_setattr(struct dentry *de, struct iattr *iattr)
  {
-       struct inode *inode = de->d_inode;
+       struct inode *inode = d_inode(de);
         struct coda_vattr vattr;
         int error;
  
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c

index 4326d17..f36a404 100644 (file)
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -72,7 +72,7 @@ static long coda_pioctl(struct file *filp, unsigned int cmd,
         if (error)
                 return error;
  
-       target_inode = path.dentry->d_inode;
+       target_inode = d_inode(path.dentry);
  
         /* return if it is not a Coda inode */
         if (target_inode->i_sb != inode->i_sb) {
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c

index 5bb6e27..9b1ffaa 100644 (file)
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -820,8 +820,8 @@ int coda_downcall(struct venus_comm *vcp, int opcode, union outputArgs *out)
         case CODA_FLUSH:
                 coda_cache_clear_all(sb);
                 shrink_dcache_sb(sb);
-               if (sb->s_root->d_inode)
-                       coda_flag_inode(sb->s_root->d_inode, C_FLUSH);
+               if (d_really_is_positive(sb->s_root))
+                       coda_flag_inode(d_inode(sb->s_root), C_FLUSH);
                 break;
  
         case CODA_PURGEUSER:
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c

index acb3d63..c81ce7f 100644 (file)
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -289,7 +289,7 @@ static int configfs_create_dir(struct config_item *item, struct dentry *dentry)
         configfs_set_dir_dirent_depth(p->d_fsdata, dentry->d_fsdata);
         error = configfs_create(dentry, mode, init_dir);
         if (!error) {
-               inc_nlink(p->d_inode);
+               inc_nlink(d_inode(p));
                 item->ci_dentry = dentry;
         } else {
                 struct configfs_dirent *sd = dentry->d_fsdata;
@@ -375,8 +375,8 @@ static void remove_dir(struct dentry * d)
         list_del_init(&sd->s_sibling);
         spin_unlock(&configfs_dirent_lock);
         configfs_put(sd);
-       if (d->d_inode)
-               simple_rmdir(parent->d_inode,d);
+       if (d_really_is_positive(d))
+               simple_rmdir(d_inode(parent),d);
  
         pr_debug(" o %pd removing done (%d)\n", d, d_count(d));
  
@@ -513,7 +513,7 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
                         /* Abort if racing with mkdir() */
                         if (sd->s_type & CONFIGFS_USET_IN_MKDIR) {
                                 if (wait_mutex)
-                                       *wait_mutex = &sd->s_dentry->d_inode->i_mutex;
+                                       *wait_mutex = &d_inode(sd->s_dentry)->i_mutex;
                                 return -EAGAIN;
                         }
  
@@ -624,13 +624,13 @@ static void detach_groups(struct config_group *group)
  
                 child = sd->s_dentry;
  
-               mutex_lock(&child->d_inode->i_mutex);
+               mutex_lock(&d_inode(child)->i_mutex);
  
                 configfs_detach_group(sd->s_element);
-               child->d_inode->i_flags |= S_DEAD;
+               d_inode(child)->i_flags |= S_DEAD;
                 dont_mount(child);
  
-               mutex_unlock(&child->d_inode->i_mutex);
+               mutex_unlock(&d_inode(child)->i_mutex);
  
                 d_delete(child);
                 dput(child);
@@ -672,7 +672,7 @@ static int create_default_group(struct config_group *parent_group,
                         sd = child->d_fsdata;
                         sd->s_type |= CONFIGFS_USET_DEFAULT;
                 } else {
-                       BUG_ON(child->d_inode);
+                       BUG_ON(d_inode(child));
                         d_drop(child);
                         dput(child);
                 }
@@ -818,11 +818,11 @@ static int configfs_attach_item(struct config_item *parent_item,
                          * the VFS may already have hit and used them. Thus,
                          * we must lock them as rmdir() would.
                          */
-                       mutex_lock(&dentry->d_inode->i_mutex);
+                       mutex_lock(&d_inode(dentry)->i_mutex);
                         configfs_remove_dir(item);
-                       dentry->d_inode->i_flags |= S_DEAD;
+                       d_inode(dentry)->i_flags |= S_DEAD;
                         dont_mount(dentry);
-                       mutex_unlock(&dentry->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dentry)->i_mutex);
                         d_delete(dentry);
                 }
         }
@@ -858,16 +858,16 @@ static int configfs_attach_group(struct config_item *parent_item,
                  * We must also lock the inode to remove it safely in case of
                  * error, as rmdir() would.
                  */
-               mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
                 configfs_adjust_dir_dirent_depth_before_populate(sd);
                 ret = populate_groups(to_config_group(item));
                 if (ret) {
                         configfs_detach_item(item);
-                       dentry->d_inode->i_flags |= S_DEAD;
+                       d_inode(dentry)->i_flags |= S_DEAD;
                         dont_mount(dentry);
                 }
                 configfs_adjust_dir_dirent_depth_after_populate(sd);
-               mutex_unlock(&dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dentry)->i_mutex);
                 if (ret)
                         d_delete(dentry);
         }
@@ -1075,7 +1075,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
          * subsystem is really registered, and so we need to lock out
          * configfs_[un]register_subsystem().
          */
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
  
         root_sd = root->d_fsdata;
  
@@ -1111,7 +1111,7 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
  out_unlock_dirent_lock:
         spin_unlock(&configfs_dirent_lock);
  out_unlock_fs:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
  
         /*
          * If we succeeded, the fs is pinned via other methods.  If not,
@@ -1453,11 +1453,11 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
         down_write(&configfs_rename_sem);
         parent = item->parent->dentry;
  
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
  
         new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
         if (!IS_ERR(new_dentry)) {
-               if (!new_dentry->d_inode) {
+               if (d_really_is_negative(new_dentry)) {
                         error = config_item_set_name(item, "%s", new_name);
                         if (!error) {
                                 d_add(new_dentry, NULL);
@@ -1469,7 +1469,7 @@ int configfs_rename_dir(struct config_item * item, const char *new_name)
                         error = -EEXIST;
                 dput(new_dentry);
         }
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
         up_write(&configfs_rename_sem);
  
         return error;
@@ -1482,7 +1482,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
         struct configfs_dirent * parent_sd = dentry->d_fsdata;
         int err;
  
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
         /*
          * Fake invisibility if dir belongs to a group/default groups hierarchy
          * being attached
@@ -1495,7 +1495,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
                 else
                         err = 0;
         }
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
  
         return err;
  }
@@ -1505,11 +1505,11 @@ static int configfs_dir_close(struct inode *inode, struct file *file)
         struct dentry * dentry = file->f_path.dentry;
         struct configfs_dirent * cursor = file->private_data;
  
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
         spin_lock(&configfs_dirent_lock);
         list_del_init(&cursor->s_sibling);
         spin_unlock(&configfs_dirent_lock);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
  
         release_configfs_dirent(cursor);
  
@@ -1567,7 +1567,7 @@ static int configfs_readdir(struct file *file, struct dir_context *ctx)
                 spin_lock(&configfs_dirent_lock);
                 dentry = next->s_dentry;
                 if (dentry)
-                       inode = dentry->d_inode;
+                       inode = d_inode(dentry);
                 if (inode)
                         ino = inode->i_ino;
                 spin_unlock(&configfs_dirent_lock);
@@ -1590,7 +1590,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
  {
         struct dentry * dentry = file->f_path.dentry;
  
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
         switch (whence) {
                 case 1:
                         offset += file->f_pos;
@@ -1598,7 +1598,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                         if (offset >= 0)
                                 break;
                 default:
-                       mutex_unlock(&dentry->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dentry)->i_mutex);
                         return -EINVAL;
         }
         if (offset != file->f_pos) {
@@ -1624,7 +1624,7 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence)
                         spin_unlock(&configfs_dirent_lock);
                 }
         }
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
         return offset;
  }
  
@@ -1654,7 +1654,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
         sd = root->d_fsdata;
         link_group(to_config_group(sd->s_element), group);
  
-       mutex_lock_nested(&root->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(root)->i_mutex, I_MUTEX_PARENT);
  
         err = -ENOMEM;
         dentry = d_alloc_name(root, group->cg_item.ci_name);
@@ -1664,7 +1664,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                 err = configfs_attach_group(sd->s_element, &group->cg_item,
                                             dentry);
                 if (err) {
-                       BUG_ON(dentry->d_inode);
+                       BUG_ON(d_inode(dentry));
                         d_drop(dentry);
                         dput(dentry);
                 } else {
@@ -1674,7 +1674,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
                 }
         }
  
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
  
         if (err) {
                 unlink_group(group);
@@ -1695,9 +1695,9 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
                 return;
         }
  
-       mutex_lock_nested(&root->d_inode->i_mutex,
+       mutex_lock_nested(&d_inode(root)->i_mutex,
                           I_MUTEX_PARENT);
-       mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
         mutex_lock(&configfs_symlink_mutex);
         spin_lock(&configfs_dirent_lock);
         if (configfs_detach_prep(dentry, NULL)) {
@@ -1706,13 +1706,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
         spin_unlock(&configfs_dirent_lock);
         mutex_unlock(&configfs_symlink_mutex);
         configfs_detach_group(&group->cg_item);
-       dentry->d_inode->i_flags |= S_DEAD;
+       d_inode(dentry)->i_flags |= S_DEAD;
         dont_mount(dentry);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
  
         d_delete(dentry);
  
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
  
         dput(dentry);
  
diff --git a/fs/configfs/file.c b/fs/configfs/file.c

index 56d2cdc..403269f 100644 (file)
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -326,10 +326,10 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib
         umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
         int error = 0;
  
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_NORMAL);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_NORMAL);
         error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode,
                                      CONFIGFS_ITEM_ATTR);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
  
         return error;
  }
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c

index 5423a6a..8d89f5f 100644 (file)
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -56,7 +56,7 @@ static const struct inode_operations configfs_inode_operations ={
  
  int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
  {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
         struct configfs_dirent * sd = dentry->d_fsdata;
         struct iattr * sd_iattr;
         unsigned int ia_valid = iattr->ia_valid;
@@ -186,7 +186,7 @@ int configfs_create(struct dentry * dentry, umode_t mode, void (*init)(struct in
         if (!dentry)
                 return -ENOENT;
  
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 return -EEXIST;
  
         sd = dentry->d_fsdata;
@@ -194,7 +194,7 @@ int configfs_create(struct dentry * dentry, umode_t mode, void (*init)(struct in
         if (!inode)
                 return -ENOMEM;
  
-       p_inode = dentry->d_parent->d_inode;
+       p_inode = d_inode(dentry->d_parent);
         p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
         configfs_set_inode_lock_class(sd, inode);
  
@@ -236,11 +236,11 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
  
         if (dentry) {
                 spin_lock(&dentry->d_lock);
-               if (!d_unhashed(dentry) && dentry->d_inode) {
+               if (!d_unhashed(dentry) && d_really_is_positive(dentry)) {
                         dget_dlock(dentry);
                         __d_drop(dentry);
                         spin_unlock(&dentry->d_lock);
-                       simple_unlink(parent->d_inode, dentry);
+                       simple_unlink(d_inode(parent), dentry);
                 } else
                         spin_unlock(&dentry->d_lock);
         }
@@ -251,11 +251,11 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
         struct configfs_dirent * sd;
         struct configfs_dirent * parent_sd = dir->d_fsdata;
  
-       if (dir->d_inode == NULL)
+       if (d_really_is_negative(dir))
                 /* no inode means this hasn't been made visible yet */
                 return;
  
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
         list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
                 if (!sd->s_element)
                         continue;
@@ -268,5 +268,5 @@ void configfs_hash_and_remove(struct dentry * dir, const char * name)
                         break;
                 }
         }
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
  }
diff --git a/fs/dax.c b/fs/dax.c

index 0bb0aec..6f65f00 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -209,7 +209,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
         }
  
         /* Protects against truncate */
-       atomic_inc(&inode->i_dio_count);
+       inode_dio_begin(inode);
  
         retval = dax_io(inode, iter, pos, end, get_block, &bh);
  
@@ -219,7 +219,7 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
         if ((retval > 0) && end_io)
                 end_io(iocb, pos, retval, bh.b_private);
  
-       inode_dio_done(inode);
+       inode_dio_end(inode);
   out:
         return retval;
  }
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c

index 517e649..830a7e7 100644 (file)
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -45,7 +45,7 @@ const struct file_operations debugfs_file_operations = {
  
  static void *debugfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       nd_set_link(nd, dentry->d_inode->i_private);
+       nd_set_link(nd, d_inode(dentry)->i_private);
         return NULL;
  }
  
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c

index c9ee0df..c1e7ffb 100644 (file)
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -46,7 +46,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb)
  
  static inline int debugfs_positive(struct dentry *dentry)
  {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
  }
  
  struct debugfs_mount_opts {
@@ -124,7 +124,7 @@ static int debugfs_parse_options(char *data, struct debugfs_mount_opts *opts)
  static int debugfs_apply_options(struct super_block *sb)
  {
         struct debugfs_fs_info *fsi = sb->s_fs_info;
-       struct inode *inode = sb->s_root->d_inode;
+       struct inode *inode = d_inode(sb->s_root);
         struct debugfs_mount_opts *opts = &fsi->mount_opts;
  
         inode->i_mode &= ~S_IALLUGO;
@@ -188,7 +188,7 @@ static struct vfsmount *debugfs_automount(struct path *path)
  {
         struct vfsmount *(*f)(void *);
         f = (struct vfsmount *(*)(void *))path->dentry->d_fsdata;
-       return f(path->dentry->d_inode->i_private);
+       return f(d_inode(path->dentry)->i_private);
  }
  
  static const struct dentry_operations debugfs_dops = {
@@ -270,20 +270,20 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
         if (!parent)
                 parent = debugfs_mount->mnt_root;
  
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         dentry = lookup_one_len(name, parent, strlen(name));
-       if (!IS_ERR(dentry) && dentry->d_inode) {
+       if (!IS_ERR(dentry) && d_really_is_positive(dentry)) {
                 dput(dentry);
                 dentry = ERR_PTR(-EEXIST);
         }
         if (IS_ERR(dentry))
-               mutex_unlock(&parent->d_inode->i_mutex);
+               mutex_unlock(&d_inode(parent)->i_mutex);
         return dentry;
  }
  
  static struct dentry *failed_creating(struct dentry *dentry)
  {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
         dput(dentry);
         simple_release_fs(&debugfs_mount, &debugfs_mount_count);
         return NULL;
@@ -291,7 +291,7 @@ static struct dentry *failed_creating(struct dentry *dentry)
  
  static struct dentry *end_creating(struct dentry *dentry)
  {
-       mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry->d_parent)->i_mutex);
         return dentry;
  }
  
@@ -344,7 +344,7 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode,
         inode->i_fop = fops ? fops : &debugfs_file_operations;
         inode->i_private = data;
         d_instantiate(dentry, inode);
-       fsnotify_create(dentry->d_parent->d_inode, dentry);
+       fsnotify_create(d_inode(dentry->d_parent), dentry);
         return end_creating(dentry);
  }
  EXPORT_SYMBOL_GPL(debugfs_create_file);
@@ -384,7 +384,7 @@ struct dentry *debugfs_create_file_size(const char *name, umode_t mode,
         struct dentry *de = debugfs_create_file(name, mode, parent, data, fops);
  
         if (de)
-               de->d_inode->i_size = file_size;
+               d_inode(de)->i_size = file_size;
         return de;
  }
  EXPORT_SYMBOL_GPL(debugfs_create_file_size);
@@ -426,8 +426,8 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
         /* directory inodes start off with i_nlink == 2 (for "." entry) */
         inc_nlink(inode);
         d_instantiate(dentry, inode);
-       inc_nlink(dentry->d_parent->d_inode);
-       fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
+       inc_nlink(d_inode(dentry->d_parent));
+       fsnotify_mkdir(d_inode(dentry->d_parent), dentry);
         return end_creating(dentry);
  }
  EXPORT_SYMBOL_GPL(debugfs_create_dir);
@@ -525,9 +525,9 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
         if (debugfs_positive(dentry)) {
                 dget(dentry);
                 if (d_is_dir(dentry))
-                       ret = simple_rmdir(parent->d_inode, dentry);
+                       ret = simple_rmdir(d_inode(parent), dentry);
                 else
-                       simple_unlink(parent->d_inode, dentry);
+                       simple_unlink(d_inode(parent), dentry);
                 if (!ret)
                         d_delete(dentry);
                 dput(dentry);
@@ -557,12 +557,12 @@ void debugfs_remove(struct dentry *dentry)
                 return;
  
         parent = dentry->d_parent;
-       if (!parent || !parent->d_inode)
+       if (!parent || d_really_is_negative(parent))
                 return;
  
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         ret = __debugfs_remove(dentry, parent);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
         if (!ret)
                 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
  }
@@ -588,12 +588,12 @@ void debugfs_remove_recursive(struct dentry *dentry)
                 return;
  
         parent = dentry->d_parent;
-       if (!parent || !parent->d_inode)
+       if (!parent || d_really_is_negative(parent))
                 return;
  
         parent = dentry;
   down:
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
   loop:
         /*
          * The parent->d_subdirs is protected by the d_lock. Outside that
@@ -608,7 +608,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
                 /* perhaps simple_empty(child) makes more sense */
                 if (!list_empty(&child->d_subdirs)) {
                         spin_unlock(&parent->d_lock);
-                       mutex_unlock(&parent->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(parent)->i_mutex);
                         parent = child;
                         goto down;
                 }
@@ -629,10 +629,10 @@ void debugfs_remove_recursive(struct dentry *dentry)
         }
         spin_unlock(&parent->d_lock);
  
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
         child = parent;
         parent = parent->d_parent;
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
  
         if (child != dentry)
                 /* go up */
@@ -640,7 +640,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
  
         if (!__debugfs_remove(child, parent))
                 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
  }
  EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
  
@@ -672,27 +672,27 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
  
         trap = lock_rename(new_dir, old_dir);
         /* Source or destination directories don't exist? */
-       if (!old_dir->d_inode || !new_dir->d_inode)
+       if (d_really_is_negative(old_dir) || d_really_is_negative(new_dir))
                 goto exit;
         /* Source does not exist, cyclic rename, or mountpoint? */
-       if (!old_dentry->d_inode || old_dentry == trap ||
+       if (d_really_is_negative(old_dentry) || old_dentry == trap ||
             d_mountpoint(old_dentry))
                 goto exit;
         dentry = lookup_one_len(new_name, new_dir, strlen(new_name));
         /* Lookup failed, cyclic rename or target exists? */
-       if (IS_ERR(dentry) || dentry == trap || dentry->d_inode)
+       if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry))
                 goto exit;
  
         old_name = fsnotify_oldname_init(old_dentry->d_name.name);
  
-       error = simple_rename(old_dir->d_inode, old_dentry, new_dir->d_inode,
+       error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
                 dentry);
         if (error) {
                 fsnotify_oldname_free(old_name);
                 goto exit;
         }
         d_move(old_dentry, dentry);
-       fsnotify_move(old_dir->d_inode, new_dir->d_inode, old_name,
+       fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name,
                 d_is_dir(old_dentry),
                 NULL, old_dentry);
         fsnotify_oldname_free(old_name);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c

index cfe8466..add5663 100644 (file)
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -253,7 +253,7 @@ static int mknod_ptmx(struct super_block *sb)
         if (!uid_valid(root_uid) || !gid_valid(root_gid))
                 return -EINVAL;
  
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
  
         /* If we have already created ptmx node, return */
         if (fsi->ptmx_dentry) {
@@ -290,7 +290,7 @@ static int mknod_ptmx(struct super_block *sb)
         fsi->ptmx_dentry = dentry;
         rc = 0;
  out:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
         return rc;
  }
  
@@ -298,7 +298,7 @@ static void update_ptmx_mode(struct pts_fs_info *fsi)
  {
         struct inode *inode;
         if (fsi->ptmx_dentry) {
-               inode = fsi->ptmx_dentry->d_inode;
+               inode = d_inode(fsi->ptmx_dentry);
                 inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode;
         }
  }
@@ -602,18 +602,18 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
  
         sprintf(s, "%d", index);
  
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
  
         dentry = d_alloc_name(root, s);
         if (dentry) {
                 d_add(dentry, inode);
-               fsnotify_create(root->d_inode, dentry);
+               fsnotify_create(d_inode(root), dentry);
         } else {
                 iput(inode);
                 inode = ERR_PTR(-ENOMEM);
         }
  
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
  
         return inode;
  }
@@ -658,7 +658,7 @@ void devpts_pty_kill(struct inode *inode)
  
         BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
  
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
  
         dentry = d_find_alias(inode);
  
@@ -667,7 +667,7 @@ void devpts_pty_kill(struct inode *inode)
         dput(dentry);   /* d_alloc_name() in devpts_pty_new() */
         dput(dentry);           /* d_find_alias above */
  
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
  }
  
  static int __init init_devpts_fs(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c

index c3b560b..745d234 100644 (file)
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -253,7 +253,9 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
         if (dio->end_io && dio->result)
                 dio->end_io(dio->iocb, offset, transferred, dio->private);
  
-       inode_dio_done(dio->inode);
+       if (!(dio->flags & DIO_SKIP_DIO_COUNT))
+               inode_dio_end(dio->inode);
+
         if (is_async) {
                 if (dio->rw & WRITE) {
                         int err;
@@ -1195,7 +1197,8 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
         /*
          * Will be decremented at I/O completion time.
          */
-       atomic_inc(&inode->i_dio_count);
+       if (!(dio->flags & DIO_SKIP_DIO_COUNT))
+               inode_dio_begin(inode);
  
         retval = 0;
         sdio.blkbits = blkbits;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c

index 719e1ce..97315f2 100644 (file)
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1326,7 +1326,7 @@ static int ecryptfs_read_headers_virt(char *page_virt,
         if (rc)
                 goto out;
         if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED))
-               ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode);
+               ecryptfs_i_size_init(page_virt, d_inode(ecryptfs_dentry));
         offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
         rc = ecryptfs_process_flags(crypt_stat, (page_virt + offset),
                                     &bytes_read);
@@ -1425,7 +1425,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
  {
         int rc;
         char *page_virt;
-       struct inode *ecryptfs_inode = ecryptfs_dentry->d_inode;
+       struct inode *ecryptfs_inode = d_inode(ecryptfs_dentry);
         struct ecryptfs_crypt_stat *crypt_stat =
             &ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
         struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c

index 4000f6b..8db0b46 100644 (file)
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -54,11 +54,11 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
                 return -ECHILD;
  
         rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                 struct inode *lower_inode =
-                       ecryptfs_inode_to_lower(dentry->d_inode);
+                       ecryptfs_inode_to_lower(d_inode(dentry));
  
-               fsstack_copy_attr_all(dentry->d_inode, lower_inode);
+               fsstack_copy_attr_all(d_inode(dentry), lower_inode);
         }
         return rc;
  }
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c

index a65786e..72afcc6 100644 (file)
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -130,7 +130,7 @@ struct kmem_cache *ecryptfs_file_info_cache;
  
  static int read_or_initialize_metadata(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
         struct ecryptfs_crypt_stat *crypt_stat;
         int rc;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c

index b08b518..fc850b5 100644 (file)
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -41,13 +41,13 @@ static struct dentry *lock_parent(struct dentry *dentry)
         struct dentry *dir;
  
         dir = dget_parent(dentry);
-       mutex_lock_nested(&(dir->d_inode->i_mutex), I_MUTEX_PARENT);
+       mutex_lock_nested(&(d_inode(dir)->i_mutex), I_MUTEX_PARENT);
         return dir;
  }
  
  static void unlock_dir(struct dentry *dir)
  {
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         dput(dir);
  }
  
@@ -131,7 +131,7 @@ struct inode *ecryptfs_get_inode(struct inode *lower_inode,
  static int ecryptfs_interpose(struct dentry *lower_dentry,
                               struct dentry *dentry, struct super_block *sb)
  {
-       struct inode *inode = ecryptfs_get_inode(lower_dentry->d_inode, sb);
+       struct inode *inode = ecryptfs_get_inode(d_inode(lower_dentry), sb);
  
         if (IS_ERR(inode))
                 return PTR_ERR(inode);
@@ -189,21 +189,21 @@ ecryptfs_do_create(struct inode *directory_inode,
  
         lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
         lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, true);
+       rc = vfs_create(d_inode(lower_dir_dentry), lower_dentry, mode, true);
         if (rc) {
                 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
                        "rc = [%d]\n", __func__, rc);
                 inode = ERR_PTR(rc);
                 goto out_lock;
         }
-       inode = __ecryptfs_get_inode(lower_dentry->d_inode,
+       inode = __ecryptfs_get_inode(d_inode(lower_dentry),
                                      directory_inode->i_sb);
         if (IS_ERR(inode)) {
-               vfs_unlink(lower_dir_dentry->d_inode, lower_dentry, NULL);
+               vfs_unlink(d_inode(lower_dir_dentry), lower_dentry, NULL);
                 goto out_lock;
         }
-       fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode);
+       fsstack_copy_attr_times(directory_inode, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(directory_inode, d_inode(lower_dir_dentry));
  out_lock:
         unlock_dir(lower_dir_dentry);
         return inode;
@@ -332,7 +332,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
                                      struct dentry *lower_dentry,
                                      struct inode *dir_inode)
  {
-       struct inode *inode, *lower_inode = lower_dentry->d_inode;
+       struct inode *inode, *lower_inode = d_inode(lower_dentry);
         struct ecryptfs_dentry_info *dentry_info;
         struct vfsmount *lower_mnt;
         int rc = 0;
@@ -347,14 +347,14 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
         }
  
         lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
-       fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
+       fsstack_copy_attr_atime(dir_inode, d_inode(lower_dentry->d_parent));
         BUG_ON(!d_count(lower_dentry));
  
         ecryptfs_set_dentry_private(dentry, dentry_info);
         dentry_info->lower_path.mnt = lower_mnt;
         dentry_info->lower_path.dentry = lower_dentry;
  
-       if (!lower_dentry->d_inode) {
+       if (d_really_is_negative(lower_dentry)) {
                 /* We want to add because we couldn't find in lower */
                 d_add(dentry, NULL);
                 return 0;
@@ -400,11 +400,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
         int rc = 0;
  
         lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
-       mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
         lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
                                       lower_dir_dentry,
                                       ecryptfs_dentry->d_name.len);
-       mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
         if (IS_ERR(lower_dentry)) {
                 rc = PTR_ERR(lower_dentry);
                 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -412,7 +412,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                                 ecryptfs_dentry);
                 goto out;
         }
-       if (lower_dentry->d_inode)
+       if (d_really_is_positive(lower_dentry))
                 goto interpose;
         mount_crypt_stat = &ecryptfs_superblock_to_private(
                                 ecryptfs_dentry->d_sb)->mount_crypt_stat;
@@ -429,11 +429,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
                        "filename; rc = [%d]\n", __func__, rc);
                 goto out;
         }
-       mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dir_dentry)->i_mutex);
         lower_dentry = lookup_one_len(encrypted_and_encoded_name,
                                       lower_dir_dentry,
                                       encrypted_and_encoded_name_size);
-       mutex_unlock(&lower_dir_dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(lower_dir_dentry)->i_mutex);
         if (IS_ERR(lower_dentry)) {
                 rc = PTR_ERR(lower_dentry);
                 ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -458,24 +458,24 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
         u64 file_size_save;
         int rc;
  
-       file_size_save = i_size_read(old_dentry->d_inode);
+       file_size_save = i_size_read(d_inode(old_dentry));
         lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
         lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
         dget(lower_old_dentry);
         dget(lower_new_dentry);
         lower_dir_dentry = lock_parent(lower_new_dentry);
-       rc = vfs_link(lower_old_dentry, lower_dir_dentry->d_inode,
+       rc = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry),
                       lower_new_dentry, NULL);
-       if (rc || !lower_new_dentry->d_inode)
+       if (rc || d_really_is_negative(lower_new_dentry))
                 goto out_lock;
         rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
         if (rc)
                 goto out_lock;
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
-       set_nlink(old_dentry->d_inode,
-                 ecryptfs_inode_to_lower(old_dentry->d_inode)->i_nlink);
-       i_size_write(new_dentry->d_inode, file_size_save);
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
+       set_nlink(d_inode(old_dentry),
+                 ecryptfs_inode_to_lower(d_inode(old_dentry))->i_nlink);
+       i_size_write(d_inode(new_dentry), file_size_save);
  out_lock:
         unlock_dir(lower_dir_dentry);
         dput(lower_new_dentry);
@@ -485,7 +485,7 @@ out_lock:
  
  static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
  {
-       return ecryptfs_do_unlink(dir, dentry, dentry->d_inode);
+       return ecryptfs_do_unlink(dir, dentry, d_inode(dentry));
  }
  
  static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
@@ -510,20 +510,20 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
                                                   strlen(symname));
         if (rc)
                 goto out_lock;
-       rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
+       rc = vfs_symlink(d_inode(lower_dir_dentry), lower_dentry,
                          encoded_symname);
         kfree(encoded_symname);
-       if (rc || !lower_dentry->d_inode)
+       if (rc || d_really_is_negative(lower_dentry))
                 goto out_lock;
         rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
         if (rc)
                 goto out_lock;
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
  out_lock:
         unlock_dir(lower_dir_dentry);
         dput(lower_dentry);
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 d_drop(dentry);
         return rc;
  }
@@ -536,18 +536,18 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
  
         lower_dentry = ecryptfs_dentry_to_lower(dentry);
         lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode);
-       if (rc || !lower_dentry->d_inode)
+       rc = vfs_mkdir(d_inode(lower_dir_dentry), lower_dentry, mode);
+       if (rc || d_really_is_negative(lower_dentry))
                 goto out;
         rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
         if (rc)
                 goto out;
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
-       set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
+       set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
  out:
         unlock_dir(lower_dir_dentry);
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 d_drop(dentry);
         return rc;
  }
@@ -562,12 +562,12 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
         dget(dentry);
         lower_dir_dentry = lock_parent(lower_dentry);
         dget(lower_dentry);
-       rc = vfs_rmdir(lower_dir_dentry->d_inode, lower_dentry);
+       rc = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry);
         dput(lower_dentry);
-       if (!rc && dentry->d_inode)
-               clear_nlink(dentry->d_inode);
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       set_nlink(dir, lower_dir_dentry->d_inode->i_nlink);
+       if (!rc && d_really_is_positive(dentry))
+               clear_nlink(d_inode(dentry));
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
         unlock_dir(lower_dir_dentry);
         if (!rc)
                 d_drop(dentry);
@@ -584,17 +584,17 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev
  
         lower_dentry = ecryptfs_dentry_to_lower(dentry);
         lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev);
-       if (rc || !lower_dentry->d_inode)
+       rc = vfs_mknod(d_inode(lower_dir_dentry), lower_dentry, mode, dev);
+       if (rc || d_really_is_negative(lower_dentry))
                 goto out;
         rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
         if (rc)
                 goto out;
-       fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode);
-       fsstack_copy_inode_size(dir, lower_dir_dentry->d_inode);
+       fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
+       fsstack_copy_inode_size(dir, d_inode(lower_dir_dentry));
  out:
         unlock_dir(lower_dir_dentry);
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 d_drop(dentry);
         return rc;
  }
@@ -617,7 +617,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         dget(lower_new_dentry);
         lower_old_dir_dentry = dget_parent(lower_old_dentry);
         lower_new_dir_dentry = dget_parent(lower_new_dentry);
-       target_inode = new_dentry->d_inode;
+       target_inode = d_inode(new_dentry);
         trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
         /* source should not be ancestor of target */
         if (trap == lower_old_dentry) {
@@ -629,17 +629,17 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 rc = -ENOTEMPTY;
                 goto out_lock;
         }
-       rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
-                       lower_new_dir_dentry->d_inode, lower_new_dentry,
+       rc = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry,
+                       d_inode(lower_new_dir_dentry), lower_new_dentry,
                         NULL, 0);
         if (rc)
                 goto out_lock;
         if (target_inode)
                 fsstack_copy_attr_all(target_inode,
                                       ecryptfs_inode_to_lower(target_inode));
-       fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
+       fsstack_copy_attr_all(new_dir, d_inode(lower_new_dir_dentry));
         if (new_dir != old_dir)
-               fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
+               fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
  out_lock:
         unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
         dput(lower_new_dir_dentry);
@@ -662,7 +662,7 @@ static char *ecryptfs_readlink_lower(struct dentry *dentry, size_t *bufsiz)
                 return ERR_PTR(-ENOMEM);
         old_fs = get_fs();
         set_fs(get_ds());
-       rc = lower_dentry->d_inode->i_op->readlink(lower_dentry,
+       rc = d_inode(lower_dentry)->i_op->readlink(lower_dentry,
                                                    (char __user *)lower_buf,
                                                    PATH_MAX);
         set_fs(old_fs);
@@ -681,8 +681,8 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
         char *buf = ecryptfs_readlink_lower(dentry, &len);
         if (IS_ERR(buf))
                 goto out;
-       fsstack_copy_attr_atime(dentry->d_inode,
-                               ecryptfs_dentry_to_lower(dentry)->d_inode);
+       fsstack_copy_attr_atime(d_inode(dentry),
+                               d_inode(ecryptfs_dentry_to_lower(dentry)));
         buf[len] = '\0';
  out:
         nd_set_link(nd, buf);
@@ -738,7 +738,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
                           struct iattr *lower_ia)
  {
         int rc = 0;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ecryptfs_crypt_stat *crypt_stat;
         loff_t i_size = i_size_read(inode);
         loff_t lower_size_before_truncate;
@@ -751,7 +751,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
         rc = ecryptfs_get_lower_file(dentry, inode);
         if (rc)
                 return rc;
-       crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+       crypt_stat = &ecryptfs_inode_to_private(d_inode(dentry))->crypt_stat;
         /* Switch on growing or shrinking file */
         if (ia->ia_size > i_size) {
                 char zero[] = { 0x00 };
@@ -858,7 +858,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
         struct iattr lower_ia = { .ia_valid = 0 };
         int rc;
  
-       rc = ecryptfs_inode_newsize_ok(dentry->d_inode, new_length);
+       rc = ecryptfs_inode_newsize_ok(d_inode(dentry), new_length);
         if (rc)
                 return rc;
  
@@ -866,9 +866,9 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
         if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
                 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
  
-               mutex_lock(&lower_dentry->d_inode->i_mutex);
+               mutex_lock(&d_inode(lower_dentry)->i_mutex);
                 rc = notify_change(lower_dentry, &lower_ia, NULL);
-               mutex_unlock(&lower_dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(lower_dentry)->i_mutex);
         }
         return rc;
  }
@@ -900,10 +900,10 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
         struct inode *lower_inode;
         struct ecryptfs_crypt_stat *crypt_stat;
  
-       crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
+       crypt_stat = &ecryptfs_inode_to_private(d_inode(dentry))->crypt_stat;
         if (!(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED))
                 ecryptfs_init_crypt_stat(crypt_stat);
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         lower_inode = ecryptfs_inode_to_lower(inode);
         lower_dentry = ecryptfs_dentry_to_lower(dentry);
         mutex_lock(&crypt_stat->cs_mutex);
@@ -967,9 +967,9 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
         if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
                 lower_ia.ia_valid &= ~ATTR_MODE;
  
-       mutex_lock(&lower_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dentry)->i_mutex);
         rc = notify_change(lower_dentry, &lower_ia, NULL);
-       mutex_unlock(&lower_dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
  out:
         fsstack_copy_attr_all(inode, lower_inode);
         return rc;
@@ -983,7 +983,7 @@ static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry,
  
         mount_crypt_stat = &ecryptfs_superblock_to_private(
                                                 dentry->d_sb)->mount_crypt_stat;
-       generic_fillattr(dentry->d_inode, stat);
+       generic_fillattr(d_inode(dentry), stat);
         if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
                 char *target;
                 size_t targetsiz;
@@ -1007,9 +1007,9 @@ static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
  
         rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat);
         if (!rc) {
-               fsstack_copy_attr_all(dentry->d_inode,
-                                     ecryptfs_inode_to_lower(dentry->d_inode));
-               generic_fillattr(dentry->d_inode, stat);
+               fsstack_copy_attr_all(d_inode(dentry),
+                                     ecryptfs_inode_to_lower(d_inode(dentry)));
+               generic_fillattr(d_inode(dentry), stat);
                 stat->blocks = lower_stat.blocks;
         }
         return rc;
@@ -1023,14 +1023,14 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
         struct dentry *lower_dentry;
  
         lower_dentry = ecryptfs_dentry_to_lower(dentry);
-       if (!lower_dentry->d_inode->i_op->setxattr) {
+       if (!d_inode(lower_dentry)->i_op->setxattr) {
                 rc = -EOPNOTSUPP;
                 goto out;
         }
  
         rc = vfs_setxattr(lower_dentry, name, value, size, flags);
-       if (!rc && dentry->d_inode)
-               fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
+       if (!rc && d_really_is_positive(dentry))
+               fsstack_copy_attr_all(d_inode(dentry), d_inode(lower_dentry));
  out:
         return rc;
  }
@@ -1041,14 +1041,14 @@ ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name,
  {
         int rc = 0;
  
-       if (!lower_dentry->d_inode->i_op->getxattr) {
+       if (!d_inode(lower_dentry)->i_op->getxattr) {
                 rc = -EOPNOTSUPP;
                 goto out;
         }
-       mutex_lock(&lower_dentry->d_inode->i_mutex);
-       rc = lower_dentry->d_inode->i_op->getxattr(lower_dentry, name, value,
+       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value,
                                                    size);
-       mutex_unlock(&lower_dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
  out:
         return rc;
  }
@@ -1068,13 +1068,13 @@ ecryptfs_listxattr(struct dentry *dentry, char *list, size_t size)
         struct dentry *lower_dentry;
  
         lower_dentry = ecryptfs_dentry_to_lower(dentry);
-       if (!lower_dentry->d_inode->i_op->listxattr) {
+       if (!d_inode(lower_dentry)->i_op->listxattr) {
                 rc = -EOPNOTSUPP;
                 goto out;
         }
-       mutex_lock(&lower_dentry->d_inode->i_mutex);
-       rc = lower_dentry->d_inode->i_op->listxattr(lower_dentry, list, size);
-       mutex_unlock(&lower_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       rc = d_inode(lower_dentry)->i_op->listxattr(lower_dentry, list, size);
+       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
  out:
         return rc;
  }
@@ -1085,13 +1085,13 @@ static int ecryptfs_removexattr(struct dentry *dentry, const char *name)
         struct dentry *lower_dentry;
  
         lower_dentry = ecryptfs_dentry_to_lower(dentry);
-       if (!lower_dentry->d_inode->i_op->removexattr) {
+       if (!d_inode(lower_dentry)->i_op->removexattr) {
                 rc = -EOPNOTSUPP;
                 goto out;
         }
-       mutex_lock(&lower_dentry->d_inode->i_mutex);
-       rc = lower_dentry->d_inode->i_op->removexattr(lower_dentry, name);
-       mutex_unlock(&lower_dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(lower_dentry)->i_mutex);
+       rc = d_inode(lower_dentry)->i_op->removexattr(lower_dentry, name);
+       mutex_unlock(&d_inode(lower_dentry)->i_mutex);
  out:
         return rc;
  }
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c

index f1ea610..866bb18 100644 (file)
--- a/fs/ecryptfs/kthread.c
+++ b/fs/ecryptfs/kthread.c
@@ -144,7 +144,7 @@ int ecryptfs_privileged_open(struct file **lower_file,
         /* Corresponding dput() and mntput() are done when the
          * lower file is fput() when all eCryptfs files for the inode are
          * released. */
-       flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR;
+       flags |= IS_RDONLY(d_inode(lower_dentry)) ? O_RDONLY : O_RDWR;
         (*lower_file) = dentry_open(&req.path, flags, cred);
         if (!IS_ERR(*lower_file))
                 goto out;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c

index c095d32..4f4d047 100644 (file)
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -546,11 +546,11 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                 goto out_free;
         }
  
-       if (check_ruid && !uid_eq(path.dentry->d_inode->i_uid, current_uid())) {
+       if (check_ruid && !uid_eq(d_inode(path.dentry)->i_uid, current_uid())) {
                 rc = -EPERM;
                 printk(KERN_ERR "Mount of device (uid: %d) not owned by "
                        "requested user (uid: %d)\n",
-                       i_uid_read(path.dentry->d_inode),
+                       i_uid_read(d_inode(path.dentry)),
                         from_kuid(&init_user_ns, current_uid()));
                 goto out_free;
         }
@@ -584,7 +584,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                 goto out_free;
         }
  
-       inode = ecryptfs_get_inode(path.dentry->d_inode, s);
+       inode = ecryptfs_get_inode(d_inode(path.dentry), s);
         rc = PTR_ERR(inode);
         if (IS_ERR(inode))
                 goto out_free;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c

index 4626976..cf20852 100644 (file)
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -420,7 +420,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
         void *xattr_virt;
         struct dentry *lower_dentry =
                 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_path.dentry;
-       struct inode *lower_inode = lower_dentry->d_inode;
+       struct inode *lower_inode = d_inode(lower_dentry);
         int rc;
  
         if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c

index 07ab497..3381b9d 100644 (file)
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -145,12 +145,12 @@ out:
  
  static int efivarfs_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct efivar_entry *var = dentry->d_inode->i_private;
+       struct efivar_entry *var = d_inode(dentry)->i_private;
  
         if (efivar_entry_delete(var))
                 return -EINVAL;
  
-       drop_nlink(dentry->d_inode);
+       drop_nlink(d_inode(dentry));
         dput(dentry);
         return 0;
  };
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c

index ddbce42..59fedbc 100644 (file)
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -144,7 +144,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor,
  
         name[len + EFI_VARIABLE_GUID_LEN+1] = '\0';
  
-       inode = efivarfs_get_inode(sb, root->d_inode, S_IFREG | 0644, 0);
+       inode = efivarfs_get_inode(sb, d_inode(root), S_IFREG | 0644, 0);
         if (!inode)
                 goto fail_name;
  
diff --git a/fs/efs/namei.c b/fs/efs/namei.c

index bbee8f0..40ba9cc 100644 (file)
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -111,9 +111,9 @@ struct dentry *efs_get_parent(struct dentry *child)
         struct dentry *parent = ERR_PTR(-ENOENT);
         efs_ino_t ino;
  
-       ino = efs_find_entry(child->d_inode, "..", 2);
+       ino = efs_find_entry(d_inode(child), "..", 2);
         if (ino)
-               parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino));
+               parent = d_obtain_alias(efs_iget(d_inode(child)->i_sb, ino));
  
         return parent;
  }
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c

index d7defd5..4deb0b0 100644 (file)
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -379,7 +379,7 @@ ino_t exofs_parent_ino(struct dentry *child)
         struct exofs_dir_entry *de;
         ino_t ino;
  
-       de = exofs_dotdot(child->d_inode, &page);
+       de = exofs_dotdot(d_inode(child), &page);
         if (!de)
                 return 0;
  
@@ -429,7 +429,7 @@ int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de,
  
  int exofs_add_link(struct dentry *dentry, struct inode *inode)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         const unsigned char *name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
         unsigned chunk_size = exofs_chunk_size(dir);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c

index 35073aa..786e4cc 100644 (file)
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1028,7 +1028,7 @@ static int _do_truncate(struct inode *inode, loff_t newsize)
   */
  int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         /* if we are about to modify an object, and it hasn't been
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c

index 2890746..5ae25e4 100644 (file)
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -141,7 +141,7 @@ out_fail:
  static int exofs_link(struct dentry *old_dentry, struct inode *dir,
                 struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
  
         inode->i_ctime = CURRENT_TIME;
         inode_inc_link_count(inode);
@@ -191,7 +191,7 @@ out_dir:
  
  static int exofs_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct exofs_dir_entry *de;
         struct page *page;
         int err = -ENOENT;
@@ -213,7 +213,7 @@ out:
  
  static int exofs_rmdir(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int err = -ENOTEMPTY;
  
         if (exofs_empty_dir(inode)) {
@@ -230,8 +230,8 @@ static int exofs_rmdir(struct inode *dir, struct dentry *dentry)
  static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 struct inode *new_dir, struct dentry *new_dentry)
  {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct page *dir_page = NULL;
         struct exofs_dir_entry *dir_de = NULL;
         struct page *old_page;
diff --git a/fs/exofs/super.c b/fs/exofs/super.c

index fcc2e56..b795c56 100644 (file)
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -958,7 +958,7 @@ static struct dentry *exofs_get_parent(struct dentry *child)
         if (!ino)
                 return ERR_PTR(-ESTALE);
  
-       return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(exofs_iget(d_inode(child)->i_sb, ino));
  }
  
  static struct inode *exofs_nfs_get_inode(struct super_block *sb,
diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c

index 832e262..6f6f3a4 100644 (file)
--- a/fs/exofs/symlink.c
+++ b/fs/exofs/symlink.c
@@ -37,7 +37,7 @@
  
  static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct exofs_i_info *oi = exofs_i(dentry->d_inode);
+       struct exofs_i_info *oi = exofs_i(d_inode(dentry));
  
         nd_set_link(nd, (char *)oi->i_data);
         return NULL;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c

index 6e1d4ab..796b491 100644 (file)
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -486,7 +486,7 @@ void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
   */
  int ext2_add_link (struct dentry *dentry, struct inode *inode)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         const char *name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
         unsigned chunk_size = ext2_chunk_size(dir);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c

index 6c14bb8..5c04a0d 100644 (file)
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -278,7 +278,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
         avefreeb = free_blocks / ngroups;
         ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
  
-       if ((parent == sb->s_root->d_inode) ||
+       if ((parent == d_inode(sb->s_root)) ||
             (EXT2_I(parent)->i_flags & EXT2_TOPDIR_FL)) {
                 struct ext2_group_desc *best_desc = NULL;
                 int best_ndir = inodes_per_group;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c

index 5d92139..f460ae3 100644 (file)
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1544,7 +1544,7 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
  
  int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         error = inode_change_ok(inode, iattr);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c

index ce42293..3e074a9 100644 (file)
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -79,10 +79,10 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, uns
  struct dentry *ext2_get_parent(struct dentry *child)
  {
         struct qstr dotdot = QSTR_INIT("..", 2);
-       unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot);
+       unsigned long ino = ext2_inode_by_name(d_inode(child), &dotdot);
         if (!ino)
                 return ERR_PTR(-ENOENT);
-       return d_obtain_alias(ext2_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(ext2_iget(d_inode(child)->i_sb, ino));
  } 
  
  /*
@@ -208,7 +208,7 @@ out_fail:
  static int ext2_link (struct dentry * old_dentry, struct inode * dir,
         struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         int err;
  
         dquot_initialize(dir);
@@ -275,7 +275,7 @@ out_dir:
  
  static int ext2_unlink(struct inode * dir, struct dentry *dentry)
  {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
         struct ext2_dir_entry_2 * de;
         struct page * page;
         int err = -ENOENT;
@@ -299,7 +299,7 @@ out:
  
  static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
  {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
         int err = -ENOTEMPTY;
  
         if (ext2_empty_dir(inode)) {
@@ -316,8 +316,8 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
  static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
         struct inode * new_dir, struct dentry * new_dentry )
  {
-       struct inode * old_inode = old_dentry->d_inode;
-       struct inode * new_inode = new_dentry->d_inode;
+       struct inode * old_inode = d_inode(old_dentry);
+       struct inode * new_inode = d_inode(new_dentry);
         struct page * dir_page = NULL;
         struct ext2_dir_entry_2 * dir_de = NULL;
         struct page * old_page;
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c

index 565cf81..20608f1 100644 (file)
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -23,7 +23,7 @@
  
  static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct ext2_inode_info *ei = EXT2_I(dentry->d_inode);
+       struct ext2_inode_info *ei = EXT2_I(d_inode(dentry));
         nd_set_link(nd, (char *)ei->i_data);
         return NULL;
  }
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c

index 9142614..0b6bfd3 100644 (file)
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -243,7 +243,7 @@ cleanup:
  static int
  ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct buffer_head *bh = NULL;
         struct ext2_xattr_entry *entry;
         char *end;
@@ -319,7 +319,7 @@ cleanup:
  /*
   * Inode operation listxattr()
   *
- * dentry->d_inode->i_mutex: don't care
+ * d_inode(dentry)->i_mutex: don't care
   */
  ssize_t
  ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c

index c0ebc4d..702fc68 100644 (file)
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -28,7 +28,7 @@ ext2_xattr_security_get(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
+       return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
                               buffer, size);
  }
  
@@ -38,7 +38,7 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
+       return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name,
                               value, size, flags);
  }
  
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c

index 7e19257..42b6e98 100644 (file)
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -32,7 +32,7 @@ ext2_xattr_trusted_get(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
+       return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
                               buffer, size);
  }
  
@@ -42,7 +42,7 @@ ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
+       return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name,
                               value, size, flags);
  }
  
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c

index f470e44..ecdc460 100644 (file)
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -36,7 +36,7 @@ ext2_xattr_user_get(struct dentry *dentry, const char *name,
                 return -EINVAL;
         if (!test_opt(dentry->d_sb, XATTR_USER))
                 return -EOPNOTSUPP;
-       return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+       return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_USER,
                               name, buffer, size);
  }
  
@@ -49,7 +49,7 @@ ext2_xattr_user_set(struct dentry *dentry, const char *name,
         if (!test_opt(dentry->d_sb, XATTR_USER))
                 return -EOPNOTSUPP;
  
-       return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+       return ext2_xattr_set(d_inode(dentry), EXT2_XATTR_INDEX_USER,
                               name, value, size, flags);
  }
  
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c

index a1b8102..3ad242e 100644 (file)
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -210,7 +210,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
         avefreeb = freeb / ngroups;
         ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
  
-       if ((parent == sb->s_root->d_inode) ||
+       if ((parent == d_inode(sb->s_root)) ||
             (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) {
                 int best_ndir = inodes_per_group;
                 int best_group = -1;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c

index 13c0868..2ee2dc4 100644 (file)
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3240,7 +3240,7 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
   */
  int ext3_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error, rc = 0;
         const unsigned int ia_valid = attr->ia_valid;
  
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c

index f197736..4264b9b 100644 (file)
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1049,19 +1049,19 @@ struct dentry *ext3_get_parent(struct dentry *child)
         struct ext3_dir_entry_2 * de;
         struct buffer_head *bh;
  
-       bh = ext3_find_entry(child->d_inode, &dotdot, &de);
+       bh = ext3_find_entry(d_inode(child), &dotdot, &de);
         if (!bh)
                 return ERR_PTR(-ENOENT);
         ino = le32_to_cpu(de->inode);
         brelse(bh);
  
-       if (!ext3_valid_inum(child->d_inode->i_sb, ino)) {
-               ext3_error(child->d_inode->i_sb, "ext3_get_parent",
+       if (!ext3_valid_inum(d_inode(child)->i_sb, ino)) {
+               ext3_error(d_inode(child)->i_sb, "ext3_get_parent",
                            "bad inode number: %lu", ino);
                 return ERR_PTR(-EIO);
         }
  
-       return d_obtain_alias(ext3_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(ext3_iget(d_inode(child)->i_sb, ino));
  }
  
  #define S_SHIFT 12
@@ -1243,7 +1243,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                              struct inode *inode, struct ext3_dir_entry_2 *de,
                              struct buffer_head * bh)
  {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
         const char      *name = dentry->d_name.name;
         int             namelen = dentry->d_name.len;
         unsigned long   offset = 0;
@@ -1330,7 +1330,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
  static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
                             struct inode *inode, struct buffer_head *bh)
  {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
         const char      *name = dentry->d_name.name;
         int             namelen = dentry->d_name.len;
         struct buffer_head *bh2;
@@ -1435,7 +1435,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
  static int ext3_add_entry (handle_t *handle, struct dentry *dentry,
         struct inode *inode)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         struct buffer_head * bh;
         struct ext3_dir_entry_2 *de;
         struct super_block * sb;
@@ -1489,7 +1489,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
         struct dx_entry *entries, *at;
         struct dx_hash_info hinfo;
         struct buffer_head * bh;
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         struct super_block * sb = dir->i_sb;
         struct ext3_dir_entry_2 *de;
         int err;
@@ -2111,7 +2111,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
         /* Initialize quotas before so that eventual writes go in
          * separate transaction */
         dquot_initialize(dir);
-       dquot_initialize(dentry->d_inode);
+       dquot_initialize(d_inode(dentry));
  
         handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
         if (IS_ERR(handle))
@@ -2125,7 +2125,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
         if (IS_DIRSYNC(dir))
                 handle->h_sync = 1;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         retval = -EIO;
         if (le32_to_cpu(de->inode) != inode->i_ino)
@@ -2173,7 +2173,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
         /* Initialize quotas before so that eventual writes go
          * in separate transaction */
         dquot_initialize(dir);
-       dquot_initialize(dentry->d_inode);
+       dquot_initialize(d_inode(dentry));
  
         handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
         if (IS_ERR(handle))
@@ -2187,7 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
         if (!bh)
                 goto end_unlink;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         retval = -EIO;
         if (le32_to_cpu(de->inode) != inode->i_ino)
@@ -2328,7 +2328,7 @@ static int ext3_link (struct dentry * old_dentry,
                 struct inode * dir, struct dentry *dentry)
  {
         handle_t *handle;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         int err, retries = 0;
  
         if (inode->i_nlink >= EXT3_LINK_MAX)
@@ -2391,8 +2391,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
  
         /* Initialize quotas before so that eventual writes go
          * in separate transaction */
-       if (new_dentry->d_inode)
-               dquot_initialize(new_dentry->d_inode);
+       if (d_really_is_positive(new_dentry))
+               dquot_initialize(d_inode(new_dentry));
         handle = ext3_journal_start(old_dir, 2 *
                                         EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
                                         EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
@@ -2409,12 +2409,12 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
          *  and merrily kill the link to whatever was created under the
          *  same name. Goodbye sticky bit ;-<
          */
-       old_inode = old_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
         retval = -ENOENT;
         if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
                 goto end_rename;
  
-       new_inode = new_dentry->d_inode;
+       new_inode = d_inode(new_dentry);
         new_bh = ext3_find_entry(new_dir, &new_dentry->d_name, &new_de);
         if (new_bh) {
                 if (!new_inode) {
diff --git a/fs/ext3/super.c b/fs/ext3/super.c

index f037b4b..a9312f0 100644 (file)
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1170,7 +1170,7 @@ static int parse_options (char *options, struct super_block *sb,
                                 return 0;
                         }
  
-                       journal_inode = path.dentry->d_inode;
+                       journal_inode = d_inode(path.dentry);
                         if (!S_ISBLK(journal_inode->i_mode)) {
                                 ext3_msg(sb, KERN_ERR, "error: journal path %s "
                                         "is not a block device", journal_path);
@@ -2947,7 +2947,7 @@ static int ext3_write_info(struct super_block *sb, int type)
         handle_t *handle;
  
         /* Data block + inode block */
-       handle = ext3_journal_start(sb->s_root->d_inode, 2);
+       handle = ext3_journal_start(d_inode(sb->s_root), 2);
         if (IS_ERR(handle))
                 return PTR_ERR(handle);
         ret = dquot_commit_info(sb, type);
@@ -2994,7 +2994,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
          * When we journal data on quota file, we have to flush journal to see
          * all updates to the file when we bypass pagecache...
          */
-       if (ext3_should_journal_data(path->dentry->d_inode)) {
+       if (ext3_should_journal_data(d_inode(path->dentry))) {
                 /*
                  * We don't need to lock updates but journal_flush() could
                  * otherwise be livelocked...
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c

index 6b01c3e..ea96df3 100644 (file)
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -23,7 +23,7 @@
  
  static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
+       struct ext3_inode_info *ei = EXT3_I(d_inode(dentry));
         nd_set_link(nd, (char*)ei->i_data);
         return NULL;
  }
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c

index 24215dc..7cf3650 100644 (file)
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -137,7 +137,7 @@ ext3_xattr_handler(int name_index)
  /*
   * Inode operation listxattr()
   *
- * dentry->d_inode->i_mutex: don't care
+ * d_inode(dentry)->i_mutex: don't care
   */
  ssize_t
  ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -355,7 +355,7 @@ ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
  static int
  ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct buffer_head *bh = NULL;
         int error;
  
@@ -391,7 +391,7 @@ cleanup:
  static int
  ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ext3_xattr_ibody_header *header;
         struct ext3_inode *raw_inode;
         struct ext3_iloc iloc;
@@ -432,7 +432,7 @@ ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
  {
         int i_error, b_error;
  
-       down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
+       down_read(&EXT3_I(d_inode(dentry))->xattr_sem);
         i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
         if (i_error < 0) {
                 b_error = 0;
@@ -445,7 +445,7 @@ ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
                 if (b_error < 0)
                         i_error = 0;
         }
-       up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
+       up_read(&EXT3_I(d_inode(dentry))->xattr_sem);
         return i_error + b_error;
  }
  
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c

index 722c2bf..c9506d5 100644 (file)
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -29,7 +29,7 @@ ext3_xattr_security_get(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+       return ext3_xattr_get(d_inode(dentry), EXT3_XATTR_INDEX_SECURITY,
                               name, buffer, size);
  }
  
@@ -39,7 +39,7 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+       return ext3_xattr_set(d_inode(dentry), EXT3_XATTR_INDEX_SECURITY,
                               name, value, size, flags);
  }
  
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c

index d75727c..206cc66 100644 (file)
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -32,7 +32,7 @@ ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
+       return ext3_xattr_get(d_inode(dentry), EXT3_XATTR_INDEX_TRUSTED,
                               name, buffer, size);
  }
  
@@ -42,7 +42,7 @@ ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
+       return ext3_xattr_set(d_inode(dentry), EXT3_XATTR_INDEX_TRUSTED, name,
                               value, size, flags);
  }
  
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c

index 5612af3..021508a 100644 (file)
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -34,7 +34,7 @@ ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
                 return -EINVAL;
         if (!test_opt(dentry->d_sb, XATTR_USER))
                 return -EOPNOTSUPP;
-       return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+       return ext3_xattr_get(d_inode(dentry), EXT3_XATTR_INDEX_USER,
                               name, buffer, size);
  }
  
@@ -46,7 +46,7 @@ ext3_xattr_user_set(struct dentry *dentry, const char *name,
                 return -EINVAL;
         if (!test_opt(dentry->d_sb, XATTR_USER))
                 return -EOPNOTSUPP;
-       return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+       return ext3_xattr_set(d_inode(dentry), EXT3_XATTR_INDEX_USER,
                               name, value, size, flags);
  }
  
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c

index e9d632e..8850254 100644 (file)
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -55,7 +55,7 @@ static int ext4_sync_parent(struct inode *inode)
                 dentry = d_find_any_alias(inode);
                 if (!dentry)
                         break;
-               next = igrab(dentry->d_parent->d_inode);
+               next = igrab(d_inode(dentry->d_parent));
                 dput(dentry);
                 if (!next)
                         break;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c

index 2cf18a2..1eaa6cb 100644 (file)
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -443,7 +443,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
         ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
  
         if (S_ISDIR(mode) &&
-           ((parent == sb->s_root->d_inode) ||
+           ((parent == d_inode(sb->s_root)) ||
              (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
                 int best_ndir = inodes_per_group;
                 int ret = -1;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c

index 3580629..9588240 100644 (file)
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -682,11 +682,11 @@ retry:
                  * via ext4_inode_block_unlocked_dio(). Check inode's state
                  * while holding extra i_dio_count ref.
                  */
-               atomic_inc(&inode->i_dio_count);
+               inode_dio_begin(inode);
                 smp_mb();
                 if (unlikely(ext4_test_inode_state(inode,
                                                     EXT4_STATE_DIOREAD_LOCK))) {
-                       inode_dio_done(inode);
+                       inode_dio_end(inode);
                         goto locked;
                 }
                 if (IS_DAX(inode))
@@ -697,7 +697,7 @@ retry:
                                                    inode->i_sb->s_bdev, iter,
                                                    offset, ext4_get_block, NULL,
                                                    NULL, 0);
-               inode_dio_done(inode);
+               inode_dio_end(inode);
         } else {
  locked:
                 if (IS_DAX(inode))
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c

index feb2caf..095c7a2 100644 (file)
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1000,7 +1000,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
                                      struct ext4_iloc *iloc,
                                      void *inline_start, int inline_size)
  {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
         const char      *name = dentry->d_name.name;
         int             namelen = dentry->d_name.len;
         int             err;
@@ -1254,7 +1254,7 @@ int ext4_try_add_inline_entry(handle_t *handle, struct dentry *dentry,
         int ret, inline_size;
         void *inline_start;
         struct ext4_iloc iloc;
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
  
         ret = ext4_get_inode_loc(dir, &iloc);
         if (ret)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 366476e..cbd0654 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3077,7 +3077,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
          * overwrite DIO as i_dio_count needs to be incremented under i_mutex.
          */
         if (iov_iter_rw(iter) == WRITE)
-               atomic_inc(&inode->i_dio_count);
+               inode_dio_begin(inode);
  
         /* If we do a overwrite dio, i_mutex locking can be released */
         overwrite = *((int *)iocb->private);
@@ -3182,7 +3182,7 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
  
  retake_lock:
         if (iov_iter_rw(iter) == WRITE)
-               inode_dio_done(inode);
+               inode_dio_end(inode);
         /* take i_mutex locking again if we do a ovewrite dio */
         if (overwrite) {
                 up_read(&EXT4_I(inode)->i_data_sem);
@@ -4637,7 +4637,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
   */
  int ext4_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error, rc = 0;
         int orphan = 0;
         const unsigned int ia_valid = attr->ia_valid;
@@ -4785,7 +4785,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
         struct inode *inode;
         unsigned long long delalloc_blocks;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         generic_fillattr(inode, stat);
  
         /*
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c

index 3cb267a..b52374e 100644 (file)
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -475,7 +475,7 @@ int ext4_ext_migrate(struct inode *inode)
                 EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
         owner[0] = i_uid_read(inode);
         owner[1] = i_gid_read(inode);
-       tmp_inode = ext4_new_inode(handle, inode->i_sb->s_root->d_inode,
+       tmp_inode = ext4_new_inode(handle, d_inode(inode->i_sb->s_root),
                                    S_IFREG, NULL, goal, owner);
         if (IS_ERR(tmp_inode)) {
                 retval = PTR_ERR(tmp_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index ef22cd9..7223b0b 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1664,7 +1664,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
         struct ext4_dir_entry_2 * de;
         struct buffer_head *bh;
  
-       bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
+       bh = ext4_find_entry(d_inode(child), &dotdot, &de, NULL);
         if (IS_ERR(bh))
                 return (struct dentry *) bh;
         if (!bh)
@@ -1672,13 +1672,13 @@ struct dentry *ext4_get_parent(struct dentry *child)
         ino = le32_to_cpu(de->inode);
         brelse(bh);
  
-       if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
-               EXT4_ERROR_INODE(child->d_inode,
+       if (!ext4_valid_inum(d_inode(child)->i_sb, ino)) {
+               EXT4_ERROR_INODE(d_inode(child),
                                  "bad parent inode number: %u", ino);
                 return ERR_PTR(-EIO);
         }
  
-       return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
+       return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino));
  }
  
  /*
@@ -1988,7 +1988,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
                              struct inode *inode, struct ext4_dir_entry_2 *de,
                              struct buffer_head *bh)
  {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
         const char      *name = dentry->d_name.name;
         int             namelen = dentry->d_name.len;
         unsigned int    blocksize = dir->i_sb->s_blocksize;
@@ -2048,7 +2048,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
  static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
                             struct inode *inode, struct buffer_head *bh)
  {
-       struct inode    *dir = dentry->d_parent->d_inode;
+       struct inode    *dir = d_inode(dentry->d_parent);
  #ifdef CONFIG_EXT4_FS_ENCRYPTION
         struct ext4_fname_crypto_ctx *ctx = NULL;
         int res;
@@ -2202,7 +2202,7 @@ out_frames:
  static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
                           struct inode *inode)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         struct buffer_head *bh = NULL;
         struct ext4_dir_entry_2 *de;
         struct ext4_dir_entry_tail *t;
@@ -2287,7 +2287,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
         struct dx_entry *entries, *at;
         struct dx_hash_info hinfo;
         struct buffer_head *bh;
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         struct super_block *sb = dir->i_sb;
         struct ext4_dir_entry_2 *de;
         int err;
@@ -3063,7 +3063,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
         /* Initialize quotas before so that eventual writes go in
          * separate transaction */
         dquot_initialize(dir);
-       dquot_initialize(dentry->d_inode);
+       dquot_initialize(d_inode(dentry));
  
         retval = -ENOENT;
         bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
@@ -3072,7 +3072,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
         if (!bh)
                 goto end_rmdir;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         retval = -EIO;
         if (le32_to_cpu(de->inode) != inode->i_ino)
@@ -3132,7 +3132,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
         /* Initialize quotas before so that eventual writes go
          * in separate transaction */
         dquot_initialize(dir);
-       dquot_initialize(dentry->d_inode);
+       dquot_initialize(d_inode(dentry));
  
         retval = -ENOENT;
         bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
@@ -3141,7 +3141,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
         if (!bh)
                 goto end_unlink;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         retval = -EIO;
         if (le32_to_cpu(de->inode) != inode->i_ino)
@@ -3339,7 +3339,7 @@ static int ext4_link(struct dentry *old_dentry,
                      struct inode *dir, struct dentry *dentry)
  {
         handle_t *handle;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         int err, retries = 0;
  
         if (inode->i_nlink >= EXT4_LINK_MAX)
@@ -3613,12 +3613,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
         struct ext4_renament old = {
                 .dir = old_dir,
                 .dentry = old_dentry,
-               .inode = old_dentry->d_inode,
+               .inode = d_inode(old_dentry),
         };
         struct ext4_renament new = {
                 .dir = new_dir,
                 .dentry = new_dentry,
-               .inode = new_dentry->d_inode,
+               .inode = d_inode(new_dentry),
         };
         int force_reread;
         int retval;
@@ -3809,12 +3809,12 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
         struct ext4_renament old = {
                 .dir = old_dir,
                 .dentry = old_dentry,
-               .inode = old_dentry->d_inode,
+               .inode = d_inode(old_dentry),
         };
         struct ext4_renament new = {
                 .dir = new_dir,
                 .dentry = new_dentry,
-               .inode = new_dentry->d_inode,
+               .inode = d_inode(new_dentry),
         };
         u8 new_file_type;
         int retval;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c

index 821f22d..f06d058 100644 (file)
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1556,7 +1556,7 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
                         return -1;
                 }
  
-               journal_inode = path.dentry->d_inode;
+               journal_inode = d_inode(path.dentry);
                 if (!S_ISBLK(journal_inode->i_mode)) {
                         ext4_msg(sb, KERN_ERR, "error: journal path %s "
                                 "is not a block device", journal_path);
@@ -5217,7 +5217,7 @@ static int ext4_write_info(struct super_block *sb, int type)
         handle_t *handle;
  
         /* Data block + inode block */
-       handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);
+       handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2);
         if (IS_ERR(handle))
                 return PTR_ERR(handle);
         ret = dquot_commit_info(sb, type);
@@ -5265,7 +5265,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
          * all updates to the file when we bypass pagecache...
          */
         if (EXT4_SB(sb)->s_journal &&
-           ext4_should_journal_data(path->dentry->d_inode)) {
+           ext4_should_journal_data(d_inode(path->dentry))) {
                 /*
                  * We don't need to lock updates but journal_flush() could
                  * otherwise be livelocked...
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c

index 136ca0e..19f78f2 100644 (file)
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -28,7 +28,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
         struct page *cpage = NULL;
         char *caddr, *paddr = NULL;
         struct ext4_str cstr, pstr;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ext4_fname_crypto_ctx *ctx = NULL;
         struct ext4_encrypted_symlink_data *sd;
         loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
@@ -43,8 +43,8 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
                 return ctx;
  
         if (ext4_inode_is_fast_symlink(inode)) {
-               caddr = (char *) EXT4_I(dentry->d_inode)->i_data;
-               max_size = sizeof(EXT4_I(dentry->d_inode)->i_data);
+               caddr = (char *) EXT4_I(inode)->i_data;
+               max_size = sizeof(EXT4_I(inode)->i_data);
         } else {
                 cpage = read_mapping_page(inode->i_mapping, 0, NULL);
                 if (IS_ERR(cpage)) {
@@ -113,7 +113,7 @@ static void ext4_put_link(struct dentry *dentry, struct nameidata *nd,
  
  static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct ext4_inode_info *ei = EXT4_I(dentry->d_inode);
+       struct ext4_inode_info *ei = EXT4_I(d_inode(dentry));
         nd_set_link(nd, (char *) ei->i_data);
         return NULL;
  }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c

index 759842f..16e28c0 100644 (file)
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -178,7 +178,7 @@ ext4_xattr_handler(int name_index)
  /*
   * Inode operation listxattr()
   *
- * dentry->d_inode->i_mutex: don't care
+ * d_inode(dentry)->i_mutex: don't care
   */
  ssize_t
  ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
@@ -423,7 +423,7 @@ ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
  static int
  ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct buffer_head *bh = NULL;
         int error;
         struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
@@ -460,7 +460,7 @@ cleanup:
  static int
  ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ext4_xattr_ibody_header *header;
         struct ext4_inode *raw_inode;
         struct ext4_iloc iloc;
@@ -501,7 +501,7 @@ ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
  {
         int ret, ret2;
  
-       down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
+       down_read(&EXT4_I(d_inode(dentry))->xattr_sem);
         ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
         if (ret < 0)
                 goto errout;
@@ -514,7 +514,7 @@ ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
                 goto errout;
         ret += ret2;
  errout:
-       up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
+       up_read(&EXT4_I(d_inode(dentry))->xattr_sem);
         return ret;
  }
  
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c

index d2a2006..95d90e0 100644 (file)
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -33,7 +33,7 @@ ext4_xattr_security_get(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+       return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
                               name, buffer, size);
  }
  
@@ -43,7 +43,7 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+       return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY,
                               name, value, size, flags);
  }
  
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c

index 95f1f4a..891ee2d 100644 (file)
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -36,7 +36,7 @@ ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+       return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
                               name, buffer, size);
  }
  
@@ -46,7 +46,7 @@ ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+       return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED,
                               name, value, size, flags);
  }
  
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c

index 0edb761..6ed932b 100644 (file)
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -37,7 +37,7 @@ ext4_xattr_user_get(struct dentry *dentry, const char *name,
                 return -EINVAL;
         if (!test_opt(dentry->d_sb, XATTR_USER))
                 return -EOPNOTSUPP;
-       return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+       return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_USER,
                               name, buffer, size);
  }
  
@@ -49,7 +49,7 @@ ext4_xattr_user_set(struct dentry *dentry, const char *name,
                 return -EINVAL;
         if (!test_opt(dentry->d_sb, XATTR_USER))
                 return -EOPNOTSUPP;
-       return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+       return ext4_xattr_set(d_inode(dentry), EXT4_XATTR_INDEX_USER,
                               name, value, size, flags);
  }
  
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h

index c06a25e..d8921cf 100644 (file)
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1482,7 +1482,7 @@ bool f2fs_empty_dir(struct inode *);
  
  static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
  {
-       return __f2fs_add_link(dentry->d_parent->d_inode, &dentry->d_name,
+       return __f2fs_add_link(d_inode(dentry->d_parent), &dentry->d_name,
                                 inode, inode->i_ino, inode->i_mode);
  }
  
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c

index a6f3f61..2b52e48 100644 (file)
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -574,7 +574,7 @@ void f2fs_truncate(struct inode *inode)
  int f2fs_getattr(struct vfsmount *mnt,
                          struct dentry *dentry, struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         generic_fillattr(inode, stat);
         stat->blocks <<= 3;
         return 0;
@@ -613,7 +613,7 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
  
  int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct f2fs_inode_info *fi = F2FS_I(inode);
         int err;
  
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c

index 407dde3..7e3794e 100644 (file)
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -151,7 +151,7 @@ out:
  static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
                 struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
         int err;
  
@@ -182,10 +182,10 @@ out:
  struct dentry *f2fs_get_parent(struct dentry *child)
  {
         struct qstr dotdot = QSTR_INIT("..", 2);
-       unsigned long ino = f2fs_inode_by_name(child->d_inode, &dotdot);
+       unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot);
         if (!ino)
                 return ERR_PTR(-ENOENT);
-       return d_obtain_alias(f2fs_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(f2fs_iget(d_inode(child)->i_sb, ino));
  }
  
  static int __recover_dot_dentries(struct inode *dir, nid_t pino)
@@ -263,7 +263,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
  static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct f2fs_dir_entry *de;
         struct page *page;
         int err = -ENOENT;
@@ -403,7 +403,7 @@ out_fail:
  
  static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         if (f2fs_empty_dir(inode))
                 return f2fs_unlink(dir, dentry);
         return -ENOTEMPTY;
@@ -451,8 +451,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                         struct inode *new_dir, struct dentry *new_dentry)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct page *old_dir_page;
         struct page *old_page, *new_page;
         struct f2fs_dir_entry *old_dir_entry = NULL;
@@ -578,8 +578,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                              struct inode *new_dir, struct dentry *new_dentry)
  {
         struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir);
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct page *old_dir_page, *new_dir_page;
         struct page *old_page, *new_page;
         struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL;
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c

index b0fd2f2..9757f65 100644 (file)
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -83,7 +83,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name,
         }
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL);
+       return f2fs_getxattr(d_inode(dentry), type, name, buffer, size, NULL);
  }
  
  static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
@@ -108,7 +108,7 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name,
         if (strcmp(name, "") == 0)
                 return -EINVAL;
  
-       return f2fs_setxattr(dentry->d_inode, type, name,
+       return f2fs_setxattr(d_inode(dentry), type, name,
                                         value, size, NULL, flags);
  }
  
@@ -130,7 +130,7 @@ static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list,
  static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
                 void *buffer, size_t size, int type)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (strcmp(name, "") != 0)
                 return -EINVAL;
@@ -143,7 +143,7 @@ static int f2fs_xattr_advise_get(struct dentry *dentry, const char *name,
  static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name,
                 const void *value, size_t size, int flags, int type)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (strcmp(name, "") != 0)
                 return -EINVAL;
@@ -444,7 +444,7 @@ cleanup:
  
  ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct f2fs_xattr_entry *entry;
         void *base_addr;
         int error = 0;
diff --git a/fs/fat/file.c b/fs/fat/file.c

index cf50d93..442d50a 100644 (file)
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -305,7 +305,7 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
  
  int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         generic_fillattr(inode, stat);
         stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size;
  
@@ -377,7 +377,7 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
  int fat_setattr(struct dentry *dentry, struct iattr *attr)
  {
         struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         unsigned int ia_valid;
         int error;
  
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c

index cc6a854..b7e2b33 100644 (file)
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -308,7 +308,7 @@ out:
  static int msdos_rmdir(struct inode *dir, struct dentry *dentry)
  {
         struct super_block *sb = dir->i_sb;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct fat_slot_info sinfo;
         int err;
  
@@ -402,7 +402,7 @@ out:
  /***** Unlink a file */
  static int msdos_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct super_block *sb = inode->i_sb;
         struct fat_slot_info sinfo;
         int err;
@@ -440,8 +440,8 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
         int err, old_attrs, is_dir, update_dotdot, corrupt = 0;
  
         old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
-       old_inode = old_dentry->d_inode;
-       new_inode = new_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
+       new_inode = d_inode(new_dentry);
  
         err = fat_scan(old_dir, old_name, &old_sinfo);
         if (err) {
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c

index 7e0974e..7092584 100644 (file)
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -33,7 +33,7 @@ static int vfat_revalidate_shortname(struct dentry *dentry)
  {
         int ret = 1;
         spin_lock(&dentry->d_lock);
-       if (dentry->d_time != dentry->d_parent->d_inode->i_version)
+       if (dentry->d_time != d_inode(dentry->d_parent)->i_version)
                 ret = 0;
         spin_unlock(&dentry->d_lock);
         return ret;
@@ -45,7 +45,7 @@ static int vfat_revalidate(struct dentry *dentry, unsigned int flags)
                 return -ECHILD;
  
         /* This is not negative dentry. Always valid. */
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 return 1;
         return vfat_revalidate_shortname(dentry);
  }
@@ -65,7 +65,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags)
          * positive dentry isn't good idea. So it's unsupported like
          * rename("filename", "FILENAME") for now.
          */
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 return 1;
  
         /*
@@ -801,7 +801,7 @@ out:
  
  static int vfat_rmdir(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct super_block *sb = dir->i_sb;
         struct fat_slot_info sinfo;
         int err;
@@ -832,7 +832,7 @@ out:
  
  static int vfat_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct super_block *sb = dir->i_sb;
         struct fat_slot_info sinfo;
         int err;
@@ -915,8 +915,8 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
         struct super_block *sb = old_dir->i_sb;
  
         old_sinfo.bh = sinfo.bh = dotdot_bh = NULL;
-       old_inode = old_dentry->d_inode;
-       new_inode = new_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
+       new_inode = d_inode(new_dentry);
         mutex_lock(&MSDOS_SB(sb)->s_lock);
         err = vfat_find(old_dir, &old_dentry->d_name, &old_sinfo);
         if (err)
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c

index 93e1493..eb19265 100644 (file)
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -266,7 +266,7 @@ struct inode *fat_rebuild_parent(struct super_block *sb, int parent_logstart)
   * Find the parent for a directory that is not currently connected to
   * the filesystem root.
   *
- * On entry, the caller holds child_dir->d_inode->i_mutex.
+ * On entry, the caller holds d_inode(child_dir)->i_mutex.
   */
  static struct dentry *fat_get_parent(struct dentry *child_dir)
  {
@@ -276,7 +276,7 @@ static struct dentry *fat_get_parent(struct dentry *child_dir)
         struct inode *parent_inode = NULL;
         struct msdos_sb_info *sbi = MSDOS_SB(sb);
  
-       if (!fat_get_dotdot_entry(child_dir->d_inode, &bh, &de)) {
+       if (!fat_get_dotdot_entry(d_inode(child_dir), &bh, &de)) {
                 int parent_logstart = fat_get_start(sbi, de);
                 parent_inode = fat_dget(sb, parent_logstart);
                 if (!parent_inode && sbi->options.nfs == FAT_NFS_NOSTALE_RO)
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c

index c36aeaf..8b9229e 100644 (file)
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -76,7 +76,7 @@ const struct address_space_operations vxfs_immed_aops = {
  static void *
  vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np)
  {
-       struct vxfs_inode_info          *vip = VXFS_INO(dp->d_inode);
+       struct vxfs_inode_info          *vip = VXFS_INO(d_inode(dp));
         nd_set_link(np, vip->vii_immed.vi_immed);
         return NULL;
  }
diff --git a/fs/fuse/control.c b/fs/fuse/control.c

index 205e0d5..f863ac6 100644 (file)
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -244,7 +244,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
                 return 0;
  
         parent = fuse_control_sb->s_root;
-       inc_nlink(parent->d_inode);
+       inc_nlink(d_inode(parent));
         sprintf(name, "%u", fc->dev);
         parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
                                      &simple_dir_inode_operations,
@@ -283,11 +283,11 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
  
         for (i = fc->ctl_ndents - 1; i >= 0; i--) {
                 struct dentry *dentry = fc->ctl_dentry[i];
-               dentry->d_inode->i_private = NULL;
+               d_inode(dentry)->i_private = NULL;
                 d_drop(dentry);
                 dput(dentry);
         }
-       drop_nlink(fuse_control_sb->s_root->d_inode);
+       drop_nlink(d_inode(fuse_control_sb->s_root));
  }
  
  static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c

index 1545b71..0572bca 100644 (file)
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -192,7 +192,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
         struct fuse_inode *fi;
         int ret;
  
-       inode = ACCESS_ONCE(entry->d_inode);
+       inode = d_inode_rcu(entry);
         if (inode && is_bad_inode(inode))
                 goto invalid;
         else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
@@ -220,7 +220,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                 attr_version = fuse_get_attr_version(fc);
  
                 parent = dget_parent(entry);
-               fuse_lookup_init(fc, &args, get_node_id(parent->d_inode),
+               fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
                                  &entry->d_name, &outarg);
                 ret = fuse_simple_request(fc, &args);
                 dput(parent);
@@ -254,7 +254,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                                 return -ECHILD;
                 } else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
                         parent = dget_parent(entry);
-                       fuse_advise_use_readdirplus(parent->d_inode);
+                       fuse_advise_use_readdirplus(d_inode(parent));
                         dput(parent);
                 }
         }
@@ -487,7 +487,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
                         entry = res;
         }
  
-       if (!(flags & O_CREAT) || entry->d_inode)
+       if (!(flags & O_CREAT) || d_really_is_positive(entry))
                 goto no_open;
  
         /* Only creates */
@@ -653,7 +653,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
         args.in.args[0].value = entry->d_name.name;
         err = fuse_simple_request(fc, &args);
         if (!err) {
-               struct inode *inode = entry->d_inode;
+               struct inode *inode = d_inode(entry);
                 struct fuse_inode *fi = get_fuse_inode(inode);
  
                 spin_lock(&fc->lock);
@@ -689,7 +689,7 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
         args.in.args[0].value = entry->d_name.name;
         err = fuse_simple_request(fc, &args);
         if (!err) {
-               clear_nlink(entry->d_inode);
+               clear_nlink(d_inode(entry));
                 fuse_invalidate_attr(dir);
                 fuse_invalidate_entry_cache(entry);
         } else if (err == -EINTR)
@@ -721,12 +721,12 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
         err = fuse_simple_request(fc, &args);
         if (!err) {
                 /* ctime changes */
-               fuse_invalidate_attr(oldent->d_inode);
-               fuse_update_ctime(oldent->d_inode);
+               fuse_invalidate_attr(d_inode(oldent));
+               fuse_update_ctime(d_inode(oldent));
  
                 if (flags & RENAME_EXCHANGE) {
-                       fuse_invalidate_attr(newent->d_inode);
-                       fuse_update_ctime(newent->d_inode);
+                       fuse_invalidate_attr(d_inode(newent));
+                       fuse_update_ctime(d_inode(newent));
                 }
  
                 fuse_invalidate_attr(olddir);
@@ -734,10 +734,10 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
                         fuse_invalidate_attr(newdir);
  
                 /* newent will end up negative */
-               if (!(flags & RENAME_EXCHANGE) && newent->d_inode) {
-                       fuse_invalidate_attr(newent->d_inode);
+               if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
+                       fuse_invalidate_attr(d_inode(newent));
                         fuse_invalidate_entry_cache(newent);
-                       fuse_update_ctime(newent->d_inode);
+                       fuse_update_ctime(d_inode(newent));
                 }
         } else if (err == -EINTR) {
                 /* If request was interrupted, DEITY only knows if the
@@ -746,7 +746,7 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
                    directory), then there can be inconsistency between
                    the dcache and the real filesystem.  Tough luck. */
                 fuse_invalidate_entry(oldent);
-               if (newent->d_inode)
+               if (d_really_is_positive(newent))
                         fuse_invalidate_entry(newent);
         }
  
@@ -788,7 +788,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
  {
         int err;
         struct fuse_link_in inarg;
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
         struct fuse_conn *fc = get_fuse_conn(inode);
         FUSE_ARGS(args);
  
@@ -961,9 +961,9 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
         fuse_invalidate_attr(parent);
         fuse_invalidate_entry(entry);
  
-       if (child_nodeid != 0 && entry->d_inode) {
-               mutex_lock(&entry->d_inode->i_mutex);
-               if (get_node_id(entry->d_inode) != child_nodeid) {
+       if (child_nodeid != 0 && d_really_is_positive(entry)) {
+               mutex_lock(&d_inode(entry)->i_mutex);
+               if (get_node_id(d_inode(entry)) != child_nodeid) {
                         err = -ENOENT;
                         goto badentry;
                 }
@@ -977,13 +977,13 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
                                 err = -ENOTEMPTY;
                                 goto badentry;
                         }
-                       entry->d_inode->i_flags |= S_DEAD;
+                       d_inode(entry)->i_flags |= S_DEAD;
                 }
                 dont_mount(entry);
-               clear_nlink(entry->d_inode);
+               clear_nlink(d_inode(entry));
                 err = 0;
   badentry:
-               mutex_unlock(&entry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(entry)->i_mutex);
                 if (!err)
                         d_delete(entry);
         } else {
@@ -1169,7 +1169,7 @@ static int fuse_direntplus_link(struct file *file,
         struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
         struct dentry *dentry;
         struct dentry *alias;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         struct fuse_conn *fc;
         struct inode *inode;
  
@@ -1205,7 +1205,7 @@ static int fuse_direntplus_link(struct file *file,
         name.hash = full_name_hash(name.name, name.len);
         dentry = d_lookup(parent, &name);
         if (dentry) {
-               inode = dentry->d_inode;
+               inode = d_inode(dentry);
                 if (!inode) {
                         d_drop(dentry);
                 } else if (get_node_id(inode) != o->nodeid ||
@@ -1367,7 +1367,7 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx)
  
  static char *read_link(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct fuse_conn *fc = get_fuse_conn(inode);
         FUSE_ARGS(args);
         char *link;
@@ -1712,7 +1712,7 @@ error:
  
  static int fuse_setattr(struct dentry *entry, struct iattr *attr)
  {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
  
         if (!fuse_allow_current_process(get_fuse_conn(inode)))
                 return -EACCES;
@@ -1726,7 +1726,7 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
  static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
                         struct kstat *stat)
  {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
         struct fuse_conn *fc = get_fuse_conn(inode);
  
         if (!fuse_allow_current_process(fc))
@@ -1738,7 +1738,7 @@ static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
  static int fuse_setxattr(struct dentry *entry, const char *name,
                          const void *value, size_t size, int flags)
  {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
         struct fuse_conn *fc = get_fuse_conn(inode);
         FUSE_ARGS(args);
         struct fuse_setxattr_in inarg;
@@ -1774,7 +1774,7 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
  static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
                              void *value, size_t size)
  {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
         struct fuse_conn *fc = get_fuse_conn(inode);
         FUSE_ARGS(args);
         struct fuse_getxattr_in inarg;
@@ -1815,7 +1815,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
  
  static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
  {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
         struct fuse_conn *fc = get_fuse_conn(inode);
         FUSE_ARGS(args);
         struct fuse_getxattr_in inarg;
@@ -1857,7 +1857,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
  
  static int fuse_removexattr(struct dentry *entry, const char *name)
  {
-       struct inode *inode = entry->d_inode;
+       struct inode *inode = d_inode(entry);
         struct fuse_conn *fc = get_fuse_conn(inode);
         FUSE_ARGS(args);
         int err;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c

index e8799c1..082ac1c 100644 (file)
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -421,7 +421,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
         memset(&outarg, 0, sizeof(outarg));
         args.in.numargs = 0;
         args.in.h.opcode = FUSE_STATFS;
-       args.in.h.nodeid = get_node_id(dentry->d_inode);
+       args.in.h.nodeid = get_node_id(d_inode(dentry));
         args.out.numargs = 1;
         args.out.args[0].size = sizeof(outarg);
         args.out.args[0].value = &outarg;
@@ -740,7 +740,7 @@ static struct dentry *fuse_fh_to_parent(struct super_block *sb,
  
  static struct dentry *fuse_get_parent(struct dentry *child)
  {
-       struct inode *child_inode = child->d_inode;
+       struct inode *child_inode = d_inode(child);
         struct fuse_conn *fc = get_fuse_conn(child_inode);
         struct inode *inode;
         struct dentry *parent;
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c

index 589f4ea..30822b1 100644 (file)
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -48,9 +48,9 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
                 return -ECHILD;
  
         parent = dget_parent(dentry);
-       sdp = GFS2_SB(parent->d_inode);
-       dip = GFS2_I(parent->d_inode);
-       inode = dentry->d_inode;
+       sdp = GFS2_SB(d_inode(parent));
+       dip = GFS2_I(d_inode(parent));
+       inode = d_inode(dentry);
  
         if (inode) {
                 if (is_bad_inode(inode))
@@ -68,7 +68,7 @@ static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags)
                         goto fail;
         } 
  
-       error = gfs2_dir_check(parent->d_inode, &dentry->d_name, ip);
+       error = gfs2_dir_check(d_inode(parent), &dentry->d_name, ip);
         switch (error) {
         case 0:
                 if (!inode)
@@ -113,10 +113,10 @@ static int gfs2_dentry_delete(const struct dentry *dentry)
  {
         struct gfs2_inode *ginode;
  
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 return 0;
  
-       ginode = GFS2_I(dentry->d_inode);
+       ginode = GFS2_I(d_inode(dentry));
         if (!ginode->i_iopen_gh.gh_gl)
                 return 0;
  
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c

index c41d255..5d15e94 100644 (file)
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -49,7 +49,7 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len,
         fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
         *len = GFS2_SMALL_FH_SIZE;
  
-       if (!parent || inode == sb->s_root->d_inode)
+       if (!parent || inode == d_inode(sb->s_root))
                 return *len;
  
         ip = GFS2_I(parent);
@@ -88,8 +88,8 @@ static int get_name_filldir(struct dir_context *ctx, const char *name,
  static int gfs2_get_name(struct dentry *parent, char *name,
                          struct dentry *child)
  {
-       struct inode *dir = parent->d_inode;
-       struct inode *inode = child->d_inode;
+       struct inode *dir = d_inode(parent);
+       struct inode *inode = d_inode(child);
         struct gfs2_inode *dip, *ip;
         struct get_name_filldir gnfd = {
                 .ctx.actor = get_name_filldir,
@@ -128,7 +128,7 @@ static int gfs2_get_name(struct dentry *parent, char *name,
  
  static struct dentry *gfs2_get_parent(struct dentry *child)
  {
-       return d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1));
+       return d_obtain_alias(gfs2_lookupi(d_inode(child), &gfs2_qdotdot, 1));
  }
  
  static struct dentry *gfs2_get_dentry(struct super_block *sb,
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c

index 08bc84d..1b3ca7a 100644 (file)
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -295,7 +295,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
  
         if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) ||
             (name->len == 2 && memcmp(name->name, "..", 2) == 0 &&
-            dir == sb->s_root->d_inode)) {
+            dir == d_inode(sb->s_root))) {
                 igrab(dir);
                 return dir;
         }
@@ -687,7 +687,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
         }
         gfs2_set_inode_flags(inode);
  
-       if ((GFS2_I(sdp->sd_root_dir->d_inode) == dip) ||
+       if ((GFS2_I(d_inode(sdp->sd_root_dir)) == dip) ||
             (dip->i_diskflags & GFS2_DIF_TOPDIR))
                 aflags |= GFS2_AF_ORLOV;
  
@@ -888,7 +888,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
  {
         struct gfs2_inode *dip = GFS2_I(dir);
         struct gfs2_sbd *sdp = GFS2_SB(dir);
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_holder ghs[2];
         struct buffer_head *dibh;
@@ -1055,7 +1055,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
  static int gfs2_unlink_inode(struct gfs2_inode *dip,
                              const struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct gfs2_inode *ip = GFS2_I(inode);
         int error;
  
@@ -1091,7 +1091,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
  {
         struct gfs2_inode *dip = GFS2_I(dir);
         struct gfs2_sbd *sdp = GFS2_SB(dir);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_holder ghs[3];
         struct gfs2_rgrpd *rgd;
@@ -1241,7 +1241,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry,
                 return PTR_ERR(d);
         if (d != NULL)
                 dentry = d;
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                 if (!(*opened & FILE_OPENED))
                         return finish_no_open(file, d);
                 dput(d);
@@ -1282,7 +1282,7 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
                         error = -EINVAL;
                         break;
                 }
-               if (dir == sb->s_root->d_inode) {
+               if (dir == d_inode(sb->s_root)) {
                         error = 0;
                         break;
                 }
@@ -1321,7 +1321,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
  {
         struct gfs2_inode *odip = GFS2_I(odir);
         struct gfs2_inode *ndip = GFS2_I(ndir);
-       struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
+       struct gfs2_inode *ip = GFS2_I(d_inode(odentry));
         struct gfs2_inode *nip = NULL;
         struct gfs2_sbd *sdp = GFS2_SB(odir);
         struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
@@ -1332,8 +1332,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
         unsigned int x;
         int error;
  
-       if (ndentry->d_inode) {
-               nip = GFS2_I(ndentry->d_inode);
+       if (d_really_is_positive(ndentry)) {
+               nip = GFS2_I(d_inode(ndentry));
                 if (ip == nip)
                         return 0;
         }
@@ -1457,7 +1457,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
         /* Check out the dir to be renamed */
  
         if (dir_rename) {
-               error = gfs2_permission(odentry->d_inode, MAY_WRITE);
+               error = gfs2_permission(d_inode(odentry), MAY_WRITE);
                 if (error)
                         goto out_gunlock;
         }
@@ -1550,7 +1550,7 @@ out:
  
  static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+       struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
         struct gfs2_holder i_gh;
         struct buffer_head *dibh;
         unsigned int size;
@@ -1742,7 +1742,7 @@ out:
  
  static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_holder i_gh;
         int error;
@@ -1798,7 +1798,7 @@ out:
  static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
                         struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_holder gh;
         int error;
@@ -1821,7 +1821,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
  static int gfs2_setxattr(struct dentry *dentry, const char *name,
                          const void *data, size_t size, int flags)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_holder gh;
         int ret;
@@ -1841,7 +1841,7 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
  static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
                              void *data, size_t size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_holder gh;
         int ret;
@@ -1862,7 +1862,7 @@ static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
  
  static int gfs2_removexattr(struct dentry *dentry, const char *name)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct gfs2_inode *ip = GFS2_I(inode);
         struct gfs2_holder gh;
         int ret;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c

index efc8e25..35b49f4 100644 (file)
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -647,7 +647,7 @@ out_unlock:
  
  static int init_journal(struct gfs2_sbd *sdp, int undo)
  {
-       struct inode *master = sdp->sd_master_dir->d_inode;
+       struct inode *master = d_inode(sdp->sd_master_dir);
         struct gfs2_holder ji_gh;
         struct gfs2_inode *ip;
         int jindex = 1;
@@ -782,7 +782,7 @@ static struct lock_class_key gfs2_quota_imutex_key;
  static int init_inodes(struct gfs2_sbd *sdp, int undo)
  {
         int error = 0;
-       struct inode *master = sdp->sd_master_dir->d_inode;
+       struct inode *master = d_inode(sdp->sd_master_dir);
  
         if (undo)
                 goto fail_qinode;
@@ -848,7 +848,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
         char buf[30];
         int error = 0;
         struct gfs2_inode *ip;
-       struct inode *master = sdp->sd_master_dir->d_inode;
+       struct inode *master = d_inode(sdp->sd_master_dir);
  
         if (sdp->sd_args.ar_spectator)
                 return 0;
@@ -1357,7 +1357,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
                 return ERR_PTR(error);
         }
         s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags,
-                path.dentry->d_inode->i_sb->s_bdev);
+                d_inode(path.dentry)->i_sb->s_bdev);
         path_put(&path);
         if (IS_ERR(s)) {
                 pr_warn("gfs2 mount does not exist\n");
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c

index 1666382..859c6ed 100644 (file)
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1171,7 +1171,7 @@ static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *s
  
  static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
  {
-       struct super_block *sb = dentry->d_inode->i_sb;
+       struct super_block *sb = d_inode(dentry)->i_sb;
         struct gfs2_sbd *sdp = sb->s_fs_info;
         struct gfs2_statfs_change_host sc;
         int error;
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c

index fd260ce..4c096fa 100644 (file)
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -420,7 +420,7 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
  
  ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
-       struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+       struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
         struct gfs2_ea_request er;
         struct gfs2_holder i_gh;
         int error;
@@ -586,7 +586,7 @@ out:
  static int gfs2_xattr_get(struct dentry *dentry, const char *name,
                 void *buffer, size_t size, int type)
  {
-       struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+       struct gfs2_inode *ip = GFS2_I(d_inode(dentry));
         struct gfs2_ea_location el;
         int error;
  
@@ -1230,7 +1230,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
  static int gfs2_xattr_set(struct dentry *dentry, const char *name,
                 const void *value, size_t size, int flags, int type)
  {
-       return __gfs2_xattr_set(dentry->d_inode, name, value,
+       return __gfs2_xattr_set(d_inode(dentry), name, value,
                                 size, flags, type);
  }
  
diff --git a/fs/hfs/attr.c b/fs/hfs/attr.c

index e057ec5..8d931b1 100644 (file)
--- a/fs/hfs/attr.c
+++ b/fs/hfs/attr.c
@@ -16,7 +16,7 @@
  int hfs_setxattr(struct dentry *dentry, const char *name,
                  const void *value, size_t size, int flags)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct hfs_find_data fd;
         hfs_cat_rec rec;
         struct hfs_cat_file *file;
@@ -59,7 +59,7 @@ out:
  ssize_t hfs_getxattr(struct dentry *dentry, const char *name,
                          void *value, size_t size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct hfs_find_data fd;
         hfs_cat_rec rec;
         struct hfs_cat_file *file;
@@ -105,7 +105,7 @@ out:
  
  ssize_t hfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (!S_ISREG(inode->i_mode) || HFS_IS_RSRC(inode))
                 return -EOPNOTSUPP;
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c

index 36d1a6a..70788e0 100644 (file)
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -253,7 +253,7 @@ static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
   */
  static int hfs_remove(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int res;
  
         if (S_ISDIR(inode->i_mode) && inode->i_size != 2)
@@ -285,18 +285,18 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         int res;
  
         /* Unlink destination if it already exists */
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                 res = hfs_remove(new_dir, new_dentry);
                 if (res)
                         return res;
         }
  
-       res = hfs_cat_move(old_dentry->d_inode->i_ino,
+       res = hfs_cat_move(d_inode(old_dentry)->i_ino,
                            old_dir, &old_dentry->d_name,
                            new_dir, &new_dentry->d_name);
         if (!res)
                 hfs_cat_build_key(old_dir->i_sb,
-                                 (btree_key *)&HFS_I(old_dentry->d_inode)->cat_key,
+                                 (btree_key *)&HFS_I(d_inode(old_dentry))->cat_key,
                                   new_dir->i_ino, &new_dentry->d_name);
         return res;
  }
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c

index 75fd5d8..b99ebdd 100644 (file)
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -600,7 +600,7 @@ static int hfs_file_release(struct inode *inode, struct file *file)
  
  int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct hfs_sb_info *hsb = HFS_SB(inode->i_sb);
         int error;
  
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c

index 91b91fd..2875961 100644 (file)
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -21,7 +21,7 @@ static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags)
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         if(!inode)
                 return 1;
  
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c

index 3074609..d0f39dc 100644 (file)
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -81,7 +81,7 @@ again:
                                         HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->
                                                 create_date ||
                                 entry.file.create_date ==
-                                       HFSPLUS_I(sb->s_root->d_inode)->
+                                       HFSPLUS_I(d_inode(sb->s_root))->
                                                 create_date) &&
                                 HFSPLUS_SB(sb)->hidden_dir) {
                         struct qstr str;
@@ -296,8 +296,8 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
                         struct dentry *dst_dentry)
  {
         struct hfsplus_sb_info *sbi = HFSPLUS_SB(dst_dir->i_sb);
-       struct inode *inode = src_dentry->d_inode;
-       struct inode *src_dir = src_dentry->d_parent->d_inode;
+       struct inode *inode = d_inode(src_dentry);
+       struct inode *src_dir = d_inode(src_dentry->d_parent);
         struct qstr str;
         char name[32];
         u32 cnid, id;
@@ -353,7 +353,7 @@ out:
  static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
  {
         struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct qstr str;
         char name[32];
         u32 cnid;
@@ -410,7 +410,7 @@ out:
  static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry)
  {
         struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int res;
  
         if (inode->i_size != 2)
@@ -529,7 +529,7 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
         int res;
  
         /* Unlink destination if it already exists */
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                 if (d_is_dir(new_dentry))
                         res = hfsplus_rmdir(new_dir, new_dentry);
                 else
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c

index b0afedb..6dd107d 100644 (file)
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -243,7 +243,7 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
  
  static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         error = inode_change_ok(inode, attr);
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c

index 8e98f5d..0624ce4 100644 (file)
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -26,7 +26,7 @@
  static int hfsplus_ioctl_bless(struct file *file, int __user *user_flags)
  {
         struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
         struct hfsplus_vh *vh = sbi->s_vhdr;
         struct hfsplus_vh *bvh = sbi->s_backup_vhdr;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c

index 89f262d..416b1db 100644 (file)
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -440,7 +440,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name,
                 return -ENOMEM;
         strcpy(xattr_name, prefix);
         strcpy(xattr_name + prefixlen, name);
-       res = __hfsplus_setxattr(dentry->d_inode, xattr_name, value, size,
+       res = __hfsplus_setxattr(d_inode(dentry), xattr_name, value, size,
                                  flags);
         kfree(xattr_name);
         return res;
@@ -600,7 +600,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
         strcpy(xattr_name, prefix);
         strcpy(xattr_name + prefixlen, name);
  
-       res = __hfsplus_getxattr(dentry->d_inode, xattr_name, value, size);
+       res = __hfsplus_getxattr(d_inode(dentry), xattr_name, value, size);
         kfree(xattr_name);
         return res;
  
@@ -620,7 +620,7 @@ static ssize_t hfsplus_listxattr_finder_info(struct dentry *dentry,
                                                 char *buffer, size_t size)
  {
         ssize_t res = 0;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct hfs_find_data fd;
         u16 entry_type;
         u8 folder_finder_info[sizeof(struct DInfo) + sizeof(struct DXInfo)];
@@ -688,7 +688,7 @@ ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
         ssize_t err;
         ssize_t res = 0;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct hfs_find_data fd;
         u16 key_len = 0;
         struct hfsplus_attr_key attr_key;
@@ -868,7 +868,7 @@ static int hfsplus_osx_getxattr(struct dentry *dentry, const char *name,
          * creates), so we pass the name through unmodified (after
          * ensuring it doesn't conflict with another namespace).
          */
-       return __hfsplus_getxattr(dentry->d_inode, name, buffer, size);
+       return __hfsplus_getxattr(d_inode(dentry), name, buffer, size);
  }
  
  static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
@@ -890,7 +890,7 @@ static int hfsplus_osx_setxattr(struct dentry *dentry, const char *name,
          * creates), so we pass the name through unmodified (after
          * ensuring it doesn't conflict with another namespace).
          */
-       return __hfsplus_setxattr(dentry->d_inode, name, buffer, size, flags);
+       return __hfsplus_setxattr(d_inode(dentry), name, buffer, size, flags);
  }
  
  static size_t hfsplus_osx_listxattr(struct dentry *dentry, char *list,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c

index b83a034..ef26317 100644 (file)
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -807,7 +807,7 @@ static int hostfs_permission(struct inode *ino, int desired)
  
  static int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct hostfs_iattr attrs;
         char *name;
         int err;
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c

index 7ce4b74..933c737 100644 (file)
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -257,7 +257,7 @@ void hpfs_write_inode_nolock(struct inode *i)
  
  int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error = -EINVAL;
  
         hpfs_lock(inode->i_sb);
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c

index bdbc2c3..a0872f2 100644 (file)
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -359,7 +359,7 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
         unsigned len = dentry->d_name.len;
         struct quad_buffer_head qbh;
         struct hpfs_dirent *de;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         dnode_secno dno;
         int r;
         int rep = 0;
@@ -433,7 +433,7 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
         unsigned len = dentry->d_name.len;
         struct quad_buffer_head qbh;
         struct hpfs_dirent *de;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         dnode_secno dno;
         int n_items = 0;
         int err;
@@ -522,8 +522,8 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         unsigned old_len = old_dentry->d_name.len;
         const unsigned char *new_name = new_dentry->d_name.name;
         unsigned new_len = new_dentry->d_name.len;
-       struct inode *i = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *i = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct quad_buffer_head qbh, qbh1;
         struct hpfs_dirent *dep, *nde;
         struct hpfs_dirent de;
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c

index 043ac9d..fa2bd53 100644 (file)
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -153,9 +153,9 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
                 return ERR_PTR(-ENOENT);
  
         parent = HPPFS_I(ino)->proc_dentry;
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         proc_dentry = lookup_one_len(name->name, parent, name->len);
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
  
         if (IS_ERR(proc_dentry))
                 return proc_dentry;
@@ -637,25 +637,25 @@ static const struct super_operations hppfs_sbops = {
  static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
                           int buflen)
  {
-       struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
-       return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
+       struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
+       return d_inode(proc_dentry)->i_op->readlink(proc_dentry, buffer,
                                                     buflen);
  }
  
  static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+       struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
  
-       return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
+       return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, nd);
  }
  
  static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
                            void *cookie)
  {
-       struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+       struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry;
  
-       if (proc_dentry->d_inode->i_op->put_link)
-               proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
+       if (d_inode(proc_dentry)->i_op->put_link)
+               d_inode(proc_dentry)->i_op->put_link(proc_dentry, nd, cookie);
  }
  
  static const struct inode_operations hppfs_dir_iops = {
@@ -670,7 +670,7 @@ static const struct inode_operations hppfs_link_iops = {
  
  static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
  {
-       struct inode *proc_ino = dentry->d_inode;
+       struct inode *proc_ino = d_inode(dentry);
         struct inode *inode = new_inode(sb);
  
         if (!inode) {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index 2640d88..87724c1 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -393,7 +393,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
  
  static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct hstate *h = hstate_inode(inode);
         int error;
         unsigned int ia_valid = attr->ia_valid;
@@ -587,7 +587,7 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
  static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  {
         struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
-       struct hstate *h = hstate_inode(dentry->d_inode);
+       struct hstate *h = hstate_inode(d_inode(dentry));
  
         buf->f_type = HUGETLBFS_MAGIC;
         buf->f_bsize = huge_page_size(h);
diff --git a/fs/inode.c b/fs/inode.c

index f00b16f..ea37cd1 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1587,7 +1587,7 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
  void touch_atime(const struct path *path)
  {
         struct vfsmount *mnt = path->mnt;
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_inode(path->dentry);
         struct timespec now;
  
         if (inode->i_flags & S_NOATIME)
@@ -1639,7 +1639,7 @@ EXPORT_SYMBOL(touch_atime);
   */
  int should_remove_suid(struct dentry *dentry)
  {
-       umode_t mode = dentry->d_inode->i_mode;
+       umode_t mode = d_inode(dentry)->i_mode;
         int kill = 0;
  
         /* suid always must be killed */
@@ -1675,7 +1675,7 @@ static int __remove_suid(struct dentry *dentry, int kill)
  int file_remove_suid(struct file *file)
  {
         struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int killsuid;
         int killpriv;
         int error = 0;
@@ -1945,20 +1945,6 @@ void inode_dio_wait(struct inode *inode)
  }
  EXPORT_SYMBOL(inode_dio_wait);
  
-/*
- * inode_dio_done - signal finish of a direct I/O requests
- * @inode: inode the direct I/O happens on
- *
- * This is called once we've finished processing a direct I/O request,
- * and is used to wake up callers waiting for direct I/O to be quiesced.
- */
-void inode_dio_done(struct inode *inode)
-{
-       if (atomic_dec_and_test(&inode->i_dio_count))
-               wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
-}
-EXPORT_SYMBOL(inode_dio_done);
-
  /*
   * inode_set_flags - atomically set some inode flags
   *
diff --git a/fs/isofs/export.c b/fs/isofs/export.c

index 12088d8..0c5f721 100644 (file)
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -44,7 +44,7 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
  {
         unsigned long parent_block = 0;
         unsigned long parent_offset = 0;
-       struct inode *child_inode = child->d_inode;
+       struct inode *child_inode = d_inode(child);
         struct iso_inode_info *e_child_inode = ISOFS_I(child_inode);
         struct iso_directory_record *de = NULL;
         struct buffer_head * bh = NULL;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c

index f21b6fb..1ba5c97 100644 (file)
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -224,14 +224,14 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
  {
         struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb);
         struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
-       struct jffs2_inode_info *dead_f = JFFS2_INODE_INFO(dentry->d_inode);
+       struct jffs2_inode_info *dead_f = JFFS2_INODE_INFO(d_inode(dentry));
         int ret;
         uint32_t now = get_seconds();
  
         ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
                               dentry->d_name.len, dead_f, now);
         if (dead_f->inocache)
-               set_nlink(dentry->d_inode, dead_f->inocache->pino_nlink);
+               set_nlink(d_inode(dentry), dead_f->inocache->pino_nlink);
         if (!ret)
                 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
         return ret;
@@ -241,8 +241,8 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
  
  static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct dentry *dentry)
  {
-       struct jffs2_sb_info *c = JFFS2_SB_INFO(old_dentry->d_inode->i_sb);
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode);
+       struct jffs2_sb_info *c = JFFS2_SB_INFO(d_inode(old_dentry)->i_sb);
+       struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(old_dentry));
         struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
         int ret;
         uint8_t type;
@@ -256,7 +256,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
                 return -EPERM;
  
         /* XXX: This is ugly */
-       type = (old_dentry->d_inode->i_mode & S_IFMT) >> 12;
+       type = (d_inode(old_dentry)->i_mode & S_IFMT) >> 12;
         if (!type) type = DT_REG;
  
         now = get_seconds();
@@ -264,11 +264,11 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
  
         if (!ret) {
                 mutex_lock(&f->sem);
-               set_nlink(old_dentry->d_inode, ++f->inocache->pino_nlink);
+               set_nlink(d_inode(old_dentry), ++f->inocache->pino_nlink);
                 mutex_unlock(&f->sem);
-               d_instantiate(dentry, old_dentry->d_inode);
+               d_instantiate(dentry, d_inode(old_dentry));
                 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
-               ihold(old_dentry->d_inode);
+               ihold(d_inode(old_dentry));
         }
         return ret;
  }
@@ -585,7 +585,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
  {
         struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb);
         struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
+       struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry));
         struct jffs2_full_dirent *fd;
         int ret;
         uint32_t now = get_seconds();
@@ -599,7 +599,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
                               dentry->d_name.len, f, now);
         if (!ret) {
                 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
-               clear_nlink(dentry->d_inode);
+               clear_nlink(d_inode(dentry));
                 drop_nlink(dir_i);
         }
         return ret;
@@ -770,8 +770,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
          * the VFS can't check whether the victim is empty. The filesystem
          * needs to do that for itself.
          */
-       if (new_dentry->d_inode) {
-               victim_f = JFFS2_INODE_INFO(new_dentry->d_inode);
+       if (d_really_is_positive(new_dentry)) {
+               victim_f = JFFS2_INODE_INFO(d_inode(new_dentry));
                 if (d_is_dir(new_dentry)) {
                         struct jffs2_full_dirent *fd;
  
@@ -794,12 +794,12 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
         /* Make a hard link */
  
         /* XXX: This is ugly */
-       type = (old_dentry->d_inode->i_mode & S_IFMT) >> 12;
+       type = (d_inode(old_dentry)->i_mode & S_IFMT) >> 12;
         if (!type) type = DT_REG;
  
         now = get_seconds();
         ret = jffs2_do_link(c, JFFS2_INODE_INFO(new_dir_i),
-                           old_dentry->d_inode->i_ino, type,
+                           d_inode(old_dentry)->i_ino, type,
                             new_dentry->d_name.name, new_dentry->d_name.len, now);
  
         if (ret)
@@ -808,9 +808,9 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
         if (victim_f) {
                 /* There was a victim. Kill it off nicely */
                 if (d_is_dir(new_dentry))
-                       clear_nlink(new_dentry->d_inode);
+                       clear_nlink(d_inode(new_dentry));
                 else
-                       drop_nlink(new_dentry->d_inode);
+                       drop_nlink(d_inode(new_dentry));
                 /* Don't oops if the victim was a dirent pointing to an
                    inode which didn't exist. */
                 if (victim_f->inocache) {
@@ -836,9 +836,9 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
  
         if (ret) {
                 /* Oh shit. We really ought to make a single node which can do both atomically */
-               struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode);
+               struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(old_dentry));
                 mutex_lock(&f->sem);
-               inc_nlink(old_dentry->d_inode);
+               inc_nlink(d_inode(old_dentry));
                 if (f->inocache && !d_is_dir(old_dentry))
                         f->inocache->pino_nlink++;
                 mutex_unlock(&f->sem);
@@ -846,8 +846,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
                 pr_notice("%s(): Link succeeded, unlink failed (err %d). You now have a hard link\n",
                           __func__, ret);
                 /* Might as well let the VFS know */
-               d_instantiate(new_dentry, old_dentry->d_inode);
-               ihold(old_dentry->d_inode);
+               d_instantiate(new_dentry, d_inode(old_dentry));
+               ihold(d_inode(old_dentry));
                 new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
                 return ret;
         }
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c

index 601afd1..fe5ea08 100644 (file)
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -190,7 +190,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
  
  int jffs2_setattr(struct dentry *dentry, struct iattr *iattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int rc;
  
         rc = inode_change_ok(inode, iattr);
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c

index aca97f3..d4b43fb 100644 (file)
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -54,7 +54,7 @@ static int jffs2_security_getxattr(struct dentry *dentry, const char *name,
         if (!strcmp(name, ""))
                 return -EINVAL;
  
-       return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+       return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
                                  name, buffer, size);
  }
  
@@ -64,7 +64,7 @@ static int jffs2_security_setxattr(struct dentry *dentry, const char *name,
         if (!strcmp(name, ""))
                 return -EINVAL;
  
-       return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+       return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY,
                                  name, buffer, size, flags);
  }
  
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c

index 3d76f28..d86c5e3 100644 (file)
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -140,14 +140,14 @@ static struct dentry *jffs2_get_parent(struct dentry *child)
  
         BUG_ON(!d_is_dir(child));
  
-       f = JFFS2_INODE_INFO(child->d_inode);
+       f = JFFS2_INODE_INFO(d_inode(child));
  
         pino = f->inocache->pino_nlink;
  
         JFFS2_DEBUG("Parent of directory ino #%u is #%u\n",
                     f->inocache->ino, pino);
  
-       return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino));
+       return d_obtain_alias(jffs2_iget(d_inode(child)->i_sb, pino));
  }
  
  static const struct export_operations jffs2_export_ops = {
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c

index c7c77b0..1fefa25 100644 (file)
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -31,7 +31,7 @@ const struct inode_operations jffs2_symlink_inode_operations =
  
  static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
+       struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry));
         char *p = (char *)f->target;
  
         /*
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c

index 762c7a3..f092fee 100644 (file)
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -960,7 +960,7 @@ static const struct xattr_handler *xprefix_to_handler(int xprefix) {
  
  ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
         struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
         struct jffs2_inode_cache *ic = f->inocache;
@@ -1266,7 +1266,6 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
         if (rc) {
                 JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
                               __func__, rc, totlen);
-               rc = rc ? rc : -EBADFD;
                 goto out;
         }
         rc = save_xattr_ref(c, ref);
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c

index 1c86819..ceaf9c6 100644 (file)
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -21,7 +21,7 @@ static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name,
  {
         if (!strcmp(name, ""))
                 return -EINVAL;
-       return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+       return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
                                  name, buffer, size);
  }
  
@@ -30,7 +30,7 @@ static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name,
  {
         if (!strcmp(name, ""))
                 return -EINVAL;
-       return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+       return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED,
                                  name, buffer, size, flags);
  }
  
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c

index 916b5c9..a71391e 100644 (file)
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -21,7 +21,7 @@ static int jffs2_user_getxattr(struct dentry *dentry, const char *name,
  {
         if (!strcmp(name, ""))
                 return -EINVAL;
-       return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+       return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
                                  name, buffer, size);
  }
  
@@ -30,7 +30,7 @@ static int jffs2_user_setxattr(struct dentry *dentry, const char *name,
  {
         if (!strcmp(name, ""))
                 return -EINVAL;
-       return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+       return do_jffs2_setxattr(d_inode(dentry), JFFS2_XPREFIX_USER,
                                  name, buffer, size, flags);
  }
  
diff --git a/fs/jfs/file.c b/fs/jfs/file.c

index ae46788..e98d39d 100644 (file)
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -100,7 +100,7 @@ static int jfs_release(struct inode *inode, struct file *file)
  
  int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int rc;
  
         rc = inode_change_ok(inode, iattr);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c

index 38fdc53..66db7bc 100644 (file)
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -346,7 +346,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
  {
         int rc;
         tid_t tid;              /* transaction id */
-       struct inode *ip = dentry->d_inode;
+       struct inode *ip = d_inode(dentry);
         ino_t ino;
         struct component_name dname;
         struct inode *iplist[2];
@@ -472,7 +472,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
  {
         int rc;
         tid_t tid;              /* transaction id */
-       struct inode *ip = dentry->d_inode;
+       struct inode *ip = d_inode(dentry);
         ino_t ino;
         struct component_name dname;    /* object name */
         struct inode *iplist[2];
@@ -791,7 +791,7 @@ static int jfs_link(struct dentry *old_dentry,
  {
         int rc;
         tid_t tid;
-       struct inode *ip = old_dentry->d_inode;
+       struct inode *ip = d_inode(old_dentry);
         ino_t ino;
         struct component_name dname;
         struct btstack btstack;
@@ -879,7 +879,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
         struct component_name dname;
         int ssize;              /* source pathname size */
         struct btstack btstack;
-       struct inode *ip = dentry->d_inode;
+       struct inode *ip = d_inode(dentry);
         unchar *i_fastsymlink;
         s64 xlen = 0;
         int bmask = 0, xsize;
@@ -1086,8 +1086,8 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         dquot_initialize(old_dir);
         dquot_initialize(new_dir);
  
-       old_ip = old_dentry->d_inode;
-       new_ip = new_dentry->d_inode;
+       old_ip = d_inode(old_dentry);
+       new_ip = d_inode(new_dentry);
  
         if ((rc = get_UCSname(&old_dname, old_dentry)))
                 goto out1;
@@ -1500,9 +1500,9 @@ struct dentry *jfs_get_parent(struct dentry *dentry)
         unsigned long parent_ino;
  
         parent_ino =
-               le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot);
+               le32_to_cpu(JFS_IP(d_inode(dentry))->i_dtroot.header.idotdot);
  
-       return d_obtain_alias(jfs_iget(dentry->d_inode->i_sb, parent_ino));
+       return d_obtain_alias(jfs_iget(d_inode(dentry)->i_sb, parent_ino));
  }
  
  const struct inode_operations jfs_dir_inode_operations = {
@@ -1578,7 +1578,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags)
          * positive dentry isn't good idea. So it's unsupported like
          * rename("filename", "FILENAME") for now.
          */
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 return 1;
  
         /*
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c

index 205b946..80f42bc 100644 (file)
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -24,7 +24,7 @@
  
  static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       char *s = JFS_IP(dentry->d_inode)->i_inline;
+       char *s = JFS_IP(d_inode(dentry))->i_inline;
         nd_set_link(nd, s);
         return NULL;
  }
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c

index 46325d5..48b15a6 100644 (file)
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -849,7 +849,7 @@ int __jfs_setxattr(tid_t tid, struct inode *inode, const char *name,
  int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
                  size_t value_len, int flags)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct jfs_inode_info *ji = JFS_IP(inode);
         int rc;
         tid_t tid;
@@ -872,7 +872,7 @@ int jfs_setxattr(struct dentry *dentry, const char *name, const void *value,
  
         tid = txBegin(inode->i_sb, 0);
         mutex_lock(&ji->commit_mutex);
-       rc = __jfs_setxattr(tid, dentry->d_inode, name, value, value_len,
+       rc = __jfs_setxattr(tid, d_inode(dentry), name, value, value_len,
                             flags);
         if (!rc)
                 rc = txCommit(tid, 1, &inode, 0);
@@ -959,7 +959,7 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data,
                         return -EOPNOTSUPP;
         }
  
-       err = __jfs_getxattr(dentry->d_inode, name, data, buf_size);
+       err = __jfs_getxattr(d_inode(dentry), name, data, buf_size);
  
         return err;
  }
@@ -976,7 +976,7 @@ static inline int can_list(struct jfs_ea *ea)
  
  ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         char *buffer;
         ssize_t size = 0;
         int xattr_size;
@@ -1029,7 +1029,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
  
  int jfs_removexattr(struct dentry *dentry, const char *name)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct jfs_inode_info *ji = JFS_IP(inode);
         int rc;
         tid_t tid;
@@ -1047,7 +1047,7 @@ int jfs_removexattr(struct dentry *dentry, const char *name)
  
         tid = txBegin(inode->i_sb, 0);
         mutex_lock(&ji->commit_mutex);
-       rc = __jfs_setxattr(tid, dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+       rc = __jfs_setxattr(tid, d_inode(dentry), name, NULL, 0, XATTR_REPLACE);
         if (!rc)
                 rc = txCommit(tid, 1, &inode, 0);
         txEnd(tid);
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c

index 6acc964..f131fc2 100644 (file)
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -444,7 +444,7 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
                 return -ECHILD;
  
         /* Always perform fresh lookup for negatives */
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 goto out_bad_unlocked;
  
         kn = dentry->d_fsdata;
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c

index 9000874..2da8493 100644 (file)
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -111,7 +111,7 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
  
  int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct kernfs_node *kn = dentry->d_fsdata;
         int error;
  
@@ -172,11 +172,11 @@ int kernfs_iop_setxattr(struct dentry *dentry, const char *name,
  
         if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
                 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
-               error = security_inode_setsecurity(dentry->d_inode, suffix,
+               error = security_inode_setsecurity(d_inode(dentry), suffix,
                                                 value, size, flags);
                 if (error)
                         return error;
-               error = security_inode_getsecctx(dentry->d_inode,
+               error = security_inode_getsecctx(d_inode(dentry),
                                                 &secdata, &secdata_len);
                 if (error)
                         return error;
@@ -271,7 +271,7 @@ int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry,
                    struct kstat *stat)
  {
         struct kernfs_node *kn = dentry->d_fsdata;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         mutex_lock(&kernfs_mutex);
         kernfs_refresh_inode(kn, inode);
diff --git a/fs/libfs.c b/fs/libfs.c

index 0ab6512..cb1fb4b 100644 (file)
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -22,13 +22,13 @@
  
  static inline int simple_positive(struct dentry *dentry)
  {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
  }
  
  int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
                    struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         generic_fillattr(inode, stat);
         stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
         return 0;
@@ -94,7 +94,7 @@ EXPORT_SYMBOL(dcache_dir_close);
  loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
  {
         struct dentry *dentry = file->f_path.dentry;
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
         switch (whence) {
                 case 1:
                         offset += file->f_pos;
@@ -102,7 +102,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                         if (offset >= 0)
                                 break;
                 default:
-                       mutex_unlock(&dentry->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dentry)->i_mutex);
                         return -EINVAL;
         }
         if (offset != file->f_pos) {
@@ -129,7 +129,7 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
                         spin_unlock(&dentry->d_lock);
                 }
         }
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
         return offset;
  }
  EXPORT_SYMBOL(dcache_dir_lseek);
@@ -169,7 +169,7 @@ int dcache_readdir(struct file *file, struct dir_context *ctx)
                 spin_unlock(&next->d_lock);
                 spin_unlock(&dentry->d_lock);
                 if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
-                             next->d_inode->i_ino, dt_type(next->d_inode)))
+                             d_inode(next)->i_ino, dt_type(d_inode(next))))
                         return 0;
                 spin_lock(&dentry->d_lock);
                 spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
@@ -270,7 +270,7 @@ EXPORT_SYMBOL(simple_open);
  
  int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
  
         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
         inc_nlink(inode);
@@ -304,7 +304,7 @@ EXPORT_SYMBOL(simple_empty);
  
  int simple_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
         drop_nlink(inode);
@@ -318,7 +318,7 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
         if (!simple_empty(dentry))
                 return -ENOTEMPTY;
  
-       drop_nlink(dentry->d_inode);
+       drop_nlink(d_inode(dentry));
         simple_unlink(dir, dentry);
         drop_nlink(dir);
         return 0;
@@ -328,16 +328,16 @@ EXPORT_SYMBOL(simple_rmdir);
  int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
                 struct inode *new_dir, struct dentry *new_dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         int they_are_dirs = d_is_dir(old_dentry);
  
         if (!simple_empty(new_dentry))
                 return -ENOTEMPTY;
  
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                 simple_unlink(new_dir, new_dentry);
                 if (they_are_dirs) {
-                       drop_nlink(new_dentry->d_inode);
+                       drop_nlink(d_inode(new_dentry));
                         drop_nlink(old_dir);
                 }
         } else if (they_are_dirs) {
@@ -368,7 +368,7 @@ EXPORT_SYMBOL(simple_rename);
   */
  int simple_setattr(struct dentry *dentry, struct iattr *iattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         error = inode_change_ok(inode, iattr);
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c

index 665ef5a..a563ddb 100644 (file)
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -31,7 +31,7 @@
  static struct hlist_head       nlm_files[FILE_NRHASH];
  static DEFINE_MUTEX(nlm_file_mutex);
  
-#ifdef NFSD_DEBUG
+#ifdef CONFIG_SUNRPC_DEBUG
  static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
  {
         u32 *fhp = (u32*)f->data;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c

index 6bdc347..4cf38f1 100644 (file)
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -213,7 +213,7 @@ static void abort_transaction(struct inode *inode, struct logfs_transaction *ta)
  static int logfs_unlink(struct inode *dir, struct dentry *dentry)
  {
         struct logfs_super *super = logfs_super(dir->i_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct logfs_transaction *ta;
         struct page *page;
         pgoff_t index;
@@ -271,7 +271,7 @@ static inline int logfs_empty_dir(struct inode *dir)
  
  static int logfs_rmdir(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (!logfs_empty_dir(inode))
                 return -ENOTEMPTY;
@@ -537,7 +537,7 @@ static int logfs_symlink(struct inode *dir, struct dentry *dentry,
  static int logfs_link(struct dentry *old_dentry, struct inode *dir,
                 struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
  
         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
         ihold(inode);
@@ -607,7 +607,7 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry,
         /* 2. write target dd */
         mutex_lock(&super->s_dirop_mutex);
         logfs_add_transaction(new_dir, ta);
-       err = logfs_write_dir(new_dir, new_dentry, old_dentry->d_inode);
+       err = logfs_write_dir(new_dir, new_dentry, d_inode(old_dentry));
         if (!err)
                 err = write_inode(new_dir);
  
@@ -658,8 +658,8 @@ static int logfs_rename_target(struct inode *old_dir, struct dentry *old_dentry,
                                struct inode *new_dir, struct dentry *new_dentry)
  {
         struct logfs_super *super = logfs_super(old_dir->i_sb);
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         int isdir = S_ISDIR(old_inode->i_mode);
         struct logfs_disk_dentry dd;
         struct logfs_transaction *ta;
@@ -719,7 +719,7 @@ out:
  static int logfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                         struct inode *new_dir, struct dentry *new_dentry)
  {
-       if (new_dentry->d_inode)
+       if (d_really_is_positive(new_dentry))
                 return logfs_rename_target(old_dir, old_dentry,
                                            new_dir, new_dentry);
         return logfs_rename_cross(old_dir, old_dentry, new_dir, new_dentry);
diff --git a/fs/logfs/file.c b/fs/logfs/file.c

index b2c13f7..1a6f016 100644 (file)
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -241,7 +241,7 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  
  static int logfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int err = 0;
  
         err = inode_change_ok(inode, attr);
diff --git a/fs/minix/dir.c b/fs/minix/dir.c

index dfaf6fa..118e4e7 100644 (file)
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -156,7 +156,7 @@ minix_dirent *minix_find_entry(struct dentry *dentry, struct page **res_page)
  {
         const char * name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
-       struct inode * dir = dentry->d_parent->d_inode;
+       struct inode * dir = d_inode(dentry->d_parent);
         struct super_block * sb = dir->i_sb;
         struct minix_sb_info * sbi = minix_sb(sb);
         unsigned long n;
@@ -203,7 +203,7 @@ found:
  
  int minix_add_link(struct dentry *dentry, struct inode *inode)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         const char * name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
         struct super_block * sb = dir->i_sb;
diff --git a/fs/minix/file.c b/fs/minix/file.c

index 6d63e27..94f0eb9 100644 (file)
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -23,7 +23,7 @@ const struct file_operations minix_file_operations = {
  
  static int minix_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         error = inode_change_ok(inode, attr);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c

index 3f57af1..1182d1e 100644 (file)
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -626,8 +626,8 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
  int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
         struct super_block *sb = dentry->d_sb;
-       generic_fillattr(dentry->d_inode, stat);
-       if (INODE_VERSION(dentry->d_inode) == MINIX_V1)
+       generic_fillattr(d_inode(dentry), stat);
+       if (INODE_VERSION(d_inode(dentry)) == MINIX_V1)
                 stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
         else
                 stat->blocks = (sb->s_blocksize / 512) * V2_minix_blocks(stat->size, sb);
diff --git a/fs/minix/namei.c b/fs/minix/namei.c

index cd950e2..a795a11 100644 (file)
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -104,7 +104,7 @@ out_fail:
  static int minix_link(struct dentry * old_dentry, struct inode * dir,
         struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
  
         inode->i_ctime = CURRENT_TIME_SEC;
         inode_inc_link_count(inode);
@@ -151,7 +151,7 @@ out_dir:
  static int minix_unlink(struct inode * dir, struct dentry *dentry)
  {
         int err = -ENOENT;
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
         struct page * page;
         struct minix_dir_entry * de;
  
@@ -171,7 +171,7 @@ end_unlink:
  
  static int minix_rmdir(struct inode * dir, struct dentry *dentry)
  {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
         int err = -ENOTEMPTY;
  
         if (minix_empty_dir(inode)) {
@@ -187,8 +187,8 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
  static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
                            struct inode * new_dir, struct dentry *new_dentry)
  {
-       struct inode * old_inode = old_dentry->d_inode;
-       struct inode * new_inode = new_dentry->d_inode;
+       struct inode * old_inode = d_inode(old_dentry);
+       struct inode * new_inode = d_inode(new_dentry);
         struct page * dir_page = NULL;
         struct minix_dir_entry * dir_de = NULL;
         struct page * old_page;
diff --git a/fs/namei.c b/fs/namei.c

index ffab2e0..4a8d998 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1590,7 +1590,8 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
  
         if (should_follow_link(path->dentry, follow)) {
                 if (nd->flags & LOOKUP_RCU) {
-                       if (unlikely(unlazy_walk(nd, path->dentry))) {
+                       if (unlikely(nd->path.mnt != path->mnt ||
+                                    unlazy_walk(nd, path->dentry))) {
                                 err = -ECHILD;
                                 goto out_err;
                         }
@@ -3045,7 +3046,8 @@ finish_lookup:
  
         if (should_follow_link(path->dentry, !symlink_ok)) {
                 if (nd->flags & LOOKUP_RCU) {
-                       if (unlikely(unlazy_walk(nd, path->dentry))) {
+                       if (unlikely(nd->path.mnt != path->mnt ||
+                                    unlazy_walk(nd, path->dentry))) {
                                 error = -ECHILD;
                                 goto out;
                         }
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c

index e7ca827..80021c7 100644 (file)
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -127,7 +127,7 @@ static inline int ncp_case_sensitive(const struct inode *i)
  static int 
  ncp_hash_dentry(const struct dentry *dentry, struct qstr *this)
  {
-       struct inode *inode = ACCESS_ONCE(dentry->d_inode);
+       struct inode *inode = d_inode_rcu(dentry);
  
         if (!inode)
                 return 0;
@@ -162,7 +162,7 @@ ncp_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
         if (len != name->len)
                 return 1;
  
-       pinode = ACCESS_ONCE(parent->d_inode);
+       pinode = d_inode_rcu(parent);
         if (!pinode)
                 return 1;
  
@@ -180,7 +180,7 @@ ncp_compare_dentry(const struct dentry *parent, const struct dentry *dentry,
  static int
  ncp_delete_dentry(const struct dentry * dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (inode) {
                 if (is_bad_inode(inode))
@@ -224,7 +224,7 @@ ncp_force_unlink(struct inode *dir, struct dentry* dentry)
         memset(&info, 0, sizeof(info));
         
          /* remove the Read-Only flag on the NW server */
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         old_nwattr = NCP_FINFO(inode)->nwattr;
         info.attributes = old_nwattr & ~(aRONLY|aDELETEINHIBIT|aRENAMEINHIBIT);
@@ -254,7 +254,7 @@ ncp_force_rename(struct inode *old_dir, struct dentry* old_dentry, char *_old_na
  {
         struct nw_modify_dos_info info;
          int res=0x90,res2;
-       struct inode *old_inode = old_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
         __le32 old_nwattr = NCP_FINFO(old_inode)->nwattr;
         __le32 new_nwattr = 0; /* shut compiler warning */
         int old_nwattr_changed = 0;
@@ -268,8 +268,8 @@ ncp_force_rename(struct inode *old_dir, struct dentry* old_dentry, char *_old_na
         res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(old_inode), old_inode, NULL, DM_ATTRIBUTES, &info);
         if (!res2)
                 old_nwattr_changed = 1;
-       if (new_dentry && new_dentry->d_inode) {
-               new_nwattr = NCP_FINFO(new_dentry->d_inode)->nwattr;
+       if (new_dentry && d_really_is_positive(new_dentry)) {
+               new_nwattr = NCP_FINFO(d_inode(new_dentry))->nwattr;
                 info.attributes = new_nwattr & ~(aRONLY|aRENAMEINHIBIT|aDELETEINHIBIT);
                 res2 = ncp_modify_file_or_subdir_dos_info_path(NCP_SERVER(new_dir), new_dir, _new_name, DM_ATTRIBUTES, &info);
                 if (!res2)
@@ -324,9 +324,9 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
                 return -ECHILD;
  
         parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
  
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 goto finished;
  
         server = NCP_SERVER(dir);
@@ -367,7 +367,7 @@ ncp_lookup_validate(struct dentry *dentry, unsigned int flags)
          * what we remember, it's not valid any more.
          */
         if (!res) {
-               struct inode *inode = dentry->d_inode;
+               struct inode *inode = d_inode(dentry);
  
                 mutex_lock(&inode->i_mutex);
                 if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) {
@@ -388,7 +388,7 @@ finished:
  
  static time_t ncp_obtain_mtime(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ncp_server *server = NCP_SERVER(inode);
         struct nw_info_struct i;
  
@@ -404,7 +404,7 @@ static time_t ncp_obtain_mtime(struct dentry *dentry)
  static inline void
  ncp_invalidate_dircache_entries(struct dentry *parent)
  {
-       struct ncp_server *server = NCP_SERVER(parent->d_inode);
+       struct ncp_server *server = NCP_SERVER(d_inode(parent));
         struct dentry *dentry;
  
         spin_lock(&parent->d_lock);
@@ -418,7 +418,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
  static int ncp_readdir(struct file *file, struct dir_context *ctx)
  {
         struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct page *page = NULL;
         struct ncp_server *server = NCP_SERVER(inode);
         union  ncp_dir_cache *cache = NULL;
@@ -491,13 +491,13 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
                                 goto invalid_cache;
                         }
                         spin_unlock(&dentry->d_lock);
-                       if (!dent->d_inode) {
+                       if (d_really_is_negative(dent)) {
                                 dput(dent);
                                 goto invalid_cache;
                         }
                         over = !dir_emit(ctx, dent->d_name.name,
                                         dent->d_name.len,
-                                       dent->d_inode->i_ino, DT_UNKNOWN);
+                                       d_inode(dent)->i_ino, DT_UNKNOWN);
                         dput(dent);
                         if (over)
                                 goto finished;
@@ -571,7 +571,7 @@ static void ncp_d_prune(struct dentry *dentry)
  {
         if (!dentry->d_fsdata)  /* not referenced from page cache */
                 return;
-       NCP_FINFO(dentry->d_parent->d_inode)->flags &= ~NCPI_DIR_CACHE;
+       NCP_FINFO(d_inode(dentry->d_parent))->flags &= ~NCPI_DIR_CACHE;
  }
  
  static int
@@ -580,7 +580,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
                 int inval_childs)
  {
         struct dentry *newdent, *dentry = file->f_path.dentry;
-       struct inode *dir = dentry->d_inode;
+       struct inode *dir = d_inode(dentry);
         struct ncp_cache_control ctl = *ctrl;
         struct qstr qname;
         int valid = 0;
@@ -621,7 +621,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
                 dentry_update_name_case(newdent, &qname);
         }
  
-       if (!newdent->d_inode) {
+       if (d_really_is_negative(newdent)) {
                 struct inode *inode;
  
                 entry->opened = 0;
@@ -637,7 +637,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
                         spin_unlock(&dentry->d_lock);
                 }
         } else {
-               struct inode *inode = newdent->d_inode;
+               struct inode *inode = d_inode(newdent);
  
                 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
                 ncp_update_inode2(inode, entry);
@@ -659,10 +659,10 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
                         ctl.cache = kmap(ctl.page);
         }
         if (ctl.cache) {
-               if (newdent->d_inode) {
+               if (d_really_is_positive(newdent)) {
                         newdent->d_fsdata = newdent;
                         ctl.cache->dentry[ctl.idx] = newdent;
-                       ino = newdent->d_inode->i_ino;
+                       ino = d_inode(newdent)->i_ino;
                         ncp_new_dentry(newdent);
                 }
                 valid = 1;
@@ -807,7 +807,7 @@ int ncp_conn_logged_in(struct super_block *sb)
                 }
                 dent = sb->s_root;
                 if (dent) {
-                       struct inode* ino = dent->d_inode;
+                       struct inode* ino = d_inode(dent);
                         if (ino) {
                                 ncp_update_known_namespace(server, volNumber, NULL);
                                 NCP_FINFO(ino)->volNumber = volNumber;
@@ -815,7 +815,7 @@ int ncp_conn_logged_in(struct super_block *sb)
                                 NCP_FINFO(ino)->DosDirNum = DosDirNum;
                                 result = 0;
                         } else {
-                               ncp_dbg(1, "sb->s_root->d_inode == NULL!\n");
+                               ncp_dbg(1, "d_inode(sb->s_root) == NULL!\n");
                         }
                 } else {
                         ncp_dbg(1, "sb->s_root == NULL!\n");
@@ -1055,7 +1055,7 @@ out:
  
  static int ncp_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ncp_server *server;
         int error;
  
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c

index 01a9e16..9605a2f 100644 (file)
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -812,7 +812,7 @@ static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
         if (!d) {
                 goto dflt;
         }
-       i = d->d_inode;
+       i = d_inode(d);
         if (!i) {
                 goto dflt;
         }
@@ -865,7 +865,7 @@ dflt:;
  
  int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int result = 0;
         __le32 info_mask;
         struct nw_modify_dos_info info;
@@ -878,7 +878,7 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr)
                 goto out;
  
         result = -EPERM;
-       if (IS_DEADDIR(dentry->d_inode))
+       if (IS_DEADDIR(d_inode(dentry)))
                 goto out;
  
         /* ageing the dentry to force validation */
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c

index cf7e043..79b1130 100644 (file)
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -376,7 +376,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                 struct dentry* dentry = inode->i_sb->s_root;
  
                                 if (dentry) {
-                                       struct inode* s_inode = dentry->d_inode;
+                                       struct inode* s_inode = d_inode(dentry);
  
                                         if (s_inode) {
                                                 sr.volNumber = NCP_FINFO(s_inode)->volNumber;
@@ -384,7 +384,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                                 sr.namespace = server->name_space[sr.volNumber];
                                                 result = 0;
                                         } else
-                                               ncp_dbg(1, "s_root->d_inode==NULL\n");
+                                               ncp_dbg(1, "d_inode(s_root)==NULL\n");
                                 } else
                                         ncp_dbg(1, "s_root==NULL\n");
                         } else {
@@ -431,7 +431,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                 if (result == 0) {
                                         dentry = inode->i_sb->s_root;
                                         if (dentry) {
-                                               struct inode* s_inode = dentry->d_inode;
+                                               struct inode* s_inode = d_inode(dentry);
  
                                                 if (s_inode) {
                                                         NCP_FINFO(s_inode)->volNumber = vnum;
@@ -439,7 +439,7 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
                                                         NCP_FINFO(s_inode)->DosDirNum = dosde;
                                                         server->root_setuped = 1;
                                                 } else {
-                                                       ncp_dbg(1, "s_root->d_inode==NULL\n");
+                                                       ncp_dbg(1, "d_inode(s_root)==NULL\n");
                                                         result = -EIO;
                                                 }
                                         } else {
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c

index 2b502a0..88dbbc9 100644 (file)
--- a/fs/ncpfs/ncplib_kernel.c
+++ b/fs/ncpfs/ncplib_kernel.c
@@ -727,7 +727,7 @@ int
  ncp_del_file_or_subdir2(struct ncp_server *server,
                         struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         __u8  volnum;
         __le32 dirent;
  
diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c

index 1a63bfd..421b6f9 100644 (file)
--- a/fs/ncpfs/symlink.c
+++ b/fs/ncpfs/symlink.c
@@ -156,7 +156,7 @@ int ncp_symlink(struct inode *dir, struct dentry *dentry, const char *symname) {
                 goto failfree;
         }
  
-       inode=dentry->d_inode;
+       inode=d_inode(dentry);
  
         if (ncp_make_open(inode, O_WRONLY))
                 goto failfree;
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile

index 1e987ac..8664417 100644 (file)
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -22,7 +22,7 @@ nfsv3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
  obj-$(CONFIG_NFS_V4) += nfsv4.o
  CFLAGS_nfs4trace.o += -I$(src)
  nfsv4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
-         delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
+         delegation.o nfs4idmap.o callback.o callback_xdr.o callback_proc.o \
           nfs4namespace.o nfs4getroot.o nfs4client.o nfs4session.o \
           dns_resolve.o nfs4trace.o
  nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c

index 1cac3c1..d2554fe 100644 (file)
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -890,6 +890,7 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
         .free_deviceid_node             = bl_free_deviceid_node,
         .pg_read_ops                    = &bl_pg_read_ops,
         .pg_write_ops                   = &bl_pg_write_ops,
+       .sync                           = pnfs_generic_sync,
  };
  
  static int __init nfs4blocklayout_init(void)
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c

index 5aed4f9..e535599 100644 (file)
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -33,7 +33,7 @@ bl_free_deviceid_node(struct nfs4_deviceid_node *d)
                 container_of(d, struct pnfs_block_dev, node);
  
         bl_free_device(dev);
-       kfree(dev);
+       kfree_rcu(dev, node.rcu);
  }
  
  static int
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c

index 351be92..8d129bb 100644 (file)
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -128,7 +128,7 @@ nfs41_callback_svc(void *vrqstp)
                 if (try_to_freeze())
                         continue;
  
-               prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE);
+               prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
                 spin_lock_bh(&serv->sv_cb_lock);
                 if (!list_empty(&serv->sv_cb_list)) {
                         req = list_first_entry(&serv->sv_cb_list,
@@ -142,10 +142,10 @@ nfs41_callback_svc(void *vrqstp)
                                 error);
                 } else {
                         spin_unlock_bh(&serv->sv_cb_lock);
-                       /* schedule_timeout to game the hung task watchdog */
-                       schedule_timeout(60 * HZ);
+                       schedule();
                         finish_wait(&serv->sv_cb_waitq, &wq);
                 }
+               flush_signals(current);
         }
         return 0;
  }
diff --git a/fs/nfs/client.c b/fs/nfs/client.c

index 1987415..892aeff 100644 (file)
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -31,7 +31,6 @@
  #include <linux/lockd/bind.h>
  #include <linux/seq_file.h>
  #include <linux/mount.h>
-#include <linux/nfs_idmap.h>
  #include <linux/vfs.h>
  #include <linux/inet.h>
  #include <linux/in6.h>
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c

index a6ad688..029d688 100644 (file)
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -378,7 +378,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
                 if (freeme == NULL)
                         goto out;
         }
-       list_add_rcu(&delegation->super_list, &server->delegations);
+       list_add_tail_rcu(&delegation->super_list, &server->delegations);
         rcu_assign_pointer(nfsi->delegation, delegation);
         delegation = NULL;
  
@@ -514,7 +514,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
  
         delegation = nfs_inode_detach_delegation(inode);
         if (delegation != NULL)
-               nfs_do_return_delegation(inode, delegation, 0);
+               nfs_do_return_delegation(inode, delegation, 1);
  }
  
  /**
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index c19e16f..b2c8b31 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -416,15 +416,14 @@ int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
  {
         struct nfs_inode *nfsi;
  
-       if (dentry->d_inode == NULL)
-               goto different;
+       if (d_really_is_negative(dentry))
+               return 0;
  
-       nfsi = NFS_I(dentry->d_inode);
+       nfsi = NFS_I(d_inode(dentry));
         if (entry->fattr->fileid == nfsi->fileid)
                 return 1;
         if (nfs_compare_fh(entry->fh, &nfsi->fh) == 0)
                 return 1;
-different:
         return 0;
  }
  
@@ -473,7 +472,7 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
         struct qstr filename = QSTR_INIT(entry->name, entry->len);
         struct dentry *dentry;
         struct dentry *alias;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         struct inode *inode;
         int status;
  
@@ -497,9 +496,9 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
                         goto out;
                 if (nfs_same_file(dentry, entry)) {
                         nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-                       status = nfs_refresh_inode(dentry->d_inode, entry->fattr);
+                       status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
                         if (!status)
-                               nfs_setsecurity(dentry->d_inode, entry->fattr, entry->label);
+                               nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label);
                         goto out;
                 } else {
                         d_invalidate(dentry);
@@ -544,6 +543,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
         if (scratch == NULL)
                 return -ENOMEM;
  
+       if (buflen == 0)
+               goto out_nopages;
+
         xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
         xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
  
@@ -565,6 +567,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
                         break;
         } while (!entry->eof);
  
+out_nopages:
         if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
                 array = nfs_readdir_get_array(page);
                 if (!IS_ERR(array)) {
@@ -870,7 +873,7 @@ static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
  static int nfs_readdir(struct file *file, struct dir_context *ctx)
  {
         struct dentry   *dentry = file->f_path.dentry;
-       struct inode    *inode = dentry->d_inode;
+       struct inode    *inode = d_inode(dentry);
         nfs_readdir_descriptor_t my_desc,
                         *desc = &my_desc;
         struct nfs_open_dir_context *dir_ctx = file->private_data;
@@ -1118,15 +1121,15 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
  
         if (flags & LOOKUP_RCU) {
                 parent = ACCESS_ONCE(dentry->d_parent);
-               dir = ACCESS_ONCE(parent->d_inode);
+               dir = d_inode_rcu(parent);
                 if (!dir)
                         return -ECHILD;
         } else {
                 parent = dget_parent(dentry);
-               dir = parent->d_inode;
+               dir = d_inode(parent);
         }
         nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         if (!inode) {
                 if (nfs_neg_need_reval(dir, dentry, flags)) {
@@ -1242,7 +1245,7 @@ out_error:
  }
  
  /*
- * A weaker form of d_revalidate for revalidating just the dentry->d_inode
+ * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
   * when we don't really care about the dentry name. This is called when a
   * pathwalk ends on a dentry that was not found via a normal lookup in the
   * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
@@ -1253,7 +1256,7 @@ out_error:
  static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
  {
         int error;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         /*
          * I believe we can only get a negative dentry here in the case of a
@@ -1287,7 +1290,7 @@ static int nfs_dentry_delete(const struct dentry *dentry)
                 dentry, dentry->d_flags);
  
         /* Unhash any dentry with a stale inode */
-       if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
+       if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
                 return 1;
  
         if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
@@ -1491,7 +1494,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
         int err;
  
         /* Expect a negative dentry */
-       BUG_ON(dentry->d_inode);
+       BUG_ON(d_inode(dentry));
  
         dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
                         dir->i_sb->s_id, dir->i_ino, dentry);
@@ -1587,7 +1590,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
         if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
                 goto no_open;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         /* We can't create new files in nfs_open_revalidate(), so we
          * optimize away revalidation of negative dentries.
@@ -1598,12 +1601,12 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
  
                 if (flags & LOOKUP_RCU) {
                         parent = ACCESS_ONCE(dentry->d_parent);
-                       dir = ACCESS_ONCE(parent->d_inode);
+                       dir = d_inode_rcu(parent);
                         if (!dir)
                                 return -ECHILD;
                 } else {
                         parent = dget_parent(dentry);
-                       dir = parent->d_inode;
+                       dir = d_inode(parent);
                 }
                 if (!nfs_neg_need_reval(dir, dentry, flags))
                         ret = 1;
@@ -1643,14 +1646,14 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
                                 struct nfs4_label *label)
  {
         struct dentry *parent = dget_parent(dentry);
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         struct inode *inode;
         int error = -EACCES;
  
         d_drop(dentry);
  
         /* We may have been initialized further down */
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 goto out;
         if (fhandle->size == 0) {
                 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
@@ -1768,7 +1771,7 @@ EXPORT_SYMBOL_GPL(nfs_mkdir);
  
  static void nfs_dentry_handle_enoent(struct dentry *dentry)
  {
-       if (dentry->d_inode != NULL && !d_unhashed(dentry))
+       if (d_really_is_positive(dentry) && !d_unhashed(dentry))
                 d_delete(dentry);
  }
  
@@ -1780,13 +1783,13 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry)
                         dir->i_sb->s_id, dir->i_ino, dentry);
  
         trace_nfs_rmdir_enter(dir, dentry);
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                 nfs_wait_on_sillyrename(dentry);
                 error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
                 /* Ensure the VFS deletes this inode */
                 switch (error) {
                 case 0:
-                       clear_nlink(dentry->d_inode);
+                       clear_nlink(d_inode(dentry));
                         break;
                 case -ENOENT:
                         nfs_dentry_handle_enoent(dentry);
@@ -1808,8 +1811,8 @@ EXPORT_SYMBOL_GPL(nfs_rmdir);
   */
  static int nfs_safe_remove(struct dentry *dentry)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
-       struct inode *inode = dentry->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
+       struct inode *inode = d_inode(dentry);
         int error = -EBUSY;
                 
         dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
@@ -1853,7 +1856,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
         if (d_count(dentry) > 1) {
                 spin_unlock(&dentry->d_lock);
                 /* Start asynchronous writeout of the inode */
-               write_inode_now(dentry->d_inode, 0);
+               write_inode_now(d_inode(dentry), 0);
                 error = nfs_sillyrename(dir, dentry);
                 goto out;
         }
@@ -1931,7 +1934,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
          * No big deal if we can't add this page to the page cache here.
          * READLINK will get the missing page from the server if needed.
          */
-       if (!add_to_page_cache_lru(page, dentry->d_inode->i_mapping, 0,
+       if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
                                                         GFP_KERNEL)) {
                 SetPageUptodate(page);
                 unlock_page(page);
@@ -1950,7 +1953,7 @@ EXPORT_SYMBOL_GPL(nfs_symlink);
  int
  nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         int error;
  
         dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
@@ -1997,8 +2000,8 @@ EXPORT_SYMBOL_GPL(nfs_link);
  int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                       struct inode *new_dir, struct dentry *new_dentry)
  {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct dentry *dentry = NULL, *rehash = NULL;
         struct rpc_task *task;
         int error = -EBUSY;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c

index 682f65f..38678d9 100644 (file)
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -129,22 +129,25 @@ nfs_direct_good_bytes(struct nfs_direct_req *dreq, struct nfs_pgio_header *hdr)
         int i;
         ssize_t count;
  
-       WARN_ON_ONCE(hdr->pgio_mirror_idx >= dreq->mirror_count);
-
-       count = dreq->mirrors[hdr->pgio_mirror_idx].count;
-       if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
-               count = hdr->io_start + hdr->good_bytes - dreq->io_start;
-               dreq->mirrors[hdr->pgio_mirror_idx].count = count;
-       }
-
-       /* update the dreq->count by finding the minimum agreed count from all
-        * mirrors */
-       count = dreq->mirrors[0].count;
+       if (dreq->mirror_count == 1) {
+               dreq->mirrors[hdr->pgio_mirror_idx].count += hdr->good_bytes;
+               dreq->count += hdr->good_bytes;
+       } else {
+               /* mirrored writes */
+               count = dreq->mirrors[hdr->pgio_mirror_idx].count;
+               if (count + dreq->io_start < hdr->io_start + hdr->good_bytes) {
+                       count = hdr->io_start + hdr->good_bytes - dreq->io_start;
+                       dreq->mirrors[hdr->pgio_mirror_idx].count = count;
+               }
+               /* update the dreq->count by finding the minimum agreed count from all
+                * mirrors */
+               count = dreq->mirrors[0].count;
  
-       for (i = 1; i < dreq->mirror_count; i++)
-               count = min(count, dreq->mirrors[i].count);
+               for (i = 1; i < dreq->mirror_count; i++)
+                       count = min(count, dreq->mirrors[i].count);
  
-       dreq->count = count;
+               dreq->count = count;
+       }
  }
  
  /*
@@ -258,18 +261,11 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
         if (!IS_SWAPFILE(inode))
                 return 0;
  
-#ifndef CONFIG_NFS_SWAP
-       dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
-                       iocb->ki_filp, (long long) pos, iter->nr_segs);
-
-       return -EINVAL;
-#else
         VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
  
         if (iov_iter_rw(iter) == READ)
                 return nfs_file_direct_read(iocb, iter, pos);
         return nfs_file_direct_write(iocb, iter);
-#endif /* CONFIG_NFS_SWAP */
  }
  
  static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -386,7 +382,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
         if (write)
                 nfs_zap_mapping(inode, inode->i_mapping);
  
-       inode_dio_done(inode);
+       inode_dio_end(inode);
  
         if (dreq->iocb) {
                 long res = (long) dreq->error;
@@ -403,8 +399,8 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
  static void nfs_direct_readpage_release(struct nfs_page *req)
  {
         dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
-               req->wb_context->dentry->d_inode->i_sb->s_id,
-               (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+               d_inode(req->wb_context->dentry)->i_sb->s_id,
+               (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
                 req->wb_bytes,
                 (long long)req_offset(req));
         nfs_release_request(req);
@@ -486,7 +482,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
                              &nfs_direct_read_completion_ops);
         get_dreq(dreq);
         desc.pg_dreq = dreq;
-       atomic_inc(&inode->i_dio_count);
+       inode_dio_begin(inode);
  
         while (iov_iter_count(iter)) {
                 struct page **pagevec;
@@ -538,7 +534,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
          * generic layer handle the completion.
          */
         if (requested_bytes == 0) {
-               inode_dio_done(inode);
+               inode_dio_end(inode);
                 nfs_direct_req_release(dreq);
                 return result < 0 ? result : -EIO;
         }
@@ -872,7 +868,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
                               &nfs_direct_write_completion_ops);
         desc.pg_dreq = dreq;
         get_dreq(dreq);
-       atomic_inc(&inode->i_dio_count);
+       inode_dio_begin(inode);
  
         NFS_I(inode)->write_io += iov_iter_count(iter);
         while (iov_iter_count(iter)) {
@@ -928,7 +924,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
          * generic layer handle the completion.
          */
         if (requested_bytes == 0) {
-               inode_dio_done(inode);
+               inode_dio_end(inode);
                 nfs_direct_req_release(dreq);
                 return result < 0 ? result : -EIO;
         }
@@ -1030,6 +1026,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
                         if (i_size_read(inode) < iocb->ki_pos)
                                 i_size_write(inode, iocb->ki_pos);
                         spin_unlock(&inode->i_lock);
+                       generic_write_sync(file, pos, result);
                 }
         }
         nfs_direct_req_release(dreq);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c

index c40e436..8b8d83a 100644 (file)
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -280,6 +280,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  
         trace_nfs_fsync_enter(inode);
  
+       nfs_inode_dio_wait(inode);
         do {
                 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                 if (ret != 0)
@@ -782,7 +783,7 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
          * Flush all pending writes before doing anything
          * with locks..
          */
-       nfs_sync_mapping(filp->f_mapping);
+       vfs_fsync(filp, 0);
  
         l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
         if (!IS_ERR(l_ctx)) {
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c

index 91e88a7..a46bf6d 100644 (file)
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -258,7 +258,8 @@ filelayout_set_layoutcommit(struct nfs_pgio_header *hdr)
             hdr->res.verf->committed != NFS_DATA_SYNC)
                 return;
  
-       pnfs_set_layoutcommit(hdr);
+       pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
+                       hdr->mds_offset + hdr->res.count);
         dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
                 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
  }
@@ -373,7 +374,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
         }
  
         if (data->verf.committed == NFS_UNSTABLE)
-               pnfs_commit_set_layoutcommit(data);
+               pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
  
         return 0;
  }
@@ -1086,7 +1087,7 @@ filelayout_alloc_deviceid_node(struct nfs_server *server,
  }
  
  static void
-filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d)
+filelayout_free_deviceid_node(struct nfs4_deviceid_node *d)
  {
         nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node));
  }
@@ -1137,7 +1138,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
         .read_pagelist          = filelayout_read_pagelist,
         .write_pagelist         = filelayout_write_pagelist,
         .alloc_deviceid_node    = filelayout_alloc_deviceid_node,
-       .free_deviceid_node     = filelayout_free_deveiceid_node,
+       .free_deviceid_node     = filelayout_free_deviceid_node,
+       .sync                   = pnfs_nfs_generic_sync,
  };
  
  static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c

index 4f372e2..4946ef4 100644 (file)
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -55,7 +55,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
                         nfs4_pnfs_ds_put(ds);
         }
         kfree(dsaddr->stripe_indices);
-       kfree(dsaddr);
+       kfree_rcu(dsaddr, id_node.rcu);
  }
  
  /* Decode opaque device data and return the result */
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c

index 315cc68..7d05089 100644 (file)
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,10 +11,10 @@
  #include <linux/module.h>
  
  #include <linux/sunrpc/metrics.h>
-#include <linux/nfs_idmap.h>
  
  #include "flexfilelayout.h"
  #include "../nfs4session.h"
+#include "../nfs4idmap.h"
  #include "../internal.h"
  #include "../delegation.h"
  #include "../nfs4trace.h"
@@ -891,7 +891,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
  static void
  ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr)
  {
-       pnfs_set_layoutcommit(hdr);
+       pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
+                       hdr->mds_offset + hdr->res.count);
         dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
                 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
  }
@@ -1074,7 +1075,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
         }
  
         if (data->verf.committed == NFS_UNSTABLE)
-               pnfs_commit_set_layoutcommit(data);
+               pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
  
         return 0;
  }
@@ -1414,7 +1415,7 @@ ff_layout_get_ds_info(struct inode *inode)
  }
  
  static void
-ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d)
+ff_layout_free_deviceid_node(struct nfs4_deviceid_node *d)
  {
         nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds,
                                                   id_node));
@@ -1498,7 +1499,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
         .pg_read_ops            = &ff_layout_pg_read_ops,
         .pg_write_ops           = &ff_layout_pg_write_ops,
         .get_ds_info            = ff_layout_get_ds_info,
-       .free_deviceid_node     = ff_layout_free_deveiceid_node,
+       .free_deviceid_node     = ff_layout_free_deviceid_node,
         .mark_request_commit    = pnfs_layout_mark_request_commit,
         .clear_request_commit   = pnfs_generic_clear_request_commit,
         .scan_commit_lists      = pnfs_generic_scan_commit_lists,
@@ -1508,6 +1509,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = {
         .write_pagelist         = ff_layout_write_pagelist,
         .alloc_deviceid_node    = ff_layout_alloc_deviceid_node,
         .encode_layoutreturn    = ff_layout_encode_layoutreturn,
+       .sync                   = pnfs_nfs_generic_sync,
  };
  
  static int __init nfs4flexfilelayout_init(void)
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c

index e2c01f2..77a2d02 100644 (file)
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -30,7 +30,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
  {
         nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
         nfs4_pnfs_ds_put(mirror_ds->ds);
-       kfree(mirror_ds);
+       kfree_rcu(mirror_ds, id_node.rcu);
  }
  
  /* Decode opaque device data and construct new_ds using it */
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c

index 9ac3846..a608ffd 100644 (file)
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -56,11 +56,11 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
                  * This again causes shrink_dcache_for_umount_subtree() to
                  * Oops, since the test for IS_ROOT() will fail.
                  */
-               spin_lock(&sb->s_root->d_inode->i_lock);
+               spin_lock(&d_inode(sb->s_root)->i_lock);
                 spin_lock(&sb->s_root->d_lock);
                 hlist_del_init(&sb->s_root->d_u.d_alias);
                 spin_unlock(&sb->s_root->d_lock);
-               spin_unlock(&sb->s_root->d_inode->i_lock);
+               spin_unlock(&d_inode(sb->s_root)->i_lock);
         }
         return 0;
  }
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c

deleted file mode 100644 (file)

index 857e2a9..0000000
--- a/fs/nfs/idmap.c
+++ /dev/null
@@ -1,792 +0,0 @@
-/*
- * fs/nfs/idmap.c
- *
- *  UID and GID to name mapping for clients.
- *
- *  Copyright (c) 2002 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Marius Aamodt Eriksen <marius@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#include <linux/types.h>
-#include <linux/parser.h>
-#include <linux/fs.h>
-#include <linux/nfs_idmap.h>
-#include <net/net_namespace.h>
-#include <linux/sunrpc/rpc_pipe_fs.h>
-#include <linux/nfs_fs.h>
-#include <linux/nfs_fs_sb.h>
-#include <linux/key.h>
-#include <linux/keyctl.h>
-#include <linux/key-type.h>
-#include <keys/user-type.h>
-#include <linux/module.h>
-
-#include "internal.h"
-#include "netns.h"
-#include "nfs4trace.h"
-
-#define NFS_UINT_MAXLEN 11
-
-static const struct cred *id_resolver_cache;
-static struct key_type key_type_id_resolver_legacy;
-
-struct idmap_legacy_upcalldata {
-       struct rpc_pipe_msg pipe_msg;
-       struct idmap_msg idmap_msg;
-       struct key_construction *key_cons;
-       struct idmap *idmap;
-};
-
-struct idmap {
-       struct rpc_pipe_dir_object idmap_pdo;
-       struct rpc_pipe         *idmap_pipe;
-       struct idmap_legacy_upcalldata *idmap_upcall_data;
-       struct mutex            idmap_mutex;
-};
-
-/**
- * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
- * @fattr: fully initialised struct nfs_fattr
- * @owner_name: owner name string cache
- * @group_name: group name string cache
- */
-void nfs_fattr_init_names(struct nfs_fattr *fattr,
-               struct nfs4_string *owner_name,
-               struct nfs4_string *group_name)
-{
-       fattr->owner_name = owner_name;
-       fattr->group_name = group_name;
-}
-
-static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr)
-{
-       fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME;
-       kfree(fattr->owner_name->data);
-}
-
-static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
-{
-       fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME;
-       kfree(fattr->group_name->data);
-}
-
-static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
-{
-       struct nfs4_string *owner = fattr->owner_name;
-       kuid_t uid;
-
-       if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
-               return false;
-       if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) {
-               fattr->uid = uid;
-               fattr->valid |= NFS_ATTR_FATTR_OWNER;
-       }
-       return true;
-}
-
-static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
-{
-       struct nfs4_string *group = fattr->group_name;
-       kgid_t gid;
-
-       if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
-               return false;
-       if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) {
-               fattr->gid = gid;
-               fattr->valid |= NFS_ATTR_FATTR_GROUP;
-       }
-       return true;
-}
-
-/**
- * nfs_fattr_free_names - free up the NFSv4 owner and group strings
- * @fattr: a fully initialised nfs_fattr structure
- */
-void nfs_fattr_free_names(struct nfs_fattr *fattr)
-{
-       if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)
-               nfs_fattr_free_owner_name(fattr);
-       if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)
-               nfs_fattr_free_group_name(fattr);
-}
-
-/**
- * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free
- * @server: pointer to the filesystem nfs_server structure
- * @fattr: a fully initialised nfs_fattr structure
- *
- * This helper maps the cached NFSv4 owner/group strings in fattr into
- * their numeric uid/gid equivalents, and then frees the cached strings.
- */
-void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr)
-{
-       if (nfs_fattr_map_owner_name(server, fattr))
-               nfs_fattr_free_owner_name(fattr);
-       if (nfs_fattr_map_group_name(server, fattr))
-               nfs_fattr_free_group_name(fattr);
-}
-
-int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
-{
-       unsigned long val;
-       char buf[16];
-
-       if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
-               return 0;
-       memcpy(buf, name, namelen);
-       buf[namelen] = '\0';
-       if (kstrtoul(buf, 0, &val) != 0)
-               return 0;
-       *res = val;
-       return 1;
-}
-EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric);
-
-static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
-{
-       return snprintf(buf, buflen, "%u", id);
-}
-
-static struct key_type key_type_id_resolver = {
-       .name           = "id_resolver",
-       .preparse       = user_preparse,
-       .free_preparse  = user_free_preparse,
-       .instantiate    = generic_key_instantiate,
-       .revoke         = user_revoke,
-       .destroy        = user_destroy,
-       .describe       = user_describe,
-       .read           = user_read,
-};
-
-static int nfs_idmap_init_keyring(void)
-{
-       struct cred *cred;
-       struct key *keyring;
-       int ret = 0;
-
-       printk(KERN_NOTICE "NFS: Registering the %s key type\n",
-               key_type_id_resolver.name);
-
-       cred = prepare_kernel_cred(NULL);
-       if (!cred)
-               return -ENOMEM;
-
-       keyring = keyring_alloc(".id_resolver",
-                               GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
-                               (KEY_POS_ALL & ~KEY_POS_SETATTR) |
-                               KEY_USR_VIEW | KEY_USR_READ,
-                               KEY_ALLOC_NOT_IN_QUOTA, NULL);
-       if (IS_ERR(keyring)) {
-               ret = PTR_ERR(keyring);
-               goto failed_put_cred;
-       }
-
-       ret = register_key_type(&key_type_id_resolver);
-       if (ret < 0)
-               goto failed_put_key;
-
-       ret = register_key_type(&key_type_id_resolver_legacy);
-       if (ret < 0)
-               goto failed_reg_legacy;
-
-       set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
-       cred->thread_keyring = keyring;
-       cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
-       id_resolver_cache = cred;
-       return 0;
-
-failed_reg_legacy:
-       unregister_key_type(&key_type_id_resolver);
-failed_put_key:
-       key_put(keyring);
-failed_put_cred:
-       put_cred(cred);
-       return ret;
-}
-
-static void nfs_idmap_quit_keyring(void)
-{
-       key_revoke(id_resolver_cache->thread_keyring);
-       unregister_key_type(&key_type_id_resolver);
-       unregister_key_type(&key_type_id_resolver_legacy);
-       put_cred(id_resolver_cache);
-}
-
-/*
- * Assemble the description to pass to request_key()
- * This function will allocate a new string and update dest to point
- * at it.  The caller is responsible for freeing dest.
- *
- * On error 0 is returned.  Otherwise, the length of dest is returned.
- */
-static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
-                               const char *type, size_t typelen, char **desc)
-{
-       char *cp;
-       size_t desclen = typelen + namelen + 2;
-
-       *desc = kmalloc(desclen, GFP_KERNEL);
-       if (!*desc)
-               return -ENOMEM;
-
-       cp = *desc;
-       memcpy(cp, type, typelen);
-       cp += typelen;
-       *cp++ = ':';
-
-       memcpy(cp, name, namelen);
-       cp += namelen;
-       *cp = '\0';
-       return desclen;
-}
-
-static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
-                                        const char *type, struct idmap *idmap)
-{
-       char *desc;
-       struct key *rkey;
-       ssize_t ret;
-
-       ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
-       if (ret <= 0)
-               return ERR_PTR(ret);
-
-       rkey = request_key(&key_type_id_resolver, desc, "");
-       if (IS_ERR(rkey)) {
-               mutex_lock(&idmap->idmap_mutex);
-               rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
-                                               desc, "", 0, idmap);
-               mutex_unlock(&idmap->idmap_mutex);
-       }
-       if (!IS_ERR(rkey))
-               set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
-
-       kfree(desc);
-       return rkey;
-}
-
-static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
-                                const char *type, void *data,
-                                size_t data_size, struct idmap *idmap)
-{
-       const struct cred *saved_cred;
-       struct key *rkey;
-       struct user_key_payload *payload;
-       ssize_t ret;
-
-       saved_cred = override_creds(id_resolver_cache);
-       rkey = nfs_idmap_request_key(name, namelen, type, idmap);
-       revert_creds(saved_cred);
-
-       if (IS_ERR(rkey)) {
-               ret = PTR_ERR(rkey);
-               goto out;
-       }
-
-       rcu_read_lock();
-       rkey->perm |= KEY_USR_VIEW;
-
-       ret = key_validate(rkey);
-       if (ret < 0)
-               goto out_up;
-
-       payload = rcu_dereference(rkey->payload.rcudata);
-       if (IS_ERR_OR_NULL(payload)) {
-               ret = PTR_ERR(payload);
-               goto out_up;
-       }
-
-       ret = payload->datalen;
-       if (ret > 0 && ret <= data_size)
-               memcpy(data, payload->data, ret);
-       else
-               ret = -EINVAL;
-
-out_up:
-       rcu_read_unlock();
-       key_put(rkey);
-out:
-       return ret;
-}
-
-/* ID -> Name */
-static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
-                                    size_t buflen, struct idmap *idmap)
-{
-       char id_str[NFS_UINT_MAXLEN];
-       int id_len;
-       ssize_t ret;
-
-       id_len = snprintf(id_str, sizeof(id_str), "%u", id);
-       ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
-       if (ret < 0)
-               return -EINVAL;
-       return ret;
-}
-
-/* Name -> ID */
-static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type,
-                              __u32 *id, struct idmap *idmap)
-{
-       char id_str[NFS_UINT_MAXLEN];
-       long id_long;
-       ssize_t data_size;
-       int ret = 0;
-
-       data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap);
-       if (data_size <= 0) {
-               ret = -EINVAL;
-       } else {
-               ret = kstrtol(id_str, 10, &id_long);
-               *id = (__u32)id_long;
-       }
-       return ret;
-}
-
-/* idmap classic begins here */
-
-enum {
-       Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
-};
-
-static const match_table_t nfs_idmap_tokens = {
-       { Opt_find_uid, "uid:%s" },
-       { Opt_find_gid, "gid:%s" },
-       { Opt_find_user, "user:%s" },
-       { Opt_find_group, "group:%s" },
-       { Opt_find_err, NULL }
-};
-
-static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
-static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
-                                  size_t);
-static void idmap_release_pipe(struct inode *);
-static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
-
-static const struct rpc_pipe_ops idmap_upcall_ops = {
-       .upcall         = rpc_pipe_generic_upcall,
-       .downcall       = idmap_pipe_downcall,
-       .release_pipe   = idmap_release_pipe,
-       .destroy_msg    = idmap_pipe_destroy_msg,
-};
-
-static struct key_type key_type_id_resolver_legacy = {
-       .name           = "id_legacy",
-       .preparse       = user_preparse,
-       .free_preparse  = user_free_preparse,
-       .instantiate    = generic_key_instantiate,
-       .revoke         = user_revoke,
-       .destroy        = user_destroy,
-       .describe       = user_describe,
-       .read           = user_read,
-       .request_key    = nfs_idmap_legacy_upcall,
-};
-
-static void nfs_idmap_pipe_destroy(struct dentry *dir,
-               struct rpc_pipe_dir_object *pdo)
-{
-       struct idmap *idmap = pdo->pdo_data;
-       struct rpc_pipe *pipe = idmap->idmap_pipe;
-
-       if (pipe->dentry) {
-               rpc_unlink(pipe->dentry);
-               pipe->dentry = NULL;
-       }
-}
-
-static int nfs_idmap_pipe_create(struct dentry *dir,
-               struct rpc_pipe_dir_object *pdo)
-{
-       struct idmap *idmap = pdo->pdo_data;
-       struct rpc_pipe *pipe = idmap->idmap_pipe;
-       struct dentry *dentry;
-
-       dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
-       if (IS_ERR(dentry))
-               return PTR_ERR(dentry);
-       pipe->dentry = dentry;
-       return 0;
-}
-
-static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = {
-       .create = nfs_idmap_pipe_create,
-       .destroy = nfs_idmap_pipe_destroy,
-};
-
-int
-nfs_idmap_new(struct nfs_client *clp)
-{
-       struct idmap *idmap;
-       struct rpc_pipe *pipe;
-       int error;
-
-       idmap = kzalloc(sizeof(*idmap), GFP_KERNEL);
-       if (idmap == NULL)
-               return -ENOMEM;
-
-       rpc_init_pipe_dir_object(&idmap->idmap_pdo,
-                       &nfs_idmap_pipe_dir_object_ops,
-                       idmap);
-
-       pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
-       if (IS_ERR(pipe)) {
-               error = PTR_ERR(pipe);
-               goto err;
-       }
-       idmap->idmap_pipe = pipe;
-       mutex_init(&idmap->idmap_mutex);
-
-       error = rpc_add_pipe_dir_object(clp->cl_net,
-                       &clp->cl_rpcclient->cl_pipedir_objects,
-                       &idmap->idmap_pdo);
-       if (error)
-               goto err_destroy_pipe;
-
-       clp->cl_idmap = idmap;
-       return 0;
-err_destroy_pipe:
-       rpc_destroy_pipe_data(idmap->idmap_pipe);
-err:
-       kfree(idmap);
-       return error;
-}
-
-void
-nfs_idmap_delete(struct nfs_client *clp)
-{
-       struct idmap *idmap = clp->cl_idmap;
-
-       if (!idmap)
-               return;
-       clp->cl_idmap = NULL;
-       rpc_remove_pipe_dir_object(clp->cl_net,
-                       &clp->cl_rpcclient->cl_pipedir_objects,
-                       &idmap->idmap_pdo);
-       rpc_destroy_pipe_data(idmap->idmap_pipe);
-       kfree(idmap);
-}
-
-int nfs_idmap_init(void)
-{
-       int ret;
-       ret = nfs_idmap_init_keyring();
-       if (ret != 0)
-               goto out;
-out:
-       return ret;
-}
-
-void nfs_idmap_quit(void)
-{
-       nfs_idmap_quit_keyring();
-}
-
-static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
-                                    struct idmap_msg *im,
-                                    struct rpc_pipe_msg *msg)
-{
-       substring_t substr;
-       int token, ret;
-
-       im->im_type = IDMAP_TYPE_GROUP;
-       token = match_token(desc, nfs_idmap_tokens, &substr);
-
-       switch (token) {
-       case Opt_find_uid:
-               im->im_type = IDMAP_TYPE_USER;
-       case Opt_find_gid:
-               im->im_conv = IDMAP_CONV_NAMETOID;
-               ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
-               break;
-
-       case Opt_find_user:
-               im->im_type = IDMAP_TYPE_USER;
-       case Opt_find_group:
-               im->im_conv = IDMAP_CONV_IDTONAME;
-               ret = match_int(&substr, &im->im_id);
-               break;
-
-       default:
-               ret = -EINVAL;
-               goto out;
-       }
-
-       msg->data = im;
-       msg->len  = sizeof(struct idmap_msg);
-
-out:
-       return ret;
-}
-
-static bool
-nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
-               struct idmap_legacy_upcalldata *data)
-{
-       if (idmap->idmap_upcall_data != NULL) {
-               WARN_ON_ONCE(1);
-               return false;
-       }
-       idmap->idmap_upcall_data = data;
-       return true;
-}
-
-static void
-nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
-{
-       struct key_construction *cons = idmap->idmap_upcall_data->key_cons;
-
-       kfree(idmap->idmap_upcall_data);
-       idmap->idmap_upcall_data = NULL;
-       complete_request_key(cons, ret);
-}
-
-static void
-nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
-{
-       if (idmap->idmap_upcall_data != NULL)
-               nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
-}
-
-static int nfs_idmap_legacy_upcall(struct key_construction *cons,
-                                  const char *op,
-                                  void *aux)
-{
-       struct idmap_legacy_upcalldata *data;
-       struct rpc_pipe_msg *msg;
-       struct idmap_msg *im;
-       struct idmap *idmap = (struct idmap *)aux;
-       struct key *key = cons->key;
-       int ret = -ENOMEM;
-
-       /* msg and im are freed in idmap_pipe_destroy_msg */
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
-       if (!data)
-               goto out1;
-
-       msg = &data->pipe_msg;
-       im = &data->idmap_msg;
-       data->idmap = idmap;
-       data->key_cons = cons;
-
-       ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
-       if (ret < 0)
-               goto out2;
-
-       ret = -EAGAIN;
-       if (!nfs_idmap_prepare_pipe_upcall(idmap, data))
-               goto out2;
-
-       ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
-       if (ret < 0)
-               nfs_idmap_abort_pipe_upcall(idmap, ret);
-
-       return ret;
-out2:
-       kfree(data);
-out1:
-       complete_request_key(cons, ret);
-       return ret;
-}
-
-static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
-{
-       return key_instantiate_and_link(key, data, datalen,
-                                       id_resolver_cache->thread_keyring,
-                                       authkey);
-}
-
-static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
-               struct idmap_msg *upcall,
-               struct key *key, struct key *authkey)
-{
-       char id_str[NFS_UINT_MAXLEN];
-       size_t len;
-       int ret = -ENOKEY;
-
-       /* ret = -ENOKEY */
-       if (upcall->im_type != im->im_type || upcall->im_conv != im->im_conv)
-               goto out;
-       switch (im->im_conv) {
-       case IDMAP_CONV_NAMETOID:
-               if (strcmp(upcall->im_name, im->im_name) != 0)
-                       break;
-               /* Note: here we store the NUL terminator too */
-               len = sprintf(id_str, "%d", im->im_id) + 1;
-               ret = nfs_idmap_instantiate(key, authkey, id_str, len);
-               break;
-       case IDMAP_CONV_IDTONAME:
-               if (upcall->im_id != im->im_id)
-                       break;
-               len = strlen(im->im_name);
-               ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
-               break;
-       default:
-               ret = -EINVAL;
-       }
-out:
-       return ret;
-}
-
-static ssize_t
-idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
-{
-       struct rpc_inode *rpci = RPC_I(file_inode(filp));
-       struct idmap *idmap = (struct idmap *)rpci->private;
-       struct key_construction *cons;
-       struct idmap_msg im;
-       size_t namelen_in;
-       int ret = -ENOKEY;
-
-       /* If instantiation is successful, anyone waiting for key construction
-        * will have been woken up and someone else may now have used
-        * idmap_key_cons - so after this point we may no longer touch it.
-        */
-       if (idmap->idmap_upcall_data == NULL)
-               goto out_noupcall;
-
-       cons = idmap->idmap_upcall_data->key_cons;
-
-       if (mlen != sizeof(im)) {
-               ret = -ENOSPC;
-               goto out;
-       }
-
-       if (copy_from_user(&im, src, mlen) != 0) {
-               ret = -EFAULT;
-               goto out;
-       }
-
-       if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
-               ret = -ENOKEY;
-               goto out;
-       }
-
-       namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
-       if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
-               ret = -EINVAL;
-               goto out;
-}
-
-       ret = nfs_idmap_read_and_verify_message(&im,
-                       &idmap->idmap_upcall_data->idmap_msg,
-                       cons->key, cons->authkey);
-       if (ret >= 0) {
-               key_set_timeout(cons->key, nfs_idmap_cache_timeout);
-               ret = mlen;
-       }
-
-out:
-       nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
-out_noupcall:
-       return ret;
-}
-
-static void
-idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
-{
-       struct idmap_legacy_upcalldata *data = container_of(msg,
-                       struct idmap_legacy_upcalldata,
-                       pipe_msg);
-       struct idmap *idmap = data->idmap;
-
-       if (msg->errno)
-               nfs_idmap_abort_pipe_upcall(idmap, msg->errno);
-}
-
-static void
-idmap_release_pipe(struct inode *inode)
-{
-       struct rpc_inode *rpci = RPC_I(inode);
-       struct idmap *idmap = (struct idmap *)rpci->private;
-
-       nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
-}
-
-int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
-{
-       struct idmap *idmap = server->nfs_client->cl_idmap;
-       __u32 id = -1;
-       int ret = 0;
-
-       if (!nfs_map_string_to_numeric(name, namelen, &id))
-               ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
-       if (ret == 0) {
-               *uid = make_kuid(&init_user_ns, id);
-               if (!uid_valid(*uid))
-                       ret = -ERANGE;
-       }
-       trace_nfs4_map_name_to_uid(name, namelen, id, ret);
-       return ret;
-}
-
-int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
-{
-       struct idmap *idmap = server->nfs_client->cl_idmap;
-       __u32 id = -1;
-       int ret = 0;
-
-       if (!nfs_map_string_to_numeric(name, namelen, &id))
-               ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
-       if (ret == 0) {
-               *gid = make_kgid(&init_user_ns, id);
-               if (!gid_valid(*gid))
-                       ret = -ERANGE;
-       }
-       trace_nfs4_map_group_to_gid(name, namelen, id, ret);
-       return ret;
-}
-
-int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
-{
-       struct idmap *idmap = server->nfs_client->cl_idmap;
-       int ret = -EINVAL;
-       __u32 id;
-
-       id = from_kuid(&init_user_ns, uid);
-       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-               ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
-       if (ret < 0)
-               ret = nfs_map_numeric_to_string(id, buf, buflen);
-       trace_nfs4_map_uid_to_name(buf, ret, id, ret);
-       return ret;
-}
-int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
-{
-       struct idmap *idmap = server->nfs_client->cl_idmap;
-       int ret = -EINVAL;
-       __u32 id;
-
-       id = from_kgid(&init_user_ns, gid);
-       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
-               ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
-       if (ret < 0)
-               ret = nfs_map_numeric_to_string(id, buf, buflen);
-       trace_nfs4_map_gid_to_group(buf, ret, id, ret);
-       return ret;
-}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c

index d42dff6..f734562 100644 (file)
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -133,6 +133,13 @@ void nfs_evict_inode(struct inode *inode)
         nfs_clear_inode(inode);
  }
  
+int nfs_sync_inode(struct inode *inode)
+{
+       nfs_inode_dio_wait(inode);
+       return nfs_wb_all(inode);
+}
+EXPORT_SYMBOL_GPL(nfs_sync_inode);
+
  /**
   * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
   */
@@ -192,7 +199,6 @@ void nfs_zap_caches(struct inode *inode)
         nfs_zap_caches_locked(inode);
         spin_unlock(&inode->i_lock);
  }
-EXPORT_SYMBOL_GPL(nfs_zap_caches);
  
  void nfs_zap_mapping(struct inode *inode, struct address_space *mapping)
  {
@@ -495,7 +501,7 @@ EXPORT_SYMBOL_GPL(nfs_fhget);
  int
  nfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct nfs_fattr *fattr;
         int error = -ENOMEM;
  
@@ -525,10 +531,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
         trace_nfs_setattr_enter(inode);
  
         /* Write all dirty data */
-       if (S_ISREG(inode->i_mode)) {
-               nfs_inode_dio_wait(inode);
-               nfs_wb_all(inode);
-       }
+       if (S_ISREG(inode->i_mode))
+               nfs_sync_inode(inode);
  
         fattr = nfs_alloc_fattr();
         if (fattr == NULL)
@@ -621,7 +625,7 @@ static void nfs_request_parent_use_readdirplus(struct dentry *dentry)
         struct dentry *parent;
  
         parent = dget_parent(dentry);
-       nfs_force_use_readdirplus(parent->d_inode);
+       nfs_force_use_readdirplus(d_inode(parent));
         dput(parent);
  }
  
@@ -637,15 +641,16 @@ static bool nfs_need_revalidate_inode(struct inode *inode)
  
  int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
         int err = 0;
  
         trace_nfs_getattr_enter(inode);
         /* Flush out writes to the server in order to update c/mtime.  */
         if (S_ISREG(inode->i_mode)) {
-               nfs_inode_dio_wait(inode);
-               err = filemap_write_and_wait(inode->i_mapping);
+               mutex_lock(&inode->i_mutex);
+               err = nfs_sync_inode(inode);
+               mutex_unlock(&inode->i_mutex);
                 if (err)
                         goto out;
         }
@@ -708,7 +713,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
  struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
  {
         struct nfs_lock_context *res, *new = NULL;
-       struct inode *inode = ctx->dentry->d_inode;
+       struct inode *inode = d_inode(ctx->dentry);
  
         spin_lock(&inode->i_lock);
         res = __nfs_find_lock_context(ctx);
@@ -736,7 +741,7 @@ EXPORT_SYMBOL_GPL(nfs_get_lock_context);
  void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
  {
         struct nfs_open_context *ctx = l_ctx->open_context;
-       struct inode *inode = ctx->dentry->d_inode;
+       struct inode *inode = d_inode(ctx->dentry);
  
         if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
                 return;
@@ -763,7 +768,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
                 return;
         if (!is_sync)
                 return;
-       inode = ctx->dentry->d_inode;
+       inode = d_inode(ctx->dentry);
         if (!list_empty(&NFS_I(inode)->open_files))
                 return;
         server = NFS_SERVER(inode);
@@ -810,7 +815,7 @@ EXPORT_SYMBOL_GPL(get_nfs_open_context);
  
  static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
  {
-       struct inode *inode = ctx->dentry->d_inode;
+       struct inode *inode = d_inode(ctx->dentry);
         struct super_block *sb = ctx->dentry->d_sb;
  
         if (!list_empty(&ctx->list)) {
@@ -842,7 +847,7 @@ EXPORT_SYMBOL_GPL(put_nfs_open_context);
   */
  void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
  {
-       struct inode *inode = ctx->dentry->d_inode;
+       struct inode *inode = d_inode(ctx->dentry);
         struct nfs_inode *nfsi = NFS_I(inode);
  
         spin_lock(&inode->i_lock);
@@ -885,7 +890,7 @@ static void nfs_file_clear_open_context(struct file *filp)
         struct nfs_open_context *ctx = nfs_file_open_context(filp);
  
         if (ctx) {
-               struct inode *inode = ctx->dentry->d_inode;
+               struct inode *inode = d_inode(ctx->dentry);
  
                 filp->private_data = NULL;
                 spin_lock(&inode->i_lock);
@@ -1588,6 +1593,19 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
  }
  EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc);
  
+
+static inline bool nfs_fileid_valid(struct nfs_inode *nfsi,
+                                   struct nfs_fattr *fattr)
+{
+       bool ret1 = true, ret2 = true;
+
+       if (fattr->valid & NFS_ATTR_FATTR_FILEID)
+               ret1 = (nfsi->fileid == fattr->fileid);
+       if (fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
+               ret2 = (nfsi->fileid == fattr->mounted_on_fileid);
+       return ret1 || ret2;
+}
+
  /*
   * Many nfs protocol calls return the new file attributes after
   * an operation.  Here we update the inode to reflect the state
@@ -1614,7 +1632,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                         nfs_display_fhandle_hash(NFS_FH(inode)),
                         atomic_read(&inode->i_count), fattr->valid);
  
-       if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid) {
+       if (!nfs_fileid_valid(nfsi, fattr)) {
                 printk(KERN_ERR "NFS: server %s error: fileid changed\n"
                         "fsid %s: expected fileid 0x%Lx, got 0x%Lx\n",
                         NFS_SERVER(inode)->nfs_client->cl_hostname,
@@ -1819,7 +1837,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  struct inode *nfs_alloc_inode(struct super_block *sb)
  {
         struct nfs_inode *nfsi;
-       nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
+       nfsi = kmem_cache_alloc(nfs_inode_cachep, GFP_KERNEL);
         if (!nfsi)
                 return NULL;
         nfsi->flags = 0UL;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c

index b5a0afc..c8162c6 100644 (file)
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -139,7 +139,7 @@ EXPORT_SYMBOL_GPL(nfs_path);
  struct vfsmount *nfs_d_automount(struct path *path)
  {
         struct vfsmount *mnt;
-       struct nfs_server *server = NFS_SERVER(path->dentry->d_inode);
+       struct nfs_server *server = NFS_SERVER(d_inode(path->dentry));
         struct nfs_fh *fh = NULL;
         struct nfs_fattr *fattr = NULL;
  
@@ -180,16 +180,16 @@ out_nofree:
  static int
  nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
-       if (NFS_FH(dentry->d_inode)->size != 0)
+       if (NFS_FH(d_inode(dentry))->size != 0)
                 return nfs_getattr(mnt, dentry, stat);
-       generic_fillattr(dentry->d_inode, stat);
+       generic_fillattr(d_inode(dentry), stat);
         return 0;
  }
  
  static int
  nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       if (NFS_FH(dentry->d_inode)->size != 0)
+       if (NFS_FH(d_inode(dentry))->size != 0)
                 return nfs_setattr(dentry, attr);
         return -EACCES;
  }
@@ -279,7 +279,7 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
         struct dentry *parent = dget_parent(dentry);
  
         /* Look it up again to get its attributes */
-       err = server->nfs_client->rpc_ops->lookup(parent->d_inode, &dentry->d_name, fh, fattr, NULL);
+       err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, fh, fattr, NULL);
         dput(parent);
         if (err != 0)
                 return ERR_PTR(err);
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c

index 658e586..1ebe2fc 100644 (file)
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -279,7 +279,7 @@ nfs3_list_one_acl(struct inode *inode, int type, const char *name, void *data,
  ssize_t
  nfs3_listxattr(struct dentry *dentry, char *data, size_t size)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         ssize_t result = 0;
         int error;
  
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c

index 1f11d25..cb28cce 100644 (file)
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -120,7 +120,7 @@ static int
  nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                         struct iattr *sattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct nfs3_sattrargs   arg = {
                 .fh             = NFS_FH(inode),
                 .sattr          = sattr,
@@ -386,13 +386,13 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                  * not sure this buys us anything (and I'd have
                  * to revamp the NFSv3 XDR code) */
                 status = nfs3_proc_setattr(dentry, data->res.fattr, sattr);
-               nfs_post_op_update_inode(dentry->d_inode, data->res.fattr);
+               nfs_post_op_update_inode(d_inode(dentry), data->res.fattr);
                 dprintk("NFS reply setattr (post-create): %d\n", status);
                 if (status != 0)
                         goto out_release_acls;
         }
  
-       status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+       status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
  
  out_release_acls:
         posix_acl_release(acl);
@@ -570,7 +570,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
         if (status != 0)
                 goto out_release_acls;
  
-       status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+       status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
  
  out_release_acls:
         posix_acl_release(acl);
@@ -623,7 +623,7 @@ static int
  nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                   u64 cookie, struct page **pages, unsigned int count, int plus)
  {
-       struct inode            *dir = dentry->d_inode;
+       struct inode            *dir = d_inode(dentry);
         __be32                  *verf = NFS_I(dir)->cookieverf;
         struct nfs3_readdirargs arg = {
                 .fh             = NFS_FH(dir),
@@ -715,7 +715,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
         if (status != 0)
                 goto out_release_acls;
  
-       status = nfs3_proc_setacls(dentry->d_inode, acl, default_acl);
+       status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
  
  out_release_acls:
         posix_acl_release(acl);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c

index cb17072..3a9e752 100644 (file)
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -36,13 +36,16 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
                                  loff_t offset, loff_t len)
  {
         struct inode *inode = file_inode(filep);
+       struct nfs_server *server = NFS_SERVER(inode);
         struct nfs42_falloc_args args = {
                 .falloc_fh      = NFS_FH(inode),
                 .falloc_offset  = offset,
                 .falloc_length  = len,
+               .falloc_bitmask = server->cache_consistency_bitmask,
+       };
+       struct nfs42_falloc_res res = {
+               .falloc_server  = server,
         };
-       struct nfs42_falloc_res res;
-       struct nfs_server *server = NFS_SERVER(inode);
         int status;
  
         msg->rpc_argp = &args;
@@ -52,8 +55,17 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
         if (status)
                 return status;
  
-       return nfs4_call_sync(server->client, server, msg,
-                             &args.seq_args, &res.seq_res, 0);
+       res.falloc_fattr = nfs_alloc_fattr();
+       if (!res.falloc_fattr)
+               return -ENOMEM;
+
+       status = nfs4_call_sync(server->client, server, msg,
+                               &args.seq_args, &res.seq_res, 0);
+       if (status == 0)
+               status = nfs_post_op_update_inode(inode, res.falloc_fattr);
+
+       kfree(res.falloc_fattr);
+       return status;
  }
  
  static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
@@ -84,9 +96,13 @@ int nfs42_proc_allocate(struct file *filep, loff_t offset, loff_t len)
         if (!nfs_server_capable(inode, NFS_CAP_ALLOCATE))
                 return -EOPNOTSUPP;
  
+       mutex_lock(&inode->i_mutex);
+
         err = nfs42_proc_fallocate(&msg, filep, offset, len);
         if (err == -EOPNOTSUPP)
                 NFS_SERVER(inode)->caps &= ~NFS_CAP_ALLOCATE;
+
+       mutex_unlock(&inode->i_mutex);
         return err;
  }
  
@@ -101,9 +117,16 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
         if (!nfs_server_capable(inode, NFS_CAP_DEALLOCATE))
                 return -EOPNOTSUPP;
  
+       nfs_wb_all(inode);
+       mutex_lock(&inode->i_mutex);
+
         err = nfs42_proc_fallocate(&msg, filep, offset, len);
+       if (err == 0)
+               truncate_pagecache_range(inode, offset, (offset + len) -1);
         if (err == -EOPNOTSUPP)
                 NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
+
+       mutex_unlock(&inode->i_mutex);
         return err;
  }
  
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c

index 038a7e1..1a25b27 100644 (file)
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -25,16 +25,20 @@
  
  #define NFS4_enc_allocate_sz           (compound_encode_hdr_maxsz + \
                                          encode_putfh_maxsz + \
-                                        encode_allocate_maxsz)
+                                        encode_allocate_maxsz + \
+                                        encode_getattr_maxsz)
  #define NFS4_dec_allocate_sz           (compound_decode_hdr_maxsz + \
                                          decode_putfh_maxsz + \
-                                        decode_allocate_maxsz)
+                                        decode_allocate_maxsz + \
+                                        decode_getattr_maxsz)
  #define NFS4_enc_deallocate_sz         (compound_encode_hdr_maxsz + \
                                          encode_putfh_maxsz + \
-                                        encode_deallocate_maxsz)
+                                        encode_deallocate_maxsz + \
+                                        encode_getattr_maxsz)
  #define NFS4_dec_deallocate_sz         (compound_decode_hdr_maxsz + \
                                          decode_putfh_maxsz + \
-                                        decode_deallocate_maxsz)
+                                        decode_deallocate_maxsz + \
+                                        decode_getattr_maxsz)
  #define NFS4_enc_seek_sz               (compound_encode_hdr_maxsz + \
                                          encode_putfh_maxsz + \
                                          encode_seek_maxsz)
@@ -92,6 +96,7 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
         encode_sequence(xdr, &args->seq_args, &hdr);
         encode_putfh(xdr, args->falloc_fh, &hdr);
         encode_allocate(xdr, args, &hdr);
+       encode_getfattr(xdr, args->falloc_bitmask, &hdr);
         encode_nops(&hdr);
  }
  
@@ -110,6 +115,7 @@ static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
         encode_sequence(xdr, &args->seq_args, &hdr);
         encode_putfh(xdr, args->falloc_fh, &hdr);
         encode_deallocate(xdr, args, &hdr);
+       encode_getfattr(xdr, args->falloc_bitmask, &hdr);
         encode_nops(&hdr);
  }
  
@@ -183,6 +189,9 @@ static int nfs4_xdr_dec_allocate(struct rpc_rqst *rqstp,
         if (status)
                 goto out;
         status = decode_allocate(xdr, res);
+       if (status)
+               goto out;
+       decode_getfattr(xdr, res->falloc_fattr, res->falloc_server);
  out:
         return status;
  }
@@ -207,6 +216,9 @@ static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
         if (status)
                 goto out;
         status = decode_deallocate(xdr, res);
+       if (status)
+               goto out;
+       decode_getfattr(xdr, res->falloc_fattr, res->falloc_server);
  out:
         return status;
  }
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c

index 86d6214..e42be52 100644 (file)
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -4,7 +4,6 @@
   */
  #include <linux/module.h>
  #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
  #include <linux/nfs_mount.h>
  #include <linux/sunrpc/addr.h>
  #include <linux/sunrpc/auth.h>
@@ -15,6 +14,7 @@
  #include "callback.h"
  #include "delegation.h"
  #include "nfs4session.h"
+#include "nfs4idmap.h"
  #include "pnfs.h"
  #include "netns.h"
  
@@ -1130,7 +1130,7 @@ error:
   */
  static int nfs_probe_destination(struct nfs_server *server)
  {
-       struct inode *inode = server->super->s_root->d_inode;
+       struct inode *inode = d_inode(server->super->s_root);
         struct nfs_fattr *fattr;
         int error;
  
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c

index 0181cde..f58c17b 100644 (file)
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -10,6 +10,8 @@
  #include "fscache.h"
  #include "pnfs.h"
  
+#include "nfstrace.h"
+
  #ifdef CONFIG_NFS_V4_2
  #include "nfs42.h"
  #endif
@@ -46,7 +48,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
         openflags &= ~(O_CREAT|O_EXCL);
  
         parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
  
         ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
         err = PTR_ERR(ctx);
@@ -57,7 +59,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
         if (openflags & O_TRUNC) {
                 attr.ia_valid |= ATTR_SIZE;
                 attr.ia_size = 0;
-               nfs_wb_all(inode);
+               nfs_sync_inode(inode);
         }
  
         inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr, &opened);
@@ -74,7 +76,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
                         goto out_drop;
                 }
         }
-       if (inode != dentry->d_inode)
+       if (inode != d_inode(dentry))
                 goto out_drop;
  
         nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -100,6 +102,9 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
         int ret;
         struct inode *inode = file_inode(file);
  
+       trace_nfs_fsync_enter(inode);
+
+       nfs_inode_dio_wait(inode);
         do {
                 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
                 if (ret != 0)
@@ -107,7 +112,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                 mutex_lock(&inode->i_mutex);
                 ret = nfs_file_fsync_commit(file, start, end, datasync);
                 if (!ret)
-                       ret = pnfs_layoutcommit_inode(inode, true);
+                       ret = pnfs_sync_inode(inode, !!datasync);
                 mutex_unlock(&inode->i_mutex);
                 /*
                  * If nfs_file_fsync_commit detected a server reboot, then
@@ -118,6 +123,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                 end = LLONG_MAX;
         } while (ret == -EAGAIN);
  
+       trace_nfs_fsync_exit(inode, ret);
         return ret;
  }
  
@@ -152,15 +158,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
         if (ret < 0)
                 return ret;
  
-       mutex_lock(&inode->i_mutex);
         if (mode & FALLOC_FL_PUNCH_HOLE)
-               ret = nfs42_proc_deallocate(filep, offset, len);
-       else
-               ret = nfs42_proc_allocate(filep, offset, len);
-       mutex_unlock(&inode->i_mutex);
-
-       nfs_zap_caches(inode);
-       return ret;
+               return nfs42_proc_deallocate(filep, offset, len);
+       return nfs42_proc_allocate(filep, offset, len);
  }
  #endif /* CONFIG_NFS_V4_2 */
  
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c

new file mode 100644 (file)

index 0000000..2e1737c
--- /dev/null
+++ b/fs/nfs/nfs4idmap.c
@@ -0,0 +1,792 @@
+/*
+ * fs/nfs/idmap.c
+ *
+ *  UID and GID to name mapping for clients.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <linux/types.h>
+#include <linux/parser.h>
+#include <linux/fs.h>
+#include <net/net_namespace.h>
+#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_fs_sb.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <keys/user-type.h>
+#include <linux/module.h>
+
+#include "internal.h"
+#include "netns.h"
+#include "nfs4idmap.h"
+#include "nfs4trace.h"
+
+#define NFS_UINT_MAXLEN 11
+
+static const struct cred *id_resolver_cache;
+static struct key_type key_type_id_resolver_legacy;
+
+struct idmap_legacy_upcalldata {
+       struct rpc_pipe_msg pipe_msg;
+       struct idmap_msg idmap_msg;
+       struct key_construction *key_cons;
+       struct idmap *idmap;
+};
+
+struct idmap {
+       struct rpc_pipe_dir_object idmap_pdo;
+       struct rpc_pipe         *idmap_pipe;
+       struct idmap_legacy_upcalldata *idmap_upcall_data;
+       struct mutex            idmap_mutex;
+};
+
+/**
+ * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
+ * @fattr: fully initialised struct nfs_fattr
+ * @owner_name: owner name string cache
+ * @group_name: group name string cache
+ */
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+               struct nfs4_string *owner_name,
+               struct nfs4_string *group_name)
+{
+       fattr->owner_name = owner_name;
+       fattr->group_name = group_name;
+}
+
+static void nfs_fattr_free_owner_name(struct nfs_fattr *fattr)
+{
+       fattr->valid &= ~NFS_ATTR_FATTR_OWNER_NAME;
+       kfree(fattr->owner_name->data);
+}
+
+static void nfs_fattr_free_group_name(struct nfs_fattr *fattr)
+{
+       fattr->valid &= ~NFS_ATTR_FATTR_GROUP_NAME;
+       kfree(fattr->group_name->data);
+}
+
+static bool nfs_fattr_map_owner_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+       struct nfs4_string *owner = fattr->owner_name;
+       kuid_t uid;
+
+       if (!(fattr->valid & NFS_ATTR_FATTR_OWNER_NAME))
+               return false;
+       if (nfs_map_name_to_uid(server, owner->data, owner->len, &uid) == 0) {
+               fattr->uid = uid;
+               fattr->valid |= NFS_ATTR_FATTR_OWNER;
+       }
+       return true;
+}
+
+static bool nfs_fattr_map_group_name(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+       struct nfs4_string *group = fattr->group_name;
+       kgid_t gid;
+
+       if (!(fattr->valid & NFS_ATTR_FATTR_GROUP_NAME))
+               return false;
+       if (nfs_map_group_to_gid(server, group->data, group->len, &gid) == 0) {
+               fattr->gid = gid;
+               fattr->valid |= NFS_ATTR_FATTR_GROUP;
+       }
+       return true;
+}
+
+/**
+ * nfs_fattr_free_names - free up the NFSv4 owner and group strings
+ * @fattr: a fully initialised nfs_fattr structure
+ */
+void nfs_fattr_free_names(struct nfs_fattr *fattr)
+{
+       if (fattr->valid & NFS_ATTR_FATTR_OWNER_NAME)
+               nfs_fattr_free_owner_name(fattr);
+       if (fattr->valid & NFS_ATTR_FATTR_GROUP_NAME)
+               nfs_fattr_free_group_name(fattr);
+}
+
+/**
+ * nfs_fattr_map_and_free_names - map owner/group strings into uid/gid and free
+ * @server: pointer to the filesystem nfs_server structure
+ * @fattr: a fully initialised nfs_fattr structure
+ *
+ * This helper maps the cached NFSv4 owner/group strings in fattr into
+ * their numeric uid/gid equivalents, and then frees the cached strings.
+ */
+void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *fattr)
+{
+       if (nfs_fattr_map_owner_name(server, fattr))
+               nfs_fattr_free_owner_name(fattr);
+       if (nfs_fattr_map_group_name(server, fattr))
+               nfs_fattr_free_group_name(fattr);
+}
+
+int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
+{
+       unsigned long val;
+       char buf[16];
+
+       if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
+               return 0;
+       memcpy(buf, name, namelen);
+       buf[namelen] = '\0';
+       if (kstrtoul(buf, 0, &val) != 0)
+               return 0;
+       *res = val;
+       return 1;
+}
+EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric);
+
+static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
+{
+       return snprintf(buf, buflen, "%u", id);
+}
+
+static struct key_type key_type_id_resolver = {
+       .name           = "id_resolver",
+       .preparse       = user_preparse,
+       .free_preparse  = user_free_preparse,
+       .instantiate    = generic_key_instantiate,
+       .revoke         = user_revoke,
+       .destroy        = user_destroy,
+       .describe       = user_describe,
+       .read           = user_read,
+};
+
+static int nfs_idmap_init_keyring(void)
+{
+       struct cred *cred;
+       struct key *keyring;
+       int ret = 0;
+
+       printk(KERN_NOTICE "NFS: Registering the %s key type\n",
+               key_type_id_resolver.name);
+
+       cred = prepare_kernel_cred(NULL);
+       if (!cred)
+               return -ENOMEM;
+
+       keyring = keyring_alloc(".id_resolver",
+                               GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
+                               (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                               KEY_USR_VIEW | KEY_USR_READ,
+                               KEY_ALLOC_NOT_IN_QUOTA, NULL);
+       if (IS_ERR(keyring)) {
+               ret = PTR_ERR(keyring);
+               goto failed_put_cred;
+       }
+
+       ret = register_key_type(&key_type_id_resolver);
+       if (ret < 0)
+               goto failed_put_key;
+
+       ret = register_key_type(&key_type_id_resolver_legacy);
+       if (ret < 0)
+               goto failed_reg_legacy;
+
+       set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
+       cred->thread_keyring = keyring;
+       cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+       id_resolver_cache = cred;
+       return 0;
+
+failed_reg_legacy:
+       unregister_key_type(&key_type_id_resolver);
+failed_put_key:
+       key_put(keyring);
+failed_put_cred:
+       put_cred(cred);
+       return ret;
+}
+
+static void nfs_idmap_quit_keyring(void)
+{
+       key_revoke(id_resolver_cache->thread_keyring);
+       unregister_key_type(&key_type_id_resolver);
+       unregister_key_type(&key_type_id_resolver_legacy);
+       put_cred(id_resolver_cache);
+}
+
+/*
+ * Assemble the description to pass to request_key()
+ * This function will allocate a new string and update dest to point
+ * at it.  The caller is responsible for freeing dest.
+ *
+ * On error 0 is returned.  Otherwise, the length of dest is returned.
+ */
+static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
+                               const char *type, size_t typelen, char **desc)
+{
+       char *cp;
+       size_t desclen = typelen + namelen + 2;
+
+       *desc = kmalloc(desclen, GFP_KERNEL);
+       if (!*desc)
+               return -ENOMEM;
+
+       cp = *desc;
+       memcpy(cp, type, typelen);
+       cp += typelen;
+       *cp++ = ':';
+
+       memcpy(cp, name, namelen);
+       cp += namelen;
+       *cp = '\0';
+       return desclen;
+}
+
+static struct key *nfs_idmap_request_key(const char *name, size_t namelen,
+                                        const char *type, struct idmap *idmap)
+{
+       char *desc;
+       struct key *rkey;
+       ssize_t ret;
+
+       ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
+       if (ret <= 0)
+               return ERR_PTR(ret);
+
+       rkey = request_key(&key_type_id_resolver, desc, "");
+       if (IS_ERR(rkey)) {
+               mutex_lock(&idmap->idmap_mutex);
+               rkey = request_key_with_auxdata(&key_type_id_resolver_legacy,
+                                               desc, "", 0, idmap);
+               mutex_unlock(&idmap->idmap_mutex);
+       }
+       if (!IS_ERR(rkey))
+               set_bit(KEY_FLAG_ROOT_CAN_INVAL, &rkey->flags);
+
+       kfree(desc);
+       return rkey;
+}
+
+static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
+                                const char *type, void *data,
+                                size_t data_size, struct idmap *idmap)
+{
+       const struct cred *saved_cred;
+       struct key *rkey;
+       struct user_key_payload *payload;
+       ssize_t ret;
+
+       saved_cred = override_creds(id_resolver_cache);
+       rkey = nfs_idmap_request_key(name, namelen, type, idmap);
+       revert_creds(saved_cred);
+
+       if (IS_ERR(rkey)) {
+               ret = PTR_ERR(rkey);
+               goto out;
+       }
+
+       rcu_read_lock();
+       rkey->perm |= KEY_USR_VIEW;
+
+       ret = key_validate(rkey);
+       if (ret < 0)
+               goto out_up;
+
+       payload = rcu_dereference(rkey->payload.rcudata);
+       if (IS_ERR_OR_NULL(payload)) {
+               ret = PTR_ERR(payload);
+               goto out_up;
+       }
+
+       ret = payload->datalen;
+       if (ret > 0 && ret <= data_size)
+               memcpy(data, payload->data, ret);
+       else
+               ret = -EINVAL;
+
+out_up:
+       rcu_read_unlock();
+       key_put(rkey);
+out:
+       return ret;
+}
+
+/* ID -> Name */
+static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf,
+                                    size_t buflen, struct idmap *idmap)
+{
+       char id_str[NFS_UINT_MAXLEN];
+       int id_len;
+       ssize_t ret;
+
+       id_len = snprintf(id_str, sizeof(id_str), "%u", id);
+       ret = nfs_idmap_get_key(id_str, id_len, type, buf, buflen, idmap);
+       if (ret < 0)
+               return -EINVAL;
+       return ret;
+}
+
+/* Name -> ID */
+static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *type,
+                              __u32 *id, struct idmap *idmap)
+{
+       char id_str[NFS_UINT_MAXLEN];
+       long id_long;
+       ssize_t data_size;
+       int ret = 0;
+
+       data_size = nfs_idmap_get_key(name, namelen, type, id_str, NFS_UINT_MAXLEN, idmap);
+       if (data_size <= 0) {
+               ret = -EINVAL;
+       } else {
+               ret = kstrtol(id_str, 10, &id_long);
+               *id = (__u32)id_long;
+       }
+       return ret;
+}
+
+/* idmap classic begins here */
+
+enum {
+       Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
+};
+
+static const match_table_t nfs_idmap_tokens = {
+       { Opt_find_uid, "uid:%s" },
+       { Opt_find_gid, "gid:%s" },
+       { Opt_find_user, "user:%s" },
+       { Opt_find_group, "group:%s" },
+       { Opt_find_err, NULL }
+};
+
+static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
+static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
+                                  size_t);
+static void idmap_release_pipe(struct inode *);
+static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
+
+static const struct rpc_pipe_ops idmap_upcall_ops = {
+       .upcall         = rpc_pipe_generic_upcall,
+       .downcall       = idmap_pipe_downcall,
+       .release_pipe   = idmap_release_pipe,
+       .destroy_msg    = idmap_pipe_destroy_msg,
+};
+
+static struct key_type key_type_id_resolver_legacy = {
+       .name           = "id_legacy",
+       .preparse       = user_preparse,
+       .free_preparse  = user_free_preparse,
+       .instantiate    = generic_key_instantiate,
+       .revoke         = user_revoke,
+       .destroy        = user_destroy,
+       .describe       = user_describe,
+       .read           = user_read,
+       .request_key    = nfs_idmap_legacy_upcall,
+};
+
+static void nfs_idmap_pipe_destroy(struct dentry *dir,
+               struct rpc_pipe_dir_object *pdo)
+{
+       struct idmap *idmap = pdo->pdo_data;
+       struct rpc_pipe *pipe = idmap->idmap_pipe;
+
+       if (pipe->dentry) {
+               rpc_unlink(pipe->dentry);
+               pipe->dentry = NULL;
+       }
+}
+
+static int nfs_idmap_pipe_create(struct dentry *dir,
+               struct rpc_pipe_dir_object *pdo)
+{
+       struct idmap *idmap = pdo->pdo_data;
+       struct rpc_pipe *pipe = idmap->idmap_pipe;
+       struct dentry *dentry;
+
+       dentry = rpc_mkpipe_dentry(dir, "idmap", idmap, pipe);
+       if (IS_ERR(dentry))
+               return PTR_ERR(dentry);
+       pipe->dentry = dentry;
+       return 0;
+}
+
+static const struct rpc_pipe_dir_object_ops nfs_idmap_pipe_dir_object_ops = {
+       .create = nfs_idmap_pipe_create,
+       .destroy = nfs_idmap_pipe_destroy,
+};
+
+int
+nfs_idmap_new(struct nfs_client *clp)
+{
+       struct idmap *idmap;
+       struct rpc_pipe *pipe;
+       int error;
+
+       idmap = kzalloc(sizeof(*idmap), GFP_KERNEL);
+       if (idmap == NULL)
+               return -ENOMEM;
+
+       rpc_init_pipe_dir_object(&idmap->idmap_pdo,
+                       &nfs_idmap_pipe_dir_object_ops,
+                       idmap);
+
+       pipe = rpc_mkpipe_data(&idmap_upcall_ops, 0);
+       if (IS_ERR(pipe)) {
+               error = PTR_ERR(pipe);
+               goto err;
+       }
+       idmap->idmap_pipe = pipe;
+       mutex_init(&idmap->idmap_mutex);
+
+       error = rpc_add_pipe_dir_object(clp->cl_net,
+                       &clp->cl_rpcclient->cl_pipedir_objects,
+                       &idmap->idmap_pdo);
+       if (error)
+               goto err_destroy_pipe;
+
+       clp->cl_idmap = idmap;
+       return 0;
+err_destroy_pipe:
+       rpc_destroy_pipe_data(idmap->idmap_pipe);
+err:
+       kfree(idmap);
+       return error;
+}
+
+void
+nfs_idmap_delete(struct nfs_client *clp)
+{
+       struct idmap *idmap = clp->cl_idmap;
+
+       if (!idmap)
+               return;
+       clp->cl_idmap = NULL;
+       rpc_remove_pipe_dir_object(clp->cl_net,
+                       &clp->cl_rpcclient->cl_pipedir_objects,
+                       &idmap->idmap_pdo);
+       rpc_destroy_pipe_data(idmap->idmap_pipe);
+       kfree(idmap);
+}
+
+int nfs_idmap_init(void)
+{
+       int ret;
+       ret = nfs_idmap_init_keyring();
+       if (ret != 0)
+               goto out;
+out:
+       return ret;
+}
+
+void nfs_idmap_quit(void)
+{
+       nfs_idmap_quit_keyring();
+}
+
+static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
+                                    struct idmap_msg *im,
+                                    struct rpc_pipe_msg *msg)
+{
+       substring_t substr;
+       int token, ret;
+
+       im->im_type = IDMAP_TYPE_GROUP;
+       token = match_token(desc, nfs_idmap_tokens, &substr);
+
+       switch (token) {
+       case Opt_find_uid:
+               im->im_type = IDMAP_TYPE_USER;
+       case Opt_find_gid:
+               im->im_conv = IDMAP_CONV_NAMETOID;
+               ret = match_strlcpy(im->im_name, &substr, IDMAP_NAMESZ);
+               break;
+
+       case Opt_find_user:
+               im->im_type = IDMAP_TYPE_USER;
+       case Opt_find_group:
+               im->im_conv = IDMAP_CONV_IDTONAME;
+               ret = match_int(&substr, &im->im_id);
+               break;
+
+       default:
+               ret = -EINVAL;
+               goto out;
+       }
+
+       msg->data = im;
+       msg->len  = sizeof(struct idmap_msg);
+
+out:
+       return ret;
+}
+
+static bool
+nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
+               struct idmap_legacy_upcalldata *data)
+{
+       if (idmap->idmap_upcall_data != NULL) {
+               WARN_ON_ONCE(1);
+               return false;
+       }
+       idmap->idmap_upcall_data = data;
+       return true;
+}
+
+static void
+nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
+{
+       struct key_construction *cons = idmap->idmap_upcall_data->key_cons;
+
+       kfree(idmap->idmap_upcall_data);
+       idmap->idmap_upcall_data = NULL;
+       complete_request_key(cons, ret);
+}
+
+static void
+nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
+{
+       if (idmap->idmap_upcall_data != NULL)
+               nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
+}
+
+static int nfs_idmap_legacy_upcall(struct key_construction *cons,
+                                  const char *op,
+                                  void *aux)
+{
+       struct idmap_legacy_upcalldata *data;
+       struct rpc_pipe_msg *msg;
+       struct idmap_msg *im;
+       struct idmap *idmap = (struct idmap *)aux;
+       struct key *key = cons->key;
+       int ret = -ENOMEM;
+
+       /* msg and im are freed in idmap_pipe_destroy_msg */
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               goto out1;
+
+       msg = &data->pipe_msg;
+       im = &data->idmap_msg;
+       data->idmap = idmap;
+       data->key_cons = cons;
+
+       ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
+       if (ret < 0)
+               goto out2;
+
+       ret = -EAGAIN;
+       if (!nfs_idmap_prepare_pipe_upcall(idmap, data))
+               goto out2;
+
+       ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
+       if (ret < 0)
+               nfs_idmap_abort_pipe_upcall(idmap, ret);
+
+       return ret;
+out2:
+       kfree(data);
+out1:
+       complete_request_key(cons, ret);
+       return ret;
+}
+
+static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *data, size_t datalen)
+{
+       return key_instantiate_and_link(key, data, datalen,
+                                       id_resolver_cache->thread_keyring,
+                                       authkey);
+}
+
+static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
+               struct idmap_msg *upcall,
+               struct key *key, struct key *authkey)
+{
+       char id_str[NFS_UINT_MAXLEN];
+       size_t len;
+       int ret = -ENOKEY;
+
+       /* ret = -ENOKEY */
+       if (upcall->im_type != im->im_type || upcall->im_conv != im->im_conv)
+               goto out;
+       switch (im->im_conv) {
+       case IDMAP_CONV_NAMETOID:
+               if (strcmp(upcall->im_name, im->im_name) != 0)
+                       break;
+               /* Note: here we store the NUL terminator too */
+               len = sprintf(id_str, "%d", im->im_id) + 1;
+               ret = nfs_idmap_instantiate(key, authkey, id_str, len);
+               break;
+       case IDMAP_CONV_IDTONAME:
+               if (upcall->im_id != im->im_id)
+                       break;
+               len = strlen(im->im_name);
+               ret = nfs_idmap_instantiate(key, authkey, im->im_name, len);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+out:
+       return ret;
+}
+
+static ssize_t
+idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+{
+       struct rpc_inode *rpci = RPC_I(file_inode(filp));
+       struct idmap *idmap = (struct idmap *)rpci->private;
+       struct key_construction *cons;
+       struct idmap_msg im;
+       size_t namelen_in;
+       int ret = -ENOKEY;
+
+       /* If instantiation is successful, anyone waiting for key construction
+        * will have been woken up and someone else may now have used
+        * idmap_key_cons - so after this point we may no longer touch it.
+        */
+       if (idmap->idmap_upcall_data == NULL)
+               goto out_noupcall;
+
+       cons = idmap->idmap_upcall_data->key_cons;
+
+       if (mlen != sizeof(im)) {
+               ret = -ENOSPC;
+               goto out;
+       }
+
+       if (copy_from_user(&im, src, mlen) != 0) {
+               ret = -EFAULT;
+               goto out;
+       }
+
+       if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
+               ret = -ENOKEY;
+               goto out;
+       }
+
+       namelen_in = strnlen(im.im_name, IDMAP_NAMESZ);
+       if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
+               ret = -EINVAL;
+               goto out;
+}
+
+       ret = nfs_idmap_read_and_verify_message(&im,
+                       &idmap->idmap_upcall_data->idmap_msg,
+                       cons->key, cons->authkey);
+       if (ret >= 0) {
+               key_set_timeout(cons->key, nfs_idmap_cache_timeout);
+               ret = mlen;
+       }
+
+out:
+       nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
+out_noupcall:
+       return ret;
+}
+
+static void
+idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
+{
+       struct idmap_legacy_upcalldata *data = container_of(msg,
+                       struct idmap_legacy_upcalldata,
+                       pipe_msg);
+       struct idmap *idmap = data->idmap;
+
+       if (msg->errno)
+               nfs_idmap_abort_pipe_upcall(idmap, msg->errno);
+}
+
+static void
+idmap_release_pipe(struct inode *inode)
+{
+       struct rpc_inode *rpci = RPC_I(inode);
+       struct idmap *idmap = (struct idmap *)rpci->private;
+
+       nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
+}
+
+int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
+{
+       struct idmap *idmap = server->nfs_client->cl_idmap;
+       __u32 id = -1;
+       int ret = 0;
+
+       if (!nfs_map_string_to_numeric(name, namelen, &id))
+               ret = nfs_idmap_lookup_id(name, namelen, "uid", &id, idmap);
+       if (ret == 0) {
+               *uid = make_kuid(&init_user_ns, id);
+               if (!uid_valid(*uid))
+                       ret = -ERANGE;
+       }
+       trace_nfs4_map_name_to_uid(name, namelen, id, ret);
+       return ret;
+}
+
+int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, kgid_t *gid)
+{
+       struct idmap *idmap = server->nfs_client->cl_idmap;
+       __u32 id = -1;
+       int ret = 0;
+
+       if (!nfs_map_string_to_numeric(name, namelen, &id))
+               ret = nfs_idmap_lookup_id(name, namelen, "gid", &id, idmap);
+       if (ret == 0) {
+               *gid = make_kgid(&init_user_ns, id);
+               if (!gid_valid(*gid))
+                       ret = -ERANGE;
+       }
+       trace_nfs4_map_group_to_gid(name, namelen, id, ret);
+       return ret;
+}
+
+int nfs_map_uid_to_name(const struct nfs_server *server, kuid_t uid, char *buf, size_t buflen)
+{
+       struct idmap *idmap = server->nfs_client->cl_idmap;
+       int ret = -EINVAL;
+       __u32 id;
+
+       id = from_kuid(&init_user_ns, uid);
+       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
+               ret = nfs_idmap_lookup_name(id, "user", buf, buflen, idmap);
+       if (ret < 0)
+               ret = nfs_map_numeric_to_string(id, buf, buflen);
+       trace_nfs4_map_uid_to_name(buf, ret, id, ret);
+       return ret;
+}
+int nfs_map_gid_to_group(const struct nfs_server *server, kgid_t gid, char *buf, size_t buflen)
+{
+       struct idmap *idmap = server->nfs_client->cl_idmap;
+       int ret = -EINVAL;
+       __u32 id;
+
+       id = from_kgid(&init_user_ns, gid);
+       if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
+               ret = nfs_idmap_lookup_name(id, "group", buf, buflen, idmap);
+       if (ret < 0)
+               ret = nfs_map_numeric_to_string(id, buf, buflen);
+       trace_nfs4_map_gid_to_group(buf, ret, id, ret);
+       return ret;
+}
diff --git a/fs/nfs/nfs4idmap.h b/fs/nfs/nfs4idmap.h

new file mode 100644 (file)

index 0000000..de44d73
--- /dev/null
+++ b/fs/nfs/nfs4idmap.h
@@ -0,0 +1,68 @@
+/*
+ * fs/nfs/nfs4idmap.h
+ *
+ *  UID and GID to name mapping for clients.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef NFS_IDMAP_H
+#define NFS_IDMAP_H
+
+#include <linux/uidgid.h>
+#include <uapi/linux/nfs_idmap.h>
+
+
+/* Forward declaration to make this header independent of others */
+struct nfs_client;
+struct nfs_server;
+struct nfs_fattr;
+struct nfs4_string;
+
+int nfs_idmap_init(void);
+void nfs_idmap_quit(void);
+int nfs_idmap_new(struct nfs_client *);
+void nfs_idmap_delete(struct nfs_client *);
+
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+               struct nfs4_string *owner_name,
+               struct nfs4_string *group_name);
+void nfs_fattr_free_names(struct nfs_fattr *);
+void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *);
+
+int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *);
+int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *);
+int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t);
+int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t);
+
+int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res);
+
+extern unsigned int nfs_idmap_cache_timeout;
+#endif /* NFS_IDMAP_H */
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c

index 3d83cb1..f592672 100644 (file)
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -375,7 +375,7 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
         dprintk("%s: getting locations for %pd2\n",
                 __func__, dentry);
  
-       err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page);
+       err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page);
         dput(parent);
         if (err != 0 ||
             fs_locations->nlocations <= 0 ||
@@ -396,7 +396,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry,
  {
         rpc_authflavor_t flavor = server->client->cl_auth->au_flavor;
         struct dentry *parent = dget_parent(dentry);
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         struct qstr *name = &dentry->d_name;
         struct rpc_clnt *client;
         struct vfsmount *mnt;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c

index 627f37c..45b35b9 100644 (file)
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,7 +51,6 @@
  #include <linux/namei.h>
  #include <linux/mount.h>
  #include <linux/module.h>
-#include <linux/nfs_idmap.h>
  #include <linux/xattr.h>
  #include <linux/utsname.h>
  #include <linux/freezer.h>
@@ -63,6 +62,7 @@
  #include "callback.h"
  #include "pnfs.h"
  #include "netns.h"
+#include "nfs4idmap.h"
  #include "nfs4session.h"
  #include "fscache.h"
  
@@ -185,7 +185,8 @@ const u32 nfs4_fattr_bitmap[3] = {
         | FATTR4_WORD1_SPACE_USED
         | FATTR4_WORD1_TIME_ACCESS
         | FATTR4_WORD1_TIME_METADATA
-       | FATTR4_WORD1_TIME_MODIFY,
+       | FATTR4_WORD1_TIME_MODIFY
+       | FATTR4_WORD1_MOUNTED_ON_FILEID,
  #ifdef CONFIG_NFS_V4_SECURITY_LABEL
         FATTR4_WORD2_SECURITY_LABEL
  #endif
@@ -293,7 +294,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
                 *p++ = xdr_one;                         /* bitmap length */
                 *p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
                 *p++ = htonl(8);              /* attribute buffer length */
-               p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode));
+               p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry)));
         }
         
         *p++ = xdr_one;                                  /* next */
@@ -305,7 +306,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
         *p++ = xdr_one;                         /* bitmap length */
         *p++ = htonl(FATTR4_WORD0_FILEID);             /* bitmap */
         *p++ = htonl(8);              /* attribute buffer length */
-       p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
+       p = xdr_encode_hyper(p, NFS_FILEID(d_inode(dentry->d_parent)));
  
         readdir->pgbase = (char *)p - (char *)start;
         readdir->count -= readdir->pgbase;
@@ -1004,7 +1005,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
                 gfp_t gfp_mask)
  {
         struct dentry *parent = dget_parent(dentry);
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         struct nfs_server *server = NFS_SERVER(dir);
         struct nfs_seqid *(*alloc_seqid)(struct nfs_seqid_counter *, gfp_t);
         struct nfs4_opendata *p;
@@ -1057,7 +1058,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
         case NFS4_OPEN_CLAIM_FH:
         case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
         case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
-               p->o_arg.fh = NFS_FH(dentry->d_inode);
+               p->o_arg.fh = NFS_FH(d_inode(dentry));
         }
         if (attrs != NULL && attrs->ia_valid != 0) {
                 __u32 verf[2];
@@ -1794,7 +1795,7 @@ static const struct rpc_call_ops nfs4_open_confirm_ops = {
   */
  static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
  {
-       struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
+       struct nfs_server *server = NFS_SERVER(d_inode(data->dir));
         struct rpc_task *task;
         struct  rpc_message msg = {
                 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
@@ -1951,7 +1952,7 @@ static const struct rpc_call_ops nfs4_open_ops = {
  
  static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
  {
-       struct inode *dir = data->dir->d_inode;
+       struct inode *dir = d_inode(data->dir);
         struct nfs_server *server = NFS_SERVER(dir);
         struct nfs_openargs *o_arg = &data->o_arg;
         struct nfs_openres *o_res = &data->o_res;
@@ -1998,7 +1999,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
  
  static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
  {
-       struct inode *dir = data->dir->d_inode;
+       struct inode *dir = d_inode(data->dir);
         struct nfs_openres *o_res = &data->o_res;
          int status;
  
@@ -2067,7 +2068,7 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
   */
  static int _nfs4_proc_open(struct nfs4_opendata *data)
  {
-       struct inode *dir = data->dir->d_inode;
+       struct inode *dir = d_inode(data->dir);
         struct nfs_server *server = NFS_SERVER(dir);
         struct nfs_openargs *o_arg = &data->o_arg;
         struct nfs_openres *o_res = &data->o_res;
@@ -2314,7 +2315,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
                 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
  
         dentry = opendata->dentry;
-       if (dentry->d_inode == NULL) {
+       if (d_really_is_negative(dentry)) {
                 /* FIXME: Is this d_drop() ever needed? */
                 d_drop(dentry);
                 dentry = d_add_unique(dentry, igrab(state->inode));
@@ -2325,7 +2326,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
                         ctx->dentry = dget(dentry);
                 }
                 nfs_set_verifier(dentry,
-                               nfs_save_change_attribute(opendata->dir->d_inode));
+                               nfs_save_change_attribute(d_inode(opendata->dir)));
         }
  
         ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
@@ -2333,7 +2334,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
                 goto out;
  
         ctx->state = state;
-       if (dentry->d_inode == state->inode) {
+       if (d_inode(dentry) == state->inode) {
                 nfs_inode_attach_open_context(ctx);
                 if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
                         nfs4_schedule_stateid_recovery(server, state);
@@ -2374,10 +2375,10 @@ static int _nfs4_do_open(struct inode *dir,
         status = nfs4_recover_expired_lease(server);
         if (status != 0)
                 goto err_put_state_owner;
-       if (dentry->d_inode != NULL)
-               nfs4_return_incompatible_delegation(dentry->d_inode, fmode);
+       if (d_really_is_positive(dentry))
+               nfs4_return_incompatible_delegation(d_inode(dentry), fmode);
         status = -ENOMEM;
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 claim = NFS4_OPEN_CLAIM_FH;
         opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr,
                         label, claim, GFP_KERNEL);
@@ -2400,8 +2401,8 @@ static int _nfs4_do_open(struct inode *dir,
                 }
                 opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
         }
-       if (dentry->d_inode != NULL)
-               opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
+       if (d_really_is_positive(dentry))
+               opendata->state = nfs4_get_open_state(d_inode(dentry), sp);
  
         status = _nfs4_open_and_get_state(opendata, fmode, flags, ctx);
         if (status != 0)
@@ -3095,16 +3096,13 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle,
                          struct nfs_fsinfo *info,
                          bool auth_probe)
  {
-       int status;
+       int status = 0;
  
-       switch (auth_probe) {
-       case false:
+       if (!auth_probe)
                 status = nfs4_lookup_root(server, fhandle, info);
-               if (status != -NFS4ERR_WRONGSEC)
-                       break;
-       default:
+
+       if (auth_probe || status == NFS4ERR_WRONGSEC)
                 status = nfs4_do_find_root_sec(server, fhandle, info);
-       }
  
         if (status == 0)
                 status = nfs4_server_capabilities(server, fhandle);
@@ -3254,7 +3252,7 @@ static int
  nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                   struct iattr *sattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct rpc_cred *cred = NULL;
         struct nfs4_state *state = NULL;
         struct nfs4_label *label = NULL;
@@ -3871,13 +3869,13 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
  static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                 u64 cookie, struct page **pages, unsigned int count, int plus)
  {
-       struct inode            *dir = dentry->d_inode;
+       struct inode            *dir = d_inode(dentry);
         struct nfs4_readdir_arg args = {
                 .fh = NFS_FH(dir),
                 .pages = pages,
                 .pgbase = 0,
                 .count = count,
-               .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
+               .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask,
                 .plus = plus,
         };
         struct nfs4_readdir_res res;
@@ -3914,8 +3912,8 @@ static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
         do {
                 err = _nfs4_proc_readdir(dentry, cred, cookie,
                                 pages, count, plus);
-               trace_nfs4_readdir(dentry->d_inode, err);
-               err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), err,
+               trace_nfs4_readdir(d_inode(dentry), err);
+               err = nfs4_handle_exception(NFS_SERVER(d_inode(dentry)), err,
                                 &exception);
         } while (exception.retry);
         return err;
@@ -4830,7 +4828,7 @@ nfs4_set_security_label(struct dentry *dentry, const void *buf, size_t buflen)
         struct nfs4_label ilabel, *olabel = NULL;
         struct nfs_fattr fattr;
         struct rpc_cred *cred;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int status;
  
         if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
@@ -5670,7 +5668,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
         data->rpc_status = task->tk_status;
         switch (task->tk_status) {
         case 0:
-               renew_lease(NFS_SERVER(data->ctx->dentry->d_inode),
+               renew_lease(NFS_SERVER(d_inode(data->ctx->dentry)),
                                 data->timestamp);
                 if (data->arg.new_lock) {
                         data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
@@ -6112,7 +6110,7 @@ static int nfs4_xattr_set_nfs4_acl(struct dentry *dentry, const char *key,
         if (strcmp(key, "") != 0)
                 return -EINVAL;
  
-       return nfs4_proc_set_acl(dentry->d_inode, buf, buflen);
+       return nfs4_proc_set_acl(d_inode(dentry), buf, buflen);
  }
  
  static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
@@ -6121,7 +6119,7 @@ static int nfs4_xattr_get_nfs4_acl(struct dentry *dentry, const char *key,
         if (strcmp(key, "") != 0)
                 return -EINVAL;
  
-       return nfs4_proc_get_acl(dentry->d_inode, buf, buflen);
+       return nfs4_proc_get_acl(d_inode(dentry), buf, buflen);
  }
  
  static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
@@ -6130,7 +6128,7 @@ static size_t nfs4_xattr_list_nfs4_acl(struct dentry *dentry, char *list,
  {
         size_t len = sizeof(XATTR_NAME_NFSV4_ACL);
  
-       if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
+       if (!nfs4_server_supports_acls(NFS_SERVER(d_inode(dentry))))
                 return 0;
  
         if (list && len <= list_len)
@@ -6158,7 +6156,7 @@ static int nfs4_xattr_get_nfs4_label(struct dentry *dentry, const char *key,
                                    void *buf, size_t buflen, int type)
  {
         if (security_ismaclabel(key))
-               return nfs4_get_security_label(dentry->d_inode, buf, buflen);
+               return nfs4_get_security_label(d_inode(dentry), buf, buflen);
         return -EOPNOTSUPP;
  }
  
@@ -6168,10 +6166,10 @@ static size_t nfs4_xattr_list_nfs4_label(struct dentry *dentry, char *list,
  {
         size_t len = 0;
  
-       if (nfs_server_capable(dentry->d_inode, NFS_CAP_SECURITY_LABEL)) {
-               len = security_inode_listsecurity(dentry->d_inode, NULL, 0);
+       if (nfs_server_capable(d_inode(dentry), NFS_CAP_SECURITY_LABEL)) {
+               len = security_inode_listsecurity(d_inode(dentry), NULL, 0);
                 if (list && len <= list_len)
-                       security_inode_listsecurity(dentry->d_inode, list, len);
+                       security_inode_listsecurity(d_inode(dentry), list, len);
         }
         return len;
  }
@@ -7944,6 +7942,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
  {
         struct nfs4_getdeviceinfo_args args = {
                 .pdev = pdev,
+               .notify_types = NOTIFY_DEVICEID4_CHANGE |
+                       NOTIFY_DEVICEID4_DELETE,
         };
         struct nfs4_getdeviceinfo_res res = {
                 .pdev = pdev,
@@ -7958,6 +7958,11 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
  
         dprintk("--> %s\n", __func__);
         status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
+       if (res.notification & ~args.notify_types)
+               dprintk("%s: unsupported notification\n", __func__);
+       if (res.notification != args.notify_types)
+               pdev->nocache = 1;
+
         dprintk("<-- %s status=%d\n", __func__, status);
  
         return status;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c

index f95e3b5..2782cfc 100644 (file)
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -42,7 +42,6 @@
  #include <linux/slab.h>
  #include <linux/fs.h>
  #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
  #include <linux/kthread.h>
  #include <linux/module.h>
  #include <linux/random.h>
@@ -57,6 +56,7 @@
  #include "callback.h"
  #include "delegation.h"
  #include "internal.h"
+#include "nfs4idmap.h"
  #include "nfs4session.h"
  #include "pnfs.h"
  #include "netns.h"
@@ -1902,7 +1902,7 @@ static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred)
                 goto out;
         }
  
-       inode = server->super->s_root->d_inode;
+       inode = d_inode(server->super->s_root);
         result = nfs4_proc_get_locations(inode, locations, page, cred);
         if (result) {
                 dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
@@ -2021,7 +2021,7 @@ restart:
  
                 rcu_read_unlock();
  
-               inode = server->super->s_root->d_inode;
+               inode = d_inode(server->super->s_root);
                 status = nfs4_proc_fsid_present(inode, cred);
                 if (status != -NFS4ERR_MOVED)
                         goto restart;   /* wasn't this one */
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c

index 75090fe..6fb7cb6 100644 (file)
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -3,12 +3,12 @@
   */
  #include <linux/init.h>
  #include <linux/module.h>
-#include <linux/nfs_idmap.h>
  #include <linux/nfs4_mount.h>
  #include <linux/nfs_fs.h>
  #include "delegation.h"
  #include "internal.h"
  #include "nfs4_fs.h"
+#include "nfs4idmap.h"
  #include "dns_resolve.h"
  #include "pnfs.h"
  #include "nfs.h"
@@ -91,10 +91,11 @@ static void nfs4_evict_inode(struct inode *inode)
  {
         truncate_inode_pages_final(&inode->i_data);
         clear_inode(inode);
-       pnfs_return_layout(inode);
-       pnfs_destroy_layout(NFS_I(inode));
         /* If we are holding a delegation, return it! */
         nfs_inode_return_delegation_noreclaim(inode);
+       /* Note that above delegreturn would trigger pnfs return-on-close */
+       pnfs_return_layout(inode);
+       pnfs_destroy_layout(NFS_I(inode));
         /* First call standard NFS clear_inode() code */
         nfs_clear_inode(inode);
  }
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c

index b6ebe7e..0fbd3ab 100644 (file)
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -6,10 +6,10 @@
   * Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com>
   */
  #include <linux/sysctl.h>
-#include <linux/nfs_idmap.h>
  #include <linux/nfs_fs.h>
  
  #include "nfs4_fs.h"
+#include "nfs4idmap.h"
  #include "callback.h"
  
  static const int nfs_set_port_min = 0;
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h

index 1c32adb..470af1a 100644 (file)
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -418,7 +418,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
                                 __entry->fileid = 0;
                                 __entry->fhandle = 0;
                         }
-                       __entry->dir = NFS_FILEID(ctx->dentry->d_parent->d_inode);
+                       __entry->dir = NFS_FILEID(d_inode(ctx->dentry->d_parent));
                         __assign_str(name, ctx->dentry->d_name.name);
                 ),
  
@@ -1110,7 +1110,7 @@ TRACE_EVENT(nfs4_layoutget,
                 ),
  
                 TP_fast_assign(
-                       const struct inode *inode = ctx->dentry->d_inode;
+                       const struct inode *inode = d_inode(ctx->dentry);
                         __entry->dev = inode->i_sb->s_dev;
                         __entry->fileid = NFS_FILEID(inode);
                         __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode));
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c

index 5c399ec..0aea978 100644 (file)
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,10 +52,10 @@
  #include <linux/nfs.h>
  #include <linux/nfs4.h>
  #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
  
  #include "nfs4_fs.h"
  #include "internal.h"
+#include "nfs4idmap.h"
  #include "nfs4session.h"
  #include "pnfs.h"
  #include "netns.h"
@@ -1920,7 +1920,7 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
  
         p = reserve_space(xdr, 4 + 4);
         *p++ = cpu_to_be32(1);                  /* bitmap length */
-       *p++ = cpu_to_be32(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE);
+       *p++ = cpu_to_be32(args->notify_types);
  }
  
  static void
@@ -5753,8 +5753,9 @@ out_overflow:
  
  #if defined(CONFIG_NFS_V4_1)
  static int decode_getdeviceinfo(struct xdr_stream *xdr,
-                               struct pnfs_device *pdev)
+                               struct nfs4_getdeviceinfo_res *res)
  {
+       struct pnfs_device *pdev = res->pdev;
         __be32 *p;
         uint32_t len, type;
         int status;
@@ -5802,12 +5803,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
                 if (unlikely(!p))
                         goto out_overflow;
  
-               if (be32_to_cpup(p++) &
-                   ~(NOTIFY_DEVICEID4_CHANGE | NOTIFY_DEVICEID4_DELETE)) {
-                       dprintk("%s: unsupported notification\n",
-                               __func__);
-               }
-
+               res->notification = be32_to_cpup(p++);
                 for (i = 1; i < len; i++) {
                         if (be32_to_cpup(p++)) {
                                 dprintk("%s: unsupported notification\n",
@@ -7061,7 +7057,7 @@ static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp,
         status = decode_sequence(xdr, &res->seq_res, rqstp);
         if (status != 0)
                 goto out;
-       status = decode_getdeviceinfo(xdr, res->pdev);
+       status = decode_getdeviceinfo(xdr, res);
  out:
         return status;
  }
@@ -7365,6 +7361,11 @@ nfs4_stat_to_errno(int stat)
         .p_name   = #proc,                                      \
  }
  
+#define STUB(proc)             \
+[NFSPROC4_CLNT_##proc] = {     \
+       .p_name = #proc,        \
+}
+
  struct rpc_procinfo    nfs4_procedures[] = {
         PROC(READ,              enc_read,               dec_read),
         PROC(WRITE,             enc_write,              dec_write),
@@ -7417,6 +7418,7 @@ struct rpc_procinfo       nfs4_procedures[] = {
         PROC(SECINFO_NO_NAME,   enc_secinfo_no_name,    dec_secinfo_no_name),
         PROC(TEST_STATEID,      enc_test_stateid,       dec_test_stateid),
         PROC(FREE_STATEID,      enc_free_stateid,       dec_free_stateid),
+       STUB(GETDEVICELIST),
         PROC(BIND_CONN_TO_SESSION,
                         enc_bind_conn_to_session, dec_bind_conn_to_session),
         PROC(DESTROY_CLIENTID,  enc_destroy_clientid,   dec_destroy_clientid),
diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c

index 4eb0aea..c74f7af 100644 (file)
--- a/fs/nfs/nfstrace.c
+++ b/fs/nfs/nfstrace.c
@@ -7,3 +7,6 @@
  
  #define CREATE_TRACE_POINTS
  #include "nfstrace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c

index 24e1d74..5aaed36 100644 (file)
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -57,7 +57,7 @@ objio_free_deviceid_node(struct nfs4_deviceid_node *d)
  
         dprintk("%s: free od=%p\n", __func__, de->od.od);
         osduld_put_device(de->od.od);
-       kfree(de);
+       kfree_rcu(d, rcu);
  }
  
  struct objio_segment {
@@ -637,6 +637,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
         .pg_read_ops             = &objio_pg_read_ops,
         .pg_write_ops            = &objio_pg_write_ops,
  
+       .sync                    = pnfs_generic_sync,
+
         .free_deviceid_node      = objio_free_deviceid_node,
  
         .encode_layoutcommit     = objlayout_encode_layoutcommit,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c

index d57190a..282b393 100644 (file)
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -938,7 +938,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
         if (prev) {
                 if (!nfs_match_open_context(req->wb_context, prev->wb_context))
                         return false;
-               flctx = req->wb_context->dentry->d_inode->i_flctx;
+               flctx = d_inode(req->wb_context->dentry)->i_flctx;
                 if (flctx != NULL &&
                     !(list_empty_careful(&flctx->flc_posix) &&
                       list_empty_careful(&flctx->flc_flock)) &&
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c

index 4f802b0..2306062 100644 (file)
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1090,6 +1090,7 @@ bool pnfs_roc(struct inode *ino)
         pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
         spin_unlock(&ino->i_lock);
         pnfs_free_lseg_list(&tmp_list);
+       pnfs_layoutcommit_inode(ino, true);
         return true;
  
  out_noroc:
@@ -1104,8 +1105,10 @@ out_noroc:
                 }
         }
         spin_unlock(&ino->i_lock);
-       if (layoutreturn)
+       if (layoutreturn) {
+               pnfs_layoutcommit_inode(ino, true);
                 pnfs_send_layoutreturn(lo, stateid, IOMODE_ANY, true);
+       }
         return false;
  }
  
@@ -1841,7 +1844,8 @@ void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
  {
         trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
         if (!hdr->pnfs_error) {
-               pnfs_set_layoutcommit(hdr);
+               pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
+                               hdr->mds_offset + hdr->res.count);
                 hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
         } else
                 pnfs_ld_handle_write_error(hdr);
@@ -1902,7 +1906,6 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
         pnfs_put_lseg(hdr->lseg);
         nfs_pgio_header_free(hdr);
  }
-EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
  
  int
  pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
@@ -2032,7 +2035,6 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
         pnfs_put_lseg(hdr->lseg);
         nfs_pgio_header_free(hdr);
  }
-EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
  
  int
  pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
@@ -2099,64 +2101,34 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
  EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
  
  void
-pnfs_set_layoutcommit(struct nfs_pgio_header *hdr)
+pnfs_set_layoutcommit(struct inode *inode, struct pnfs_layout_segment *lseg,
+               loff_t end_pos)
  {
-       struct inode *inode = hdr->inode;
         struct nfs_inode *nfsi = NFS_I(inode);
-       loff_t end_pos = hdr->mds_offset + hdr->res.count;
         bool mark_as_dirty = false;
  
         spin_lock(&inode->i_lock);
         if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
-               mark_as_dirty = true;
-               dprintk("%s: Set layoutcommit for inode %lu ",
-                       __func__, inode->i_ino);
-       }
-       if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
-               /* references matched in nfs4_layoutcommit_release */
-               pnfs_get_lseg(hdr->lseg);
-       }
-       if (end_pos > nfsi->layout->plh_lwb)
                 nfsi->layout->plh_lwb = end_pos;
-       spin_unlock(&inode->i_lock);
-       dprintk("%s: lseg %p end_pos %llu\n",
-               __func__, hdr->lseg, nfsi->layout->plh_lwb);
-
-       /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
-        * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
-       if (mark_as_dirty)
-               mark_inode_dirty_sync(inode);
-}
-EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
-
-void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data)
-{
-       struct inode *inode = data->inode;
-       struct nfs_inode *nfsi = NFS_I(inode);
-       bool mark_as_dirty = false;
-
-       spin_lock(&inode->i_lock);
-       if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
                 mark_as_dirty = true;
                 dprintk("%s: Set layoutcommit for inode %lu ",
                         __func__, inode->i_ino);
-       }
-       if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) {
+       } else if (end_pos > nfsi->layout->plh_lwb)
+               nfsi->layout->plh_lwb = end_pos;
+       if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags)) {
                 /* references matched in nfs4_layoutcommit_release */
-               pnfs_get_lseg(data->lseg);
+               pnfs_get_lseg(lseg);
         }
-       if (data->lwb > nfsi->layout->plh_lwb)
-               nfsi->layout->plh_lwb = data->lwb;
         spin_unlock(&inode->i_lock);
         dprintk("%s: lseg %p end_pos %llu\n",
-               __func__, data->lseg, nfsi->layout->plh_lwb);
+               __func__, lseg, nfsi->layout->plh_lwb);
  
         /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
          * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
         if (mark_as_dirty)
                 mark_inode_dirty_sync(inode);
  }
-EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit);
+EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
  
  void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
  {
@@ -2216,7 +2188,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
         pnfs_list_write_lseg(inode, &data->lseg_list);
  
         end_pos = nfsi->layout->plh_lwb;
-       nfsi->layout->plh_lwb = 0;
  
         nfs4_stateid_copy(&data->args.stateid, &nfsi->layout->plh_stateid);
         spin_unlock(&inode->i_lock);
@@ -2233,11 +2204,11 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
                 status = ld->prepare_layoutcommit(&data->args);
                 if (status) {
                         spin_lock(&inode->i_lock);
-                       if (end_pos < nfsi->layout->plh_lwb)
+                       set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
+                       if (end_pos > nfsi->layout->plh_lwb)
                                 nfsi->layout->plh_lwb = end_pos;
                         spin_unlock(&inode->i_lock);
                         put_rpccred(data->cred);
-                       set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags);
                         goto clear_layoutcommitting;
                 }
         }
@@ -2258,6 +2229,13 @@ clear_layoutcommitting:
  }
  EXPORT_SYMBOL_GPL(pnfs_layoutcommit_inode);
  
+int
+pnfs_generic_sync(struct inode *inode, bool datasync)
+{
+       return pnfs_layoutcommit_inode(inode, true);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_sync);
+
  struct nfs4_threshold *pnfs_mdsthreshold_alloc(void)
  {
         struct nfs4_threshold *thp;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h

index 635f086..1e6308f 100644 (file)
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -155,6 +155,8 @@ struct pnfs_layoutdriver_type {
                                int how,
                                struct nfs_commit_info *cinfo);
  
+       int (*sync)(struct inode *inode, bool datasync);
+
         /*
          * Return PNFS_ATTEMPTED to indicate the layout code has attempted
          * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
@@ -203,6 +205,7 @@ struct pnfs_device {
         struct page **pages;
         unsigned int  pgbase;
         unsigned int  pglen;    /* reply buffer length */
+       unsigned char nocache : 1;/* May not be cached */
  };
  
  #define NFS4_PNFS_GETDEVLIST_MAXNUM 16
@@ -263,10 +266,11 @@ bool pnfs_roc(struct inode *ino);
  void pnfs_roc_release(struct inode *ino);
  void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
  bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_pgio_header *);
-void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data);
+void pnfs_set_layoutcommit(struct inode *, struct pnfs_layout_segment *, loff_t);
  void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
  int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
+int pnfs_generic_sync(struct inode *inode, bool datasync);
+int pnfs_nfs_generic_sync(struct inode *inode, bool datasync);
  int _pnfs_return_layout(struct inode *);
  int pnfs_commit_and_return_layout(struct inode *);
  void pnfs_ld_write_done(struct nfs_pgio_header *);
@@ -291,6 +295,7 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
  enum {
         NFS_DEVICEID_INVALID = 0,       /* set when MDS clientid recalled */
         NFS_DEVICEID_UNAVAILABLE,       /* device temporarily unavailable */
+       NFS_DEVICEID_NOCACHE,           /* device may not be cached */
  };
  
  /* pnfs_dev.c */
@@ -302,6 +307,7 @@ struct nfs4_deviceid_node {
         unsigned long                   flags;
         unsigned long                   timestamp_unavailable;
         struct nfs4_deviceid            deviceid;
+       struct rcu_head                 rcu;
         atomic_t                        ref;
  };
  
@@ -426,7 +432,7 @@ static inline bool
  pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
                          struct nfs_commit_info *cinfo, u32 ds_commit_idx)
  {
-       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(req->wb_context->dentry);
         struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
  
         if (lseg == NULL || ld->mark_request_commit == NULL)
@@ -438,7 +444,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
  static inline bool
  pnfs_clear_request_commit(struct nfs_page *req, struct nfs_commit_info *cinfo)
  {
-       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(req->wb_context->dentry);
         struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
  
         if (ld == NULL || ld->clear_request_commit == NULL)
@@ -486,6 +492,14 @@ pnfs_ld_read_whole_page(struct inode *inode)
         return NFS_SERVER(inode)->pnfs_curr_ld->flags & PNFS_READ_WHOLE_PAGE;
  }
  
+static inline int
+pnfs_sync_inode(struct inode *inode, bool datasync)
+{
+       if (!pnfs_enabled_sb(NFS_SERVER(inode)))
+               return 0;
+       return NFS_SERVER(inode)->pnfs_curr_ld->sync(inode, datasync);
+}
+
  static inline bool
  pnfs_layoutcommit_outstanding(struct inode *inode)
  {
@@ -568,6 +582,12 @@ pnfs_ld_read_whole_page(struct inode *inode)
         return false;
  }
  
+static inline int
+pnfs_sync_inode(struct inode *inode, bool datasync)
+{
+       return 0;
+}
+
  static inline bool
  pnfs_roc(struct inode *ino)
  {
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c

index aa2ec00..2961fcd 100644 (file)
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -149,6 +149,8 @@ nfs4_get_device_info(struct nfs_server *server,
          */
         d = server->pnfs_curr_ld->alloc_deviceid_node(server, pdev,
                         gfp_flags);
+       if (d && pdev->nocache)
+               set_bit(NFS_DEVICEID_NOCACHE, &d->flags);
  
  out_free_pages:
         for (i = 0; i < max_pages; i++)
@@ -175,8 +177,8 @@ __nfs4_find_get_deviceid(struct nfs_server *server,
         rcu_read_lock();
         d = _lookup_deviceid(server->pnfs_curr_ld, server->nfs_client, id,
                         hash);
-       if (d != NULL)
-               atomic_inc(&d->ref);
+       if (d != NULL && !atomic_inc_not_zero(&d->ref))
+               d = NULL;
         rcu_read_unlock();
         return d;
  }
@@ -235,12 +237,11 @@ nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
                 return;
         }
         hlist_del_init_rcu(&d->node);
+       clear_bit(NFS_DEVICEID_NOCACHE, &d->flags);
         spin_unlock(&nfs4_deviceid_lock);
-       synchronize_rcu();
  
         /* balance the initial ref set in pnfs_insert_deviceid */
-       if (atomic_dec_and_test(&d->ref))
-               d->ld->free_deviceid_node(d);
+       nfs4_put_deviceid_node(d);
  }
  EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
  
@@ -271,6 +272,11 @@ EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node);
  bool
  nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
  {
+       if (test_bit(NFS_DEVICEID_NOCACHE, &d->flags)) {
+               if (atomic_add_unless(&d->ref, -1, 2))
+                       return false;
+               nfs4_delete_deviceid(d->ld, d->nfs_client, &d->deviceid);
+       }
         if (!atomic_dec_and_test(&d->ref))
                 return false;
         d->ld->free_deviceid_node(d);
@@ -314,6 +320,7 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash)
                 if (d->nfs_client == clp && atomic_read(&d->ref)) {
                         hlist_del_init_rcu(&d->node);
                         hlist_add_head(&d->tmpnode, &tmp);
+                       clear_bit(NFS_DEVICEID_NOCACHE, &d->flags);
                 }
         rcu_read_unlock();
         spin_unlock(&nfs4_deviceid_lock);
@@ -321,12 +328,10 @@ _deviceid_purge_client(const struct nfs_client *clp, long hash)
         if (hlist_empty(&tmp))
                 return;
  
-       synchronize_rcu();
         while (!hlist_empty(&tmp)) {
                 d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode);
                 hlist_del(&d->tmpnode);
-               if (atomic_dec_and_test(&d->ref))
-                       d->ld->free_deviceid_node(d);
+               nfs4_put_deviceid_node(d);
         }
  }
  
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c

index 54e36b3..f37e25b 100644 (file)
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -561,7 +561,7 @@ static bool load_v3_ds_connect(void)
         return(get_v3_ds_connect != NULL);
  }
  
-void __exit nfs4_pnfs_v3_ds_connect_unload(void)
+void nfs4_pnfs_v3_ds_connect_unload(void)
  {
         if (get_v3_ds_connect) {
                 symbol_put(nfs3_set_ds_client);
@@ -868,3 +868,13 @@ pnfs_layout_mark_request_commit(struct nfs_page *req,
         nfs_request_add_commit_list(req, list, cinfo);
  }
  EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
+
+int
+pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
+{
+       if (datasync)
+               return 0;
+       return pnfs_layoutcommit_inode(inode, true);
+}
+EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);
+
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c

index c63189a..b417bbc 100644 (file)
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -118,7 +118,7 @@ static int
  nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                  struct iattr *sattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct nfs_sattrargs    arg = { 
                 .fh     = NFS_FH(inode),
                 .sattr  = sattr
@@ -487,7 +487,7 @@ static int
  nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                  u64 cookie, struct page **pages, unsigned int count, int plus)
  {
-       struct inode            *dir = dentry->d_inode;
+       struct inode            *dir = d_inode(dentry);
         struct nfs_readdirargs  arg = {
                 .fh             = NFS_FH(dir),
                 .cookie         = cookie,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c

index b8f5c63..ae0ff7a 100644 (file)
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -117,7 +117,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
  
  static void nfs_readpage_release(struct nfs_page *req)
  {
-       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(req->wb_context->dentry);
  
         dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id,
                 (unsigned long long)NFS_FILEID(inode), req->wb_bytes,
@@ -284,7 +284,7 @@ int nfs_readpage(struct file *file, struct page *page)
         dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
                 page, PAGE_CACHE_SIZE, page_file_index(page));
         nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
-       nfs_inc_stats(inode, NFSIOS_READPAGES);
+       nfs_add_stats(inode, NFSIOS_READPAGES, 1);
  
         /*
          * Try to flush any pending writes to the file..
diff --git a/fs/nfs/super.c b/fs/nfs/super.c

index 322b2de..f175b83 100644 (file)
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -43,7 +43,6 @@
  #include <linux/seq_file.h>
  #include <linux/mount.h>
  #include <linux/namei.h>
-#include <linux/nfs_idmap.h>
  #include <linux/vfs.h>
  #include <linux/inet.h>
  #include <linux/in6.h>
@@ -433,7 +432,7 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
         struct nfs_server *server = NFS_SB(dentry->d_sb);
         unsigned char blockbits;
         unsigned long blockres;
-       struct nfs_fh *fh = NFS_FH(dentry->d_inode);
+       struct nfs_fh *fh = NFS_FH(d_inode(dentry));
         struct nfs_fsstat res;
         int error = -ENOMEM;
  
@@ -447,7 +446,7 @@ int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  
                 pd_dentry = dget_parent(dentry);
                 if (pd_dentry != NULL) {
-                       nfs_zap_caches(pd_dentry->d_inode);
+                       nfs_zap_caches(d_inode(pd_dentry));
                         dput(pd_dentry);
                 }
         }
@@ -2193,7 +2192,7 @@ nfs_compare_remount_data(struct nfs_server *nfss,
             data->version != nfss->nfs_client->rpc_ops->version ||
             data->minorversion != nfss->nfs_client->cl_minorversion ||
             data->retrans != nfss->client->cl_timeout->to_retries ||
-           data->selected_flavor != nfss->client->cl_auth->au_flavor ||
+           !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) ||
             data->acregmin != nfss->acregmin / HZ ||
             data->acregmax != nfss->acregmax / HZ ||
             data->acdirmin != nfss->acdirmin / HZ ||
@@ -2241,7 +2240,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
         data->wsize = nfss->wsize;
         data->retrans = nfss->client->cl_timeout->to_retries;
         data->selected_flavor = nfss->client->cl_auth->au_flavor;
-       data->auth_info = nfss->auth_info;
         data->acregmin = nfss->acregmin / HZ;
         data->acregmax = nfss->acregmax / HZ;
         data->acdirmin = nfss->acdirmin / HZ;
@@ -2526,7 +2524,7 @@ int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
                           struct nfs_mount_info *mount_info)
  {
         /* clone any lsm security options from the parent to the new sb */
-       if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
+       if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops)
                 return -ESTALE;
         return security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
  }
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c

index 05c9e02..2d56200 100644 (file)
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -45,7 +45,7 @@ error:
  
  static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct page *page;
         void *err;
  
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c

index de54129..fa538b2 100644 (file)
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -143,7 +143,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
                 nfs_free_dname(data);
                 ret = nfs_copy_dname(alias, data);
                 spin_lock(&alias->d_lock);
-               if (ret == 0 && alias->d_inode != NULL &&
+               if (ret == 0 && d_really_is_positive(alias) &&
                     !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
                         devname_garbage = alias->d_fsdata;
                         alias->d_fsdata = data;
@@ -190,7 +190,7 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
         parent = dget_parent(dentry);
         if (parent == NULL)
                 goto out_free;
-       dir = parent->d_inode;
+       dir = d_inode(parent);
         /* Non-exclusive lock protects against concurrent lookup() calls */
         spin_lock(&dir->i_lock);
         if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
@@ -210,21 +210,21 @@ out_free:
  
  void nfs_wait_on_sillyrename(struct dentry *dentry)
  {
-       struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
+       struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
  
         wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1);
  }
  
  void nfs_block_sillyrename(struct dentry *dentry)
  {
-       struct nfs_inode *nfsi = NFS_I(dentry->d_inode);
+       struct nfs_inode *nfsi = NFS_I(d_inode(dentry));
  
         wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1);
  }
  
  void nfs_unblock_sillyrename(struct dentry *dentry)
  {
-       struct inode *dir = dentry->d_inode;
+       struct inode *dir = d_inode(dentry);
         struct nfs_inode *nfsi = NFS_I(dir);
         struct nfs_unlinkdata *data;
  
@@ -367,8 +367,8 @@ static void nfs_async_rename_release(void *calldata)
         struct nfs_renamedata   *data = calldata;
         struct super_block *sb = data->old_dir->i_sb;
  
-       if (data->old_dentry->d_inode)
-               nfs_mark_for_revalidate(data->old_dentry->d_inode);
+       if (d_really_is_positive(data->old_dentry))
+               nfs_mark_for_revalidate(d_inode(data->old_dentry));
  
         dput(data->old_dentry);
         dput(data->new_dentry);
@@ -529,10 +529,10 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
         if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
                 goto out;
  
-       fileid = NFS_FILEID(dentry->d_inode);
+       fileid = NFS_FILEID(d_inode(dentry));
  
         /* Return delegation in anticipation of the rename */
-       NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode);
+       NFS_PROTO(d_inode(dentry))->return_delegation(d_inode(dentry));
  
         sdentry = NULL;
         do {
@@ -554,7 +554,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
                  */
                 if (IS_ERR(sdentry))
                         goto out;
-       } while (sdentry->d_inode != NULL); /* need negative lookup */
+       } while (d_inode(sdentry) != NULL); /* need negative lookup */
  
         /* queue unlink first. Can't do this from rpc_release as it
          * has to allocate memory
diff --git a/fs/nfs/write.c b/fs/nfs/write.c

index 7599310..d12a4be 100644 (file)
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -580,7 +580,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
         int ret;
  
         nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
-       nfs_inc_stats(inode, NFSIOS_WRITEPAGES);
+       nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
  
         nfs_pageio_cond_complete(pgio, page_file_index(page));
         ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
@@ -702,7 +702,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
   */
  static void nfs_inode_remove_request(struct nfs_page *req)
  {
-       struct inode *inode = req->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(req->wb_context->dentry);
         struct nfs_inode *nfsi = NFS_I(inode);
         struct nfs_page *head;
  
@@ -861,7 +861,7 @@ static void
  nfs_clear_request_commit(struct nfs_page *req)
  {
         if (test_bit(PG_CLEAN, &req->wb_flags)) {
-               struct inode *inode = req->wb_context->dentry->d_inode;
+               struct inode *inode = d_inode(req->wb_context->dentry);
                 struct nfs_commit_info cinfo;
  
                 nfs_init_cinfo_from_inode(&cinfo, inode);
@@ -1591,7 +1591,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
                      struct nfs_commit_info *cinfo)
  {
         struct nfs_page *first = nfs_list_entry(head->next);
-       struct inode *inode = first->wb_context->dentry->d_inode;
+       struct inode *inode = d_inode(first->wb_context->dentry);
  
         /* Set up the RPC argument and reply structs
          * NB: take care not to mess about with data->commit et al. */
@@ -1690,7 +1690,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
  
                 dprintk("NFS:       commit (%s/%llu %d@%lld)",
                         req->wb_context->dentry->d_sb->s_id,
-                       (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+                       (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
                         req->wb_bytes,
                         (long long)req_offset(req));
                 if (status < 0) {
@@ -1840,17 +1840,16 @@ EXPORT_SYMBOL_GPL(nfs_write_inode);
   */
  int nfs_wb_all(struct inode *inode)
  {
-       struct writeback_control wbc = {
-               .sync_mode = WB_SYNC_ALL,
-               .nr_to_write = LONG_MAX,
-               .range_start = 0,
-               .range_end = LLONG_MAX,
-       };
         int ret;
  
         trace_nfs_writeback_inode_enter(inode);
  
-       ret = sync_inode(inode, &wbc);
+       ret = filemap_write_and_wait(inode->i_mapping);
+       if (!ret) {
+               ret = nfs_commit_inode(inode, FLUSH_SYNC);
+               if (!ret)
+                       pnfs_sync_inode(inode, true);
+       }
  
         trace_nfs_writeback_inode_exit(inode, ret);
         return ret;
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig

index fc2d108..a0b77fc 100644 (file)
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -108,7 +108,7 @@ config NFSD_V4_SECURITY_LABEL
  
  config NFSD_FAULT_INJECTION
         bool "NFS server manual fault injection"
-       depends on NFSD_V4 && DEBUG_KERNEL
+       depends on NFSD_V4 && DEBUG_KERNEL && DEBUG_FS
         help
           This option enables support for manually injecting faults
           into the NFS server.  This is intended to be used for
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c

index c3e3b6e..f79521a 100644 (file)
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -599,7 +599,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
                                 goto out4;
                 }
  
-               err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags,
+               err = check_export(d_inode(exp.ex_path.dentry), &exp.ex_flags,
                                    exp.ex_uuid);
                 if (err)
                         goto out4;
@@ -691,8 +691,7 @@ static int svc_export_match(struct cache_head *a, struct cache_head *b)
         struct svc_export *orig = container_of(a, struct svc_export, h);
         struct svc_export *new = container_of(b, struct svc_export, h);
         return orig->ex_client == new->ex_client &&
-               orig->ex_path.dentry == new->ex_path.dentry &&
-               orig->ex_path.mnt == new->ex_path.mnt;
+               path_equal(&orig->ex_path, &new->ex_path);
  }
  
  static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
@@ -891,7 +890,7 @@ exp_rootfh(struct net *net, struct auth_domain *clp, char *name,
                 printk("nfsd: exp_rootfh path not found %s", name);
                 return err;
         }
-       inode = path.dentry->d_inode;
+       inode = d_inode(path.dentry);
  
         dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
                  name, path.dentry, clp->name,
@@ -1159,6 +1158,7 @@ static struct flags {
         { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
         { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
         { NFSEXP_V4ROOT, {"v4root", ""}},
+       { NFSEXP_PNFS, {"pnfs", ""}},
         { 0, {"", ""}}
  };
  
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c

index ac54ea6..d54701f 100644 (file)
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -42,7 +42,7 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
         if (nfserr)
                 RETURN_STATUS(nfserr);
  
-       inode = fh->fh_dentry->d_inode;
+       inode = d_inode(fh->fh_dentry);
  
         if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
                 RETURN_STATUS(nfserr_inval);
@@ -103,7 +103,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
         if (nfserr)
                 goto out;
  
-       inode = fh->fh_dentry->d_inode;
+       inode = d_inode(fh->fh_dentry);
         if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
                 error = -EOPNOTSUPP;
                 goto out_errno;
@@ -266,9 +266,9 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
          * nfsd_dispatch actually ensures the following cannot happen.
          * However, it seems fragile to depend on that.
          */
-       if (dentry == NULL || dentry->d_inode == NULL)
+       if (dentry == NULL || d_really_is_negative(dentry))
                 return 0;
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         p = nfs2svc_encode_fattr(rqstp, p, &resp->fh, &resp->stat);
         *p++ = htonl(resp->mask);
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c

index 34cbbab..882b1a1 100644 (file)
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -39,7 +39,7 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
         if (nfserr)
                 RETURN_STATUS(nfserr);
  
-       inode = fh->fh_dentry->d_inode;
+       inode = d_inode(fh->fh_dentry);
  
         if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
                 RETURN_STATUS(nfserr_inval);
@@ -94,7 +94,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
         if (nfserr)
                 goto out;
  
-       inode = fh->fh_dentry->d_inode;
+       inode = d_inode(fh->fh_dentry);
         if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
                 error = -EOPNOTSUPP;
                 goto out_errno;
@@ -174,8 +174,8 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
         struct dentry *dentry = resp->fh.fh_dentry;
  
         p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh);
-       if (resp->status == 0 && dentry && dentry->d_inode) {
-               struct inode *inode = dentry->d_inode;
+       if (resp->status == 0 && dentry && d_really_is_positive(dentry)) {
+               struct inode *inode = d_inode(dentry);
                 struct kvec *head = rqstp->rq_res.head;
                 unsigned int base;
                 int n;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c

index 12f2aab..7b755b7 100644 (file)
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -166,7 +166,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
                                   rqstp->rq_vec, argp->vlen,
                                   &resp->count);
         if (nfserr == 0) {
-               struct inode    *inode = resp->fh.fh_dentry->d_inode;
+               struct inode    *inode = d_inode(resp->fh.fh_dentry);
  
                 resp->eof = (argp->offset + resp->count) >= inode->i_size;
         }
@@ -551,7 +551,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
          * different read/write sizes for file systems known to have
          * problems with large blocks */
         if (nfserr == 0) {
-               struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+               struct super_block *sb = d_inode(argp->fh.fh_dentry)->i_sb;
  
                 /* Note that we don't care for remote fs's here */
                 if (sb->s_magic == MSDOS_SUPER_MAGIC) {
@@ -587,7 +587,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
         nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
  
         if (nfserr == 0) {
-               struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+               struct super_block *sb = d_inode(argp->fh.fh_dentry)->i_sb;
  
                 /* Note that we don't care for remote fs's here */
                 switch (sb->s_magic) {
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c

index 39c5eb3..e4b2b43 100644 (file)
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -146,7 +146,7 @@ static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp)
         default:
         case FSIDSOURCE_DEV:
                 p = xdr_encode_hyper(p, (u64)huge_encode_dev
-                                    (fhp->fh_dentry->d_inode->i_sb->s_dev));
+                                    (d_inode(fhp->fh_dentry)->i_sb->s_dev));
                 break;
         case FSIDSOURCE_FSID:
                 p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
@@ -203,14 +203,14 @@ static __be32 *
  encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
  {
         struct dentry *dentry = fhp->fh_dentry;
-       if (dentry && dentry->d_inode) {
+       if (dentry && d_really_is_positive(dentry)) {
                 __be32 err;
                 struct kstat stat;
  
                 err = fh_getattr(fhp, &stat);
                 if (!err) {
                         *p++ = xdr_one;         /* attributes follow */
-                       lease_get_mtime(dentry->d_inode, &stat.mtime);
+                       lease_get_mtime(d_inode(dentry), &stat.mtime);
                         return encode_fattr3(rqstp, p, fhp, &stat);
                 }
         }
@@ -233,7 +233,7 @@ encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
  {
         struct dentry   *dentry = fhp->fh_dentry;
  
-       if (dentry && dentry->d_inode && fhp->fh_post_saved) {
+       if (dentry && d_really_is_positive(dentry) && fhp->fh_post_saved) {
                 if (fhp->fh_pre_saved) {
                         *p++ = xdr_one;
                         p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size);
@@ -260,11 +260,11 @@ void fill_post_wcc(struct svc_fh *fhp)
                 printk("nfsd: inode locked twice during operation.\n");
  
         err = fh_getattr(fhp, &fhp->fh_post_attr);
-       fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
+       fhp->fh_post_change = d_inode(fhp->fh_dentry)->i_version;
         if (err) {
                 fhp->fh_post_saved = 0;
                 /* Grab the ctime anyway - set_change_info might use it */
-               fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime;
+               fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime;
         } else
                 fhp->fh_post_saved = 1;
  }
@@ -628,7 +628,7 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p,
                                         struct nfsd3_attrstat *resp)
  {
         if (resp->status == 0) {
-               lease_get_mtime(resp->fh.fh_dentry->d_inode,
+               lease_get_mtime(d_inode(resp->fh.fh_dentry),
                                 &resp->stat.mtime);
                 p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat);
         }
@@ -828,7 +828,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
                 return rv;
         if (d_mountpoint(dchild))
                 goto out;
-       if (!dchild->d_inode)
+       if (d_really_is_negative(dchild))
                 goto out;
         rv = fh_compose(fhp, exp, dchild, &cd->fh);
  out:
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c

index 59fd766..67242bf 100644 (file)
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -139,7 +139,7 @@ int
  nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
                 struct nfs4_acl **acl)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error = 0;
         struct posix_acl *pacl = NULL, *dpacl = NULL;
         unsigned int flags = 0;
@@ -499,43 +499,13 @@ static inline void add_to_mask(struct posix_acl_state *state, struct posix_ace_s
         state->mask.allow |= astate->allow;
  }
  
-/*
- * Certain bits (SYNCHRONIZE, DELETE, WRITE_OWNER, READ/WRITE_NAMED_ATTRS,
- * READ_ATTRIBUTES, READ_ACL) are currently unenforceable and don't translate
- * to traditional read/write/execute permissions.
- *
- * It's problematic to reject acls that use certain mode bits, because it
- * places the burden on users to learn the rules about which bits one
- * particular server sets, without giving the user a lot of help--we return an
- * error that could mean any number of different things.  To make matters
- * worse, the problematic bits might be introduced by some application that's
- * automatically mapping from some other acl model.
- *
- * So wherever possible we accept anything, possibly erring on the side of
- * denying more permissions than necessary.
- *
- * However we do reject *explicit* DENY's of a few bits representing
- * permissions we could never deny:
- */
-
-static inline int check_deny(u32 mask, int isowner)
-{
-       if (mask & (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL))
-               return -EINVAL;
-       if (!isowner)
-               return 0;
-       if (mask & (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL))
-               return -EINVAL;
-       return 0;
-}
-
  static struct posix_acl *
  posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
  {
         struct posix_acl_entry *pace;
         struct posix_acl *pacl;
         int nace;
-       int i, error = 0;
+       int i;
  
         /*
          * ACLs with no ACEs are treated differently in the inheritable
@@ -560,17 +530,11 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
  
         pace = pacl->a_entries;
         pace->e_tag = ACL_USER_OBJ;
-       error = check_deny(state->owner.deny, 1);
-       if (error)
-               goto out_err;
         low_mode_from_nfs4(state->owner.allow, &pace->e_perm, flags);
  
         for (i=0; i < state->users->n; i++) {
                 pace++;
                 pace->e_tag = ACL_USER;
-               error = check_deny(state->users->aces[i].perms.deny, 0);
-               if (error)
-                       goto out_err;
                 low_mode_from_nfs4(state->users->aces[i].perms.allow,
                                         &pace->e_perm, flags);
                 pace->e_uid = state->users->aces[i].uid;
@@ -579,18 +543,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
  
         pace++;
         pace->e_tag = ACL_GROUP_OBJ;
-       error = check_deny(state->group.deny, 0);
-       if (error)
-               goto out_err;
         low_mode_from_nfs4(state->group.allow, &pace->e_perm, flags);
         add_to_mask(state, &state->group);
  
         for (i=0; i < state->groups->n; i++) {
                 pace++;
                 pace->e_tag = ACL_GROUP;
-               error = check_deny(state->groups->aces[i].perms.deny, 0);
-               if (error)
-                       goto out_err;
                 low_mode_from_nfs4(state->groups->aces[i].perms.allow,
                                         &pace->e_perm, flags);
                 pace->e_gid = state->groups->aces[i].gid;
@@ -605,15 +563,9 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
  
         pace++;
         pace->e_tag = ACL_OTHER;
-       error = check_deny(state->other.deny, 0);
-       if (error)
-               goto out_err;
         low_mode_from_nfs4(state->other.allow, &pace->e_perm, flags);
  
         return pacl;
-out_err:
-       posix_acl_release(pacl);
-       return ERR_PTR(error);
  }
  
  static inline void allow_bits(struct posix_ace_state *astate, u32 mask)
@@ -828,7 +780,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 return error;
  
         dentry = fhp->fh_dentry;
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         if (!inode->i_op->set_acl || !IS_POSIXACL(inode))
                 return nfserr_attrnotsupp;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c

index 92b9d97..864e200 100644 (file)
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -52,7 +52,7 @@
  static inline void
  nfsd4_security_inode_setsecctx(struct svc_fh *resfh, struct xdr_netobj *label, u32 *bmval)
  {
-       struct inode *inode = resfh->fh_dentry->d_inode;
+       struct inode *inode = d_inode(resfh->fh_dentry);
         int status;
  
         mutex_lock(&inode->i_mutex);
@@ -110,7 +110,7 @@ check_attr_support(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
          * in current environment or not.
          */
         if (bmval[0] & FATTR4_WORD0_ACL) {
-               if (!IS_POSIXACL(dentry->d_inode))
+               if (!IS_POSIXACL(d_inode(dentry)))
                         return nfserr_attrnotsupp;
         }
  
@@ -209,7 +209,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
  
  static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
  {
-       umode_t mode = fh->fh_dentry->d_inode->i_mode;
+       umode_t mode = d_inode(fh->fh_dentry)->i_mode;
  
         if (S_ISREG(mode))
                 return nfs_ok;
@@ -470,7 +470,7 @@ out:
                 fh_put(resfh);
                 kfree(resfh);
         }
-       nfsd4_cleanup_open_state(cstate, open, status);
+       nfsd4_cleanup_open_state(cstate, open);
         nfsd4_bump_seqid(cstate, status);
         return status;
  }
@@ -881,7 +881,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                     &exp, &dentry);
         if (err)
                 return err;
-       if (dentry->d_inode == NULL) {
+       if (d_really_is_negative(dentry)) {
                 exp_put(exp);
                 err = nfserr_noent;
         } else
@@ -1030,6 +1030,8 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                 dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
                 return status;
         }
+       if (!file)
+               return nfserr_bad_stateid;
  
         status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
                                      fallocate->falloc_offset,
@@ -1069,6 +1071,8 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                 dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
                 return status;
         }
+       if (!file)
+               return nfserr_bad_stateid;
  
         switch (seek->seek_whence) {
         case NFS4_CONTENT_DATA:
@@ -1308,7 +1312,7 @@ nfsd4_layoutget(struct svc_rqst *rqstp,
         if (atomic_read(&ls->ls_stid.sc_file->fi_lo_recalls))
                 goto out_put_stid;
  
-       nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode,
+       nfserr = ops->proc_layoutget(d_inode(current_fh->fh_dentry),
                                      current_fh, lgp);
         if (nfserr)
                 goto out_put_stid;
@@ -1342,7 +1346,7 @@ nfsd4_layoutcommit(struct svc_rqst *rqstp,
         ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type);
         if (!ops)
                 goto out;
-       inode = current_fh->fh_dentry->d_inode;
+       inode = d_inode(current_fh->fh_dentry);
  
         nfserr = nfserr_inval;
         if (new_size <= seg->offset) {
@@ -1815,7 +1819,7 @@ static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
                 bmap0 &= ~FATTR4_WORD0_FILEHANDLE;
         }
         if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) {
-               ret += NFSD4_MAX_SEC_LABEL_LEN + 12;
+               ret += NFS4_MAXLABELLEN + 12;
                 bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL;
         }
         /*
@@ -2282,13 +2286,13 @@ static struct nfsd4_operation nfsd4_ops[] = {
                 .op_func = (nfsd4op_func)nfsd4_allocate,
                 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
                 .op_name = "OP_ALLOCATE",
-               .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
         },
         [OP_DEALLOCATE] = {
                 .op_func = (nfsd4op_func)nfsd4_deallocate,
                 .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
                 .op_name = "OP_DEALLOCATE",
-               .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
+               .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
         },
         [OP_SEEK] = {
                 .op_func = (nfsd4op_func)nfsd4_seek,
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c

index 1c307f0..d88ea7b 100644 (file)
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -192,14 +192,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
  
         dir = nn->rec_file->f_path.dentry;
         /* lock the parent */
-       mutex_lock(&dir->d_inode->i_mutex);
+       mutex_lock(&d_inode(dir)->i_mutex);
  
         dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
         if (IS_ERR(dentry)) {
                 status = PTR_ERR(dentry);
                 goto out_unlock;
         }
-       if (dentry->d_inode)
+       if (d_really_is_positive(dentry))
                 /*
                  * In the 4.1 case, where we're called from
                  * reclaim_complete(), records from the previous reboot
@@ -209,11 +209,11 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
                  * as well be forgiving and just succeed silently.
                  */
                 goto out_put;
-       status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
+       status = vfs_mkdir(d_inode(dir), dentry, S_IRWXU);
  out_put:
         dput(dentry);
  out_unlock:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         if (status == 0) {
                 if (nn->in_grace) {
                         crp = nfs4_client_to_reclaim(dname, nn);
@@ -285,7 +285,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
         }
  
         status = iterate_dir(nn->rec_file, &ctx.ctx);
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
         while (!list_empty(&ctx.names)) {
                 struct name_list *entry;
                 entry = list_entry(ctx.names.next, struct name_list, list);
@@ -302,7 +302,7 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
                 list_del(&entry->list);
                 kfree(entry);
         }
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         nfs4_reset_creds(original_cred);
         return status;
  }
@@ -316,20 +316,20 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
         dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
  
         dir = nn->rec_file->f_path.dentry;
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_PARENT);
         dentry = lookup_one_len(name, dir, namlen);
         if (IS_ERR(dentry)) {
                 status = PTR_ERR(dentry);
                 goto out_unlock;
         }
         status = -ENOENT;
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 goto out;
-       status = vfs_rmdir(dir->d_inode, dentry);
+       status = vfs_rmdir(d_inode(dir), dentry);
  out:
         dput(dentry);
  out_unlock:
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
         return status;
  }
  
@@ -385,7 +385,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
         if (nfs4_has_reclaimed_state(child->d_name.name, nn))
                 return 0;
  
-       status = vfs_rmdir(parent->d_inode, child);
+       status = vfs_rmdir(d_inode(parent), child);
         if (status)
                 printk("failed to remove client recovery directory %pd\n",
                                 child);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c

index 326a545..38f2d7a 100644 (file)
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1139,7 +1139,7 @@ hash_sessionid(struct nfs4_sessionid *sessionid)
         return sid->sequence % SESSION_HASH_SIZE;
  }
  
-#ifdef NFSD_DEBUG
+#ifdef CONFIG_SUNRPC_DEBUG
  static inline void
  dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
  {
@@ -4049,7 +4049,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
                 status = nfserr_bad_stateid;
                 if (nfsd4_is_deleg_cur(open))
                         goto out;
-               status = nfserr_jukebox;
         }
  
         /*
@@ -4118,7 +4117,7 @@ out:
  }
  
  void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
-                             struct nfsd4_open *open, __be32 status)
+                             struct nfsd4_open *open)
  {
         if (open->op_openowner) {
                 struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
@@ -4473,7 +4472,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
         struct nfs4_ol_stateid *stp = NULL;
         struct nfs4_delegation *dp = NULL;
         struct svc_fh *current_fh = &cstate->current_fh;
-       struct inode *ino = current_fh->fh_dentry->d_inode;
+       struct inode *ino = d_inode(current_fh->fh_dentry);
         struct nfsd_net *nn = net_generic(net, nfsd_net_id);
         struct file *file = NULL;
         __be32 status;
@@ -5171,7 +5170,7 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
         struct nfs4_file *fi = ost->st_stid.sc_file;
         struct nfs4_openowner *oo = openowner(ost->st_stateowner);
         struct nfs4_client *cl = oo->oo_owner.so_client;
-       struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
+       struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
         struct nfs4_lockowner *lo;
         unsigned int strhashval;
  
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c

index 5fb7e78..158badf 100644 (file)
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -424,7 +424,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                 len += 4;
                 dummy32 = be32_to_cpup(p++);
                 READ_BUF(dummy32);
-               if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN)
+               if (dummy32 > NFS4_MAXLABELLEN)
                         return nfserr_badlabel;
                 len += (XDR_QUADLEN(dummy32) << 2);
                 READMEM(buf, dummy32);
@@ -2020,7 +2020,7 @@ static __be32 nfsd4_encode_path(struct xdr_stream *xdr,
          * dentries/path components in an array.
          */
         for (;;) {
-               if (cur.dentry == root->dentry && cur.mnt == root->mnt)
+               if (path_equal(&cur, root))
                         break;
                 if (cur.dentry == cur.mnt->mnt_root) {
                         if (follow_up(&cur))
@@ -2292,7 +2292,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
  #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
         if ((bmval[2] & FATTR4_WORD2_SECURITY_LABEL) ||
                         bmval[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
-               err = security_inode_getsecctx(dentry->d_inode,
+               err = security_inode_getsecctx(d_inode(dentry),
                                                 &context, &contextlen);
                 contextsupport = (err == 0);
                 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
@@ -2384,7 +2384,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
                 p = xdr_reserve_space(xdr, 8);
                 if (!p)
                         goto out_resource;
-               p = encode_change(p, &stat, dentry->d_inode);
+               p = encode_change(p, &stat, d_inode(dentry));
         }
         if (bmval0 & FATTR4_WORD0_SIZE) {
                 p = xdr_reserve_space(xdr, 8);
@@ -2807,7 +2807,7 @@ nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
         dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
         if (IS_ERR(dentry))
                 return nfserrno(PTR_ERR(dentry));
-       if (!dentry->d_inode) {
+       if (d_really_is_negative(dentry)) {
                 /*
                  * nfsd_buffered_readdir drops the i_mutex between
                  * readdir and calling this callback, leaving a window
@@ -3324,7 +3324,7 @@ static __be32 nfsd4_encode_splice_read(
         }
  
         eof = (read->rd_offset + maxcount >=
-              read->rd_fhp->fh_dentry->d_inode->i_size);
+              d_inode(read->rd_fhp->fh_dentry)->i_size);
  
         *(p++) = htonl(eof);
         *(p++) = htonl(maxcount);
@@ -3401,7 +3401,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
         xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
  
         eof = (read->rd_offset + maxcount >=
-              read->rd_fhp->fh_dentry->d_inode->i_size);
+              d_inode(read->rd_fhp->fh_dentry)->i_size);
  
         tmp = htonl(eof);
         write_bytes_to_xdr_buf(xdr->buf, starting_len    , &tmp, 4);
@@ -3422,6 +3422,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
         unsigned long maxcount;
         struct xdr_stream *xdr = &resp->xdr;
         struct file *file = read->rd_filp;
+       struct svc_fh *fhp = read->rd_fhp;
         int starting_len = xdr->buf->len;
         struct raparms *ra;
         __be32 *p;
@@ -3445,12 +3446,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
         maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
         maxcount = min_t(unsigned long, maxcount, read->rd_length);
  
-       if (!read->rd_filp) {
+       if (read->rd_filp)
+               err = nfsd_permission(resp->rqstp, fhp->fh_export,
+                               fhp->fh_dentry,
+                               NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
+       else
                 err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
                                                 &file, &ra);
-               if (err)
-                       goto err_truncate;
-       }
+       if (err)
+               goto err_truncate;
  
         if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
                 err = nfsd4_encode_splice_read(resp, read, file, maxcount);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c

index aa47d75..9690cb4 100644 (file)
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1250,15 +1250,15 @@ static int __init init_nfsd(void)
         int retval;
         printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
  
-       retval = register_cld_notifier();
-       if (retval)
-               return retval;
         retval = register_pernet_subsys(&nfsd_net_ops);
         if (retval < 0)
-               goto out_unregister_notifier;
-       retval = nfsd4_init_slabs();
+               return retval;
+       retval = register_cld_notifier();
         if (retval)
                 goto out_unregister_pernet;
+       retval = nfsd4_init_slabs();
+       if (retval)
+               goto out_unregister_notifier;
         retval = nfsd4_init_pnfs();
         if (retval)
                 goto out_free_slabs;
@@ -1290,10 +1290,10 @@ out_exit_pnfs:
         nfsd4_exit_pnfs();
  out_free_slabs:
         nfsd4_free_slabs();
-out_unregister_pernet:
-       unregister_pernet_subsys(&nfsd_net_ops);
  out_unregister_notifier:
         unregister_cld_notifier();
+out_unregister_pernet:
+       unregister_pernet_subsys(&nfsd_net_ops);
         return retval;
  }
  
@@ -1308,8 +1308,8 @@ static void __exit exit_nfsd(void)
         nfsd4_exit_pnfs();
         nfsd_fault_inject_cleanup();
         unregister_filesystem(&nfsd_fs_type);
-       unregister_pernet_subsys(&nfsd_net_ops);
         unregister_cld_notifier();
+       unregister_pernet_subsys(&nfsd_net_ops);
  }
  
  MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h

index 565c4da..cf98052 100644 (file)
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -24,7 +24,7 @@
  #include "export.h"
  
  #undef ifdebug
-#ifdef NFSD_DEBUG
+#ifdef CONFIG_SUNRPC_DEBUG
  # define ifdebug(flag)         if (nfsd_debug & NFSDDBG_##flag)
  #else
  # define ifdebug(flag)         if (0)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c

index e9fa966..350041a 100644 (file)
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -38,7 +38,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
                 /* make sure parents give x permission to user */
                 int err;
                 parent = dget_parent(tdentry);
-               err = inode_permission(parent->d_inode, MAY_EXEC);
+               err = inode_permission(d_inode(parent), MAY_EXEC);
                 if (err < 0) {
                         dput(parent);
                         break;
@@ -340,7 +340,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
         if (error)
                 goto out;
  
-       error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
+       error = nfsd_mode_check(rqstp, d_inode(dentry)->i_mode, type);
         if (error)
                 goto out;
  
@@ -412,8 +412,8 @@ static inline void _fh_update_old(struct dentry *dentry,
                                   struct svc_export *exp,
                                   struct knfsd_fh *fh)
  {
-       fh->ofh_ino = ino_t_to_u32(dentry->d_inode->i_ino);
-       fh->ofh_generation = dentry->d_inode->i_generation;
+       fh->ofh_ino = ino_t_to_u32(d_inode(dentry)->i_ino);
+       fh->ofh_generation = d_inode(dentry)->i_generation;
         if (d_is_dir(dentry) ||
             (exp->ex_flags & NFSEXP_NOSUBTREECHECK))
                 fh->ofh_dirino = 0;
@@ -426,7 +426,7 @@ static bool is_root_export(struct svc_export *exp)
  
  static struct super_block *exp_sb(struct svc_export *exp)
  {
-       return exp->ex_path.dentry->d_inode->i_sb;
+       return d_inode(exp->ex_path.dentry)->i_sb;
  }
  
  static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp)
@@ -520,12 +520,12 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
          *
          */
  
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
         dev_t ex_dev = exp_sb(exp)->s_dev;
  
         dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
                 MAJOR(ex_dev), MINOR(ex_dev),
-               (long) exp->ex_path.dentry->d_inode->i_ino,
+               (long) d_inode(exp->ex_path.dentry)->i_ino,
                 dentry,
                 (inode ? inode->i_ino : 0));
  
@@ -558,7 +558,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
                 fhp->fh_handle.ofh_dev =  old_encode_dev(ex_dev);
                 fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
                 fhp->fh_handle.ofh_xino =
-                       ino_t_to_u32(exp->ex_path.dentry->d_inode->i_ino);
+                       ino_t_to_u32(d_inode(exp->ex_path.dentry)->i_ino);
                 fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
                 if (inode)
                         _fh_update_old(dentry, exp, &fhp->fh_handle);
@@ -570,7 +570,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
                 mk_fsid(fhp->fh_handle.fh_fsid_type,
                         fhp->fh_handle.fh_fsid,
                         ex_dev,
-                       exp->ex_path.dentry->d_inode->i_ino,
+                       d_inode(exp->ex_path.dentry)->i_ino,
                         exp->ex_fsid, exp->ex_uuid);
  
                 if (inode)
@@ -597,7 +597,7 @@ fh_update(struct svc_fh *fhp)
                 goto out_bad;
  
         dentry = fhp->fh_dentry;
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 goto out_negative;
         if (fhp->fh_handle.fh_version != 1) {
                 _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h

index f229204..1e90dad 100644 (file)
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -225,7 +225,7 @@ fill_pre_wcc(struct svc_fh *fhp)
  {
         struct inode    *inode;
  
-       inode = fhp->fh_dentry->d_inode;
+       inode = d_inode(fhp->fh_dentry);
         if (!fhp->fh_pre_saved) {
                 fhp->fh_pre_mtime = inode->i_mtime;
                 fhp->fh_pre_ctime = inode->i_ctime;
@@ -264,7 +264,7 @@ fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
                 return;
         }
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         mutex_lock_nested(&inode->i_mutex, subclass);
         fill_pre_wcc(fhp);
         fhp->fh_locked = 1;
@@ -284,7 +284,7 @@ fh_unlock(struct svc_fh *fhp)
  {
         if (fhp->fh_locked) {
                 fill_post_wcc(fhp);
-               mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(fhp->fh_dentry)->i_mutex);
                 fhp->fh_locked = 0;
         }
  }
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c

index b868073..aecbcd3 100644 (file)
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -223,7 +223,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
         }
         fh_init(newfhp, NFS_FHSIZE);
         nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp);
-       if (!nfserr && !dchild->d_inode)
+       if (!nfserr && d_really_is_negative(dchild))
                 nfserr = nfserr_noent;
         dput(dchild);
         if (nfserr) {
@@ -241,7 +241,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
                 }
         }
  
-       inode = newfhp->fh_dentry->d_inode;
+       inode = d_inode(newfhp->fh_dentry);
  
         /* Unfudge the mode bits */
         if (attr->ia_valid & ATTR_MODE) {
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c

index 412d706..79d964a 100644 (file)
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -187,7 +187,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
         *p++ = htonl((u32) stat->ino);
         *p++ = htonl((u32) stat->atime.tv_sec);
         *p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0);
-       lease_get_mtime(dentry->d_inode, &time); 
+       lease_get_mtime(d_inode(dentry), &time); 
         *p++ = htonl((u32) time.tv_sec);
         *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); 
         *p++ = htonl((u32) stat->ctime.tv_sec);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index 3685265..84d770b 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -174,7 +174,7 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
                 return 1;
         if (!(exp->ex_flags & NFSEXP_V4ROOT))
                 return 0;
-       return dentry->d_inode != NULL;
+       return d_inode(dentry) != NULL;
  }
  
  __be32
@@ -270,7 +270,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
          * dentry may be negative, it may need to be updated.
          */
         err = fh_compose(resfh, exp, dentry, fhp);
-       if (!err && !dentry->d_inode)
+       if (!err && d_really_is_negative(dentry))
                 err = nfserr_noent;
  out:
         dput(dentry);
@@ -284,7 +284,7 @@ out:
  static int
  commit_metadata(struct svc_fh *fhp)
  {
-       struct inode *inode = fhp->fh_dentry->d_inode;
+       struct inode *inode = d_inode(fhp->fh_dentry);
         const struct export_operations *export_ops = inode->i_sb->s_export_op;
  
         if (!EX_ISSYNC(fhp->fh_export))
@@ -364,7 +364,7 @@ static __be32
  nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 struct iattr *iap)
  {
-       struct inode *inode = fhp->fh_dentry->d_inode;
+       struct inode *inode = d_inode(fhp->fh_dentry);
         int host_err;
  
         if (iap->ia_size < inode->i_size) {
@@ -426,7 +426,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
         }
  
         dentry = fhp->fh_dentry;
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         /* Ignore any mode updates on symlinks */
         if (S_ISLNK(inode->i_mode))
@@ -495,7 +495,7 @@ out:
   */
  int nfsd4_is_junction(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (inode == NULL)
                 return 0;
@@ -521,9 +521,9 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
  
         dentry = fhp->fh_dentry;
  
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&d_inode(dentry)->i_mutex);
         host_error = security_inode_setsecctx(dentry, label->data, label->len);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
         return nfserrno(host_error);
  }
  #else
@@ -706,7 +706,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
  
         path.mnt = fhp->fh_export->ex_path.mnt;
         path.dentry = fhp->fh_dentry;
-       inode = path.dentry->d_inode;
+       inode = d_inode(path.dentry);
  
         /* Disallow write access to files with the append-only bit set
          * or any access when mandatory locking enabled
@@ -1211,7 +1211,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 goto out;
  
         dentry = fhp->fh_dentry;
-       dirp = dentry->d_inode;
+       dirp = d_inode(dentry);
  
         err = nfserr_notdir;
         if (!dirp->i_op->lookup)
@@ -1250,7 +1250,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
          * Make sure the child dentry is still negative ...
          */
         err = nfserr_exist;
-       if (dchild->d_inode) {
+       if (d_really_is_positive(dchild)) {
                 dprintk("nfsd_create: dentry %pd/%pd not negative!\n",
                         dentry, dchild);
                 goto out; 
@@ -1353,7 +1353,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 goto out;
  
         dentry = fhp->fh_dentry;
-       dirp = dentry->d_inode;
+       dirp = d_inode(dentry);
  
         /* Get all the sanity checks out of the way before
          * we lock the parent. */
@@ -1376,7 +1376,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 goto out_nfserr;
  
         /* If file doesn't exist, check for permissions to create one */
-       if (!dchild->d_inode) {
+       if (d_really_is_negative(dchild)) {
                 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
                 if (err)
                         goto out;
@@ -1397,7 +1397,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 v_atime = verifier[1]&0x7fffffff;
         }
         
-       if (dchild->d_inode) {
+       if (d_really_is_positive(dchild)) {
                 err = 0;
  
                 switch (createmode) {
@@ -1420,17 +1420,17 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                         }
                         break;
                 case NFS3_CREATE_EXCLUSIVE:
-                       if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
-                           && dchild->d_inode->i_atime.tv_sec == v_atime
-                           && dchild->d_inode->i_size  == 0 ) {
+                       if (   d_inode(dchild)->i_mtime.tv_sec == v_mtime
+                           && d_inode(dchild)->i_atime.tv_sec == v_atime
+                           && d_inode(dchild)->i_size  == 0 ) {
                                 if (created)
                                         *created = 1;
                                 break;
                         }
                 case NFS4_CREATE_EXCLUSIVE4_1:
-                       if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
-                           && dchild->d_inode->i_atime.tv_sec == v_atime
-                           && dchild->d_inode->i_size  == 0 ) {
+                       if (   d_inode(dchild)->i_mtime.tv_sec == v_mtime
+                           && d_inode(dchild)->i_atime.tv_sec == v_atime
+                           && d_inode(dchild)->i_size  == 0 ) {
                                 if (created)
                                         *created = 1;
                                 goto set_attr;
@@ -1513,7 +1513,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
  
         path.mnt = fhp->fh_export->ex_path.mnt;
         path.dentry = fhp->fh_dentry;
-       inode = path.dentry->d_inode;
+       inode = d_inode(path.dentry);
  
         err = nfserr_inval;
         if (!inode->i_op->readlink)
@@ -1576,7 +1576,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
         if (IS_ERR(dnew))
                 goto out_nfserr;
  
-       host_err = vfs_symlink(dentry->d_inode, dnew, path);
+       host_err = vfs_symlink(d_inode(dentry), dnew, path);
         err = nfserrno(host_err);
         if (!err)
                 err = nfserrno(commit_metadata(fhp));
@@ -1632,7 +1632,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
  
         fh_lock_nested(ffhp, I_MUTEX_PARENT);
         ddir = ffhp->fh_dentry;
-       dirp = ddir->d_inode;
+       dirp = d_inode(ddir);
  
         dnew = lookup_one_len(name, ddir, len);
         host_err = PTR_ERR(dnew);
@@ -1642,7 +1642,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
         dold = tfhp->fh_dentry;
  
         err = nfserr_noent;
-       if (!dold->d_inode)
+       if (d_really_is_negative(dold))
                 goto out_dput;
         host_err = vfs_link(dold, dirp, dnew, NULL);
         if (!host_err) {
@@ -1689,10 +1689,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                 goto out;
  
         fdentry = ffhp->fh_dentry;
-       fdir = fdentry->d_inode;
+       fdir = d_inode(fdentry);
  
         tdentry = tfhp->fh_dentry;
-       tdir = tdentry->d_inode;
+       tdir = d_inode(tdentry);
  
         err = nfserr_perm;
         if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
@@ -1717,7 +1717,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
                 goto out_nfserr;
  
         host_err = -ENOENT;
-       if (!odentry->d_inode)
+       if (d_really_is_negative(odentry))
                 goto out_dput_old;
         host_err = -EINVAL;
         if (odentry == trap)
@@ -1790,21 +1790,21 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
  
         fh_lock_nested(fhp, I_MUTEX_PARENT);
         dentry = fhp->fh_dentry;
-       dirp = dentry->d_inode;
+       dirp = d_inode(dentry);
  
         rdentry = lookup_one_len(fname, dentry, flen);
         host_err = PTR_ERR(rdentry);
         if (IS_ERR(rdentry))
                 goto out_nfserr;
  
-       if (!rdentry->d_inode) {
+       if (d_really_is_negative(rdentry)) {
                 dput(rdentry);
                 err = nfserr_noent;
                 goto out;
         }
  
         if (!type)
-               type = rdentry->d_inode->i_mode & S_IFMT;
+               type = d_inode(rdentry)->i_mode & S_IFMT;
  
         if (type != S_IFDIR)
                 host_err = vfs_unlink(dirp, rdentry, NULL);
@@ -2015,7 +2015,7 @@ __be32
  nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
                                         struct dentry *dentry, int acc)
  {
-       struct inode    *inode = dentry->d_inode;
+       struct inode    *inode = d_inode(dentry);
         int             err;
  
         if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h

index 0bda93e..f982ae8 100644 (file)
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -40,7 +40,6 @@
  #include "state.h"
  #include "nfsd.h"
  
-#define NFSD4_MAX_SEC_LABEL_LEN        2048
  #define NFSD4_MAX_TAGLEN       128
  #define XDR_LEN(n)                     (((n) + 3) & ~3)
  
@@ -632,7 +631,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
  {
         BUG_ON(!fhp->fh_pre_saved);
         cinfo->atomic = fhp->fh_post_saved;
-       cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
+       cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry));
  
         cinfo->before_change = fhp->fh_pre_change;
         cinfo->after_change = fhp->fh_post_change;
@@ -683,7 +682,7 @@ extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
                 struct svc_fh *current_fh, struct nfsd4_open *open);
  extern void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate);
  extern void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
-               struct nfsd4_open *open, __be32 status);
+               struct nfsd4_open *open);
  extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
                 struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
  extern __be32 nfsd4_close(struct svc_rqst *rqstp,
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c

index 197a63e..0ee0bed 100644 (file)
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -435,7 +435,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
   */
  int nilfs_add_link(struct dentry *dentry, struct inode *inode)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         const unsigned char *name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
         unsigned chunk_size = nilfs_chunk_size(dir);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c

index be936df..258d9fe 100644 (file)
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -835,7 +835,7 @@ void nilfs_evict_inode(struct inode *inode)
  int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
  {
         struct nilfs_transaction_info ti;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct super_block *sb = inode->i_sb;
         int err;
  
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c

index 0f84b25..2218083 100644 (file)
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -192,7 +192,7 @@ out_fail:
  static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
                       struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         struct nilfs_transaction_info ti;
         int err;
  
@@ -283,7 +283,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
         if (!de)
                 goto out;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         err = -EIO;
         if (le64_to_cpu(de->inode) != inode->i_ino)
                 goto out;
@@ -318,7 +318,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
  
         if (!err) {
                 nilfs_mark_inode_dirty(dir);
-               nilfs_mark_inode_dirty(dentry->d_inode);
+               nilfs_mark_inode_dirty(d_inode(dentry));
                 err = nilfs_transaction_commit(dir->i_sb);
         } else
                 nilfs_transaction_abort(dir->i_sb);
@@ -328,7 +328,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry)
  
  static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct nilfs_transaction_info ti;
         int err;
  
@@ -358,8 +358,8 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
  static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                         struct inode *new_dir,  struct dentry *new_dentry)
  {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct page *dir_page = NULL;
         struct nilfs_dir_entry *dir_de = NULL;
         struct page *old_page;
@@ -453,13 +453,13 @@ static struct dentry *nilfs_get_parent(struct dentry *child)
         struct qstr dotdot = QSTR_INIT("..", 2);
         struct nilfs_root *root;
  
-       ino = nilfs_inode_by_name(child->d_inode, &dotdot);
+       ino = nilfs_inode_by_name(d_inode(child), &dotdot);
         if (!ino)
                 return ERR_PTR(-ENOENT);
  
-       root = NILFS_I(child->d_inode)->i_root;
+       root = NILFS_I(d_inode(child))->i_root;
  
-       inode = nilfs_iget(child->d_inode->i_sb, root, ino);
+       inode = nilfs_iget(d_inode(child)->i_sb, root, ino);
         if (IS_ERR(inode))
                 return ERR_CAST(inode);
  
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c

index c1725f2..f47585b 100644 (file)
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -610,7 +610,7 @@ static int nilfs_unfreeze(struct super_block *sb)
  static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
  {
         struct super_block *sb = dentry->d_sb;
-       struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
+       struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root;
         struct the_nilfs *nilfs = root->nilfs;
         u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
         unsigned long long blocks;
@@ -681,7 +681,7 @@ static int nilfs_show_options(struct seq_file *seq, struct dentry *dentry)
  {
         struct super_block *sb = dentry->d_sb;
         struct the_nilfs *nilfs = sb->s_fs_info;
-       struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root;
+       struct nilfs_root *root = NILFS_I(d_inode(dentry))->i_root;
  
         if (!nilfs_test_opt(nilfs, BARRIER))
                 seq_puts(seq, ",nobarrier");
@@ -1190,7 +1190,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
  
                 sb->s_flags &= ~MS_RDONLY;
  
-               root = NILFS_I(sb->s_root->d_inode)->i_root;
+               root = NILFS_I(d_inode(sb->s_root))->i_root;
                 err = nilfs_attach_log_writer(sb, root);
                 if (err)
                         goto restore_opts;
diff --git a/fs/nsfs.c b/fs/nsfs.c

index af1b24f..99521e7 100644 (file)
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -13,7 +13,7 @@ static const struct file_operations ns_file_operations = {
  
  static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
  
         return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
@@ -22,7 +22,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
  
  static void ns_prune_dentry(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         if (inode) {
                 struct ns_common *ns = inode->i_private;
                 atomic_long_set(&ns->stashed, 0);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c

index 1d0c21d..d284f07 100644 (file)
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2889,7 +2889,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
   */
  int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *vi = dentry->d_inode;
+       struct inode *vi = d_inode(dentry);
         int err;
         unsigned int ia_valid = attr->ia_valid;
  
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c

index b3973c2..0f35b80 100644 (file)
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -292,14 +292,14 @@ const struct inode_operations ntfs_dir_inode_ops = {
   * The code is based on the ext3 ->get_parent() implementation found in
   * fs/ext3/namei.c::ext3_get_parent().
   *
- * Note: ntfs_get_parent() is called with @child_dent->d_inode->i_mutex down.
+ * Note: ntfs_get_parent() is called with @d_inode(child_dent)->i_mutex down.
   *
   * Return the dentry of the parent directory on success or the error code on
   * error (IS_ERR() is true).
   */
  static struct dentry *ntfs_get_parent(struct dentry *child_dent)
  {
-       struct inode *vi = child_dent->d_inode;
+       struct inode *vi = d_inode(child_dent);
         ntfs_inode *ni = NTFS_I(vi);
         MFT_RECORD *mrec;
         ntfs_attr_search_ctx *ctx;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c

index 4fda7a5..2903730 100644 (file)
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -42,8 +42,8 @@
  void ocfs2_dentry_attach_gen(struct dentry *dentry)
  {
         unsigned long gen =
-               OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
-       BUG_ON(dentry->d_inode);
+               OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen;
+       BUG_ON(d_inode(dentry));
         dentry->d_fsdata = (void *)gen;
  }
  
@@ -57,7 +57,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         osb = OCFS2_SB(dentry->d_sb);
  
         trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len,
@@ -71,7 +71,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
                 unsigned long gen = (unsigned long) dentry->d_fsdata;
                 unsigned long pgen;
                 spin_lock(&dentry->d_lock);
-               pgen = OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen;
+               pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen;
                 spin_unlock(&dentry->d_lock);
                 trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
                                                        dentry->d_name.name,
@@ -146,7 +146,7 @@ static int ocfs2_match_dentry(struct dentry *dentry,
         if (skip_unhashed && d_unhashed(dentry))
                 return 0;
  
-       parent = dentry->d_parent->d_inode;
+       parent = d_inode(dentry->d_parent);
         /* Negative parent dentry? */
         if (!parent)
                 return 0;
@@ -243,7 +243,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry,
         if (!inode)
                 return 0;
  
-       if (!dentry->d_inode && dentry->d_fsdata) {
+       if (d_really_is_negative(dentry) && dentry->d_fsdata) {
                 /* Converting a negative dentry to positive
                    Clear dentry->d_fsdata */
                 dentry->d_fsdata = dl = NULL;
@@ -446,7 +446,7 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target,
  {
         int ret;
         struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb);
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         /*
          * Move within the same directory, so the actual lock info won't
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h

index f0344b7..3d8639f 100644 (file)
--- a/fs/ocfs2/dir.h
+++ b/fs/ocfs2/dir.h
@@ -72,7 +72,7 @@ static inline int ocfs2_add_entry(handle_t *handle,
                                   struct buffer_head *parent_fe_bh,
                                   struct ocfs2_dir_lookup_result *lookup)
  {
-       return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
+       return __ocfs2_add_entry(handle, d_inode(dentry->d_parent),
                                  dentry->d_name.name, dentry->d_name.len,
                                  inode, blkno, parent_fe_bh, lookup);
  }
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c

index 061ba6a..b5cf27d 100644 (file)
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -208,7 +208,7 @@ static int dlmfs_file_release(struct inode *inode,
  static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
  {
         int error;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         attr->ia_valid &= ~ATTR_SIZE;
         error = inode_change_ok(inode, attr);
@@ -549,7 +549,7 @@ static int dlmfs_unlink(struct inode *dir,
                         struct dentry *dentry)
  {
         int status;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         mlog(0, "unlink inode %lu\n", inode->i_ino);
  
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c

index 540dc4b..827fc98 100644 (file)
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -147,7 +147,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
         int status;
         u64 blkno;
         struct dentry *parent;
-       struct inode *dir = child->d_inode;
+       struct inode *dir = d_inode(child);
  
         trace_ocfs2_get_parent(child, child->d_name.len, child->d_name.name,
                                (unsigned long long)OCFS2_I(dir)->ip_blkno);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c

index 913fc25..d8b670c 100644 (file)
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1126,7 +1126,7 @@ out:
  int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
  {
         int status = 0, size_change;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct super_block *sb = inode->i_sb;
         struct ocfs2_super *osb = OCFS2_SB(sb);
         struct buffer_head *bh = NULL;
@@ -1275,8 +1275,8 @@ int ocfs2_getattr(struct vfsmount *mnt,
                   struct dentry *dentry,
                   struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
-       struct super_block *sb = dentry->d_inode->i_sb;
+       struct inode *inode = d_inode(dentry);
+       struct super_block *sb = d_inode(dentry)->i_sb;
         struct ocfs2_super *osb = sb->s_fs_info;
         int err;
  
@@ -2114,7 +2114,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
  {
         int ret = 0, meta_level = 0;
         struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         loff_t end;
         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
         int full_coherency = !(osb->s_mount_opt &
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c

index be71ca0..b254416 100644 (file)
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -1209,7 +1209,7 @@ int ocfs2_drop_inode(struct inode *inode)
   */
  int ocfs2_inode_revalidate(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int status = 0;
  
         trace_ocfs2_inode_revalidate(inode,
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c

index 09f90cb..176fe6a 100644 (file)
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -689,8 +689,8 @@ static int ocfs2_link(struct dentry *old_dentry,
                       struct dentry *dentry)
  {
         handle_t *handle;
-       struct inode *inode = old_dentry->d_inode;
-       struct inode *old_dir = old_dentry->d_parent->d_inode;
+       struct inode *inode = d_inode(old_dentry);
+       struct inode *old_dir = d_inode(old_dentry->d_parent);
         int err;
         struct buffer_head *fe_bh = NULL;
         struct buffer_head *old_dir_bh = NULL;
@@ -879,7 +879,7 @@ static int ocfs2_unlink(struct inode *dir,
         int status;
         int child_locked = 0;
         bool is_unlinkable = false;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct inode *orphan_dir = NULL;
         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
         u64 blkno;
@@ -898,7 +898,7 @@ static int ocfs2_unlink(struct inode *dir,
  
         dquot_initialize(dir);
  
-       BUG_ON(dentry->d_parent->d_inode != dir);
+       BUG_ON(d_inode(dentry->d_parent) != dir);
  
         if (inode == osb->root_inode)
                 return -EPERM;
@@ -1209,8 +1209,8 @@ static int ocfs2_rename(struct inode *old_dir,
  {
         int status = 0, rename_lock = 0, parents_locked = 0, target_exists = 0;
         int old_child_locked = 0, new_child_locked = 0, update_dot_dot = 0;
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct inode *orphan_dir = NULL;
         struct ocfs2_dinode *newfe = NULL;
         char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
@@ -1454,7 +1454,7 @@ static int ocfs2_rename(struct inode *old_dir,
                         should_add_orphan = true;
                 }
         } else {
-               BUG_ON(new_dentry->d_parent->d_inode != new_dir);
+               BUG_ON(d_inode(new_dentry->d_parent) != new_dir);
  
                 status = ocfs2_check_dir_for_entry(new_dir,
                                                    new_dentry->d_name.name,
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c

index df3a500..d8c6af1 100644 (file)
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4194,7 +4194,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
                            bool preserve)
  {
         int ret;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         struct buffer_head *new_bh = NULL;
  
         if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
@@ -4263,7 +4263,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                          struct dentry *new_dentry, bool preserve)
  {
         int error;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         struct buffer_head *old_bh = NULL;
         struct inode *new_orphan_inode = NULL;
         struct posix_acl *default_acl, *acl;
@@ -4357,7 +4357,7 @@ out:
  /* copied from may_create in VFS. */
  static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
  {
-       if (child->d_inode)
+       if (d_really_is_positive(child))
                 return -EEXIST;
         if (IS_DEADDIR(dir))
                 return -ENOENT;
@@ -4375,7 +4375,7 @@ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
  static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
                              struct dentry *new_dentry, bool preserve)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         int error;
  
         if (!inode)
@@ -4463,7 +4463,7 @@ int ocfs2_reflink_ioctl(struct inode *inode,
         }
  
         error = ocfs2_vfs_reflink(old_path.dentry,
-                                 new_path.dentry->d_inode,
+                                 d_inode(new_path.dentry),
                                   new_dentry, preserve);
  out_dput:
         done_path_create(&new_path, new_dentry);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c

index 4ca7533..d03bfbf 100644 (file)
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1020,7 +1020,7 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
         int ret = 0, i_ret = 0, b_ret = 0;
         struct buffer_head *di_bh = NULL;
         struct ocfs2_dinode *di = NULL;
-       struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
+       struct ocfs2_inode_info *oi = OCFS2_I(d_inode(dentry));
  
         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
                 return -EOPNOTSUPP;
@@ -1028,7 +1028,7 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
                 return ret;
  
-       ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
+       ret = ocfs2_inode_lock(d_inode(dentry), &di_bh, 0);
         if (ret < 0) {
                 mlog_errno(ret);
                 return ret;
@@ -1037,7 +1037,7 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
         di = (struct ocfs2_dinode *)di_bh->b_data;
  
         down_read(&oi->ip_xattr_sem);
-       i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
+       i_ret = ocfs2_xattr_ibody_list(d_inode(dentry), di, buffer, size);
         if (i_ret < 0)
                 b_ret = 0;
         else {
@@ -1045,13 +1045,13 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
                         buffer += i_ret;
                         size -= i_ret;
                 }
-               b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
+               b_ret = ocfs2_xattr_block_list(d_inode(dentry), di,
                                                buffer, size);
                 if (b_ret < 0)
                         i_ret = 0;
         }
         up_read(&oi->ip_xattr_sem);
-       ocfs2_inode_unlock(dentry->d_inode, 0);
+       ocfs2_inode_unlock(d_inode(dentry), 0);
  
         brelse(di_bh);
  
@@ -7257,7 +7257,7 @@ static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+       return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
                                name, buffer, size);
  }
  
@@ -7267,7 +7267,7 @@ static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
         if (strcmp(name, "") == 0)
                 return -EINVAL;
  
-       return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+       return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY,
                                name, value, size, flags);
  }
  
@@ -7347,7 +7347,7 @@ static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
  {
         if (strcmp(name, "") == 0)
                 return -EINVAL;
-       return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+       return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
                                name, buffer, size);
  }
  
@@ -7357,7 +7357,7 @@ static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
         if (strcmp(name, "") == 0)
                 return -EINVAL;
  
-       return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+       return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED,
                                name, value, size, flags);
  }
  
@@ -7399,7 +7399,7 @@ static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
                 return -EINVAL;
         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
                 return -EOPNOTSUPP;
-       return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
+       return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name,
                                buffer, size);
  }
  
@@ -7413,7 +7413,7 @@ static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
                 return -EOPNOTSUPP;
  
-       return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
+       return ocfs2_xattr_set(d_inode(dentry), OCFS2_XATTR_INDEX_USER,
                                name, value, size, flags);
  }
  
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c

index 1b8e9e8..f833bf8 100644 (file)
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -110,7 +110,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
  
  static int omfs_add_link(struct dentry *dentry, struct inode *inode)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         const char *name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
         struct omfs_inode *oi;
@@ -155,7 +155,7 @@ out:
  
  static int omfs_delete_entry(struct dentry *dentry)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         struct inode *dirty;
         const char *name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
@@ -237,7 +237,7 @@ static int omfs_dir_is_empty(struct inode *inode)
  
  static int omfs_remove(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int ret;
  
  
@@ -373,8 +373,8 @@ static bool omfs_fill_chain(struct inode *dir, struct dir_context *ctx,
  static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 struct inode *new_dir, struct dentry *new_dentry)
  {
-       struct inode *new_inode = new_dentry->d_inode;
-       struct inode *old_inode = old_dentry->d_inode;
+       struct inode *new_inode = d_inode(new_dentry);
+       struct inode *old_inode = d_inode(old_dentry);
         int err;
  
         if (new_inode) {
diff --git a/fs/omfs/file.c b/fs/omfs/file.c

index f993be7..d9e26cf 100644 (file)
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -346,7 +346,7 @@ const struct file_operations omfs_file_operations = {
  
  static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         error = inode_change_ok(inode, attr);
diff --git a/fs/open.c b/fs/open.c

index 6796f04..98e5a52 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,8 +231,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                 return -EINVAL;
  
         /* Return error if mode is not supported */
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+       if (mode & ~FALLOC_FL_SUPPORTED_MASK)
                 return -EOPNOTSUPP;
  
         /* Punch hole and zero range are mutually exclusive */
@@ -250,6 +249,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
             (mode & ~FALLOC_FL_COLLAPSE_RANGE))
                 return -EINVAL;
  
+       /* Insert range should only be used exclusively. */
+       if ((mode & FALLOC_FL_INSERT_RANGE) &&
+           (mode & ~FALLOC_FL_INSERT_RANGE))
+               return -EINVAL;
+
         if (!(file->f_mode & FMODE_WRITE))
                 return -EBADF;
  
diff --git a/fs/pipe.c b/fs/pipe.c

index 822da5b..8865f79 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -627,7 +627,7 @@ static struct vfsmount *pipe_mnt __read_mostly;
  static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
  {
         return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
-                               dentry->d_inode->i_ino);
+                               d_inode(dentry)->i_ino);
  }
  
  static const struct dentry_operations pipefs_dentry_operations = {
diff --git a/fs/posix_acl.c b/fs/posix_acl.c

index 3a48bb7..84bb65b 100644 (file)
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -774,12 +774,12 @@ posix_acl_xattr_get(struct dentry *dentry, const char *name,
         struct posix_acl *acl;
         int error;
  
-       if (!IS_POSIXACL(dentry->d_inode))
+       if (!IS_POSIXACL(d_backing_inode(dentry)))
                 return -EOPNOTSUPP;
         if (d_is_symlink(dentry))
                 return -EOPNOTSUPP;
  
-       acl = get_acl(dentry->d_inode, type);
+       acl = get_acl(d_backing_inode(dentry), type);
         if (IS_ERR(acl))
                 return PTR_ERR(acl);
         if (acl == NULL)
@@ -795,7 +795,7 @@ static int
  posix_acl_xattr_set(struct dentry *dentry, const char *name,
                 const void *value, size_t size, int flags, int type)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         struct posix_acl *acl = NULL;
         int ret;
  
@@ -834,7 +834,7 @@ posix_acl_xattr_list(struct dentry *dentry, char *list, size_t list_size,
         const char *xname;
         size_t size;
  
-       if (!IS_POSIXACL(dentry->d_inode))
+       if (!IS_POSIXACL(d_backing_inode(dentry)))
                 return -EOPNOTSUPP;
         if (d_is_symlink(dentry))
                 return -EOPNOTSUPP;
diff --git a/fs/proc/base.c b/fs/proc/base.c

index 7a3b82f..093ca14 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -169,7 +169,7 @@ static int get_task_root(struct task_struct *task, struct path *root)
  
  static int proc_cwd_link(struct dentry *dentry, struct path *path)
  {
-       struct task_struct *task = get_proc_task(dentry->d_inode);
+       struct task_struct *task = get_proc_task(d_inode(dentry));
         int result = -ENOENT;
  
         if (task) {
@@ -186,7 +186,7 @@ static int proc_cwd_link(struct dentry *dentry, struct path *path)
  
  static int proc_root_link(struct dentry *dentry, struct path *path)
  {
-       struct task_struct *task = get_proc_task(dentry->d_inode);
+       struct task_struct *task = get_proc_task(d_inode(dentry));
         int result = -ENOENT;
  
         if (task) {
@@ -514,7 +514,7 @@ static int proc_fd_access_allowed(struct inode *inode)
  int proc_setattr(struct dentry *dentry, struct iattr *attr)
  {
         int error;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (attr->ia_valid & ATTR_MODE)
                 return -EPERM;
@@ -1362,7 +1362,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
         struct mm_struct *mm;
         struct file *exe_file;
  
-       task = get_proc_task(dentry->d_inode);
+       task = get_proc_task(d_inode(dentry));
         if (!task)
                 return -ENOENT;
         mm = get_task_mm(task);
@@ -1382,7 +1382,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
  
  static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct path path;
         int error = -EACCES;
  
@@ -1427,7 +1427,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
  static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
  {
         int error = -EACCES;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct path path;
  
         /* Are we allowed to snoop on the tasks file descriptors? */
@@ -1497,7 +1497,7 @@ out_unlock:
  
  int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct task_struct *task;
         const struct cred *cred;
         struct pid_namespace *pid = dentry->d_sb->s_fs_info;
@@ -1554,7 +1554,7 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         task = get_proc_task(inode);
  
         if (task) {
@@ -1588,7 +1588,7 @@ int pid_delete_dentry(const struct dentry *dentry)
          * If so, then don't put the dentry on the lru list,
          * kill it immediately.
          */
-       return proc_inode_is_dead(dentry->d_inode);
+       return proc_inode_is_dead(d_inode(dentry));
  }
  
  const struct dentry_operations pid_dentry_operations =
@@ -1626,12 +1626,12 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx,
                 child = d_alloc(dir, &qname);
                 if (!child)
                         goto end_instantiate;
-               if (instantiate(dir->d_inode, child, task, ptr) < 0) {
+               if (instantiate(d_inode(dir), child, task, ptr) < 0) {
                         dput(child);
                         goto end_instantiate;
                 }
         }
-       inode = child->d_inode;
+       inode = d_inode(child);
         ino = inode->i_ino;
         type = inode->i_mode >> 12;
         dput(child);
@@ -1674,7 +1674,7 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
                 goto out_notask;
         }
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         task = get_proc_task(inode);
         if (!task)
                 goto out_notask;
@@ -1727,7 +1727,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
         int rc;
  
         rc = -ENOENT;
-       task = get_proc_task(dentry->d_inode);
+       task = get_proc_task(d_inode(dentry));
         if (!task)
                 goto out;
  
@@ -2863,13 +2863,13 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
                 return 0;
  
         if (pos == TGID_OFFSET - 2) {
-               struct inode *inode = ns->proc_self->d_inode;
+               struct inode *inode = d_inode(ns->proc_self);
                 if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
                         return 0;
                 ctx->pos = pos = pos + 1;
         }
         if (pos == TGID_OFFSET - 1) {
-               struct inode *inode = ns->proc_thread_self->d_inode;
+               struct inode *inode = d_inode(ns->proc_thread_self);
                 if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
                         return 0;
                 ctx->pos = pos = pos + 1;
@@ -3188,7 +3188,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
  
  static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct task_struct *p = get_proc_task(inode);
         generic_fillattr(inode, stat);
  
diff --git a/fs/proc/fd.c b/fs/proc/fd.c

index af84ad0..6e5fcd0 100644 (file)
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -91,7 +91,7 @@ static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags)
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         task = get_proc_task(inode);
         fd = proc_fd(inode);
  
@@ -151,14 +151,14 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
         struct task_struct *task;
         int ret = -ENOENT;
  
-       task = get_proc_task(dentry->d_inode);
+       task = get_proc_task(d_inode(dentry));
         if (task) {
                 files = get_files_struct(task);
                 put_task_struct(task);
         }
  
         if (files) {
-               int fd = proc_fd(dentry->d_inode);
+               int fd = proc_fd(d_inode(dentry));
                 struct file *fd_file;
  
                 spin_lock(&files->file_lock);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c

index be65b20..df6327a 100644 (file)
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -101,7 +101,7 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
  
  static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct proc_dir_entry *de = PDE(inode);
         int error;
  
@@ -120,7 +120,7 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
  static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
                         struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct proc_dir_entry *de = PDE(inode);
         if (de && de->nlink)
                 set_nlink(inode, de->nlink);
diff --git a/fs/proc/inode.c b/fs/proc/inode.c

index 7697b66..8272aab 100644 (file)
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -396,7 +396,7 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
  
  static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct proc_dir_entry *pde = PDE(dentry->d_inode);
+       struct proc_dir_entry *pde = PDE(d_inode(dentry));
         if (unlikely(!use_pde(pde)))
                 return ERR_PTR(-EINVAL);
         nd_set_link(nd, pde->data);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c

index c9eac45..e512642 100644 (file)
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -32,7 +32,7 @@ static const struct proc_ns_operations *ns_entries[] = {
  
  static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
         struct task_struct *task;
         struct path ns_path;
@@ -53,7 +53,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
  
  static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
         struct task_struct *task;
         char name[50];
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c

index 1bde894..350984a 100644 (file)
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -142,7 +142,7 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
  static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry,
                 struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct net *net;
  
         net = get_proc_task_net(inode);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c

index f92d5dd..fea2561 100644 (file)
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -604,7 +604,7 @@ static bool proc_sys_fill_cache(struct file *file,
                         return false;
                 }
         }
-       inode = child->d_inode;
+       inode = d_inode(child);
         ino  = inode->i_ino;
         type = inode->i_mode >> 12;
         dput(child);
@@ -710,7 +710,7 @@ static int proc_sys_permission(struct inode *inode, int mask)
  
  static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
@@ -727,7 +727,7 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
  
  static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ctl_table_header *head = grab_header(inode);
         struct ctl_table *table = PROC_I(inode)->sysctl_entry;
  
@@ -773,12 +773,12 @@ static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags)
  {
         if (flags & LOOKUP_RCU)
                 return -ECHILD;
-       return !PROC_I(dentry->d_inode)->sysctl->unregistering;
+       return !PROC_I(d_inode(dentry))->sysctl->unregistering;
  }
  
  static int proc_sys_delete(const struct dentry *dentry)
  {
-       return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
+       return !!PROC_I(d_inode(dentry))->sysctl->unregistering;
  }
  
  static int sysctl_is_seen(struct ctl_table_header *p)
@@ -805,7 +805,7 @@ static int proc_sys_compare(const struct dentry *parent, const struct dentry *de
         /* Although proc doesn't have negative dentries, rcu-walk means
          * that inode here can be NULL */
         /* AV: can it, indeed? */
-       inode = ACCESS_ONCE(dentry->d_inode);
+       inode = d_inode_rcu(dentry);
         if (!inode)
                 return 1;
         if (name->len != len)
diff --git a/fs/proc/root.c b/fs/proc/root.c

index e74ac9f..b7fa4bf 100644 (file)
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -195,7 +195,7 @@ void __init proc_root_init(void)
  static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
  )
  {
-       generic_fillattr(dentry->d_inode, stat);
+       generic_fillattr(d_inode(dentry), stat);
         stat->nlink = proc_root.nlink + nr_processes();
         return 0;
  }
diff --git a/fs/proc/self.c b/fs/proc/self.c

index 4348bb8..6195b4a 100644 (file)
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -46,7 +46,7 @@ static unsigned self_inum;
  
  int proc_setup_self(struct super_block *s)
  {
-       struct inode *root_inode = s->s_root->d_inode;
+       struct inode *root_inode = d_inode(s->s_root);
         struct pid_namespace *ns = s->s_fs_info;
         struct dentry *self;
         
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c

index 59075b5..a837199 100644 (file)
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -47,7 +47,7 @@ static unsigned thread_self_inum;
  
  int proc_setup_thread_self(struct super_block *s)
  {
-       struct inode *root_inode = s->s_root->d_inode;
+       struct inode *root_inode = d_inode(s->s_root);
         struct pid_namespace *ns = s->s_fs_info;
         struct dentry *thread_self;
  
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c

index 56e1ffd..dc43b5f 100644 (file)
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -190,7 +190,7 @@ static const struct file_operations pstore_file_operations = {
   */
  static int pstore_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct pstore_private *p = dentry->d_inode->i_private;
+       struct pstore_private *p = d_inode(dentry)->i_private;
         int err;
  
         err = pstore_check_syslog_permissions(p);
@@ -199,7 +199,7 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
  
         if (p->psi->erase)
                 p->psi->erase(p->type, p->id, p->count,
-                             dentry->d_inode->i_ctime, p->psi);
+                             d_inode(dentry)->i_ctime, p->psi);
         else
                 return -EPERM;
  
@@ -376,7 +376,7 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
                 break;
         }
  
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
  
         dentry = d_alloc_name(root, name);
         if (!dentry)
@@ -396,12 +396,12 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
         list_add(&private->list, &allpstore);
         spin_unlock_irqrestore(&allpstore_lock, flags);
  
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
  
         return 0;
  
  fail_lockedalloc:
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
         kfree(private);
  fail_alloc:
         iput(inode);
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c

index 44e7392..32d2e1a 100644 (file)
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -182,7 +182,7 @@ static const char *qnx6_checkroot(struct super_block *s)
         static char match_root[2][3] = {".\0\0", "..\0"};
         int i, error = 0;
         struct qnx6_dir_entry *dir_entry;
-       struct inode *root = s->s_root->d_inode;
+       struct inode *root = d_inode(s->s_root);
         struct address_space *mapping = root->i_mapping;
         struct page *page = read_mapping_page(mapping, 0, NULL);
         if (IS_ERR(page))
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c

index ecc25cf..20d1f74 100644 (file)
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2328,7 +2328,7 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id,
         if (path->dentry->d_sb != sb)
                 error = -EXDEV;
         else
-               error = vfs_load_quota_inode(path->dentry->d_inode, type,
+               error = vfs_load_quota_inode(d_inode(path->dentry), type,
                                              format_id, DQUOT_USAGE_ENABLED |
                                              DQUOT_LIMITS_ENABLED);
         return error;
@@ -2392,20 +2392,20 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
         struct dentry *dentry;
         int error;
  
-       mutex_lock(&sb->s_root->d_inode->i_mutex);
+       mutex_lock(&d_inode(sb->s_root)->i_mutex);
         dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
-       mutex_unlock(&sb->s_root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(sb->s_root)->i_mutex);
         if (IS_ERR(dentry))
                 return PTR_ERR(dentry);
  
-       if (!dentry->d_inode) {
+       if (d_really_is_negative(dentry)) {
                 error = -ENOENT;
                 goto out;
         }
  
         error = security_quota_on(dentry);
         if (!error)
-               error = vfs_load_quota_inode(dentry->d_inode, type, format_id,
+               error = vfs_load_quota_inode(d_inode(dentry), type, format_id,
                                 DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
  
  out:
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c

index 0b38bef..ba1323a 100644 (file)
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -163,7 +163,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
   */
  static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         unsigned int old_ia_valid = ia->ia_valid;
         int ret = 0;
  
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c

index 0a7dc94..4a024e2 100644 (file)
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -53,8 +53,8 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
  static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *deh)
  {
         struct dentry *privroot = REISERFS_SB(dir->i_sb)->priv_root;
-       return (privroot->d_inode &&
-               deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
+       return (d_really_is_positive(privroot) &&
+               deh->deh_objectid == INODE_PKEY(d_inode(privroot))->k_objectid);
  }
  
  int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c

index 742242b..f6f2fba 100644 (file)
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3308,7 +3308,7 @@ static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
  
  int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         unsigned int ia_valid;
         int error;
  
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c

index cd11358..b55a074 100644 (file)
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -400,7 +400,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child)
         struct inode *inode = NULL;
         struct reiserfs_dir_entry de;
         INITIALIZE_PATH(path_to_entry);
-       struct inode *dir = child->d_inode;
+       struct inode *dir = d_inode(child);
  
         if (dir->i_nlink == 0) {
                 return ERR_PTR(-ENOENT);
@@ -917,7 +917,7 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
                 goto end_rmdir;
         }
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         reiserfs_update_inode_transaction(inode);
         reiserfs_update_inode_transaction(dir);
@@ -987,7 +987,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
  
         dquot_initialize(dir);
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         /*
          * in this transaction we can be doing at max two balancings and
@@ -1174,7 +1174,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
                          struct dentry *dentry)
  {
         int retval;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         struct reiserfs_transaction_handle th;
         /*
          * We need blocks for transaction + update of quotas for
@@ -1311,8 +1311,8 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         dquot_initialize(old_dir);
         dquot_initialize(new_dir);
  
-       old_inode = old_dentry->d_inode;
-       new_dentry_inode = new_dentry->d_inode;
+       old_inode = d_inode(old_dentry);
+       new_dentry_inode = d_inode(new_dentry);
  
         /*
          * make sure that oldname still exists and points to an object we
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c

index 68b5f18..0111ad0 100644 (file)
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1687,7 +1687,7 @@ static __u32 find_hash_out(struct super_block *s)
         __u32 hash = DEFAULT_HASH;
         __u32 deh_hashval, teahash, r5hash, yurahash;
  
-       inode = s->s_root->d_inode;
+       inode = d_inode(s->s_root);
  
         make_cpu_key(&key, inode, ~0, TYPE_DIRENTRY, 3);
         retval = search_by_entry_key(s, &key, &path, &de);
@@ -2347,7 +2347,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
                 err = -EXDEV;
                 goto out;
         }
-       inode = path->dentry->d_inode;
+       inode = d_inode(path->dentry);
         /*
          * We must not pack tails for quota files on reiserfs for quota
          * IO to work
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c

index 4e781e6..e87f9b5 100644 (file)
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -87,9 +87,9 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
  
         BUG_ON(!mutex_is_locked(&dir->i_mutex));
  
-       mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
         error = dir->i_op->unlink(dir, dentry);
-       mutex_unlock(&dentry->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dentry)->i_mutex);
  
         if (!error)
                 d_delete(dentry);
@@ -102,11 +102,11 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
  
         BUG_ON(!mutex_is_locked(&dir->i_mutex));
  
-       mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+       mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_CHILD);
         error = dir->i_op->rmdir(dir, dentry);
         if (!error)
-               dentry->d_inode->i_flags |= S_DEAD;
-       mutex_unlock(&dentry->d_inode->i_mutex);
+               d_inode(dentry)->i_flags |= S_DEAD;
+       mutex_unlock(&d_inode(dentry)->i_mutex);
         if (!error)
                 d_delete(dentry);
  
@@ -120,26 +120,26 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags)
         struct dentry *privroot = REISERFS_SB(sb)->priv_root;
         struct dentry *xaroot;
  
-       if (!privroot->d_inode)
+       if (d_really_is_negative(privroot))
                 return ERR_PTR(-ENODATA);
  
-       mutex_lock_nested(&privroot->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(privroot)->i_mutex, I_MUTEX_XATTR);
  
         xaroot = dget(REISERFS_SB(sb)->xattr_root);
         if (!xaroot)
                 xaroot = ERR_PTR(-ENODATA);
-       else if (!xaroot->d_inode) {
+       else if (d_really_is_negative(xaroot)) {
                 int err = -ENODATA;
  
                 if (xattr_may_create(flags))
-                       err = xattr_mkdir(privroot->d_inode, xaroot, 0700);
+                       err = xattr_mkdir(d_inode(privroot), xaroot, 0700);
                 if (err) {
                         dput(xaroot);
                         xaroot = ERR_PTR(err);
                 }
         }
  
-       mutex_unlock(&privroot->d_inode->i_mutex);
+       mutex_unlock(&d_inode(privroot)->i_mutex);
         return xaroot;
  }
  
@@ -156,21 +156,21 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags)
                  le32_to_cpu(INODE_PKEY(inode)->k_objectid),
                  inode->i_generation);
  
-       mutex_lock_nested(&xaroot->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(xaroot)->i_mutex, I_MUTEX_XATTR);
  
         xadir = lookup_one_len(namebuf, xaroot, strlen(namebuf));
-       if (!IS_ERR(xadir) && !xadir->d_inode) {
+       if (!IS_ERR(xadir) && d_really_is_negative(xadir)) {
                 int err = -ENODATA;
  
                 if (xattr_may_create(flags))
-                       err = xattr_mkdir(xaroot->d_inode, xadir, 0700);
+                       err = xattr_mkdir(d_inode(xaroot), xadir, 0700);
                 if (err) {
                         dput(xadir);
                         xadir = ERR_PTR(err);
                 }
         }
  
-       mutex_unlock(&xaroot->d_inode->i_mutex);
+       mutex_unlock(&d_inode(xaroot)->i_mutex);
         dput(xaroot);
         return xadir;
  }
@@ -195,7 +195,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
                 container_of(ctx, struct reiserfs_dentry_buf, ctx);
         struct dentry *dentry;
  
-       WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex));
+       WARN_ON_ONCE(!mutex_is_locked(&d_inode(dbuf->xadir)->i_mutex));
  
         if (dbuf->count == ARRAY_SIZE(dbuf->dentries))
                 return -ENOSPC;
@@ -207,7 +207,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
         dentry = lookup_one_len(name, dbuf->xadir, namelen);
         if (IS_ERR(dentry)) {
                 return PTR_ERR(dentry);
-       } else if (!dentry->d_inode) {
+       } else if (d_really_is_negative(dentry)) {
                 /* A directory entry exists, but no file? */
                 reiserfs_error(dentry->d_sb, "xattr-20003",
                                "Corrupted directory: xattr %pd listed but "
@@ -249,16 +249,16 @@ static int reiserfs_for_each_xattr(struct inode *inode,
         if (IS_ERR(dir)) {
                 err = PTR_ERR(dir);
                 goto out;
-       } else if (!dir->d_inode) {
+       } else if (d_really_is_negative(dir)) {
                 err = 0;
                 goto out_dir;
         }
  
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
  
         buf.xadir = dir;
         while (1) {
-               err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx);
+               err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
                 if (err)
                         break;
                 if (!buf.count)
@@ -276,7 +276,7 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                         break;
                 buf.count = 0;
         }
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(dir)->i_mutex);
  
         cleanup_dentry_buf(&buf);
  
@@ -298,13 +298,13 @@ static int reiserfs_for_each_xattr(struct inode *inode,
                 if (!err) {
                         int jerror;
  
-                       mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
+                       mutex_lock_nested(&d_inode(dir->d_parent)->i_mutex,
                                           I_MUTEX_XATTR);
                         err = action(dir, data);
                         reiserfs_write_lock(inode->i_sb);
                         jerror = journal_end(&th);
                         reiserfs_write_unlock(inode->i_sb);
-                       mutex_unlock(&dir->d_parent->d_inode->i_mutex);
+                       mutex_unlock(&d_inode(dir->d_parent)->i_mutex);
                         err = jerror ?: err;
                 }
         }
@@ -319,7 +319,7 @@ out:
  
  static int delete_one_xattr(struct dentry *dentry, void *data)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
  
         /* This is the xattr dir, handle specially. */
         if (d_is_dir(dentry))
@@ -384,27 +384,27 @@ static struct dentry *xattr_lookup(struct inode *inode, const char *name,
         if (IS_ERR(xadir))
                 return ERR_CAST(xadir);
  
-       mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
         xafile = lookup_one_len(name, xadir, strlen(name));
         if (IS_ERR(xafile)) {
                 err = PTR_ERR(xafile);
                 goto out;
         }
  
-       if (xafile->d_inode && (flags & XATTR_CREATE))
+       if (d_really_is_positive(xafile) && (flags & XATTR_CREATE))
                 err = -EEXIST;
  
-       if (!xafile->d_inode) {
+       if (d_really_is_negative(xafile)) {
                 err = -ENODATA;
                 if (xattr_may_create(flags))
-                       err = xattr_create(xadir->d_inode, xafile,
+                       err = xattr_create(d_inode(xadir), xafile,
                                               0700|S_IFREG);
         }
  
         if (err)
                 dput(xafile);
  out:
-       mutex_unlock(&xadir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(xadir)->i_mutex);
         dput(xadir);
         if (err)
                 return ERR_PTR(err);
@@ -469,21 +469,21 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
         if (IS_ERR(xadir))
                 return PTR_ERR(xadir);
  
-       mutex_lock_nested(&xadir->d_inode->i_mutex, I_MUTEX_XATTR);
+       mutex_lock_nested(&d_inode(xadir)->i_mutex, I_MUTEX_XATTR);
         dentry = lookup_one_len(name, xadir, strlen(name));
         if (IS_ERR(dentry)) {
                 err = PTR_ERR(dentry);
                 goto out_dput;
         }
  
-       if (dentry->d_inode) {
-               err = xattr_unlink(xadir->d_inode, dentry);
+       if (d_really_is_positive(dentry)) {
+               err = xattr_unlink(d_inode(xadir), dentry);
                 update_ctime(inode);
         }
  
         dput(dentry);
  out_dput:
-       mutex_unlock(&xadir->d_inode->i_mutex);
+       mutex_unlock(&d_inode(xadir)->i_mutex);
         dput(xadir);
         return err;
  }
@@ -533,7 +533,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
                 else
                         chunk = buffer_size - buffer_pos;
  
-               page = reiserfs_get_page(dentry->d_inode, file_pos);
+               page = reiserfs_get_page(d_inode(dentry), file_pos);
                 if (IS_ERR(page)) {
                         err = PTR_ERR(page);
                         goto out_unlock;
@@ -573,18 +573,18 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
         }
  
         new_size = buffer_size + sizeof(struct reiserfs_xattr_header);
-       if (!err && new_size < i_size_read(dentry->d_inode)) {
+       if (!err && new_size < i_size_read(d_inode(dentry))) {
                 struct iattr newattrs = {
                         .ia_ctime = current_fs_time(inode->i_sb),
                         .ia_size = new_size,
                         .ia_valid = ATTR_SIZE | ATTR_CTIME,
                 };
  
-               mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
-               inode_dio_wait(dentry->d_inode);
+               mutex_lock_nested(&d_inode(dentry)->i_mutex, I_MUTEX_XATTR);
+               inode_dio_wait(d_inode(dentry));
  
                 err = reiserfs_setattr(dentry, &newattrs);
-               mutex_unlock(&dentry->d_inode->i_mutex);
+               mutex_unlock(&d_inode(dentry)->i_mutex);
         } else
                 update_ctime(inode);
  out_unlock:
@@ -657,7 +657,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
  
         down_read(&REISERFS_I(inode)->i_xattr_sem);
  
-       isize = i_size_read(dentry->d_inode);
+       isize = i_size_read(d_inode(dentry));
  
         /* Just return the size needed */
         if (buffer == NULL) {
@@ -680,7 +680,7 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
                 else
                         chunk = isize - file_pos;
  
-               page = reiserfs_get_page(dentry->d_inode, file_pos);
+               page = reiserfs_get_page(d_inode(dentry), file_pos);
                 if (IS_ERR(page)) {
                         err = PTR_ERR(page);
                         goto out_unlock;
@@ -775,7 +775,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
  
         handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
  
-       if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+       if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
                 return -EOPNOTSUPP;
  
         return handler->get(dentry, name, buffer, size, handler->flags);
@@ -784,7 +784,7 @@ reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
  /*
   * Inode operation setxattr()
   *
- * dentry->d_inode->i_mutex down
+ * d_inode(dentry)->i_mutex down
   */
  int
  reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
@@ -794,7 +794,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
  
         handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
  
-       if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+       if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
                 return -EOPNOTSUPP;
  
         return handler->set(dentry, name, value, size, flags, handler->flags);
@@ -803,7 +803,7 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
  /*
   * Inode operation removexattr()
   *
- * dentry->d_inode->i_mutex down
+ * d_inode(dentry)->i_mutex down
   */
  int reiserfs_removexattr(struct dentry *dentry, const char *name)
  {
@@ -811,7 +811,7 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name)
  
         handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
  
-       if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+       if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
                 return -EOPNOTSUPP;
  
         return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
@@ -875,14 +875,14 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
                 .size = buffer ? size : 0,
         };
  
-       if (!dentry->d_inode)
+       if (d_really_is_negative(dentry))
                 return -EINVAL;
  
         if (!dentry->d_sb->s_xattr ||
-           get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
+           get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
                 return -EOPNOTSUPP;
  
-       dir = open_xa_dir(dentry->d_inode, XATTR_REPLACE);
+       dir = open_xa_dir(d_inode(dentry), XATTR_REPLACE);
         if (IS_ERR(dir)) {
                 err = PTR_ERR(dir);
                 if (err == -ENODATA)
@@ -890,9 +890,9 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
                 goto out;
         }
  
-       mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
-       err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx);
-       mutex_unlock(&dir->d_inode->i_mutex);
+       mutex_lock_nested(&d_inode(dir)->i_mutex, I_MUTEX_XATTR);
+       err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
+       mutex_unlock(&d_inode(dir)->i_mutex);
  
         if (!err)
                 err = buf.pos;
@@ -905,12 +905,12 @@ out:
  static int create_privroot(struct dentry *dentry)
  {
         int err;
-       struct inode *inode = dentry->d_parent->d_inode;
+       struct inode *inode = d_inode(dentry->d_parent);
  
         WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex));
  
         err = xattr_mkdir(inode, dentry, 0700);
-       if (err || !dentry->d_inode) {
+       if (err || d_really_is_negative(dentry)) {
                 reiserfs_warning(dentry->d_sb, "jdm-20006",
                                  "xattrs/ACLs enabled and couldn't "
                                  "find/create .reiserfs_priv. "
@@ -918,7 +918,7 @@ static int create_privroot(struct dentry *dentry)
                 return -EOPNOTSUPP;
         }
  
-       dentry->d_inode->i_flags |= S_PRIVATE;
+       d_inode(dentry)->i_flags |= S_PRIVATE;
         reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr "
                       "storage.\n", PRIVROOT_NAME);
  
@@ -997,17 +997,17 @@ int reiserfs_lookup_privroot(struct super_block *s)
         int err = 0;
  
         /* If we don't have the privroot located yet - go find it */
-       mutex_lock(&s->s_root->d_inode->i_mutex);
+       mutex_lock(&d_inode(s->s_root)->i_mutex);
         dentry = lookup_one_len(PRIVROOT_NAME, s->s_root,
                                 strlen(PRIVROOT_NAME));
         if (!IS_ERR(dentry)) {
                 REISERFS_SB(s)->priv_root = dentry;
                 d_set_d_op(dentry, &xattr_lookup_poison_ops);
-               if (dentry->d_inode)
-                       dentry->d_inode->i_flags |= S_PRIVATE;
+               if (d_really_is_positive(dentry))
+                       d_inode(dentry)->i_flags |= S_PRIVATE;
         } else
                 err = PTR_ERR(dentry);
-       mutex_unlock(&s->s_root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(s->s_root)->i_mutex);
  
         return err;
  }
@@ -1026,15 +1026,15 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
         if (err)
                 goto error;
  
-       if (!privroot->d_inode && !(mount_flags & MS_RDONLY)) {
-               mutex_lock(&s->s_root->d_inode->i_mutex);
+       if (d_really_is_negative(privroot) && !(mount_flags & MS_RDONLY)) {
+               mutex_lock(&d_inode(s->s_root)->i_mutex);
                 err = create_privroot(REISERFS_SB(s)->priv_root);
-               mutex_unlock(&s->s_root->d_inode->i_mutex);
+               mutex_unlock(&d_inode(s->s_root)->i_mutex);
         }
  
-       if (privroot->d_inode) {
+       if (d_really_is_positive(privroot)) {
                 s->s_xattr = reiserfs_xattr_handlers;
-               mutex_lock(&privroot->d_inode->i_mutex);
+               mutex_lock(&d_inode(privroot)->i_mutex);
                 if (!REISERFS_SB(s)->xattr_root) {
                         struct dentry *dentry;
  
@@ -1045,7 +1045,7 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags)
                         else
                                 err = PTR_ERR(dentry);
                 }
-               mutex_unlock(&privroot->d_inode->i_mutex);
+               mutex_unlock(&d_inode(privroot)->i_mutex);
         }
  
  error:
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h

index f620e96..15dde62 100644 (file)
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -78,7 +78,7 @@ static inline size_t reiserfs_xattr_jcreate_nblocks(struct inode *inode)
  
         if ((REISERFS_I(inode)->i_flags & i_has_xattr_dir) == 0) {
                 nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
-               if (!REISERFS_SB(inode->i_sb)->xattr_root->d_inode)
+               if (d_really_is_negative(REISERFS_SB(inode->i_sb)->xattr_root))
                         nblocks += JOURNAL_BLOCKS_PER_OBJECT(inode->i_sb);
         }
  
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c

index e7f8939..9a3b061 100644 (file)
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -15,10 +15,10 @@ security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
         if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
                 return -EINVAL;
  
-       if (IS_PRIVATE(dentry->d_inode))
+       if (IS_PRIVATE(d_inode(dentry)))
                 return -EPERM;
  
-       return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
+       return reiserfs_xattr_get(d_inode(dentry), name, buffer, size);
  }
  
  static int
@@ -28,10 +28,10 @@ security_set(struct dentry *dentry, const char *name, const void *buffer,
         if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
                 return -EINVAL;
  
-       if (IS_PRIVATE(dentry->d_inode))
+       if (IS_PRIVATE(d_inode(dentry)))
                 return -EPERM;
  
-       return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
+       return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
  }
  
  static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
@@ -39,7 +39,7 @@ static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
  {
         const size_t len = namelen + 1;
  
-       if (IS_PRIVATE(dentry->d_inode))
+       if (IS_PRIVATE(d_inode(dentry)))
                 return 0;
  
         if (list && len <= list_len) {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c

index 5eeb0c4..e4f1343 100644 (file)
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -14,10 +14,10 @@ trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
         if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
                 return -EINVAL;
  
-       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
+       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
                 return -EPERM;
  
-       return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
+       return reiserfs_xattr_get(d_inode(dentry), name, buffer, size);
  }
  
  static int
@@ -27,10 +27,10 @@ trusted_set(struct dentry *dentry, const char *name, const void *buffer,
         if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
                 return -EINVAL;
  
-       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
+       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
                 return -EPERM;
  
-       return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
+       return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
  }
  
  static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
@@ -38,7 +38,7 @@ static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
  {
         const size_t len = name_len + 1;
  
-       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
+       if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
                 return 0;
  
         if (list && len <= list_size) {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c

index e50eab0..d0b08d3 100644 (file)
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -15,7 +15,7 @@ user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
                 return -EINVAL;
         if (!reiserfs_xattrs_user(dentry->d_sb))
                 return -EOPNOTSUPP;
-       return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
+       return reiserfs_xattr_get(d_inode(dentry), name, buffer, size);
  }
  
  static int
@@ -27,7 +27,7 @@ user_set(struct dentry *dentry, const char *name, const void *buffer,
  
         if (!reiserfs_xattrs_user(dentry->d_sb))
                 return -EOPNOTSUPP;
-       return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
+       return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
  }
  
  static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c

index 5e1101f..8073b65 100644 (file)
--- a/fs/squashfs/export.c
+++ b/fs/squashfs/export.c
@@ -110,7 +110,7 @@ static struct dentry *squashfs_fh_to_parent(struct super_block *sb,
  
  static struct dentry *squashfs_get_parent(struct dentry *child)
  {
-       struct inode *inode = child->d_inode;
+       struct inode *inode = d_inode(child);
         unsigned int parent_ino = squashfs_i(inode)->parent;
  
         return squashfs_export_iget(inode->i_sb, parent_ino);
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c

index 92fcde7..e5e0ddf 100644 (file)
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -39,7 +39,7 @@ static const struct xattr_handler *squashfs_xattr_handler(int);
  ssize_t squashfs_listxattr(struct dentry *d, char *buffer,
         size_t buffer_size)
  {
-       struct inode *inode = d->d_inode;
+       struct inode *inode = d_inode(d);
         struct super_block *sb = inode->i_sb;
         struct squashfs_sb_info *msblk = sb->s_fs_info;
         u64 start = SQUASHFS_XATTR_BLK(squashfs_i(inode)->xattr)
@@ -229,7 +229,7 @@ static int squashfs_user_get(struct dentry *d, const char *name, void *buffer,
         if (name[0] == '\0')
                 return  -EINVAL;
  
-       return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_USER, name,
+       return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_USER, name,
                 buffer, size);
  }
  
@@ -259,7 +259,7 @@ static int squashfs_trusted_get(struct dentry *d, const char *name,
         if (name[0] == '\0')
                 return  -EINVAL;
  
-       return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_TRUSTED, name,
+       return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_TRUSTED, name,
                 buffer, size);
  }
  
@@ -286,7 +286,7 @@ static int squashfs_security_get(struct dentry *d, const char *name,
         if (name[0] == '\0')
                 return  -EINVAL;
  
-       return squashfs_xattr_get(d->d_inode, SQUASHFS_XATTR_SECURITY, name,
+       return squashfs_xattr_get(d_inode(d), SQUASHFS_XATTR_SECURITY, name,
                 buffer, size);
  }
  
diff --git a/fs/stat.c b/fs/stat.c

index 19636af..cccc1aa 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -51,7 +51,7 @@ EXPORT_SYMBOL(generic_fillattr);
   */
  int vfs_getattr_nosec(struct path *path, struct kstat *stat)
  {
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
  
         if (inode->i_op->getattr)
                 return inode->i_op->getattr(path->mnt, path->dentry, stat);
@@ -326,7 +326,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname,
  retry:
         error = user_path_at_empty(dfd, pathname, lookup_flags, &path, &empty);
         if (!error) {
-               struct inode *inode = path.dentry->d_inode;
+               struct inode *inode = d_backing_inode(path.dentry);
  
                 error = empty ? -ENOENT : -EINVAL;
                 if (inode->i_op->readlink) {
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c

index d42291d..8f3555f 100644 (file)
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -132,7 +132,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
  {
         const char * name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
-       struct inode * dir = dentry->d_parent->d_inode;
+       struct inode * dir = d_inode(dentry->d_parent);
         unsigned long start, n;
         unsigned long npages = dir_pages(dir);
         struct page *page = NULL;
@@ -176,7 +176,7 @@ found:
  
  int sysv_add_link(struct dentry *dentry, struct inode *inode)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         const char * name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
         struct page *page = NULL;
diff --git a/fs/sysv/file.c b/fs/sysv/file.c

index a48e304..82ddc09 100644 (file)
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -30,7 +30,7 @@ const struct file_operations sysv_file_operations = {
  
  static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         error = inode_change_ok(inode, attr);
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c

index 66bc316..2fde40a 100644 (file)
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -443,7 +443,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
  int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
  {
         struct super_block *s = dentry->d_sb;
-       generic_fillattr(dentry->d_inode, stat);
+       generic_fillattr(d_inode(dentry), stat);
         stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
         stat->blksize = s->s_blocksize;
         return 0;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c

index 731b2bb..11e83ed 100644 (file)
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -118,7 +118,7 @@ out_fail:
  static int sysv_link(struct dentry * old_dentry, struct inode * dir, 
         struct dentry * dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
  
         inode->i_ctime = CURRENT_TIME_SEC;
         inode_inc_link_count(inode);
@@ -166,7 +166,7 @@ out_dir:
  
  static int sysv_unlink(struct inode * dir, struct dentry * dentry)
  {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
         struct page * page;
         struct sysv_dir_entry * de;
         int err = -ENOENT;
@@ -187,7 +187,7 @@ out:
  
  static int sysv_rmdir(struct inode * dir, struct dentry * dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int err = -ENOTEMPTY;
  
         if (sysv_empty_dir(inode)) {
@@ -208,8 +208,8 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry)
  static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
                   struct inode * new_dir, struct dentry * new_dentry)
  {
-       struct inode * old_inode = old_dentry->d_inode;
-       struct inode * new_inode = new_dentry->d_inode;
+       struct inode * old_inode = d_inode(old_dentry);
+       struct inode * new_inode = d_inode(new_dentry);
         struct page * dir_page = NULL;
         struct sysv_dir_entry * dir_de = NULL;
         struct page * old_page;
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c

index 00d2f8a..d3fa0d7 100644 (file)
--- a/fs/sysv/symlink.c
+++ b/fs/sysv/symlink.c
@@ -10,7 +10,7 @@
  
  static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       nd_set_link(nd, (char *)SYSV_I(dentry->d_inode)->i_data);
+       nd_set_link(nd, (char *)SYSV_I(d_inode(dentry))->i_data);
         return NULL;
  }
  
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c

index 02d1ee7..27060fc 100644 (file)
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -499,7 +499,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
                       struct dentry *dentry)
  {
         struct ubifs_info *c = dir->i_sb->s_fs_info;
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         struct ubifs_inode *ui = ubifs_inode(inode);
         struct ubifs_inode *dir_ui = ubifs_inode(dir);
         int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
@@ -554,7 +554,7 @@ out_cancel:
  static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
  {
         struct ubifs_info *c = dir->i_sb->s_fs_info;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ubifs_inode *dir_ui = ubifs_inode(dir);
         int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
         int err, budgeted = 1;
@@ -646,7 +646,7 @@ static int check_dir_empty(struct ubifs_info *c, struct inode *dir)
  static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
  {
         struct ubifs_info *c = dir->i_sb->s_fs_info;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
         int err, budgeted = 1;
         struct ubifs_inode *dir_ui = ubifs_inode(dir);
@@ -662,7 +662,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
                 inode->i_ino, dir->i_ino);
         ubifs_assert(mutex_is_locked(&dir->i_mutex));
         ubifs_assert(mutex_is_locked(&inode->i_mutex));
-       err = check_dir_empty(c, dentry->d_inode);
+       err = check_dir_empty(c, d_inode(dentry));
         if (err)
                 return err;
  
@@ -970,8 +970,8 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
                         struct inode *new_dir, struct dentry *new_dentry)
  {
         struct ubifs_info *c = old_dir->i_sb->s_fs_info;
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode);
         int err, release, sync = 0, move = (new_dir != old_dir);
         int is_dir = S_ISDIR(old_inode->i_mode);
@@ -1136,7 +1136,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
                   struct kstat *stat)
  {
         loff_t size;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ubifs_inode *ui = ubifs_inode(inode);
  
         mutex_lock(&ui->ui_mutex);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c

index 3ba3fef..35efc10 100644 (file)
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1257,7 +1257,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
  int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
  {
         int err;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct ubifs_info *c = inode->i_sb->s_fs_info;
  
         dbg_gen("ino %lu, mode %#x, ia_valid %#x",
@@ -1302,7 +1302,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset,
  
  static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct ubifs_inode *ui = ubifs_inode(dentry->d_inode);
+       struct ubifs_inode *ui = ubifs_inode(d_inode(dentry));
  
         nd_set_link(nd, ui->data);
         return NULL;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c

index 90ae1a8..0b9da5b 100644 (file)
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -930,8 +930,8 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
         union ubifs_key key;
         struct ubifs_dent_node *dent, *dent2;
         int err, dlen1, dlen2, ilen, lnum, offs, len;
-       const struct inode *old_inode = old_dentry->d_inode;
-       const struct inode *new_inode = new_dentry->d_inode;
+       const struct inode *old_inode = d_inode(old_dentry);
+       const struct inode *new_inode = d_inode(new_dentry);
         int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
         int last_reference = !!(new_inode && new_inode->i_nlink == 0);
         int move = (old_dir != new_dir);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c

index 3659b19..96f3448 100644 (file)
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -364,15 +364,15 @@ int ubifs_setxattr(struct dentry *dentry, const char *name,
                    const void *value, size_t size, int flags)
  {
         dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd",
-               name, dentry->d_inode->i_ino, dentry, size);
+               name, d_inode(dentry)->i_ino, dentry, size);
  
-       return setxattr(dentry->d_inode, name, value, size, flags);
+       return setxattr(d_inode(dentry), name, value, size, flags);
  }
  
  ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
                        size_t size)
  {
-       struct inode *inode, *host = dentry->d_inode;
+       struct inode *inode, *host = d_inode(dentry);
         struct ubifs_info *c = host->i_sb->s_fs_info;
         struct qstr nm = QSTR_INIT(name, strlen(name));
         struct ubifs_inode *ui;
@@ -432,7 +432,7 @@ out_unlock:
  ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
         union ubifs_key key;
-       struct inode *host = dentry->d_inode;
+       struct inode *host = d_inode(dentry);
         struct ubifs_info *c = host->i_sb->s_fs_info;
         struct ubifs_inode *host_ui = ubifs_inode(host);
         struct ubifs_dent_node *xent, *pxent = NULL;
@@ -535,7 +535,7 @@ out_cancel:
  
  int ubifs_removexattr(struct dentry *dentry, const char *name)
  {
-       struct inode *inode, *host = dentry->d_inode;
+       struct inode *inode, *host = d_inode(dentry);
         struct ubifs_info *c = host->i_sb->s_fs_info;
         struct qstr nm = QSTR_INIT(name, strlen(name));
         struct ubifs_dent_node *xent;
diff --git a/fs/udf/file.c b/fs/udf/file.c

index 5dadad9..7a95b8f 100644 (file)
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -249,7 +249,7 @@ const struct file_operations udf_file_operations = {
  
  static int udf_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         int error;
  
         error = inode_change_ok(inode, attr);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c

index 3966197..5c03f0d 100644 (file)
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -551,7 +551,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi,
  static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
  {
         struct udf_inode_info *iinfo = UDF_I(inode);
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         struct udf_fileident_bh fibh;
         struct fileIdentDesc cfi, *fi;
         int err;
@@ -767,7 +767,7 @@ static int empty_dir(struct inode *dir)
  static int udf_rmdir(struct inode *dir, struct dentry *dentry)
  {
         int retval;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct udf_fileident_bh fibh;
         struct fileIdentDesc *fi, cfi;
         struct kernel_lb_addr tloc;
@@ -809,7 +809,7 @@ out:
  static int udf_unlink(struct inode *dir, struct dentry *dentry)
  {
         int retval;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct udf_fileident_bh fibh;
         struct fileIdentDesc *fi;
         struct fileIdentDesc cfi;
@@ -999,7 +999,7 @@ out_no_entry:
  static int udf_link(struct dentry *old_dentry, struct inode *dir,
                     struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         struct udf_fileident_bh fibh;
         struct fileIdentDesc cfi, *fi;
         int err;
@@ -1038,8 +1038,8 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
  static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
                       struct inode *new_dir, struct dentry *new_dentry)
  {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct udf_fileident_bh ofibh, nfibh;
         struct fileIdentDesc *ofi = NULL, *nfi = NULL, *dir_fi = NULL;
         struct fileIdentDesc ocfi, ncfi;
@@ -1179,7 +1179,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
         struct fileIdentDesc cfi;
         struct udf_fileident_bh fibh;
  
-       if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
+       if (!udf_find_entry(d_inode(child), &dotdot, &fibh, &cfi))
                 return ERR_PTR(-EACCES);
  
         if (fibh.sbh != fibh.ebh)
@@ -1187,7 +1187,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
         brelse(fibh.sbh);
  
         tloc = lelb_to_cpu(cfi.icb.extLocation);
-       inode = udf_iget(child->d_inode->i_sb, &tloc);
+       inode = udf_iget(d_inode(child)->i_sb, &tloc);
         if (IS_ERR(inode))
                 return ERR_CAST(inode);
  
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c

index 0ecc2ce..1bfe8ca 100644 (file)
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -311,7 +311,7 @@ found:
   */
  int ufs_add_link(struct dentry *dentry, struct inode *inode)
  {
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_inode(dentry->d_parent);
         const unsigned char *name = dentry->d_name.name;
         int namelen = dentry->d_name.len;
         struct super_block *sb = dir->i_sb;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c

index fd65deb..e491a93 100644 (file)
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -165,7 +165,7 @@ out_fail:
  static int ufs_link (struct dentry * old_dentry, struct inode * dir,
         struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         int error;
  
         lock_ufs(dir->i_sb);
@@ -222,7 +222,7 @@ out_fail:
  
  static int ufs_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
         struct ufs_dir_entry *de;
         struct page *page;
         int err = -ENOENT;
@@ -244,7 +244,7 @@ out:
  
  static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
  {
-       struct inode * inode = dentry->d_inode;
+       struct inode * inode = d_inode(dentry);
         int err= -ENOTEMPTY;
  
         lock_ufs(dir->i_sb);
@@ -263,8 +263,8 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
  static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
                       struct inode *new_dir, struct dentry *new_dentry)
  {
-       struct inode *old_inode = old_dentry->d_inode;
-       struct inode *new_inode = new_dentry->d_inode;
+       struct inode *old_inode = d_inode(old_dentry);
+       struct inode *new_inode = d_inode(new_dentry);
         struct page *dir_page = NULL;
         struct ufs_dir_entry * dir_de = NULL;
         struct page *old_page;
diff --git a/fs/ufs/super.c b/fs/ufs/super.c

index 8092d37..b3bc3e7 100644 (file)
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -144,10 +144,10 @@ static struct dentry *ufs_get_parent(struct dentry *child)
         struct qstr dot_dot = QSTR_INIT("..", 2);
         ino_t ino;
  
-       ino = ufs_inode_by_name(child->d_inode, &dot_dot);
+       ino = ufs_inode_by_name(d_inode(child), &dot_dot);
         if (!ino)
                 return ERR_PTR(-ENOENT);
-       return d_obtain_alias(ufs_iget(child->d_inode->i_sb, ino));
+       return d_obtain_alias(ufs_iget(d_inode(child)->i_sb, ino));
  }
  
  static const struct export_operations ufs_export_ops = {
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c

index d283628..5b537e2 100644 (file)
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -34,7 +34,7 @@
  
  static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       struct ufs_inode_info *p = UFS_I(dentry->d_inode);
+       struct ufs_inode_info *p = UFS_I(d_inode(dentry));
         nd_set_link(nd, (char*)p->i_u1.i_symlink);
         return NULL;
  }
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c

index f04f89f..2115470 100644 (file)
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -492,7 +492,7 @@ out:
  
  int ufs_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         unsigned int ia_valid = attr->ia_valid;
         int error;
  
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c

index a6fbf44..516162b 100644 (file)
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -260,6 +260,7 @@ xfs_alloc_fix_len(
                 rlen = rlen - (k - args->mod);
         else
                 rlen = rlen - args->prod + (args->mod - k);
+       /* casts to (int) catch length underflows */
         if ((int)rlen < (int)args->minlen)
                 return;
         ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
@@ -286,7 +287,8 @@ xfs_alloc_fix_minleft(
         if (diff >= 0)
                 return 1;
         args->len += diff;              /* shrink the allocated space */
-       if (args->len >= args->minlen)
+       /* casts to (int) catch length underflows */
+       if ((int)args->len >= (int)args->minlen)
                 return 1;
         args->agbno = NULLAGBLOCK;
         return 0;
@@ -315,6 +317,9 @@ xfs_alloc_fixup_trees(
         xfs_agblock_t   nfbno2;         /* second new free startblock */
         xfs_extlen_t    nflen1=0;       /* first new free length */
         xfs_extlen_t    nflen2=0;       /* second new free length */
+       struct xfs_mount *mp;
+
+       mp = cnt_cur->bc_mp;
  
         /*
          * Look up the record in the by-size tree if necessary.
@@ -323,13 +328,13 @@ xfs_alloc_fixup_trees(
  #ifdef DEBUG
                 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                         i == 1 && nfbno1 == fbno && nflen1 == flen);
  #endif
         } else {
                 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
         }
         /*
          * Look up the record in the by-block tree if necessary.
@@ -338,13 +343,13 @@ xfs_alloc_fixup_trees(
  #ifdef DEBUG
                 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                         i == 1 && nfbno1 == fbno && nflen1 == flen);
  #endif
         } else {
                 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
         }
  
  #ifdef DEBUG
@@ -355,7 +360,7 @@ xfs_alloc_fixup_trees(
                 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
                 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
  
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                         bnoblock->bb_numrecs == cntblock->bb_numrecs);
         }
  #endif
@@ -386,25 +391,25 @@ xfs_alloc_fixup_trees(
          */
         if ((error = xfs_btree_delete(cnt_cur, &i)))
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
         /*
          * Add new by-size btree entry(s).
          */
         if (nfbno1 != NULLAGBLOCK) {
                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
                 if ((error = xfs_btree_insert(cnt_cur, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
         }
         if (nfbno2 != NULLAGBLOCK) {
                 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
                 if ((error = xfs_btree_insert(cnt_cur, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
         }
         /*
          * Fix up the by-block btree entry(s).
@@ -415,7 +420,7 @@ xfs_alloc_fixup_trees(
                  */
                 if ((error = xfs_btree_delete(bno_cur, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
         } else {
                 /*
                  * Update the by-block entry to start later|be shorter.
@@ -429,10 +434,10 @@ xfs_alloc_fixup_trees(
                  */
                 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 0);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
                 if ((error = xfs_btree_insert(bno_cur, &i)))
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
         }
         return 0;
  }
@@ -682,7 +687,7 @@ xfs_alloc_ag_vextent_exact(
         error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
         if (error)
                 goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
         ASSERT(fbno <= args->agbno);
  
         /*
@@ -783,7 +788,7 @@ xfs_alloc_find_best_extent(
                 error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
                 if (error)
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
  
                 /*
@@ -946,7 +951,7 @@ restart:
                                 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
                                                 &ltlen, &i)))
                                         goto error0;
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                                 if (ltlen >= args->minlen)
                                         break;
                                 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
@@ -966,7 +971,7 @@ restart:
                          */
                         if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
                                 goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                         xfs_alloc_compute_aligned(args, ltbno, ltlen,
                                                   &ltbnoa, &ltlena);
                         if (ltlena < args->minlen)
@@ -999,7 +1004,7 @@ restart:
                 cnt_cur->bc_ptrs[0] = besti;
                 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
                 args->len = blen;
                 if (!xfs_alloc_fix_minleft(args)) {
@@ -1088,7 +1093,7 @@ restart:
                 if (bno_cur_lt) {
                         if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
                                 goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                         xfs_alloc_compute_aligned(args, ltbno, ltlen,
                                                   &ltbnoa, &ltlena);
                         if (ltlena >= args->minlen)
@@ -1104,7 +1109,7 @@ restart:
                 if (bno_cur_gt) {
                         if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
                                 goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                         xfs_alloc_compute_aligned(args, gtbno, gtlen,
                                                   &gtbnoa, &gtlena);
                         if (gtlena >= args->minlen)
@@ -1303,7 +1308,7 @@ restart:
                         error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
                         if (error)
                                 goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
  
                         xfs_alloc_compute_aligned(args, fbno, flen,
                                                   &rbno, &rlen);
@@ -1342,7 +1347,7 @@ restart:
          * This can't happen in the second case above.
          */
         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-       XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+       XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
                         (rlen <= flen && rbno + rlen <= fbno + flen), error0);
         if (rlen < args->maxlen) {
                 xfs_agblock_t   bestfbno;
@@ -1362,13 +1367,13 @@ restart:
                         if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
                                         &i)))
                                 goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                         if (flen < bestrlen)
                                 break;
                         xfs_alloc_compute_aligned(args, fbno, flen,
                                                   &rbno, &rlen);
                         rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
-                       XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
+                       XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
                                 (rlen <= flen && rbno + rlen <= fbno + flen),
                                 error0);
                         if (rlen > bestrlen) {
@@ -1383,7 +1388,7 @@ restart:
                 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
                                 &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                 rlen = bestrlen;
                 rbno = bestrbno;
                 flen = bestflen;
@@ -1408,7 +1413,7 @@ restart:
         if (!xfs_alloc_fix_minleft(args))
                 goto out_nominleft;
         rlen = args->len;
-       XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
+       XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
         /*
          * Allocate and initialize a cursor for the by-block tree.
          */
@@ -1422,7 +1427,7 @@ restart:
         cnt_cur = bno_cur = NULL;
         args->len = rlen;
         args->agbno = rbno;
-       XFS_WANT_CORRUPTED_GOTO(
+       XFS_WANT_CORRUPTED_GOTO(args->mp,
                 args->agbno + args->len <=
                         be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
                 error0);
@@ -1467,7 +1472,7 @@ xfs_alloc_ag_vextent_small(
         if (i) {
                 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
         }
         /*
          * Nothing in the btree, try the freelist.  Make sure
@@ -1493,7 +1498,7 @@ xfs_alloc_ag_vextent_small(
                         }
                         args->len = 1;
                         args->agbno = fbno;
-                       XFS_WANT_CORRUPTED_GOTO(
+                       XFS_WANT_CORRUPTED_GOTO(args->mp,
                                 args->agbno + args->len <=
                                 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
                                 error0);
@@ -1579,7 +1584,7 @@ xfs_free_ag_extent(
                  */
                 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 /*
                  * It's not contiguous, though.
                  */
@@ -1591,7 +1596,8 @@ xfs_free_ag_extent(
                          * space was invalid, it's (partly) already free.
                          * Very bad.
                          */
-                       XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
+                       XFS_WANT_CORRUPTED_GOTO(mp,
+                                               ltbno + ltlen <= bno, error0);
                 }
         }
         /*
@@ -1606,7 +1612,7 @@ xfs_free_ag_extent(
                  */
                 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 /*
                  * It's not contiguous, though.
                  */
@@ -1618,7 +1624,7 @@ xfs_free_ag_extent(
                          * space was invalid, it's (partly) already free.
                          * Very bad.
                          */
-                       XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
+                       XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
                 }
         }
         /*
@@ -1635,31 +1641,31 @@ xfs_free_ag_extent(
                  */
                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 if ((error = xfs_btree_delete(cnt_cur, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 /*
                  * Delete the old by-size entry on the right.
                  */
                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 if ((error = xfs_btree_delete(cnt_cur, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 /*
                  * Delete the old by-block entry for the right block.
                  */
                 if ((error = xfs_btree_delete(bno_cur, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 /*
                  * Move the by-block cursor back to the left neighbor.
                  */
                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
  #ifdef DEBUG
                 /*
                  * Check that this is the right record: delete didn't
@@ -1672,7 +1678,7 @@ xfs_free_ag_extent(
                         if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
                                         &i)))
                                 goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(
+                       XFS_WANT_CORRUPTED_GOTO(mp,
                                 i == 1 && xxbno == ltbno && xxlen == ltlen,
                                 error0);
                 }
@@ -1695,17 +1701,17 @@ xfs_free_ag_extent(
                  */
                 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 if ((error = xfs_btree_delete(cnt_cur, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 /*
                  * Back up the by-block cursor to the left neighbor, and
                  * update its length.
                  */
                 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 nbno = ltbno;
                 nlen = len + ltlen;
                 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
@@ -1721,10 +1727,10 @@ xfs_free_ag_extent(
                  */
                 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 if ((error = xfs_btree_delete(cnt_cur, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 /*
                  * Update the starting block and length of the right
                  * neighbor in the by-block tree.
@@ -1743,7 +1749,7 @@ xfs_free_ag_extent(
                 nlen = len;
                 if ((error = xfs_btree_insert(bno_cur, &i)))
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
         }
         xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
         bno_cur = NULL;
@@ -1752,10 +1758,10 @@ xfs_free_ag_extent(
          */
         if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
                 goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
         if ((error = xfs_btree_insert(cnt_cur, &i)))
                 goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
         xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
         cnt_cur = NULL;
  
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c

index 15105db..04e79d5 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -86,8 +86,83 @@ STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args,
                         int move_count);
  STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
  
+/*
+ * attr3 block 'firstused' conversion helpers.
+ *
+ * firstused refers to the offset of the first used byte of the nameval region
+ * of an attr leaf block. The region starts at the tail of the block and expands
+ * backwards towards the middle. As such, firstused is initialized to the block
+ * size for an empty leaf block and is reduced from there.
+ *
+ * The attr3 block size is pegged to the fsb size and the maximum fsb is 64k.
+ * The in-core firstused field is 32-bit and thus supports the maximum fsb size.
+ * The on-disk field is only 16-bit, however, and overflows at 64k. Since this
+ * only occurs at exactly 64k, we use zero as a magic on-disk value to represent
+ * the attr block size. The following helpers manage the conversion between the
+ * in-core and on-disk formats.
+ */
+
+static void
+xfs_attr3_leaf_firstused_from_disk(
+       struct xfs_da_geometry          *geo,
+       struct xfs_attr3_icleaf_hdr     *to,
+       struct xfs_attr_leafblock       *from)
+{
+       struct xfs_attr3_leaf_hdr       *hdr3;
+
+       if (from->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) {
+               hdr3 = (struct xfs_attr3_leaf_hdr *) from;
+               to->firstused = be16_to_cpu(hdr3->firstused);
+       } else {
+               to->firstused = be16_to_cpu(from->hdr.firstused);
+       }
+
+       /*
+        * Convert from the magic fsb size value to actual blocksize. This
+        * should only occur for empty blocks when the block size overflows
+        * 16-bits.
+        */
+       if (to->firstused == XFS_ATTR3_LEAF_NULLOFF) {
+               ASSERT(!to->count && !to->usedbytes);
+               ASSERT(geo->blksize > USHRT_MAX);
+               to->firstused = geo->blksize;
+       }
+}
+
+static void
+xfs_attr3_leaf_firstused_to_disk(
+       struct xfs_da_geometry          *geo,
+       struct xfs_attr_leafblock       *to,
+       struct xfs_attr3_icleaf_hdr     *from)
+{
+       struct xfs_attr3_leaf_hdr       *hdr3;
+       uint32_t                        firstused;
+
+       /* magic value should only be seen on disk */
+       ASSERT(from->firstused != XFS_ATTR3_LEAF_NULLOFF);
+
+       /*
+        * Scale down the 32-bit in-core firstused value to the 16-bit on-disk
+        * value. This only overflows at the max supported value of 64k. Use the
+        * magic on-disk value to represent block size in this case.
+        */
+       firstused = from->firstused;
+       if (firstused > USHRT_MAX) {
+               ASSERT(from->firstused == geo->blksize);
+               firstused = XFS_ATTR3_LEAF_NULLOFF;
+       }
+
+       if (from->magic == XFS_ATTR3_LEAF_MAGIC) {
+               hdr3 = (struct xfs_attr3_leaf_hdr *) to;
+               hdr3->firstused = cpu_to_be16(firstused);
+       } else {
+               to->hdr.firstused = cpu_to_be16(firstused);
+       }
+}
+
  void
  xfs_attr3_leaf_hdr_from_disk(
+       struct xfs_da_geometry          *geo,
         struct xfs_attr3_icleaf_hdr     *to,
         struct xfs_attr_leafblock       *from)
  {
@@ -104,7 +179,7 @@ xfs_attr3_leaf_hdr_from_disk(
                 to->magic = be16_to_cpu(hdr3->info.hdr.magic);
                 to->count = be16_to_cpu(hdr3->count);
                 to->usedbytes = be16_to_cpu(hdr3->usedbytes);
-               to->firstused = be16_to_cpu(hdr3->firstused);
+               xfs_attr3_leaf_firstused_from_disk(geo, to, from);
                 to->holes = hdr3->holes;
  
                 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
@@ -118,7 +193,7 @@ xfs_attr3_leaf_hdr_from_disk(
         to->magic = be16_to_cpu(from->hdr.info.magic);
         to->count = be16_to_cpu(from->hdr.count);
         to->usedbytes = be16_to_cpu(from->hdr.usedbytes);
-       to->firstused = be16_to_cpu(from->hdr.firstused);
+       xfs_attr3_leaf_firstused_from_disk(geo, to, from);
         to->holes = from->hdr.holes;
  
         for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
@@ -129,10 +204,11 @@ xfs_attr3_leaf_hdr_from_disk(
  
  void
  xfs_attr3_leaf_hdr_to_disk(
+       struct xfs_da_geometry          *geo,
         struct xfs_attr_leafblock       *to,
         struct xfs_attr3_icleaf_hdr     *from)
  {
-       int     i;
+       int                             i;
  
         ASSERT(from->magic == XFS_ATTR_LEAF_MAGIC ||
                from->magic == XFS_ATTR3_LEAF_MAGIC);
@@ -145,7 +221,7 @@ xfs_attr3_leaf_hdr_to_disk(
                 hdr3->info.hdr.magic = cpu_to_be16(from->magic);
                 hdr3->count = cpu_to_be16(from->count);
                 hdr3->usedbytes = cpu_to_be16(from->usedbytes);
-               hdr3->firstused = cpu_to_be16(from->firstused);
+               xfs_attr3_leaf_firstused_to_disk(geo, to, from);
                 hdr3->holes = from->holes;
                 hdr3->pad1 = 0;
  
@@ -160,7 +236,7 @@ xfs_attr3_leaf_hdr_to_disk(
         to->hdr.info.magic = cpu_to_be16(from->magic);
         to->hdr.count = cpu_to_be16(from->count);
         to->hdr.usedbytes = cpu_to_be16(from->usedbytes);
-       to->hdr.firstused = cpu_to_be16(from->firstused);
+       xfs_attr3_leaf_firstused_to_disk(geo, to, from);
         to->hdr.holes = from->holes;
         to->hdr.pad1 = 0;
  
@@ -178,7 +254,7 @@ xfs_attr3_leaf_verify(
         struct xfs_attr_leafblock *leaf = bp->b_addr;
         struct xfs_attr3_icleaf_hdr ichdr;
  
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
  
         if (xfs_sb_version_hascrc(&mp->m_sb)) {
                 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
@@ -757,9 +833,10 @@ xfs_attr_shortform_allfit(
         struct xfs_attr3_icleaf_hdr leafhdr;
         int                     bytes;
         int                     i;
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
  
         leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
         entry = xfs_attr3_leaf_entryp(leaf);
  
         bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -812,7 +889,7 @@ xfs_attr3_leaf_to_shortform(
         memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
  
         leaf = (xfs_attr_leafblock_t *)tmpbuffer;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
         entry = xfs_attr3_leaf_entryp(leaf);
  
         /* XXX (dgc): buffer is about to be marked stale - why zero it? */
@@ -923,7 +1000,7 @@ xfs_attr3_leaf_to_node(
         btree = dp->d_ops->node_tree_p(node);
  
         leaf = bp2->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &icleafhdr, leaf);
         entries = xfs_attr3_leaf_entryp(leaf);
  
         /* both on-disk, don't endian-flip twice */
@@ -988,7 +1065,7 @@ xfs_attr3_leaf_create(
         }
         ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;
  
-       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+       xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
         xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1);
  
         *bpp = bp;
@@ -1073,7 +1150,7 @@ xfs_attr3_leaf_add(
         trace_xfs_attr_leaf_add(args);
  
         leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
         ASSERT(args->index >= 0 && args->index <= ichdr.count);
         entsize = xfs_attr_leaf_newentsize(args, NULL);
  
@@ -1126,7 +1203,7 @@ xfs_attr3_leaf_add(
         tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
  
  out_log_hdr:
-       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+       xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
         xfs_trans_log_buf(args->trans, bp,
                 XFS_DA_LOGRANGE(leaf, &leaf->hdr,
                                 xfs_attr3_leaf_hdr_size(leaf)));
@@ -1294,7 +1371,7 @@ xfs_attr3_leaf_compact(
                                                 ichdr_dst->freemap[0].base;
  
         /* write the header back to initialise the underlying buffer */
-       xfs_attr3_leaf_hdr_to_disk(leaf_dst, ichdr_dst);
+       xfs_attr3_leaf_hdr_to_disk(args->geo, leaf_dst, ichdr_dst);
  
         /*
          * Copy all entry's in the same (sorted) order,
@@ -1344,9 +1421,10 @@ xfs_attr_leaf_order(
  {
         struct xfs_attr3_icleaf_hdr ichdr1;
         struct xfs_attr3_icleaf_hdr ichdr2;
+       struct xfs_mount *mp = leaf1_bp->b_target->bt_mount;
  
-       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1_bp->b_addr);
-       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2_bp->b_addr);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr);
         return xfs_attr3_leaf_order(leaf1_bp, &ichdr1, leaf2_bp, &ichdr2);
  }
  
@@ -1388,8 +1466,8 @@ xfs_attr3_leaf_rebalance(
         ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
         leaf1 = blk1->bp->b_addr;
         leaf2 = blk2->bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
-       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr1, leaf1);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, leaf2);
         ASSERT(ichdr2.count == 0);
         args = state->args;
  
@@ -1490,8 +1568,8 @@ xfs_attr3_leaf_rebalance(
                                         ichdr1.count, count);
         }
  
-       xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);
-       xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2);
+       xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf1, &ichdr1);
+       xfs_attr3_leaf_hdr_to_disk(state->args->geo, leaf2, &ichdr2);
         xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1);
         xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1);
  
@@ -1684,7 +1762,7 @@ xfs_attr3_leaf_toosmall(
          */
         blk = &state->path.blk[ state->path.active-1 ];
         leaf = blk->bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr, leaf);
         bytes = xfs_attr3_leaf_hdr_size(leaf) +
                 ichdr.count * sizeof(xfs_attr_leaf_entry_t) +
                 ichdr.usedbytes;
@@ -1740,7 +1818,7 @@ xfs_attr3_leaf_toosmall(
                 if (error)
                         return error;
  
-               xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr);
+               xfs_attr3_leaf_hdr_from_disk(state->args->geo, &ichdr2, bp->b_addr);
  
                 bytes = state->args->geo->blksize -
                         (state->args->geo->blksize >> 2) -
@@ -1805,7 +1883,7 @@ xfs_attr3_leaf_remove(
         trace_xfs_attr_leaf_remove(args);
  
         leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
  
         ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);
         ASSERT(args->index >= 0 && args->index < ichdr.count);
@@ -1918,12 +1996,11 @@ xfs_attr3_leaf_remove(
                                 tmp = be16_to_cpu(entry->nameidx);
                 }
                 ichdr.firstused = tmp;
-               if (!ichdr.firstused)
-                       ichdr.firstused = tmp - XFS_ATTR_LEAF_NAME_ALIGN;
+               ASSERT(ichdr.firstused != 0);
         } else {
                 ichdr.holes = 1;        /* mark as needing compaction */
         }
-       xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr);
+       xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
         xfs_trans_log_buf(args->trans, bp,
                           XFS_DA_LOGRANGE(leaf, &leaf->hdr,
                                           xfs_attr3_leaf_hdr_size(leaf)));
@@ -1957,8 +2034,8 @@ xfs_attr3_leaf_unbalance(
  
         drop_leaf = drop_blk->bp->b_addr;
         save_leaf = save_blk->bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&drophdr, drop_leaf);
-       xfs_attr3_leaf_hdr_from_disk(&savehdr, save_leaf);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf);
+       xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf);
         entry = xfs_attr3_leaf_entryp(drop_leaf);
  
         /*
@@ -2012,7 +2089,7 @@ xfs_attr3_leaf_unbalance(
                 tmphdr.firstused = state->args->geo->blksize;
  
                 /* write the header to the temp buffer to initialise it */
-               xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);
+               xfs_attr3_leaf_hdr_to_disk(state->args->geo, tmp_leaf, &tmphdr);
  
                 if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,
                                          drop_blk->bp, &drophdr)) {
@@ -2039,7 +2116,7 @@ xfs_attr3_leaf_unbalance(
                 kmem_free(tmp_leaf);
         }
  
-       xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);
+       xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr);
         xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
                                            state->args->geo->blksize - 1);
  
@@ -2085,7 +2162,7 @@ xfs_attr3_leaf_lookup_int(
         trace_xfs_attr_leaf_lookup(args);
  
         leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
         entries = xfs_attr3_leaf_entryp(leaf);
         ASSERT(ichdr.count < args->geo->blksize / 8);
  
@@ -2190,7 +2267,7 @@ xfs_attr3_leaf_getvalue(
         int                     valuelen;
  
         leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
         ASSERT(ichdr.count < args->geo->blksize / 8);
         ASSERT(args->index < ichdr.count);
  
@@ -2391,8 +2468,9 @@ xfs_attr_leaf_lasthash(
  {
         struct xfs_attr3_icleaf_hdr ichdr;
         struct xfs_attr_leaf_entry *entries;
+       struct xfs_mount *mp = bp->b_target->bt_mount;
  
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, bp->b_addr);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
         entries = xfs_attr3_leaf_entryp(bp->b_addr);
         if (count)
                 *count = ichdr.count;
@@ -2486,7 +2564,7 @@ xfs_attr3_leaf_clearflag(
         ASSERT(entry->flags & XFS_ATTR_INCOMPLETE);
  
  #ifdef DEBUG
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
         ASSERT(args->index < ichdr.count);
         ASSERT(args->index >= 0);
  
@@ -2550,7 +2628,7 @@ xfs_attr3_leaf_setflag(
  
         leaf = bp->b_addr;
  #ifdef DEBUG
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
         ASSERT(args->index < ichdr.count);
         ASSERT(args->index >= 0);
  #endif
@@ -2629,11 +2707,11 @@ xfs_attr3_leaf_flipflags(
         entry2 = &xfs_attr3_leaf_entryp(leaf2)[args->index2];
  
  #ifdef DEBUG
-       xfs_attr3_leaf_hdr_from_disk(&ichdr1, leaf1);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr1, leaf1);
         ASSERT(args->index < ichdr1.count);
         ASSERT(args->index >= 0);
  
-       xfs_attr3_leaf_hdr_from_disk(&ichdr2, leaf2);
+       xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr2, leaf2);
         ASSERT(args->index2 < ichdr2.count);
         ASSERT(args->index2 >= 0);
  
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h

index e2929da..025c4b8 100644 (file)
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -100,9 +100,11 @@ int        xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
  int    xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
                         xfs_dablk_t bno, xfs_daddr_t mappedbno,
                         struct xfs_buf **bpp);
-void   xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,
+void   xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo,
+                                    struct xfs_attr3_icleaf_hdr *to,
                                      struct xfs_attr_leafblock *from);
-void   xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,
+void   xfs_attr3_leaf_hdr_to_disk(struct xfs_da_geometry *geo,
+                                  struct xfs_attr_leafblock *to,
                                    struct xfs_attr3_icleaf_hdr *from);
  
  #endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index 61ec015..aeffeaa 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -244,30 +244,6 @@ xfs_bmap_forkoff_reset(
         }
  }
  
-/*
- * Debug/sanity checking code
- */
-
-STATIC int
-xfs_bmap_sanity_check(
-       struct xfs_mount        *mp,
-       struct xfs_buf          *bp,
-       int                     level)
-{
-       struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
-
-       if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
-           block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
-               return 0;
-
-       if (be16_to_cpu(block->bb_level) != level ||
-           be16_to_cpu(block->bb_numrecs) == 0 ||
-           be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
-               return 0;
-
-       return 1;
-}
-
  #ifdef DEBUG
  STATIC struct xfs_buf *
  xfs_bmap_get_bp(
@@ -410,9 +386,6 @@ xfs_bmap_check_leaf_extents(
                                 goto error_norelse;
                 }
                 block = XFS_BUF_TO_BLOCK(bp);
-               XFS_WANT_CORRUPTED_GOTO(
-                       xfs_bmap_sanity_check(mp, bp, level),
-                       error0);
                 if (level == 0)
                         break;
  
@@ -424,7 +397,8 @@ xfs_bmap_check_leaf_extents(
                 xfs_check_block(block, mp, 0, 0);
                 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
                 bno = be64_to_cpu(*pp);
-               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                                       XFS_FSB_SANITY_CHECK(mp, bno), error0);
                 if (bp_release) {
                         bp_release = 0;
                         xfs_trans_brelse(NULL, bp);
@@ -1029,7 +1003,7 @@ xfs_bmap_add_attrfork_btree(
                 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
                         goto error0;
                 /* must be at least one entry */
-               XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
                 if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
                         goto error0;
                 if (stat == 0) {
@@ -1311,14 +1285,12 @@ xfs_bmap_read_extents(
                 if (error)
                         return error;
                 block = XFS_BUF_TO_BLOCK(bp);
-               XFS_WANT_CORRUPTED_GOTO(
-                       xfs_bmap_sanity_check(mp, bp, level),
-                       error0);
                 if (level == 0)
                         break;
                 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
                 bno = be64_to_cpu(*pp);
-               XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
+               XFS_WANT_CORRUPTED_GOTO(mp,
+                       XFS_FSB_SANITY_CHECK(mp, bno), error0);
                 xfs_trans_brelse(tp, bp);
         }
         /*
@@ -1345,9 +1317,6 @@ xfs_bmap_read_extents(
                                 XFS_ERRLEVEL_LOW, ip->i_mount, block);
                         goto error0;
                 }
-               XFS_WANT_CORRUPTED_GOTO(
-                       xfs_bmap_sanity_check(mp, bp, 0),
-                       error0);
                 /*
                  * Read-ahead the next leaf block, if any.
                  */
@@ -1755,7 +1724,9 @@ xfs_bmap_add_extent_delay_real(
         xfs_filblks_t           temp=0; /* value for da_new calculations */
         xfs_filblks_t           temp2=0;/* value for da_new calculations */
         int                     tmp_rval;       /* partial logging flags */
+       struct xfs_mount        *mp;
  
+       mp  = bma->tp ? bma->tp->t_mountp : NULL;
         ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
  
         ASSERT(bma->idx >= 0);
@@ -1866,15 +1837,15 @@ xfs_bmap_add_extent_delay_real(
                                         RIGHT.br_blockcount, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_btree_delete(bma->cur, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_btree_decrement(bma->cur, 0, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
                                         LEFT.br_startblock,
                                         LEFT.br_blockcount +
@@ -1907,7 +1878,7 @@ xfs_bmap_add_extent_delay_real(
                                         &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
                                         LEFT.br_startblock,
                                         LEFT.br_blockcount +
@@ -1938,7 +1909,7 @@ xfs_bmap_add_extent_delay_real(
                                         RIGHT.br_blockcount, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
                                         new->br_startblock,
                                         PREV.br_blockcount +
@@ -1968,12 +1939,12 @@ xfs_bmap_add_extent_delay_real(
                                         &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
                         error = xfs_btree_insert(bma->cur, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 }
                 break;
  
@@ -2001,7 +1972,7 @@ xfs_bmap_add_extent_delay_real(
                                         &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
                                         LEFT.br_startblock,
                                         LEFT.br_blockcount +
@@ -2038,12 +2009,12 @@ xfs_bmap_add_extent_delay_real(
                                         &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
                         error = xfs_btree_insert(bma->cur, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 }
  
                 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2084,7 +2055,7 @@ xfs_bmap_add_extent_delay_real(
                                         RIGHT.br_blockcount, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_bmbt_update(bma->cur, new->br_startoff,
                                         new->br_startblock,
                                         new->br_blockcount +
@@ -2122,12 +2093,12 @@ xfs_bmap_add_extent_delay_real(
                                         &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
                         error = xfs_btree_insert(bma->cur, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 }
  
                 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2191,12 +2162,12 @@ xfs_bmap_add_extent_delay_real(
                                         &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                         bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
                         error = xfs_btree_insert(bma->cur, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 }
  
                 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
@@ -2212,9 +2183,8 @@ xfs_bmap_add_extent_delay_real(
                 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
                         (bma->cur ? bma->cur->bc_private.b.allocated : 0));
                 if (diff > 0) {
-                       error = xfs_icsb_modify_counters(bma->ip->i_mount,
-                                       XFS_SBS_FDBLOCKS,
-                                       -((int64_t)diff), 0);
+                       error = xfs_mod_fdblocks(bma->ip->i_mount,
+                                                -((int64_t)diff), false);
                         ASSERT(!error);
                         if (error)
                                 goto done;
@@ -2265,9 +2235,8 @@ xfs_bmap_add_extent_delay_real(
                         temp += bma->cur->bc_private.b.allocated;
                 ASSERT(temp <= da_old);
                 if (temp < da_old)
-                       xfs_icsb_modify_counters(bma->ip->i_mount,
-                                       XFS_SBS_FDBLOCKS,
-                                       (int64_t)(da_old - temp), 0);
+                       xfs_mod_fdblocks(bma->ip->i_mount,
+                                       (int64_t)(da_old - temp), false);
         }
  
         /* clear out the allocated field, done with it now in any case. */
@@ -2309,6 +2278,7 @@ xfs_bmap_add_extent_unwritten_real(
                                         /* left is 0, right is 1, prev is 2 */
         int                     rval=0; /* return value (logging flags) */
         int                     state = 0;/* state bits, accessed thru macros */
+       struct xfs_mount        *mp = tp->t_mountp;
  
         *logflagsp = 0;
  
@@ -2421,19 +2391,19 @@ xfs_bmap_add_extent_unwritten_real(
                                         RIGHT.br_startblock,
                                         RIGHT.br_blockcount, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_btree_delete(cur, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_btree_decrement(cur, 0, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_btree_delete(cur, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_btree_decrement(cur, 0, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                 LEFT.br_startblock,
                                 LEFT.br_blockcount + PREV.br_blockcount +
@@ -2464,13 +2434,13 @@ xfs_bmap_add_extent_unwritten_real(
                                         PREV.br_startblock, PREV.br_blockcount,
                                         &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_btree_delete(cur, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_btree_decrement(cur, 0, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
                                 LEFT.br_startblock,
                                 LEFT.br_blockcount + PREV.br_blockcount,
@@ -2499,13 +2469,13 @@ xfs_bmap_add_extent_unwritten_real(
                                         RIGHT.br_startblock,
                                         RIGHT.br_blockcount, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_btree_delete(cur, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_btree_decrement(cur, 0, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
                                 new->br_startblock,
                                 new->br_blockcount + RIGHT.br_blockcount,
@@ -2532,7 +2502,7 @@ xfs_bmap_add_extent_unwritten_real(
                                         new->br_startblock, new->br_blockcount,
                                         &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_bmbt_update(cur, new->br_startoff,
                                 new->br_startblock, new->br_blockcount,
                                 newext)))
@@ -2569,7 +2539,7 @@ xfs_bmap_add_extent_unwritten_real(
                                         PREV.br_startblock, PREV.br_blockcount,
                                         &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_bmbt_update(cur,
                                 PREV.br_startoff + new->br_blockcount,
                                 PREV.br_startblock + new->br_blockcount,
@@ -2611,7 +2581,7 @@ xfs_bmap_add_extent_unwritten_real(
                                         PREV.br_startblock, PREV.br_blockcount,
                                         &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_bmbt_update(cur,
                                 PREV.br_startoff + new->br_blockcount,
                                 PREV.br_startblock + new->br_blockcount,
@@ -2621,7 +2591,7 @@ xfs_bmap_add_extent_unwritten_real(
                         cur->bc_rec.b = *new;
                         if ((error = xfs_btree_insert(cur, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 }
                 break;
  
@@ -2651,7 +2621,7 @@ xfs_bmap_add_extent_unwritten_real(
                                         PREV.br_startblock,
                                         PREV.br_blockcount, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
                                 PREV.br_startblock,
                                 PREV.br_blockcount - new->br_blockcount,
@@ -2689,7 +2659,7 @@ xfs_bmap_add_extent_unwritten_real(
                                         PREV.br_startblock, PREV.br_blockcount,
                                         &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
                                 PREV.br_startblock,
                                 PREV.br_blockcount - new->br_blockcount,
@@ -2699,11 +2669,11 @@ xfs_bmap_add_extent_unwritten_real(
                                         new->br_startblock, new->br_blockcount,
                                         &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                         cur->bc_rec.b.br_state = XFS_EXT_NORM;
                         if ((error = xfs_btree_insert(cur, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 }
                 break;
  
@@ -2737,7 +2707,7 @@ xfs_bmap_add_extent_unwritten_real(
                                         PREV.br_startblock, PREV.br_blockcount,
                                         &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         /* new right extent - oldext */
                         if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
                                 r[1].br_startblock, r[1].br_blockcount,
@@ -2749,7 +2719,7 @@ xfs_bmap_add_extent_unwritten_real(
                                 new->br_startoff - PREV.br_startoff;
                         if ((error = xfs_btree_insert(cur, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         /*
                          * Reset the cursor to the position of the new extent
                          * we are about to insert as we can't trust it after
@@ -2759,12 +2729,12 @@ xfs_bmap_add_extent_unwritten_real(
                                         new->br_startblock, new->br_blockcount,
                                         &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                         /* new middle extent - newext */
                         cur->bc_rec.b.br_state = new->br_state;
                         if ((error = xfs_btree_insert(cur, &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 }
                 break;
  
@@ -2944,8 +2914,8 @@ xfs_bmap_add_extent_hole_delay(
         }
         if (oldlen != newlen) {
                 ASSERT(oldlen > newlen);
-               xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
-                       (int64_t)(oldlen - newlen), 0);
+               xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
+                                false);
                 /*
                  * Nothing to do for disk quota accounting here.
                  */
@@ -2968,7 +2938,9 @@ xfs_bmap_add_extent_hole_real(
         xfs_bmbt_irec_t         right;  /* right neighbor extent entry */
         int                     rval=0; /* return value (logging flags) */
         int                     state;  /* state bits, accessed thru macros */
+       struct xfs_mount        *mp;
  
+       mp = bma->tp ? bma->tp->t_mountp : NULL;
         ifp = XFS_IFORK_PTR(bma->ip, whichfork);
  
         ASSERT(bma->idx >= 0);
@@ -3056,15 +3028,15 @@ xfs_bmap_add_extent_hole_real(
                                         &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_btree_delete(bma->cur, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_btree_decrement(bma->cur, 0, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_bmbt_update(bma->cur, left.br_startoff,
                                         left.br_startblock,
                                         left.br_blockcount +
@@ -3097,7 +3069,7 @@ xfs_bmap_add_extent_hole_real(
                                         &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_bmbt_update(bma->cur, left.br_startoff,
                                         left.br_startblock,
                                         left.br_blockcount +
@@ -3131,7 +3103,7 @@ xfs_bmap_add_extent_hole_real(
                                         right.br_blockcount, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         error = xfs_bmbt_update(bma->cur, new->br_startoff,
                                         new->br_startblock,
                                         new->br_blockcount +
@@ -3161,12 +3133,12 @@ xfs_bmap_add_extent_hole_real(
                                         new->br_blockcount, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 0, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
                         bma->cur->bc_rec.b.br_state = new->br_state;
                         error = xfs_btree_insert(bma->cur, &i);
                         if (error)
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 }
                 break;
         }
@@ -4160,18 +4132,15 @@ xfs_bmapi_reserve_delalloc(
         ASSERT(indlen > 0);
  
         if (rt) {
-               error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-                                         -((int64_t)extsz), 0);
+               error = xfs_mod_frextents(mp, -((int64_t)extsz));
         } else {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                -((int64_t)alen), 0);
+               error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
         }
  
         if (error)
                 goto out_unreserve_quota;
  
-       error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                        -((int64_t)indlen), 0);
+       error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
         if (error)
                 goto out_unreserve_blocks;
  
@@ -4198,9 +4167,9 @@ xfs_bmapi_reserve_delalloc(
  
  out_unreserve_blocks:
         if (rt)
-               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
+               xfs_mod_frextents(mp, extsz);
         else
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
+               xfs_mod_fdblocks(mp, alen, false);
  out_unreserve_quota:
         if (XFS_IS_QUOTA_ON(mp))
                 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
@@ -4801,7 +4770,7 @@ xfs_bmap_del_extent(
                                         got.br_startblock, got.br_blockcount,
                                         &i)))
                                 goto done;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 }
                 da_old = da_new = 0;
         } else {
@@ -4835,7 +4804,7 @@ xfs_bmap_del_extent(
                 }
                 if ((error = xfs_btree_delete(cur, &i)))
                         goto done;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                 break;
  
         case 2:
@@ -4935,7 +4904,8 @@ xfs_bmap_del_extent(
                                                         got.br_startblock,
                                                         temp, &i)))
                                                 goto done;
-                                       XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                                       XFS_WANT_CORRUPTED_GOTO(mp,
+                                                               i == 1, done);
                                         /*
                                          * Update the btree record back
                                          * to the original value.
@@ -4956,7 +4926,7 @@ xfs_bmap_del_extent(
                                         error = -ENOSPC;
                                         goto done;
                                 }
-                               XFS_WANT_CORRUPTED_GOTO(i == 1, done);
+                               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
                         } else
                                 flags |= xfs_ilog_fext(whichfork);
                         XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -5012,10 +4982,8 @@ xfs_bmap_del_extent(
          * Nothing to do for disk quota accounting here.
          */
         ASSERT(da_old >= da_new);
-       if (da_old > da_new) {
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                       (int64_t)(da_old - da_new), 0);
-       }
+       if (da_old > da_new)
+               xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
  done:
         *logflagsp = flags;
         return error;
@@ -5284,14 +5252,13 @@ xfs_bunmapi(
  
                                 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
                                 do_div(rtexts, mp->m_sb.sb_rextsize);
-                               xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
-                                               (int64_t)rtexts, 0);
+                               xfs_mod_frextents(mp, (int64_t)rtexts);
                                 (void)xfs_trans_reserve_quota_nblks(NULL,
                                         ip, -((long)del.br_blockcount), 0,
                                         XFS_QMOPT_RES_RTBLKS);
                         } else {
-                               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                               (int64_t)del.br_blockcount, 0);
+                               xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
+                                                false);
                                 (void)xfs_trans_reserve_quota_nblks(NULL,
                                         ip, -((long)del.br_blockcount), 0,
                                         XFS_QMOPT_RES_REGBLKS);
@@ -5453,6 +5420,7 @@ xfs_bmse_merge(
         struct xfs_bmbt_irec            left;
         xfs_filblks_t                   blockcount;
         int                             error, i;
+       struct xfs_mount                *mp = ip->i_mount;
  
         xfs_bmbt_get_all(gotp, &got);
         xfs_bmbt_get_all(leftp, &left);
@@ -5487,19 +5455,19 @@ xfs_bmse_merge(
                                    got.br_blockcount, &i);
         if (error)
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  
         error = xfs_btree_delete(cur, &i);
         if (error)
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  
         /* lookup and update size of the previous extent */
         error = xfs_bmbt_lookup_eq(cur, left.br_startoff, left.br_startblock,
                                    left.br_blockcount, &i);
         if (error)
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  
         left.br_blockcount = blockcount;
  
@@ -5518,50 +5486,92 @@ xfs_bmse_shift_one(
         int                             *current_ext,
         struct xfs_bmbt_rec_host        *gotp,
         struct xfs_btree_cur            *cur,
-       int                             *logflags)
+       int                             *logflags,
+       enum shift_direction            direction)
  {
         struct xfs_ifork                *ifp;
+       struct xfs_mount                *mp;
         xfs_fileoff_t                   startoff;
-       struct xfs_bmbt_rec_host        *leftp;
+       struct xfs_bmbt_rec_host        *adj_irecp;
         struct xfs_bmbt_irec            got;
-       struct xfs_bmbt_irec            left;
+       struct xfs_bmbt_irec            adj_irec;
         int                             error;
         int                             i;
+       int                             total_extents;
  
+       mp = ip->i_mount;
         ifp = XFS_IFORK_PTR(ip, whichfork);
+       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
  
         xfs_bmbt_get_all(gotp, &got);
-       startoff = got.br_startoff - offset_shift_fsb;
  
         /* delalloc extents should be prevented by caller */
-       XFS_WANT_CORRUPTED_RETURN(!isnullstartblock(got.br_startblock));
+       XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock));
  
-       /*
-        * Check for merge if we've got an extent to the left, otherwise make
-        * sure there's enough room at the start of the file for the shift.
-        */
-       if (*current_ext) {
-               /* grab the left extent and check for a large enough hole */
-               leftp = xfs_iext_get_ext(ifp, *current_ext - 1);
-               xfs_bmbt_get_all(leftp, &left);
+       if (direction == SHIFT_LEFT) {
+               startoff = got.br_startoff - offset_shift_fsb;
+
+               /*
+                * Check for merge if we've got an extent to the left,
+                * otherwise make sure there's enough room at the start
+                * of the file for the shift.
+                */
+               if (!*current_ext) {
+                       if (got.br_startoff < offset_shift_fsb)
+                               return -EINVAL;
+                       goto update_current_ext;
+               }
+               /*
+                * grab the left extent and check for a large
+                * enough hole.
+                */
+               adj_irecp = xfs_iext_get_ext(ifp, *current_ext - 1);
+               xfs_bmbt_get_all(adj_irecp, &adj_irec);
  
-               if (startoff < left.br_startoff + left.br_blockcount)
+               if (startoff <
+                   adj_irec.br_startoff + adj_irec.br_blockcount)
                         return -EINVAL;
  
                 /* check whether to merge the extent or shift it down */
-               if (xfs_bmse_can_merge(&left, &got, offset_shift_fsb)) {
+               if (xfs_bmse_can_merge(&adj_irec, &got,
+                                      offset_shift_fsb)) {
                         return xfs_bmse_merge(ip, whichfork, offset_shift_fsb,
-                                             *current_ext, gotp, leftp, cur,
-                                             logflags);
+                                             *current_ext, gotp, adj_irecp,
+                                             cur, logflags);
                 }
-       } else if (got.br_startoff < offset_shift_fsb)
-               return -EINVAL;
-
+       } else {
+               startoff = got.br_startoff + offset_shift_fsb;
+               /* nothing to move if this is the last extent */
+               if (*current_ext >= (total_extents - 1))
+                       goto update_current_ext;
+               /*
+                * If this is not the last extent in the file, make sure there
+                * is enough room between current extent and next extent for
+                * accommodating the shift.
+                */
+               adj_irecp = xfs_iext_get_ext(ifp, *current_ext + 1);
+               xfs_bmbt_get_all(adj_irecp, &adj_irec);
+               if (startoff + got.br_blockcount > adj_irec.br_startoff)
+                       return -EINVAL;
+               /*
+                * Unlike a left shift (which involves a hole punch),
+                * a right shift does not modify extent neighbors
+                * in any way. We should never find mergeable extents
+                * in this scenario. Check anyways and warn if we
+                * encounter two extents that could be one.
+                */
+               if (xfs_bmse_can_merge(&got, &adj_irec, offset_shift_fsb))
+                       WARN_ON_ONCE(1);
+       }
         /*
          * Increment the extent index for the next iteration, update the start
          * offset of the in-core extent and update the btree if applicable.
          */
-       (*current_ext)++;
+update_current_ext:
+       if (direction == SHIFT_LEFT)
+               (*current_ext)++;
+       else
+               (*current_ext)--;
         xfs_bmbt_set_startoff(gotp, startoff);
         *logflags |= XFS_ILOG_CORE;
         if (!cur) {
@@ -5573,18 +5583,18 @@ xfs_bmse_shift_one(
                                    got.br_blockcount, &i);
         if (error)
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
  
         got.br_startoff = startoff;
         return xfs_bmbt_update(cur, got.br_startoff, got.br_startblock,
-                               got.br_blockcount, got.br_state);
+                              got.br_blockcount, got.br_state);
  }
  
  /*
- * Shift extent records to the left to cover a hole.
+ * Shift extent records to the left/right to cover/create a hole.
   *
   * The maximum number of extents to be shifted in a single operation is
- * @num_exts. @start_fsb specifies the file offset to start the shift and the
+ * @num_exts. @stop_fsb specifies the file offset at which to stop shift and the
   * file offset where we've left off is returned in @next_fsb. @offset_shift_fsb
   * is the length by which each extent is shifted. If there is no hole to shift
   * the extents into, this will be considered invalid operation and we abort
@@ -5594,12 +5604,13 @@ int
  xfs_bmap_shift_extents(
         struct xfs_trans        *tp,
         struct xfs_inode        *ip,
-       xfs_fileoff_t           start_fsb,
+       xfs_fileoff_t           *next_fsb,
         xfs_fileoff_t           offset_shift_fsb,
         int                     *done,
-       xfs_fileoff_t           *next_fsb,
+       xfs_fileoff_t           stop_fsb,
         xfs_fsblock_t           *firstblock,
         struct xfs_bmap_free    *flist,
+       enum shift_direction    direction,
         int                     num_exts)
  {
         struct xfs_btree_cur            *cur = NULL;
@@ -5609,10 +5620,11 @@ xfs_bmap_shift_extents(
         struct xfs_ifork                *ifp;
         xfs_extnum_t                    nexts = 0;
         xfs_extnum_t                    current_ext;
+       xfs_extnum_t                    total_extents;
+       xfs_extnum_t                    stop_extent;
         int                             error = 0;
         int                             whichfork = XFS_DATA_FORK;
         int                             logflags = 0;
-       int                             total_extents;
  
         if (unlikely(XFS_TEST_ERROR(
             (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5628,6 +5640,8 @@ xfs_bmap_shift_extents(
  
         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
+       ASSERT(*next_fsb != NULLFSBLOCK || direction == SHIFT_RIGHT);
  
         ifp = XFS_IFORK_PTR(ip, whichfork);
         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -5644,44 +5658,84 @@ xfs_bmap_shift_extents(
                 cur->bc_private.b.flags = 0;
         }
  
+       /*
+        * There may be delalloc extents in the data fork before the range we
+        * are collapsing out, so we cannot use the count of real extents here.
+        * Instead we have to calculate it from the incore fork.
+        */
+       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+       if (total_extents == 0) {
+               *done = 1;
+               goto del_cursor;
+       }
+
+       /*
+        * In case of first right shift, we need to initialize next_fsb
+        */
+       if (*next_fsb == NULLFSBLOCK) {
+               gotp = xfs_iext_get_ext(ifp, total_extents - 1);
+               xfs_bmbt_get_all(gotp, &got);
+               *next_fsb = got.br_startoff;
+               if (stop_fsb > *next_fsb) {
+                       *done = 1;
+                       goto del_cursor;
+               }
+       }
+
+       /* Lookup the extent index at which we have to stop */
+       if (direction == SHIFT_RIGHT) {
+               gotp = xfs_iext_bno_to_ext(ifp, stop_fsb, &stop_extent);
+               /* Make stop_extent exclusive of shift range */
+               stop_extent--;
+       } else
+               stop_extent = total_extents;
+
         /*
          * Look up the extent index for the fsb where we start shifting. We can
          * henceforth iterate with current_ext as extent list changes are locked
          * out via ilock.
          *
          * gotp can be null in 2 cases: 1) if there are no extents or 2)
-        * start_fsb lies in a hole beyond which there are no extents. Either
+        * *next_fsb lies in a hole beyond which there are no extents. Either
          * way, we are done.
          */
-       gotp = xfs_iext_bno_to_ext(ifp, start_fsb, &current_ext);
+       gotp = xfs_iext_bno_to_ext(ifp, *next_fsb, &current_ext);
         if (!gotp) {
                 *done = 1;
                 goto del_cursor;
         }
  
-       /*
-        * There may be delalloc extents in the data fork before the range we
-        * are collapsing out, so we cannot use the count of real extents here.
-        * Instead we have to calculate it from the incore fork.
-        */
-       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-       while (nexts++ < num_exts && current_ext < total_extents) {
+       /* some sanity checking before we finally start shifting extents */
+       if ((direction == SHIFT_LEFT && current_ext >= stop_extent) ||
+            (direction == SHIFT_RIGHT && current_ext <= stop_extent)) {
+               error = -EIO;
+               goto del_cursor;
+       }
+
+       while (nexts++ < num_exts) {
                 error = xfs_bmse_shift_one(ip, whichfork, offset_shift_fsb,
-                                       &current_ext, gotp, cur, &logflags);
+                                          &current_ext, gotp, cur, &logflags,
+                                          direction);
                 if (error)
                         goto del_cursor;
+               /*
+                * If there was an extent merge during the shift, the extent
+                * count can change. Update the total and grade the next record.
+                */
+               if (direction == SHIFT_LEFT) {
+                       total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+                       stop_extent = total_extents;
+               }
  
-               /* update total extent count and grab the next record */
-               total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
-               if (current_ext >= total_extents)
+               if (current_ext == stop_extent) {
+                       *done = 1;
+                       *next_fsb = NULLFSBLOCK;
                         break;
+               }
                 gotp = xfs_iext_get_ext(ifp, current_ext);
         }
  
-       /* Check if we are done */
-       if (current_ext == total_extents) {
-               *done = 1;
-       } else if (next_fsb) {
+       if (!*done) {
                 xfs_bmbt_get_all(gotp, &got);
                 *next_fsb = got.br_startoff;
         }
@@ -5696,3 +5750,189 @@ del_cursor:
  
         return error;
  }
+
+/*
+ * Splits an extent into two extents at split_fsb block such that it is
+ * the first block of the current_ext. @current_ext is a target extent
+ * to be split. @split_fsb is a block where the extents is split.
+ * If split_fsb lies in a hole or the first block of extents, just return 0.
+ */
+STATIC int
+xfs_bmap_split_extent_at(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           split_fsb,
+       xfs_fsblock_t           *firstfsb,
+       struct xfs_bmap_free    *free_list)
+{
+       int                             whichfork = XFS_DATA_FORK;
+       struct xfs_btree_cur            *cur = NULL;
+       struct xfs_bmbt_rec_host        *gotp;
+       struct xfs_bmbt_irec            got;
+       struct xfs_bmbt_irec            new; /* split extent */
+       struct xfs_mount                *mp = ip->i_mount;
+       struct xfs_ifork                *ifp;
+       xfs_fsblock_t                   gotblkcnt; /* new block count for got */
+       xfs_extnum_t                    current_ext;
+       int                             error = 0;
+       int                             logflags = 0;
+       int                             i = 0;
+
+       if (unlikely(XFS_TEST_ERROR(
+           (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+            XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+            mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+               XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
+                                XFS_ERRLEVEL_LOW, mp);
+               return -EFSCORRUPTED;
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               return -EIO;
+
+       ifp = XFS_IFORK_PTR(ip, whichfork);
+       if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+               /* Read in all the extents */
+               error = xfs_iread_extents(tp, ip, whichfork);
+               if (error)
+                       return error;
+       }
+
+       /*
+        * gotp can be null in 2 cases: 1) if there are no extents
+        * or 2) split_fsb lies in a hole beyond which there are
+        * no extents. Either way, we are done.
+        */
+       gotp = xfs_iext_bno_to_ext(ifp, split_fsb, &current_ext);
+       if (!gotp)
+               return 0;
+
+       xfs_bmbt_get_all(gotp, &got);
+
+       /*
+        * Check split_fsb lies in a hole or the start boundary offset
+        * of the extent.
+        */
+       if (got.br_startoff >= split_fsb)
+               return 0;
+
+       gotblkcnt = split_fsb - got.br_startoff;
+       new.br_startoff = split_fsb;
+       new.br_startblock = got.br_startblock + gotblkcnt;
+       new.br_blockcount = got.br_blockcount - gotblkcnt;
+       new.br_state = got.br_state;
+
+       if (ifp->if_flags & XFS_IFBROOT) {
+               cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+               cur->bc_private.b.firstblock = *firstfsb;
+               cur->bc_private.b.flist = free_list;
+               cur->bc_private.b.flags = 0;
+               error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
+                               got.br_startblock,
+                               got.br_blockcount,
+                               &i);
+               if (error)
+                       goto del_cursor;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
+       }
+
+       xfs_bmbt_set_blockcount(gotp, gotblkcnt);
+       got.br_blockcount = gotblkcnt;
+
+       logflags = XFS_ILOG_CORE;
+       if (cur) {
+               error = xfs_bmbt_update(cur, got.br_startoff,
+                               got.br_startblock,
+                               got.br_blockcount,
+                               got.br_state);
+               if (error)
+                       goto del_cursor;
+       } else
+               logflags |= XFS_ILOG_DEXT;
+
+       /* Add new extent */
+       current_ext++;
+       xfs_iext_insert(ip, current_ext, 1, &new, 0);
+       XFS_IFORK_NEXT_SET(ip, whichfork,
+                          XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
+
+       if (cur) {
+               error = xfs_bmbt_lookup_eq(cur, new.br_startoff,
+                               new.br_startblock, new.br_blockcount,
+                               &i);
+               if (error)
+                       goto del_cursor;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
+               cur->bc_rec.b.br_state = new.br_state;
+
+               error = xfs_btree_insert(cur, &i);
+               if (error)
+                       goto del_cursor;
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
+       }
+
+       /*
+        * Convert to a btree if necessary.
+        */
+       if (xfs_bmap_needs_btree(ip, whichfork)) {
+               int tmp_logflags; /* partial log flag return val */
+
+               ASSERT(cur == NULL);
+               error = xfs_bmap_extents_to_btree(tp, ip, firstfsb, free_list,
+                               &cur, 0, &tmp_logflags, whichfork);
+               logflags |= tmp_logflags;
+       }
+
+del_cursor:
+       if (cur) {
+               cur->bc_private.b.allocated = 0;
+               xfs_btree_del_cursor(cur,
+                               error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       }
+
+       if (logflags)
+               xfs_trans_log_inode(tp, ip, logflags);
+       return error;
+}
+
+int
+xfs_bmap_split_extent(
+       struct xfs_inode        *ip,
+       xfs_fileoff_t           split_fsb)
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp;
+       struct xfs_bmap_free    free_list;
+       xfs_fsblock_t           firstfsb;
+       int                     committed;
+       int                     error;
+
+       tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+       error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+                       XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+       if (error) {
+               xfs_trans_cancel(tp, 0);
+               return error;
+       }
+
+       xfs_ilock(ip, XFS_ILOCK_EXCL);
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+       xfs_bmap_init(&free_list, &firstfsb);
+
+       error = xfs_bmap_split_extent_at(tp, ip, split_fsb,
+                       &firstfsb, &free_list);
+       if (error)
+               goto out;
+
+       error = xfs_bmap_finish(&tp, &free_list, &committed);
+       if (error)
+               goto out;
+
+       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+
+
+out:
+       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+       return error;
+}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h

index b9d8a49..6aaa0c1 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -166,6 +166,11 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
   */
  #define XFS_BMAP_MAX_SHIFT_EXTENTS     1
  
+enum shift_direction {
+       SHIFT_LEFT = 0,
+       SHIFT_RIGHT,
+};
+
  #ifdef DEBUG
  void   xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
                 int whichfork, unsigned long caller_ip);
@@ -211,8 +216,10 @@ int        xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                 xfs_extnum_t num);
  uint   xfs_default_attroffset(struct xfs_inode *ip);
  int    xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
-               xfs_fileoff_t start_fsb, xfs_fileoff_t offset_shift_fsb,
-               int *done, xfs_fileoff_t *next_fsb, xfs_fsblock_t *firstblock,
-               struct xfs_bmap_free *flist, int num_exts);
+               xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb,
+               int *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock,
+               struct xfs_bmap_free *flist, enum shift_direction direction,
+               int num_exts);
+int    xfs_bmap_split_extent(struct xfs_inode *ip, xfs_fileoff_t split_offset);
  
  #endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c

index 81cad43..c72283d 100644 (file)
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -168,7 +168,7 @@ xfs_btree_check_lptr(
         xfs_fsblock_t           bno,    /* btree block disk address */
         int                     level)  /* btree block level */
  {
-       XFS_WANT_CORRUPTED_RETURN(
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
                 level > 0 &&
                 bno != NULLFSBLOCK &&
                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
@@ -187,7 +187,7 @@ xfs_btree_check_sptr(
  {
         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
  
-       XFS_WANT_CORRUPTED_RETURN(
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
                 level > 0 &&
                 bno != NULLAGBLOCK &&
                 bno != 0 &&
@@ -1825,7 +1825,7 @@ xfs_btree_lookup(
                         error = xfs_btree_increment(cur, 0, &i);
                         if (error)
                                 goto error0;
-                       XFS_WANT_CORRUPTED_RETURN(i == 1);
+                       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
                         *stat = 1;
                         return 0;
@@ -2285,7 +2285,7 @@ xfs_btree_rshift(
         if (error)
                 goto error0;
         i = xfs_btree_lastrec(tcur, level);
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
  
         error = xfs_btree_increment(tcur, level, &i);
         if (error)
@@ -3138,7 +3138,7 @@ xfs_btree_insert(
                         goto error0;
                 }
  
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
                 level++;
  
                 /*
@@ -3582,15 +3582,15 @@ xfs_btree_delrec(
                  * Actually any entry but the first would suffice.
                  */
                 i = xfs_btree_lastrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
  
                 error = xfs_btree_increment(tcur, level, &i);
                 if (error)
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
  
                 i = xfs_btree_lastrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
  
                 /* Grab a pointer to the block. */
                 right = xfs_btree_get_block(tcur, level, &rbp);
@@ -3634,12 +3634,12 @@ xfs_btree_delrec(
                 rrecs = xfs_btree_get_numrecs(right);
                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
                         i = xfs_btree_firstrec(tcur, level);
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
  
                         error = xfs_btree_decrement(tcur, level, &i);
                         if (error)
                                 goto error0;
-                       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+                       XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
                 }
         }
  
@@ -3653,13 +3653,13 @@ xfs_btree_delrec(
                  * previous block.
                  */
                 i = xfs_btree_firstrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
  
                 error = xfs_btree_decrement(tcur, level, &i);
                 if (error)
                         goto error0;
                 i = xfs_btree_firstrec(tcur, level);
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
  
                 /* Grab a pointer to the block. */
                 left = xfs_btree_get_block(tcur, level, &lbp);
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c

index 9cb0115..2385f8c 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -538,12 +538,12 @@ xfs_da3_root_split(
         oldroot = blk1->bp->b_addr;
         if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
             oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
-               struct xfs_da3_icnode_hdr nodehdr;
+               struct xfs_da3_icnode_hdr icnodehdr;
  
-               dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot);
+               dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
                 btree = dp->d_ops->node_tree_p(oldroot);
-               size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);
-               level = nodehdr.level;
+               size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
+               level = icnodehdr.level;
  
                 /*
                  * we are about to copy oldroot to bp, so set up the type
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h

index 0a49b02..74bcbab 100644 (file)
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -725,7 +725,13 @@ struct xfs_attr3_icleaf_hdr {
         __uint16_t      magic;
         __uint16_t      count;
         __uint16_t      usedbytes;
-       __uint16_t      firstused;
+       /*
+        * firstused is 32-bit here instead of 16-bit like the on-disk variant
+        * to support maximum fsb size of 64k without overflow issues throughout
+        * the attr code. Instead, the overflow condition is handled on
+        * conversion to/from disk.
+        */
+       __uint32_t      firstused;
         __u8            holes;
         struct {
                 __uint16_t      base;
@@ -733,6 +739,12 @@ struct xfs_attr3_icleaf_hdr {
         } freemap[XFS_ATTR_LEAF_MAPSIZE];
  };
  
+/*
+ * Special value to represent fs block size in the leaf header firstused field.
+ * Only used when block size overflows the 2-bytes available on disk.
+ */
+#define XFS_ATTR3_LEAF_NULLOFF 0
+
  /*
   * Flags used in the leaf_entry[i].flags field.
   * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c

index 5ff31be..de1ea16 100644 (file)
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -89,7 +89,7 @@ __xfs_dir3_data_check(
                  * so just ensure that the count falls somewhere inside the
                  * block right now.
                  */
-               XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) <
+               XFS_WANT_CORRUPTED_RETURN(mp, be32_to_cpu(btp->count) <
                         ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));
                 break;
         case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
@@ -107,21 +107,21 @@ __xfs_dir3_data_check(
         bf = ops->data_bestfree_p(hdr);
         count = lastfree = freeseen = 0;
         if (!bf[0].length) {
-               XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);
+               XFS_WANT_CORRUPTED_RETURN(mp, !bf[0].offset);
                 freeseen |= 1 << 0;
         }
         if (!bf[1].length) {
-               XFS_WANT_CORRUPTED_RETURN(!bf[1].offset);
+               XFS_WANT_CORRUPTED_RETURN(mp, !bf[1].offset);
                 freeseen |= 1 << 1;
         }
         if (!bf[2].length) {
-               XFS_WANT_CORRUPTED_RETURN(!bf[2].offset);
+               XFS_WANT_CORRUPTED_RETURN(mp, !bf[2].offset);
                 freeseen |= 1 << 2;
         }
  
-       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[0].length) >=
+       XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[0].length) >=
                                                 be16_to_cpu(bf[1].length));
-       XFS_WANT_CORRUPTED_RETURN(be16_to_cpu(bf[1].length) >=
+       XFS_WANT_CORRUPTED_RETURN(mp, be16_to_cpu(bf[1].length) >=
                                                 be16_to_cpu(bf[2].length));
         /*
          * Loop over the data/unused entries.
@@ -134,18 +134,18 @@ __xfs_dir3_data_check(
                  * doesn't need to be there.
                  */
                 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
-                       XFS_WANT_CORRUPTED_RETURN(lastfree == 0);
-                       XFS_WANT_CORRUPTED_RETURN(
+                       XFS_WANT_CORRUPTED_RETURN(mp, lastfree == 0);
+                       XFS_WANT_CORRUPTED_RETURN(mp,
                                 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
                                                (char *)dup - (char *)hdr);
                         dfp = xfs_dir2_data_freefind(hdr, bf, dup);
                         if (dfp) {
                                 i = (int)(dfp - bf);
-                               XFS_WANT_CORRUPTED_RETURN(
+                               XFS_WANT_CORRUPTED_RETURN(mp,
                                         (freeseen & (1 << i)) == 0);
                                 freeseen |= 1 << i;
                         } else {
-                               XFS_WANT_CORRUPTED_RETURN(
+                               XFS_WANT_CORRUPTED_RETURN(mp,
                                         be16_to_cpu(dup->length) <=
                                                 be16_to_cpu(bf[2].length));
                         }
@@ -160,13 +160,13 @@ __xfs_dir3_data_check(
                  * The linear search is crude but this is DEBUG code.
                  */
                 dep = (xfs_dir2_data_entry_t *)p;
-               XFS_WANT_CORRUPTED_RETURN(dep->namelen != 0);
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp, dep->namelen != 0);
+               XFS_WANT_CORRUPTED_RETURN(mp,
                         !xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                         be16_to_cpu(*ops->data_entry_tag_p(dep)) ==
                                                (char *)dep - (char *)hdr);
-               XFS_WANT_CORRUPTED_RETURN(
+               XFS_WANT_CORRUPTED_RETURN(mp,
                                 ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);
                 count++;
                 lastfree = 0;
@@ -183,14 +183,15 @@ __xfs_dir3_data_check(
                                     be32_to_cpu(lep[i].hashval) == hash)
                                         break;
                         }
-                       XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));
+                       XFS_WANT_CORRUPTED_RETURN(mp,
+                                                 i < be32_to_cpu(btp->count));
                 }
                 p += ops->data_entsize(dep->namelen);
         }
         /*
          * Need to have seen all the entries and all the bestfree slots.
          */
-       XFS_WANT_CORRUPTED_RETURN(freeseen == 7);
+       XFS_WANT_CORRUPTED_RETURN(mp, freeseen == 7);
         if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||
             hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) {
                 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
@@ -198,13 +199,13 @@ __xfs_dir3_data_check(
                             cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
                                 stale++;
                         if (i > 0)
-                               XFS_WANT_CORRUPTED_RETURN(
+                               XFS_WANT_CORRUPTED_RETURN(mp,
                                         be32_to_cpu(lep[i].hashval) >=
                                                 be32_to_cpu(lep[i - 1].hashval));
                 }
-               XFS_WANT_CORRUPTED_RETURN(count ==
+               XFS_WANT_CORRUPTED_RETURN(mp, count ==
                         be32_to_cpu(btp->count) - be32_to_cpu(btp->stale));
-               XFS_WANT_CORRUPTED_RETURN(stale == be32_to_cpu(btp->stale));
+               XFS_WANT_CORRUPTED_RETURN(mp, stale == be32_to_cpu(btp->stale));
         }
         return 0;
  }
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h

index 8eb7189..4daaa66 100644 (file)
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -264,68 +264,6 @@ typedef struct xfs_dsb {
         /* must be padded to 64 bit alignment */
  } xfs_dsb_t;
  
-/*
- * Sequence number values for the fields.
- */
-typedef enum {
-       XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
-       XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
-       XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
-       XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
-       XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
-       XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
-       XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
-       XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
-       XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
-       XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
-       XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
-       XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
-       XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
-       XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
-       XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
-       XFS_SBS_PQUOTINO, XFS_SBS_LSN,
-       XFS_SBS_FIELDCOUNT
-} xfs_sb_field_t;
-
-/*
- * Mask values, defined based on the xfs_sb_field_t values.
- * Only define the ones we're using.
- */
-#define        XFS_SB_MVAL(x)          (1LL << XFS_SBS_ ## x)
-#define        XFS_SB_UUID             XFS_SB_MVAL(UUID)
-#define        XFS_SB_FNAME            XFS_SB_MVAL(FNAME)
-#define        XFS_SB_ROOTINO          XFS_SB_MVAL(ROOTINO)
-#define        XFS_SB_RBMINO           XFS_SB_MVAL(RBMINO)
-#define        XFS_SB_RSUMINO          XFS_SB_MVAL(RSUMINO)
-#define        XFS_SB_VERSIONNUM       XFS_SB_MVAL(VERSIONNUM)
-#define XFS_SB_UQUOTINO                XFS_SB_MVAL(UQUOTINO)
-#define XFS_SB_GQUOTINO                XFS_SB_MVAL(GQUOTINO)
-#define XFS_SB_QFLAGS          XFS_SB_MVAL(QFLAGS)
-#define XFS_SB_SHARED_VN       XFS_SB_MVAL(SHARED_VN)
-#define XFS_SB_UNIT            XFS_SB_MVAL(UNIT)
-#define XFS_SB_WIDTH           XFS_SB_MVAL(WIDTH)
-#define XFS_SB_ICOUNT          XFS_SB_MVAL(ICOUNT)
-#define XFS_SB_IFREE           XFS_SB_MVAL(IFREE)
-#define XFS_SB_FDBLOCKS                XFS_SB_MVAL(FDBLOCKS)
-#define XFS_SB_FEATURES2       (XFS_SB_MVAL(FEATURES2) | \
-                                XFS_SB_MVAL(BAD_FEATURES2))
-#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
-#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
-#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
-#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
-#define XFS_SB_CRC             XFS_SB_MVAL(CRC)
-#define XFS_SB_PQUOTINO                XFS_SB_MVAL(PQUOTINO)
-#define        XFS_SB_NUM_BITS         ((int)XFS_SBS_FIELDCOUNT)
-#define        XFS_SB_ALL_BITS         ((1LL << XFS_SB_NUM_BITS) - 1)
-#define        XFS_SB_MOD_BITS         \
-       (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
-        XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
-        XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
-        XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
-        XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
-        XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
-        XFS_SB_PQUOTINO)
-
  
  /*
   * Misc. Flags - warning - these will be cleared by xfs_repair unless
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c

index 116ef1d..07349a1 100644 (file)
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
          */
         newlen = args.mp->m_ialloc_inos;
         if (args.mp->m_maxicount &&
-           args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)
+           percpu_counter_read(&args.mp->m_icount) + newlen >
+                                                       args.mp->m_maxicount)
                 return -ENOSPC;
         args.minlen = args.maxlen = args.mp->m_ialloc_blks;
         /*
@@ -700,7 +701,7 @@ xfs_ialloc_next_rec(
                 error = xfs_inobt_get_rec(cur, rec, &i);
                 if (error)
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
         }
  
         return 0;
@@ -724,7 +725,7 @@ xfs_ialloc_get_rec(
                 error = xfs_inobt_get_rec(cur, rec, &i);
                 if (error)
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
         }
  
         return 0;
@@ -783,12 +784,12 @@ xfs_dialloc_ag_inobt(
                 error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
                 if (error)
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
  
                 error = xfs_inobt_get_rec(cur, &rec, &j);
                 if (error)
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(j == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, j == 1, error0);
  
                 if (rec.ir_freecount > 0) {
                         /*
@@ -944,19 +945,19 @@ newino:
         error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
         if (error)
                 goto error0;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
  
         for (;;) {
                 error = xfs_inobt_get_rec(cur, &rec, &i);
                 if (error)
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
                 if (rec.ir_freecount > 0)
                         break;
                 error = xfs_btree_increment(cur, 0, &i);
                 if (error)
                         goto error0;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
         }
  
  alloc_inode:
@@ -1016,7 +1017,7 @@ xfs_dialloc_ag_finobt_near(
                 error = xfs_inobt_get_rec(lcur, rec, &i);
                 if (error)
                         return error;
-               XFS_WANT_CORRUPTED_RETURN(i == 1);
+               XFS_WANT_CORRUPTED_RETURN(lcur->bc_mp, i == 1);
  
                 /*
                  * See if we've landed in the parent inode record. The finobt
@@ -1039,10 +1040,10 @@ xfs_dialloc_ag_finobt_near(
                 error = xfs_inobt_get_rec(rcur, &rrec, &j);
                 if (error)
                         goto error_rcur;
-               XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur);
+               XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, j == 1, error_rcur);
         }
  
-       XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur);
+       XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, i == 1 || j == 1, error_rcur);
         if (i == 1 && j == 1) {
                 /*
                  * Both the left and right records are valid. Choose the closer
@@ -1095,7 +1096,7 @@ xfs_dialloc_ag_finobt_newino(
                         error = xfs_inobt_get_rec(cur, rec, &i);
                         if (error)
                                 return error;
-                       XFS_WANT_CORRUPTED_RETURN(i == 1);
+                       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
                         return 0;
                 }
         }
@@ -1106,12 +1107,12 @@ xfs_dialloc_ag_finobt_newino(
         error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
         if (error)
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
  
         error = xfs_inobt_get_rec(cur, rec, &i);
         if (error)
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
  
         return 0;
  }
@@ -1133,19 +1134,19 @@ xfs_dialloc_ag_update_inobt(
         error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
         if (error)
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
  
         error = xfs_inobt_get_rec(cur, &rec, &i);
         if (error)
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(i == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
         ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
                                    XFS_INODES_PER_CHUNK) == 0);
  
         rec.ir_free &= ~XFS_INOBT_MASK(offset);
         rec.ir_freecount--;
  
-       XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) &&
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, (rec.ir_free == frec->ir_free) &&
                                   (rec.ir_freecount == frec->ir_freecount));
  
         return xfs_inobt_update(cur, &rec);
@@ -1340,7 +1341,8 @@ xfs_dialloc(
          * inode.
          */
         if (mp->m_maxicount &&
-           mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {
+           percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
+                                                       mp->m_maxicount) {
                 noroom = 1;
                 okalloc = 0;
         }
@@ -1475,14 +1477,14 @@ xfs_difree_inobt(
                         __func__, error);
                 goto error0;
         }
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
         error = xfs_inobt_get_rec(cur, &rec, &i);
         if (error) {
                 xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
                         __func__, error);
                 goto error0;
         }
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
         /*
          * Get the offset in the inode chunk.
          */
@@ -1592,7 +1594,7 @@ xfs_difree_finobt(
                  * freed an inode in a previously fully allocated chunk. If not,
                  * something is out of sync.
                  */
-               XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error);
+               XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
  
                 error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount,
                                              ibtrec->ir_free, &i);
@@ -1613,12 +1615,12 @@ xfs_difree_finobt(
         error = xfs_inobt_get_rec(cur, &rec, &i);
         if (error)
                 goto error;
-       XFS_WANT_CORRUPTED_GOTO(i == 1, error);
+       XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
  
         rec.ir_free |= XFS_INOBT_MASK(offset);
         rec.ir_freecount++;
  
-       XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) &&
+       XFS_WANT_CORRUPTED_GOTO(mp, (rec.ir_free == ibtrec->ir_free) &&
                                 (rec.ir_freecount == ibtrec->ir_freecount),
                                 error);
  
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c

index b0a5fe9..dc4bfc5 100644 (file)
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -111,14 +111,6 @@ xfs_mount_validate_sb(
         bool            check_inprogress,
         bool            check_version)
  {
-
-       /*
-        * If the log device and data device have the
-        * same device number, the log is internal.
-        * Consequently, the sb_logstart should be non-zero.  If
-        * we have a zero sb_logstart in this case, we may be trying to mount
-        * a volume filesystem in a non-volume manner.
-        */
         if (sbp->sb_magicnum != XFS_SB_MAGIC) {
                 xfs_warn(mp, "bad magic number");
                 return -EWRONGFS;
@@ -743,17 +735,15 @@ xfs_initialize_perag_data(
                 btree += pag->pagf_btreeblks;
                 xfs_perag_put(pag);
         }
-       /*
-        * Overwrite incore superblock counters with just-read data
-        */
+
+       /* Overwrite incore superblock counters with just-read data */
         spin_lock(&mp->m_sb_lock);
         sbp->sb_ifree = ifree;
         sbp->sb_icount = ialloc;
         sbp->sb_fdblocks = bfree + bfreelst + btree;
         spin_unlock(&mp->m_sb_lock);
  
-       /* Fixup the per-cpu counters as well. */
-       xfs_icsb_reinit_counters(mp);
+       xfs_reinit_percpu_counters(mp);
  
         return 0;
  }
@@ -771,6 +761,10 @@ xfs_log_sb(
         struct xfs_mount        *mp = tp->t_mountp;
         struct xfs_buf          *bp = xfs_trans_getsb(tp, mp, 0);
  
+       mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+       mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
+       mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+
         xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
         xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
         xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c

index 1d8eef9..a56960d 100644 (file)
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1232,6 +1232,117 @@ xfs_vm_releasepage(
         return try_to_free_buffers(page);
  }
  
+/*
+ * When we map a DIO buffer, we may need to attach an ioend that describes the
+ * type of write IO we are doing. This passes to the completion function the
+ * operations it needs to perform. If the mapping is for an overwrite wholly
+ * within the EOF then we don't need an ioend and so we don't allocate one.
+ * This avoids the unnecessary overhead of allocating and freeing ioends for
+ * workloads that don't require transactions on IO completion.
+ *
+ * If we get multiple mappings in a single IO, we might be mapping different
+ * types. But because the direct IO can only have a single private pointer, we
+ * need to ensure that:
+ *
+ * a) i) the ioend spans the entire region of unwritten mappings; or
+ *    ii) the ioend spans all the mappings that cross or are beyond EOF; and
+ * b) if it contains unwritten extents, it is *permanently* marked as such
+ *
+ * We could do this by chaining ioends like buffered IO does, but we only
+ * actually get one IO completion callback from the direct IO, and that spans
+ * the entire IO regardless of how many mappings and IOs are needed to complete
+ * the DIO. There is only going to be one reference to the ioend and its life
+ * cycle is constrained by the DIO completion code. hence we don't need
+ * reference counting here.
+ */
+static void
+xfs_map_direct(
+       struct inode            *inode,
+       struct buffer_head      *bh_result,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
+{
+       struct xfs_ioend        *ioend;
+       xfs_off_t               size = bh_result->b_size;
+       int                     type;
+
+       if (ISUNWRITTEN(imap))
+               type = XFS_IO_UNWRITTEN;
+       else
+               type = XFS_IO_OVERWRITE;
+
+       trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
+
+       if (bh_result->b_private) {
+               ioend = bh_result->b_private;
+               ASSERT(ioend->io_size > 0);
+               ASSERT(offset >= ioend->io_offset);
+               if (offset + size > ioend->io_offset + ioend->io_size)
+                       ioend->io_size = offset - ioend->io_offset + size;
+
+               if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
+                       ioend->io_type = XFS_IO_UNWRITTEN;
+
+               trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
+                                             ioend->io_size, ioend->io_type,
+                                             imap);
+       } else if (type == XFS_IO_UNWRITTEN ||
+                  offset + size > i_size_read(inode)) {
+               ioend = xfs_alloc_ioend(inode, type);
+               ioend->io_offset = offset;
+               ioend->io_size = size;
+
+               bh_result->b_private = ioend;
+               set_buffer_defer_completion(bh_result);
+
+               trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
+                                          imap);
+       } else {
+               trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
+                                           imap);
+       }
+}
+
+/*
+ * If this is O_DIRECT or the mpage code calling tell them how large the mapping
+ * is, so that we can avoid repeated get_blocks calls.
+ *
+ * If the mapping spans EOF, then we have to break the mapping up as the mapping
+ * for blocks beyond EOF must be marked new so that sub block regions can be
+ * correctly zeroed. We can't do this for mappings within EOF unless the mapping
+ * was just allocated or is unwritten, otherwise the callers would overwrite
+ * existing data with zeros. Hence we have to split the mapping into a range up
+ * to and including EOF, and a second mapping for beyond EOF.
+ */
+static void
+xfs_map_trim_size(
+       struct inode            *inode,
+       sector_t                iblock,
+       struct buffer_head      *bh_result,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset,
+       ssize_t                 size)
+{
+       xfs_off_t               mapping_size;
+
+       mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
+       mapping_size <<= inode->i_blkbits;
+
+       ASSERT(mapping_size > 0);
+       if (mapping_size > size)
+               mapping_size = size;
+       if (offset < i_size_read(inode) &&
+           offset + mapping_size >= i_size_read(inode)) {
+               /* limit mapping to block that spans EOF */
+               mapping_size = roundup_64(i_size_read(inode) - offset,
+                                         1 << inode->i_blkbits);
+       }
+       if (mapping_size > LONG_MAX)
+               mapping_size = LONG_MAX;
+
+       bh_result->b_size = mapping_size;
+}
+
  STATIC int
  __xfs_get_blocks(
         struct inode            *inode,
@@ -1320,31 +1431,37 @@ __xfs_get_blocks(
  
                         xfs_iunlock(ip, lockmode);
                 }
-
-               trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
+               trace_xfs_get_blocks_alloc(ip, offset, size,
+                               ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
+                                                  : XFS_IO_DELALLOC, &imap);
         } else if (nimaps) {
-               trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
+               trace_xfs_get_blocks_found(ip, offset, size,
+                               ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
+                                                  : XFS_IO_OVERWRITE, &imap);
                 xfs_iunlock(ip, lockmode);
         } else {
                 trace_xfs_get_blocks_notfound(ip, offset, size);
                 goto out_unlock;
         }
  
+       /* trim mapping down to size requested */
+       if (direct || size > (1 << inode->i_blkbits))
+               xfs_map_trim_size(inode, iblock, bh_result,
+                                 &imap, offset, size);
+
+       /*
+        * For unwritten extents do not report a disk address in the buffered
+        * read case (treat as if we're reading into a hole).
+        */
         if (imap.br_startblock != HOLESTARTBLOCK &&
-           imap.br_startblock != DELAYSTARTBLOCK) {
-               /*
-                * For unwritten extents do not report a disk address on
-                * the read case (treat as if we're reading into a hole).
-                */
-               if (create || !ISUNWRITTEN(&imap))
-                       xfs_map_buffer(inode, bh_result, &imap, offset);
-               if (create && ISUNWRITTEN(&imap)) {
-                       if (direct) {
-                               bh_result->b_private = inode;
-                               set_buffer_defer_completion(bh_result);
-                       }
+           imap.br_startblock != DELAYSTARTBLOCK &&
+           (create || !ISUNWRITTEN(&imap))) {
+               xfs_map_buffer(inode, bh_result, &imap, offset);
+               if (ISUNWRITTEN(&imap))
                         set_buffer_unwritten(bh_result);
-               }
+               /* direct IO needs special help */
+               if (create && direct)
+                       xfs_map_direct(inode, bh_result, &imap, offset);
         }
  
         /*
@@ -1377,39 +1494,6 @@ __xfs_get_blocks(
                 }
         }
  
-       /*
-        * If this is O_DIRECT or the mpage code calling tell them how large
-        * the mapping is, so that we can avoid repeated get_blocks calls.
-        *
-        * If the mapping spans EOF, then we have to break the mapping up as the
-        * mapping for blocks beyond EOF must be marked new so that sub block
-        * regions can be correctly zeroed. We can't do this for mappings within
-        * EOF unless the mapping was just allocated or is unwritten, otherwise
-        * the callers would overwrite existing data with zeros. Hence we have
-        * to split the mapping into a range up to and including EOF, and a
-        * second mapping for beyond EOF.
-        */
-       if (direct || size > (1 << inode->i_blkbits)) {
-               xfs_off_t               mapping_size;
-
-               mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
-               mapping_size <<= inode->i_blkbits;
-
-               ASSERT(mapping_size > 0);
-               if (mapping_size > size)
-                       mapping_size = size;
-               if (offset < i_size_read(inode) &&
-                   offset + mapping_size >= i_size_read(inode)) {
-                       /* limit mapping to block that spans EOF */
-                       mapping_size = roundup_64(i_size_read(inode) - offset,
-                                                 1 << inode->i_blkbits);
-               }
-               if (mapping_size > LONG_MAX)
-                       mapping_size = LONG_MAX;
-
-               bh_result->b_size = mapping_size;
-       }
-
         return 0;
  
  out_unlock:
@@ -1440,9 +1524,11 @@ xfs_get_blocks_direct(
  /*
   * Complete a direct I/O write request.
   *
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
- * need to issue a transaction to convert the range from unwritten to written
- * extents.
+ * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
+ * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
+ * wholly within the EOF and so there is nothing for us to do. Note that in this
+ * case the completion can be called in interrupt context, whereas if we have an
+ * ioend we will always be called in task context (i.e. from a workqueue).
   */
  STATIC void
  xfs_end_io_direct_write(
@@ -1454,43 +1540,71 @@ xfs_end_io_direct_write(
         struct inode            *inode = file_inode(iocb->ki_filp);
         struct xfs_inode        *ip = XFS_I(inode);
         struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ioend        *ioend = private;
  
-       if (XFS_FORCED_SHUTDOWN(mp))
+       trace_xfs_gbmap_direct_endio(ip, offset, size,
+                                    ioend ? ioend->io_type : 0, NULL);
+
+       if (!ioend) {
+               ASSERT(offset + size <= i_size_read(inode));
                 return;
+       }
+
+       if (XFS_FORCED_SHUTDOWN(mp))
+               goto out_end_io;
  
         /*
-        * While the generic direct I/O code updates the inode size, it does
-        * so only after the end_io handler is called, which means our
-        * end_io handler thinks the on-disk size is outside the in-core
-        * size.  To prevent this just update it a little bit earlier here.
+        * dio completion end_io functions are only called on writes if more
+        * than 0 bytes was written.
          */
+       ASSERT(size > 0);
+
+       /*
+        * The ioend only maps whole blocks, while the IO may be sector aligned.
+        * Hence the ioend offset/size may not match the IO offset/size exactly.
+        * Because we don't map overwrites within EOF into the ioend, the offset
+        * may not match, but only if the endio spans EOF.  Either way, write
+        * the IO sizes into the ioend so that completion processing does the
+        * right thing.
+        */
+       ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
+       ioend->io_size = size;
+       ioend->io_offset = offset;
+
+       /*
+        * The ioend tells us whether we are doing unwritten extent conversion
+        * or an append transaction that updates the on-disk file size. These
+        * cases are the only cases where we should *potentially* be needing
+        * to update the VFS inode size.
+        *
+        * We need to update the in-core inode size here so that we don't end up
+        * with the on-disk inode size being outside the in-core inode size. We
+        * have no other method of updating EOF for AIO, so always do it here
+        * if necessary.
+        *
+        * We need to lock the test/set EOF update as we can be racing with
+        * other IO completions here to update the EOF. Failing to serialise
+        * here can result in EOF moving backwards and Bad Things Happen when
+        * that occurs.
+        */
+       spin_lock(&ip->i_flags_lock);
         if (offset + size > i_size_read(inode))
                 i_size_write(inode, offset + size);
+       spin_unlock(&ip->i_flags_lock);
  
         /*
-        * For direct I/O we do not know if we need to allocate blocks or not,
-        * so we can't preallocate an append transaction, as that results in
-        * nested reservations and log space deadlocks. Hence allocate the
-        * transaction here. While this is sub-optimal and can block IO
-        * completion for some time, we're stuck with doing it this way until
-        * we can pass the ioend to the direct IO allocation callbacks and
-        * avoid nesting that way.
+        * If we are doing an append IO that needs to update the EOF on disk,
+        * do the transaction reserve now so we can use common end io
+        * processing. Stashing the error (if there is one) in the ioend will
+        * result in the ioend processing passing on the error if it is
+        * possible as we can't return it from here.
          */
-       if (private && size > 0) {
-               xfs_iomap_write_unwritten(ip, offset, size);
-       } else if (offset + size > ip->i_d.di_size) {
-               struct xfs_trans        *tp;
-               int                     error;
-
-               tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-               error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-               if (error) {
-                       xfs_trans_cancel(tp, 0);
-                       return;
-               }
+       if (ioend->io_type == XFS_IO_OVERWRITE)
+               ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
  
-               xfs_setfilesize(ip, tp, offset, size);
-       }
+out_end_io:
+       xfs_end_io(&ioend->io_work);
+       return;
  }
  
  STATIC ssize_t
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c

index 83af4c1..f9c1c64 100644 (file)
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -132,9 +132,10 @@ xfs_attr3_leaf_inactive(
         int                     size;
         int                     tmp;
         int                     i;
+       struct xfs_mount        *mp = bp->b_target->bt_mount;
  
         leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
  
         /*
          * Count the number of "remote" value extents.
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c

index a43d370..65fb37a 100644 (file)
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -225,6 +225,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
         int error, i;
         struct xfs_buf *bp;
         struct xfs_inode        *dp = context->dp;
+       struct xfs_mount        *mp = dp->i_mount;
  
         trace_xfs_attr_node_list(context);
  
@@ -256,7 +257,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
                         case XFS_ATTR_LEAF_MAGIC:
                         case XFS_ATTR3_LEAF_MAGIC:
                                 leaf = bp->b_addr;
-                               xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+                               xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo,
+                                                            &leafhdr, leaf);
                                 entries = xfs_attr3_leaf_entryp(leaf);
                                 if (cursor->hashval > be32_to_cpu(
                                                 entries[leafhdr.count - 1].hashval)) {
@@ -340,7 +342,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
                         xfs_trans_brelse(NULL, bp);
                         return error;
                 }
-               xfs_attr3_leaf_hdr_from_disk(&leafhdr, leaf);
+               xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
                 if (context->seen_enough || leafhdr.forw == 0)
                         break;
                 cursor->blkno = leafhdr.forw;
@@ -368,11 +370,12 @@ xfs_attr3_leaf_list_int(
         struct xfs_attr_leaf_entry      *entry;
         int                             retval;
         int                             i;
+       struct xfs_mount                *mp = context->dp->i_mount;
  
         trace_xfs_attr_list_leaf(context);
  
         leaf = bp->b_addr;
-       xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);
+       xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
         entries = xfs_attr3_leaf_entryp(leaf);
  
         cursor = context->cursor;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c

index 22a5dcb..a52bbd3 100644 (file)
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1376,22 +1376,19 @@ out:
  }
  
  /*
- * xfs_collapse_file_space()
- *     This routine frees disk space and shift extent for the given file.
- *     The first thing we do is to free data blocks in the specified range
- *     by calling xfs_free_file_space(). It would also sync dirty data
- *     and invalidate page cache over the region on which collapse range
- *     is working. And Shift extent records to the left to cover a hole.
- * RETURNS:
- *     0 on success
- *     errno on error
- *
+ * @next_fsb will keep track of the extent currently undergoing shift.
+ * @stop_fsb will keep track of the extent at which we have to stop.
+ * If we are shifting left, we will start with block (offset + len) and
+ * shift each extent till last extent.
+ * If we are shifting right, we will start with last extent inside file space
+ * and continue until we reach the block corresponding to offset.
   */
-int
-xfs_collapse_file_space(
-       struct xfs_inode        *ip,
-       xfs_off_t               offset,
-       xfs_off_t               len)
+static int
+xfs_shift_file_space(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       xfs_off_t               len,
+       enum shift_direction    direction)
  {
         int                     done = 0;
         struct xfs_mount        *mp = ip->i_mount;
@@ -1400,21 +1397,26 @@ xfs_collapse_file_space(
         struct xfs_bmap_free    free_list;
         xfs_fsblock_t           first_block;
         int                     committed;
-       xfs_fileoff_t           start_fsb;
+       xfs_fileoff_t           stop_fsb;
         xfs_fileoff_t           next_fsb;
         xfs_fileoff_t           shift_fsb;
  
-       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
  
-       trace_xfs_collapse_file_space(ip);
+       if (direction == SHIFT_LEFT) {
+               next_fsb = XFS_B_TO_FSB(mp, offset + len);
+               stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
+       } else {
+               /*
+                * If right shift, delegate the work of initialization of
+                * next_fsb to xfs_bmap_shift_extent as it has ilock held.
+                */
+               next_fsb = NULLFSBLOCK;
+               stop_fsb = XFS_B_TO_FSB(mp, offset);
+       }
  
-       next_fsb = XFS_B_TO_FSB(mp, offset + len);
         shift_fsb = XFS_B_TO_FSB(mp, len);
  
-       error = xfs_free_file_space(ip, offset, len);
-       if (error)
-               return error;
-
         /*
          * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
          * into the accessible region of the file.
@@ -1427,20 +1429,28 @@ xfs_collapse_file_space(
  
         /*
          * Writeback and invalidate cache for the remainder of the file as we're
-        * about to shift down every extent from the collapse range to EOF. The
-        * free of the collapse range above might have already done some of
-        * this, but we shouldn't rely on it to do anything outside of the range
-        * that was freed.
+        * about to shift down every extent from offset to EOF.
          */
         error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                            offset + len, -1);
+                                            offset, -1);
         if (error)
                 return error;
         error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
-                                       (offset + len) >> PAGE_CACHE_SHIFT, -1);
+                                       offset >> PAGE_CACHE_SHIFT, -1);
         if (error)
                 return error;
  
+       /*
+        * The extent shiting code works on extent granularity. So, if
+        * stop_fsb is not the starting block of extent, we need to split
+        * the extent at stop_fsb.
+        */
+       if (direction == SHIFT_RIGHT) {
+               error = xfs_bmap_split_extent(ip, stop_fsb);
+               if (error)
+                       return error;
+       }
+
         while (!error && !done) {
                 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
                 /*
@@ -1464,7 +1474,7 @@ xfs_collapse_file_space(
                 if (error)
                         goto out;
  
-               xfs_trans_ijoin(tp, ip, 0);
+               xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
  
                 xfs_bmap_init(&free_list, &first_block);
  
@@ -1472,10 +1482,9 @@ xfs_collapse_file_space(
                  * We are using the write transaction in which max 2 bmbt
                  * updates are allowed
                  */
-               start_fsb = next_fsb;
-               error = xfs_bmap_shift_extents(tp, ip, start_fsb, shift_fsb,
-                               &done, &next_fsb, &first_block, &free_list,
-                               XFS_BMAP_MAX_SHIFT_EXTENTS);
+               error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
+                               &done, stop_fsb, &first_block, &free_list,
+                               direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
                 if (error)
                         goto out;
  
@@ -1484,17 +1493,69 @@ xfs_collapse_file_space(
                         goto out;
  
                 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
         }
  
         return error;
  
  out:
         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
         return error;
  }
  
+/*
+ * xfs_collapse_file_space()
+ *     This routine frees disk space and shift extent for the given file.
+ *     The first thing we do is to free data blocks in the specified range
+ *     by calling xfs_free_file_space(). It would also sync dirty data
+ *     and invalidate page cache over the region on which collapse range
+ *     is working. And Shift extent records to the left to cover a hole.
+ * RETURNS:
+ *     0 on success
+ *     errno on error
+ *
+ */
+int
+xfs_collapse_file_space(
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       xfs_off_t               len)
+{
+       int error;
+
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       trace_xfs_collapse_file_space(ip);
+
+       error = xfs_free_file_space(ip, offset, len);
+       if (error)
+               return error;
+
+       return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
+}
+
+/*
+ * xfs_insert_file_space()
+ *     This routine create hole space by shifting extents for the given file.
+ *     The first thing we do is to sync dirty data and invalidate page cache
+ *     over the region on which insert range is working. And split an extent
+ *     to two extents at given offset by calling xfs_bmap_split_extent.
+ *     And shift all extent records which are laying between [offset,
+ *     last allocated extent] to the right to reserve hole range.
+ * RETURNS:
+ *     0 on success
+ *     errno on error
+ */
+int
+xfs_insert_file_space(
+       struct xfs_inode        *ip,
+       loff_t                  offset,
+       loff_t                  len)
+{
+       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       trace_xfs_insert_file_space(ip);
+
+       return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
+}
+
  /*
   * We need to check that the format of the data fork in the temporary inode is
   * valid for the target inode before doing the swap. This is not a problem with
@@ -1599,13 +1660,6 @@ xfs_swap_extent_flush(
         /* Verify O_DIRECT for ftmp */
         if (VFS_I(ip)->i_mapping->nrpages)
                 return -EINVAL;
-
-       /*
-        * Don't try to swap extents on mmap()d files because we can't lock
-        * out races against page faults safely.
-        */
-       if (mapping_mapped(VFS_I(ip)->i_mapping))
-               return -EBUSY;
         return 0;
  }
  
@@ -1633,13 +1687,14 @@ xfs_swap_extents(
         }
  
         /*
-        * Lock up the inodes against other IO and truncate to begin with.
-        * Then we can ensure the inodes are flushed and have no page cache
-        * safely. Once we have done this we can take the ilocks and do the rest
-        * of the checks.
+        * Lock the inodes against other IO, page faults and truncate to
+        * begin with.  Then we can ensure the inodes are flushed and have no
+        * page cache safely. Once we have done this we can take the ilocks and
+        * do the rest of the checks.
          */
-       lock_flags = XFS_IOLOCK_EXCL;
+       lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
         xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
+       xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
  
         /* Verify that both files have the same format */
         if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
@@ -1666,8 +1721,16 @@ xfs_swap_extents(
                 xfs_trans_cancel(tp, 0);
                 goto out_unlock;
         }
+
+       /*
+        * Lock and join the inodes to the tansaction so that transaction commit
+        * or cancel will unlock the inodes from this point onwards.
+        */
         xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
         lock_flags |= XFS_ILOCK_EXCL;
+       xfs_trans_ijoin(tp, ip, lock_flags);
+       xfs_trans_ijoin(tp, tip, lock_flags);
+
  
         /* Verify all data are being swapped */
         if (sxp->sx_offset != 0 ||
@@ -1720,9 +1783,6 @@ xfs_swap_extents(
                         goto out_trans_cancel;
         }
  
-       xfs_trans_ijoin(tp, ip, lock_flags);
-       xfs_trans_ijoin(tp, tip, lock_flags);
-
         /*
          * Before we've swapped the forks, lets set the owners of the forks
          * appropriately. We have to do this as we are demand paging the btree
@@ -1856,5 +1916,5 @@ out_unlock:
  
  out_trans_cancel:
         xfs_trans_cancel(tp, 0);
-       goto out_unlock;
+       goto out;
  }
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h

index 736429a..af97d9a 100644 (file)
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -63,6 +63,8 @@ int   xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
                             xfs_off_t len);
  int    xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
                                 xfs_off_t len);
+int    xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
+                               xfs_off_t len);
  
  /* EOF block manipulation functions */
  bool   xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index 507d96a..092d652 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -537,9 +537,9 @@ xfs_buf_item_push(
  
         /* has a previous flush failed due to IO errors? */
         if ((bp->b_flags & XBF_WRITE_FAIL) &&
-           ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
+           ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
                 xfs_warn(bp->b_target->bt_mount,
-"Detected failing async write on buffer block 0x%llx. Retrying async write.",
+"Failing async write on buffer block 0x%llx. Retrying async write.",
                          (long long)bp->b_bn);
         }
  
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c

index 799e5a2..e85a951 100644 (file)
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -84,7 +84,7 @@ xfs_trim_extents(
                 error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
                 if (error)
                         goto out_del_cursor;
-               XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
+               XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_del_cursor);
                 ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
  
                 /*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c

index 3ee186a..338e50b 100644 (file)
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -131,7 +131,7 @@ xfs_error_report(
  {
         if (level <= xfs_error_level) {
                 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
-               "Internal error %s at line %d of file %s.  Caller %pF",
+               "Internal error %s at line %d of file %s.  Caller %pS",
                             tag, linenum, filename, ra);
  
                 xfs_stack_trace();
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h

index 279a76e..c0394ed 100644 (file)
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -40,25 +40,25 @@ extern void xfs_verifier_error(struct xfs_buf *bp);
  /*
   * Macros to set EFSCORRUPTED & return/branch.
   */
-#define        XFS_WANT_CORRUPTED_GOTO(x,l)    \
+#define        XFS_WANT_CORRUPTED_GOTO(mp, x, l)       \
         { \
                 int fs_is_ok = (x); \
                 ASSERT(fs_is_ok); \
                 if (unlikely(!fs_is_ok)) { \
                         XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
-                                        XFS_ERRLEVEL_LOW, NULL); \
+                                        XFS_ERRLEVEL_LOW, mp); \
                         error = -EFSCORRUPTED; \
                         goto l; \
                 } \
         }
  
-#define        XFS_WANT_CORRUPTED_RETURN(x)    \
+#define        XFS_WANT_CORRUPTED_RETURN(mp, x)        \
         { \
                 int fs_is_ok = (x); \
                 ASSERT(fs_is_ok); \
                 if (unlikely(!fs_is_ok)) { \
                         XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
-                                        XFS_ERRLEVEL_LOW, NULL); \
+                                        XFS_ERRLEVEL_LOW, mp); \
                         return -EFSCORRUPTED; \
                 } \
         }
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c

index b97359b..652cd3c 100644 (file)
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -215,7 +215,7 @@ xfs_fs_get_parent(
         int                     error;
         struct xfs_inode        *cip;
  
-       error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
+       error = xfs_lookup(XFS_I(d_inode(child)), &xfs_name_dotdot, &cip, NULL);
         if (unlikely(error))
                 return ERR_PTR(error);
  
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index 1f12ad0..8121e75 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -559,7 +559,7 @@ restart:
         if (error <= 0)
                 return error;
  
-       error = xfs_break_layouts(inode, iolock);
+       error = xfs_break_layouts(inode, iolock, true);
         if (error)
                 return error;
  
@@ -569,21 +569,42 @@ restart:
          * write.  If zeroing is needed and we are currently holding the
          * iolock shared, we need to update it to exclusive which implies
          * having to redo all checks before.
+        *
+        * We need to serialise against EOF updates that occur in IO
+        * completions here. We want to make sure that nobody is changing the
+        * size while we do this check until we have placed an IO barrier (i.e.
+        * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
+        * The spinlock effectively forms a memory barrier once we have the
+        * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
+        * and hence be able to correctly determine if we need to run zeroing.
          */
+       spin_lock(&ip->i_flags_lock);
         if (iocb->ki_pos > i_size_read(inode)) {
                 bool    zero = false;
  
+               spin_unlock(&ip->i_flags_lock);
                 if (*iolock == XFS_IOLOCK_SHARED) {
                         xfs_rw_iunlock(ip, *iolock);
                         *iolock = XFS_IOLOCK_EXCL;
                         xfs_rw_ilock(ip, *iolock);
                         iov_iter_reexpand(from, count);
+
+                       /*
+                        * We now have an IO submission barrier in place, but
+                        * AIO can do EOF updates during IO completion and hence
+                        * we now need to wait for all of them to drain. Non-AIO
+                        * DIO will have drained before we are given the
+                        * XFS_IOLOCK_EXCL, and so for most cases this wait is a
+                        * no-op.
+                        */
+                       inode_dio_wait(inode);
                         goto restart;
                 }
                 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
                 if (error)
                         return error;
-       }
+       } else
+               spin_unlock(&ip->i_flags_lock);
  
         /*
          * Updating the timestamps will grab the ilock again from
@@ -645,6 +666,8 @@ xfs_file_dio_aio_write(
         int                     iolock;
         size_t                  count = iov_iter_count(from);
         loff_t                  pos = iocb->ki_pos;
+       loff_t                  end;
+       struct iov_iter         data;
         struct xfs_buftarg      *target = XFS_IS_REALTIME_INODE(ip) ?
                                         mp->m_rtdev_targp : mp->m_ddev_targp;
  
@@ -685,10 +708,11 @@ xfs_file_dio_aio_write(
                 goto out;
         count = iov_iter_count(from);
         pos = iocb->ki_pos;
+       end = pos + count - 1;
  
         if (mapping->nrpages) {
                 ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
-                                                   pos, pos + count - 1);
+                                                  pos, end);
                 if (ret)
                         goto out;
                 /*
@@ -698,7 +722,7 @@ xfs_file_dio_aio_write(
                  */
                 ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
                                         pos >> PAGE_CACHE_SHIFT,
-                                       (pos + count - 1) >> PAGE_CACHE_SHIFT);
+                                       end >> PAGE_CACHE_SHIFT);
                 WARN_ON_ONCE(ret);
                 ret = 0;
         }
@@ -715,8 +739,22 @@ xfs_file_dio_aio_write(
         }
  
         trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-       ret = generic_file_direct_write(iocb, from, pos);
  
+       data = *from;
+       ret = mapping->a_ops->direct_IO(iocb, &data, pos);
+
+       /* see generic_file_direct_write() for why this is necessary */
+       if (mapping->nrpages) {
+               invalidate_inode_pages2_range(mapping,
+                                             pos >> PAGE_CACHE_SHIFT,
+                                             end >> PAGE_CACHE_SHIFT);
+       }
+
+       if (ret > 0) {
+               pos += ret;
+               iov_iter_advance(from, ret);
+               iocb->ki_pos = pos;
+       }
  out:
         xfs_rw_iunlock(ip, iolock);
  
@@ -822,6 +860,11 @@ xfs_file_write_iter(
         return ret;
  }
  
+#define        XFS_FALLOC_FL_SUPPORTED                                         \
+               (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
+                FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |      \
+                FALLOC_FL_INSERT_RANGE)
+
  STATIC long
  xfs_file_fallocate(
         struct file             *file,
@@ -835,18 +878,21 @@ xfs_file_fallocate(
         enum xfs_prealloc_flags flags = 0;
         uint                    iolock = XFS_IOLOCK_EXCL;
         loff_t                  new_size = 0;
+       bool                    do_file_insert = 0;
  
         if (!S_ISREG(inode->i_mode))
                 return -EINVAL;
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-                    FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+       if (mode & ~XFS_FALLOC_FL_SUPPORTED)
                 return -EOPNOTSUPP;
  
         xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock);
+       error = xfs_break_layouts(inode, &iolock, false);
         if (error)
                 goto out_unlock;
  
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       iolock |= XFS_MMAPLOCK_EXCL;
+
         if (mode & FALLOC_FL_PUNCH_HOLE) {
                 error = xfs_free_file_space(ip, offset, len);
                 if (error)
@@ -873,6 +919,27 @@ xfs_file_fallocate(
                 error = xfs_collapse_file_space(ip, offset, len);
                 if (error)
                         goto out_unlock;
+       } else if (mode & FALLOC_FL_INSERT_RANGE) {
+               unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+               new_size = i_size_read(inode) + len;
+               if (offset & blksize_mask || len & blksize_mask) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+
+               /* check the new inode size does not wrap through zero */
+               if (new_size > inode->i_sb->s_maxbytes) {
+                       error = -EFBIG;
+                       goto out_unlock;
+               }
+
+               /* Offset should be less than i_size */
+               if (offset >= i_size_read(inode)) {
+                       error = -EINVAL;
+                       goto out_unlock;
+               }
+               do_file_insert = 1;
         } else {
                 flags |= XFS_PREALLOC_SET;
  
@@ -907,8 +974,19 @@ xfs_file_fallocate(
                 iattr.ia_valid = ATTR_SIZE;
                 iattr.ia_size = new_size;
                 error = xfs_setattr_size(ip, &iattr);
+               if (error)
+                       goto out_unlock;
         }
  
+       /*
+        * Perform hole insertion now that the file size has been
+        * updated so that if we crash during the operation we don't
+        * leave shifted extents past EOF and hence losing access to
+        * the data that is contained within them.
+        */
+       if (do_file_insert)
+               error = xfs_insert_file_space(ip, offset, len);
+
  out_unlock:
         xfs_iunlock(ip, iolock);
         return error;
@@ -996,20 +1074,6 @@ xfs_file_mmap(
         return 0;
  }
  
-/*
- * mmap()d file has taken write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
-       struct vm_area_struct   *vma,
-       struct vm_fault         *vmf)
-{
-       return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
  /*
   * This type is designed to indicate the type of offset we would like
   * to search from page cache for xfs_seek_hole_data().
@@ -1385,6 +1449,55 @@ xfs_file_llseek(
         }
  }
  
+/*
+ * Locking for serialisation of IO during page faults. This results in a lock
+ * ordering of:
+ *
+ * mmap_sem (MM)
+ *   i_mmap_lock (XFS - truncate serialisation)
+ *     page_lock (MM)
+ *       i_lock (XFS - extent map serialisation)
+ */
+STATIC int
+xfs_filemap_fault(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+
+       trace_xfs_filemap_fault(ip);
+
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = filemap_fault(vma, vmf);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+       return error;
+}
+
+/*
+ * mmap()d file has taken write protection fault and is being made writable. We
+ * can set the page state up correctly for a writable page, which means we can
+ * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
+ * mapping.
+ */
+STATIC int
+xfs_filemap_page_mkwrite(
+       struct vm_area_struct   *vma,
+       struct vm_fault         *vmf)
+{
+       struct xfs_inode        *ip = XFS_I(vma->vm_file->f_mapping->host);
+       int                     error;
+
+       trace_xfs_filemap_page_mkwrite(ip);
+
+       xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+       error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+       xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+
+       return error;
+}
+
  const struct file_operations xfs_file_operations = {
         .llseek         = xfs_file_llseek,
         .read_iter      = xfs_file_read_iter,
@@ -1415,7 +1528,7 @@ const struct file_operations xfs_dir_file_operations = {
  };
  
  static const struct vm_operations_struct xfs_file_vm_ops = {
-       .fault          = filemap_fault,
+       .fault          = xfs_filemap_fault,
         .map_pages      = filemap_map_pages,
-       .page_mkwrite   = xfs_vm_page_mkwrite,
+       .page_mkwrite   = xfs_filemap_page_mkwrite,
  };
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c

index a2e86e8..da82f1c 100644 (file)
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -294,7 +294,7 @@ xfs_filestream_get_parent(
         if (!parent)
                 goto out_dput;
  
-       dir = igrab(parent->d_inode);
+       dir = igrab(d_inode(parent));
         dput(parent);
  
  out_dput:
@@ -322,7 +322,7 @@ xfs_filestream_lookup_ag(
  
         pip = xfs_filestream_get_parent(ip);
         if (!pip)
-               goto out;
+               return NULLAGNUMBER;
  
         mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
         if (mru) {
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c

index 74efe5b..cb7e8a2 100644 (file)
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -637,12 +637,13 @@ xfs_fs_counts(
         xfs_mount_t             *mp,
         xfs_fsop_counts_t       *cnt)
  {
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+       cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
+       cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
+       cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
+                                                       XFS_ALLOC_SET_ASIDE(mp);
+
         spin_lock(&mp->m_sb_lock);
-       cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
         cnt->freertx = mp->m_sb.sb_frextents;
-       cnt->freeino = mp->m_sb.sb_ifree;
-       cnt->allocino = mp->m_sb.sb_icount;
         spin_unlock(&mp->m_sb_lock);
         return 0;
  }
@@ -692,14 +693,9 @@ xfs_reserve_blocks(
          * what to do. This means that the amount of free space can
          * change while we do this, so we need to retry if we end up
          * trying to reserve more space than is available.
-        *
-        * We also use the xfs_mod_incore_sb() interface so that we
-        * don't have to care about whether per cpu counter are
-        * enabled, disabled or even compiled in....
          */
  retry:
         spin_lock(&mp->m_sb_lock);
-       xfs_icsb_sync_counters_locked(mp, 0);
  
         /*
          * If our previous reservation was larger than the current value,
@@ -716,7 +712,8 @@ retry:
         } else {
                 __int64_t       free;
  
-               free =  mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+               free = percpu_counter_sum(&mp->m_fdblocks) -
+                                                       XFS_ALLOC_SET_ASIDE(mp);
                 if (!free)
                         goto out; /* ENOSPC and fdblks_delta = 0 */
  
@@ -755,8 +752,7 @@ out:
                  * the extra reserve blocks from the reserve.....
                  */
                 int error;
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                fdblks_delta, 0);
+               error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
                 if (error == -ENOSPC)
                         goto retry;
         }
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index 9771b7e..76a9f27 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -439,11 +439,11 @@ again:
         *ipp = ip;
  
         /*
-        * If we have a real type for an on-disk inode, we can set ops(&unlock)
+        * If we have a real type for an on-disk inode, we can setup the inode
          * now.  If it's a new inode being created, xfs_ialloc will handle it.
          */
         if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
-               xfs_setup_inode(ip);
+               xfs_setup_existing_inode(ip);
         return 0;
  
  out_error_or_again:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 6163767..d6ebc85 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
  }
  
  /*
- * The xfs inode contains 2 locks: a multi-reader lock called the
- * i_iolock and a multi-reader lock called the i_lock.  This routine
- * allows either or both of the locks to be obtained.
+ * The xfs inode contains 3 multi-reader locks: the i_iolock the i_mmap_lock and
+ * the i_lock.  This routine allows various combinations of the locks to be
+ * obtained.
   *
- * The 2 locks should always be ordered so that the IO lock is
- * obtained first in order to prevent deadlock.
+ * The 3 locks should always be ordered so that the IO lock is obtained first,
+ * the mmap lock second and the ilock last in order to prevent deadlock.
   *
- * ip -- the inode being locked
- * lock_flags -- this parameter indicates the inode's locks
- *       to be locked.  It can be:
- *             XFS_IOLOCK_SHARED,
- *             XFS_IOLOCK_EXCL,
- *             XFS_ILOCK_SHARED,
- *             XFS_ILOCK_EXCL,
- *             XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED,
- *             XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL,
- *             XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED,
- *             XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
+ * Basic locking order:
+ *
+ * i_iolock -> i_mmap_lock -> page_lock -> i_ilock
+ *
+ * mmap_sem locking order:
+ *
+ * i_iolock -> page lock -> mmap_sem
+ * mmap_sem -> i_mmap_lock -> page_lock
+ *
+ * The difference in mmap_sem locking order mean that we cannot hold the
+ * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
+ * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
+ * in get_user_pages() to map the user pages into the kernel address space for
+ * direct IO. Similarly the i_iolock cannot be taken inside a page fault because
+ * page faults already hold the mmap_sem.
+ *
+ * Hence to serialise fully against both syscall and mmap based IO, we need to
+ * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
+ * taken in places where we need to invalidate the page cache in a race
+ * free manner (e.g. truncate, hole punch and other extent manipulation
+ * functions).
   */
  void
  xfs_ilock(
@@ -150,6 +160,8 @@ xfs_ilock(
          */
         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
                (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
         ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +171,11 @@ xfs_ilock(
         else if (lock_flags & XFS_IOLOCK_SHARED)
                 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
  
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
+
         if (lock_flags & XFS_ILOCK_EXCL)
                 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
         else if (lock_flags & XFS_ILOCK_SHARED)
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
          */
         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
                (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
         ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
                 if (!mrtryaccess(&ip->i_iolock))
                         goto out;
         }
+
+       if (lock_flags & XFS_MMAPLOCK_EXCL) {
+               if (!mrtryupdate(&ip->i_mmaplock))
+                       goto out_undo_iolock;
+       } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
+               if (!mrtryaccess(&ip->i_mmaplock))
+                       goto out_undo_iolock;
+       }
+
         if (lock_flags & XFS_ILOCK_EXCL) {
                 if (!mrtryupdate(&ip->i_lock))
-                       goto out_undo_iolock;
+                       goto out_undo_mmaplock;
         } else if (lock_flags & XFS_ILOCK_SHARED) {
                 if (!mrtryaccess(&ip->i_lock))
-                       goto out_undo_iolock;
+                       goto out_undo_mmaplock;
         }
         return 1;
  
- out_undo_iolock:
+out_undo_mmaplock:
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrunlock_excl(&ip->i_mmaplock);
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mrunlock_shared(&ip->i_mmaplock);
+out_undo_iolock:
         if (lock_flags & XFS_IOLOCK_EXCL)
                 mrunlock_excl(&ip->i_iolock);
         else if (lock_flags & XFS_IOLOCK_SHARED)
                 mrunlock_shared(&ip->i_iolock);
- out:
+out:
         return 0;
  }
  
@@ -244,6 +277,8 @@ xfs_iunlock(
          */
         ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
                (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+       ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+              (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
         ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
                (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
         ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +289,11 @@ xfs_iunlock(
         else if (lock_flags & XFS_IOLOCK_SHARED)
                 mrunlock_shared(&ip->i_iolock);
  
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrunlock_excl(&ip->i_mmaplock);
+       else if (lock_flags & XFS_MMAPLOCK_SHARED)
+               mrunlock_shared(&ip->i_mmaplock);
+
         if (lock_flags & XFS_ILOCK_EXCL)
                 mrunlock_excl(&ip->i_lock);
         else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +311,14 @@ xfs_ilock_demote(
         xfs_inode_t             *ip,
         uint                    lock_flags)
  {
-       ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
-       ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
+       ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
+       ASSERT((lock_flags &
+               ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
  
         if (lock_flags & XFS_ILOCK_EXCL)
                 mrdemote(&ip->i_lock);
+       if (lock_flags & XFS_MMAPLOCK_EXCL)
+               mrdemote(&ip->i_mmaplock);
         if (lock_flags & XFS_IOLOCK_EXCL)
                 mrdemote(&ip->i_iolock);
  
@@ -294,6 +337,12 @@ xfs_isilocked(
                 return rwsem_is_locked(&ip->i_lock.mr_lock);
         }
  
+       if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
+               if (!(lock_flags & XFS_MMAPLOCK_SHARED))
+                       return !!ip->i_mmaplock.mr_writer;
+               return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
+       }
+
         if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
                 if (!(lock_flags & XFS_IOLOCK_SHARED))
                         return !!ip->i_iolock.mr_writer;
@@ -314,14 +363,27 @@ int xfs_lock_delays;
  #endif
  
  /*
- * Bump the subclass so xfs_lock_inodes() acquires each lock with
- * a different value
+ * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
+ * value. This shouldn't be called for page fault locking, but we also need to
+ * ensure we don't overrun the number of lockdep subclasses for the iolock or
+ * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
   */
  static inline int
  xfs_lock_inumorder(int lock_mode, int subclass)
  {
-       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
+       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+               ASSERT(subclass + XFS_LOCK_INUMORDER <
+                       (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
                 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+       }
+
+       if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
+               ASSERT(subclass + XFS_LOCK_INUMORDER <
+                       (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
+               lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
+                                                       XFS_MMAPLOCK_SHIFT;
+       }
+
         if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
                 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
  
@@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass)
  }
  
  /*
- * The following routine will lock n inodes in exclusive mode.
- * We assume the caller calls us with the inodes in i_ino order.
+ * The following routine will lock n inodes in exclusive mode.  We assume the
+ * caller calls us with the inodes in i_ino order.
   *
- * We need to detect deadlock where an inode that we lock
- * is in the AIL and we start waiting for another inode that is locked
- * by a thread in a long running transaction (such as truncate). This can
- * result in deadlock since the long running trans might need to wait
- * for the inode we just locked in order to push the tail and free space
- * in the log.
+ * We need to detect deadlock where an inode that we lock is in the AIL and we
+ * start waiting for another inode that is locked by a thread in a long running
+ * transaction (such as truncate). This can result in deadlock since the long
+ * running trans might need to wait for the inode we just locked in order to
+ * push the tail and free space in the log.
   */
  void
  xfs_lock_inodes(
@@ -348,30 +409,27 @@ xfs_lock_inodes(
         int             attempts = 0, i, j, try_lock;
         xfs_log_item_t  *lp;
  
-       ASSERT(ips && (inodes >= 2)); /* we need at least two */
+       /* currently supports between 2 and 5 inodes */
+       ASSERT(ips && inodes >= 2 && inodes <= 5);
  
         try_lock = 0;
         i = 0;
-
  again:
         for (; i < inodes; i++) {
                 ASSERT(ips[i]);
  
-               if (i && (ips[i] == ips[i-1]))  /* Already locked */
+               if (i && (ips[i] == ips[i - 1]))        /* Already locked */
                         continue;
  
                 /*
-                * If try_lock is not set yet, make sure all locked inodes
-                * are not in the AIL.
-                * If any are, set try_lock to be used later.
+                * If try_lock is not set yet, make sure all locked inodes are
+                * not in the AIL.  If any are, set try_lock to be used later.
                  */
-
                 if (!try_lock) {
                         for (j = (i - 1); j >= 0 && !try_lock; j--) {
                                 lp = (xfs_log_item_t *)ips[j]->i_itemp;
-                               if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+                               if (lp && (lp->li_flags & XFS_LI_IN_AIL))
                                         try_lock++;
-                               }
                         }
                 }
  
@@ -381,51 +439,42 @@ again:
                  * we can't get any, we must release all we have
                  * and try again.
                  */
+               if (!try_lock) {
+                       xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
+                       continue;
+               }
+
+               /* try_lock means we have an inode locked that is in the AIL. */
+               ASSERT(i != 0);
+               if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
+                       continue;
  
-               if (try_lock) {
-                       /* try_lock must be 0 if i is 0. */
+               /*
+                * Unlock all previous guys and try again.  xfs_iunlock will try
+                * to push the tail if the inode is in the AIL.
+                */
+               attempts++;
+               for (j = i - 1; j >= 0; j--) {
                         /*
-                        * try_lock means we have an inode locked
-                        * that is in the AIL.
+                        * Check to see if we've already unlocked this one.  Not
+                        * the first one going back, and the inode ptr is the
+                        * same.
                          */
-                       ASSERT(i != 0);
-                       if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) {
-                               attempts++;
-
-                               /*
-                                * Unlock all previous guys and try again.
-                                * xfs_iunlock will try to push the tail
-                                * if the inode is in the AIL.
-                                */
-
-                               for(j = i - 1; j >= 0; j--) {
-
-                                       /*
-                                        * Check to see if we've already
-                                        * unlocked this one.
-                                        * Not the first one going back,
-                                        * and the inode ptr is the same.
-                                        */
-                                       if ((j != (i - 1)) && ips[j] ==
-                                                               ips[j+1])
-                                               continue;
-
-                                       xfs_iunlock(ips[j], lock_mode);
-                               }
+                       if (j != (i - 1) && ips[j] == ips[j + 1])
+                               continue;
+
+                       xfs_iunlock(ips[j], lock_mode);
+               }
  
-                               if ((attempts % 5) == 0) {
-                                       delay(1); /* Don't just spin the CPU */
+               if ((attempts % 5) == 0) {
+                       delay(1); /* Don't just spin the CPU */
  #ifdef DEBUG
-                                       xfs_lock_delays++;
+                       xfs_lock_delays++;
  #endif
-                               }
-                               i = 0;
-                               try_lock = 0;
-                               goto again;
-                       }
-               } else {
-                       xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
                 }
+               i = 0;
+               try_lock = 0;
+               goto again;
         }
  
  #ifdef DEBUG
@@ -440,10 +489,10 @@ again:
  }
  
  /*
- * xfs_lock_two_inodes() can only be used to lock one type of lock
- * at a time - the iolock or the ilock, but not both at once. If
- * we lock both at once, lockdep will report false positives saying
- * we have violated locking orders.
+ * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
   */
  void
  xfs_lock_two_inodes(
@@ -455,8 +504,12 @@ xfs_lock_two_inodes(
         int                     attempts = 0;
         xfs_log_item_t          *lp;
  
-       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL))
-               ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0);
+       if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
+               ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
+               ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+       } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
+               ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
+
         ASSERT(ip0->i_ino != ip1->i_ino);
  
         if (ip0->i_ino > ip1->i_ino) {
@@ -818,7 +871,7 @@ xfs_ialloc(
         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
         xfs_trans_log_inode(tp, ip, flags);
  
-       /* now that we have an i_mode we can setup inode ops and unlock */
+       /* now that we have an i_mode we can setup the inode structure */
         xfs_setup_inode(ip);
  
         *ipp = ip;
@@ -1235,12 +1288,14 @@ xfs_create(
         xfs_trans_cancel(tp, cancel_flags);
   out_release_inode:
         /*
-        * Wait until after the current transaction is aborted to
-        * release the inode.  This prevents recursive transactions
-        * and deadlocks from xfs_inactive.
+        * Wait until after the current transaction is aborted to finish the
+        * setup of the inode and release the inode.  This prevents recursive
+        * transactions and deadlocks from xfs_inactive.
          */
-       if (ip)
+       if (ip) {
+               xfs_finish_inode_setup(ip);
                 IRELE(ip);
+       }
  
         xfs_qm_dqrele(udqp);
         xfs_qm_dqrele(gdqp);
@@ -1345,12 +1400,14 @@ xfs_create_tmpfile(
         xfs_trans_cancel(tp, cancel_flags);
   out_release_inode:
         /*
-        * Wait until after the current transaction is aborted to
-        * release the inode.  This prevents recursive transactions
-        * and deadlocks from xfs_inactive.
+        * Wait until after the current transaction is aborted to finish the
+        * setup of the inode and release the inode.  This prevents recursive
+        * transactions and deadlocks from xfs_inactive.
          */
-       if (ip)
+       if (ip) {
+               xfs_finish_inode_setup(ip);
                 IRELE(ip);
+       }
  
         xfs_qm_dqrele(udqp);
         xfs_qm_dqrele(gdqp);
@@ -2611,19 +2668,22 @@ xfs_remove(
  /*
   * Enter all inodes for a rename transaction into a sorted array.
   */
+#define __XFS_SORT_INODES      5
  STATIC void
  xfs_sort_for_rename(
-       xfs_inode_t     *dp1,   /* in: old (source) directory inode */
-       xfs_inode_t     *dp2,   /* in: new (target) directory inode */
-       xfs_inode_t     *ip1,   /* in: inode of old entry */
-       xfs_inode_t     *ip2,   /* in: inode of new entry, if it
-                                  already exists, NULL otherwise. */
-       xfs_inode_t     **i_tab,/* out: array of inode returned, sorted */
-       int             *num_inodes)  /* out: number of inodes in array */
+       struct xfs_inode        *dp1,   /* in: old (source) directory inode */
+       struct xfs_inode        *dp2,   /* in: new (target) directory inode */
+       struct xfs_inode        *ip1,   /* in: inode of old entry */
+       struct xfs_inode        *ip2,   /* in: inode of new entry */
+       struct xfs_inode        *wip,   /* in: whiteout inode */
+       struct xfs_inode        **i_tab,/* out: sorted array of inodes */
+       int                     *num_inodes)  /* in/out: inodes in array */
  {
-       xfs_inode_t             *temp;
         int                     i, j;
  
+       ASSERT(*num_inodes == __XFS_SORT_INODES);
+       memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
+
         /*
          * i_tab contains a list of pointers to inodes.  We initialize
          * the table here & we'll sort it.  We will then use it to
@@ -2631,25 +2691,24 @@ xfs_sort_for_rename(
          *
          * Note that the table may contain duplicates.  e.g., dp1 == dp2.
          */
-       i_tab[0] = dp1;
-       i_tab[1] = dp2;
-       i_tab[2] = ip1;
-       if (ip2) {
-               *num_inodes = 4;
-               i_tab[3] = ip2;
-       } else {
-               *num_inodes = 3;
-               i_tab[3] = NULL;
-       }
+       i = 0;
+       i_tab[i++] = dp1;
+       i_tab[i++] = dp2;
+       i_tab[i++] = ip1;
+       if (ip2)
+               i_tab[i++] = ip2;
+       if (wip)
+               i_tab[i++] = wip;
+       *num_inodes = i;
  
         /*
          * Sort the elements via bubble sort.  (Remember, there are at
-        * most 4 elements to sort, so this is adequate.)
+        * most 5 elements to sort, so this is adequate.)
          */
         for (i = 0; i < *num_inodes; i++) {
                 for (j = 1; j < *num_inodes; j++) {
                         if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
-                               temp = i_tab[j];
+                               struct xfs_inode *temp = i_tab[j];
                                 i_tab[j] = i_tab[j-1];
                                 i_tab[j-1] = temp;
                         }
@@ -2657,6 +2716,31 @@ xfs_sort_for_rename(
         }
  }
  
+static int
+xfs_finish_rename(
+       struct xfs_trans        *tp,
+       struct xfs_bmap_free    *free_list)
+{
+       int                     committed = 0;
+       int                     error;
+
+       /*
+        * If this is a synchronous mount, make sure that the rename transaction
+        * goes to disk before returning to the user.
+        */
+       if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
+               xfs_trans_set_sync(tp);
+
+       error = xfs_bmap_finish(&tp, free_list, &committed);
+       if (error) {
+               xfs_bmap_cancel(free_list);
+               xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
+               return error;
+       }
+
+       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+}
+
  /*
   * xfs_cross_rename()
   *
@@ -2685,14 +2769,14 @@ xfs_cross_rename(
                                 ip2->i_ino,
                                 first_block, free_list, spaceres);
         if (error)
-               goto out;
+               goto out_trans_abort;
  
         /* Swap inode number for dirent in second parent */
         error = xfs_dir_replace(tp, dp2, name2,
                                 ip1->i_ino,
                                 first_block, free_list, spaceres);
         if (error)
-               goto out;
+               goto out_trans_abort;
  
         /*
          * If we're renaming one or more directories across different parents,
@@ -2707,16 +2791,16 @@ xfs_cross_rename(
                                                 dp1->i_ino, first_block,
                                                 free_list, spaceres);
                         if (error)
-                               goto out;
+                               goto out_trans_abort;
  
                         /* transfer ip2 ".." reference to dp1 */
                         if (!S_ISDIR(ip1->i_d.di_mode)) {
                                 error = xfs_droplink(tp, dp2);
                                 if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                                 error = xfs_bumplink(tp, dp1);
                                 if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                         }
  
                         /*
@@ -2734,16 +2818,16 @@ xfs_cross_rename(
                                                 dp2->i_ino, first_block,
                                                 free_list, spaceres);
                         if (error)
-                               goto out;
+                               goto out_trans_abort;
  
                         /* transfer ip1 ".." reference to dp2 */
                         if (!S_ISDIR(ip2->i_d.di_mode)) {
                                 error = xfs_droplink(tp, dp1);
                                 if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                                 error = xfs_bumplink(tp, dp2);
                                 if (error)
-                                       goto out;
+                                       goto out_trans_abort;
                         }
  
                         /*
@@ -2771,66 +2855,108 @@ xfs_cross_rename(
         }
         xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
-out:
+       return xfs_finish_rename(tp, free_list);
+
+out_trans_abort:
+       xfs_bmap_cancel(free_list);
+       xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
         return error;
  }
  
+/*
+ * xfs_rename_alloc_whiteout()
+ *
+ * Return a referenced, unlinked, unlocked inode that that can be used as a
+ * whiteout in a rename transaction. We use a tmpfile inode here so that if we
+ * crash between allocating the inode and linking it into the rename transaction
+ * recovery will free the inode and we won't leak it.
+ */
+static int
+xfs_rename_alloc_whiteout(
+       struct xfs_inode        *dp,
+       struct xfs_inode        **wip)
+{
+       struct xfs_inode        *tmpfile;
+       int                     error;
+
+       error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile);
+       if (error)
+               return error;
+
+       /* Satisfy xfs_bumplink that this is a real tmpfile */
+       xfs_finish_inode_setup(tmpfile);
+       VFS_I(tmpfile)->i_state |= I_LINKABLE;
+
+       *wip = tmpfile;
+       return 0;
+}
+
  /*
   * xfs_rename
   */
  int
  xfs_rename(
-       xfs_inode_t     *src_dp,
-       struct xfs_name *src_name,
-       xfs_inode_t     *src_ip,
-       xfs_inode_t     *target_dp,
-       struct xfs_name *target_name,
-       xfs_inode_t     *target_ip,
-       unsigned int    flags)
+       struct xfs_inode        *src_dp,
+       struct xfs_name         *src_name,
+       struct xfs_inode        *src_ip,
+       struct xfs_inode        *target_dp,
+       struct xfs_name         *target_name,
+       struct xfs_inode        *target_ip,
+       unsigned int            flags)
  {
-       xfs_trans_t     *tp = NULL;
-       xfs_mount_t     *mp = src_dp->i_mount;
-       int             new_parent;             /* moving to a new dir */
-       int             src_is_directory;       /* src_name is a directory */
-       int             error;
-       xfs_bmap_free_t free_list;
-       xfs_fsblock_t   first_block;
-       int             cancel_flags;
-       int             committed;
-       xfs_inode_t     *inodes[4];
-       int             spaceres;
-       int             num_inodes;
+       struct xfs_mount        *mp = src_dp->i_mount;
+       struct xfs_trans        *tp;
+       struct xfs_bmap_free    free_list;
+       xfs_fsblock_t           first_block;
+       struct xfs_inode        *wip = NULL;            /* whiteout inode */
+       struct xfs_inode        *inodes[__XFS_SORT_INODES];
+       int                     num_inodes = __XFS_SORT_INODES;
+       bool                    new_parent = (src_dp != target_dp);
+       bool                    src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+       int                     cancel_flags = 0;
+       int                     spaceres;
+       int                     error;
  
         trace_xfs_rename(src_dp, target_dp, src_name, target_name);
  
-       new_parent = (src_dp != target_dp);
-       src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+       if ((flags & RENAME_EXCHANGE) && !target_ip)
+               return -EINVAL;
  
-       xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
+       /*
+        * If we are doing a whiteout operation, allocate the whiteout inode
+        * we will be placing at the target and ensure the type is set
+        * appropriately.
+        */
+       if (flags & RENAME_WHITEOUT) {
+               ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
+               error = xfs_rename_alloc_whiteout(target_dp, &wip);
+               if (error)
+                       return error;
+
+               /* setup target dirent info as whiteout */
+               src_name->type = XFS_DIR3_FT_CHRDEV;
+       }
+
+       xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
                                 inodes, &num_inodes);
  
-       xfs_bmap_init(&free_list, &first_block);
         tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
-       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
         spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
         error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
         if (error == -ENOSPC) {
                 spaceres = 0;
                 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
         }
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               goto std_return;
-       }
+       if (error)
+               goto out_trans_cancel;
+       cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
  
         /*
          * Attach the dquots to the inodes
          */
         error = xfs_qm_vop_rename_dqattach(inodes);
-       if (error) {
-               xfs_trans_cancel(tp, cancel_flags);
-               goto std_return;
-       }
+       if (error)
+               goto out_trans_cancel;
  
         /*
          * Lock all the participating inodes. Depending upon whether
@@ -2851,6 +2977,8 @@ xfs_rename(
         xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
         if (target_ip)
                 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
+       if (wip)
+               xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
  
         /*
          * If we are using project inheritance, we only allow renames
@@ -2860,24 +2988,16 @@ xfs_rename(
         if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
                      (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
                 error = -EXDEV;
-               goto error_return;
+               goto out_trans_cancel;
         }
  
-       /*
-        * Handle RENAME_EXCHANGE flags
-        */
-       if (flags & RENAME_EXCHANGE) {
-               if (target_ip == NULL) {
-                       error = -EINVAL;
-                       goto error_return;
-               }
-               error = xfs_cross_rename(tp, src_dp, src_name, src_ip,
-                                        target_dp, target_name, target_ip,
-                                        &free_list, &first_block, spaceres);
-               if (error)
-                       goto abort_return;
-               goto finish_rename;
-       }
+       xfs_bmap_init(&free_list, &first_block);
+
+       /* RENAME_EXCHANGE is unique from here on. */
+       if (flags & RENAME_EXCHANGE)
+               return xfs_cross_rename(tp, src_dp, src_name, src_ip,
+                                       target_dp, target_name, target_ip,
+                                       &free_list, &first_block, spaceres);
  
         /*
          * Set up the target.
@@ -2890,7 +3010,7 @@ xfs_rename(
                 if (!spaceres) {
                         error = xfs_dir_canenter(tp, target_dp, target_name);
                         if (error)
-                               goto error_return;
+                               goto out_trans_cancel;
                 }
                 /*
                  * If target does not exist and the rename crosses
@@ -2901,9 +3021,9 @@ xfs_rename(
                                                 src_ip->i_ino, &first_block,
                                                 &free_list, spaceres);
                 if (error == -ENOSPC)
-                       goto error_return;
+                       goto out_bmap_cancel;
                 if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
  
                 xfs_trans_ichgtime(tp, target_dp,
                                         XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2911,7 +3031,7 @@ xfs_rename(
                 if (new_parent && src_is_directory) {
                         error = xfs_bumplink(tp, target_dp);
                         if (error)
-                               goto abort_return;
+                               goto out_trans_abort;
                 }
         } else { /* target_ip != NULL */
                 /*
@@ -2926,7 +3046,7 @@ xfs_rename(
                         if (!(xfs_dir_isempty(target_ip)) ||
                             (target_ip->i_d.di_nlink > 2)) {
                                 error = -EEXIST;
-                               goto error_return;
+                               goto out_trans_cancel;
                         }
                 }
  
@@ -2943,7 +3063,7 @@ xfs_rename(
                                         src_ip->i_ino,
                                         &first_block, &free_list, spaceres);
                 if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
  
                 xfs_trans_ichgtime(tp, target_dp,
                                         XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2954,7 +3074,7 @@ xfs_rename(
                  */
                 error = xfs_droplink(tp, target_ip);
                 if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
  
                 if (src_is_directory) {
                         /*
@@ -2962,7 +3082,7 @@ xfs_rename(
                          */
                         error = xfs_droplink(tp, target_ip);
                         if (error)
-                               goto abort_return;
+                               goto out_trans_abort;
                 }
         } /* target_ip != NULL */
  
@@ -2979,7 +3099,7 @@ xfs_rename(
                                         &first_block, &free_list, spaceres);
                 ASSERT(error != -EEXIST);
                 if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
         }
  
         /*
@@ -3005,49 +3125,67 @@ xfs_rename(
                  */
                 error = xfs_droplink(tp, src_dp);
                 if (error)
-                       goto abort_return;
+                       goto out_trans_abort;
         }
  
-       error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+       /*
+        * For whiteouts, we only need to update the source dirent with the
+        * inode number of the whiteout inode rather than removing it
+        * altogether.
+        */
+       if (wip) {
+               error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
                                         &first_block, &free_list, spaceres);
+       } else
+               error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
+                                          &first_block, &free_list, spaceres);
         if (error)
-               goto abort_return;
-
-       xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
-       xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
-       if (new_parent)
-               xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
+               goto out_trans_abort;
  
-finish_rename:
         /*
-        * If this is a synchronous mount, make sure that the
-        * rename transaction goes to disk before returning to
-        * the user.
+        * For whiteouts, we need to bump the link count on the whiteout inode.
+        * This means that failures all the way up to this point leave the inode
+        * on the unlinked list and so cleanup is a simple matter of dropping
+        * the remaining reference to it. If we fail here after bumping the link
+        * count, we're shutting down the filesystem so we'll never see the
+        * intermediate state on disk.
          */
-       if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
-               xfs_trans_set_sync(tp);
-       }
+       if (wip) {
+               ASSERT(wip->i_d.di_nlink == 0);
+               error = xfs_bumplink(tp, wip);
+               if (error)
+                       goto out_trans_abort;
+               error = xfs_iunlink_remove(tp, wip);
+               if (error)
+                       goto out_trans_abort;
+               xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
  
-       error = xfs_bmap_finish(&tp, &free_list, &committed);
-       if (error) {
-               xfs_bmap_cancel(&free_list);
-               xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
-                                XFS_TRANS_ABORT));
-               goto std_return;
+               /*
+                * Now we have a real link, clear the "I'm a tmpfile" state
+                * flag from the inode so it doesn't accidentally get misused in
+                * future.
+                */
+               VFS_I(wip)->i_state &= ~I_LINKABLE;
         }
  
-       /*
-        * trans_commit will unlock src_ip, target_ip & decrement
-        * the vnode references.
-        */
-       return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
+       if (new_parent)
+               xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
  
- abort_return:
+       error = xfs_finish_rename(tp, &free_list);
+       if (wip)
+               IRELE(wip);
+       return error;
+
+out_trans_abort:
         cancel_flags |= XFS_TRANS_ABORT;
- error_return:
+out_bmap_cancel:
         xfs_bmap_cancel(&free_list);
+out_trans_cancel:
         xfs_trans_cancel(tp, cancel_flags);
- std_return:
+       if (wip)
+               IRELE(wip);
         return error;
  }
  
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index a1cd55f..8f22d20 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
         struct xfs_inode_log_item *i_itemp;     /* logging information */
         mrlock_t                i_lock;         /* inode lock */
         mrlock_t                i_iolock;       /* inode IO lock */
+       mrlock_t                i_mmaplock;     /* inode mmap IO lock */
         atomic_t                i_pincount;     /* inode pin count */
         spinlock_t              i_flags_lock;   /* inode i_flags lock */
         /* Miscellaneous state. */
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  #define        XFS_IOLOCK_SHARED       (1<<1)
  #define        XFS_ILOCK_EXCL          (1<<2)
  #define        XFS_ILOCK_SHARED        (1<<3)
+#define        XFS_MMAPLOCK_EXCL       (1<<4)
+#define        XFS_MMAPLOCK_SHARED     (1<<5)
  
  #define XFS_LOCK_MASK          (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
-                               | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
+                               | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
+                               | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
  
  #define XFS_LOCK_FLAGS \
         { XFS_IOLOCK_EXCL,      "IOLOCK_EXCL" }, \
         { XFS_IOLOCK_SHARED,    "IOLOCK_SHARED" }, \
         { XFS_ILOCK_EXCL,       "ILOCK_EXCL" }, \
-       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }
+       { XFS_ILOCK_SHARED,     "ILOCK_SHARED" }, \
+       { XFS_MMAPLOCK_EXCL,    "MMAPLOCK_EXCL" }, \
+       { XFS_MMAPLOCK_SHARED,  "MMAPLOCK_SHARED" }
  
  
  /*
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
  #define XFS_IOLOCK_SHIFT       16
  #define        XFS_IOLOCK_PARENT       (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
  
+#define XFS_MMAPLOCK_SHIFT     20
+
  #define XFS_ILOCK_SHIFT                24
  #define        XFS_ILOCK_PARENT        (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
  #define        XFS_ILOCK_RTBITMAP      (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
  #define        XFS_ILOCK_RTSUM         (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
  
-#define XFS_IOLOCK_DEP_MASK    0x00ff0000
+#define XFS_IOLOCK_DEP_MASK    0x000f0000
+#define XFS_MMAPLOCK_DEP_MASK  0x00f00000
  #define XFS_ILOCK_DEP_MASK     0xff000000
-#define XFS_LOCK_DEP_MASK      (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK)
+#define XFS_LOCK_DEP_MASK      (XFS_IOLOCK_DEP_MASK | \
+                                XFS_MMAPLOCK_DEP_MASK | \
+                                XFS_ILOCK_DEP_MASK)
  
-#define XFS_IOLOCK_DEP(flags)  (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
-#define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
+#define XFS_IOLOCK_DEP(flags)  (((flags) & XFS_IOLOCK_DEP_MASK) \
+                                       >> XFS_IOLOCK_SHIFT)
+#define XFS_MMAPLOCK_DEP(flags)        (((flags) & XFS_MMAPLOCK_DEP_MASK) \
+                                       >> XFS_MMAPLOCK_SHIFT)
+#define XFS_ILOCK_DEP(flags)   (((flags) & XFS_ILOCK_DEP_MASK) \
+                                       >> XFS_ILOCK_SHIFT)
  
  /*
   * For multiple groups support: if S_ISGID bit is set in the parent
@@ -391,6 +406,28 @@ int        xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
  int    xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
  
  
+/* from xfs_iops.c */
+/*
+ * When setting up a newly allocated inode, we need to call
+ * xfs_finish_inode_setup() once the inode is fully instantiated at
+ * the VFS level to prevent the rest of the world seeing the inode
+ * before we've completed instantiation. Otherwise we can do it
+ * the moment the inode lookup is complete.
+ */
+extern void xfs_setup_inode(struct xfs_inode *ip);
+static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
+{
+       xfs_iflags_clear(ip, XFS_INEW);
+       barrier();
+       unlock_new_inode(VFS_I(ip));
+}
+
+static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
+{
+       xfs_setup_inode(ip);
+       xfs_finish_inode_setup(ip);
+}
+
  #define IHOLD(ip) \
  do { \
         ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c

index ac4feae..87f67c6 100644 (file)
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -82,7 +82,7 @@ xfs_find_handle(
                 error = user_lpath((const char __user *)hreq->path, &path);
                 if (error)
                         return error;
-               inode = path.dentry->d_inode;
+               inode = d_inode(path.dentry);
         }
         ip = XFS_I(inode);
  
@@ -210,7 +210,7 @@ xfs_open_by_handle(
         dentry = xfs_handlereq_to_dentry(parfilp, hreq);
         if (IS_ERR(dentry))
                 return PTR_ERR(dentry);
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
  
         /* Restrict xfs_open_by_handle to directories & regular files. */
         if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
@@ -303,7 +303,7 @@ xfs_readlink_by_handle(
                 goto out_dput;
         }
  
-       error = xfs_readlink(XFS_I(dentry->d_inode), link);
+       error = xfs_readlink(XFS_I(d_inode(dentry)), link);
         if (error)
                 goto out_kfree;
         error = readlink_copy(hreq->ohandle, olen, link);
@@ -376,7 +376,7 @@ xfs_fssetdm_by_handle(
                 return PTR_ERR(dentry);
         }
  
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+       if (IS_IMMUTABLE(d_inode(dentry)) || IS_APPEND(d_inode(dentry))) {
                 error = -EPERM;
                 goto out;
         }
@@ -386,7 +386,7 @@ xfs_fssetdm_by_handle(
                 goto out;
         }
  
-       error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+       error = xfs_set_dmattrs(XFS_I(d_inode(dentry)), fsd.fsd_dmevmask,
                                  fsd.fsd_dmstate);
  
   out:
@@ -429,7 +429,7 @@ xfs_attrlist_by_handle(
                 goto out_dput;
  
         cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+       error = xfs_attr_list(XFS_I(d_inode(dentry)), kbuf, al_hreq.buflen,
                                         al_hreq.flags, cursor);
         if (error)
                 goto out_kfree;
@@ -559,7 +559,7 @@ xfs_attrmulti_by_handle(
                 switch (ops[i].am_opcode) {
                 case ATTR_OP_GET:
                         ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                         ops[i].am_attrvalue, &ops[i].am_length,
                                         ops[i].am_flags);
                         break;
@@ -568,7 +568,7 @@ xfs_attrmulti_by_handle(
                         if (ops[i].am_error)
                                 break;
                         ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                         ops[i].am_attrvalue, ops[i].am_length,
                                         ops[i].am_flags);
                         mnt_drop_write_file(parfilp);
@@ -578,7 +578,7 @@ xfs_attrmulti_by_handle(
                         if (ops[i].am_error)
                                 break;
                         ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                         ops[i].am_flags);
                         mnt_drop_write_file(parfilp);
                         break;
@@ -631,7 +631,7 @@ xfs_ioc_space(
  
         if (filp->f_flags & O_DSYNC)
                 flags |= XFS_PREALLOC_SYNC;
-       if (ioflags & XFS_IO_INVIS)     
+       if (ioflags & XFS_IO_INVIS)
                 flags |= XFS_PREALLOC_INVISIBLE;
  
         error = mnt_want_write_file(filp);
@@ -639,10 +639,13 @@ xfs_ioc_space(
                 return error;
  
         xfs_ilock(ip, iolock);
-       error = xfs_break_layouts(inode, &iolock);
+       error = xfs_break_layouts(inode, &iolock, false);
         if (error)
                 goto out_unlock;
  
+       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+       iolock |= XFS_MMAPLOCK_EXCL;
+
         switch (bf->l_whence) {
         case 0: /*SEEK_SET*/
                 break;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c

index bfc7c7c..b88bdc8 100644 (file)
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -375,7 +375,7 @@ xfs_compat_attrlist_by_handle(
                 goto out_dput;
  
         cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
-       error = xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
+       error = xfs_attr_list(XFS_I(d_inode(dentry)), kbuf, al_hreq.buflen,
                                         al_hreq.flags, cursor);
         if (error)
                 goto out_kfree;
@@ -445,7 +445,7 @@ xfs_compat_attrmulti_by_handle(
                 switch (ops[i].am_opcode) {
                 case ATTR_OP_GET:
                         ops[i].am_error = xfs_attrmulti_attr_get(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                         compat_ptr(ops[i].am_attrvalue),
                                         &ops[i].am_length, ops[i].am_flags);
                         break;
@@ -454,7 +454,7 @@ xfs_compat_attrmulti_by_handle(
                         if (ops[i].am_error)
                                 break;
                         ops[i].am_error = xfs_attrmulti_attr_set(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                         compat_ptr(ops[i].am_attrvalue),
                                         ops[i].am_length, ops[i].am_flags);
                         mnt_drop_write_file(parfilp);
@@ -464,7 +464,7 @@ xfs_compat_attrmulti_by_handle(
                         if (ops[i].am_error)
                                 break;
                         ops[i].am_error = xfs_attrmulti_attr_remove(
-                                       dentry->d_inode, attr_name,
+                                       d_inode(dentry), attr_name,
                                         ops[i].am_flags);
                         mnt_drop_write_file(parfilp);
                         break;
@@ -504,7 +504,7 @@ xfs_compat_fssetdm_by_handle(
         if (IS_ERR(dentry))
                 return PTR_ERR(dentry);
  
-       if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
+       if (IS_IMMUTABLE(d_inode(dentry)) || IS_APPEND(d_inode(dentry))) {
                 error = -EPERM;
                 goto out;
         }
@@ -514,7 +514,7 @@ xfs_compat_fssetdm_by_handle(
                 goto out;
         }
  
-       error = xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
+       error = xfs_set_dmattrs(XFS_I(d_inode(dentry)), fsd.fsd_dmevmask,
                                  fsd.fsd_dmstate);
  
  out:
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index ccb1dd0..38e633b 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size(
         alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
                                        alloc_blocks);
  
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
-       freesp = mp->m_sb.sb_fdblocks;
+       freesp = percpu_counter_read_positive(&mp->m_fdblocks);
         if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
                 shift = 2;
                 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c

index e53a903..f4cd720 100644 (file)
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -187,6 +187,8 @@ xfs_generic_create(
         else
                 d_instantiate(dentry, inode);
  
+       xfs_finish_inode_setup(ip);
+
   out_free_acl:
         if (default_acl)
                 posix_acl_release(default_acl);
@@ -195,6 +197,7 @@ xfs_generic_create(
         return error;
  
   out_cleanup_inode:
+       xfs_finish_inode_setup(ip);
         if (!tmpfile)
                 xfs_cleanup_inode(dir, inode, dentry);
         iput(inode);
@@ -301,7 +304,7 @@ xfs_vn_link(
         struct inode    *dir,
         struct dentry   *dentry)
  {
-       struct inode    *inode = old_dentry->d_inode;
+       struct inode    *inode = d_inode(old_dentry);
         struct xfs_name name;
         int             error;
  
@@ -326,7 +329,7 @@ xfs_vn_unlink(
  
         xfs_dentry_to_name(&name, dentry, 0);
  
-       error = xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
+       error = xfs_remove(XFS_I(dir), &name, XFS_I(d_inode(dentry)));
         if (error)
                 return error;
  
@@ -367,9 +370,11 @@ xfs_vn_symlink(
                 goto out_cleanup_inode;
  
         d_instantiate(dentry, inode);
+       xfs_finish_inode_setup(cip);
         return 0;
  
   out_cleanup_inode:
+       xfs_finish_inode_setup(cip);
         xfs_cleanup_inode(dir, inode, dentry);
         iput(inode);
   out:
@@ -384,22 +389,22 @@ xfs_vn_rename(
         struct dentry   *ndentry,
         unsigned int    flags)
  {
-       struct inode    *new_inode = ndentry->d_inode;
+       struct inode    *new_inode = d_inode(ndentry);
         int             omode = 0;
         struct xfs_name oname;
         struct xfs_name nname;
  
-       if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
+       if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                 return -EINVAL;
  
         /* if we are exchanging files, we need to set i_mode of both files */
         if (flags & RENAME_EXCHANGE)
-               omode = ndentry->d_inode->i_mode;
+               omode = d_inode(ndentry)->i_mode;
  
         xfs_dentry_to_name(&oname, odentry, omode);
-       xfs_dentry_to_name(&nname, ndentry, odentry->d_inode->i_mode);
+       xfs_dentry_to_name(&nname, ndentry, d_inode(odentry)->i_mode);
  
-       return xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
+       return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)),
                           XFS_I(ndir), &nname,
                           new_inode ? XFS_I(new_inode) : NULL, flags);
  }
@@ -421,7 +426,7 @@ xfs_vn_follow_link(
         if (!link)
                 goto out_err;
  
-       error = xfs_readlink(XFS_I(dentry->d_inode), link);
+       error = xfs_readlink(XFS_I(d_inode(dentry)), link);
         if (unlikely(error))
                 goto out_kfree;
  
@@ -441,7 +446,7 @@ xfs_vn_getattr(
         struct dentry           *dentry,
         struct kstat            *stat)
  {
-       struct inode            *inode = dentry->d_inode;
+       struct inode            *inode = d_inode(dentry);
         struct xfs_inode        *ip = XFS_I(inode);
         struct xfs_mount        *mp = ip->i_mount;
  
@@ -766,6 +771,7 @@ xfs_setattr_size(
                 return error;
  
         ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
+       ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
         ASSERT(S_ISREG(ip->i_d.di_mode));
         ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
                 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
@@ -829,55 +835,27 @@ xfs_setattr_size(
         inode_dio_wait(inode);
  
         /*
-        * Do all the page cache truncate work outside the transaction context
-        * as the "lock" order is page lock->log space reservation.  i.e.
-        * locking pages inside the transaction can ABBA deadlock with
-        * writeback. We have to do the VFS inode size update before we truncate
-        * the pagecache, however, to avoid racing with page faults beyond the
-        * new EOF they are not serialised against truncate operations except by
-        * page locks and size updates.
+        * We've already locked out new page faults, so now we can safely remove
+        * pages from the page cache knowing they won't get refaulted until we
+        * drop the XFS_MMAP_EXCL lock after the extent manipulations are
+        * complete. The truncate_setsize() call also cleans partial EOF page
+        * PTEs on extending truncates and hence ensures sub-page block size
+        * filesystems are correctly handled, too.
          *
-        * Hence we are in a situation where a truncate can fail with ENOMEM
-        * from xfs_trans_reserve(), but having already truncated the in-memory
-        * version of the file (i.e. made user visible changes). There's not
-        * much we can do about this, except to hope that the caller sees ENOMEM
-        * and retries the truncate operation.
+        * We have to do all the page cache truncate work outside the
+        * transaction context as the "lock" order is page lock->log space
+        * reservation as defined by extent allocation in the writeback path.
+        * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
+        * having already truncated the in-memory version of the file (i.e. made
+        * user visible changes). There's not much we can do about this, except
+        * to hope that the caller sees ENOMEM and retries the truncate
+        * operation.
          */
         error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
         if (error)
                 return error;
         truncate_setsize(inode, newsize);
  
-       /*
-        * The "we can't serialise against page faults" pain gets worse.
-        *
-        * If the file is mapped then we have to clean the page at the old EOF
-        * when extending the file. Extending the file can expose changes the
-        * underlying page mapping (e.g. from beyond EOF to a hole or
-        * unwritten), and so on the next attempt to write to that page we need
-        * to remap it for write. i.e. we need .page_mkwrite() to be called.
-        * Hence we need to clean the page to clean the pte and so a new write
-        * fault will be triggered appropriately.
-        *
-        * If we do it before we change the inode size, then we can race with a
-        * page fault that maps the page with exactly the same problem. If we do
-        * it after we change the file size, then a new page fault can come in
-        * and allocate space before we've run the rest of the truncate
-        * transaction. That's kinda grotesque, but it's better than have data
-        * over a hole, and so that's the lesser evil that has been chosen here.
-        *
-        * The real solution, however, is to have some mechanism for locking out
-        * page faults while a truncate is in progress.
-        */
-       if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
-               error = filemap_write_and_wait_range(
-                               VFS_I(ip)->i_mapping,
-                               round_down(oldsize, PAGE_CACHE_SIZE),
-                               round_up(oldsize, PAGE_CACHE_SIZE) - 1);
-               if (error)
-                       return error;
-       }
-
         tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
         error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
         if (error)
@@ -968,16 +946,20 @@ xfs_vn_setattr(
         struct dentry           *dentry,
         struct iattr            *iattr)
  {
-       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
+       struct xfs_inode        *ip = XFS_I(d_inode(dentry));
         int                     error;
  
         if (iattr->ia_valid & ATTR_SIZE) {
                 uint            iolock = XFS_IOLOCK_EXCL;
  
                 xfs_ilock(ip, iolock);
-               error = xfs_break_layouts(dentry->d_inode, &iolock);
-               if (!error)
+               error = xfs_break_layouts(d_inode(dentry), &iolock, true);
+               if (!error) {
+                       xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+                       iolock |= XFS_MMAPLOCK_EXCL;
+
                         error = xfs_setattr_size(ip, iattr);
+               }
                 xfs_iunlock(ip, iolock);
         } else {
                 error = xfs_setattr_nonsize(ip, iattr, 0);
@@ -1228,16 +1210,12 @@ xfs_diflags_to_iflags(
  }
  
  /*
- * Initialize the Linux inode, set up the operation vectors and
- * unlock the inode.
+ * Initialize the Linux inode and set up the operation vectors.
   *
- * When reading existing inodes from disk this is called directly
- * from xfs_iget, when creating a new inode it is called from
- * xfs_ialloc after setting up the inode.
- *
- * We are always called with an uninitialised linux inode here.
- * We need to initialise the necessary fields and take a reference
- * on it.
+ * When reading existing inodes from disk this is called directly from xfs_iget,
+ * when creating a new inode it is called from xfs_ialloc after setting up the
+ * inode. These callers have different criteria for clearing XFS_INEW, so leave
+ * it up to the caller to deal with unlocking the inode appropriately.
   */
  void
  xfs_setup_inode(
@@ -1324,9 +1302,4 @@ xfs_setup_inode(
                 inode_has_no_xattr(inode);
                 cache_no_acl(inode);
         }
-
-       xfs_iflags_clear(ip, XFS_INEW);
-       barrier();
-
-       unlock_new_inode(inode);
  }
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h

index ea7a98e..a0f84ab 100644 (file)
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -25,8 +25,6 @@ extern const struct file_operations xfs_dir_file_operations;
  
  extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
  
-extern void xfs_setup_inode(struct xfs_inode *);
-
  /*
   * Internal setattr interfaces.
   */
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c

index 82e3142..8042989 100644 (file)
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -229,7 +229,7 @@ xfs_bulkstat_grab_ichunk(
         error = xfs_inobt_get_rec(cur, irec, &stat);
         if (error)
                 return error;
-       XFS_WANT_CORRUPTED_RETURN(stat == 1);
+       XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
  
         /* Check if the record contains the inode in request */
         if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h

index c31d2c2..7c7842c 100644 (file)
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t;
  #undef XFS_NATIVE_HOST
  #endif
  
-/*
- * Feature macros (disable/enable)
- */
-#ifdef CONFIG_SMP
-#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#else
-#undef  HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#endif
-
  #define irix_sgid_inherit      xfs_params.sgid_inherit.val
  #define irix_symlink_mode      xfs_params.symlink_mode.val
  #define xfs_panic_mask         xfs_params.panic_mask.val
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index a5a945f..4f5784f 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4463,10 +4463,10 @@ xlog_do_recover(
         xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
         ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
         ASSERT(xfs_sb_good_version(sbp));
+       xfs_reinit_percpu_counters(log->l_mp);
+
         xfs_buf_relse(bp);
  
-       /* We've re-read the superblock so re-initialize per-cpu counters */
-       xfs_icsb_reinit_counters(log->l_mp);
  
         xlog_recover_check_summary(log);
  
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index 4fa80e6..2ce7ee3 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,18 +43,6 @@
  #include "xfs_sysfs.h"
  
  
-#ifdef HAVE_PERCPU_SB
-STATIC void    xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
-                                               int);
-STATIC void    xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
-                                               int);
-STATIC void    xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
-#else
-
-#define xfs_icsb_balance_counter(mp, a, b)             do { } while (0)
-#define xfs_icsb_balance_counter_locked(mp, a, b)      do { } while (0)
-#endif
-
  static DEFINE_MUTEX(xfs_uuid_table_mutex);
  static int xfs_uuid_table_size;
  static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
                 goto reread;
         }
  
-       /* Initialize per-cpu counters */
-       xfs_icsb_reinit_counters(mp);
+       xfs_reinit_percpu_counters(mp);
  
         /* no need to be quiet anymore, so reset the buf ops */
         bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
         if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
                 return 0;
  
-       xfs_icsb_sync_counters(mp, 0);
-
         /*
          * we don't need to do this if we are updating the superblock
          * counters on every modification.
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
         return xfs_sync_sb(mp, true);
  }
  
-/*
- * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply
- * a delta to a specified field in the in-core superblock.  Simply
- * switch on the field indicated and apply the delta to that field.
- * Fields are not allowed to dip below zero, so if the delta would
- * do this do not apply it and return EINVAL.
- *
- * The m_sb_lock must be held when this routine is called.
- */
-STATIC int
-xfs_mod_incore_sb_unlocked(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int64_t         delta,
-       int             rsvd)
+int
+xfs_mod_icount(
+       struct xfs_mount        *mp,
+       int64_t                 delta)
  {
-       int             scounter;       /* short counter for 32 bit fields */
-       long long       lcounter;       /* long counter for 64 bit fields */
-       long long       res_used, rem;
-
-       /*
-        * With the in-core superblock spin lock held, switch
-        * on the indicated field.  Apply the delta to the
-        * proper field.  If the fields value would dip below
-        * 0, then do not apply the delta and return EINVAL.
-        */
-       switch (field) {
-       case XFS_SBS_ICOUNT:
-               lcounter = (long long)mp->m_sb.sb_icount;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_icount = lcounter;
-               return 0;
-       case XFS_SBS_IFREE:
-               lcounter = (long long)mp->m_sb.sb_ifree;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_ifree = lcounter;
-               return 0;
-       case XFS_SBS_FDBLOCKS:
-               lcounter = (long long)
-                       mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-               res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
-
-               if (delta > 0) {                /* Putting blocks back */
-                       if (res_used > delta) {
-                               mp->m_resblks_avail += delta;
-                       } else {
-                               rem = delta - res_used;
-                               mp->m_resblks_avail = mp->m_resblks;
-                               lcounter += rem;
-                       }
-               } else {                                /* Taking blocks away */
-                       lcounter += delta;
-                       if (lcounter >= 0) {
-                               mp->m_sb.sb_fdblocks = lcounter +
-                                                       XFS_ALLOC_SET_ASIDE(mp);
-                               return 0;
-                       }
-
-                       /*
-                        * We are out of blocks, use any available reserved
-                        * blocks if were allowed to.
-                        */
-                       if (!rsvd)
-                               return -ENOSPC;
-
-                       lcounter = (long long)mp->m_resblks_avail + delta;
-                       if (lcounter >= 0) {
-                               mp->m_resblks_avail = lcounter;
-                               return 0;
-                       }
-                       printk_once(KERN_WARNING
-                               "Filesystem \"%s\": reserve blocks depleted! "
-                               "Consider increasing reserve pool size.",
-                               mp->m_fsname);
-                       return -ENOSPC;
-               }
-
-               mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
-               return 0;
-       case XFS_SBS_FREXTENTS:
-               lcounter = (long long)mp->m_sb.sb_frextents;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       return -ENOSPC;
-               }
-               mp->m_sb.sb_frextents = lcounter;
-               return 0;
-       case XFS_SBS_DBLOCKS:
-               lcounter = (long long)mp->m_sb.sb_dblocks;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_dblocks = lcounter;
-               return 0;
-       case XFS_SBS_AGCOUNT:
-               scounter = mp->m_sb.sb_agcount;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_agcount = scounter;
-               return 0;
-       case XFS_SBS_IMAX_PCT:
-               scounter = mp->m_sb.sb_imax_pct;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_imax_pct = scounter;
-               return 0;
-       case XFS_SBS_REXTSIZE:
-               scounter = mp->m_sb.sb_rextsize;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rextsize = scounter;
-               return 0;
-       case XFS_SBS_RBMBLOCKS:
-               scounter = mp->m_sb.sb_rbmblocks;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rbmblocks = scounter;
-               return 0;
-       case XFS_SBS_RBLOCKS:
-               lcounter = (long long)mp->m_sb.sb_rblocks;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rblocks = lcounter;
-               return 0;
-       case XFS_SBS_REXTENTS:
-               lcounter = (long long)mp->m_sb.sb_rextents;
-               lcounter += delta;
-               if (lcounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rextents = lcounter;
-               return 0;
-       case XFS_SBS_REXTSLOG:
-               scounter = mp->m_sb.sb_rextslog;
-               scounter += delta;
-               if (scounter < 0) {
-                       ASSERT(0);
-                       return -EINVAL;
-               }
-               mp->m_sb.sb_rextslog = scounter;
-               return 0;
-       default:
+       /* deltas are +/-64, hence the large batch size of 128. */
+       __percpu_counter_add(&mp->m_icount, delta, 128);
+       if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
                 ASSERT(0);
+               percpu_counter_add(&mp->m_icount, -delta);
                 return -EINVAL;
         }
+       return 0;
  }
  
-/*
- * xfs_mod_incore_sb() is used to change a field in the in-core
- * superblock structure by the specified delta.  This modification
- * is protected by the m_sb_lock.  Just use the xfs_mod_incore_sb_unlocked()
- * routine to do the work.
- */
  int
-xfs_mod_incore_sb(
+xfs_mod_ifree(
         struct xfs_mount        *mp,
-       xfs_sb_field_t          field,
-       int64_t                 delta,
-       int                     rsvd)
+       int64_t                 delta)
  {
-       int                     status;
-
-#ifdef HAVE_PERCPU_SB
-       ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
-#endif
-       spin_lock(&mp->m_sb_lock);
-       status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-       spin_unlock(&mp->m_sb_lock);
-
-       return status;
+       percpu_counter_add(&mp->m_ifree, delta);
+       if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
+               ASSERT(0);
+               percpu_counter_add(&mp->m_ifree, -delta);
+               return -EINVAL;
+       }
+       return 0;
  }
  
-/*
- * Change more than one field in the in-core superblock structure at a time.
- *
- * The fields and changes to those fields are specified in the array of
- * xfs_mod_sb structures passed in.  Either all of the specified deltas
- * will be applied or none of them will.  If any modified field dips below 0,
- * then all modifications will be backed out and EINVAL will be returned.
- *
- * Note that this function may not be used for the superblock values that
- * are tracked with the in-memory per-cpu counters - a direct call to
- * xfs_icsb_modify_counters is required for these.
- */
  int
-xfs_mod_incore_sb_batch(
+xfs_mod_fdblocks(
         struct xfs_mount        *mp,
-       xfs_mod_sb_t            *msb,
-       uint                    nmsb,
-       int                     rsvd)
+       int64_t                 delta,
+       bool                    rsvd)
  {
-       xfs_mod_sb_t            *msbp;
-       int                     error = 0;
+       int64_t                 lcounter;
+       long long               res_used;
+       s32                     batch;
+
+       if (delta > 0) {
+               /*
+                * If the reserve pool is depleted, put blocks back into it
+                * first. Most of the time the pool is full.
+                */
+               if (likely(mp->m_resblks == mp->m_resblks_avail)) {
+                       percpu_counter_add(&mp->m_fdblocks, delta);
+                       return 0;
+               }
+
+               spin_lock(&mp->m_sb_lock);
+               res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
+
+               if (res_used > delta) {
+                       mp->m_resblks_avail += delta;
+               } else {
+                       delta -= res_used;
+                       mp->m_resblks_avail = mp->m_resblks;
+                       percpu_counter_add(&mp->m_fdblocks, delta);
+               }
+               spin_unlock(&mp->m_sb_lock);
+               return 0;
+       }
  
         /*
-        * Loop through the array of mod structures and apply each individually.
-        * If any fail, then back out all those which have already been applied.
-        * Do all of this within the scope of the m_sb_lock so that all of the
-        * changes will be atomic.
+        * Taking blocks away, need to be more accurate the closer we
+        * are to zero.
+        *
+        * batch size is set to a maximum of 1024 blocks - if we are
+        * allocating of freeing extents larger than this then we aren't
+        * going to be hammering the counter lock so a lock per update
+        * is not a problem.
+        *
+        * If the counter has a value of less than 2 * max batch size,
+        * then make everything serialise as we are real close to
+        * ENOSPC.
+        */
+#define __BATCH        1024
+       if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+               batch = 1;
+       else
+               batch = __BATCH;
+#undef __BATCH
+
+       __percpu_counter_add(&mp->m_fdblocks, delta, batch);
+       if (percpu_counter_compare(&mp->m_fdblocks,
+                                  XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+               /* we had space! */
+               return 0;
+       }
+
+       /*
+        * lock up the sb for dipping into reserves before releasing the space
+        * that took us to ENOSPC.
          */
         spin_lock(&mp->m_sb_lock);
-       for (msbp = msb; msbp < (msb + nmsb); msbp++) {
-               ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
-                      msbp->msb_field > XFS_SBS_FDBLOCKS);
+       percpu_counter_add(&mp->m_fdblocks, -delta);
+       if (!rsvd)
+               goto fdblocks_enospc;
  
-               error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-                                                  msbp->msb_delta, rsvd);
-               if (error)
-                       goto unwind;
+       lcounter = (long long)mp->m_resblks_avail + delta;
+       if (lcounter >= 0) {
+               mp->m_resblks_avail = lcounter;
+               spin_unlock(&mp->m_sb_lock);
+               return 0;
         }
+       printk_once(KERN_WARNING
+               "Filesystem \"%s\": reserve blocks depleted! "
+               "Consider increasing reserve pool size.",
+               mp->m_fsname);
+fdblocks_enospc:
         spin_unlock(&mp->m_sb_lock);
-       return 0;
+       return -ENOSPC;
+}
  
-unwind:
-       while (--msbp >= msb) {
-               error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-                                                  -msbp->msb_delta, rsvd);
-               ASSERT(error == 0);
-       }
+int
+xfs_mod_frextents(
+       struct xfs_mount        *mp,
+       int64_t                 delta)
+{
+       int64_t                 lcounter;
+       int                     ret = 0;
+
+       spin_lock(&mp->m_sb_lock);
+       lcounter = mp->m_sb.sb_frextents + delta;
+       if (lcounter < 0)
+               ret = -ENOSPC;
+       else
+               mp->m_sb.sb_frextents = lcounter;
         spin_unlock(&mp->m_sb_lock);
-       return error;
+       return ret;
  }
  
  /*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
         }
         return 0;
  }
-
-#ifdef HAVE_PERCPU_SB
-/*
- * Per-cpu incore superblock counters
- *
- * Simple concept, difficult implementation
- *
- * Basically, replace the incore superblock counters with a distributed per cpu
- * counter for contended fields (e.g.  free block count).
- *
- * Difficulties arise in that the incore sb is used for ENOSPC checking, and
- * hence needs to be accurately read when we are running low on space. Hence
- * there is a method to enable and disable the per-cpu counters based on how
- * much "stuff" is available in them.
- *
- * Basically, a counter is enabled if there is enough free resource to justify
- * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
- * ENOSPC), then we disable the counters to synchronise all callers and
- * re-distribute the available resources.
- *
- * If, once we redistributed the available resources, we still get a failure,
- * we disable the per-cpu counter and go through the slow path.
- *
- * The slow path is the current xfs_mod_incore_sb() function.  This means that
- * when we disable a per-cpu counter, we need to drain its resources back to
- * the global superblock. We do this after disabling the counter to prevent
- * more threads from queueing up on the counter.
- *
- * Essentially, this means that we still need a lock in the fast path to enable
- * synchronisation between the global counters and the per-cpu counters. This
- * is not a problem because the lock will be local to a CPU almost all the time
- * and have little contention except when we get to ENOSPC conditions.
- *
- * Basically, this lock becomes a barrier that enables us to lock out the fast
- * path while we do things like enabling and disabling counters and
- * synchronising the counters.
- *
- * Locking rules:
- *
- *     1. m_sb_lock before picking up per-cpu locks
- *     2. per-cpu locks always picked up via for_each_online_cpu() order
- *     3. accurate counter sync requires m_sb_lock + per cpu locks
- *     4. modifying per-cpu counters requires holding per-cpu lock
- *     5. modifying global counters requires holding m_sb_lock
- *     6. enabling or disabling a counter requires holding the m_sb_lock 
- *        and _none_ of the per-cpu locks.
- *
- * Disabled counters are only ever re-enabled by a balance operation
- * that results in more free resources per CPU than a given threshold.
- * To ensure counters don't remain disabled, they are rebalanced when
- * the global resource goes above a higher threshold (i.e. some hysteresis
- * is present to prevent thrashing).
- */
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * hot-plug CPU notifier support.
- *
- * We need a notifier per filesystem as we need to be able to identify
- * the filesystem to balance the counters out. This is achieved by
- * having a notifier block embedded in the xfs_mount_t and doing pointer
- * magic to get the mount pointer from the notifier block address.
- */
-STATIC int
-xfs_icsb_cpu_notify(
-       struct notifier_block *nfb,
-       unsigned long action,
-       void *hcpu)
-{
-       xfs_icsb_cnts_t *cntp;
-       xfs_mount_t     *mp;
-
-       mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
-       cntp = (xfs_icsb_cnts_t *)
-                       per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
-       switch (action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               /* Easy Case - initialize the area and locks, and
-                * then rebalance when online does everything else for us. */
-               memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-               break;
-       case CPU_ONLINE:
-       case CPU_ONLINE_FROZEN:
-               xfs_icsb_lock(mp);
-               xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-               xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-               xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
-               xfs_icsb_unlock(mp);
-               break;
-       case CPU_DEAD:
-       case CPU_DEAD_FROZEN:
-               /* Disable all the counters, then fold the dead cpu's
-                * count into the total on the global superblock and
-                * re-enable the counters. */
-               xfs_icsb_lock(mp);
-               spin_lock(&mp->m_sb_lock);
-               xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
-               xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
-               xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
-
-               mp->m_sb.sb_icount += cntp->icsb_icount;
-               mp->m_sb.sb_ifree += cntp->icsb_ifree;
-               mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
-
-               memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-
-               xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
-               xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
-               xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
-               spin_unlock(&mp->m_sb_lock);
-               xfs_icsb_unlock(mp);
-               break;
-       }
-
-       return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int
-xfs_icsb_init_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
-       if (mp->m_sb_cnts == NULL)
-               return -ENOMEM;
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-       }
-
-       mutex_init(&mp->m_icsb_mutex);
-
-       /*
-        * start with all counters disabled so that the
-        * initial balance kicks us off correctly
-        */
-       mp->m_icsb_counters = -1;
-
-#ifdef CONFIG_HOTPLUG_CPU
-       mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
-       mp->m_icsb_notifier.priority = 0;
-       register_hotcpu_notifier(&mp->m_icsb_notifier);
-#endif /* CONFIG_HOTPLUG_CPU */
-
-       return 0;
-}
-
-void
-xfs_icsb_reinit_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_lock(mp);
-       /*
-        * start with all counters disabled so that the
-        * initial balance kicks us off correctly
-        */
-       mp->m_icsb_counters = -1;
-       xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
-       xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
-       xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
-       xfs_icsb_unlock(mp);
-}
-
-void
-xfs_icsb_destroy_counters(
-       xfs_mount_t     *mp)
-{
-       if (mp->m_sb_cnts) {
-               unregister_hotcpu_notifier(&mp->m_icsb_notifier);
-               free_percpu(mp->m_sb_cnts);
-       }
-       mutex_destroy(&mp->m_icsb_mutex);
-}
-
-STATIC void
-xfs_icsb_lock_cntr(
-       xfs_icsb_cnts_t *icsbp)
-{
-       while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
-               ndelay(1000);
-       }
-}
-
-STATIC void
-xfs_icsb_unlock_cntr(
-       xfs_icsb_cnts_t *icsbp)
-{
-       clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
-}
-
-
-STATIC void
-xfs_icsb_lock_all_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               xfs_icsb_lock_cntr(cntp);
-       }
-}
-
-STATIC void
-xfs_icsb_unlock_all_counters(
-       xfs_mount_t     *mp)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               xfs_icsb_unlock_cntr(cntp);
-       }
-}
-
-STATIC void
-xfs_icsb_count(
-       xfs_mount_t     *mp,
-       xfs_icsb_cnts_t *cnt,
-       int             flags)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
-
-       if (!(flags & XFS_ICSB_LAZY_COUNT))
-               xfs_icsb_lock_all_counters(mp);
-
-       for_each_online_cpu(i) {
-               cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-               cnt->icsb_icount += cntp->icsb_icount;
-               cnt->icsb_ifree += cntp->icsb_ifree;
-               cnt->icsb_fdblocks += cntp->icsb_fdblocks;
-       }
-
-       if (!(flags & XFS_ICSB_LAZY_COUNT))
-               xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC int
-xfs_icsb_counter_disabled(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field)
-{
-       ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-       return test_bit(field, &mp->m_icsb_counters);
-}
-
-STATIC void
-xfs_icsb_disable_counter(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field)
-{
-       xfs_icsb_cnts_t cnt;
-
-       ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
-       /*
-        * If we are already disabled, then there is nothing to do
-        * here. We check before locking all the counters to avoid
-        * the expensive lock operation when being called in the
-        * slow path and the counter is already disabled. This is
-        * safe because the only time we set or clear this state is under
-        * the m_icsb_mutex.
-        */
-       if (xfs_icsb_counter_disabled(mp, field))
-               return;
-
-       xfs_icsb_lock_all_counters(mp);
-       if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
-               /* drain back to superblock */
-
-               xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
-               switch(field) {
-               case XFS_SBS_ICOUNT:
-                       mp->m_sb.sb_icount = cnt.icsb_icount;
-                       break;
-               case XFS_SBS_IFREE:
-                       mp->m_sb.sb_ifree = cnt.icsb_ifree;
-                       break;
-               case XFS_SBS_FDBLOCKS:
-                       mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-                       break;
-               default:
-                       BUG();
-               }
-       }
-
-       xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC void
-xfs_icsb_enable_counter(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       uint64_t        count,
-       uint64_t        resid)
-{
-       xfs_icsb_cnts_t *cntp;
-       int             i;
-
-       ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
-
-       xfs_icsb_lock_all_counters(mp);
-       for_each_online_cpu(i) {
-               cntp = per_cpu_ptr(mp->m_sb_cnts, i);
-               switch (field) {
-               case XFS_SBS_ICOUNT:
-                       cntp->icsb_icount = count + resid;
-                       break;
-               case XFS_SBS_IFREE:
-                       cntp->icsb_ifree = count + resid;
-                       break;
-               case XFS_SBS_FDBLOCKS:
-                       cntp->icsb_fdblocks = count + resid;
-                       break;
-               default:
-                       BUG();
-                       break;
-               }
-               resid = 0;
-       }
-       clear_bit(field, &mp->m_icsb_counters);
-       xfs_icsb_unlock_all_counters(mp);
-}
-
-void
-xfs_icsb_sync_counters_locked(
-       xfs_mount_t     *mp,
-       int             flags)
-{
-       xfs_icsb_cnts_t cnt;
-
-       xfs_icsb_count(mp, &cnt, flags);
-
-       if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
-               mp->m_sb.sb_icount = cnt.icsb_icount;
-       if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
-               mp->m_sb.sb_ifree = cnt.icsb_ifree;
-       if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
-               mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-}
-
-/*
- * Accurate update of per-cpu counters to incore superblock
- */
-void
-xfs_icsb_sync_counters(
-       xfs_mount_t     *mp,
-       int             flags)
-{
-       spin_lock(&mp->m_sb_lock);
-       xfs_icsb_sync_counters_locked(mp, flags);
-       spin_unlock(&mp->m_sb_lock);
-}
-
-/*
- * Balance and enable/disable counters as necessary.
- *
- * Thresholds for re-enabling counters are somewhat magic.  inode counts are
- * chosen to be the same number as single on disk allocation chunk per CPU, and
- * free blocks is something far enough zero that we aren't going thrash when we
- * get near ENOSPC. We also need to supply a minimum we require per cpu to
- * prevent looping endlessly when xfs_alloc_space asks for more than will
- * be distributed to a single CPU but each CPU has enough blocks to be
- * reenabled.
- *
- * Note that we can be called when counters are already disabled.
- * xfs_icsb_disable_counter() optimises the counter locking in this case to
- * prevent locking every per-cpu counter needlessly.
- */
-
-#define XFS_ICSB_INO_CNTR_REENABLE     (uint64_t)64
-#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
-               (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
-STATIC void
-xfs_icsb_balance_counter_locked(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int             min_per_cpu)
-{
-       uint64_t        count, resid;
-       int             weight = num_online_cpus();
-       uint64_t        min = (uint64_t)min_per_cpu;
-
-       /* disable counter and sync counter */
-       xfs_icsb_disable_counter(mp, field);
-
-       /* update counters  - first CPU gets residual*/
-       switch (field) {
-       case XFS_SBS_ICOUNT:
-               count = mp->m_sb.sb_icount;
-               resid = do_div(count, weight);
-               if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-                       return;
-               break;
-       case XFS_SBS_IFREE:
-               count = mp->m_sb.sb_ifree;
-               resid = do_div(count, weight);
-               if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
-                       return;
-               break;
-       case XFS_SBS_FDBLOCKS:
-               count = mp->m_sb.sb_fdblocks;
-               resid = do_div(count, weight);
-               if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
-                       return;
-               break;
-       default:
-               BUG();
-               count = resid = 0;      /* quiet, gcc */
-               break;
-       }
-
-       xfs_icsb_enable_counter(mp, field, count, resid);
-}
-
-STATIC void
-xfs_icsb_balance_counter(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  fields,
-       int             min_per_cpu)
-{
-       spin_lock(&mp->m_sb_lock);
-       xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
-       spin_unlock(&mp->m_sb_lock);
-}
-
-int
-xfs_icsb_modify_counters(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int64_t         delta,
-       int             rsvd)
-{
-       xfs_icsb_cnts_t *icsbp;
-       long long       lcounter;       /* long counter for 64 bit fields */
-       int             ret = 0;
-
-       might_sleep();
-again:
-       preempt_disable();
-       icsbp = this_cpu_ptr(mp->m_sb_cnts);
-
-       /*
-        * if the counter is disabled, go to slow path
-        */
-       if (unlikely(xfs_icsb_counter_disabled(mp, field)))
-               goto slow_path;
-       xfs_icsb_lock_cntr(icsbp);
-       if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
-               xfs_icsb_unlock_cntr(icsbp);
-               goto slow_path;
-       }
-
-       switch (field) {
-       case XFS_SBS_ICOUNT:
-               lcounter = icsbp->icsb_icount;
-               lcounter += delta;
-               if (unlikely(lcounter < 0))
-                       goto balance_counter;
-               icsbp->icsb_icount = lcounter;
-               break;
-
-       case XFS_SBS_IFREE:
-               lcounter = icsbp->icsb_ifree;
-               lcounter += delta;
-               if (unlikely(lcounter < 0))
-                       goto balance_counter;
-               icsbp->icsb_ifree = lcounter;
-               break;
-
-       case XFS_SBS_FDBLOCKS:
-               BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
-
-               lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
-               lcounter += delta;
-               if (unlikely(lcounter < 0))
-                       goto balance_counter;
-               icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
-               break;
-       default:
-               BUG();
-               break;
-       }
-       xfs_icsb_unlock_cntr(icsbp);
-       preempt_enable();
-       return 0;
-
-slow_path:
-       preempt_enable();
-
-       /*
-        * serialise with a mutex so we don't burn lots of cpu on
-        * the superblock lock. We still need to hold the superblock
-        * lock, however, when we modify the global structures.
-        */
-       xfs_icsb_lock(mp);
-
-       /*
-        * Now running atomically.
-        *
-        * If the counter is enabled, someone has beaten us to rebalancing.
-        * Drop the lock and try again in the fast path....
-        */
-       if (!(xfs_icsb_counter_disabled(mp, field))) {
-               xfs_icsb_unlock(mp);
-               goto again;
-       }
-
-       /*
-        * The counter is currently disabled. Because we are
-        * running atomically here, we know a rebalance cannot
-        * be in progress. Hence we can go straight to operating
-        * on the global superblock. We do not call xfs_mod_incore_sb()
-        * here even though we need to get the m_sb_lock. Doing so
-        * will cause us to re-enter this function and deadlock.
-        * Hence we get the m_sb_lock ourselves and then call
-        * xfs_mod_incore_sb_unlocked() as the unlocked path operates
-        * directly on the global counters.
-        */
-       spin_lock(&mp->m_sb_lock);
-       ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-       spin_unlock(&mp->m_sb_lock);
-
-       /*
-        * Now that we've modified the global superblock, we
-        * may be able to re-enable the distributed counters
-        * (e.g. lots of space just got freed). After that
-        * we are done.
-        */
-       if (ret != -ENOSPC)
-               xfs_icsb_balance_counter(mp, field, 0);
-       xfs_icsb_unlock(mp);
-       return ret;
-
-balance_counter:
-       xfs_icsb_unlock_cntr(icsbp);
-       preempt_enable();
-
-       /*
-        * We may have multiple threads here if multiple per-cpu
-        * counters run dry at the same time. This will mean we can
-        * do more balances than strictly necessary but it is not
-        * the common slowpath case.
-        */
-       xfs_icsb_lock(mp);
-
-       /*
-        * running atomically.
-        *
-        * This will leave the counter in the correct state for future
-        * accesses. After the rebalance, we simply try again and our retry
-        * will either succeed through the fast path or slow path without
-        * another balance operation being required.
-        */
-       xfs_icsb_balance_counter(mp, field, delta);
-       xfs_icsb_unlock(mp);
-       goto again;
-}
-
-#endif
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index 0d8abd6..8c995a2 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,8 +18,6 @@
  #ifndef __XFS_MOUNT_H__
  #define        __XFS_MOUNT_H__
  
-#ifdef __KERNEL__
-
  struct xlog;
  struct xfs_inode;
  struct xfs_mru_cache;
@@ -29,44 +27,6 @@ struct xfs_quotainfo;
  struct xfs_dir_ops;
  struct xfs_da_geometry;
  
-#ifdef HAVE_PERCPU_SB
-
-/*
- * Valid per-cpu incore superblock counters. Note that if you add new counters,
- * you may need to define new counter disabled bit field descriptors as there
- * are more possible fields in the superblock that can fit in a bitfield on a
- * 32 bit platform. The XFS_SBS_* values for the current current counters just
- * fit.
- */
-typedef struct xfs_icsb_cnts {
-       uint64_t        icsb_fdblocks;
-       uint64_t        icsb_ifree;
-       uint64_t        icsb_icount;
-       unsigned long   icsb_flags;
-} xfs_icsb_cnts_t;
-
-#define XFS_ICSB_FLAG_LOCK     (1 << 0)        /* counter lock bit */
-
-#define XFS_ICSB_LAZY_COUNT    (1 << 1)        /* accuracy not needed */
-
-extern int     xfs_icsb_init_counters(struct xfs_mount *);
-extern void    xfs_icsb_reinit_counters(struct xfs_mount *);
-extern void    xfs_icsb_destroy_counters(struct xfs_mount *);
-extern void    xfs_icsb_sync_counters(struct xfs_mount *, int);
-extern void    xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
-extern int     xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
-                                               int64_t, int);
-
-#else
-#define xfs_icsb_init_counters(mp)             (0)
-#define xfs_icsb_destroy_counters(mp)          do { } while (0)
-#define xfs_icsb_reinit_counters(mp)           do { } while (0)
-#define xfs_icsb_sync_counters(mp, flags)      do { } while (0)
-#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
-#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
-       xfs_mod_incore_sb(mp, field, delta, rsvd)
-#endif
-
  /* dynamic preallocation free space thresholds, 5% down to 1% */
  enum {
         XFS_LOWSP_1_PCNT = 0,
@@ -81,8 +41,13 @@ typedef struct xfs_mount {
         struct super_block      *m_super;
         xfs_tid_t               m_tid;          /* next unused tid for fs */
         struct xfs_ail          *m_ail;         /* fs active log item list */
-       xfs_sb_t                m_sb;           /* copy of fs superblock */
+
+       struct xfs_sb           m_sb;           /* copy of fs superblock */
         spinlock_t              m_sb_lock;      /* sb counter lock */
+       struct percpu_counter   m_icount;       /* allocated inodes counter */
+       struct percpu_counter   m_ifree;        /* free inodes counter */
+       struct percpu_counter   m_fdblocks;     /* free block counter */
+
         struct xfs_buf          *m_sb_bp;       /* buffer for superblock */
         char                    *m_fsname;      /* filesystem name */
         int                     m_fsname_len;   /* strlen of fs name */
@@ -152,12 +117,6 @@ typedef struct xfs_mount {
         const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
         uint                    m_chsize;       /* size of next field */
         atomic_t                m_active_trans; /* number trans frozen */
-#ifdef HAVE_PERCPU_SB
-       xfs_icsb_cnts_t __percpu *m_sb_cnts;    /* per-cpu superblock counters */
-       unsigned long           m_icsb_counters; /* disabled per-cpu counters */
-       struct notifier_block   m_icsb_notifier; /* hotplug cpu notifier */
-       struct mutex            m_icsb_mutex;   /* balancer sync lock */
-#endif
         struct xfs_mru_cache    *m_filestream;  /* per-mount filestream data */
         struct delayed_work     m_reclaim_work; /* background inode reclaim */
         struct delayed_work     m_eofblocks_work; /* background eof blocks
@@ -300,35 +259,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
         return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
  }
  
-/*
- * Per-cpu superblock locking functions
- */
-#ifdef HAVE_PERCPU_SB
-static inline void
-xfs_icsb_lock(xfs_mount_t *mp)
-{
-       mutex_lock(&mp->m_icsb_mutex);
-}
-
-static inline void
-xfs_icsb_unlock(xfs_mount_t *mp)
-{
-       mutex_unlock(&mp->m_icsb_mutex);
-}
-#else
-#define xfs_icsb_lock(mp)
-#define xfs_icsb_unlock(mp)
-#endif
-
-/*
- * This structure is for use by the xfs_mod_incore_sb_batch() routine.
- * xfs_growfs can specify a few fields which are more than int limit
- */
-typedef struct xfs_mod_sb {
-       xfs_sb_field_t  msb_field;      /* Field to modify, see below */
-       int64_t         msb_delta;      /* Change to make to specified field */
-} xfs_mod_sb_t;
-
  /*
   * Per-ag incore structure, copies of information in agf and agi, to improve the
   * performance of allocation group selection.
@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
  extern int     xfs_mountfs(xfs_mount_t *mp);
  extern int     xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
                                      xfs_agnumber_t *maxagi);
-
  extern void    xfs_unmountfs(xfs_mount_t *);
-extern int     xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
-extern int     xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
-                       uint, int);
+
+extern int     xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
+extern int     xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
+extern int     xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
+                                bool reserved);
+extern int     xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
+
  extern int     xfs_mount_log_sb(xfs_mount_t *);
  extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
  extern int     xfs_readsb(xfs_mount_t *, int);
@@ -399,6 +332,4 @@ extern int  xfs_dev_is_read_only(struct xfs_mount *, char *);
  
  extern void    xfs_set_low_space_thresholds(struct xfs_mount *);
  
-#endif /* __KERNEL__ */
-
  #endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c

index 30ecca3..f8a674d 100644 (file)
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -437,7 +437,7 @@ xfs_mru_cache_insert(
         if (!mru || !mru->lists)
                 return -EINVAL;
  
-       if (radix_tree_preload(GFP_KERNEL))
+       if (radix_tree_preload(GFP_NOFS))
                 return -ENOMEM;
  
         INIT_LIST_HEAD(&elem->list_node);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c

index 365dd57..981a657 100644 (file)
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -31,7 +31,8 @@
  int
  xfs_break_layouts(
         struct inode            *inode,
-       uint                    *iolock)
+       uint                    *iolock,
+       bool                    with_imutex)
  {
         struct xfs_inode        *ip = XFS_I(inode);
         int                     error;
@@ -40,8 +41,12 @@ xfs_break_layouts(
  
         while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
                 xfs_iunlock(ip, *iolock);
+               if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
+                       mutex_unlock(&inode->i_mutex);
                 error = break_layout(inode, true);
                 *iolock = XFS_IOLOCK_EXCL;
+               if (with_imutex)
+                       mutex_lock(&inode->i_mutex);
                 xfs_ilock(ip, *iolock);
         }
  
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h

index b7fbfce..8147ac1 100644 (file)
--- a/fs/xfs/xfs_pnfs.h
+++ b/fs/xfs/xfs_pnfs.h
@@ -8,9 +8,10 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
  int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps,
                 struct iattr *iattr);
  
-int xfs_break_layouts(struct inode *inode, uint *iolock);
+int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex);
  #else
-static inline int xfs_break_layouts(struct inode *inode, uint *iolock)
+static inline int
+xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex)
  {
         return 0;
  }
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c

index fbbb9e6..5538468 100644 (file)
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -719,6 +719,7 @@ xfs_qm_qino_alloc(
         xfs_trans_t     *tp;
         int             error;
         int             committed;
+       bool            need_alloc = true;
  
         *ip = NULL;
         /*
@@ -747,6 +748,7 @@ xfs_qm_qino_alloc(
                                 return error;
                         mp->m_sb.sb_gquotino = NULLFSINO;
                         mp->m_sb.sb_pquotino = NULLFSINO;
+                       need_alloc = false;
                 }
         }
  
@@ -758,7 +760,7 @@ xfs_qm_qino_alloc(
                 return error;
         }
  
-       if (!*ip) {
+       if (need_alloc) {
                 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
                                                                 &committed);
                 if (error) {
@@ -794,11 +796,14 @@ xfs_qm_qino_alloc(
         spin_unlock(&mp->m_sb_lock);
         xfs_log_sb(tp);
  
-       if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       if (error) {
+               ASSERT(XFS_FORCED_SHUTDOWN(mp));
                 xfs_alert(mp, "%s failed (error %d)!", __func__, error);
-               return error;
         }
-       return 0;
+       if (need_alloc)
+               xfs_finish_inode_setup(*ip);
+       return error;
  }
  
  
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c

index 8fcc4cc..858e1e6 100644 (file)
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -109,8 +109,6 @@ static struct xfs_kobj xfs_dbg_kobj;        /* global debug sysfs attrs */
  #define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
  #define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
  #define MNTOPT_QUOTANOENF  "qnoenforce"        /* same as uqnoenforce */
-#define MNTOPT_DELAYLOG    "delaylog"  /* Delayed logging enabled */
-#define MNTOPT_NODELAYLOG  "nodelaylog"        /* Delayed logging disabled */
  #define MNTOPT_DISCARD    "discard"    /* Discard unused blocks */
  #define MNTOPT_NODISCARD   "nodiscard" /* Do not discard unused blocks */
  
@@ -361,28 +359,10 @@ xfs_parseargs(
                 } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
                         mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
                         mp->m_qflags &= ~XFS_GQUOTA_ENFD;
-               } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
-                       xfs_warn(mp,
-       "delaylog is the default now, option is deprecated.");
-               } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
-                       xfs_warn(mp,
-       "nodelaylog support has been removed, option is deprecated.");
                 } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
                         mp->m_flags |= XFS_MOUNT_DISCARD;
                 } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
                         mp->m_flags &= ~XFS_MOUNT_DISCARD;
-               } else if (!strcmp(this_char, "ihashsize")) {
-                       xfs_warn(mp,
-       "ihashsize no longer used, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisdsync")) {
-                       xfs_warn(mp,
-       "osyncisdsync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "osyncisosync")) {
-                       xfs_warn(mp,
-       "osyncisosync has no effect, option is deprecated.");
-               } else if (!strcmp(this_char, "irixsgid")) {
-                       xfs_warn(mp,
-       "irixsgid is now a sysctl(2) variable, option is deprecated.");
                 } else {
                         xfs_warn(mp, "unknown mount option [%s].", this_char);
                         return -EINVAL;
@@ -986,6 +966,8 @@ xfs_fs_inode_init_once(
         atomic_set(&ip->i_pincount, 0);
         spin_lock_init(&ip->i_flags_lock);
  
+       mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
+                    "xfsino", ip->i_ino);
         mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
                      "xfsino", ip->i_ino);
  }
@@ -1033,23 +1015,6 @@ xfs_free_fsname(
         kfree(mp->m_logname);
  }
  
-STATIC void
-xfs_fs_put_super(
-       struct super_block      *sb)
-{
-       struct xfs_mount        *mp = XFS_M(sb);
-
-       xfs_filestream_unmount(mp);
-       xfs_unmountfs(mp);
-
-       xfs_freesb(mp);
-       xfs_icsb_destroy_counters(mp);
-       xfs_destroy_mount_workqueues(mp);
-       xfs_close_devices(mp);
-       xfs_free_fsname(mp);
-       kfree(mp);
-}
-
  STATIC int
  xfs_fs_sync_fs(
         struct super_block      *sb,
@@ -1083,8 +1048,11 @@ xfs_fs_statfs(
  {
         struct xfs_mount        *mp = XFS_M(dentry->d_sb);
         xfs_sb_t                *sbp = &mp->m_sb;
-       struct xfs_inode        *ip = XFS_I(dentry->d_inode);
+       struct xfs_inode        *ip = XFS_I(d_inode(dentry));
         __uint64_t              fakeinos, id;
+       __uint64_t              icount;
+       __uint64_t              ifree;
+       __uint64_t              fdblocks;
         xfs_extlen_t            lsize;
         __int64_t               ffree;
  
@@ -1095,17 +1063,21 @@ xfs_fs_statfs(
         statp->f_fsid.val[0] = (u32)id;
         statp->f_fsid.val[1] = (u32)(id >> 32);
  
-       xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
+       icount = percpu_counter_sum(&mp->m_icount);
+       ifree = percpu_counter_sum(&mp->m_ifree);
+       fdblocks = percpu_counter_sum(&mp->m_fdblocks);
  
         spin_lock(&mp->m_sb_lock);
         statp->f_bsize = sbp->sb_blocksize;
         lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
         statp->f_blocks = sbp->sb_dblocks - lsize;
-       statp->f_bfree = statp->f_bavail =
-                               sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       spin_unlock(&mp->m_sb_lock);
+
+       statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+       statp->f_bavail = statp->f_bfree;
+
         fakeinos = statp->f_bfree << sbp->sb_inopblog;
-       statp->f_files =
-           MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
+       statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
         if (mp->m_maxicount)
                 statp->f_files = min_t(typeof(statp->f_files),
                                         statp->f_files,
@@ -1117,10 +1089,9 @@ xfs_fs_statfs(
                                         sbp->sb_icount);
  
         /* make sure statp->f_ffree does not underflow */
-       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+       ffree = statp->f_files - (icount - ifree);
         statp->f_ffree = max_t(__int64_t, ffree, 0);
  
-       spin_unlock(&mp->m_sb_lock);
  
         if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
             ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
@@ -1256,6 +1227,12 @@ xfs_fs_remount(
  
         /* ro -> rw */
         if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
+               if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
+                       xfs_warn(mp,
+               "ro->rw transition prohibited on norecovery mount");
+                       return -EINVAL;
+               }
+
                 mp->m_flags &= ~XFS_MOUNT_RDONLY;
  
                 /*
@@ -1401,6 +1378,51 @@ xfs_finish_flags(
         return 0;
  }
  
+static int
+xfs_init_percpu_counters(
+       struct xfs_mount        *mp)
+{
+       int             error;
+
+       error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
+       if (error)
+               return -ENOMEM;
+
+       error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
+       if (error)
+               goto free_icount;
+
+       error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
+       if (error)
+               goto free_ifree;
+
+       return 0;
+
+free_ifree:
+       percpu_counter_destroy(&mp->m_ifree);
+free_icount:
+       percpu_counter_destroy(&mp->m_icount);
+       return -ENOMEM;
+}
+
+void
+xfs_reinit_percpu_counters(
+       struct xfs_mount        *mp)
+{
+       percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
+       percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
+       percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
+}
+
+static void
+xfs_destroy_percpu_counters(
+       struct xfs_mount        *mp)
+{
+       percpu_counter_destroy(&mp->m_icount);
+       percpu_counter_destroy(&mp->m_ifree);
+       percpu_counter_destroy(&mp->m_fdblocks);
+}
+
  STATIC int
  xfs_fs_fill_super(
         struct super_block      *sb,
@@ -1449,7 +1471,7 @@ xfs_fs_fill_super(
         if (error)
                 goto out_close_devices;
  
-       error = xfs_icsb_init_counters(mp);
+       error = xfs_init_percpu_counters(mp);
         if (error)
                 goto out_destroy_workqueues;
  
@@ -1507,7 +1529,7 @@ xfs_fs_fill_super(
   out_free_sb:
         xfs_freesb(mp);
   out_destroy_counters:
-       xfs_icsb_destroy_counters(mp);
+       xfs_destroy_percpu_counters(mp);
  out_destroy_workqueues:
         xfs_destroy_mount_workqueues(mp);
   out_close_devices:
@@ -1524,6 +1546,24 @@ out_destroy_workqueues:
         goto out_free_sb;
  }
  
+STATIC void
+xfs_fs_put_super(
+       struct super_block      *sb)
+{
+       struct xfs_mount        *mp = XFS_M(sb);
+
+       xfs_notice(mp, "Unmounting Filesystem");
+       xfs_filestream_unmount(mp);
+       xfs_unmountfs(mp);
+
+       xfs_freesb(mp);
+       xfs_destroy_percpu_counters(mp);
+       xfs_destroy_mount_workqueues(mp);
+       xfs_close_devices(mp);
+       xfs_free_fsname(mp);
+       kfree(mp);
+}
+
  STATIC struct dentry *
  xfs_fs_mount(
         struct file_system_type *fs_type,
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h

index 2b830c2..499058f 100644 (file)
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations;
  extern const struct xattr_handler *xfs_xattr_handlers[];
  extern const struct quotactl_ops xfs_quotactl_operations;
  
+extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
+
  #define XFS_M(sb)              ((struct xfs_mount *)((sb)->s_fs_info))
  
  #endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c

index 25791df..3df411e 100644 (file)
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -177,7 +177,7 @@ xfs_symlink(
         int                     pathlen;
         struct xfs_bmap_free    free_list;
         xfs_fsblock_t           first_block;
-       bool                    unlock_dp_on_error = false;
+       bool                    unlock_dp_on_error = false;
         uint                    cancel_flags;
         int                     committed;
         xfs_fileoff_t           first_fsb;
@@ -221,7 +221,7 @@ xfs_symlink(
                         XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
                         &udqp, &gdqp, &pdqp);
         if (error)
-               goto std_return;
+               return error;
  
         tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
         cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
@@ -241,7 +241,7 @@ xfs_symlink(
         }
         if (error) {
                 cancel_flags = 0;
-               goto error_return;
+               goto out_trans_cancel;
         }
  
         xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
@@ -252,7 +252,7 @@ xfs_symlink(
          */
         if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
                 error = -EPERM;
-               goto error_return;
+               goto out_trans_cancel;
         }
  
         /*
@@ -261,7 +261,7 @@ xfs_symlink(
         error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
                                                 pdqp, resblks, 1, 0);
         if (error)
-               goto error_return;
+               goto out_trans_cancel;
  
         /*
          * Check for ability to enter directory entry, if no space reserved.
@@ -269,7 +269,7 @@ xfs_symlink(
         if (!resblks) {
                 error = xfs_dir_canenter(tp, dp, link_name);
                 if (error)
-                       goto error_return;
+                       goto out_trans_cancel;
         }
         /*
          * Initialize the bmap freelist prior to calling either
@@ -282,15 +282,14 @@ xfs_symlink(
          */
         error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
                                prid, resblks > 0, &ip, NULL);
-       if (error) {
-               if (error == -ENOSPC)
-                       goto error_return;
-               goto error1;
-       }
+       if (error)
+               goto out_trans_cancel;
  
         /*
-        * An error after we've joined dp to the transaction will result in the
-        * transaction cancel unlocking dp so don't do it explicitly in the
+        * Now we join the directory inode to the transaction.  We do not do it
+        * earlier because xfs_dir_ialloc might commit the previous transaction
+        * (and release all the locks).  An error from here on will result in
+        * the transaction cancel unlocking dp so don't do it explicitly in the
          * error path.
          */
         xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
@@ -330,7 +329,7 @@ xfs_symlink(
                                   XFS_BMAPI_METADATA, &first_block, resblks,
                                   mval, &nmaps, &free_list);
                 if (error)
-                       goto error2;
+                       goto out_bmap_cancel;
  
                 if (resblks)
                         resblks -= fs_blocks;
@@ -348,7 +347,7 @@ xfs_symlink(
                                                BTOBB(byte_cnt), 0);
                         if (!bp) {
                                 error = -ENOMEM;
-                               goto error2;
+                               goto out_bmap_cancel;
                         }
                         bp->b_ops = &xfs_symlink_buf_ops;
  
@@ -378,7 +377,7 @@ xfs_symlink(
         error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
                                         &first_block, &free_list, resblks);
         if (error)
-               goto error2;
+               goto out_bmap_cancel;
         xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
  
@@ -392,10 +391,13 @@ xfs_symlink(
         }
  
         error = xfs_bmap_finish(&tp, &free_list, &committed);
-       if (error) {
-               goto error2;
-       }
+       if (error)
+               goto out_bmap_cancel;
+
         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+       if (error)
+               goto out_release_inode;
+
         xfs_qm_dqrele(udqp);
         xfs_qm_dqrele(gdqp);
         xfs_qm_dqrele(pdqp);
@@ -403,20 +405,28 @@ xfs_symlink(
         *ipp = ip;
         return 0;
  
- error2:
-       IRELE(ip);
- error1:
+out_bmap_cancel:
         xfs_bmap_cancel(&free_list);
         cancel_flags |= XFS_TRANS_ABORT;
- error_return:
+out_trans_cancel:
         xfs_trans_cancel(tp, cancel_flags);
+out_release_inode:
+       /*
+        * Wait until after the current transaction is aborted to finish the
+        * setup of the inode and release the inode.  This prevents recursive
+        * transactions and deadlocks from xfs_inactive.
+        */
+       if (ip) {
+               xfs_finish_inode_setup(ip);
+               IRELE(ip);
+       }
+
         xfs_qm_dqrele(udqp);
         xfs_qm_dqrele(gdqp);
         xfs_qm_dqrele(pdqp);
  
         if (unlock_dp_on_error)
                 xfs_iunlock(dp, XFS_ILOCK_EXCL);
- std_return:
         return error;
  }
  
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index 51372e3..615781b 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -115,7 +115,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
                 __entry->refcount = refcount;
                 __entry->caller_ip = caller_ip;
         ),
-       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+       TP_printk("dev %d:%d agno %u refcount %d caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->agno,
                   __entry->refcount,
@@ -239,7 +239,7 @@ TRACE_EVENT(xfs_iext_insert,
                 __entry->caller_ip = caller_ip;
         ),
         TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 "offset %lld block %lld count %lld flag %d caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->ino,
                   __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -283,7 +283,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
                 __entry->caller_ip = caller_ip;
         ),
         TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
-                 "offset %lld block %lld count %lld flag %d caller %pf",
+                 "offset %lld block %lld count %lld flag %d caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->ino,
                   __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -329,7 +329,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
                 __entry->caller_ip = caller_ip;
         ),
         TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
+                 "lock %d flags %s caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   (unsigned long long)__entry->bno,
                   __entry->nblks,
@@ -402,7 +402,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
                 __entry->caller_ip = caller_ip;
         ),
         TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d flags %s caller %pf",
+                 "lock %d flags %s caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   (unsigned long long)__entry->bno,
                   __entry->buffer_length,
@@ -447,7 +447,7 @@ TRACE_EVENT(xfs_buf_ioerror,
                 __entry->caller_ip = caller_ip;
         ),
         TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
-                 "lock %d error %d flags %s caller %pf",
+                 "lock %d error %d flags %s caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   (unsigned long long)__entry->bno,
                   __entry->buffer_length,
@@ -613,7 +613,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
                 __entry->lock_flags = lock_flags;
                 __entry->caller_ip = caller_ip;
         ),
-       TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
+       TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->ino,
                   __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -664,6 +664,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space);
  DEFINE_INODE_EVENT(xfs_free_file_space);
  DEFINE_INODE_EVENT(xfs_zero_file_space);
  DEFINE_INODE_EVENT(xfs_collapse_file_space);
+DEFINE_INODE_EVENT(xfs_insert_file_space);
  DEFINE_INODE_EVENT(xfs_readdir);
  #ifdef CONFIG_XFS_POSIX_ACL
  DEFINE_INODE_EVENT(xfs_get_acl);
@@ -685,6 +686,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
  DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
  DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
  
+DEFINE_INODE_EVENT(xfs_filemap_fault);
+DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
+
  DECLARE_EVENT_CLASS(xfs_iref_class,
         TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
         TP_ARGS(ip, caller_ip),
@@ -702,7 +706,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
                 __entry->pincount = atomic_read(&ip->i_pincount);
                 __entry->caller_ip = caller_ip;
         ),
-       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
+       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->ino,
                   __entry->count,
@@ -1217,6 +1221,11 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
  DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
  DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
  DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
  
  DECLARE_EVENT_CLASS(xfs_simple_io_class,
         TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1333,7 +1342,7 @@ TRACE_EVENT(xfs_bunmap,
                 __entry->flags = flags;
         ),
         TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
-                 "flags %s caller %pf",
+                 "flags %s caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->ino,
                   __entry->size,
@@ -1466,7 +1475,7 @@ TRACE_EVENT(xfs_agf,
         ),
         TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
                   "levels b %u c %u flfirst %u fllast %u flcount %u "
-                 "freeblks %u longest %u caller %pf",
+                 "freeblks %u longest %u caller %ps",
                   MAJOR(__entry->dev), MINOR(__entry->dev),
                   __entry->agno,
                   __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c

index eb90cd5..220ef2c 100644 (file)
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -173,7 +173,7 @@ xfs_trans_reserve(
         uint                    rtextents)
  {
         int             error = 0;
-       int             rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+       bool            rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
  
         /* Mark this thread as being in a transaction */
         current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -184,8 +184,7 @@ xfs_trans_reserve(
          * fail if the count would go below zero.
          */
         if (blocks > 0) {
-               error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
-                                         -((int64_t)blocks), rsvd);
+               error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
                 if (error != 0) {
                         current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
                         return -ENOSPC;
@@ -236,8 +235,7 @@ xfs_trans_reserve(
          * fail if the count would go below zero.
          */
         if (rtextents > 0) {
-               error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
-                                         -((int64_t)rtextents), rsvd);
+               error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
                 if (error) {
                         error = -ENOSPC;
                         goto undo_log;
@@ -268,8 +266,7 @@ undo_log:
  
  undo_blocks:
         if (blocks > 0) {
-               xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
-                                        (int64_t)blocks, rsvd);
+               xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
                 tp->t_blk_res = 0;
         }
  
@@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas(
                                   sizeof(sbp->sb_frextents) - 1);
  }
  
+STATIC int
+xfs_sb_mod8(
+       uint8_t                 *field,
+       int8_t                  delta)
+{
+       int8_t                  counter = *field;
+
+       counter += delta;
+       if (counter < 0) {
+               ASSERT(0);
+               return -EINVAL;
+       }
+       *field = counter;
+       return 0;
+}
+
+STATIC int
+xfs_sb_mod32(
+       uint32_t                *field,
+       int32_t                 delta)
+{
+       int32_t                 counter = *field;
+
+       counter += delta;
+       if (counter < 0) {
+               ASSERT(0);
+               return -EINVAL;
+       }
+       *field = counter;
+       return 0;
+}
+
+STATIC int
+xfs_sb_mod64(
+       uint64_t                *field,
+       int64_t                 delta)
+{
+       int64_t                 counter = *field;
+
+       counter += delta;
+       if (counter < 0) {
+               ASSERT(0);
+               return -EINVAL;
+       }
+       *field = counter;
+       return 0;
+}
+
  /*
   * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
   * and apply superblock counter changes to the in-core superblock.  The
@@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas(
   * applied to the in-core superblock.  The idea is that that has already been
   * done.
   *
- * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
- * However, we have to ensure that we only modify each superblock field only
- * once because the application of the delta values may not be atomic. That can
- * lead to ENOSPC races occurring if we have two separate modifcations of the
- * free space counter to put back the entire reservation and then take away
- * what we used.
- *
   * If we are not logging superblock counters, then the inode allocated/free and
   * used block counts are not updated in the on disk superblock. In this case,
   * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
@@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas(
   */
  void
  xfs_trans_unreserve_and_mod_sb(
-       xfs_trans_t     *tp)
+       struct xfs_trans        *tp)
  {
-       xfs_mod_sb_t    msb[9]; /* If you add cases, add entries */
-       xfs_mod_sb_t    *msbp;
-       xfs_mount_t     *mp = tp->t_mountp;
-       /* REFERENCED */
-       int             error;
-       int             rsvd;
-       int64_t         blkdelta = 0;
-       int64_t         rtxdelta = 0;
-       int64_t         idelta = 0;
-       int64_t         ifreedelta = 0;
-
-       msbp = msb;
-       rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+       struct xfs_mount        *mp = tp->t_mountp;
+       bool                    rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
+       int64_t                 blkdelta = 0;
+       int64_t                 rtxdelta = 0;
+       int64_t                 idelta = 0;
+       int64_t                 ifreedelta = 0;
+       int                     error;
  
         /* calculate deltas */
         if (tp->t_blk_res > 0)
@@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb(
  
         /* apply the per-cpu counters */
         if (blkdelta) {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
-                                                blkdelta, rsvd);
+               error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
                 if (error)
                         goto out;
         }
  
         if (idelta) {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
-                                                idelta, rsvd);
+               error = xfs_mod_icount(mp, idelta);
                 if (error)
                         goto out_undo_fdblocks;
         }
  
         if (ifreedelta) {
-               error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE,
-                                                ifreedelta, rsvd);
+               error = xfs_mod_ifree(mp, ifreedelta);
                 if (error)
                         goto out_undo_icount;
         }
  
+       if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
+               return;
+
         /* apply remaining deltas */
-       if (rtxdelta != 0) {
-               msbp->msb_field = XFS_SBS_FREXTENTS;
-               msbp->msb_delta = rtxdelta;
-               msbp++;
+       spin_lock(&mp->m_sb_lock);
+       if (rtxdelta) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
+               if (error)
+                       goto out_undo_ifree;
         }
  
-       if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
-               if (tp->t_dblocks_delta != 0) {
-                       msbp->msb_field = XFS_SBS_DBLOCKS;
-                       msbp->msb_delta = tp->t_dblocks_delta;
-                       msbp++;
-               }
-               if (tp->t_agcount_delta != 0) {
-                       msbp->msb_field = XFS_SBS_AGCOUNT;
-                       msbp->msb_delta = tp->t_agcount_delta;
-                       msbp++;
-               }
-               if (tp->t_imaxpct_delta != 0) {
-                       msbp->msb_field = XFS_SBS_IMAX_PCT;
-                       msbp->msb_delta = tp->t_imaxpct_delta;
-                       msbp++;
-               }
-               if (tp->t_rextsize_delta != 0) {
-                       msbp->msb_field = XFS_SBS_REXTSIZE;
-                       msbp->msb_delta = tp->t_rextsize_delta;
-                       msbp++;
-               }
-               if (tp->t_rbmblocks_delta != 0) {
-                       msbp->msb_field = XFS_SBS_RBMBLOCKS;
-                       msbp->msb_delta = tp->t_rbmblocks_delta;
-                       msbp++;
-               }
-               if (tp->t_rblocks_delta != 0) {
-                       msbp->msb_field = XFS_SBS_RBLOCKS;
-                       msbp->msb_delta = tp->t_rblocks_delta;
-                       msbp++;
-               }
-               if (tp->t_rextents_delta != 0) {
-                       msbp->msb_field = XFS_SBS_REXTENTS;
-                       msbp->msb_delta = tp->t_rextents_delta;
-                       msbp++;
-               }
-               if (tp->t_rextslog_delta != 0) {
-                       msbp->msb_field = XFS_SBS_REXTSLOG;
-                       msbp->msb_delta = tp->t_rextslog_delta;
-                       msbp++;
-               }
+       if (tp->t_dblocks_delta != 0) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
+               if (error)
+                       goto out_undo_frextents;
         }
-
-       /*
-        * If we need to change anything, do it.
-        */
-       if (msbp > msb) {
-               error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
-                       (uint)(msbp - msb), rsvd);
+       if (tp->t_agcount_delta != 0) {
+               error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
                 if (error)
-                       goto out_undo_ifreecount;
+                       goto out_undo_dblocks;
         }
-
+       if (tp->t_imaxpct_delta != 0) {
+               error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
+               if (error)
+                       goto out_undo_agcount;
+       }
+       if (tp->t_rextsize_delta != 0) {
+               error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
+                                    tp->t_rextsize_delta);
+               if (error)
+                       goto out_undo_imaxpct;
+       }
+       if (tp->t_rbmblocks_delta != 0) {
+               error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
+                                    tp->t_rbmblocks_delta);
+               if (error)
+                       goto out_undo_rextsize;
+       }
+       if (tp->t_rblocks_delta != 0) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
+               if (error)
+                       goto out_undo_rbmblocks;
+       }
+       if (tp->t_rextents_delta != 0) {
+               error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
+                                    tp->t_rextents_delta);
+               if (error)
+                       goto out_undo_rblocks;
+       }
+       if (tp->t_rextslog_delta != 0) {
+               error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
+                                    tp->t_rextslog_delta);
+               if (error)
+                       goto out_undo_rextents;
+       }
+       spin_unlock(&mp->m_sb_lock);
         return;
  
-out_undo_ifreecount:
+out_undo_rextents:
+       if (tp->t_rextents_delta)
+               xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
+out_undo_rblocks:
+       if (tp->t_rblocks_delta)
+               xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
+out_undo_rbmblocks:
+       if (tp->t_rbmblocks_delta)
+               xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
+out_undo_rextsize:
+       if (tp->t_rextsize_delta)
+               xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
+out_undo_imaxpct:
+       if (tp->t_rextsize_delta)
+               xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
+out_undo_agcount:
+       if (tp->t_agcount_delta)
+               xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
+out_undo_dblocks:
+       if (tp->t_dblocks_delta)
+               xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
+out_undo_frextents:
+       if (rtxdelta)
+               xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
+out_undo_ifree:
+       spin_unlock(&mp->m_sb_lock);
         if (ifreedelta)
-               xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
+               xfs_mod_ifree(mp, -ifreedelta);
  out_undo_icount:
         if (idelta)
-               xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
+               xfs_mod_icount(mp, -idelta);
  out_undo_fdblocks:
         if (blkdelta)
-               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
+               xfs_mod_fdblocks(mp, -blkdelta, rsvd);
  out:
         ASSERT(error == 0);
         return;
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c

index 69f6e47..c036815 100644 (file)
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -35,7 +35,7 @@ static int
  xfs_xattr_get(struct dentry *dentry, const char *name,
                 void *value, size_t size, int xflags)
  {
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+       struct xfs_inode *ip = XFS_I(d_inode(dentry));
         int error, asize = size;
  
         if (strcmp(name, "") == 0)
@@ -57,7 +57,7 @@ static int
  xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
                 size_t size, int flags, int xflags)
  {
-       struct xfs_inode *ip = XFS_I(dentry->d_inode);
+       struct xfs_inode *ip = XFS_I(d_inode(dentry));
  
         if (strcmp(name, "") == 0)
                 return -EINVAL;
@@ -197,7 +197,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
  {
         struct xfs_attr_list_context context;
         struct attrlist_cursor_kern cursor = { 0 };
-       struct inode            *inode = dentry->d_inode;
+       struct inode            *inode = d_inode(dentry);
         int                     error;
  
         /*
diff --git a/include/acpi/acpi_io.h b/include/acpi/acpi_io.h

index 444671e..dd86c5f 100644 (file)
--- a/include/acpi/acpi_io.h
+++ b/include/acpi/acpi_io.h
@@ -3,11 +3,15 @@
  
  #include <linux/io.h>
  
+#include <asm/acpi.h>
+
+#ifndef acpi_os_ioremap
  static inline void __iomem *acpi_os_ioremap(acpi_physical_address phys,
                                             acpi_size size)
  {
         return ioremap_cache(phys, size);
  }
+#endif
  
  void __iomem *__init_refok
  acpi_os_map_iomem(acpi_physical_address phys, acpi_size size);
diff --git a/include/acpi/processor.h b/include/acpi/processor.h

index b95dc32..4188a4d 100644 (file)
--- a/include/acpi/processor.h
+++ b/include/acpi/processor.h
@@ -196,7 +196,7 @@ struct acpi_processor_flags {
  struct acpi_processor {
         acpi_handle handle;
         u32 acpi_id;
-       u32 phys_id;    /* CPU hardware ID such as APIC ID for x86 */
+       phys_cpuid_t phys_id;   /* CPU hardware ID such as APIC ID for x86 */
         u32 id;         /* CPU logical ID allocated by OS */
         u32 pblk;
         int performance_platform_limit;
@@ -310,8 +310,8 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
  #endif                         /* CONFIG_CPU_FREQ */
  
  /* in processor_core.c */
-int acpi_get_phys_id(acpi_handle, int type, u32 acpi_id);
-int acpi_map_cpuid(int phys_id, u32 acpi_id);
+phys_cpuid_t acpi_get_phys_id(acpi_handle, int type, u32 acpi_id);
+int acpi_map_cpuid(phys_cpuid_t phys_id, u32 acpi_id);
  int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id);
  
  /* in processor_pdc.c */
diff --git a/include/dt-bindings/dma/jz4780-dma.h b/include/dt-bindings/dma/jz4780-dma.h

new file mode 100644 (file)

index 0000000..df017fd
--- /dev/null
+++ b/include/dt-bindings/dma/jz4780-dma.h
@@ -0,0 +1,49 @@
+#ifndef __DT_BINDINGS_DMA_JZ4780_DMA_H__
+#define __DT_BINDINGS_DMA_JZ4780_DMA_H__
+
+/*
+ * Request type numbers for the JZ4780 DMA controller (written to the DRTn
+ * register for the channel).
+ */
+#define JZ4780_DMA_I2S1_TX     0x4
+#define JZ4780_DMA_I2S1_RX     0x5
+#define JZ4780_DMA_I2S0_TX     0x6
+#define JZ4780_DMA_I2S0_RX     0x7
+#define JZ4780_DMA_AUTO                0x8
+#define JZ4780_DMA_SADC_RX     0x9
+#define JZ4780_DMA_UART4_TX    0xc
+#define JZ4780_DMA_UART4_RX    0xd
+#define JZ4780_DMA_UART3_TX    0xe
+#define JZ4780_DMA_UART3_RX    0xf
+#define JZ4780_DMA_UART2_TX    0x10
+#define JZ4780_DMA_UART2_RX    0x11
+#define JZ4780_DMA_UART1_TX    0x12
+#define JZ4780_DMA_UART1_RX    0x13
+#define JZ4780_DMA_UART0_TX    0x14
+#define JZ4780_DMA_UART0_RX    0x15
+#define JZ4780_DMA_SSI0_TX     0x16
+#define JZ4780_DMA_SSI0_RX     0x17
+#define JZ4780_DMA_SSI1_TX     0x18
+#define JZ4780_DMA_SSI1_RX     0x19
+#define JZ4780_DMA_MSC0_TX     0x1a
+#define JZ4780_DMA_MSC0_RX     0x1b
+#define JZ4780_DMA_MSC1_TX     0x1c
+#define JZ4780_DMA_MSC1_RX     0x1d
+#define JZ4780_DMA_MSC2_TX     0x1e
+#define JZ4780_DMA_MSC2_RX     0x1f
+#define JZ4780_DMA_PCM0_TX     0x20
+#define JZ4780_DMA_PCM0_RX     0x21
+#define JZ4780_DMA_SMB0_TX     0x24
+#define JZ4780_DMA_SMB0_RX     0x25
+#define JZ4780_DMA_SMB1_TX     0x26
+#define JZ4780_DMA_SMB1_RX     0x27
+#define JZ4780_DMA_SMB2_TX     0x28
+#define JZ4780_DMA_SMB2_RX     0x29
+#define JZ4780_DMA_SMB3_TX     0x2a
+#define JZ4780_DMA_SMB3_RX     0x2b
+#define JZ4780_DMA_SMB4_TX     0x2c
+#define JZ4780_DMA_SMB4_RX     0x2d
+#define JZ4780_DMA_DES_TX      0x2e
+#define JZ4780_DMA_DES_RX      0x2f
+
+#endif /* __DT_BINDINGS_DMA_JZ4780_DMA_H__ */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h

index dd12127..e4da5e3 100644 (file)
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -79,6 +79,7 @@ enum acpi_irq_model_id {
         ACPI_IRQ_MODEL_IOAPIC,
         ACPI_IRQ_MODEL_IOSAPIC,
         ACPI_IRQ_MODEL_PLATFORM,
+       ACPI_IRQ_MODEL_GIC,
         ACPI_IRQ_MODEL_COUNT
  };
  
@@ -152,9 +153,14 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
  int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);
  void acpi_numa_arch_fixup(void);
  
+#ifndef PHYS_CPUID_INVALID
+typedef u32 phys_cpuid_t;
+#define PHYS_CPUID_INVALID (phys_cpuid_t)(-1)
+#endif
+
  #ifdef CONFIG_ACPI_HOTPLUG_CPU
  /* Arch dependent functions for cpu hotplug support */
-int acpi_map_cpu(acpi_handle handle, int physid, int *pcpu);
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu);
  int acpi_unmap_cpu(int cpu);
  #endif /* CONFIG_ACPI_HOTPLUG_CPU */
  
diff --git a/include/linux/acpi_irq.h b/include/linux/acpi_irq.h

new file mode 100644 (file)

index 0000000..f10c872
--- /dev/null
+++ b/include/linux/acpi_irq.h
@@ -0,0 +1,10 @@
+#ifndef _LINUX_ACPI_IRQ_H
+#define _LINUX_ACPI_IRQ_H
+
+#include <linux/irq.h>
+
+#ifndef acpi_irq_init
+static inline void acpi_irq_init(void) { }
+#endif
+
+#endif /* _LINUX_ACPI_IRQ_H */
diff --git a/include/linux/amba/xilinx_dma.h b/include/linux/amba/xilinx_dma.h

deleted file mode 100644 (file)

index 34b98f2..0000000
--- a/include/linux/amba/xilinx_dma.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Xilinx DMA Engine drivers support header file
- *
- * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __DMA_XILINX_DMA_H
-#define __DMA_XILINX_DMA_H
-
-#include <linux/dma-mapping.h>
-#include <linux/dmaengine.h>
-
-/**
- * struct xilinx_vdma_config - VDMA Configuration structure
- * @frm_dly: Frame delay
- * @gen_lock: Whether in gen-lock mode
- * @master: Master that it syncs to
- * @frm_cnt_en: Enable frame count enable
- * @park: Whether wants to park
- * @park_frm: Frame to park on
- * @coalesc: Interrupt coalescing threshold
- * @delay: Delay counter
- * @reset: Reset Channel
- * @ext_fsync: External Frame Sync source
- */
-struct xilinx_vdma_config {
-       int frm_dly;
-       int gen_lock;
-       int master;
-       int frm_cnt_en;
-       int park;
-       int park_frm;
-       int coalesc;
-       int delay;
-       int reset;
-       int ext_fsync;
-};
-
-int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
-                                       struct xilinx_vdma_config *cfg);
-
-#endif
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h

index 179b38f..388574e 100644 (file)
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -60,12 +60,15 @@ struct dma_chan_ref {
   * dependency chain
   * @ASYNC_TX_FENCE: specify that the next operation in the dependency
   * chain uses this operation's result as an input
+ * @ASYNC_TX_PQ_XOR_DST: do not overwrite the syndrome but XOR it with the
+ * input data. Required for rmw case.
   */
  enum async_tx_flags {
         ASYNC_TX_XOR_ZERO_DST    = (1 << 0),
         ASYNC_TX_XOR_DROP_DST    = (1 << 1),
         ASYNC_TX_ACK             = (1 << 2),
         ASYNC_TX_FENCE           = (1 << 3),
+       ASYNC_TX_PQ_XOR_DST      = (1 << 4),
  };
  
  /**
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h

index 71e05bb..4763ad6 100644 (file)
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -50,6 +50,19 @@
  #define CEPH_FEATURE_MDS_INLINE_DATA     (1ULL<<40)
  #define CEPH_FEATURE_CRUSH_TUNABLES3     (1ULL<<41)
  #define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41)  /* overlap w/ tunables3 */
+#define CEPH_FEATURE_MSGR_KEEPALIVE2   (1ULL<<42)
+#define CEPH_FEATURE_OSD_POOLRESEND    (1ULL<<43)
+#define CEPH_FEATURE_ERASURE_CODE_PLUGINS_V2 (1ULL<<44)
+#define CEPH_FEATURE_OSD_SET_ALLOC_HINT (1ULL<<45)
+#define CEPH_FEATURE_OSD_FADVISE_FLAGS (1ULL<<46)
+#define CEPH_FEATURE_OSD_REPOP         (1ULL<<46)   /* overlap with fadvise */
+#define CEPH_FEATURE_OSD_OBJECT_DIGEST  (1ULL<<46)  /* overlap with fadvise */
+#define CEPH_FEATURE_OSD_TRANSACTION_MAY_LAYOUT (1ULL<<46) /* overlap w/ fadvise */
+#define CEPH_FEATURE_MDS_QUOTA      (1ULL<<47)
+#define CEPH_FEATURE_CRUSH_V4      (1ULL<<48)  /* straw2 buckets */
+#define CEPH_FEATURE_OSD_MIN_SIZE_RECOVERY (1ULL<<49)
+// duplicated since it was introduced at the same time as MIN_SIZE_RECOVERY
+#define CEPH_FEATURE_OSD_PROXY_FEATURES (1ULL<<49)  /* overlap w/ above */
  
  /*
   * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -93,7 +106,8 @@ static inline u64 ceph_sanitize_features(u64 features)
          CEPH_FEATURE_EXPORT_PEER |             \
          CEPH_FEATURE_OSDMAP_ENC |              \
          CEPH_FEATURE_CRUSH_TUNABLES3 |         \
-        CEPH_FEATURE_OSD_PRIMARY_AFFINITY)
+        CEPH_FEATURE_OSD_PRIMARY_AFFINITY |    \
+        CEPH_FEATURE_CRUSH_V4)
  
  #define CEPH_FEATURES_REQUIRED_DEFAULT   \
         (CEPH_FEATURE_NOSRCADDR |        \
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h

index 31eb03d..d7d072a 100644 (file)
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -323,6 +323,7 @@ enum {
         CEPH_MDS_OP_MKSNAP     = 0x01400,
         CEPH_MDS_OP_RMSNAP     = 0x01401,
         CEPH_MDS_OP_LSSNAP     = 0x00402,
+       CEPH_MDS_OP_RENAMESNAP = 0x01403,
  };
  
  extern const char *ceph_mds_op_name(int op);
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h

index 1df086d..29cf897 100644 (file)
--- a/include/linux/ceph/debugfs.h
+++ b/include/linux/ceph/debugfs.h
@@ -7,13 +7,7 @@
  #define CEPH_DEFINE_SHOW_FUNC(name)                                    \
  static int name##_open(struct inode *inode, struct file *file)         \
  {                                                                      \
-       struct seq_file *sf;                                            \
-       int ret;                                                        \
-                                                                       \
-       ret = single_open(file, name, NULL);                            \
-       sf = file->private_data;                                        \
-       sf->private = inode->i_private;                                 \
-       return ret;                                                     \
+       return single_open(file, name, inode->i_private);               \
  }                                                                      \
                                                                         \
  static const struct file_operations name##_fops = {                    \
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h

index 16fff96..30f92ce 100644 (file)
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -135,6 +135,7 @@ struct ceph_client {
         struct dentry *debugfs_dir;
         struct dentry *debugfs_monmap;
         struct dentry *debugfs_osdmap;
+       struct dentry *debugfs_options;
  #endif
  };
  
@@ -191,6 +192,7 @@ extern struct ceph_options *ceph_parse_options(char *options,
                               const char *dev_name, const char *dev_name_end,
                               int (*parse_extra_token)(char *c, void *private),
                               void *private);
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client);
  extern void ceph_destroy_options(struct ceph_options *opt);
  extern int ceph_compare_options(struct ceph_options *new_opt,
                                 struct ceph_client *client);
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h

index 561ea89..e55c08b 100644 (file)
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -175,13 +175,12 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
         __u8 version;
  
         if (!ceph_has_room(p, end, 1 + 8 + 4 + 4)) {
-               pr_warning("incomplete pg encoding");
-
+               pr_warn("incomplete pg encoding\n");
                 return -EINVAL;
         }
         version = ceph_decode_8(p);
         if (version > 1) {
-               pr_warning("do not understand pg encoding %d > 1",
+               pr_warn("do not understand pg encoding %d > 1\n",
                         (int)version);
                 return -EINVAL;
         }
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h

index 1355098..d27d015 100644 (file)
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -253,4 +253,10 @@ extern void clocksource_of_init(void);
  static inline void clocksource_of_init(void) {}
  #endif
  
+#ifdef CONFIG_ACPI
+void acpi_generic_timer_init(void);
+#else
+static inline void acpi_generic_timer_init(void) { }
+#endif
+
  #endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h

index 4fad5f8..48a1a7d 100644 (file)
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -96,13 +96,15 @@ struct crush_rule {
   *  uniform         O(1)       poor         poor
   *  list            O(n)       optimal      poor
   *  tree            O(log n)   good         good
- *  straw           O(n)       optimal      optimal
+ *  straw           O(n)       better       better
+ *  straw2          O(n)       optimal      optimal
   */
  enum {
         CRUSH_BUCKET_UNIFORM = 1,
         CRUSH_BUCKET_LIST = 2,
         CRUSH_BUCKET_TREE = 3,
-       CRUSH_BUCKET_STRAW = 4
+       CRUSH_BUCKET_STRAW = 4,
+       CRUSH_BUCKET_STRAW2 = 5,
  };
  extern const char *crush_bucket_alg_name(int alg);
  
@@ -149,6 +151,11 @@ struct crush_bucket_straw {
         __u32 *straws;         /* 16-bit fixed point */
  };
  
+struct crush_bucket_straw2 {
+       struct crush_bucket h;
+       __u32 *item_weights;   /* 16-bit fixed point */
+};
+
  
  
  /*
@@ -189,6 +196,7 @@ extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
  extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
  extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b);
  extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
+extern void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b);
  extern void crush_destroy_bucket(struct crush_bucket *b);
  extern void crush_destroy_rule(struct crush_rule *r);
  extern void crush_destroy(struct crush_map *map);
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h

index 694e1fe..2f0b431 100644 (file)
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -162,6 +162,33 @@ struct dma_buf_attachment {
         void *priv;
  };
  
+/**
+ * struct dma_buf_export_info - holds information needed to export a dma_buf
+ * @exp_name:  name of the exporting module - useful for debugging.
+ * @ops:       Attach allocator-defined dma buf ops to the new buffer
+ * @size:      Size of the buffer
+ * @flags:     mode flags for the file
+ * @resv:      reservation-object, NULL to allocate default one
+ * @priv:      Attach private data of allocator to this buffer
+ *
+ * This structure holds the information required to export the buffer. Used
+ * with dma_buf_export() only.
+ */
+struct dma_buf_export_info {
+       const char *exp_name;
+       const struct dma_buf_ops *ops;
+       size_t size;
+       int flags;
+       struct reservation_object *resv;
+       void *priv;
+};
+
+/**
+ * helper macro for exporters; zeros and fills in most common values
+ */
+#define DEFINE_DMA_BUF_EXPORT_INFO(a)  \
+       struct dma_buf_export_info a = { .exp_name = KBUILD_MODNAME }
+
  /**
   * get_dma_buf - convenience wrapper for get_file.
   * @dmabuf:    [in]    pointer to dma_buf
@@ -181,12 +208,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
  void dma_buf_detach(struct dma_buf *dmabuf,
                                 struct dma_buf_attachment *dmabuf_attach);
  
-struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
-                              size_t size, int flags, const char *,
-                              struct reservation_object *);
-
-#define dma_buf_export(priv, ops, size, flags, resv)   \
-       dma_buf_export_named(priv, ops, size, flags, KBUILD_MODNAME, resv)
+struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info);
  
  int dma_buf_fd(struct dma_buf *dmabuf, int flags);
  struct dma_buf *dma_buf_get(int fd);
diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h

new file mode 100644 (file)

index 0000000..34b98f2
--- /dev/null
+++ b/include/linux/dma/xilinx_dma.h
@@ -0,0 +1,47 @@
+/*
+ * Xilinx DMA Engine drivers support header file
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __DMA_XILINX_DMA_H
+#define __DMA_XILINX_DMA_H
+
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+
+/**
+ * struct xilinx_vdma_config - VDMA Configuration structure
+ * @frm_dly: Frame delay
+ * @gen_lock: Whether in gen-lock mode
+ * @master: Master that it syncs to
+ * @frm_cnt_en: Enable frame count enable
+ * @park: Whether wants to park
+ * @park_frm: Frame to park on
+ * @coalesc: Interrupt coalescing threshold
+ * @delay: Delay counter
+ * @reset: Reset Channel
+ * @ext_fsync: External Frame Sync source
+ */
+struct xilinx_vdma_config {
+       int frm_dly;
+       int gen_lock;
+       int master;
+       int frm_cnt_en;
+       int park;
+       int park_frm;
+       int coalesc;
+       int delay;
+       int reset;
+       int ext_fsync;
+};
+
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+                                       struct xilinx_vdma_config *cfg);
+
+#endif
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h

index b6997a0..ad41975 100644 (file)
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -11,10 +11,6 @@
   * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   * more details.
   *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- *
   * The full GNU General Public License is included in this distribution in the
   * file called COPYING.
   */
@@ -574,7 +570,6 @@ struct dma_tx_state {
   * @copy_align: alignment shift for memcpy operations
   * @xor_align: alignment shift for xor operations
   * @pq_align: alignment shift for pq operations
- * @fill_align: alignment shift for memset operations
   * @dev_id: unique device ID
   * @dev: struct device reference for dma mapping api
   * @src_addr_widths: bit mask of src addr widths the device supports
@@ -625,7 +620,6 @@ struct dma_device {
         u8 copy_align;
         u8 xor_align;
         u8 pq_align;
-       u8 fill_align;
         #define DMA_HAS_PQ_CONTINUE (1 << 15)
  
         int dev_id;
@@ -826,12 +820,6 @@ static inline bool is_dma_pq_aligned(struct dma_device *dev, size_t off1,
         return dmaengine_check_align(dev->pq_align, off1, off2, len);
  }
  
-static inline bool is_dma_fill_aligned(struct dma_device *dev, size_t off1,
-                                      size_t off2, size_t len)
-{
-       return dmaengine_check_align(dev->fill_align, off1, off2, len);
-}
-
  static inline void
  dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue)
  {
@@ -1098,7 +1086,6 @@ void dma_async_device_unregister(struct dma_device *device);
  void dma_run_dependencies(struct dma_async_tx_descriptor *tx);
  struct dma_chan *dma_get_slave_channel(struct dma_chan *chan);
  struct dma_chan *dma_get_any_slave_channel(struct dma_device *device);
-struct dma_chan *net_dma_find_channel(void);
  #define dma_request_channel(mask, x, y) __dma_request_channel(&(mask), x, y)
  #define dma_request_slave_channel_compat(mask, x, y, dev, name) \
         __dma_request_slave_channel_compat(&(mask), x, y, dev, name)
@@ -1116,27 +1103,4 @@ static inline struct dma_chan
  
         return __dma_request_channel(mask, fn, fn_param);
  }
-
-/* --- Helper iov-locking functions --- */
-
-struct dma_page_list {
-       char __user *base_address;
-       int nr_pages;
-       struct page **pages;
-};
-
-struct dma_pinned_list {
-       int nr_iovecs;
-       struct dma_page_list page_list[0];
-};
-
-struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len);
-void dma_unpin_iovec_pages(struct dma_pinned_list* pinned_list);
-
-dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
-       struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len);
-dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
-       struct dma_pinned_list *pinned_list, struct page *page,
-       unsigned int offset, size_t len);
-
  #endif /* DMAENGINE_H */
diff --git a/include/linux/falloc.h b/include/linux/falloc.h

index 3159168..9961110 100644 (file)
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -21,4 +21,10 @@ struct space_resv {
  #define FS_IOC_RESVSP          _IOW('X', 40, struct space_resv)
  #define FS_IOC_RESVSP64                _IOW('X', 42, struct space_resv)
  
+#define        FALLOC_FL_SUPPORTED_MASK        (FALLOC_FL_KEEP_SIZE |          \
+                                        FALLOC_FL_PUNCH_HOLE |         \
+                                        FALLOC_FL_COLLAPSE_RANGE |     \
+                                        FALLOC_FL_ZERO_RANGE |         \
+                                        FALLOC_FL_INSERT_RANGE)
+
  #endif /* _FALLOC_H_ */
diff --git a/include/linux/fs.h b/include/linux/fs.h

index c7496f2..35ec87e 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1820,7 +1820,7 @@ struct super_operations {
  #define I_SYNC                 (1 << __I_SYNC)
  #define I_REFERENCED           (1 << 8)
  #define __I_DIO_WAKEUP         9
-#define I_DIO_WAKEUP           (1 << I_DIO_WAKEUP)
+#define I_DIO_WAKEUP           (1 << __I_DIO_WAKEUP)
  #define I_LINKABLE             (1 << 10)
  #define I_DIRTY_TIME           (1 << 11)
  #define __I_DIRTY_TIME_EXPIRED 12
@@ -2644,6 +2644,9 @@ enum {
  
         /* filesystem can handle aio writes beyond i_size */
         DIO_ASYNC_EXTEND = 0x04,
+
+       /* inode/fs/bdev does not need truncate protection */
+       DIO_SKIP_DIO_COUNT = 0x08,
  };
  
  void dio_end_io(struct bio *bio, int error);
@@ -2666,7 +2669,31 @@ static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
  #endif
  
  void inode_dio_wait(struct inode *inode);
-void inode_dio_done(struct inode *inode);
+
+/*
+ * inode_dio_begin - signal start of a direct I/O requests
+ * @inode: inode the direct I/O happens on
+ *
+ * This is called once we've finished processing a direct I/O request,
+ * and is used to wake up callers waiting for direct I/O to be quiesced.
+ */
+static inline void inode_dio_begin(struct inode *inode)
+{
+       atomic_inc(&inode->i_dio_count);
+}
+
+/*
+ * inode_dio_end - signal finish of a direct I/O requests
+ * @inode: inode the direct I/O happens on
+ *
+ * This is called once we've finished processing a direct I/O request,
+ * and is used to wake up callers waiting for direct I/O to be quiesced.
+ */
+static inline void inode_dio_end(struct inode *inode)
+{
+       if (atomic_dec_and_test(&inode->i_dio_count))
+               wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
+}
  
  extern void inode_set_flags(struct inode *inode, unsigned int flags,
                             unsigned int mask);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h

index a65208a..796ef96 100644 (file)
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -115,10 +115,19 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
   * Extended Capability Register
   */
  
-#define ecap_niotlb_iunits(e)  ((((e) >> 24) & 0xff) + 1)
+#define ecap_pss(e)            ((e >> 35) & 0x1f)
+#define ecap_eafs(e)           ((e >> 34) & 0x1)
+#define ecap_nwfs(e)           ((e >> 33) & 0x1)
+#define ecap_srs(e)            ((e >> 31) & 0x1)
+#define ecap_ers(e)            ((e >> 30) & 0x1)
+#define ecap_prs(e)            ((e >> 29) & 0x1)
+#define ecap_pasid(e)          ((e >> 28) & 0x1)
+#define ecap_dis(e)            ((e >> 27) & 0x1)
+#define ecap_nest(e)           ((e >> 26) & 0x1)
+#define ecap_mts(e)            ((e >> 25) & 0x1)
+#define ecap_ecs(e)            ((e >> 24) & 0x1)
  #define ecap_iotlb_offset(e)   ((((e) >> 8) & 0x3ff) * 16)
-#define ecap_max_iotlb_offset(e) \
-       (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
+#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
  #define ecap_coherent(e)       ((e) & 0x1)
  #define ecap_qis(e)            ((e) & 0x2)
  #define ecap_pass_through(e)   ((e >> 6) & 0x1)
@@ -180,6 +189,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
  #define DMA_GSTS_IRES (((u32)1) << 25)
  #define DMA_GSTS_CFIS (((u32)1) << 23)
  
+/* DMA_RTADDR_REG */
+#define DMA_RTADDR_RTT (((u64)1) << 11)
+
  /* CCMD_REG */
  #define DMA_CCMD_ICC (((u64)1) << 63)
  #define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
diff --git a/include/linux/irqchip/arm-gic-acpi.h b/include/linux/irqchip/arm-gic-acpi.h

new file mode 100644 (file)

index 0000000..de3419e
--- /dev/null
+++ b/include/linux/irqchip/arm-gic-acpi.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014, Linaro Ltd.
+ *     Author: Tomasz Nowicki <tomasz.nowicki@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef ARM_GIC_ACPI_H_
+#define ARM_GIC_ACPI_H_
+
+#ifdef CONFIG_ACPI
+
+/*
+ * Hard code here, we can not get memory size from MADT (but FDT does),
+ * Actually no need to do that, because this size can be inferred
+ * from GIC spec.
+ */
+#define ACPI_GICV2_DIST_MEM_SIZE       (SZ_4K)
+#define ACPI_GIC_CPU_IF_MEM_SIZE       (SZ_8K)
+
+struct acpi_table_header;
+
+int gic_v2_acpi_init(struct acpi_table_header *table);
+void acpi_gic_init(void);
+#else
+static inline void acpi_gic_init(void) { }
+#endif
+
+#endif /* ARM_GIC_ACPI_H_ */
diff --git a/include/linux/lguest.h b/include/linux/lguest.h

index 9962c6b..6db19f3 100644 (file)
--- a/include/linux/lguest.h
+++ b/include/linux/lguest.h
@@ -61,8 +61,8 @@ struct lguest_data {
         u32 tsc_khz;
  
  /* Fields initialized by the Guest at boot: */
-       /* Instruction range to suppress interrupts even if enabled */
-       unsigned long noirq_start, noirq_end;
+       /* Instruction to suppress interrupts even if enabled */
+       unsigned long noirq_iret;
         /* Address above which page tables are all identical. */
         unsigned long kernel_address;
         /* The vector to try to use for system calls (0x40 or 0x80). */
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h

index 0e166b9..324a346 100644 (file)
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -16,6 +16,7 @@
  #ifndef __LINUX_MFD_CROS_EC_H
  #define __LINUX_MFD_CROS_EC_H
  
+#include <linux/cdev.h>
  #include <linux/notifier.h>
  #include <linux/mfd/cros_ec_commands.h>
  #include <linux/mutex.h>
@@ -38,20 +39,20 @@ enum {
  /*
   * @version: Command version number (often 0)
   * @command: Command to send (EC_CMD_...)
- * @outdata: Outgoing data to EC
   * @outsize: Outgoing length in bytes
- * @indata: Where to put the incoming data from EC
   * @insize: Max number of bytes to accept from EC
   * @result: EC's response to the command (separate from communication failure)
+ * @outdata: Outgoing data to EC
+ * @indata: Where to put the incoming data from EC
   */
  struct cros_ec_command {
         uint32_t version;
         uint32_t command;
-       uint8_t *outdata;
         uint32_t outsize;
-       uint8_t *indata;
         uint32_t insize;
         uint32_t result;
+       uint8_t outdata[EC_PROTO2_MAX_PARAM_SIZE];
+       uint8_t indata[EC_PROTO2_MAX_PARAM_SIZE];
  };
  
  /**
@@ -59,9 +60,17 @@ struct cros_ec_command {
   *
   * @ec_name: name of EC device (e.g. 'chromeos-ec')
   * @phys_name: name of physical comms layer (e.g. 'i2c-4')
- * @dev: Device pointer
+ * @dev: Device pointer for physical comms device
+ * @vdev: Device pointer for virtual comms device
+ * @cdev: Character device structure for virtual comms device
   * @was_wake_device: true if this device was set to wake the system from
   * sleep at the last suspend
+ * @cmd_readmem: direct read of the EC memory-mapped region, if supported
+ *     @offset is within EC_LPC_ADDR_MEMMAP region.
+ *     @bytes: number of bytes to read. zero means "read a string" (including
+ *     the trailing '\0'). At most only EC_MEMMAP_SIZE bytes can be read.
+ *     Caller must ensure that the buffer is large enough for the result when
+ *     reading a string.
   *
   * @priv: Private data
   * @irq: Interrupt to use
@@ -90,8 +99,12 @@ struct cros_ec_device {
         const char *ec_name;
         const char *phys_name;
         struct device *dev;
+       struct device *vdev;
+       struct cdev cdev;
         bool was_wake_device;
         struct class *cros_class;
+       int (*cmd_readmem)(struct cros_ec_device *ec, unsigned int offset,
+                          unsigned int bytes, void *dest);
  
         /* These are used to implement the platform-specific interface */
         void *priv;
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h

index 6058128..24b86d5 100644 (file)
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -111,6 +111,8 @@ struct dma_chan;
   * data for the MMC controller
   */
  struct tmio_mmc_data {
+       void                            *chan_priv_tx;
+       void                            *chan_priv_rx;
         unsigned int                    hclk;
         unsigned long                   capabilities;
         unsigned long                   capabilities2;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h

index f9ce34b..83e80ab 100644 (file)
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1345,6 +1345,10 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
  int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
  void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
  
+void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry,
+                        int port);
+__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port);
+void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port);
  int mlx4_flow_attach(struct mlx4_dev *dev,
                      struct mlx4_net_trans_rule *rule, u64 *reg_id);
  int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 8b08607..0755b9f 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -499,7 +499,7 @@ static inline int page_count(struct page *page)
  
  static inline bool __compound_tail_refcounted(struct page *page)
  {
-       return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page);
+       return !PageSlab(page) && !PageHeadHuge(page);
  }
  
  /*
diff --git a/include/linux/mmc/sh_mobile_sdhi.h b/include/linux/mmc/sh_mobile_sdhi.h

index da77e5e..95d6f03 100644 (file)
--- a/include/linux/mmc/sh_mobile_sdhi.h
+++ b/include/linux/mmc/sh_mobile_sdhi.h
@@ -7,14 +7,4 @@
  #define SH_MOBILE_SDHI_IRQ_SDCARD      "sdcard"
  #define SH_MOBILE_SDHI_IRQ_SDIO                "sdio"
  
-struct sh_mobile_sdhi_info {
-       int dma_slave_tx;
-       int dma_slave_rx;
-       unsigned long tmio_flags;
-       unsigned long tmio_caps;
-       unsigned long tmio_caps2;
-       u32 tmio_ocr_mask;      /* available MMC voltages */
-       unsigned int cd_gpio;
-};
-
  #endif /* LINUX_MMC_SH_MOBILE_SDHI_H */
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h

index 5f487d7..29975c7 100644 (file)
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -77,7 +77,7 @@
  /* ensure we never evaluate anything shorted than an unsigned long
   * to zero, and ensure we'll never miss the end of an comparison (bjd) */
  
-#define map_calc_words(map) ((map_bankwidth(map) + (sizeof(unsigned long)-1))/ sizeof(unsigned long))
+#define map_calc_words(map) ((map_bankwidth(map) + (sizeof(unsigned long)-1)) / sizeof(unsigned long))
  
  #ifdef CONFIG_MTD_MAP_BANK_WIDTH_8
  # ifdef map_bankwidth
@@ -181,7 +181,7 @@ static inline int map_bankwidth_supported(int w)
         }
  }
  
-#define MAX_MAP_LONGS ( ((MAX_MAP_BANKWIDTH*8) + BITS_PER_LONG - 1) / BITS_PER_LONG )
+#define MAX_MAP_LONGS (((MAX_MAP_BANKWIDTH * 8) + BITS_PER_LONG - 1) / BITS_PER_LONG)
  
  typedef union {
         unsigned long x[MAX_MAP_LONGS];
@@ -264,20 +264,22 @@ void unregister_mtd_chip_driver(struct mtd_chip_driver *);
  struct mtd_info *do_map_probe(const char *name, struct map_info *map);
  void map_destroy(struct mtd_info *mtd);
  
-#define ENABLE_VPP(map) do { if(map->set_vpp) map->set_vpp(map, 1); } while(0)
-#define DISABLE_VPP(map) do { if(map->set_vpp) map->set_vpp(map, 0); } while(0)
+#define ENABLE_VPP(map) do { if (map->set_vpp) map->set_vpp(map, 1); } while (0)
+#define DISABLE_VPP(map) do { if (map->set_vpp) map->set_vpp(map, 0); } while (0)
  
  #define INVALIDATE_CACHED_RANGE(map, from, size) \
-       do { if(map->inval_cache) map->inval_cache(map, from, size); } while(0)
+       do { if (map->inval_cache) map->inval_cache(map, from, size); } while (0)
  
  
  static inline int map_word_equal(struct map_info *map, map_word val1, map_word val2)
  {
         int i;
-       for (i=0; i<map_words(map); i++) {
+
+       for (i = 0; i < map_words(map); i++) {
                 if (val1.x[i] != val2.x[i])
                         return 0;
         }
+
         return 1;
  }
  
@@ -286,9 +288,9 @@ static inline map_word map_word_and(struct map_info *map, map_word val1, map_wor
         map_word r;
         int i;
  
-       for (i=0; i<map_words(map); i++) {
+       for (i = 0; i < map_words(map); i++)
                 r.x[i] = val1.x[i] & val2.x[i];
-       }
+
         return r;
  }
  
@@ -297,9 +299,9 @@ static inline map_word map_word_clr(struct map_info *map, map_word val1, map_wor
         map_word r;
         int i;
  
-       for (i=0; i<map_words(map); i++) {
+       for (i = 0; i < map_words(map); i++)
                 r.x[i] = val1.x[i] & ~val2.x[i];
-       }
+
         return r;
  }
  
@@ -308,22 +310,33 @@ static inline map_word map_word_or(struct map_info *map, map_word val1, map_word
         map_word r;
         int i;
  
-       for (i=0; i<map_words(map); i++) {
+       for (i = 0; i < map_words(map); i++)
                 r.x[i] = val1.x[i] | val2.x[i];
-       }
+
         return r;
  }
  
-#define map_word_andequal(m, a, b, z) map_word_equal(m, z, map_word_and(m, a, b))
+static inline int map_word_andequal(struct map_info *map, map_word val1, map_word val2, map_word val3)
+{
+       int i;
+
+       for (i = 0; i < map_words(map); i++) {
+               if ((val1.x[i] & val2.x[i]) != val3.x[i])
+                       return 0;
+       }
+
+       return 1;
+}
  
  static inline int map_word_bitsset(struct map_info *map, map_word val1, map_word val2)
  {
         int i;
  
-       for (i=0; i<map_words(map); i++) {
+       for (i = 0; i < map_words(map); i++) {
                 if (val1.x[i] & val2.x[i])
                         return 1;
         }
+
         return 0;
  }
  
@@ -355,14 +368,16 @@ static inline map_word map_word_load_partial(struct map_info *map, map_word orig
  
         if (map_bankwidth_is_large(map)) {
                 char *dest = (char *)&orig;
+
                 memcpy(dest+start, buf, len);
         } else {
-               for (i=start; i < start+len; i++) {
+               for (i = start; i < start+len; i++) {
                         int bitpos;
+
  #ifdef __LITTLE_ENDIAN
-                       bitpos = i*8;
+                       bitpos = i * 8;
  #else /* __BIG_ENDIAN */
-                       bitpos = (map_bankwidth(map)-1-i)*8;
+                       bitpos = (map_bankwidth(map) - 1 - i) * 8;
  #endif
                         orig.x[0] &= ~(0xff << bitpos);
                         orig.x[0] |= (unsigned long)buf[i-start] << bitpos;
@@ -384,9 +399,10 @@ static inline map_word map_word_ff(struct map_info *map)
  
         if (map_bankwidth(map) < MAP_FF_LIMIT) {
                 int bw = 8 * map_bankwidth(map);
+
                 r.x[0] = (1UL << bw) - 1;
         } else {
-               for (i=0; i<map_words(map); i++)
+               for (i = 0; i < map_words(map); i++)
                         r.x[i] = ~0UL;
         }
         return r;
@@ -407,7 +423,7 @@ static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
                 r.x[0] = __raw_readq(map->virt + ofs);
  #endif
         else if (map_bankwidth_is_large(map))
-               memcpy_fromio(r.x, map->virt+ofs, map->bankwidth);
+               memcpy_fromio(r.x, map->virt + ofs, map->bankwidth);
         else
                 BUG();
  
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h

index 4720b86..e540952 100644 (file)
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -155,6 +155,8 @@ enum spi_nor_option_flags {
   * @write:             [DRIVER-SPECIFIC] write data to the SPI NOR
   * @erase:             [DRIVER-SPECIFIC] erase a sector of the SPI NOR
   *                     at the offset @offs
+ * @lock:              [FLASH-SPECIFIC] lock a region of the SPI NOR
+ * @unlock:            [FLASH-SPECIFIC] unlock a region of the SPI NOR
   * @priv:              the private data
   */
  struct spi_nor {
@@ -189,6 +191,9 @@ struct spi_nor {
                         size_t len, size_t *retlen, const u_char *write_buf);
         int (*erase)(struct spi_nor *nor, loff_t offs);
  
+       int (*flash_lock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+       int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len);
+
         void *priv;
  };
  
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h

index ed43cb7..32201c2 100644 (file)
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -16,6 +16,13 @@
  #include <linux/uidgid.h>
  #include <uapi/linux/nfs4.h>
  
+enum nfs4_acl_whotype {
+       NFS4_ACL_WHO_NAMED = 0,
+       NFS4_ACL_WHO_OWNER,
+       NFS4_ACL_WHO_GROUP,
+       NFS4_ACL_WHO_EVERYONE,
+};
+
  struct nfs4_ace {
         uint32_t        type;
         uint32_t        flag;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h

index 410abd1..b95f914 100644 (file)
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -511,6 +511,7 @@ extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
   * Try to write back everything synchronously (but check the
   * return value!)
   */
+extern int nfs_sync_inode(struct inode *inode);
  extern int nfs_wb_all(struct inode *inode);
  extern int nfs_wb_page(struct inode *inode, struct page* page);
  extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h

deleted file mode 100644 (file)

index 333844e..0000000
--- a/include/linux/nfs_idmap.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * include/linux/nfs_idmap.h
- *
- *  UID and GID to name mapping for clients.
- *
- *  Copyright (c) 2002 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Marius Aamodt Eriksen <marius@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef NFS_IDMAP_H
-#define NFS_IDMAP_H
-
-#include <linux/uidgid.h>
-#include <uapi/linux/nfs_idmap.h>
-
-
-/* Forward declaration to make this header independent of others */
-struct nfs_client;
-struct nfs_server;
-struct nfs_fattr;
-struct nfs4_string;
-
-#if IS_ENABLED(CONFIG_NFS_V4)
-int nfs_idmap_init(void);
-void nfs_idmap_quit(void);
-#else
-static inline int nfs_idmap_init(void)
-{
-       return 0;
-}
-
-static inline void nfs_idmap_quit(void)
-{}
-#endif
-
-int nfs_idmap_new(struct nfs_client *);
-void nfs_idmap_delete(struct nfs_client *);
-
-void nfs_fattr_init_names(struct nfs_fattr *fattr,
-               struct nfs4_string *owner_name,
-               struct nfs4_string *group_name);
-void nfs_fattr_free_names(struct nfs_fattr *);
-void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *);
-
-int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *);
-int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *);
-int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t);
-int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t);
-
-int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res);
-
-extern unsigned int nfs_idmap_cache_timeout;
-#endif /* NFS_IDMAP_H */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h

index 4cb3eaa..93ab607 100644 (file)
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -255,11 +255,13 @@ struct nfs4_layoutget {
  struct nfs4_getdeviceinfo_args {
         struct nfs4_sequence_args seq_args;
         struct pnfs_device *pdev;
+       __u32 notify_types;
  };
  
  struct nfs4_getdeviceinfo_res {
         struct nfs4_sequence_res seq_res;
         struct pnfs_device *pdev;
+       __u32 notification;
  };
  
  struct nfs4_layoutcommit_args {
@@ -1271,11 +1273,15 @@ struct nfs42_falloc_args {
         nfs4_stateid                     falloc_stateid;
         u64                              falloc_offset;
         u64                              falloc_length;
+       const u32                       *falloc_bitmask;
  };
  
  struct nfs42_falloc_res {
         struct nfs4_sequence_res        seq_res;
         unsigned int                    status;
+
+       struct nfs_fattr                *falloc_fattr;
+       const struct nfs_server         *falloc_server;
  };
  
  struct nfs42_seek_args {
diff --git a/include/linux/of.h b/include/linux/of.h

index 5f124f6..ddeaae6 100644 (file)
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -305,6 +305,7 @@ extern int of_property_read_string_helper(struct device_node *np,
  extern int of_device_is_compatible(const struct device_node *device,
                                    const char *);
  extern bool of_device_is_available(const struct device_node *device);
+extern bool of_device_is_big_endian(const struct device_node *device);
  extern const void *of_get_property(const struct device_node *node,
                                 const char *name,
                                 int *lenp);
@@ -467,6 +468,11 @@ static inline bool of_device_is_available(const struct device_node *device)
         return false;
  }
  
+static inline bool of_device_is_big_endian(const struct device_node *device)
+{
+       return false;
+}
+
  static inline struct property *of_find_property(const struct device_node *np,
                                                 const char *name,
                                                 int *lenp)
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h

index 0ff360d..587ee50 100644 (file)
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -33,6 +33,8 @@ extern void *of_fdt_get_property(const void *blob,
  extern int of_fdt_is_compatible(const void *blob,
                                 unsigned long node,
                                 const char *compat);
+extern bool of_fdt_is_big_endian(const void *blob,
+                                unsigned long node);
  extern int of_fdt_match(const void *blob, unsigned long node,
                         const char *const *compat);
  extern void of_fdt_unflatten_tree(unsigned long *blob,
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h

index bfec136..d884929 100644 (file)
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -37,8 +37,6 @@ extern int of_irq_parse_one(struct device_node *device, int index,
  extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
  extern int of_irq_to_resource(struct device_node *dev, int index,
                               struct resource *r);
-extern int of_irq_to_resource_table(struct device_node *dev,
-               struct resource *res, int nr_irqs);
  
  extern void of_irq_init(const struct of_device_id *matches);
  
@@ -46,6 +44,8 @@ extern void of_irq_init(const struct of_device_id *matches);
  extern int of_irq_count(struct device_node *dev);
  extern int of_irq_get(struct device_node *dev, int index);
  extern int of_irq_get_byname(struct device_node *dev, const char *name);
+extern int of_irq_to_resource_table(struct device_node *dev,
+               struct resource *res, int nr_irqs);
  #else
  static inline int of_irq_count(struct device_node *dev)
  {
@@ -59,6 +59,11 @@ static inline int of_irq_get_byname(struct device_node *dev, const char *name)
  {
         return 0;
  }
+static inline int of_irq_to_resource_table(struct device_node *dev,
+                                          struct resource *res, int nr_irqs)
+{
+       return 0;
+}
  #endif
  
  #if defined(CONFIG_OF)
diff --git a/include/linux/platform_data/dma-imx-sdma.h b/include/linux/platform_data/dma-imx-sdma.h

index eabac4e..2d08816 100644 (file)
--- a/include/linux/platform_data/dma-imx-sdma.h
+++ b/include/linux/platform_data/dma-imx-sdma.h
@@ -48,6 +48,9 @@ struct sdma_script_start_addrs {
         s32 ssish_2_mcu_addr;
         s32 hdmi_dma_addr;
         /* End of v2 array */
+       s32 zcanfd_2_mcu_addr;
+       s32 zqspi_2_mcu_addr;
+       /* End of v3 array */
  };
  
  /**
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h

index 73069cb..a7a06d1 100644 (file)
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -72,6 +72,7 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
  /* Routine choices */
  struct raid6_calls {
         void (*gen_syndrome)(int, size_t, void **);
+       void (*xor_syndrome)(int, int, int, size_t, void **);
         int  (*valid)(void);    /* Returns 1 if this routine set is usable */
         const char *name;       /* Name of this routine set */
         int prefer;             /* Has special performance attribute */
diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h

index abdf1f2..dd0ba50 100644 (file)
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h
@@ -69,6 +69,7 @@ struct shdma_chan {
         int id;                         /* Raw id of this channel */
         int irq;                        /* Channel IRQ */
         int slave_id;                   /* Client ID for slave DMA */
+       int real_slave_id;              /* argument passed to filter function */
         int hw_req;                     /* DMA request line for slave DMA - same
                                          * as MID/RID, used with DT */
         enum shdma_pm_state pm_state;
diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h

index aadc6a0..8073713 100644 (file)
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -142,12 +142,18 @@ typedef __be32    rpc_fraghdr;
         (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4))
  
  /*
- * RFC1833/RFC3530 rpcbind (v3+) well-known netid's.
+ * Well-known netids. See:
+ *
+ *   http://www.iana.org/assignments/rpc-netids/rpc-netids.xhtml
   */
  #define RPCBIND_NETID_UDP      "udp"
  #define RPCBIND_NETID_TCP      "tcp"
+#define RPCBIND_NETID_RDMA     "rdma"
+#define RPCBIND_NETID_SCTP     "sctp"
  #define RPCBIND_NETID_UDP6     "udp6"
  #define RPCBIND_NETID_TCP6     "tcp6"
+#define RPCBIND_NETID_RDMA6    "rdma6"
+#define RPCBIND_NETID_SCTP6    "sctp6"
  #define RPCBIND_NETID_LOCAL    "local"
  
  /*
diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h

index 64a0a0a..c984c85 100644 (file)
--- a/include/linux/sunrpc/xprtrdma.h
+++ b/include/linux/sunrpc/xprtrdma.h
@@ -40,11 +40,6 @@
  #ifndef _LINUX_SUNRPC_XPRTRDMA_H
  #define _LINUX_SUNRPC_XPRTRDMA_H
  
-/*
- * rpcbind (v3+) RDMA netid.
- */
-#define RPCBIND_NETID_RDMA     "rdma"
-
  /*
   * Constants. Max RPC/NFS header is big enough to account for
   * additional marshaling buffers passed down by Linux client.
diff --git a/include/linux/virtio.h b/include/linux/virtio.h

index 28f0e65..8f4d4bf 100644 (file)
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -108,8 +108,6 @@ struct virtio_device {
         void *priv;
  };
  
-bool virtio_device_is_legacy_only(struct virtio_device_id id);
-
  static inline struct virtio_device *dev_to_virtio(struct device *_dev)
  {
         return container_of(_dev, struct virtio_device, dev);
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h

index ca3ed78..1e306f7 100644 (file)
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -298,13 +298,6 @@ static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
                 }                                                       \
         } while(0)
  
-static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset)
-{
-       u8 ret;
-       vdev->config->get(vdev, offset, &ret, sizeof(ret));
-       return ret;
-}
-
  /* Read @count fields, @bytes each. */
  static inline void __virtio_cread_many(struct virtio_device *vdev,
                                        unsigned int offset,
@@ -326,7 +319,6 @@ static inline void __virtio_cread_many(struct virtio_device *vdev,
         } while (gen != old);
  }
  
-
  static inline void virtio_cread_bytes(struct virtio_device *vdev,
                                       unsigned int offset,
                                       void *buf, size_t len)
@@ -334,6 +326,13 @@ static inline void virtio_cread_bytes(struct virtio_device *vdev,
         __virtio_cread_many(vdev, offset, buf, len, 1);
  }
  
+static inline u8 virtio_cread8(struct virtio_device *vdev, unsigned int offset)
+{
+       u8 ret;
+       vdev->config->get(vdev, offset, &ret, sizeof(ret));
+       return ret;
+}
+
  static inline void virtio_cwrite8(struct virtio_device *vdev,
                                   unsigned int offset, u8 val)
  {
@@ -374,7 +373,6 @@ static inline u64 virtio_cread64(struct virtio_device *vdev,
                                  unsigned int offset)
  {
         u64 ret;
-       vdev->config->get(vdev, offset, &ret, sizeof(ret));
         __virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret));
         return virtio64_to_cpu(vdev, (__force __virtio64)ret);
  }
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h

index 67e06fe..8e50888 100644 (file)
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -21,19 +21,20 @@
   * actually quite cheap.
   */
  
-#ifdef CONFIG_SMP
  static inline void virtio_mb(bool weak_barriers)
  {
+#ifdef CONFIG_SMP
         if (weak_barriers)
                 smp_mb();
         else
+#endif
                 mb();
  }
  
  static inline void virtio_rmb(bool weak_barriers)
  {
         if (weak_barriers)
-               smp_rmb();
+               dma_rmb();
         else
                 rmb();
  }
@@ -41,26 +42,10 @@ static inline void virtio_rmb(bool weak_barriers)
  static inline void virtio_wmb(bool weak_barriers)
  {
         if (weak_barriers)
-               smp_wmb();
+               dma_wmb();
         else
                 wmb();
  }
-#else
-static inline void virtio_mb(bool weak_barriers)
-{
-       mb();
-}
-
-static inline void virtio_rmb(bool weak_barriers)
-{
-       rmb();
-}
-
-static inline void virtio_wmb(bool weak_barriers)
-{
-       wmb();
-}
-#endif
  
  struct virtio_device;
  struct virtqueue;
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h

index d3583d3..54e7af3 100644 (file)
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -20,6 +20,8 @@
  #define ISCSIT_MIN_TAGS                        16
  #define ISCSIT_EXTRA_TAGS              8
  #define ISCSIT_TCP_BACKLOG             256
+#define ISCSI_RX_THREAD_NAME           "iscsi_trx"
+#define ISCSI_TX_THREAD_NAME           "iscsi_ttx"
  
  /* struct iscsi_node_attrib sanity values */
  #define NA_DATAOUT_TIMEOUT             3
@@ -60,6 +62,7 @@
  #define TA_CACHE_CORE_NPS              0
  /* T10 protection information disabled by default */
  #define TA_DEFAULT_T10_PI              0
+#define TA_DEFAULT_FABRIC_PROT_TYPE    0
  
  #define ISCSI_IOV_DATA_BUFFER          5
  
@@ -600,8 +603,11 @@ struct iscsi_conn {
         struct iscsi_tpg_np     *tpg_np;
         /* Pointer to parent session */
         struct iscsi_session    *sess;
-       /* Pointer to thread_set in use for this conn's threads */
-       struct iscsi_thread_set *thread_set;
+       int                     bitmap_id;
+       int                     rx_thread_active;
+       struct task_struct      *rx_thread;
+       int                     tx_thread_active;
+       struct task_struct      *tx_thread;
         /* list_head for session connection list */
         struct list_head        conn_list;
  } ____cacheline_aligned;
@@ -767,6 +773,7 @@ struct iscsi_tpg_attrib {
         u32                     demo_mode_discovery;
         u32                     default_erl;
         u8                      t10_pi;
+       u32                     fabric_prot_type;
         struct iscsi_portal_group *tpg;
  };
  
@@ -871,10 +878,10 @@ struct iscsit_global {
         /* Unique identifier used for the authentication daemon */
         u32                     auth_id;
         u32                     inactive_ts;
-       /* Thread Set bitmap count */
-       int                     ts_bitmap_count;
+#define ISCSIT_BITMAP_BITS     262144
         /* Thread Set bitmap pointer */
         unsigned long           *ts_bitmap;
+       spinlock_t              ts_bitmap_lock;
         /* Used for iSCSI discovery session authentication */
         struct iscsi_node_acl   discovery_acl;
         struct iscsi_portal_group       *discovery_tpg;
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h

index 672150b..480e9f8 100644 (file)
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -165,10 +165,8 @@ enum se_cmd_flags_table {
         SCF_SEND_DELAYED_TAS            = 0x00004000,
         SCF_ALUA_NON_OPTIMIZED          = 0x00008000,
         SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000,
-       SCF_ACK_KREF                    = 0x00040000,
         SCF_COMPARE_AND_WRITE           = 0x00080000,
         SCF_COMPARE_AND_WRITE_POST      = 0x00100000,
-       SCF_CMD_XCOPY_PASSTHROUGH       = 0x00200000,
  };
  
  /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
@@ -520,11 +518,11 @@ struct se_cmd {
         struct list_head        se_cmd_list;
         struct completion       cmd_wait_comp;
         struct kref             cmd_kref;
-       struct target_core_fabric_ops *se_tfo;
+       const struct target_core_fabric_ops *se_tfo;
         sense_reason_t          (*execute_cmd)(struct se_cmd *);
         sense_reason_t          (*execute_rw)(struct se_cmd *, struct scatterlist *,
                                               u32, enum dma_data_direction);
-       sense_reason_t (*transport_complete_callback)(struct se_cmd *);
+       sense_reason_t (*transport_complete_callback)(struct se_cmd *, bool);
  
         unsigned char           *t_task_cdb;
         unsigned char           __t_task_cdb[TCM_MAX_COMMAND_SIZE];
@@ -591,6 +589,7 @@ struct se_node_acl {
         bool                    acl_stop:1;
         u32                     queue_depth;
         u32                     acl_index;
+       enum target_prot_type   saved_prot_type;
  #define MAX_ACL_TAG_SIZE 64
         char                    acl_tag[MAX_ACL_TAG_SIZE];
         /* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
@@ -616,6 +615,7 @@ struct se_session {
         unsigned                sess_tearing_down:1;
         u64                     sess_bin_isid;
         enum target_prot_op     sup_prot_ops;
+       enum target_prot_type   sess_prot_type;
         struct se_node_acl      *se_node_acl;
         struct se_portal_group *se_tpg;
         void                    *fabric_sess_ptr;
@@ -890,7 +890,7 @@ struct se_portal_group {
         /* List of TCM sessions associated wth this TPG */
         struct list_head        tpg_sess_list;
         /* Pointer to $FABRIC_MOD dependent code */
-       struct target_core_fabric_ops *se_tpg_tfo;
+       const struct target_core_fabric_ops *se_tpg_tfo;
         struct se_wwn           *se_tpg_wwn;
         struct config_group     tpg_group;
         struct config_group     *tpg_default_groups[7];
diff --git a/include/target/target_core_configfs.h b/include/target/target_core_configfs.h

index e080138..25bb04c 100644 (file)
--- a/include/target/target_core_configfs.h
+++ b/include/target/target_core_configfs.h
@@ -5,12 +5,6 @@
  #define TARGET_CORE_NAME_MAX_LEN       64
  #define TARGET_FABRIC_NAME_SIZE                32
  
-extern struct target_fabric_configfs *target_fabric_configfs_init(
-                               struct module *, const char *);
-extern void target_fabric_configfs_free(struct target_fabric_configfs *);
-extern int target_fabric_configfs_register(struct target_fabric_configfs *);
-extern void target_fabric_configfs_deregister(struct target_fabric_configfs *);
-
  struct target_fabric_configfs_template {
         struct config_item_type tfc_discovery_cit;
         struct config_item_type tfc_wwn_cit;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h

index 22a4e98..17c7f5a 100644 (file)
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -2,6 +2,8 @@
  #define TARGET_CORE_FABRIC_H
  
  struct target_core_fabric_ops {
+       struct module *module;
+       const char *name;
         struct configfs_subsystem *tf_subsys;
         char *(*get_fabric_name)(void);
         u8 (*get_fabric_proto_ident)(struct se_portal_group *);
@@ -27,6 +29,14 @@ struct target_core_fabric_ops {
          * inquiry response
          */
         int (*tpg_check_demo_mode_login_only)(struct se_portal_group *);
+       /*
+        * Optionally used as a configfs tunable to determine when
+        * target-core should signal the PROTECT=1 feature bit for
+        * backends that don't support T10-PI, so that either fabric
+        * HW offload or target-core emulation performs the associated
+        * WRITE_STRIP and READ_INSERT operations.
+        */
+       int (*tpg_check_prot_fabric_only)(struct se_portal_group *);
         struct se_node_acl *(*tpg_alloc_fabric_acl)(
                                         struct se_portal_group *);
         void (*tpg_release_fabric_acl)(struct se_portal_group *,
@@ -82,8 +92,23 @@ struct target_core_fabric_ops {
         struct se_node_acl *(*fabric_make_nodeacl)(struct se_portal_group *,
                                 struct config_group *, const char *);
         void (*fabric_drop_nodeacl)(struct se_node_acl *);
+
+       struct configfs_attribute **tfc_discovery_attrs;
+       struct configfs_attribute **tfc_wwn_attrs;
+       struct configfs_attribute **tfc_tpg_base_attrs;
+       struct configfs_attribute **tfc_tpg_np_base_attrs;
+       struct configfs_attribute **tfc_tpg_attrib_attrs;
+       struct configfs_attribute **tfc_tpg_auth_attrs;
+       struct configfs_attribute **tfc_tpg_param_attrs;
+       struct configfs_attribute **tfc_tpg_nacl_base_attrs;
+       struct configfs_attribute **tfc_tpg_nacl_attrib_attrs;
+       struct configfs_attribute **tfc_tpg_nacl_auth_attrs;
+       struct configfs_attribute **tfc_tpg_nacl_param_attrs;
  };
  
+int target_register_template(const struct target_core_fabric_ops *fo);
+void target_unregister_template(const struct target_core_fabric_ops *fo);
+
  struct se_session *transport_init_session(enum target_prot_op);
  int transport_alloc_session_tags(struct se_session *, unsigned int,
                 unsigned int);
@@ -95,13 +120,15 @@ void       transport_register_session(struct se_portal_group *,
                 struct se_node_acl *, struct se_session *, void *);
  void   target_get_session(struct se_session *);
  void   target_put_session(struct se_session *);
+ssize_t        target_show_dynamic_sessions(struct se_portal_group *, char *);
  void   transport_free_session(struct se_session *);
  void   target_put_nacl(struct se_node_acl *);
  void   transport_deregister_session_configfs(struct se_session *);
  void   transport_deregister_session(struct se_session *);
  
  
-void   transport_init_se_cmd(struct se_cmd *, struct target_core_fabric_ops *,
+void   transport_init_se_cmd(struct se_cmd *,
+               const struct target_core_fabric_ops *,
                 struct se_session *, u32, int, int, unsigned char *);
  sense_reason_t transport_lookup_cmd_lun(struct se_cmd *, u32);
  sense_reason_t target_setup_cmd_from_cdb(struct se_cmd *, unsigned char *);
@@ -153,8 +180,8 @@ int core_tpg_set_initiator_node_queue_depth(struct se_portal_group *,
                 unsigned char *, u32, int);
  int    core_tpg_set_initiator_node_tag(struct se_portal_group *,
                 struct se_node_acl *, const char *);
-int    core_tpg_register(struct target_core_fabric_ops *, struct se_wwn *,
-               struct se_portal_group *, void *, int);
+int    core_tpg_register(const struct target_core_fabric_ops *,
+               struct se_wwn *, struct se_portal_group *, void *, int);
  int    core_tpg_deregister(struct se_portal_group *);
  
  /* SAS helpers */
diff --git a/include/target/target_core_fabric_configfs.h b/include/target/target_core_fabric_configfs.h

index b32a149..7a0649c 100644 (file)
--- a/include/target/target_core_fabric_configfs.h
+++ b/include/target/target_core_fabric_configfs.h
@@ -90,6 +90,11 @@ static struct target_fabric_tpg_attribute _fabric##_tpg_##_name =    \
         _fabric##_tpg_store_##_name);
  
  
+#define TF_TPG_BASE_ATTR_RO(_fabric, _name)                            \
+static struct target_fabric_tpg_attribute _fabric##_tpg_##_name =      \
+       __CONFIGFS_EATTR_RO(_name,                                      \
+       _fabric##_tpg_show_##_name);
+
  CONFIGFS_EATTR_STRUCT(target_fabric_wwn, target_fabric_configfs);
  #define TF_WWN_ATTR(_fabric, _name, _mode)                             \
  static struct target_fabric_wwn_attribute _fabric##_wwn_##_name =      \
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h

index 572e650..7f79cf4 100644 (file)
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -407,10 +407,10 @@ TRACE_EVENT(btrfs_sync_file,
  
         TP_fast_assign(
                 struct dentry *dentry = file->f_path.dentry;
-               struct inode *inode = dentry->d_inode;
+               struct inode *inode = d_inode(dentry);
  
                 __entry->ino            = inode->i_ino;
-               __entry->parent         = dentry->d_parent->d_inode->i_ino;
+               __entry->parent         = d_inode(dentry->d_parent)->i_ino;
                 __entry->datasync       = datasync;
                 __entry->root_objectid  =
                                  BTRFS_I(inode)->root->root_key.objectid;
diff --git a/include/trace/events/ext3.h b/include/trace/events/ext3.h

index 7f20707..fc733d2 100644 (file)
--- a/include/trace/events/ext3.h
+++ b/include/trace/events/ext3.h
@@ -439,10 +439,10 @@ TRACE_EVENT(ext3_sync_file_enter,
         TP_fast_assign(
                 struct dentry *dentry = file->f_path.dentry;
  
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
-               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
                 __entry->datasync       = datasync;
-               __entry->parent         = dentry->d_parent->d_inode->i_ino;
+               __entry->parent         = d_inode(dentry->d_parent)->i_ino;
         ),
  
         TP_printk("dev %d,%d ino %lu parent %ld datasync %d ",
@@ -710,9 +710,9 @@ TRACE_EVENT(ext3_unlink_enter,
  
         TP_fast_assign(
                 __entry->parent         = parent->i_ino;
-               __entry->ino            = dentry->d_inode->i_ino;
-               __entry->size           = dentry->d_inode->i_size;
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
+               __entry->size           = d_inode(dentry)->i_size;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
         ),
  
         TP_printk("dev %d,%d ino %lu size %lld parent %ld",
@@ -734,8 +734,8 @@ TRACE_EVENT(ext3_unlink_exit,
         ),
  
         TP_fast_assign(
-               __entry->ino            = dentry->d_inode->i_ino;
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
                 __entry->ret            = ret;
         ),
  
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h

index 47fca36..08ec3dd 100644 (file)
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -872,10 +872,10 @@ TRACE_EVENT(ext4_sync_file_enter,
         TP_fast_assign(
                 struct dentry *dentry = file->f_path.dentry;
  
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
-               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
                 __entry->datasync       = datasync;
-               __entry->parent         = dentry->d_parent->d_inode->i_ino;
+               __entry->parent         = d_inode(dentry->d_parent)->i_ino;
         ),
  
         TP_printk("dev %d,%d ino %lu parent %lu datasync %d ",
@@ -1453,10 +1453,10 @@ TRACE_EVENT(ext4_unlink_enter,
         ),
  
         TP_fast_assign(
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
-               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
                 __entry->parent         = parent->i_ino;
-               __entry->size           = dentry->d_inode->i_size;
+               __entry->size           = d_inode(dentry)->i_size;
         ),
  
         TP_printk("dev %d,%d ino %lu size %lld parent %lu",
@@ -1477,8 +1477,8 @@ TRACE_EVENT(ext4_unlink_exit,
         ),
  
         TP_fast_assign(
-               __entry->dev            = dentry->d_inode->i_sb->s_dev;
-               __entry->ino            = dentry->d_inode->i_ino;
+               __entry->dev            = d_inode(dentry)->i_sb->s_dev;
+               __entry->ino            = d_inode(dentry)->i_ino;
                 __entry->ret            = ret;
         ),
  
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild

index 640954b..1a0006a 100644 (file)
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -431,6 +431,7 @@ header-y += virtio_blk.h
  header-y += virtio_config.h
  header-y += virtio_console.h
  header-y += virtio_ids.h
+header-y += virtio_input.h
  header-y += virtio_net.h
  header-y += virtio_pci.h
  header-y += virtio_ring.h
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h

index d1197ae..3e445a7 100644 (file)
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -41,4 +41,21 @@
   */
  #define FALLOC_FL_ZERO_RANGE           0x10
  
+/*
+ * FALLOC_FL_INSERT_RANGE is use to insert space within the file size without
+ * overwriting any existing data. The contents of the file beyond offset are
+ * shifted towards right by len bytes to create a hole.  As such, this
+ * operation will increase the size of the file by len bytes.
+ *
+ * Different filesystems may implement different limitations on the granularity
+ * of the operation. Most will limit operations to filesystem block size
+ * boundaries, but this boundary may be larger or smaller depending on
+ * the filesystem and/or the configuration of the filesystem or file.
+ *
+ * Attempting to insert space using this flag at OR beyond the end of
+ * the file is considered an illegal operation - just use ftruncate(2) or
+ * fallocate(2) with mode 0 for such type of operations.
+ */
+#define FALLOC_FL_INSERT_RANGE         0x20
+
  #endif /* _UAPI_FALLOC_H_ */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h

index f574d7b..4b60056 100644 (file)
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -813,6 +813,7 @@ struct kvm_ppc_smmu_info {
  #define KVM_CAP_MIPS_MSA 112
  #define KVM_CAP_S390_INJECT_IRQ 113
  #define KVM_CAP_S390_IRQ_STATE 114
+#define KVM_CAP_PPC_HWRNG 115
  
  #ifdef KVM_CAP_IRQ_ROUTING
  
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h

index 35f5f4c..adc0aff 100644 (file)
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -162,13 +162,6 @@
   */
  #define NFS4_MAX_BACK_CHANNEL_OPS 2
  
-enum nfs4_acl_whotype {
-       NFS4_ACL_WHO_NAMED = 0,
-       NFS4_ACL_WHO_OWNER,
-       NFS4_ACL_WHO_GROUP,
-       NFS4_ACL_WHO_EVERYONE,
-};
-
  #endif /* _UAPI_LINUX_NFS4_H */
  
  /*
diff --git a/include/uapi/linux/nfs_idmap.h b/include/uapi/linux/nfs_idmap.h

index 8d4b1c7..038e36c 100644 (file)
--- a/include/uapi/linux/nfs_idmap.h
+++ b/include/uapi/linux/nfs_idmap.h
@@ -1,5 +1,5 @@
  /*
- * include/linux/nfs_idmap.h
+ * include/uapi/linux/nfs_idmap.h
   *
   *  UID and GID to name mapping for clients.
   *
diff --git a/include/uapi/linux/nfsd/debug.h b/include/uapi/linux/nfsd/debug.h

index 0bf130a..28ec6c9 100644 (file)
--- a/include/uapi/linux/nfsd/debug.h
+++ b/include/uapi/linux/nfsd/debug.h
@@ -11,14 +11,6 @@
  
  #include <linux/sunrpc/debug.h>
  
-/*
- * Enable debugging for nfsd.
- * Requires RPC_DEBUG.
- */
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define NFSD_DEBUG            1
-#endif
-
  /*
   * knfsd debug flags
   */
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h

index d3bd6ff..0df7bd5 100644 (file)
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -21,6 +21,9 @@
  
  /*
   * Export flags.
+ *
+ * Please update the expflags[] array in fs/nfsd/export.c when adding
+ * a new flag.
   */
  #define NFSEXP_READONLY                0x0001
  #define NFSEXP_INSECURE_PORT   0x0002
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h

index 49f4210..2ae6131 100644 (file)
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -78,6 +78,12 @@
  #define MD_DISK_ACTIVE         1 /* disk is running or spare disk */
  #define MD_DISK_SYNC           2 /* disk is in sync with the raid set */
  #define MD_DISK_REMOVED                3 /* disk is in sync with the raid set */
+#define MD_DISK_CLUSTER_ADD     4 /* Initiate a disk add across the cluster
+                                  * For clustered enviroments only.
+                                  */
+#define MD_DISK_CANDIDATE      5 /* disk is added as spare (local) until confirmed
+                                  * For clustered enviroments only.
+                                  */
  
  #define        MD_DISK_WRITEMOSTLY     9 /* disk is "write-mostly" is RAID1 config.
                                    * read requests will only be sent here in
@@ -101,6 +107,7 @@ typedef struct mdp_device_descriptor_s {
  #define MD_SB_CLEAN            0
  #define MD_SB_ERRORS           1
  
+#define        MD_SB_CLUSTERED         5 /* MD is clustered */
  #define        MD_SB_BITMAP_PRESENT    8 /* bitmap may be present nearby */
  
  /*
diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h

index 74e7c60..1cb8aa6 100644 (file)
--- a/include/uapi/linux/raid/md_u.h
+++ b/include/uapi/linux/raid/md_u.h
@@ -62,6 +62,7 @@
  #define STOP_ARRAY             _IO (MD_MAJOR, 0x32)
  #define STOP_ARRAY_RO          _IO (MD_MAJOR, 0x33)
  #define RESTART_ARRAY_RW       _IO (MD_MAJOR, 0x34)
+#define CLUSTERED_DISK_NACK    _IO (MD_MAJOR, 0x35)
  
  /* 63 partitions with the alternate major number (mdp) */
  #define MdpMinorShift 6
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h

index b483d19..b67f99d 100644 (file)
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -6,7 +6,7 @@
  #include <linux/types.h>
  #include <linux/uio.h>
  
-#define TCMU_VERSION "1.0"
+#define TCMU_VERSION "2.0"
  
  /*
   * Ring Design
@@ -39,9 +39,13 @@
   * should process the next packet the same way, and so on.
   */
  
-#define TCMU_MAILBOX_VERSION 1
+#define TCMU_MAILBOX_VERSION 2
  #define ALIGN_SIZE 64 /* Should be enough for most CPUs */
  
+/* See https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */
+#define xstr(s) str(s)
+#define str(s) #s
+
  struct tcmu_mailbox {
         __u16 version;
         __u16 flags;
@@ -64,31 +68,36 @@ enum tcmu_opcode {
   * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode.
   */
  struct tcmu_cmd_entry_hdr {
-               __u32 len_op;
+       __u32 len_op;
+       __u16 cmd_id;
+       __u8 kflags;
+#define TCMU_UFLAG_UNKNOWN_OP 0x1
+       __u8 uflags;
+
  } __packed;
  
  #define TCMU_OP_MASK 0x7
  
-static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr)
+static inline enum tcmu_opcode tcmu_hdr_get_op(__u32 len_op)
  {
-       return hdr->len_op & TCMU_OP_MASK;
+       return len_op & TCMU_OP_MASK;
  }
  
-static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op)
+static inline void tcmu_hdr_set_op(__u32 *len_op, enum tcmu_opcode op)
  {
-       hdr->len_op &= ~TCMU_OP_MASK;
-       hdr->len_op |= (op & TCMU_OP_MASK);
+       *len_op &= ~TCMU_OP_MASK;
+       *len_op |= (op & TCMU_OP_MASK);
  }
  
-static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr)
+static inline __u32 tcmu_hdr_get_len(__u32 len_op)
  {
-       return hdr->len_op & ~TCMU_OP_MASK;
+       return len_op & ~TCMU_OP_MASK;
  }
  
-static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
+static inline void tcmu_hdr_set_len(__u32 *len_op, __u32 len)
  {
-       hdr->len_op &= TCMU_OP_MASK;
-       hdr->len_op |= len;
+       *len_op &= TCMU_OP_MASK;
+       *len_op |= len;
  }
  
  /* Currently the same as SCSI_SENSE_BUFFERSIZE */
@@ -97,13 +106,14 @@ static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
  struct tcmu_cmd_entry {
         struct tcmu_cmd_entry_hdr hdr;
  
-       uint16_t cmd_id;
-       uint16_t __pad1;
-
         union {
                 struct {
+                       uint32_t iov_cnt;
+                       uint32_t iov_bidi_cnt;
+                       uint32_t iov_dif_cnt;
                         uint64_t cdb_off;
-                       uint64_t iov_cnt;
+                       uint64_t __pad1;
+                       uint64_t __pad2;
                         struct iovec iov[0];
                 } req;
                 struct {
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h

index 4b0488f..984169a 100644 (file)
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -25,6 +25,7 @@
   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE. */
+#include <linux/types.h>
  #include <linux/virtio_ids.h>
  #include <linux/virtio_config.h>
  
@@ -38,9 +39,9 @@
  
  struct virtio_balloon_config {
         /* Number of pages host wants Guest to give up. */
-       __le32 num_pages;
+       __u32 num_pages;
         /* Number of pages we've actually got in balloon. */
-       __le32 actual;
+       __u32 actual;
  };
  
  #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
@@ -51,9 +52,32 @@ struct virtio_balloon_config {
  #define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
  #define VIRTIO_BALLOON_S_NR       6
  
+/*
+ * Memory statistics structure.
+ * Driver fills an array of these structures and passes to device.
+ *
+ * NOTE: fields are laid out in a way that would make compiler add padding
+ * between and after fields, so we have to use compiler-specific attributes to
+ * pack it, to disable this padding. This also often causes compiler to
+ * generate suboptimal code.
+ *
+ * We maintain this statistics structure format for backwards compatibility,
+ * but don't follow this example.
+ *
+ * If implementing a similar structure, do something like the below instead:
+ *     struct virtio_balloon_stat {
+ *         __virtio16 tag;
+ *         __u8 reserved[6];
+ *         __virtio64 val;
+ *     };
+ *
+ * In other words, add explicit reserved fields to align field and
+ * structure boundaries at field size, avoiding compiler padding
+ * without the packed attribute.
+ */
  struct virtio_balloon_stat {
-       __u16 tag;
-       __u64 val;
+       __virtio16 tag;
+       __virtio64 val;
  } __attribute__((packed));
  
  #endif /* _LINUX_VIRTIO_BALLOON_H */
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h

index 284fc3a..5f60aa4 100644 (file)
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -39,5 +39,6 @@
  #define VIRTIO_ID_9P           9 /* 9p virtio console */
  #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
  #define VIRTIO_ID_CAIF        12 /* Virtio caif */
+#define VIRTIO_ID_INPUT        18 /* virtio input */
  
  #endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_input.h b/include/uapi/linux/virtio_input.h

new file mode 100644 (file)

index 0000000..a7fe5c8
--- /dev/null
+++ b/include/uapi/linux/virtio_input.h
@@ -0,0 +1,76 @@
+#ifndef _LINUX_VIRTIO_INPUT_H
+#define _LINUX_VIRTIO_INPUT_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+
+#include <linux/types.h>
+
+enum virtio_input_config_select {
+       VIRTIO_INPUT_CFG_UNSET      = 0x00,
+       VIRTIO_INPUT_CFG_ID_NAME    = 0x01,
+       VIRTIO_INPUT_CFG_ID_SERIAL  = 0x02,
+       VIRTIO_INPUT_CFG_ID_DEVIDS  = 0x03,
+       VIRTIO_INPUT_CFG_PROP_BITS  = 0x10,
+       VIRTIO_INPUT_CFG_EV_BITS    = 0x11,
+       VIRTIO_INPUT_CFG_ABS_INFO   = 0x12,
+};
+
+struct virtio_input_absinfo {
+       __u32 min;
+       __u32 max;
+       __u32 fuzz;
+       __u32 flat;
+       __u32 res;
+};
+
+struct virtio_input_devids {
+       __u16 bustype;
+       __u16 vendor;
+       __u16 product;
+       __u16 version;
+};
+
+struct virtio_input_config {
+       __u8    select;
+       __u8    subsel;
+       __u8    size;
+       __u8    reserved[5];
+       union {
+               char string[128];
+               __u8 bitmap[128];
+               struct virtio_input_absinfo abs;
+               struct virtio_input_devids ids;
+       } u;
+};
+
+struct virtio_input_event {
+       __le16 type;
+       __le16 code;
+       __le32 value;
+};
+
+#endif /* _LINUX_VIRTIO_INPUT_H */
diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h

index 46145a5..a45be6b 100644 (file)
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -864,7 +864,7 @@ struct snd_ctl_elem_id {
         snd_ctl_elem_iface_t iface;     /* interface identifier */
         unsigned int device;            /* device/client number */
         unsigned int subdevice;         /* subdevice (substream) number */
-       unsigned char name[44];         /* ASCII name of item */
+       unsigned char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];              /* ASCII name of item */
         unsigned int index;             /* index of item */
  };
  
diff --git a/ipc/mqueue.c b/ipc/mqueue.c

index 7635a1c..3aaea7f 100644 (file)
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -466,7 +466,7 @@ out_unlock:
  
  static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         dir->i_ctime = dir->i_mtime = dir->i_atime = CURRENT_TIME;
         dir->i_size -= DIRENT_SIZE;
@@ -770,7 +770,7 @@ static struct file *do_open(struct path *path, int oflag)
         if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
                 return ERR_PTR(-EINVAL);
         acc = oflag2acc[oflag & O_ACCMODE];
-       if (inode_permission(path->dentry->d_inode, acc))
+       if (inode_permission(d_inode(path->dentry), acc))
                 return ERR_PTR(-EACCES);
         return dentry_open(path, oflag, current_cred());
  }
@@ -802,7 +802,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
  
         ro = mnt_want_write(mnt);       /* we'll drop it in any case */
         error = 0;
-       mutex_lock(&root->d_inode->i_mutex);
+       mutex_lock(&d_inode(root)->i_mutex);
         path.dentry = lookup_one_len(name->name, root, strlen(name->name));
         if (IS_ERR(path.dentry)) {
                 error = PTR_ERR(path.dentry);
@@ -811,7 +811,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
         path.mnt = mntget(mnt);
  
         if (oflag & O_CREAT) {
-               if (path.dentry->d_inode) {     /* entry already exists */
+               if (d_really_is_positive(path.dentry)) {        /* entry already exists */
                         audit_inode(name, path.dentry, 0);
                         if (oflag & O_EXCL) {
                                 error = -EEXIST;
@@ -824,12 +824,12 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
                                 goto out;
                         }
                         audit_inode_parent_hidden(name, root);
-                       filp = do_create(ipc_ns, root->d_inode,
+                       filp = do_create(ipc_ns, d_inode(root),
                                                 &path, oflag, mode,
                                                 u_attr ? &attr : NULL);
                 }
         } else {
-               if (!path.dentry->d_inode) {
+               if (d_really_is_negative(path.dentry)) {
                         error = -ENOENT;
                         goto out;
                 }
@@ -848,7 +848,7 @@ out_putfd:
                 put_unused_fd(fd);
                 fd = error;
         }
-       mutex_unlock(&root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(root)->i_mutex);
         if (!ro)
                 mnt_drop_write(mnt);
  out_putname:
@@ -873,7 +873,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
         err = mnt_want_write(mnt);
         if (err)
                 goto out_name;
-       mutex_lock_nested(&mnt->mnt_root->d_inode->i_mutex, I_MUTEX_PARENT);
+       mutex_lock_nested(&d_inode(mnt->mnt_root)->i_mutex, I_MUTEX_PARENT);
         dentry = lookup_one_len(name->name, mnt->mnt_root,
                                 strlen(name->name));
         if (IS_ERR(dentry)) {
@@ -881,17 +881,17 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
                 goto out_unlock;
         }
  
-       inode = dentry->d_inode;
+       inode = d_inode(dentry);
         if (!inode) {
                 err = -ENOENT;
         } else {
                 ihold(inode);
-               err = vfs_unlink(dentry->d_parent->d_inode, dentry, NULL);
+               err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL);
         }
         dput(dentry);
  
  out_unlock:
-       mutex_unlock(&mnt->mnt_root->d_inode->i_mutex);
+       mutex_unlock(&d_inode(mnt->mnt_root)->i_mutex);
         if (inode)
                 iput(inode);
         mnt_drop_write(mnt);
diff --git a/ipc/shm.c b/ipc/shm.c

index d280a74..6d76707 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1132,7 +1132,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
         path = shp->shm_file->f_path;
         path_get(&path);
         shp->shm_nattch++;
-       size = i_size_read(path.dentry->d_inode);
+       size = i_size_read(d_inode(path.dentry));
         ipc_unlock_object(&shp->shm_perm);
         rcu_read_unlock();
  
diff --git a/kernel/audit.c b/kernel/audit.c

index 72ab759..1c13e42 100644 (file)
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -43,6 +43,7 @@
  
  #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  
+#include <linux/file.h>
  #include <linux/init.h>
  #include <linux/types.h>
  #include <linux/atomic.h>
@@ -107,6 +108,7 @@ static u32  audit_rate_limit;
   * When set to zero, this means unlimited. */
  static u32     audit_backlog_limit = 64;
  #define AUDIT_BACKLOG_WAIT_TIME (60 * HZ)
+static u32     audit_backlog_wait_time_master = AUDIT_BACKLOG_WAIT_TIME;
  static u32     audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
  static u32     audit_backlog_wait_overflow = 0;
  
@@ -338,13 +340,13 @@ static int audit_set_backlog_limit(u32 limit)
  static int audit_set_backlog_wait_time(u32 timeout)
  {
         return audit_do_config_change("audit_backlog_wait_time",
-                                     &audit_backlog_wait_time, timeout);
+                                     &audit_backlog_wait_time_master, timeout);
  }
  
  static int audit_set_enabled(u32 state)
  {
         int rc;
-       if (state < AUDIT_OFF || state > AUDIT_LOCKED)
+       if (state > AUDIT_LOCKED)
                 return -EINVAL;
  
         rc =  audit_do_config_change("audit_enabled", &audit_enabled, state);
@@ -663,7 +665,7 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
         case AUDIT_MAKE_EQUIV:
                 /* Only support auditd and auditctl in initial pid namespace
                  * for now. */
-               if ((task_active_pid_ns(current) != &init_pid_ns))
+               if (task_active_pid_ns(current) != &init_pid_ns)
                         return -EPERM;
  
                 if (!netlink_capable(skb, CAP_AUDIT_CONTROL))
@@ -834,7 +836,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                 s.lost                  = atomic_read(&audit_lost);
                 s.backlog               = skb_queue_len(&audit_skb_queue);
                 s.feature_bitmap        = AUDIT_FEATURE_BITMAP_ALL;
-               s.backlog_wait_time     = audit_backlog_wait_time;
+               s.backlog_wait_time     = audit_backlog_wait_time_master;
                 audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));
                 break;
         }
@@ -877,8 +879,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
                 if (s.mask & AUDIT_STATUS_BACKLOG_WAIT_TIME) {
                         if (sizeof(s) > (size_t)nlh->nlmsg_len)
                                 return -EINVAL;
-                       if (s.backlog_wait_time < 0 ||
-                           s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME)
+                       if (s.backlog_wait_time > 10*AUDIT_BACKLOG_WAIT_TIME)
                                 return -EINVAL;
                         err = audit_set_backlog_wait_time(s.backlog_wait_time);
                         if (err < 0)
@@ -1385,7 +1386,8 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
                 return NULL;
         }
  
-       audit_backlog_wait_time = AUDIT_BACKLOG_WAIT_TIME;
+       if (!reserve)
+               audit_backlog_wait_time = audit_backlog_wait_time_master;
  
         ab = audit_buffer_alloc(ctx, gfp_mask, type);
         if (!ab) {
@@ -1759,7 +1761,7 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
         } else
                 audit_log_format(ab, " name=(null)");
  
-       if (n->ino != (unsigned long)-1) {
+       if (n->ino != (unsigned long)-1)
                 audit_log_format(ab, " inode=%lu"
                                  " dev=%02x:%02x mode=%#ho"
                                  " ouid=%u ogid=%u rdev=%02x:%02x",
@@ -1771,7 +1773,6 @@ void audit_log_name(struct audit_context *context, struct audit_names *n,
                                  from_kgid(&init_user_ns, n->gid),
                                  MAJOR(n->rdev),
                                  MINOR(n->rdev));
-       }
         if (n->osid != 0) {
                 char *ctx = NULL;
                 u32 len;
@@ -1838,11 +1839,29 @@ error_path:
  }
  EXPORT_SYMBOL(audit_log_task_context);
  
+void audit_log_d_path_exe(struct audit_buffer *ab,
+                         struct mm_struct *mm)
+{
+       struct file *exe_file;
+
+       if (!mm)
+               goto out_null;
+
+       exe_file = get_mm_exe_file(mm);
+       if (!exe_file)
+               goto out_null;
+
+       audit_log_d_path(ab, " exe=", &exe_file->f_path);
+       fput(exe_file);
+       return;
+out_null:
+       audit_log_format(ab, " exe=(null)");
+}
+
  void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
  {
         const struct cred *cred;
         char comm[sizeof(tsk->comm)];
-       struct mm_struct *mm = tsk->mm;
         char *tty;
  
         if (!ab)
@@ -1878,13 +1897,7 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
         audit_log_format(ab, " comm=");
         audit_log_untrustedstring(ab, get_task_comm(comm, tsk));
  
-       if (mm) {
-               down_read(&mm->mmap_sem);
-               if (mm->exe_file)
-                       audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
-               up_read(&mm->mmap_sem);
-       } else
-               audit_log_format(ab, " exe=(null)");
+       audit_log_d_path_exe(ab, tsk->mm);
         audit_log_task_context(ab);
  }
  EXPORT_SYMBOL(audit_log_task_info);
@@ -1915,7 +1928,7 @@ void audit_log_link_denied(const char *operation, struct path *link)
  
         /* Generate AUDIT_PATH record with object. */
         name->type = AUDIT_TYPE_NORMAL;
-       audit_copy_inode(name, link->dentry, link->dentry->d_inode);
+       audit_copy_inode(name, link->dentry, d_backing_inode(link->dentry));
         audit_log_name(current->audit_context, name, link, 0, NULL);
  out:
         kfree(name);
diff --git a/kernel/audit.h b/kernel/audit.h

index 1caa0d3..d641f9b 100644 (file)
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -257,6 +257,9 @@ extern struct list_head audit_filter_list[];
  
  extern struct audit_entry *audit_dupe_rule(struct audit_krule *old);
  
+extern void audit_log_d_path_exe(struct audit_buffer *ab,
+                                struct mm_struct *mm);
+
  /* audit watch functions */
  #ifdef CONFIG_AUDIT_WATCH
  extern void audit_put_watch(struct audit_watch *watch);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c

index 2e0c974..b0f9877 100644 (file)
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -37,6 +37,7 @@ struct audit_chunk {
  
  static LIST_HEAD(tree_list);
  static LIST_HEAD(prune_list);
+static struct task_struct *prune_thread;
  
  /*
   * One struct chunk is attached to each inode of interest.
@@ -576,7 +577,7 @@ int audit_remove_tree_rule(struct audit_krule *rule)
  
  static int compare_root(struct vfsmount *mnt, void *arg)
  {
-       return mnt->mnt_root->d_inode == arg;
+       return d_backing_inode(mnt->mnt_root) == arg;
  }
  
  void audit_trim_trees(void)
@@ -648,7 +649,58 @@ void audit_put_tree(struct audit_tree *tree)
  
  static int tag_mount(struct vfsmount *mnt, void *arg)
  {
-       return tag_chunk(mnt->mnt_root->d_inode, arg);
+       return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
+}
+
+/*
+ * That gets run when evict_chunk() ends up needing to kill audit_tree.
+ * Runs from a separate thread.
+ */
+static int prune_tree_thread(void *unused)
+{
+       for (;;) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (list_empty(&prune_list))
+                       schedule();
+               __set_current_state(TASK_RUNNING);
+
+               mutex_lock(&audit_cmd_mutex);
+               mutex_lock(&audit_filter_mutex);
+
+               while (!list_empty(&prune_list)) {
+                       struct audit_tree *victim;
+
+                       victim = list_entry(prune_list.next,
+                                       struct audit_tree, list);
+                       list_del_init(&victim->list);
+
+                       mutex_unlock(&audit_filter_mutex);
+
+                       prune_one(victim);
+
+                       mutex_lock(&audit_filter_mutex);
+               }
+
+               mutex_unlock(&audit_filter_mutex);
+               mutex_unlock(&audit_cmd_mutex);
+       }
+       return 0;
+}
+
+static int audit_launch_prune(void)
+{
+       if (prune_thread)
+               return 0;
+       prune_thread = kthread_create(prune_tree_thread, NULL,
+                               "audit_prune_tree");
+       if (IS_ERR(prune_thread)) {
+               pr_err("cannot start thread audit_prune_tree");
+               prune_thread = NULL;
+               return -ENOMEM;
+       } else {
+               wake_up_process(prune_thread);
+               return 0;
+       }
  }
  
  /* called with audit_filter_mutex */
@@ -674,6 +726,12 @@ int audit_add_tree_rule(struct audit_krule *rule)
         /* do not set rule->tree yet */
         mutex_unlock(&audit_filter_mutex);
  
+       if (unlikely(!prune_thread)) {
+               err = audit_launch_prune();
+               if (err)
+                       goto Err;
+       }
+
         err = kern_path(tree->pathname, 0, &path);
         if (err)
                 goto Err;
@@ -811,36 +869,10 @@ int audit_tag_tree(char *old, char *new)
         return failed;
  }
  
-/*
- * That gets run when evict_chunk() ends up needing to kill audit_tree.
- * Runs from a separate thread.
- */
-static int prune_tree_thread(void *unused)
-{
-       mutex_lock(&audit_cmd_mutex);
-       mutex_lock(&audit_filter_mutex);
-
-       while (!list_empty(&prune_list)) {
-               struct audit_tree *victim;
-
-               victim = list_entry(prune_list.next, struct audit_tree, list);
-               list_del_init(&victim->list);
-
-               mutex_unlock(&audit_filter_mutex);
-
-               prune_one(victim);
-
-               mutex_lock(&audit_filter_mutex);
-       }
-
-       mutex_unlock(&audit_filter_mutex);
-       mutex_unlock(&audit_cmd_mutex);
-       return 0;
-}
  
  static void audit_schedule_prune(void)
  {
-       kthread_run(prune_tree_thread, NULL, "audit_prune_tree");
+       wake_up_process(prune_thread);
  }
  
  /*
@@ -907,9 +939,9 @@ static void evict_chunk(struct audit_chunk *chunk)
         for (n = 0; n < chunk->count; n++)
                 list_del_init(&chunk->owners[n].list);
         spin_unlock(&hash_lock);
+       mutex_unlock(&audit_filter_mutex);
         if (need_prune)
                 audit_schedule_prune();
-       mutex_unlock(&audit_filter_mutex);
  }
  
  static int audit_tree_handle_event(struct fsnotify_group *group,
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c

index ad9c168..6e30024 100644 (file)
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -146,7 +146,7 @@ int audit_watch_compare(struct audit_watch *watch, unsigned long ino, dev_t dev)
  /* Initialize a parent watch entry. */
  static struct audit_parent *audit_init_parent(struct path *path)
  {
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
         struct audit_parent *parent;
         int ret;
  
@@ -361,11 +361,11 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent)
         struct dentry *d = kern_path_locked(watch->path, parent);
         if (IS_ERR(d))
                 return PTR_ERR(d);
-       mutex_unlock(&parent->dentry->d_inode->i_mutex);
-       if (d->d_inode) {
+       mutex_unlock(&d_backing_inode(parent->dentry)->i_mutex);
+       if (d_is_positive(d)) {
                 /* update watch filter fields */
-               watch->dev = d->d_inode->i_sb->s_dev;
-               watch->ino = d->d_inode->i_ino;
+               watch->dev = d_backing_inode(d)->i_sb->s_dev;
+               watch->ino = d_backing_inode(d)->i_ino;
         }
         dput(d);
         return 0;
@@ -426,7 +426,7 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list)
                 return ret;
  
         /* either find an old parent or attach a new one */
-       parent = audit_find_parent(parent_path.dentry->d_inode);
+       parent = audit_find_parent(d_backing_inode(parent_path.dentry));
         if (!parent) {
                 parent = audit_init_parent(&parent_path);
                 if (IS_ERR(parent)) {
@@ -482,7 +482,7 @@ static int audit_watch_handle_event(struct fsnotify_group *group,
  
         switch (data_type) {
         case (FSNOTIFY_EVENT_PATH):
-               inode = ((struct path *)data)->dentry->d_inode;
+               inode = d_backing_inode(((struct path *)data)->dentry);
                 break;
         case (FSNOTIFY_EVENT_INODE):
                 inode = (struct inode *)data;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c

index dc4ae70..9fb9d1c 100644 (file)
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1629,7 +1629,7 @@ retry:
         rcu_read_lock();
         seq = read_seqbegin(&rename_lock);
         for(;;) {
-               struct inode *inode = d->d_inode;
+               struct inode *inode = d_backing_inode(d);
                 if (inode && unlikely(!hlist_empty(&inode->i_fsnotify_marks))) {
                         struct audit_chunk *chunk;
                         chunk = audit_tree_lookup(inode);
@@ -1754,7 +1754,7 @@ void __audit_inode(struct filename *name, const struct dentry *dentry,
                    unsigned int flags)
  {
         struct audit_context *context = current->audit_context;
-       const struct inode *inode = dentry->d_inode;
+       const struct inode *inode = d_backing_inode(dentry);
         struct audit_names *n;
         bool parent = flags & AUDIT_INODE_PARENT;
  
@@ -1853,7 +1853,7 @@ void __audit_inode_child(const struct inode *parent,
                          const unsigned char type)
  {
         struct audit_context *context = current->audit_context;
-       const struct inode *inode = dentry->d_inode;
+       const struct inode *inode = d_backing_inode(dentry);
         const char *dname = dentry->d_name.name;
         struct audit_names *n, *found_parent = NULL, *found_child = NULL;
  
@@ -2361,7 +2361,6 @@ static void audit_log_task(struct audit_buffer *ab)
         kuid_t auid, uid;
         kgid_t gid;
         unsigned int sessionid;
-       struct mm_struct *mm = current->mm;
         char comm[sizeof(current->comm)];
  
         auid = audit_get_loginuid(current);
@@ -2376,13 +2375,7 @@ static void audit_log_task(struct audit_buffer *ab)
         audit_log_task_context(ab);
         audit_log_format(ab, " pid=%d comm=", task_pid_nr(current));
         audit_log_untrustedstring(ab, get_task_comm(comm, current));
-       if (mm) {
-               down_read(&mm->mmap_sem);
-               if (mm->exe_file)
-                       audit_log_d_path(ab, " exe=", &mm->exe_file->f_path);
-               up_read(&mm->mmap_sem);
-       } else
-               audit_log_format(ab, " exe=(null)");
+       audit_log_d_path_exe(ab, current->mm);
  }
  
  /**
diff --git a/kernel/module.c b/kernel/module.c

index 650b038..42a1d2a 100644 (file)
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -387,9 +387,9 @@ static bool check_symbol(const struct symsearch *syms,
                 pr_warn("Symbol %s is marked as UNUSED, however this module is "
                         "using it.\n", fsa->name);
                 pr_warn("This symbol will go away in the future.\n");
-               pr_warn("Please evalute if this is the right api to use and if "
-                       "it really is, submit a report the linux kernel "
-                       "mailinglist together with submitting your code for "
+               pr_warn("Please evaluate if this is the right api to use and "
+                       "if it really is, submit a report to the linux kernel "
+                       "mailing list together with submitting your code for "
                         "inclusion.\n");
         }
  #endif
@@ -2511,7 +2511,8 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
                 return err;
  
         /* Suck in entire file: we'll want most of it. */
-       info->hdr = vmalloc(info->len);
+       info->hdr = __vmalloc(info->len,
+                       GFP_KERNEL | __GFP_HIGHMEM | __GFP_NOWARN, PAGE_KERNEL);
         if (!info->hdr)
                 return -ENOMEM;
  
diff --git a/kernel/params.c b/kernel/params.c

index 728e05b..a22d6a7 100644 (file)
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -173,9 +173,9 @@ static char *next_arg(char *args, char **param, char **val)
                         if (args[i-1] == '"')
                                 args[i-1] = '\0';
                 }
-               if (quoted && args[i-1] == '"')
-                       args[i-1] = '\0';
         }
+       if (quoted && args[i-1] == '"')
+               args[i-1] = '\0';
  
         if (args[i]) {
                 args[i] = '\0';
diff --git a/kernel/relay.c b/kernel/relay.c

index 5a56d3c..e9dbaeb 100644 (file)
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -407,7 +407,7 @@ static inline void relay_set_buf_dentry(struct rchan_buf *buf,
                                         struct dentry *dentry)
  {
         buf->dentry = dentry;
-       buf->dentry->d_inode->i_size = buf->early_bytes;
+       d_inode(buf->dentry)->i_size = buf->early_bytes;
  }
  
  static struct dentry *relay_create_buf_file(struct rchan *chan,
@@ -733,7 +733,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
                 buf->padding[old_subbuf] = buf->prev_padding;
                 buf->subbufs_produced++;
                 if (buf->dentry)
-                       buf->dentry->d_inode->i_size +=
+                       d_inode(buf->dentry)->i_size +=
                                 buf->chan->subbuf_size -
                                 buf->padding[old_subbuf];
                 else
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c

index 91eecaa..0533049 100644 (file)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6079,7 +6079,7 @@ trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
  
         if (ret) /* See tracing_get_cpu() */
-               ret->d_inode->i_cdev = (void *)(cpu + 1);
+               d_inode(ret)->i_cdev = (void *)(cpu + 1);
         return ret;
  }
  
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c

index 7da1dfe..c4de47f 100644 (file)
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -494,8 +494,8 @@ static void remove_event_file_dir(struct ftrace_event_file *file)
         if (dir) {
                 spin_lock(&dir->d_lock);        /* probably unneeded */
                 list_for_each_entry(child, &dir->d_subdirs, d_child) {
-                       if (child->d_inode)     /* probably unneeded */
-                               child->d_inode->i_private = NULL;
+                       if (d_really_is_positive(child))        /* probably unneeded */
+                               d_inode(child)->i_private = NULL;
                 }
                 spin_unlock(&dir->d_lock);
  
@@ -565,6 +565,7 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
  static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
  {
         char *event = NULL, *sub = NULL, *match;
+       int ret;
  
         /*
          * The buf format can be <subsystem>:<event-name>
@@ -590,7 +591,13 @@ static int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
                         event = NULL;
         }
  
-       return __ftrace_set_clr_event(tr, match, sub, event, set);
+       ret = __ftrace_set_clr_event(tr, match, sub, event, set);
+
+       /* Put back the colon to allow this to be called again */
+       if (buf)
+               *(buf - 1) = ':';
+
+       return ret;
  }
  
  /**
@@ -1753,6 +1760,8 @@ static void update_event_printk(struct ftrace_event_call *call,
                                 ptr++;
                                 /* Check for alpha chars like ULL */
                         } while (isalnum(*ptr));
+                       if (!*ptr)
+                               break;
                         /*
                          * A number must have some kind of delimiter after
                          * it, and we can ignore that too.
@@ -1779,12 +1788,16 @@ static void update_event_printk(struct ftrace_event_call *call,
                         do {
                                 ptr++;
                         } while (isalnum(*ptr) || *ptr == '_');
+                       if (!*ptr)
+                               break;
                         /*
                          * If what comes after this variable is a '.' or
                          * '->' then we can continue to ignore that string.
                          */
                         if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
                                 ptr += *ptr == '.' ? 1 : 2;
+                               if (!*ptr)
+                                       break;
                                 goto skip_more;
                         }
                         /*
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c

index 9cfea4c..a51e796 100644 (file)
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1308,15 +1308,19 @@ void graph_trace_open(struct trace_iterator *iter)
  {
         /* pid and depth on the last trace processed */
         struct fgraph_data *data;
+       gfp_t gfpflags;
         int cpu;
  
         iter->private = NULL;
  
-       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       /* We can be called in atomic context via ftrace_dump() */
+       gfpflags = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
+
+       data = kzalloc(sizeof(*data), gfpflags);
         if (!data)
                 goto out_err;
  
-       data->cpu_data = alloc_percpu(struct fgraph_cpu_data);
+       data->cpu_data = alloc_percpu_gfp(struct fgraph_cpu_data, gfpflags);
         if (!data->cpu_data)
                 goto out_err_free;
  
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c

index d60fe62..6dd022c 100644 (file)
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -443,7 +443,7 @@ static int create_trace_uprobe(int argc, char **argv)
         if (ret)
                 goto fail_address_parse;
  
-       inode = igrab(path.dentry->d_inode);
+       inode = igrab(d_inode(path.dentry));
         path_put(&path);
  
         if (!inode || !S_ISREG(inode->i_mode)) {
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c

index dbef231..975c6e0 100644 (file)
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void)
  static inline const struct raid6_calls *raid6_choose_gen(
         void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks)
  {
-       unsigned long perf, bestperf, j0, j1;
+       unsigned long perf, bestgenperf, bestxorperf, j0, j1;
+       int start = (disks>>1)-1, stop = disks-3;       /* work on the second half of the disks */
         const struct raid6_calls *const *algo;
         const struct raid6_calls *best;
  
-       for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
+       for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
                 if (!best || (*algo)->prefer >= best->prefer) {
                         if ((*algo)->valid && !(*algo)->valid())
                                 continue;
@@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen(
                         }
                         preempt_enable();
  
-                       if (perf > bestperf) {
-                               bestperf = perf;
+                       if (perf > bestgenperf) {
+                               bestgenperf = perf;
                                 best = *algo;
                         }
-                       pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name,
+                       pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name,
                                (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+
+                       if (!(*algo)->xor_syndrome)
+                               continue;
+
+                       perf = 0;
+
+                       preempt_disable();
+                       j0 = jiffies;
+                       while ((j1 = jiffies) == j0)
+                               cpu_relax();
+                       while (time_before(jiffies,
+                                           j1 + (1<<RAID6_TIME_JIFFIES_LG2))) {
+                               (*algo)->xor_syndrome(disks, start, stop,
+                                                     PAGE_SIZE, *dptrs);
+                               perf++;
+                       }
+                       preempt_enable();
+
+                       if (best == *algo)
+                               bestxorperf = perf;
+
+                       pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name,
+                               (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
                 }
         }
  
         if (best) {
-               pr_info("raid6: using algorithm %s (%ld MB/s)\n",
+               pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
                        best->name,
-                      (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+                      (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2));
+               if (best->xor_syndrome)
+                       pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n",
+                              (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1));
                 raid6_call = *best;
         } else
                 pr_err("raid6: Yikes!  No algorithm found!\n");
diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc

index 7cc12b5..bec27fc 100644 (file)
--- a/lib/raid6/altivec.uc
+++ b/lib/raid6/altivec.uc
@@ -119,6 +119,7 @@ int raid6_have_altivec(void)
  
  const struct raid6_calls raid6_altivec$# = {
         raid6_altivec$#_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
         raid6_have_altivec,
         "altivecx$#",
         0
diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c

index bc3b1dd..7673400 100644 (file)
--- a/lib/raid6/avx2.c
+++ b/lib/raid6/avx2.c
@@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
  
  const struct raid6_calls raid6_avx2x1 = {
         raid6_avx21_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
         raid6_have_avx2,
         "avx2x1",
         1                       /* Has cache hints */
@@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
  
  const struct raid6_calls raid6_avx2x2 = {
         raid6_avx22_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
         raid6_have_avx2,
         "avx2x2",
         1                       /* Has cache hints */
@@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
  
  const struct raid6_calls raid6_avx2x4 = {
         raid6_avx24_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
         raid6_have_avx2,
         "avx2x4",
         1                       /* Has cache hints */
diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc

index 5b50f8d..558aeac 100644 (file)
--- a/lib/raid6/int.uc
+++ b/lib/raid6/int.uc
@@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
         }
  }
  
+static void raid6_int$#_xor_syndrome(int disks, int start, int stop,
+                                    size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+               /* P/Q data pages */
+               wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE];
+               for ( z = z0-1 ; z >= start ; z-- ) {
+                       wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE];
+                       wp$$ ^= wd$$;
+                       w2$$ = MASK(wq$$);
+                       w1$$ = SHLBYTE(wq$$);
+                       w2$$ &= NBYTES(0x1d);
+                       w1$$ ^= w2$$;
+                       wq$$ = w1$$ ^ wd$$;
+               }
+               /* P/Q left side optimization */
+               for ( z = start-1 ; z >= 0 ; z-- ) {
+                       w2$$ = MASK(wq$$);
+                       w1$$ = SHLBYTE(wq$$);
+                       w2$$ &= NBYTES(0x1d);
+                       wq$$ = w1$$ ^ w2$$;
+               }
+               *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+               *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+       }
+
+}
+
  const struct raid6_calls raid6_intx$# = {
         raid6_int$#_gen_syndrome,
-       NULL,           /* always valid */
+       raid6_int$#_xor_syndrome,
+       NULL,                   /* always valid */
         "int" NSTRING "x$#",
         0
  };
diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c

index 590c71c..b3b0e1f 100644 (file)
--- a/lib/raid6/mmx.c
+++ b/lib/raid6/mmx.c
@@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs)
  
  const struct raid6_calls raid6_mmxx1 = {
         raid6_mmx1_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
         raid6_have_mmx,
         "mmxx1",
         0
@@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs)
  
  const struct raid6_calls raid6_mmxx2 = {
         raid6_mmx2_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
         raid6_have_mmx,
         "mmxx2",
         0
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c

index 36ad470..d9ad6ee 100644 (file)
--- a/lib/raid6/neon.c
+++ b/lib/raid6/neon.c
@@ -42,6 +42,7 @@
         }                                                               \
         struct raid6_calls const raid6_neonx ## _n = {                  \
                 raid6_neon ## _n ## _gen_syndrome,                      \
+               NULL,           /* XOR not yet implemented */           \
                 raid6_have_neon,                                        \
                 "neonx" #_n,                                            \
                 0                                                       \
diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c

index f762971..9025b8c 100644 (file)
--- a/lib/raid6/sse1.c
+++ b/lib/raid6/sse1.c
@@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
  
  const struct raid6_calls raid6_sse1x1 = {
         raid6_sse11_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
         raid6_have_sse1_or_mmxext,
         "sse1x1",
         1                       /* Has cache hints */
@@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
  
  const struct raid6_calls raid6_sse1x2 = {
         raid6_sse12_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
         raid6_have_sse1_or_mmxext,
         "sse1x2",
         1                       /* Has cache hints */
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c

index 85b82c8..1d2276b 100644 (file)
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
         kernel_fpu_end();
  }
  
+
+static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
+                                    size_t bytes, void **ptrs)
+ {
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+       for ( d = 0 ; d < bytes ; d += 16 ) {
+               asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+               asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+               asm volatile("pxor %xmm4,%xmm2");
+               /* P/Q data pages */
+               for ( z = z0-1 ; z >= start ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+                       asm volatile("pxor %xmm5,%xmm2");
+                       asm volatile("pxor %xmm5,%xmm4");
+               }
+               /* P/Q left side optimization */
+               for ( z = start-1 ; z >= 0 ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pxor %xmm5,%xmm4");
+               }
+               asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+               /* Don't use movntdq for r/w memory area < cache line */
+               asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+               asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+       }
+
+       asm volatile("sfence" : : : "memory");
+       kernel_fpu_end();
+}
+
  const struct raid6_calls raid6_sse2x1 = {
         raid6_sse21_gen_syndrome,
+       raid6_sse21_xor_syndrome,
         raid6_have_sse2,
         "sse2x1",
         1                       /* Has cache hints */
@@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
         kernel_fpu_end();
  }
  
+ static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+                                    size_t bytes, void **ptrs)
+ {
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
+
+       for ( d = 0 ; d < bytes ; d += 32 ) {
+               asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+               asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+               asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+               asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+               asm volatile("pxor %xmm4,%xmm2");
+               asm volatile("pxor %xmm6,%xmm3");
+               /* P/Q data pages */
+               for ( z = z0-1 ; z >= start ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pxor %xmm7,%xmm7");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("pcmpgtb %xmm6,%xmm7");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("paddb %xmm6,%xmm6");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pand %xmm0,%xmm7");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+                       asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+                       asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+                       asm volatile("pxor %xmm5,%xmm2");
+                       asm volatile("pxor %xmm7,%xmm3");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+               }
+               /* P/Q left side optimization */
+               for ( z = start-1 ; z >= 0 ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pxor %xmm7,%xmm7");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("pcmpgtb %xmm6,%xmm7");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("paddb %xmm6,%xmm6");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pand %xmm0,%xmm7");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+               }
+               asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+               asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+               /* Don't use movntdq for r/w memory area < cache line */
+               asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
+               asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
+               asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
+               asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
+       }
+
+       asm volatile("sfence" : : : "memory");
+       kernel_fpu_end();
+ }
+
  const struct raid6_calls raid6_sse2x2 = {
         raid6_sse22_gen_syndrome,
+       raid6_sse22_xor_syndrome,
         raid6_have_sse2,
         "sse2x2",
         1                       /* Has cache hints */
@@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
         kernel_fpu_end();
  }
  
+ static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+                                    size_t bytes, void **ptrs)
+ {
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
+
+       for ( d = 0 ; d < bytes ; d += 64 ) {
+               asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
+               asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
+               asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
+               asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
+               asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
+               asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
+               asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
+               asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
+               asm volatile("pxor %xmm4,%xmm2");
+               asm volatile("pxor %xmm6,%xmm3");
+               asm volatile("pxor %xmm12,%xmm10");
+               asm volatile("pxor %xmm14,%xmm11");
+               /* P/Q data pages */
+               for ( z = z0-1 ; z >= start ; z-- ) {
+                       asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
+                       asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pxor %xmm7,%xmm7");
+                       asm volatile("pxor %xmm13,%xmm13");
+                       asm volatile("pxor %xmm15,%xmm15");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("pcmpgtb %xmm6,%xmm7");
+                       asm volatile("pcmpgtb %xmm12,%xmm13");
+                       asm volatile("pcmpgtb %xmm14,%xmm15");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("paddb %xmm6,%xmm6");
+                       asm volatile("paddb %xmm12,%xmm12");
+                       asm volatile("paddb %xmm14,%xmm14");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pand %xmm0,%xmm7");
+                       asm volatile("pand %xmm0,%xmm13");
+                       asm volatile("pand %xmm0,%xmm15");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+                       asm volatile("pxor %xmm13,%xmm12");
+                       asm volatile("pxor %xmm15,%xmm14");
+                       asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
+                       asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
+                       asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
+                       asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
+                       asm volatile("pxor %xmm5,%xmm2");
+                       asm volatile("pxor %xmm7,%xmm3");
+                       asm volatile("pxor %xmm13,%xmm10");
+                       asm volatile("pxor %xmm15,%xmm11");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+                       asm volatile("pxor %xmm13,%xmm12");
+                       asm volatile("pxor %xmm15,%xmm14");
+               }
+               asm volatile("prefetchnta %0" :: "m" (q[d]));
+               asm volatile("prefetchnta %0" :: "m" (q[d+32]));
+               /* P/Q left side optimization */
+               for ( z = start-1 ; z >= 0 ; z-- ) {
+                       asm volatile("pxor %xmm5,%xmm5");
+                       asm volatile("pxor %xmm7,%xmm7");
+                       asm volatile("pxor %xmm13,%xmm13");
+                       asm volatile("pxor %xmm15,%xmm15");
+                       asm volatile("pcmpgtb %xmm4,%xmm5");
+                       asm volatile("pcmpgtb %xmm6,%xmm7");
+                       asm volatile("pcmpgtb %xmm12,%xmm13");
+                       asm volatile("pcmpgtb %xmm14,%xmm15");
+                       asm volatile("paddb %xmm4,%xmm4");
+                       asm volatile("paddb %xmm6,%xmm6");
+                       asm volatile("paddb %xmm12,%xmm12");
+                       asm volatile("paddb %xmm14,%xmm14");
+                       asm volatile("pand %xmm0,%xmm5");
+                       asm volatile("pand %xmm0,%xmm7");
+                       asm volatile("pand %xmm0,%xmm13");
+                       asm volatile("pand %xmm0,%xmm15");
+                       asm volatile("pxor %xmm5,%xmm4");
+                       asm volatile("pxor %xmm7,%xmm6");
+                       asm volatile("pxor %xmm13,%xmm12");
+                       asm volatile("pxor %xmm15,%xmm14");
+               }
+               asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
+               asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
+               asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
+               asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
+               asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
+               asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
+               asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
+               asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
+               asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
+               asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
+               asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
+               asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
+       }
+       asm volatile("sfence" : : : "memory");
+       kernel_fpu_end();
+ }
+
+
  const struct raid6_calls raid6_sse2x4 = {
         raid6_sse24_gen_syndrome,
+       raid6_sse24_xor_syndrome,
         raid6_have_sse2,
         "sse2x4",
         1                       /* Has cache hints */
diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c

index 5a485b7..3bebbab 100644 (file)
--- a/lib/raid6/test/test.c
+++ b/lib/raid6/test/test.c
@@ -28,11 +28,11 @@ char *dataptrs[NDISKS];
  char data[NDISKS][PAGE_SIZE];
  char recovi[PAGE_SIZE], recovj[PAGE_SIZE];
  
-static void makedata(void)
+static void makedata(int start, int stop)
  {
         int i, j;
  
-       for (i = 0; i < NDISKS; i++) {
+       for (i = start; i <= stop; i++) {
                 for (j = 0; j < PAGE_SIZE; j++)
                         data[i][j] = rand();
  
@@ -91,34 +91,55 @@ int main(int argc, char *argv[])
  {
         const struct raid6_calls *const *algo;
         const struct raid6_recov_calls *const *ra;
-       int i, j;
+       int i, j, p1, p2;
         int err = 0;
  
-       makedata();
+       makedata(0, NDISKS-1);
  
         for (ra = raid6_recov_algos; *ra; ra++) {
                 if ((*ra)->valid  && !(*ra)->valid())
                         continue;
+
                 raid6_2data_recov = (*ra)->data2;
                 raid6_datap_recov = (*ra)->datap;
  
                 printf("using recovery %s\n", (*ra)->name);
  
                 for (algo = raid6_algos; *algo; algo++) {
-                       if (!(*algo)->valid || (*algo)->valid()) {
-                               raid6_call = **algo;
+                       if ((*algo)->valid && !(*algo)->valid())
+                               continue;
+
+                       raid6_call = **algo;
+
+                       /* Nuke syndromes */
+                       memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+
+                       /* Generate assumed good syndrome */
+                       raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
+                                               (void **)&dataptrs);
+
+                       for (i = 0; i < NDISKS-1; i++)
+                               for (j = i+1; j < NDISKS; j++)
+                                       err += test_disks(i, j);
+
+                       if (!raid6_call.xor_syndrome)
+                               continue;
+
+                       for (p1 = 0; p1 < NDISKS-2; p1++)
+                               for (p2 = p1; p2 < NDISKS-2; p2++) {
  
-                               /* Nuke syndromes */
-                               memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE);
+                                       /* Simulate rmw run */
+                                       raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+                                                               (void **)&dataptrs);
+                                       makedata(p1, p2);
+                                       raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE,
+                                                                (void **)&dataptrs);
  
-                               /* Generate assumed good syndrome */
-                               raid6_call.gen_syndrome(NDISKS, PAGE_SIZE,
-                                                       (void **)&dataptrs);
+                                       for (i = 0; i < NDISKS-1; i++)
+                                               for (j = i+1; j < NDISKS; j++)
+                                                       err += test_disks(i, j);
+                               }
  
-                               for (i = 0; i < NDISKS-1; i++)
-                                       for (j = i+1; j < NDISKS; j++)
-                                               err += test_disks(i, j);
-                       }
                 }
                 printf("\n");
         }
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc

index e7c2945..2dd291a 100644 (file)
--- a/lib/raid6/tilegx.uc
+++ b/lib/raid6/tilegx.uc
@@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
  
  const struct raid6_calls raid6_tilegx$# = {
         raid6_tilegx$#_gen_syndrome,
+       NULL,                   /* XOR not yet implemented */
         NULL,
         "tilegx$#",
         0
diff --git a/mm/shmem.c b/mm/shmem.c

index 1ea2400..de98137 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -544,7 +544,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
  
  static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
         struct shmem_inode_info *info = SHMEM_I(inode);
         int error;
  
@@ -2274,7 +2274,7 @@ static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
   */
  static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         int ret;
  
         /*
@@ -2298,7 +2298,7 @@ out:
  
  static int shmem_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
                 shmem_free_inode(inode->i_sb);
@@ -2315,7 +2315,7 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
         if (!simple_empty(dentry))
                 return -ENOTEMPTY;
  
-       drop_nlink(dentry->d_inode);
+       drop_nlink(d_inode(dentry));
         drop_nlink(dir);
         return shmem_unlink(dir, dentry);
  }
@@ -2336,8 +2336,8 @@ static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, stru
         }
         old_dir->i_ctime = old_dir->i_mtime =
         new_dir->i_ctime = new_dir->i_mtime =
-       old_dentry->d_inode->i_ctime =
-       new_dentry->d_inode->i_ctime = CURRENT_TIME;
+       d_inode(old_dentry)->i_ctime =
+       d_inode(new_dentry)->i_ctime = CURRENT_TIME;
  
         return 0;
  }
@@ -2376,7 +2376,7 @@ static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
   */
  static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
  {
-       struct inode *inode = old_dentry->d_inode;
+       struct inode *inode = d_inode(old_dentry);
         int they_are_dirs = S_ISDIR(inode->i_mode);
  
         if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
@@ -2396,10 +2396,10 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
                         return error;
         }
  
-       if (new_dentry->d_inode) {
+       if (d_really_is_positive(new_dentry)) {
                 (void) shmem_unlink(new_dir, new_dentry);
                 if (they_are_dirs) {
-                       drop_nlink(new_dentry->d_inode);
+                       drop_nlink(d_inode(new_dentry));
                         drop_nlink(old_dir);
                 }
         } else if (they_are_dirs) {
@@ -2476,14 +2476,14 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
  
  static void *shmem_follow_short_symlink(struct dentry *dentry, struct nameidata *nd)
  {
-       nd_set_link(nd, SHMEM_I(dentry->d_inode)->symlink);
+       nd_set_link(nd, SHMEM_I(d_inode(dentry))->symlink);
         return NULL;
  }
  
  static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
         struct page *page = NULL;
-       int error = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL);
+       int error = shmem_getpage(d_inode(dentry), 0, &page, SGP_READ, NULL);
         nd_set_link(nd, error ? ERR_PTR(error) : kmap(page));
         if (page)
                 unlock_page(page);
@@ -2574,7 +2574,7 @@ static int shmem_xattr_validate(const char *name)
  static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
                               void *buffer, size_t size)
  {
-       struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
+       struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
         int err;
  
         /*
@@ -2595,7 +2595,7 @@ static ssize_t shmem_getxattr(struct dentry *dentry, const char *name,
  static int shmem_setxattr(struct dentry *dentry, const char *name,
                           const void *value, size_t size, int flags)
  {
-       struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
+       struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
         int err;
  
         /*
@@ -2615,7 +2615,7 @@ static int shmem_setxattr(struct dentry *dentry, const char *name,
  
  static int shmem_removexattr(struct dentry *dentry, const char *name)
  {
-       struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
+       struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
         int err;
  
         /*
@@ -2635,7 +2635,7 @@ static int shmem_removexattr(struct dentry *dentry, const char *name)
  
  static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
  {
-       struct shmem_inode_info *info = SHMEM_I(dentry->d_inode);
+       struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));
         return simple_xattr_list(&info->xattrs, buffer, size);
  }
  #endif /* CONFIG_TMPFS_XATTR */
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c

index ec56550..79e8f71 100644 (file)
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -490,6 +490,43 @@ out:
  }
  EXPORT_SYMBOL(ceph_parse_options);
  
+int ceph_print_client_options(struct seq_file *m, struct ceph_client *client)
+{
+       struct ceph_options *opt = client->options;
+       size_t pos = m->count;
+
+       if (opt->name)
+               seq_printf(m, "name=%s,", opt->name);
+       if (opt->key)
+               seq_puts(m, "secret=<hidden>,");
+
+       if (opt->flags & CEPH_OPT_FSID)
+               seq_printf(m, "fsid=%pU,", &opt->fsid);
+       if (opt->flags & CEPH_OPT_NOSHARE)
+               seq_puts(m, "noshare,");
+       if (opt->flags & CEPH_OPT_NOCRC)
+               seq_puts(m, "nocrc,");
+       if (opt->flags & CEPH_OPT_NOMSGAUTH)
+               seq_puts(m, "nocephx_require_signatures,");
+       if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0)
+               seq_puts(m, "notcp_nodelay,");
+
+       if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT)
+               seq_printf(m, "mount_timeout=%d,", opt->mount_timeout);
+       if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT)
+               seq_printf(m, "osd_idle_ttl=%d,", opt->osd_idle_ttl);
+       if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT)
+               seq_printf(m, "osdkeepalivetimeout=%d,",
+                          opt->osd_keepalive_timeout);
+
+       /* drop redundant comma */
+       if (m->count != pos)
+               m->count--;
+
+       return 0;
+}
+EXPORT_SYMBOL(ceph_print_client_options);
+
  u64 ceph_client_id(struct ceph_client *client)
  {
         return client->monc.auth->global_id;
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c

index 16bc199..9d84ce4 100644 (file)
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -17,6 +17,7 @@ const char *crush_bucket_alg_name(int alg)
         case CRUSH_BUCKET_LIST: return "list";
         case CRUSH_BUCKET_TREE: return "tree";
         case CRUSH_BUCKET_STRAW: return "straw";
+       case CRUSH_BUCKET_STRAW2: return "straw2";
         default: return "unknown";
         }
  }
@@ -40,6 +41,8 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
                 return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
         case CRUSH_BUCKET_STRAW:
                 return ((struct crush_bucket_straw *)b)->item_weights[p];
+       case CRUSH_BUCKET_STRAW2:
+               return ((struct crush_bucket_straw2 *)b)->item_weights[p];
         }
         return 0;
  }
@@ -77,6 +80,14 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
         kfree(b);
  }
  
+void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
+{
+       kfree(b->item_weights);
+       kfree(b->h.perm);
+       kfree(b->h.items);
+       kfree(b);
+}
+
  void crush_destroy_bucket(struct crush_bucket *b)
  {
         switch (b->alg) {
@@ -92,6 +103,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
         case CRUSH_BUCKET_STRAW:
                 crush_destroy_bucket_straw((struct crush_bucket_straw *)b);
                 break;
+       case CRUSH_BUCKET_STRAW2:
+               crush_destroy_bucket_straw2((struct crush_bucket_straw2 *)b);
+               break;
         }
  }
  
diff --git a/net/ceph/crush/crush_ln_table.h b/net/ceph/crush/crush_ln_table.h

new file mode 100644 (file)

index 0000000..6192c7f
--- /dev/null
+++ b/net/ceph/crush/crush_ln_table.h
@@ -0,0 +1,166 @@
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2015 Intel Corporation All Rights Reserved
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation.  See file COPYING.
+ *
+ */
+
+#if defined(__linux__)
+#include <linux/types.h>
+#elif defined(__FreeBSD__)
+#include <sys/types.h>
+#endif
+
+#ifndef CEPH_CRUSH_LN_H
+#define CEPH_CRUSH_LN_H
+
+
+// RH_LH_tbl[2*k] = 2^48/(1.0+k/128.0)
+// RH_LH_tbl[2*k+1] = 2^48*log2(1.0+k/128.0)
+
+static int64_t __RH_LH_tbl[128*2+2] = {
+  0x0001000000000000ll, 0x0000000000000000ll, 0x0000fe03f80fe040ll, 0x000002dfca16dde1ll,
+  0x0000fc0fc0fc0fc1ll, 0x000005b9e5a170b4ll, 0x0000fa232cf25214ll, 0x0000088e68ea899all,
+  0x0000f83e0f83e0f9ll, 0x00000b5d69bac77ell, 0x0000f6603d980f67ll, 0x00000e26fd5c8555ll,
+  0x0000f4898d5f85bcll, 0x000010eb389fa29fll, 0x0000f2b9d6480f2cll, 0x000013aa2fdd27f1ll,
+  0x0000f0f0f0f0f0f1ll, 0x00001663f6fac913ll, 0x0000ef2eb71fc435ll, 0x00001918a16e4633ll,
+  0x0000ed7303b5cc0fll, 0x00001bc84240adabll, 0x0000ebbdb2a5c162ll, 0x00001e72ec117fa5ll,
+  0x0000ea0ea0ea0ea1ll, 0x00002118b119b4f3ll, 0x0000e865ac7b7604ll, 0x000023b9a32eaa56ll,
+  0x0000e6c2b4481cd9ll, 0x00002655d3c4f15cll, 0x0000e525982af70dll, 0x000028ed53f307eell,
+  0x0000e38e38e38e39ll, 0x00002b803473f7adll, 0x0000e1fc780e1fc8ll, 0x00002e0e85a9de04ll,
+  0x0000e070381c0e08ll, 0x0000309857a05e07ll, 0x0000dee95c4ca038ll, 0x0000331dba0efce1ll,
+  0x0000dd67c8a60dd7ll, 0x0000359ebc5b69d9ll, 0x0000dbeb61eed19dll, 0x0000381b6d9bb29bll,
+  0x0000da740da740dbll, 0x00003a93dc9864b2ll, 0x0000d901b2036407ll, 0x00003d0817ce9cd4ll,
+  0x0000d79435e50d7all, 0x00003f782d7204d0ll, 0x0000d62b80d62b81ll, 0x000041e42b6ec0c0ll,
+  0x0000d4c77b03531ell, 0x0000444c1f6b4c2dll, 0x0000d3680d3680d4ll, 0x000046b016ca47c1ll,
+  0x0000d20d20d20d21ll, 0x000049101eac381cll, 0x0000d0b69fcbd259ll, 0x00004b6c43f1366all,
+  0x0000cf6474a8819fll, 0x00004dc4933a9337ll, 0x0000ce168a772509ll, 0x0000501918ec6c11ll,
+  0x0000cccccccccccdll, 0x00005269e12f346ell, 0x0000cb8727c065c4ll, 0x000054b6f7f1325all,
+  0x0000ca4587e6b750ll, 0x0000570068e7ef5all, 0x0000c907da4e8712ll, 0x000059463f919deell,
+  0x0000c7ce0c7ce0c8ll, 0x00005b8887367433ll, 0x0000c6980c6980c7ll, 0x00005dc74ae9fbecll,
+  0x0000c565c87b5f9ell, 0x00006002958c5871ll, 0x0000c4372f855d83ll, 0x0000623a71cb82c8ll,
+  0x0000c30c30c30c31ll, 0x0000646eea247c5cll, 0x0000c1e4bbd595f7ll, 0x000066a008e4788cll,
+  0x0000c0c0c0c0c0c1ll, 0x000068cdd829fd81ll, 0x0000bfa02fe80bfbll, 0x00006af861e5fc7dll,
+  0x0000be82fa0be830ll, 0x00006d1fafdce20all, 0x0000bd6910470767ll, 0x00006f43cba79e40ll,
+  0x0000bc52640bc527ll, 0x00007164beb4a56dll, 0x0000bb3ee721a54ell, 0x000073829248e961ll,
+  0x0000ba2e8ba2e8bbll, 0x0000759d4f80cba8ll, 0x0000b92143fa36f6ll, 0x000077b4ff5108d9ll,
+  0x0000b81702e05c0cll, 0x000079c9aa879d53ll, 0x0000b70fbb5a19bfll, 0x00007bdb59cca388ll,
+  0x0000b60b60b60b61ll, 0x00007dea15a32c1bll, 0x0000b509e68a9b95ll, 0x00007ff5e66a0ffell,
+  0x0000b40b40b40b41ll, 0x000081fed45cbccbll, 0x0000b30f63528918ll, 0x00008404e793fb81ll,
+  0x0000b21642c8590cll, 0x000086082806b1d5ll, 0x0000b11fd3b80b12ll, 0x000088089d8a9e47ll,
+  0x0000b02c0b02c0b1ll, 0x00008a064fd50f2all, 0x0000af3addc680b0ll, 0x00008c01467b94bbll,
+  0x0000ae4c415c9883ll, 0x00008df988f4ae80ll, 0x0000ad602b580ad7ll, 0x00008fef1e987409ll,
+  0x0000ac7691840ac8ll, 0x000091e20ea1393ell, 0x0000ab8f69e2835all, 0x000093d2602c2e5fll,
+  0x0000aaaaaaaaaaabll, 0x000095c01a39fbd6ll, 0x0000a9c84a47a080ll, 0x000097ab43af59f9ll,
+  0x0000a8e83f5717c1ll, 0x00009993e355a4e5ll, 0x0000a80a80a80a81ll, 0x00009b79ffdb6c8bll,
+  0x0000a72f0539782all, 0x00009d5d9fd5010bll, 0x0000a655c4392d7cll, 0x00009f3ec9bcfb80ll,
+  0x0000a57eb50295fbll, 0x0000a11d83f4c355ll, 0x0000a4a9cf1d9684ll, 0x0000a2f9d4c51039ll,
+  0x0000a3d70a3d70a4ll, 0x0000a4d3c25e68dcll, 0x0000a3065e3fae7dll, 0x0000a6ab52d99e76ll,
+  0x0000a237c32b16d0ll, 0x0000a8808c384547ll, 0x0000a16b312ea8fdll, 0x0000aa5374652a1cll,
+  0x0000a0a0a0a0a0a1ll, 0x0000ac241134c4e9ll, 0x00009fd809fd80a0ll, 0x0000adf26865a8a1ll,
+  0x00009f1165e72549ll, 0x0000afbe7fa0f04dll, 0x00009e4cad23dd60ll, 0x0000b1885c7aa982ll,
+  0x00009d89d89d89d9ll, 0x0000b35004723c46ll, 0x00009cc8e160c3fcll, 0x0000b5157cf2d078ll,
+  0x00009c09c09c09c1ll, 0x0000b6d8cb53b0call, 0x00009b4c6f9ef03bll, 0x0000b899f4d8ab63ll,
+  0x00009a90e7d95bc7ll, 0x0000ba58feb2703all, 0x000099d722dabde6ll, 0x0000bc15edfeed32ll,
+  0x0000991f1a515886ll, 0x0000bdd0c7c9a817ll, 0x00009868c809868dll, 0x0000bf89910c1678ll,
+  0x000097b425ed097cll, 0x0000c1404eadf383ll, 0x000097012e025c05ll, 0x0000c2f5058593d9ll,
+  0x0000964fda6c0965ll, 0x0000c4a7ba58377cll, 0x000095a02568095bll, 0x0000c65871da59ddll,
+  0x000094f2094f2095ll, 0x0000c80730b00016ll, 0x0000944580944581ll, 0x0000c9b3fb6d0559ll,
+  0x0000939a85c4093all, 0x0000cb5ed69565afll, 0x000092f113840498ll, 0x0000cd07c69d8702ll,
+  0x0000924924924925ll, 0x0000ceaecfea8085ll, 0x000091a2b3c4d5e7ll, 0x0000d053f6d26089ll,
+  0x000090fdbc090fdcll, 0x0000d1f73f9c70c0ll, 0x0000905a38633e07ll, 0x0000d398ae817906ll,
+  0x00008fb823ee08fcll, 0x0000d53847ac00a6ll, 0x00008f1779d9fdc4ll, 0x0000d6d60f388e41ll,
+  0x00008e78356d1409ll, 0x0000d8720935e643ll, 0x00008dda5202376all, 0x0000da0c39a54804ll,
+  0x00008d3dcb08d3ddll, 0x0000dba4a47aa996ll, 0x00008ca29c046515ll, 0x0000dd3b4d9cf24bll,
+  0x00008c08c08c08c1ll, 0x0000ded038e633f3ll, 0x00008b70344a139cll, 0x0000e0636a23e2eell,
+  0x00008ad8f2fba939ll, 0x0000e1f4e5170d02ll, 0x00008a42f870566all, 0x0000e384ad748f0ell,
+  0x000089ae4089ae41ll, 0x0000e512c6e54998ll, 0x0000891ac73ae982ll, 0x0000e69f35065448ll,
+  0x0000888888888889ll, 0x0000e829fb693044ll, 0x000087f78087f781ll, 0x0000e9b31d93f98ell,
+  0x00008767ab5f34e5ll, 0x0000eb3a9f019750ll, 0x000086d905447a35ll, 0x0000ecc08321eb30ll,
+  0x0000864b8a7de6d2ll, 0x0000ee44cd59ffabll, 0x000085bf37612cefll, 0x0000efc781043579ll,
+  0x0000853408534086ll, 0x0000f148a170700all, 0x000084a9f9c8084bll, 0x0000f2c831e44116ll,
+  0x0000842108421085ll, 0x0000f446359b1353ll, 0x0000839930523fbfll, 0x0000f5c2afc65447ll,
+  0x000083126e978d50ll, 0x0000f73da38d9d4all, 0x0000828cbfbeb9a1ll, 0x0000f8b7140edbb1ll,
+  0x0000820820820821ll, 0x0000fa2f045e7832ll, 0x000081848da8faf1ll, 0x0000fba577877d7dll,
+  0x0000810204081021ll, 0x0000fd1a708bbe11ll, 0x0000808080808081ll, 0x0000fe8df263f957ll,
+  0x0000800000000000ll, 0x0000ffff00000000ll,
+  };
+
+
+    // LL_tbl[k] = 2^48*log2(1.0+k/2^15);
+static int64_t __LL_tbl[256] = {
+  0x0000000000000000ull, 0x00000002e2a60a00ull, 0x000000070cb64ec5ull, 0x00000009ef50ce67ull,
+  0x0000000cd1e588fdull, 0x0000000fb4747e9cull, 0x0000001296fdaf5eull, 0x0000001579811b58ull,
+  0x000000185bfec2a1ull, 0x0000001b3e76a552ull, 0x0000001e20e8c380ull, 0x0000002103551d43ull,
+  0x00000023e5bbb2b2ull, 0x00000026c81c83e4ull, 0x00000029aa7790f0ull, 0x0000002c8cccd9edull,
+  0x0000002f6f1c5ef2ull, 0x0000003251662017ull, 0x0000003533aa1d71ull, 0x0000003815e8571aull,
+  0x0000003af820cd26ull, 0x0000003dda537faeull, 0x00000040bc806ec8ull, 0x000000439ea79a8cull,
+  0x0000004680c90310ull, 0x0000004962e4a86cull, 0x0000004c44fa8ab6ull, 0x0000004f270aaa06ull,
+  0x0000005209150672ull, 0x00000054eb19a013ull, 0x00000057cd1876fdull, 0x0000005aaf118b4aull,
+  0x0000005d9104dd0full, 0x0000006072f26c64ull, 0x0000006354da3960ull, 0x0000006636bc441aull,
+  0x0000006918988ca8ull, 0x0000006bfa6f1322ull, 0x0000006edc3fd79full, 0x00000071be0ada35ull,
+  0x000000749fd01afdull, 0x00000077818f9a0cull, 0x0000007a6349577aull, 0x0000007d44fd535eull,
+  0x0000008026ab8dceull, 0x00000083085406e3ull, 0x00000085e9f6beb2ull, 0x00000088cb93b552ull,
+  0x0000008bad2aeadcull, 0x0000008e8ebc5f65ull, 0x0000009170481305ull, 0x0000009451ce05d3ull,
+  0x00000097334e37e5ull, 0x0000009a14c8a953ull, 0x0000009cf63d5a33ull, 0x0000009fd7ac4a9dull,
+  0x000000a2b07f3458ull, 0x000000a59a78ea6aull, 0x000000a87bd699fbull, 0x000000ab5d2e8970ull,
+  0x000000ae3e80b8e3ull, 0x000000b11fcd2869ull, 0x000000b40113d818ull, 0x000000b6e254c80aull,
+  0x000000b9c38ff853ull, 0x000000bca4c5690cull, 0x000000bf85f51a4aull, 0x000000c2671f0c26ull,
+  0x000000c548433eb6ull, 0x000000c82961b211ull, 0x000000cb0a7a664dull, 0x000000cdeb8d5b82ull,
+  0x000000d0cc9a91c8ull, 0x000000d3ada20933ull, 0x000000d68ea3c1ddull, 0x000000d96f9fbbdbull,
+  0x000000dc5095f744ull, 0x000000df31867430ull, 0x000000e2127132b5ull, 0x000000e4f35632eaull,
+  0x000000e7d43574e6ull, 0x000000eab50ef8c1ull, 0x000000ed95e2be90ull, 0x000000f076b0c66cull,
+  0x000000f35779106aull, 0x000000f6383b9ca2ull, 0x000000f918f86b2aull, 0x000000fbf9af7c1aull,
+  0x000000feda60cf88ull, 0x00000101bb0c658cull, 0x000001049bb23e3cull, 0x000001077c5259afull,
+  0x0000010a5cecb7fcull, 0x0000010d3d81593aull, 0x000001101e103d7full, 0x00000112fe9964e4ull,
+  0x00000115df1ccf7eull, 0x00000118bf9a7d64ull, 0x0000011ba0126eadull, 0x0000011e8084a371ull,
+  0x0000012160f11bc6ull, 0x000001244157d7c3ull, 0x0000012721b8d77full, 0x0000012a02141b10ull,
+  0x0000012ce269a28eull, 0x0000012fc2b96e0full, 0x00000132a3037daaull, 0x000001358347d177ull,
+  0x000001386386698cull, 0x0000013b43bf45ffull, 0x0000013e23f266e9ull, 0x00000141041fcc5eull,
+  0x00000143e4477678ull, 0x00000146c469654bull, 0x00000149a48598f0ull, 0x0000014c849c117cull,
+  0x0000014f64accf08ull, 0x0000015244b7d1a9ull, 0x0000015524bd1976ull, 0x0000015804bca687ull,
+  0x0000015ae4b678f2ull, 0x0000015dc4aa90ceull, 0x00000160a498ee31ull, 0x0000016384819134ull,
+  0x00000166646479ecull, 0x000001694441a870ull, 0x0000016c24191cd7ull, 0x0000016df6ca19bdull,
+  0x00000171e3b6d7aaull, 0x00000174c37d1e44ull, 0x00000177a33dab1cull, 0x0000017a82f87e49ull,
+  0x0000017d62ad97e2ull, 0x00000180425cf7feull, 0x00000182b07f3458ull, 0x0000018601aa8c19ull,
+  0x00000188e148c046ull, 0x0000018bc0e13b52ull, 0x0000018ea073fd52ull, 0x000001918001065dull,
+  0x000001945f88568bull, 0x000001973f09edf2ull, 0x0000019a1e85ccaaull, 0x0000019cfdfbf2c8ull,
+  0x0000019fdd6c6063ull, 0x000001a2bcd71593ull, 0x000001a59c3c126eull, 0x000001a87b9b570bull,
+  0x000001ab5af4e380ull, 0x000001ae3a48b7e5ull, 0x000001b11996d450ull, 0x000001b3f8df38d9ull,
+  0x000001b6d821e595ull, 0x000001b9b75eda9bull, 0x000001bc96961803ull, 0x000001bf75c79de3ull,
+  0x000001c254f36c51ull, 0x000001c534198365ull, 0x000001c81339e336ull, 0x000001caf2548bd9ull,
+  0x000001cdd1697d67ull, 0x000001d0b078b7f5ull, 0x000001d38f823b9aull, 0x000001d66e86086dull,
+  0x000001d94d841e86ull, 0x000001dc2c7c7df9ull, 0x000001df0b6f26dfull, 0x000001e1ea5c194eull,
+  0x000001e4c943555dull, 0x000001e7a824db23ull, 0x000001ea8700aab5ull, 0x000001ed65d6c42bull,
+  0x000001f044a7279dull, 0x000001f32371d51full, 0x000001f60236cccaull, 0x000001f8e0f60eb3ull,
+  0x000001fbbfaf9af3ull, 0x000001fe9e63719eull, 0x000002017d1192ccull, 0x000002045bb9fe94ull,
+  0x000002073a5cb50dull, 0x00000209c06e6212ull, 0x0000020cf791026aull, 0x0000020fd622997cull,
+  0x00000212b07f3458ull, 0x000002159334a8d8ull, 0x0000021871b52150ull, 0x0000021b502fe517ull,
+  0x0000021d6a73a78full, 0x000002210d144eeeull, 0x00000223eb7df52cull, 0x00000226c9e1e713ull,
+  0x00000229a84024bbull, 0x0000022c23679b4eull, 0x0000022f64eb83a8ull, 0x000002324338a51bull,
+  0x00000235218012a9ull, 0x00000237ffc1cc69ull, 0x0000023a2c3b0ea4ull, 0x0000023d13ee805bull,
+  0x0000024035e9221full, 0x00000243788faf25ull, 0x0000024656b4e735ull, 0x00000247ed646bfeull,
+  0x0000024c12ee3d98ull, 0x0000024ef1025c1aull, 0x00000251cf10c799ull, 0x0000025492644d65ull,
+  0x000002578b1c85eeull, 0x0000025a6919d8f0ull, 0x0000025d13ee805bull, 0x0000026025036716ull,
+  0x0000026296453882ull, 0x00000265e0d62b53ull, 0x00000268beb701f3ull, 0x0000026b9c92265eull,
+  0x0000026d32f798a9ull, 0x00000271583758ebull, 0x000002743601673bull, 0x0000027713c5c3b0ull,
+  0x00000279f1846e5full, 0x0000027ccf3d6761ull, 0x0000027e6580aecbull, 0x000002828a9e44b3ull,
+  0x0000028568462932ull, 0x00000287bdbf5255ull, 0x0000028b2384de4aull, 0x0000028d13ee805bull,
+  0x0000029035e9221full, 0x0000029296453882ull, 0x0000029699bdfb61ull, 0x0000029902a37aabull,
+  0x0000029c54b864c9ull, 0x0000029deabd1083ull, 0x000002a20f9c0bb5ull, 0x000002a4c7605d61ull,
+  0x000002a7bdbf5255ull, 0x000002a96056dafcull, 0x000002ac3daf14efull, 0x000002af1b019ecaull,
+  0x000002b296453882ull, 0x000002b5d022d80full, 0x000002b8fa471cb3ull, 0x000002ba9012e713ull,
+  0x000002bd6d4901ccull, 0x000002c04a796cf6ull, 0x000002c327a428a6ull, 0x000002c61a5e8f4cull,
+  0x000002c8e1e891f6ull, 0x000002cbbf023fc2ull, 0x000002ce9c163e6eull, 0x000002d179248e13ull,
+  0x000002d4562d2ec6ull, 0x000002d73330209dull, 0x000002da102d63b0ull, 0x000002dced24f814ull,
+};
+
+
+
+
+#endif
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c

index a1ef53c..5b47736 100644 (file)
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -20,7 +20,7 @@
  
  #include <linux/crush/crush.h>
  #include <linux/crush/hash.h>
-#include <linux/crush/mapper.h>
+#include "crush_ln_table.h"
  
  /*
   * Implement the core CRUSH mapping algorithm.
@@ -238,6 +238,102 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
         return bucket->h.items[high];
  }
  
+// compute 2^44*log2(input+1)
+uint64_t crush_ln(unsigned xin)
+{
+    unsigned x=xin, x1;
+    int iexpon, index1, index2;
+    uint64_t RH, LH, LL, xl64, result;
+
+    x++;
+
+    // normalize input
+    iexpon = 15;
+    while(!(x&0x18000)) { x<<=1; iexpon--; }
+
+    index1 = (x>>8)<<1;
+    // RH ~ 2^56/index1
+    RH = __RH_LH_tbl[index1 - 256];
+    // LH ~ 2^48 * log2(index1/256)
+    LH = __RH_LH_tbl[index1 + 1 - 256];
+
+    // RH*x ~ 2^48 * (2^15 + xf), xf<2^8
+    xl64 = (int64_t)x * RH;
+    xl64 >>= 48;
+    x1 = xl64;
+
+    result = iexpon;
+    result <<= (12 + 32);
+
+    index2 = x1 & 0xff;
+    // LL ~ 2^48*log2(1.0+index2/2^15)
+    LL = __LL_tbl[index2];
+
+    LH = LH + LL;
+
+    LH >>= (48-12 - 32);
+    result += LH;
+
+    return result;
+}
+
+
+/*
+ * straw2
+ *
+ * for reference, see:
+ *
+ * http://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
+ *
+ */
+
+static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+                               int x, int r)
+{
+       unsigned i, high = 0;
+       unsigned u;
+       unsigned w;
+       __s64 ln, draw, high_draw = 0;
+
+       for (i = 0; i < bucket->h.size; i++) {
+               w = bucket->item_weights[i];
+               if (w) {
+                       u = crush_hash32_3(bucket->h.hash, x,
+                                          bucket->h.items[i], r);
+                       u &= 0xffff;
+
+                       /*
+                        * for some reason slightly less than 0x10000 produces
+                        * a slightly more accurate distribution... probably a
+                        * rounding effect.
+                        *
+                        * the natural log lookup table maps [0,0xffff]
+                        * (corresponding to real numbers [1/0x10000, 1] to
+                        * [0, 0xffffffffffff] (corresponding to real numbers
+                        * [-11.090355,0]).
+                        */
+                       ln = crush_ln(u) - 0x1000000000000ll;
+
+                       /*
+                        * divide by 16.16 fixed-point weight.  note
+                        * that the ln value is negative, so a larger
+                        * weight means a larger (less negative) value
+                        * for draw.
+                        */
+                       draw = div64_s64(ln, w);
+               } else {
+                       draw = S64_MIN;
+               }
+
+               if (i == 0 || draw > high_draw) {
+                       high = i;
+                       high_draw = draw;
+               }
+       }
+       return bucket->h.items[high];
+}
+
+
  static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
  {
         dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
@@ -255,12 +351,16 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
         case CRUSH_BUCKET_STRAW:
                 return bucket_straw_choose((struct crush_bucket_straw *)in,
                                            x, r);
+       case CRUSH_BUCKET_STRAW2:
+               return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
+                                           x, r);
         default:
                 dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
                 return in->items[0];
         }
  }
  
+
  /*
   * true if device is marked "out" (failed, fully offloaded)
   * of the cluster
@@ -290,6 +390,7 @@ static int is_out(const struct crush_map *map,
   * @type: the type of item to choose
   * @out: pointer to output vector
   * @outpos: our position in that vector
+ * @out_size: size of the out vector
   * @tries: number of attempts to make
   * @recurse_tries: number of attempts to have recursive chooseleaf make
   * @local_retries: localized retries
@@ -304,6 +405,7 @@ static int crush_choose_firstn(const struct crush_map *map,
                                const __u32 *weight, int weight_max,
                                int x, int numrep, int type,
                                int *out, int outpos,
+                              int out_size,
                                unsigned int tries,
                                unsigned int recurse_tries,
                                unsigned int local_retries,
@@ -322,6 +424,7 @@ static int crush_choose_firstn(const struct crush_map *map,
         int item = 0;
         int itemtype;
         int collide, reject;
+       int count = out_size;
  
         dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
                 recurse_to_leaf ? "_LEAF" : "",
@@ -329,7 +432,7 @@ static int crush_choose_firstn(const struct crush_map *map,
                 tries, recurse_tries, local_retries, local_fallback_retries,
                 parent_r);
  
-       for (rep = outpos; rep < numrep; rep++) {
+       for (rep = outpos; rep < numrep && count > 0 ; rep++) {
                 /* keep trying until we get a non-out, non-colliding item */
                 ftotal = 0;
                 skip_rep = 0;
@@ -403,7 +506,7 @@ static int crush_choose_firstn(const struct crush_map *map,
                                                          map->buckets[-1-item],
                                                          weight, weight_max,
                                                          x, outpos+1, 0,
-                                                        out2, outpos,
+                                                        out2, outpos, count,
                                                          recurse_tries, 0,
                                                          local_retries,
                                                          local_fallback_retries,
@@ -463,6 +566,7 @@ reject:
                 dprintk("CHOOSE got %d\n", item);
                 out[outpos] = item;
                 outpos++;
+               count--;
         }
  
         dprintk("CHOOSE returns %d\n", outpos);
@@ -654,6 +758,7 @@ int crush_do_rule(const struct crush_map *map,
         __u32 step;
         int i, j;
         int numrep;
+       int out_size;
         /*
          * the original choose_total_tries value was off by one (it
          * counted "retries" and not "tries").  add one.
@@ -761,6 +866,7 @@ int crush_do_rule(const struct crush_map *map,
                                                 x, numrep,
                                                 curstep->arg2,
                                                 o+osize, j,
+                                               result_max-osize,
                                                 choose_tries,
                                                 recurse_tries,
                                                 choose_local_retries,
@@ -770,11 +876,13 @@ int crush_do_rule(const struct crush_map *map,
                                                 c+osize,
                                                 0);
                                 } else {
+                                       out_size = ((numrep < (result_max-osize)) ?
+                                                    numrep : (result_max-osize));
                                         crush_choose_indep(
                                                 map,
                                                 map->buckets[-1-w[i]],
                                                 weight, weight_max,
-                                               x, numrep, numrep,
+                                               x, out_size, numrep,
                                                 curstep->arg2,
                                                 o+osize, j,
                                                 choose_tries,
@@ -783,7 +891,7 @@ int crush_do_rule(const struct crush_map *map,
                                                 recurse_to_leaf,
                                                 c+osize,
                                                 0);
-                                       osize += numrep;
+                                       osize += out_size;
                                 }
                         }
  
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c

index 14d9995..593dc2e 100644 (file)
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -22,6 +22,7 @@
   *      .../monmap      - current monmap
   *      .../osdc        - active osd requests
   *      .../monc        - mon client state
+ *      .../client_options - libceph-only (i.e. not rbd or cephfs) options
   *      .../dentry_lru  - dump contents of dentry lru
   *      .../caps        - expose cap (reservation) stats
   *      .../bdi         - symlink to ../../bdi/something
@@ -177,10 +178,24 @@ static int osdc_show(struct seq_file *s, void *pp)
         return 0;
  }
  
+static int client_options_show(struct seq_file *s, void *p)
+{
+       struct ceph_client *client = s->private;
+       int ret;
+
+       ret = ceph_print_client_options(s, client);
+       if (ret)
+               return ret;
+
+       seq_putc(s, '\n');
+       return 0;
+}
+
  CEPH_DEFINE_SHOW_FUNC(monmap_show)
  CEPH_DEFINE_SHOW_FUNC(osdmap_show)
  CEPH_DEFINE_SHOW_FUNC(monc_show)
  CEPH_DEFINE_SHOW_FUNC(osdc_show)
+CEPH_DEFINE_SHOW_FUNC(client_options_show)
  
  int ceph_debugfs_init(void)
  {
@@ -242,6 +257,14 @@ int ceph_debugfs_client_init(struct ceph_client *client)
         if (!client->debugfs_osdmap)
                 goto out;
  
+       client->debugfs_options = debugfs_create_file("client_options",
+                                       0600,
+                                       client->debugfs_dir,
+                                       client,
+                                       &client_options_show_fops);
+       if (!client->debugfs_options)
+               goto out;
+
         return 0;
  
  out:
@@ -252,6 +275,7 @@ out:
  void ceph_debugfs_client_cleanup(struct ceph_client *client)
  {
         dout("ceph_debugfs_client_cleanup %p\n", client);
+       debugfs_remove(client->debugfs_options);
         debugfs_remove(client->debugfs_osdmap);
         debugfs_remove(client->debugfs_monmap);
         debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c

index a9f4ae4..967080a 100644 (file)
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -505,8 +505,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
                 pr_err("connect %s error %d\n",
                        ceph_pr_addr(&con->peer_addr.in_addr), ret);
                 sock_release(sock);
-               con->error_msg = "connect error";
-
                 return ret;
         }
  
@@ -2145,12 +2143,10 @@ static int process_connect(struct ceph_connection *con)
                  * to WAIT.  This shouldn't happen if we are the
                  * client.
                  */
-               pr_err("process_connect got WAIT as client\n");
                 con->error_msg = "protocol error, got WAIT as client";
                 return -1;
  
         default:
-               pr_err("connect protocol error, will retry\n");
                 con->error_msg = "protocol error, garbage tag during connect";
                 return -1;
         }
@@ -2282,8 +2278,7 @@ static int read_partial_message(struct ceph_connection *con)
  
         crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
         if (cpu_to_le32(crc) != con->in_hdr.crc) {
-               pr_err("read_partial_message bad hdr "
-                      " crc %u != expected %u\n",
+               pr_err("read_partial_message bad hdr crc %u != expected %u\n",
                        crc, con->in_hdr.crc);
                 return -EBADMSG;
         }
@@ -2313,7 +2308,7 @@ static int read_partial_message(struct ceph_connection *con)
                 pr_err("read_partial_message bad seq %lld expected %lld\n",
                        seq, con->in_seq + 1);
                 con->error_msg = "bad message sequence # for incoming message";
-               return -EBADMSG;
+               return -EBADE;
         }
  
         /* allocate message? */
@@ -2660,6 +2655,8 @@ more:
                         switch (ret) {
                         case -EBADMSG:
                                 con->error_msg = "bad crc";
+                               /* fall through */
+                       case -EBADE:
                                 ret = -EIO;
                                 break;
                         case -EIO:
@@ -2838,7 +2835,8 @@ static void con_work(struct work_struct *work)
                 if (ret < 0) {
                         if (ret == -EAGAIN)
                                 continue;
-                       con->error_msg = "socket error on read";
+                       if (!con->error_msg)
+                               con->error_msg = "socket error on read";
                         fault = true;
                         break;
                 }
@@ -2847,7 +2845,8 @@ static void con_work(struct work_struct *work)
                 if (ret < 0) {
                         if (ret == -EAGAIN)
                                 continue;
-                       con->error_msg = "socket error on write";
+                       if (!con->error_msg)
+                               con->error_msg = "socket error on write";
                         fault = true;
                 }
  
@@ -2869,11 +2868,13 @@ static void con_work(struct work_struct *work)
   */
  static void con_fault(struct ceph_connection *con)
  {
-       pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
-               ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
         dout("fault %p state %lu to peer %s\n",
              con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));
  
+       pr_warn("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
+               ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
+       con->error_msg = NULL;
+
         WARN_ON(con->state != CON_STATE_CONNECTING &&
                con->state != CON_STATE_NEGOTIATING &&
                con->state != CON_STATE_OPEN);
@@ -3295,8 +3296,8 @@ static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
                  */
                 if (*skip)
                         return 0;
-               con->error_msg = "error allocating memory for incoming message";
  
+               con->error_msg = "error allocating memory for incoming message";
                 return -ENOMEM;
         }
         memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c

index b8c3fde..1579669 100644 (file)
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -122,6 +122,22 @@ bad:
         return -EINVAL;
  }
  
+static int crush_decode_straw2_bucket(void **p, void *end,
+                                     struct crush_bucket_straw2 *b)
+{
+       int j;
+       dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
+       b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
+       if (b->item_weights == NULL)
+               return -ENOMEM;
+       ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
+       for (j = 0; j < b->h.size; j++)
+               b->item_weights[j] = ceph_decode_32(p);
+       return 0;
+bad:
+       return -EINVAL;
+}
+
  static int skip_name_map(void **p, void *end)
  {
          int len;
@@ -204,6 +220,9 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
                 case CRUSH_BUCKET_STRAW:
                         size = sizeof(struct crush_bucket_straw);
                         break;
+               case CRUSH_BUCKET_STRAW2:
+                       size = sizeof(struct crush_bucket_straw2);
+                       break;
                 default:
                         err = -EINVAL;
                         goto bad;
@@ -261,6 +280,12 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
                         if (err < 0)
                                 goto bad;
                         break;
+               case CRUSH_BUCKET_STRAW2:
+                       err = crush_decode_straw2_bucket(p, end,
+                               (struct crush_bucket_straw2 *)b);
+                       if (err < 0)
+                               goto bad;
+                       break;
                 }
         }
  
diff --git a/net/socket.c b/net/socket.c

index 3e33959..884e329 100644 (file)
--- a/net/socket.c
+++ b/net/socket.c
@@ -312,7 +312,7 @@ static const struct super_operations sockfs_ops = {
  static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
  {
         return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
-                               dentry->d_inode->i_ino);
+                               d_inode(dentry)->i_ino);
  }
  
  static const struct dentry_operations sockfs_dentry_operations = {
@@ -375,7 +375,7 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
                   &socket_file_ops);
         if (unlikely(IS_ERR(file))) {
                 /* drop dentry, keep inode */
-               ihold(path.dentry->d_inode);
+               ihold(d_inode(path.dentry));
                 path_put(&path);
                 return file;
         }
@@ -497,7 +497,7 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
         ssize_t len;
         ssize_t used = 0;
  
-       len = security_inode_listsecurity(dentry->d_inode, buffer, size);
+       len = security_inode_listsecurity(d_inode(dentry), buffer, size);
         if (len < 0)
                 return len;
         used += len;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c

index 2d12b76..d81186d 100644 (file)
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -94,7 +94,7 @@ rpc_timeout_upcall_queue(struct work_struct *work)
         }
         dentry = dget(pipe->dentry);
         spin_unlock(&pipe->lock);
-       rpc_purge_list(dentry ? &RPC_I(dentry->d_inode)->waitq : NULL,
+       rpc_purge_list(dentry ? &RPC_I(d_inode(dentry))->waitq : NULL,
                         &free_list, destroy_msg, -ETIMEDOUT);
         dput(dentry);
  }
@@ -152,7 +152,7 @@ rpc_queue_upcall(struct rpc_pipe *pipe, struct rpc_pipe_msg *msg)
         dentry = dget(pipe->dentry);
         spin_unlock(&pipe->lock);
         if (dentry) {
-               wake_up(&RPC_I(dentry->d_inode)->waitq);
+               wake_up(&RPC_I(d_inode(dentry))->waitq);
                 dput(dentry);
         }
         return res;
@@ -591,7 +591,7 @@ static int __rpc_mkpipe_dentry(struct inode *dir, struct dentry *dentry,
         err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private);
         if (err)
                 return err;
-       rpci = RPC_I(dentry->d_inode);
+       rpci = RPC_I(d_inode(dentry));
         rpci->private = private;
         rpci->pipe = pipe;
         fsnotify_create(dir, dentry);
@@ -616,7 +616,7 @@ int rpc_rmdir(struct dentry *dentry)
         int error;
  
         parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
         mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
         error = __rpc_rmdir(dir, dentry);
         mutex_unlock(&dir->i_mutex);
@@ -638,7 +638,7 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry)
  
  static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_inode(dentry);
  
         rpc_close_pipes(inode);
         return __rpc_unlink(dir, dentry);
@@ -654,7 +654,7 @@ static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent,
                 if (!dentry)
                         return ERR_PTR(-ENOMEM);
         }
-       if (dentry->d_inode == NULL)
+       if (d_really_is_negative(dentry))
                 return dentry;
         dput(dentry);
         return ERR_PTR(-EEXIST);
@@ -667,7 +667,7 @@ static void __rpc_depopulate(struct dentry *parent,
                              const struct rpc_filelist *files,
                              int start, int eof)
  {
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         struct dentry *dentry;
         struct qstr name;
         int i;
@@ -679,9 +679,9 @@ static void __rpc_depopulate(struct dentry *parent,
  
                 if (dentry == NULL)
                         continue;
-               if (dentry->d_inode == NULL)
+               if (d_really_is_negative(dentry))
                         goto next;
-               switch (dentry->d_inode->i_mode & S_IFMT) {
+               switch (d_inode(dentry)->i_mode & S_IFMT) {
                         default:
                                 BUG();
                         case S_IFREG:
@@ -699,7 +699,7 @@ static void rpc_depopulate(struct dentry *parent,
                            const struct rpc_filelist *files,
                            int start, int eof)
  {
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
  
         mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD);
         __rpc_depopulate(parent, files, start, eof);
@@ -711,7 +711,7 @@ static int rpc_populate(struct dentry *parent,
                         int start, int eof,
                         void *private)
  {
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         struct dentry *dentry;
         int i, err;
  
@@ -754,7 +754,7 @@ static struct dentry *rpc_mkdir_populate(struct dentry *parent,
                 int (*populate)(struct dentry *, void *), void *args_populate)
  {
         struct dentry *dentry;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         int error;
  
         mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
@@ -787,7 +787,7 @@ static int rpc_rmdir_depopulate(struct dentry *dentry,
         int error;
  
         parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
         mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
         if (depopulate != NULL)
                 depopulate(dentry);
@@ -819,7 +819,7 @@ struct dentry *rpc_mkpipe_dentry(struct dentry *parent, const char *name,
                                  void *private, struct rpc_pipe *pipe)
  {
         struct dentry *dentry;
-       struct inode *dir = parent->d_inode;
+       struct inode *dir = d_inode(parent);
         umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR;
         int err;
  
@@ -864,7 +864,7 @@ rpc_unlink(struct dentry *dentry)
         int error = 0;
  
         parent = dget_parent(dentry);
-       dir = parent->d_inode;
+       dir = d_inode(parent);
         mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
         error = __rpc_rmpipe(dir, dentry);
         mutex_unlock(&dir->i_mutex);
@@ -1375,7 +1375,7 @@ rpc_gssd_dummy_depopulate(struct dentry *pipe_dentry)
         struct dentry *clnt_dir = pipe_dentry->d_parent;
         struct dentry *gssd_dir = clnt_dir->d_parent;
  
-       __rpc_rmpipe(clnt_dir->d_inode, pipe_dentry);
+       __rpc_rmpipe(d_inode(clnt_dir), pipe_dentry);
         __rpc_depopulate(clnt_dir, gssd_dummy_info_file, 0, 1);
         __rpc_depopulate(gssd_dir, gssd_dummy_clnt_dir, 0, 1);
         dput(pipe_dentry);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c

index b91fd9c..337ca85 100644 (file)
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -89,8 +89,8 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
         if (!task->tk_timeout)
                 return;
  
-       dprintk("RPC: %5u setting alarm for %lu ms\n",
-                       task->tk_pid, task->tk_timeout * 1000 / HZ);
+       dprintk("RPC: %5u setting alarm for %u ms\n",
+               task->tk_pid, jiffies_to_msecs(task->tk_timeout));
  
         task->u.tk_wait.expires = jiffies + task->tk_timeout;
         if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires))
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c

index 9949722..1d4fe24 100644 (file)
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -326,6 +326,15 @@ out_unlock:
         xprt_clear_locked(xprt);
  }
  
+static void xprt_task_clear_bytes_sent(struct rpc_task *task)
+{
+       if (task != NULL) {
+               struct rpc_rqst *req = task->tk_rqstp;
+               if (req != NULL)
+                       req->rq_bytes_sent = 0;
+       }
+}
+
  /**
   * xprt_release_xprt - allow other requests to use a transport
   * @xprt: transport with other tasks potentially waiting
@@ -336,11 +345,7 @@ out_unlock:
  void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
  {
         if (xprt->snd_task == task) {
-               if (task != NULL) {
-                       struct rpc_rqst *req = task->tk_rqstp;
-                       if (req != NULL)
-                               req->rq_bytes_sent = 0;
-               }
+               xprt_task_clear_bytes_sent(task);
                 xprt_clear_locked(xprt);
                 __xprt_lock_write_next(xprt);
         }
@@ -358,11 +363,7 @@ EXPORT_SYMBOL_GPL(xprt_release_xprt);
  void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
  {
         if (xprt->snd_task == task) {
-               if (task != NULL) {
-                       struct rpc_rqst *req = task->tk_rqstp;
-                       if (req != NULL)
-                               req->rq_bytes_sent = 0;
-               }
+               xprt_task_clear_bytes_sent(task);
                 xprt_clear_locked(xprt);
                 __xprt_lock_write_next_cong(xprt);
         }
@@ -700,6 +701,7 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
                 goto out;
         if (xprt->snd_task != task)
                 goto out;
+       xprt_task_clear_bytes_sent(task);
         xprt->snd_task = cookie;
         ret = true;
  out:
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile

index da5136f..579f72b 100644 (file)
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -1,6 +1,7 @@
  obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
  
-xprtrdma-y := transport.o rpc_rdma.o verbs.o
+xprtrdma-y := transport.o rpc_rdma.o verbs.o \
+       fmr_ops.o frwr_ops.o physical_ops.o
  
  obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
  
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c

new file mode 100644 (file)

index 0000000..302d4eb
--- /dev/null
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ */
+
+/* Lightweight memory registration using Fast Memory Regions (FMR).
+ * Referred to sometimes as MTHCAFMR mode.
+ *
+ * FMR uses synchronous memory registration and deregistration.
+ * FMR registration is known to be fast, but FMR deregistration
+ * can take tens of usecs to complete.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY       RPCDBG_TRANS
+#endif
+
+/* Maximum scatter/gather per FMR */
+#define RPCRDMA_MAX_FMR_SGES   (64)
+
+static int
+fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+           struct rpcrdma_create_data_internal *cdata)
+{
+       return 0;
+}
+
+/* FMR mode conveys up to 64 pages of payload per chunk segment.
+ */
+static size_t
+fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+       return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+                    rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
+}
+
+static int
+fmr_op_init(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
+       struct ib_fmr_attr fmr_attr = {
+               .max_pages      = RPCRDMA_MAX_FMR_SGES,
+               .max_maps       = 1,
+               .page_shift     = PAGE_SHIFT
+       };
+       struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+       struct rpcrdma_mw *r;
+       int i, rc;
+
+       INIT_LIST_HEAD(&buf->rb_mws);
+       INIT_LIST_HEAD(&buf->rb_all);
+
+       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+       dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);
+
+       while (i--) {
+               r = kzalloc(sizeof(*r), GFP_KERNEL);
+               if (!r)
+                       return -ENOMEM;
+
+               r->r.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
+               if (IS_ERR(r->r.fmr))
+                       goto out_fmr_err;
+
+               list_add(&r->mw_list, &buf->rb_mws);
+               list_add(&r->mw_all, &buf->rb_all);
+       }
+       return 0;
+
+out_fmr_err:
+       rc = PTR_ERR(r->r.fmr);
+       dprintk("RPC:       %s: ib_alloc_fmr status %i\n", __func__, rc);
+       kfree(r);
+       return rc;
+}
+
+/* Use the ib_map_phys_fmr() verb to register a memory region
+ * for remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+          int nsegs, bool writing)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct ib_device *device = ia->ri_id->device;
+       enum dma_data_direction direction = rpcrdma_data_dir(writing);
+       struct rpcrdma_mr_seg *seg1 = seg;
+       struct rpcrdma_mw *mw = seg1->rl_mw;
+       u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
+       int len, pageoff, i, rc;
+
+       pageoff = offset_in_page(seg1->mr_offset);
+       seg1->mr_offset -= pageoff;     /* start of page */
+       seg1->mr_len += pageoff;
+       len = -pageoff;
+       if (nsegs > RPCRDMA_MAX_FMR_SGES)
+               nsegs = RPCRDMA_MAX_FMR_SGES;
+       for (i = 0; i < nsegs;) {
+               rpcrdma_map_one(device, seg, direction);
+               physaddrs[i] = seg->mr_dma;
+               len += seg->mr_len;
+               ++seg;
+               ++i;
+               /* Check for holes */
+               if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+                   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+                       break;
+       }
+
+       rc = ib_map_phys_fmr(mw->r.fmr, physaddrs, i, seg1->mr_dma);
+       if (rc)
+               goto out_maperr;
+
+       seg1->mr_rkey = mw->r.fmr->rkey;
+       seg1->mr_base = seg1->mr_dma + pageoff;
+       seg1->mr_nsegs = i;
+       seg1->mr_len = len;
+       return i;
+
+out_maperr:
+       dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
+               __func__, len, (unsigned long long)seg1->mr_dma,
+               pageoff, i, rc);
+       while (i--)
+               rpcrdma_unmap_one(device, --seg);
+       return rc;
+}
+
+/* Use the ib_unmap_fmr() verb to prevent further remote
+ * access via RDMA READ or RDMA WRITE.
+ */
+static int
+fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct rpcrdma_mr_seg *seg1 = seg;
+       struct ib_device *device;
+       int rc, nsegs = seg->mr_nsegs;
+       LIST_HEAD(l);
+
+       list_add(&seg1->rl_mw->r.fmr->list, &l);
+       rc = ib_unmap_fmr(&l);
+       read_lock(&ia->ri_qplock);
+       device = ia->ri_id->device;
+       while (seg1->mr_nsegs--)
+               rpcrdma_unmap_one(device, seg++);
+       read_unlock(&ia->ri_qplock);
+       if (rc)
+               goto out_err;
+       return nsegs;
+
+out_err:
+       dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
+       return nsegs;
+}
+
+/* After a disconnect, unmap all FMRs.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_fmr_external().
+ */
+static void
+fmr_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       struct rpcrdma_mw *r;
+       LIST_HEAD(list);
+       int rc;
+
+       list_for_each_entry(r, &buf->rb_all, mw_all)
+               list_add(&r->r.fmr->list, &list);
+
+       rc = ib_unmap_fmr(&list);
+       if (rc)
+               dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
+                       __func__, rc);
+}
+
+static void
+fmr_op_destroy(struct rpcrdma_buffer *buf)
+{
+       struct rpcrdma_mw *r;
+       int rc;
+
+       while (!list_empty(&buf->rb_all)) {
+               r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+               list_del(&r->mw_all);
+               rc = ib_dealloc_fmr(r->r.fmr);
+               if (rc)
+                       dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
+                               __func__, rc);
+               kfree(r);
+       }
+}
+
+const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
+       .ro_map                         = fmr_op_map,
+       .ro_unmap                       = fmr_op_unmap,
+       .ro_open                        = fmr_op_open,
+       .ro_maxpages                    = fmr_op_maxpages,
+       .ro_init                        = fmr_op_init,
+       .ro_reset                       = fmr_op_reset,
+       .ro_destroy                     = fmr_op_destroy,
+       .ro_displayname                 = "fmr",
+};
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c

new file mode 100644 (file)

index 0000000..dff0481
--- /dev/null
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -0,0 +1,353 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ */
+
+/* Lightweight memory registration using Fast Registration Work
+ * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ *
+ * FRWR features ordered asynchronous registration and deregistration
+ * of arbitrarily sized memory regions. This is the fastest and safest
+ * but most complex memory registration mode.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY       RPCDBG_TRANS
+#endif
+
+static int
+__frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
+           unsigned int depth)
+{
+       struct rpcrdma_frmr *f = &r->r.frmr;
+       int rc;
+
+       f->fr_mr = ib_alloc_fast_reg_mr(pd, depth);
+       if (IS_ERR(f->fr_mr))
+               goto out_mr_err;
+       f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
+       if (IS_ERR(f->fr_pgl))
+               goto out_list_err;
+       return 0;
+
+out_mr_err:
+       rc = PTR_ERR(f->fr_mr);
+       dprintk("RPC:       %s: ib_alloc_fast_reg_mr status %i\n",
+               __func__, rc);
+       return rc;
+
+out_list_err:
+       rc = PTR_ERR(f->fr_pgl);
+       dprintk("RPC:       %s: ib_alloc_fast_reg_page_list status %i\n",
+               __func__, rc);
+       ib_dereg_mr(f->fr_mr);
+       return rc;
+}
+
+static void
+__frwr_release(struct rpcrdma_mw *r)
+{
+       int rc;
+
+       rc = ib_dereg_mr(r->r.frmr.fr_mr);
+       if (rc)
+               dprintk("RPC:       %s: ib_dereg_mr status %i\n",
+                       __func__, rc);
+       ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+}
+
+static int
+frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+            struct rpcrdma_create_data_internal *cdata)
+{
+       struct ib_device_attr *devattr = &ia->ri_devattr;
+       int depth, delta;
+
+       ia->ri_max_frmr_depth =
+                       min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+                             devattr->max_fast_reg_page_list_len);
+       dprintk("RPC:       %s: device's max FR page list len = %u\n",
+               __func__, ia->ri_max_frmr_depth);
+
+       /* Add room for frmr register and invalidate WRs.
+        * 1. FRMR reg WR for head
+        * 2. FRMR invalidate WR for head
+        * 3. N FRMR reg WRs for pagelist
+        * 4. N FRMR invalidate WRs for pagelist
+        * 5. FRMR reg WR for tail
+        * 6. FRMR invalidate WR for tail
+        * 7. The RDMA_SEND WR
+        */
+       depth = 7;
+
+       /* Calculate N if the device max FRMR depth is smaller than
+        * RPCRDMA_MAX_DATA_SEGS.
+        */
+       if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+               delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+               do {
+                       depth += 2; /* FRMR reg + invalidate */
+                       delta -= ia->ri_max_frmr_depth;
+               } while (delta > 0);
+       }
+
+       ep->rep_attr.cap.max_send_wr *= depth;
+       if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
+               cdata->max_requests = devattr->max_qp_wr / depth;
+               if (!cdata->max_requests)
+                       return -EINVAL;
+               ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+                                              depth;
+       }
+
+       return 0;
+}
+
+/* FRWR mode conveys a list of pages per chunk segment. The
+ * maximum length of that list is the FRWR page list depth.
+ */
+static size_t
+frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+       return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+                    rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
+}
+
+/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */
+static void
+frwr_sendcompletion(struct ib_wc *wc)
+{
+       struct rpcrdma_mw *r;
+
+       if (likely(wc->status == IB_WC_SUCCESS))
+               return;
+
+       /* WARNING: Only wr_id and status are reliable at this point */
+       r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+       dprintk("RPC:       %s: frmr %p (stale), status %d\n",
+               __func__, r, wc->status);
+       r->r.frmr.fr_state = FRMR_IS_STALE;
+}
+
+static int
+frwr_op_init(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+       unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+       struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+       int i;
+
+       INIT_LIST_HEAD(&buf->rb_mws);
+       INIT_LIST_HEAD(&buf->rb_all);
+
+       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
+       dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);
+
+       while (i--) {
+               struct rpcrdma_mw *r;
+               int rc;
+
+               r = kzalloc(sizeof(*r), GFP_KERNEL);
+               if (!r)
+                       return -ENOMEM;
+
+               rc = __frwr_init(r, pd, device, depth);
+               if (rc) {
+                       kfree(r);
+                       return rc;
+               }
+
+               list_add(&r->mw_list, &buf->rb_mws);
+               list_add(&r->mw_all, &buf->rb_all);
+               r->mw_sendcompletion = frwr_sendcompletion;
+       }
+
+       return 0;
+}
+
+/* Post a FAST_REG Work Request to register a memory region
+ * for remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+           int nsegs, bool writing)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct ib_device *device = ia->ri_id->device;
+       enum dma_data_direction direction = rpcrdma_data_dir(writing);
+       struct rpcrdma_mr_seg *seg1 = seg;
+       struct rpcrdma_mw *mw = seg1->rl_mw;
+       struct rpcrdma_frmr *frmr = &mw->r.frmr;
+       struct ib_mr *mr = frmr->fr_mr;
+       struct ib_send_wr fastreg_wr, *bad_wr;
+       u8 key;
+       int len, pageoff;
+       int i, rc;
+       int seg_len;
+       u64 pa;
+       int page_no;
+
+       pageoff = offset_in_page(seg1->mr_offset);
+       seg1->mr_offset -= pageoff;     /* start of page */
+       seg1->mr_len += pageoff;
+       len = -pageoff;
+       if (nsegs > ia->ri_max_frmr_depth)
+               nsegs = ia->ri_max_frmr_depth;
+       for (page_no = i = 0; i < nsegs;) {
+               rpcrdma_map_one(device, seg, direction);
+               pa = seg->mr_dma;
+               for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
+                       frmr->fr_pgl->page_list[page_no++] = pa;
+                       pa += PAGE_SIZE;
+               }
+               len += seg->mr_len;
+               ++seg;
+               ++i;
+               /* Check for holes */
+               if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+                   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
+                       break;
+       }
+       dprintk("RPC:       %s: Using frmr %p to map %d segments (%d bytes)\n",
+               __func__, mw, i, len);
+
+       frmr->fr_state = FRMR_IS_VALID;
+
+       memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+       fastreg_wr.wr_id = (unsigned long)(void *)mw;
+       fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+       fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
+       fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
+       fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+       fastreg_wr.wr.fast_reg.page_list_len = page_no;
+       fastreg_wr.wr.fast_reg.length = len;
+       fastreg_wr.wr.fast_reg.access_flags = writing ?
+                               IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+                               IB_ACCESS_REMOTE_READ;
+       key = (u8)(mr->rkey & 0x000000FF);
+       ib_update_fast_reg_key(mr, ++key);
+       fastreg_wr.wr.fast_reg.rkey = mr->rkey;
+
+       DECR_CQCOUNT(&r_xprt->rx_ep);
+       rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
+       if (rc)
+               goto out_senderr;
+
+       seg1->mr_rkey = mr->rkey;
+       seg1->mr_base = seg1->mr_dma + pageoff;
+       seg1->mr_nsegs = i;
+       seg1->mr_len = len;
+       return i;
+
+out_senderr:
+       dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
+       ib_update_fast_reg_key(mr, --key);
+       frmr->fr_state = FRMR_IS_INVALID;
+       while (i--)
+               rpcrdma_unmap_one(device, --seg);
+       return rc;
+}
+
+/* Post a LOCAL_INV Work Request to prevent further remote access
+ * via RDMA READ or RDMA WRITE.
+ */
+static int
+frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+       struct rpcrdma_mr_seg *seg1 = seg;
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+       struct ib_send_wr invalidate_wr, *bad_wr;
+       int rc, nsegs = seg->mr_nsegs;
+       struct ib_device *device;
+
+       seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
+
+       memset(&invalidate_wr, 0, sizeof(invalidate_wr));
+       invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
+       invalidate_wr.opcode = IB_WR_LOCAL_INV;
+       invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
+       DECR_CQCOUNT(&r_xprt->rx_ep);
+
+       read_lock(&ia->ri_qplock);
+       device = ia->ri_id->device;
+       while (seg1->mr_nsegs--)
+               rpcrdma_unmap_one(device, seg++);
+       rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
+       read_unlock(&ia->ri_qplock);
+       if (rc)
+               goto out_err;
+       return nsegs;
+
+out_err:
+       /* Force rpcrdma_buffer_get() to retry */
+       seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
+       dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
+       return nsegs;
+}
+
+/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
+ * an unusable state. Find FRMRs in this state and dereg / reg
+ * each.  FRMRs that are VALID and attached to an rpcrdma_req are
+ * also torn down.
+ *
+ * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_frmr_external().
+ */
+static void
+frwr_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+       struct ib_device *device = r_xprt->rx_ia.ri_id->device;
+       unsigned int depth = r_xprt->rx_ia.ri_max_frmr_depth;
+       struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
+       struct rpcrdma_mw *r;
+       int rc;
+
+       list_for_each_entry(r, &buf->rb_all, mw_all) {
+               if (r->r.frmr.fr_state == FRMR_IS_INVALID)
+                       continue;
+
+               __frwr_release(r);
+               rc = __frwr_init(r, pd, device, depth);
+               if (rc) {
+                       dprintk("RPC:       %s: mw %p left %s\n",
+                               __func__, r,
+                               (r->r.frmr.fr_state == FRMR_IS_STALE ?
+                                       "stale" : "valid"));
+                       continue;
+               }
+
+               r->r.frmr.fr_state = FRMR_IS_INVALID;
+       }
+}
+
+static void
+frwr_op_destroy(struct rpcrdma_buffer *buf)
+{
+       struct rpcrdma_mw *r;
+
+       while (!list_empty(&buf->rb_all)) {
+               r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
+               list_del(&r->mw_all);
+               __frwr_release(r);
+               kfree(r);
+       }
+}
+
+const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
+       .ro_map                         = frwr_op_map,
+       .ro_unmap                       = frwr_op_unmap,
+       .ro_open                        = frwr_op_open,
+       .ro_maxpages                    = frwr_op_maxpages,
+       .ro_init                        = frwr_op_init,
+       .ro_reset                       = frwr_op_reset,
+       .ro_destroy                     = frwr_op_destroy,
+       .ro_displayname                 = "frwr",
+};
diff --git a/net/sunrpc/xprtrdma/physical_ops.c b/net/sunrpc/xprtrdma/physical_ops.c

new file mode 100644 (file)

index 0000000..ba518af
--- /dev/null
+++ b/net/sunrpc/xprtrdma/physical_ops.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Oracle.  All rights reserved.
+ * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
+ */
+
+/* No-op chunk preparation. All client memory is pre-registered.
+ * Sometimes referred to as ALLPHYSICAL mode.
+ *
+ * Physical registration is simple because all client memory is
+ * pre-registered and never deregistered. This mode is good for
+ * adapter bring up, but is considered not safe: the server is
+ * trusted not to abuse its access to client memory not involved
+ * in RDMA I/O.
+ */
+
+#include "xprt_rdma.h"
+
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define RPCDBG_FACILITY       RPCDBG_TRANS
+#endif
+
+static int
+physical_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
+                struct rpcrdma_create_data_internal *cdata)
+{
+       return 0;
+}
+
+/* PHYSICAL memory registration conveys one page per chunk segment.
+ */
+static size_t
+physical_op_maxpages(struct rpcrdma_xprt *r_xprt)
+{
+       return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
+                    rpcrdma_max_segments(r_xprt));
+}
+
+static int
+physical_op_init(struct rpcrdma_xprt *r_xprt)
+{
+       return 0;
+}
+
+/* The client's physical memory is already exposed for
+ * remote access via RDMA READ or RDMA WRITE.
+ */
+static int
+physical_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
+               int nsegs, bool writing)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+       rpcrdma_map_one(ia->ri_id->device, seg,
+                       rpcrdma_data_dir(writing));
+       seg->mr_rkey = ia->ri_bind_mem->rkey;
+       seg->mr_base = seg->mr_dma;
+       seg->mr_nsegs = 1;
+       return 1;
+}
+
+/* Unmap a memory region, but leave it registered.
+ */
+static int
+physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
+{
+       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+       read_lock(&ia->ri_qplock);
+       rpcrdma_unmap_one(ia->ri_id->device, seg);
+       read_unlock(&ia->ri_qplock);
+
+       return 1;
+}
+
+static void
+physical_op_reset(struct rpcrdma_xprt *r_xprt)
+{
+}
+
+static void
+physical_op_destroy(struct rpcrdma_buffer *buf)
+{
+}
+
+const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
+       .ro_map                         = physical_op_map,
+       .ro_unmap                       = physical_op_unmap,
+       .ro_open                        = physical_op_open,
+       .ro_maxpages                    = physical_op_maxpages,
+       .ro_init                        = physical_op_init,
+       .ro_reset                       = physical_op_reset,
+       .ro_destroy                     = physical_op_destroy,
+       .ro_displayname                 = "physical",
+};
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c

index 91ffde8..2c53ea9 100644 (file)
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -53,6 +53,14 @@
  # define RPCDBG_FACILITY       RPCDBG_TRANS
  #endif
  
+enum rpcrdma_chunktype {
+       rpcrdma_noch = 0,
+       rpcrdma_readch,
+       rpcrdma_areadch,
+       rpcrdma_writech,
+       rpcrdma_replych
+};
+
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  static const char transfertypes[][12] = {
         "pure inline",  /* no chunks */
@@ -179,6 +187,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
         struct rpcrdma_write_array *warray = NULL;
         struct rpcrdma_write_chunk *cur_wchunk = NULL;
         __be32 *iptr = headerp->rm_body.rm_chunks;
+       int (*map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool);
  
         if (type == rpcrdma_readch || type == rpcrdma_areadch) {
                 /* a read chunk - server will RDMA Read our memory */
@@ -201,9 +210,9 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
         if (nsegs < 0)
                 return nsegs;
  
+       map = r_xprt->rx_ia.ri_ops->ro_map;
         do {
-               n = rpcrdma_register_external(seg, nsegs,
-                                               cur_wchunk != NULL, r_xprt);
+               n = map(r_xprt, seg, nsegs, cur_wchunk != NULL);
                 if (n <= 0)
                         goto out;
                 if (cur_rchunk) {       /* read */
@@ -275,34 +284,13 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
         return (unsigned char *)iptr - (unsigned char *)headerp;
  
  out:
-       if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_FRMR) {
-               for (pos = 0; nchunks--;)
-                       pos += rpcrdma_deregister_external(
-                                       &req->rl_segments[pos], r_xprt);
-       }
-       return n;
-}
+       if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+               return n;
  
-/*
- * Marshal chunks. This routine returns the header length
- * consumed by marshaling.
- *
- * Returns positive RPC/RDMA header size, or negative errno.
- */
-
-ssize_t
-rpcrdma_marshal_chunks(struct rpc_rqst *rqst, ssize_t result)
-{
-       struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
-       struct rpcrdma_msg *headerp = rdmab_to_msg(req->rl_rdmabuf);
-
-       if (req->rl_rtype != rpcrdma_noch)
-               result = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
-                                              headerp, req->rl_rtype);
-       else if (req->rl_wtype != rpcrdma_noch)
-               result = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
-                                              headerp, req->rl_wtype);
-       return result;
+       for (pos = 0; nchunks--;)
+               pos += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
+                                                     &req->rl_segments[pos]);
+       return n;
  }
  
  /*
@@ -397,6 +385,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
         char *base;
         size_t rpclen, padlen;
         ssize_t hdrlen;
+       enum rpcrdma_chunktype rtype, wtype;
         struct rpcrdma_msg *headerp;
  
         /*
@@ -433,13 +422,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
          * into pages; otherwise use reply chunks.
          */
         if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst))
-               req->rl_wtype = rpcrdma_noch;
+               wtype = rpcrdma_noch;
         else if (rqst->rq_rcv_buf.page_len == 0)
-               req->rl_wtype = rpcrdma_replych;
+               wtype = rpcrdma_replych;
         else if (rqst->rq_rcv_buf.flags & XDRBUF_READ)
-               req->rl_wtype = rpcrdma_writech;
+               wtype = rpcrdma_writech;
         else
-               req->rl_wtype = rpcrdma_replych;
+               wtype = rpcrdma_replych;
  
         /*
          * Chunks needed for arguments?
@@ -456,16 +445,16 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
          * TBD check NFSv4 setacl
          */
         if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst))
-               req->rl_rtype = rpcrdma_noch;
+               rtype = rpcrdma_noch;
         else if (rqst->rq_snd_buf.page_len == 0)
-               req->rl_rtype = rpcrdma_areadch;
+               rtype = rpcrdma_areadch;
         else
-               req->rl_rtype = rpcrdma_readch;
+               rtype = rpcrdma_readch;
  
         /* The following simplification is not true forever */
-       if (req->rl_rtype != rpcrdma_noch && req->rl_wtype == rpcrdma_replych)
-               req->rl_wtype = rpcrdma_noch;
-       if (req->rl_rtype != rpcrdma_noch && req->rl_wtype != rpcrdma_noch) {
+       if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
+               wtype = rpcrdma_noch;
+       if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
                 dprintk("RPC:       %s: cannot marshal multiple chunk lists\n",
                         __func__);
                 return -EIO;
@@ -479,7 +468,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
          * When padding is in use and applies to the transfer, insert
          * it and change the message type.
          */
-       if (req->rl_rtype == rpcrdma_noch) {
+       if (rtype == rpcrdma_noch) {
  
                 padlen = rpcrdma_inline_pullup(rqst,
                                                 RPCRDMA_INLINE_PAD_VALUE(rqst));
@@ -494,7 +483,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
                         headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
                         headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
                         hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
-                       if (req->rl_wtype != rpcrdma_noch) {
+                       if (wtype != rpcrdma_noch) {
                                 dprintk("RPC:       %s: invalid chunk list\n",
                                         __func__);
                                 return -EIO;
@@ -515,18 +504,26 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
                          * on receive. Therefore, we request a reply chunk
                          * for non-writes wherever feasible and efficient.
                          */
-                       if (req->rl_wtype == rpcrdma_noch)
-                               req->rl_wtype = rpcrdma_replych;
+                       if (wtype == rpcrdma_noch)
+                               wtype = rpcrdma_replych;
                 }
         }
  
-       hdrlen = rpcrdma_marshal_chunks(rqst, hdrlen);
+       if (rtype != rpcrdma_noch) {
+               hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_snd_buf,
+                                              headerp, rtype);
+               wtype = rtype;  /* simplify dprintk */
+
+       } else if (wtype != rpcrdma_noch) {
+               hdrlen = rpcrdma_create_chunks(rqst, &rqst->rq_rcv_buf,
+                                              headerp, wtype);
+       }
         if (hdrlen < 0)
                 return hdrlen;
  
         dprintk("RPC:       %s: %s: hdrlen %zd rpclen %zd padlen %zd"
                 " headerp 0x%p base 0x%p lkey 0x%x\n",
-               __func__, transfertypes[req->rl_wtype], hdrlen, rpclen, padlen,
+               __func__, transfertypes[wtype], hdrlen, rpclen, padlen,
                 headerp, base, rdmab_lkey(req->rl_rdmabuf));
  
         /*
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c

index 2e192ba..54f23b1 100644 (file)
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -156,13 +156,48 @@ static struct ctl_table sunrpc_table[] = {
  
  static struct rpc_xprt_ops xprt_rdma_procs;    /* forward reference */
  
+static void
+xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap)
+{
+       struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+       char buf[20];
+
+       snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
+       xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
+
+       xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA;
+}
+
+static void
+xprt_rdma_format_addresses6(struct rpc_xprt *xprt, struct sockaddr *sap)
+{
+       struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+       char buf[40];
+
+       snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
+       xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
+
+       xprt->address_strings[RPC_DISPLAY_NETID] = RPCBIND_NETID_RDMA6;
+}
+
  static void
  xprt_rdma_format_addresses(struct rpc_xprt *xprt)
  {
         struct sockaddr *sap = (struct sockaddr *)
                                         &rpcx_to_rdmad(xprt).addr;
-       struct sockaddr_in *sin = (struct sockaddr_in *)sap;
-       char buf[64];
+       char buf[128];
+
+       switch (sap->sa_family) {
+       case AF_INET:
+               xprt_rdma_format_addresses4(xprt, sap);
+               break;
+       case AF_INET6:
+               xprt_rdma_format_addresses6(xprt, sap);
+               break;
+       default:
+               pr_err("rpcrdma: Unrecognized address family\n");
+               return;
+       }
  
         (void)rpc_ntop(sap, buf, sizeof(buf));
         xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
@@ -170,16 +205,10 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
         snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap));
         xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
  
-       xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
-
-       snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
-       xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
-
         snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap));
         xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
  
-       /* netid */
-       xprt->address_strings[RPC_DISPLAY_NETID] = "rdma";
+       xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma";
  }
  
  static void
@@ -377,7 +406,10 @@ xprt_setup_rdma(struct xprt_create *args)
                           xprt_rdma_connect_worker);
  
         xprt_rdma_format_addresses(xprt);
-       xprt->max_payload = rpcrdma_max_payload(new_xprt);
+       xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
+       if (xprt->max_payload == 0)
+               goto out4;
+       xprt->max_payload <<= PAGE_SHIFT;
         dprintk("RPC:       %s: transport data payload maximum: %zu bytes\n",
                 __func__, xprt->max_payload);
  
@@ -552,8 +584,8 @@ xprt_rdma_free(void *buffer)
  
         for (i = 0; req->rl_nchunks;) {
                 --req->rl_nchunks;
-               i += rpcrdma_deregister_external(
-                       &req->rl_segments[i], r_xprt);
+               i += r_xprt->rx_ia.ri_ops->ro_unmap(r_xprt,
+                                                   &req->rl_segments[i]);
         }
  
         rpcrdma_buffer_put(req);
@@ -579,10 +611,7 @@ xprt_rdma_send_request(struct rpc_task *task)
         struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
         int rc = 0;
  
-       if (req->rl_niovs == 0)
-               rc = rpcrdma_marshal_req(rqst);
-       else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
-               rc = rpcrdma_marshal_chunks(rqst, 0);
+       rc = rpcrdma_marshal_req(rqst);
         if (rc < 0)
                 goto failed_marshal;
  
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c

index e28909f..4870d27 100644 (file)
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -50,6 +50,7 @@
  #include <linux/interrupt.h>
  #include <linux/slab.h>
  #include <linux/prefetch.h>
+#include <linux/sunrpc/addr.h>
  #include <asm/bitops.h>
  
  #include "xprt_rdma.h"
@@ -62,9 +63,6 @@
  # define RPCDBG_FACILITY       RPCDBG_TRANS
  #endif
  
-static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
-static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
-
  /*
   * internal functions
   */
@@ -188,7 +186,7 @@ static const char * const wc_status[] = {
         "remote access error",
         "remote operation error",
         "transport retry counter exceeded",
-       "RNR retrycounter exceeded",
+       "RNR retry counter exceeded",
         "local RDD violation error",
         "remove invalid RD request",
         "operation aborted",
@@ -206,21 +204,17 @@ static const char * const wc_status[] = {
  static void
  rpcrdma_sendcq_process_wc(struct ib_wc *wc)
  {
-       if (likely(wc->status == IB_WC_SUCCESS))
-               return;
-
         /* WARNING: Only wr_id and status are reliable at this point */
-       if (wc->wr_id == 0ULL) {
-               if (wc->status != IB_WC_WR_FLUSH_ERR)
+       if (wc->wr_id == RPCRDMA_IGNORE_COMPLETION) {
+               if (wc->status != IB_WC_SUCCESS &&
+                   wc->status != IB_WC_WR_FLUSH_ERR)
                         pr_err("RPC:       %s: SEND: %s\n",
                                __func__, COMPLETION_MSG(wc->status));
         } else {
                 struct rpcrdma_mw *r;
  
                 r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
-               r->r.frmr.fr_state = FRMR_IS_STALE;
-               pr_err("RPC:       %s: frmr %p (stale): %s\n",
-                      __func__, r, COMPLETION_MSG(wc->status));
+               r->mw_sendcompletion(wc);
         }
  }
  
@@ -424,7 +418,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
         struct rpcrdma_ia *ia = &xprt->rx_ia;
         struct rpcrdma_ep *ep = &xprt->rx_ep;
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-       struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr;
+       struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
  #endif
         struct ib_qp_attr *attr = &ia->ri_qp_attr;
         struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
@@ -480,9 +474,8 @@ connected:
                 wake_up_all(&ep->rep_connect_wait);
                 /*FALLTHROUGH*/
         default:
-               dprintk("RPC:       %s: %pI4:%u (ep 0x%p): %s\n",
-                       __func__, &addr->sin_addr.s_addr,
-                       ntohs(addr->sin_port), ep,
+               dprintk("RPC:       %s: %pIS:%u (ep 0x%p): %s\n",
+                       __func__, sap, rpc_get_port(sap), ep,
                         CONNECTION_MSG(event->event));
                 break;
         }
@@ -491,19 +484,16 @@ connected:
         if (connstate == 1) {
                 int ird = attr->max_dest_rd_atomic;
                 int tird = ep->rep_remote_cma.responder_resources;
-               printk(KERN_INFO "rpcrdma: connection to %pI4:%u "
-                       "on %s, memreg %d slots %d ird %d%s\n",
-                       &addr->sin_addr.s_addr,
-                       ntohs(addr->sin_port),
+
+               pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
+                       sap, rpc_get_port(sap),
                         ia->ri_id->device->name,
-                       ia->ri_memreg_strategy,
+                       ia->ri_ops->ro_displayname,
                         xprt->rx_buf.rb_max_requests,
                         ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
         } else if (connstate < 0) {
-               printk(KERN_INFO "rpcrdma: connection to %pI4:%u closed (%d)\n",
-                       &addr->sin_addr.s_addr,
-                       ntohs(addr->sin_port),
-                       connstate);
+               pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
+                       sap, rpc_get_port(sap), connstate);
         }
  #endif
  
@@ -621,17 +611,13 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
  
         if (memreg == RPCRDMA_FRMR) {
                 /* Requires both frmr reg and local dma lkey */
-               if ((devattr->device_cap_flags &
+               if (((devattr->device_cap_flags &
                      (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
-                   (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
+                   (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
+                     (devattr->max_fast_reg_page_list_len == 0)) {
                         dprintk("RPC:       %s: FRMR registration "
                                 "not supported by HCA\n", __func__);
                         memreg = RPCRDMA_MTHCAFMR;
-               } else {
-                       /* Mind the ia limit on FRMR page list depth */
-                       ia->ri_max_frmr_depth = min_t(unsigned int,
-                               RPCRDMA_MAX_DATA_SEGS,
-                               devattr->max_fast_reg_page_list_len);
                 }
         }
         if (memreg == RPCRDMA_MTHCAFMR) {
@@ -652,13 +638,16 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
          */
         switch (memreg) {
         case RPCRDMA_FRMR:
+               ia->ri_ops = &rpcrdma_frwr_memreg_ops;
                 break;
         case RPCRDMA_ALLPHYSICAL:
+               ia->ri_ops = &rpcrdma_physical_memreg_ops;
                 mem_priv = IB_ACCESS_LOCAL_WRITE |
                                 IB_ACCESS_REMOTE_WRITE |
                                 IB_ACCESS_REMOTE_READ;
                 goto register_setup;
         case RPCRDMA_MTHCAFMR:
+               ia->ri_ops = &rpcrdma_fmr_memreg_ops;
                 if (ia->ri_have_dma_lkey)
                         break;
                 mem_priv = IB_ACCESS_LOCAL_WRITE;
@@ -678,8 +667,8 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
                 rc = -ENOMEM;
                 goto out3;
         }
-       dprintk("RPC:       %s: memory registration strategy is %d\n",
-               __func__, memreg);
+       dprintk("RPC:       %s: memory registration strategy is '%s'\n",
+               __func__, ia->ri_ops->ro_displayname);
  
         /* Else will do memory reg/dereg for each chunk */
         ia->ri_memreg_strategy = memreg;
@@ -743,49 +732,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
  
         ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
         ep->rep_attr.qp_context = ep;
-       /* send_cq and recv_cq initialized below */
         ep->rep_attr.srq = NULL;
         ep->rep_attr.cap.max_send_wr = cdata->max_requests;
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR: {
-               int depth = 7;
-
-               /* Add room for frmr register and invalidate WRs.
-                * 1. FRMR reg WR for head
-                * 2. FRMR invalidate WR for head
-                * 3. N FRMR reg WRs for pagelist
-                * 4. N FRMR invalidate WRs for pagelist
-                * 5. FRMR reg WR for tail
-                * 6. FRMR invalidate WR for tail
-                * 7. The RDMA_SEND WR
-                */
-
-               /* Calculate N if the device max FRMR depth is smaller than
-                * RPCRDMA_MAX_DATA_SEGS.
-                */
-               if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
-                       int delta = RPCRDMA_MAX_DATA_SEGS -
-                                   ia->ri_max_frmr_depth;
-
-                       do {
-                               depth += 2; /* FRMR reg + invalidate */
-                               delta -= ia->ri_max_frmr_depth;
-                       } while (delta > 0);
-
-               }
-               ep->rep_attr.cap.max_send_wr *= depth;
-               if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
-                       cdata->max_requests = devattr->max_qp_wr / depth;
-                       if (!cdata->max_requests)
-                               return -EINVAL;
-                       ep->rep_attr.cap.max_send_wr = cdata->max_requests *
-                                                      depth;
-               }
-               break;
-       }
-       default:
-               break;
-       }
+       rc = ia->ri_ops->ro_open(ia, ep, cdata);
+       if (rc)
+               return rc;
         ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
         ep->rep_attr.cap.max_send_sge = (cdata->padding ? 4 : 2);
         ep->rep_attr.cap.max_recv_sge = 1;
@@ -944,21 +895,9 @@ retry:
                 rpcrdma_ep_disconnect(ep, ia);
                 rpcrdma_flush_cqs(ep);
  
-               switch (ia->ri_memreg_strategy) {
-               case RPCRDMA_FRMR:
-                       rpcrdma_reset_frmrs(ia);
-                       break;
-               case RPCRDMA_MTHCAFMR:
-                       rpcrdma_reset_fmrs(ia);
-                       break;
-               case RPCRDMA_ALLPHYSICAL:
-                       break;
-               default:
-                       rc = -EIO;
-                       goto out;
-               }
-
                 xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
+               ia->ri_ops->ro_reset(xprt);
+
                 id = rpcrdma_create_id(xprt, ia,
                                 (struct sockaddr *)&xprt->rx_data.addr);
                 if (IS_ERR(id)) {
@@ -1123,91 +1062,6 @@ out:
         return ERR_PTR(rc);
  }
  
-static int
-rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
-{
-       int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
-       struct ib_fmr_attr fmr_attr = {
-               .max_pages      = RPCRDMA_MAX_DATA_SEGS,
-               .max_maps       = 1,
-               .page_shift     = PAGE_SHIFT
-       };
-       struct rpcrdma_mw *r;
-       int i, rc;
-
-       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-       dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);
-
-       while (i--) {
-               r = kzalloc(sizeof(*r), GFP_KERNEL);
-               if (r == NULL)
-                       return -ENOMEM;
-
-               r->r.fmr = ib_alloc_fmr(ia->ri_pd, mr_access_flags, &fmr_attr);
-               if (IS_ERR(r->r.fmr)) {
-                       rc = PTR_ERR(r->r.fmr);
-                       dprintk("RPC:       %s: ib_alloc_fmr failed %i\n",
-                               __func__, rc);
-                       goto out_free;
-               }
-
-               list_add(&r->mw_list, &buf->rb_mws);
-               list_add(&r->mw_all, &buf->rb_all);
-       }
-       return 0;
-
-out_free:
-       kfree(r);
-       return rc;
-}
-
-static int
-rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_frmr *f;
-       struct rpcrdma_mw *r;
-       int i, rc;
-
-       i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS;
-       dprintk("RPC:       %s: initializing %d FRMRs\n", __func__, i);
-
-       while (i--) {
-               r = kzalloc(sizeof(*r), GFP_KERNEL);
-               if (r == NULL)
-                       return -ENOMEM;
-               f = &r->r.frmr;
-
-               f->fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
-                                               ia->ri_max_frmr_depth);
-               if (IS_ERR(f->fr_mr)) {
-                       rc = PTR_ERR(f->fr_mr);
-                       dprintk("RPC:       %s: ib_alloc_fast_reg_mr "
-                               "failed %i\n", __func__, rc);
-                       goto out_free;
-               }
-
-               f->fr_pgl = ib_alloc_fast_reg_page_list(ia->ri_id->device,
-                                                       ia->ri_max_frmr_depth);
-               if (IS_ERR(f->fr_pgl)) {
-                       rc = PTR_ERR(f->fr_pgl);
-                       dprintk("RPC:       %s: ib_alloc_fast_reg_page_list "
-                               "failed %i\n", __func__, rc);
-
-                       ib_dereg_mr(f->fr_mr);
-                       goto out_free;
-               }
-
-               list_add(&r->mw_list, &buf->rb_mws);
-               list_add(&r->mw_all, &buf->rb_all);
-       }
-
-       return 0;
-
-out_free:
-       kfree(r);
-       return rc;
-}
-
  int
  rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
  {
@@ -1244,22 +1098,9 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
         buf->rb_recv_bufs = (struct rpcrdma_rep **) p;
         p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests];
  
-       INIT_LIST_HEAD(&buf->rb_mws);
-       INIT_LIST_HEAD(&buf->rb_all);
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR:
-               rc = rpcrdma_init_frmrs(ia, buf);
-               if (rc)
-                       goto out;
-               break;
-       case RPCRDMA_MTHCAFMR:
-               rc = rpcrdma_init_fmrs(ia, buf);
-               if (rc)
-                       goto out;
-               break;
-       default:
-               break;
-       }
+       rc = ia->ri_ops->ro_init(r_xprt);
+       if (rc)
+               goto out;
  
         for (i = 0; i < buf->rb_max_requests; i++) {
                 struct rpcrdma_req *req;
@@ -1311,47 +1152,6 @@ rpcrdma_destroy_req(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
         kfree(req);
  }
  
-static void
-rpcrdma_destroy_fmrs(struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_mw *r;
-       int rc;
-
-       while (!list_empty(&buf->rb_all)) {
-               r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
-               list_del(&r->mw_all);
-               list_del(&r->mw_list);
-
-               rc = ib_dealloc_fmr(r->r.fmr);
-               if (rc)
-                       dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
-                               __func__, rc);
-
-               kfree(r);
-       }
-}
-
-static void
-rpcrdma_destroy_frmrs(struct rpcrdma_buffer *buf)
-{
-       struct rpcrdma_mw *r;
-       int rc;
-
-       while (!list_empty(&buf->rb_all)) {
-               r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
-               list_del(&r->mw_all);
-               list_del(&r->mw_list);
-
-               rc = ib_dereg_mr(r->r.frmr.fr_mr);
-               if (rc)
-                       dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
-                               __func__, rc);
-               ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
-
-               kfree(r);
-       }
-}
-
  void
  rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
  {
@@ -1372,104 +1172,11 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
                         rpcrdma_destroy_req(ia, buf->rb_send_bufs[i]);
         }
  
-       switch (ia->ri_memreg_strategy) {
-       case RPCRDMA_FRMR:
-               rpcrdma_destroy_frmrs(buf);
-               break;
-       case RPCRDMA_MTHCAFMR:
-               rpcrdma_destroy_fmrs(buf);
-               break;
-       default:
-               break;
-       }
+       ia->ri_ops->ro_destroy(buf);
  
         kfree(buf->rb_pool);
  }
  
-/* After a disconnect, unmap all FMRs.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_fmr_external().
- */
-static void
-rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_xprt *r_xprt =
-                               container_of(ia, struct rpcrdma_xprt, rx_ia);
-       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-       struct list_head *pos;
-       struct rpcrdma_mw *r;
-       LIST_HEAD(l);
-       int rc;
-
-       list_for_each(pos, &buf->rb_all) {
-               r = list_entry(pos, struct rpcrdma_mw, mw_all);
-
-               INIT_LIST_HEAD(&l);
-               list_add(&r->r.fmr->list, &l);
-               rc = ib_unmap_fmr(&l);
-               if (rc)
-                       dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
-                               __func__, rc);
-       }
-}
-
-/* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
- * an unusable state. Find FRMRs in this state and dereg / reg
- * each.  FRMRs that are VALID and attached to an rpcrdma_req are
- * also torn down.
- *
- * This gives all in-use FRMRs a fresh rkey and leaves them INVALID.
- *
- * This is invoked only in the transport connect worker in order
- * to serialize with rpcrdma_register_frmr_external().
- */
-static void
-rpcrdma_reset_frmrs(struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_xprt *r_xprt =
-                               container_of(ia, struct rpcrdma_xprt, rx_ia);
-       struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-       struct list_head *pos;
-       struct rpcrdma_mw *r;
-       int rc;
-
-       list_for_each(pos, &buf->rb_all) {
-               r = list_entry(pos, struct rpcrdma_mw, mw_all);
-
-               if (r->r.frmr.fr_state == FRMR_IS_INVALID)
-                       continue;
-
-               rc = ib_dereg_mr(r->r.frmr.fr_mr);
-               if (rc)
-                       dprintk("RPC:       %s: ib_dereg_mr failed %i\n",
-                               __func__, rc);
-               ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
-
-               r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
-                                       ia->ri_max_frmr_depth);
-               if (IS_ERR(r->r.frmr.fr_mr)) {
-                       rc = PTR_ERR(r->r.frmr.fr_mr);
-                       dprintk("RPC:       %s: ib_alloc_fast_reg_mr"
-                               " failed %i\n", __func__, rc);
-                       continue;
-               }
-               r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
-                                       ia->ri_id->device,
-                                       ia->ri_max_frmr_depth);
-               if (IS_ERR(r->r.frmr.fr_pgl)) {
-                       rc = PTR_ERR(r->r.frmr.fr_pgl);
-                       dprintk("RPC:       %s: "
-                               "ib_alloc_fast_reg_page_list "
-                               "failed %i\n", __func__, rc);
-
-                       ib_dereg_mr(r->r.frmr.fr_mr);
-                       continue;
-               }
-               r->r.frmr.fr_state = FRMR_IS_INVALID;
-       }
-}
-
  /* "*mw" can be NULL when rpcrdma_buffer_get_mrs() fails, leaving
   * some req segments uninitialized.
   */
@@ -1509,7 +1216,7 @@ rpcrdma_buffer_put_sendbuf(struct rpcrdma_req *req, struct rpcrdma_buffer *buf)
         }
  }
  
-/* rpcrdma_unmap_one() was already done by rpcrdma_deregister_frmr_external().
+/* rpcrdma_unmap_one() was already done during deregistration.
   * Redo only the ib_post_send().
   */
  static void
@@ -1729,6 +1436,14 @@ rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
   * Wrappers for internal-use kmalloc memory registration, used by buffer code.
   */
  
+void
+rpcrdma_mapping_error(struct rpcrdma_mr_seg *seg)
+{
+       dprintk("RPC:       map_one: offset %p iova %llx len %zu\n",
+               seg->mr_offset,
+               (unsigned long long)seg->mr_dma, seg->mr_dmalen);
+}
+
  static int
  rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
                                 struct ib_mr **mrp, struct ib_sge *iov)
@@ -1853,287 +1568,6 @@ rpcrdma_free_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
         }
  }
  
-/*
- * Wrappers for chunk registration, shared by read/write chunk code.
- */
-
-static void
-rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
-{
-       seg->mr_dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-       seg->mr_dmalen = seg->mr_len;
-       if (seg->mr_page)
-               seg->mr_dma = ib_dma_map_page(ia->ri_id->device,
-                               seg->mr_page, offset_in_page(seg->mr_offset),
-                               seg->mr_dmalen, seg->mr_dir);
-       else
-               seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
-                               seg->mr_offset,
-                               seg->mr_dmalen, seg->mr_dir);
-       if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
-               dprintk("RPC:       %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
-                       __func__,
-                       (unsigned long long)seg->mr_dma,
-                       seg->mr_offset, seg->mr_dmalen);
-       }
-}
-
-static void
-rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
-{
-       if (seg->mr_page)
-               ib_dma_unmap_page(ia->ri_id->device,
-                               seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
-       else
-               ib_dma_unmap_single(ia->ri_id->device,
-                               seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
-}
-
-static int
-rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
-                       int *nsegs, int writing, struct rpcrdma_ia *ia,
-                       struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_mr_seg *seg1 = seg;
-       struct rpcrdma_mw *mw = seg1->rl_mw;
-       struct rpcrdma_frmr *frmr = &mw->r.frmr;
-       struct ib_mr *mr = frmr->fr_mr;
-       struct ib_send_wr fastreg_wr, *bad_wr;
-       u8 key;
-       int len, pageoff;
-       int i, rc;
-       int seg_len;
-       u64 pa;
-       int page_no;
-
-       pageoff = offset_in_page(seg1->mr_offset);
-       seg1->mr_offset -= pageoff;     /* start of page */
-       seg1->mr_len += pageoff;
-       len = -pageoff;
-       if (*nsegs > ia->ri_max_frmr_depth)
-               *nsegs = ia->ri_max_frmr_depth;
-       for (page_no = i = 0; i < *nsegs;) {
-               rpcrdma_map_one(ia, seg, writing);
-               pa = seg->mr_dma;
-               for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
-                       frmr->fr_pgl->page_list[page_no++] = pa;
-                       pa += PAGE_SIZE;
-               }
-               len += seg->mr_len;
-               ++seg;
-               ++i;
-               /* Check for holes */
-               if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
-                   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
-                       break;
-       }
-       dprintk("RPC:       %s: Using frmr %p to map %d segments\n",
-               __func__, mw, i);
-
-       frmr->fr_state = FRMR_IS_VALID;
-
-       memset(&fastreg_wr, 0, sizeof(fastreg_wr));
-       fastreg_wr.wr_id = (unsigned long)(void *)mw;
-       fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-       fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma;
-       fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
-       fastreg_wr.wr.fast_reg.page_list_len = page_no;
-       fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       fastreg_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
-       if (fastreg_wr.wr.fast_reg.length < len) {
-               rc = -EIO;
-               goto out_err;
-       }
-
-       /* Bump the key */
-       key = (u8)(mr->rkey & 0x000000FF);
-       ib_update_fast_reg_key(mr, ++key);
-
-       fastreg_wr.wr.fast_reg.access_flags = (writing ?
-                               IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
-                               IB_ACCESS_REMOTE_READ);
-       fastreg_wr.wr.fast_reg.rkey = mr->rkey;
-       DECR_CQCOUNT(&r_xprt->rx_ep);
-
-       rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
-       if (rc) {
-               dprintk("RPC:       %s: failed ib_post_send for register,"
-                       " status %i\n", __func__, rc);
-               ib_update_fast_reg_key(mr, --key);
-               goto out_err;
-       } else {
-               seg1->mr_rkey = mr->rkey;
-               seg1->mr_base = seg1->mr_dma + pageoff;
-               seg1->mr_nsegs = i;
-               seg1->mr_len = len;
-       }
-       *nsegs = i;
-       return 0;
-out_err:
-       frmr->fr_state = FRMR_IS_INVALID;
-       while (i--)
-               rpcrdma_unmap_one(ia, --seg);
-       return rc;
-}
-
-static int
-rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
-                       struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_mr_seg *seg1 = seg;
-       struct ib_send_wr invalidate_wr, *bad_wr;
-       int rc;
-
-       seg1->rl_mw->r.frmr.fr_state = FRMR_IS_INVALID;
-
-       memset(&invalidate_wr, 0, sizeof invalidate_wr);
-       invalidate_wr.wr_id = (unsigned long)(void *)seg1->rl_mw;
-       invalidate_wr.opcode = IB_WR_LOCAL_INV;
-       invalidate_wr.ex.invalidate_rkey = seg1->rl_mw->r.frmr.fr_mr->rkey;
-       DECR_CQCOUNT(&r_xprt->rx_ep);
-
-       read_lock(&ia->ri_qplock);
-       while (seg1->mr_nsegs--)
-               rpcrdma_unmap_one(ia, seg++);
-       rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
-       read_unlock(&ia->ri_qplock);
-       if (rc) {
-               /* Force rpcrdma_buffer_get() to retry */
-               seg1->rl_mw->r.frmr.fr_state = FRMR_IS_STALE;
-               dprintk("RPC:       %s: failed ib_post_send for invalidate,"
-                       " status %i\n", __func__, rc);
-       }
-       return rc;
-}
-
-static int
-rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
-                       int *nsegs, int writing, struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_mr_seg *seg1 = seg;
-       u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
-       int len, pageoff, i, rc;
-
-       pageoff = offset_in_page(seg1->mr_offset);
-       seg1->mr_offset -= pageoff;     /* start of page */
-       seg1->mr_len += pageoff;
-       len = -pageoff;
-       if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
-               *nsegs = RPCRDMA_MAX_DATA_SEGS;
-       for (i = 0; i < *nsegs;) {
-               rpcrdma_map_one(ia, seg, writing);
-               physaddrs[i] = seg->mr_dma;
-               len += seg->mr_len;
-               ++seg;
-               ++i;
-               /* Check for holes */
-               if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
-                   offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
-                       break;
-       }
-       rc = ib_map_phys_fmr(seg1->rl_mw->r.fmr, physaddrs, i, seg1->mr_dma);
-       if (rc) {
-               dprintk("RPC:       %s: failed ib_map_phys_fmr "
-                       "%u@0x%llx+%i (%d)... status %i\n", __func__,
-                       len, (unsigned long long)seg1->mr_dma,
-                       pageoff, i, rc);
-               while (i--)
-                       rpcrdma_unmap_one(ia, --seg);
-       } else {
-               seg1->mr_rkey = seg1->rl_mw->r.fmr->rkey;
-               seg1->mr_base = seg1->mr_dma + pageoff;
-               seg1->mr_nsegs = i;
-               seg1->mr_len = len;
-       }
-       *nsegs = i;
-       return rc;
-}
-
-static int
-rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
-                       struct rpcrdma_ia *ia)
-{
-       struct rpcrdma_mr_seg *seg1 = seg;
-       LIST_HEAD(l);
-       int rc;
-
-       list_add(&seg1->rl_mw->r.fmr->list, &l);
-       rc = ib_unmap_fmr(&l);
-       read_lock(&ia->ri_qplock);
-       while (seg1->mr_nsegs--)
-               rpcrdma_unmap_one(ia, seg++);
-       read_unlock(&ia->ri_qplock);
-       if (rc)
-               dprintk("RPC:       %s: failed ib_unmap_fmr,"
-                       " status %i\n", __func__, rc);
-       return rc;
-}
-
-int
-rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
-                       int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       int rc = 0;
-
-       switch (ia->ri_memreg_strategy) {
-
-       case RPCRDMA_ALLPHYSICAL:
-               rpcrdma_map_one(ia, seg, writing);
-               seg->mr_rkey = ia->ri_bind_mem->rkey;
-               seg->mr_base = seg->mr_dma;
-               seg->mr_nsegs = 1;
-               nsegs = 1;
-               break;
-
-       /* Registration using frmr registration */
-       case RPCRDMA_FRMR:
-               rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
-               break;
-
-       /* Registration using fmr memory registration */
-       case RPCRDMA_MTHCAFMR:
-               rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
-               break;
-
-       default:
-               return -EIO;
-       }
-       if (rc)
-               return rc;
-
-       return nsegs;
-}
-
-int
-rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
-               struct rpcrdma_xprt *r_xprt)
-{
-       struct rpcrdma_ia *ia = &r_xprt->rx_ia;
-       int nsegs = seg->mr_nsegs, rc;
-
-       switch (ia->ri_memreg_strategy) {
-
-       case RPCRDMA_ALLPHYSICAL:
-               read_lock(&ia->ri_qplock);
-               rpcrdma_unmap_one(ia, seg);
-               read_unlock(&ia->ri_qplock);
-               break;
-
-       case RPCRDMA_FRMR:
-               rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
-               break;
-
-       case RPCRDMA_MTHCAFMR:
-               rc = rpcrdma_deregister_fmr_external(seg, ia);
-               break;
-
-       default:
-               break;
-       }
-       return nsegs;
-}
-
  /*
   * Prepost any receive buffer, then post send.
   *
@@ -2156,7 +1590,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
         }
  
         send_wr.next = NULL;
-       send_wr.wr_id = 0ULL;   /* no send cookie */
+       send_wr.wr_id = RPCRDMA_IGNORE_COMPLETION;
         send_wr.sg_list = req->rl_send_iov;
         send_wr.num_sge = req->rl_niovs;
         send_wr.opcode = IB_WR_SEND;
@@ -2215,43 +1649,24 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
         return rc;
  }
  
-/* Physical mapping means one Read/Write list entry per-page.
- * All list entries must fit within an inline buffer
- *
- * NB: The server must return a Write list for NFS READ,
- *     which has the same constraint. Factor in the inline
- *     rsize as well.
+/* How many chunk list items fit within our inline buffers?
   */
-static size_t
-rpcrdma_physical_max_payload(struct rpcrdma_xprt *r_xprt)
+unsigned int
+rpcrdma_max_segments(struct rpcrdma_xprt *r_xprt)
  {
         struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
-       unsigned int inline_size, pages;
+       int bytes, segments;
  
-       inline_size = min_t(unsigned int,
-                           cdata->inline_wsize, cdata->inline_rsize);
-       inline_size -= RPCRDMA_HDRLEN_MIN;
-       pages = inline_size / sizeof(struct rpcrdma_segment);
-       return pages << PAGE_SHIFT;
-}
-
-static size_t
-rpcrdma_mr_max_payload(struct rpcrdma_xprt *r_xprt)
-{
-       return RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT;
-}
-
-size_t
-rpcrdma_max_payload(struct rpcrdma_xprt *r_xprt)
-{
-       size_t result;
-
-       switch (r_xprt->rx_ia.ri_memreg_strategy) {
-       case RPCRDMA_ALLPHYSICAL:
-               result = rpcrdma_physical_max_payload(r_xprt);
-               break;
-       default:
-               result = rpcrdma_mr_max_payload(r_xprt);
+       bytes = min_t(unsigned int, cdata->inline_wsize, cdata->inline_rsize);
+       bytes -= RPCRDMA_HDRLEN_MIN;
+       if (bytes < sizeof(struct rpcrdma_segment) * 2) {
+               pr_warn("RPC:       %s: inline threshold too small\n",
+                       __func__);
+               return 0;
         }
-       return result;
+
+       segments = 1 << (fls(bytes / sizeof(struct rpcrdma_segment)) - 1);
+       dprintk("RPC:       %s: max chunk list size = %d segments\n",
+               __func__, segments);
+       return segments;
  }
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h

index 0a16fb6..78e0b8b 100644 (file)
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -60,6 +60,7 @@
   * Interface Adapter -- one per transport instance
   */
  struct rpcrdma_ia {
+       const struct rpcrdma_memreg_ops *ri_ops;
         rwlock_t                ri_qplock;
         struct rdma_cm_id       *ri_id;
         struct ib_pd            *ri_pd;
@@ -105,6 +106,10 @@ struct rpcrdma_ep {
  #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
  #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
  
+/* Force completion handler to ignore the signal
+ */
+#define RPCRDMA_IGNORE_COMPLETION      (0ULL)
+
  /* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
   *
   * The below structure appears at the front of a large region of kmalloc'd
@@ -143,14 +148,6 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
         return (struct rpcrdma_msg *)rb->rg_base;
  }
  
-enum rpcrdma_chunktype {
-       rpcrdma_noch = 0,
-       rpcrdma_readch,
-       rpcrdma_areadch,
-       rpcrdma_writech,
-       rpcrdma_replych
-};
-
  /*
   * struct rpcrdma_rep -- this structure encapsulates state required to recv
   * and complete a reply, asychronously. It needs several pieces of
@@ -213,6 +210,7 @@ struct rpcrdma_mw {
                 struct ib_fmr           *fmr;
                 struct rpcrdma_frmr     frmr;
         } r;
+       void                    (*mw_sendcompletion)(struct ib_wc *);
         struct list_head        mw_list;
         struct list_head        mw_all;
  };
@@ -258,7 +256,6 @@ struct rpcrdma_req {
         unsigned int    rl_niovs;       /* 0, 2 or 4 */
         unsigned int    rl_nchunks;     /* non-zero if chunks */
         unsigned int    rl_connect_cookie;      /* retry detection */
-       enum rpcrdma_chunktype  rl_rtype, rl_wtype;
         struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
         struct rpcrdma_rep      *rl_reply;/* holder for reply buffer */
         struct ib_sge   rl_send_iov[4]; /* for active requests */
@@ -339,6 +336,29 @@ struct rpcrdma_stats {
         unsigned long           bad_reply_count;
  };
  
+/*
+ * Per-registration mode operations
+ */
+struct rpcrdma_xprt;
+struct rpcrdma_memreg_ops {
+       int             (*ro_map)(struct rpcrdma_xprt *,
+                                 struct rpcrdma_mr_seg *, int, bool);
+       int             (*ro_unmap)(struct rpcrdma_xprt *,
+                                   struct rpcrdma_mr_seg *);
+       int             (*ro_open)(struct rpcrdma_ia *,
+                                  struct rpcrdma_ep *,
+                                  struct rpcrdma_create_data_internal *);
+       size_t          (*ro_maxpages)(struct rpcrdma_xprt *);
+       int             (*ro_init)(struct rpcrdma_xprt *);
+       void            (*ro_reset)(struct rpcrdma_xprt *);
+       void            (*ro_destroy)(struct rpcrdma_buffer *);
+       const char      *ro_displayname;
+};
+
+extern const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops;
+extern const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops;
+extern const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops;
+
  /*
   * RPCRDMA transport -- encapsulates the structures above for
   * integration with RPC.
@@ -398,16 +418,56 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
  void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
  void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
  
-int rpcrdma_register_external(struct rpcrdma_mr_seg *,
-                               int, int, struct rpcrdma_xprt *);
-int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
-                               struct rpcrdma_xprt *);
-
  struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
                                             size_t, gfp_t);
  void rpcrdma_free_regbuf(struct rpcrdma_ia *,
                          struct rpcrdma_regbuf *);
  
+unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
+
+/*
+ * Wrappers for chunk registration, shared by read/write chunk code.
+ */
+
+void rpcrdma_mapping_error(struct rpcrdma_mr_seg *);
+
+static inline enum dma_data_direction
+rpcrdma_data_dir(bool writing)
+{
+       return writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+}
+
+static inline void
+rpcrdma_map_one(struct ib_device *device, struct rpcrdma_mr_seg *seg,
+               enum dma_data_direction direction)
+{
+       seg->mr_dir = direction;
+       seg->mr_dmalen = seg->mr_len;
+
+       if (seg->mr_page)
+               seg->mr_dma = ib_dma_map_page(device,
+                               seg->mr_page, offset_in_page(seg->mr_offset),
+                               seg->mr_dmalen, seg->mr_dir);
+       else
+               seg->mr_dma = ib_dma_map_single(device,
+                               seg->mr_offset,
+                               seg->mr_dmalen, seg->mr_dir);
+
+       if (ib_dma_mapping_error(device, seg->mr_dma))
+               rpcrdma_mapping_error(seg);
+}
+
+static inline void
+rpcrdma_unmap_one(struct ib_device *device, struct rpcrdma_mr_seg *seg)
+{
+       if (seg->mr_page)
+               ib_dma_unmap_page(device,
+                                 seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+       else
+               ib_dma_unmap_single(device,
+                                   seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
+}
+
  /*
   * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
   */
@@ -418,9 +478,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
  /*
   * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c
   */
-ssize_t rpcrdma_marshal_chunks(struct rpc_rqst *, ssize_t);
  int rpcrdma_marshal_req(struct rpc_rqst *);
-size_t rpcrdma_max_payload(struct rpcrdma_xprt *);
  
  /* Temporary NFS request map cache. Created in svc_rdma.c  */
  extern struct kmem_cache *svc_rdma_map_cachep;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c

index 433f287..5266ea7 100644 (file)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -305,7 +305,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
                 struct dentry *dentry = unix_sk(s)->path.dentry;
  
-               if (dentry && dentry->d_inode == i) {
+               if (dentry && d_backing_inode(dentry) == i) {
                         sock_hold(s);
                         goto found;
                 }
@@ -778,7 +778,7 @@ static struct sock *unix_find_other(struct net *net,
                 err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
                 if (err)
                         goto fail;
-               inode = path.dentry->d_inode;
+               inode = d_backing_inode(path.dentry);
                 err = inode_permission(inode, MAY_WRITE);
                 if (err)
                         goto put_fail;
@@ -839,7 +839,7 @@ static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
          */
         err = security_path_mknod(&path, dentry, mode, 0);
         if (!err) {
-               err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
+               err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
                 if (!err) {
                         res->mnt = mntget(path.mnt);
                         res->dentry = dget(dentry);
@@ -905,7 +905,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                         goto out_up;
                 }
                 addr->hash = UNIX_HASH_SIZE;
-               hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
+               hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE-1);
                 spin_lock(&unix_table_lock);
                 u->path = path;
                 list = &unix_socket_table[hash];
diff --git a/net/unix/diag.c b/net/unix/diag.c

index ef542fb..c512f64 100644 (file)
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -25,7 +25,7 @@ static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb)
  
         if (dentry) {
                 struct unix_diag_vfs uv = {
-                       .udiag_vfs_ino = dentry->d_inode->i_ino,
+                       .udiag_vfs_ino = d_backing_inode(dentry)->i_ino,
                         .udiag_vfs_dev = dentry->d_sb->s_dev,
                 };
  
diff --git a/scripts/check_extable.sh b/scripts/check_extable.sh

new file mode 100755 (executable)

index 0000000..0fb6b1c
--- /dev/null
+++ b/scripts/check_extable.sh
@@ -0,0 +1,146 @@
+#! /bin/bash
+# (c) 2015, Quentin Casasnovas <quentin.casasnovas@oracle.com>
+
+obj=$1
+
+file ${obj} | grep -q ELF || (echo "${obj} is not and ELF file." 1>&2 ; exit 0)
+
+# Bail out early if there isn't an __ex_table section in this object file.
+objdump -hj __ex_table ${obj} 2> /dev/null > /dev/null
+[ $? -ne 0 ] && exit 0
+
+white_list=.text,.fixup
+
+suspicious_relocs=$(objdump -rj __ex_table ${obj}  | tail -n +6 |
+                       grep -v $(eval echo -e{${white_list}}) | awk '{print $3}')
+
+# No suspicious relocs in __ex_table, jobs a good'un
+[ -z "${suspicious_relocs}" ] && exit 0
+
+
+# After this point, something is seriously wrong since we just found out we
+# have some relocations in __ex_table which point to sections which aren't
+# white listed.  If you're adding a new section in the Linux kernel, and
+# you're expecting this section to contain code which can fault (i.e. the
+# __ex_table relocation to your new section is expected), simply add your
+# new section to the white_list variable above.  If not, you're probably
+# doing something wrong and the rest of this code is just trying to print
+# you more information about it.
+
+function find_section_offset_from_symbol()
+{
+    eval $(objdump -t ${obj} | grep ${1} | sed 's/\([0-9a-f]\+\) .\{7\} \([^ \t]\+\).*/section="\2"; section_offset="0x\1" /')
+
+    # addr2line takes addresses in hexadecimal...
+    section_offset=$(printf "0x%016x" $(( ${section_offset} + $2 )) )
+}
+
+function find_symbol_and_offset_from_reloc()
+{
+    # Extract symbol and offset from the objdump output
+    eval $(echo $reloc | sed 's/\([^+]\+\)+\?\(0x[0-9a-f]\+\)\?/symbol="\1"; symbol_offset="\2"/')
+
+    # When the relocation points to the begining of a symbol or section, it
+    # won't print the offset since it is zero.
+    if [ -z "${symbol_offset}" ]; then
+       symbol_offset=0x0
+    fi
+}
+
+function find_alt_replacement_target()
+{
+    # The target of the .altinstr_replacement is the relocation just before
+    # the .altinstr_replacement one.
+    eval $(objdump -rj .altinstructions ${obj} | grep -B1 "${section}+${section_offset}" | head -n1 | awk '{print $3}' |
+          sed 's/\([^+]\+\)+\(0x[0-9a-f]\+\)/alt_target_section="\1"; alt_target_offset="\2"/')
+}
+
+function handle_alt_replacement_reloc()
+{
+    # This will define alt_target_section and alt_target_section_offset
+    find_alt_replacement_target ${section} ${section_offset}
+
+    echo "Error: found a reference to .altinstr_replacement in __ex_table:"
+    addr2line -fip -j ${alt_target_section} -e ${obj} ${alt_target_offset} | awk '{print "\t" $0}'
+
+    error=true
+}
+
+function is_executable_section()
+{
+    objdump -hwj ${section} ${obj} | grep -q CODE
+    return $?
+}
+
+function handle_suspicious_generic_reloc()
+{
+    if is_executable_section ${section}; then
+       # We've got a relocation to a non white listed _executable_
+       # section, print a warning so the developper adds the section to
+       # the white list or fix his code.  We try to pretty-print the file
+       # and line number where that relocation was added.
+       echo "Warning: found a reference to section \"${section}\" in __ex_table:"
+       addr2line -fip -j ${section} -e ${obj} ${section_offset} | awk '{print "\t" $0}'
+    else
+       # Something is definitively wrong here since we've got a relocation
+       # to a non-executable section, there's no way this would ever be
+       # running in the kernel.
+       echo "Error: found a reference to non-executable section \"${section}\" in __ex_table at offset ${section_offset}"
+       error=true
+    fi
+}
+
+function handle_suspicious_reloc()
+{
+    case "${section}" in
+       ".altinstr_replacement")
+           handle_alt_replacement_reloc ${section} ${section_offset}
+           ;;
+       *)
+           handle_suspicious_generic_reloc ${section} ${section_offset}
+           ;;
+    esac
+}
+
+function diagnose()
+{
+
+    for reloc in ${suspicious_relocs}; do
+       # Let's find out where the target of the relocation in __ex_table
+       # is, this will define ${symbol} and ${symbol_offset}
+       find_symbol_and_offset_from_reloc ${reloc}
+
+       # When there's a global symbol at the place of the relocation,
+       # objdump will use it instead of giving us a section+offset, so
+       # let's find out which section is this symbol in and the total
+       # offset withing that section.
+       find_section_offset_from_symbol ${symbol} ${symbol_offset}
+
+       # In this case objdump was presenting us with a reloc to a symbol
+       # rather than a section. Now that we've got the actual section,
+       # we can skip it if it's in the white_list.
+       if [ -z "$( echo $section | grep -v $(eval echo -e{${white_list}}))" ]; then
+           continue;
+       fi
+
+       # Will either print a warning if the relocation happens to be in a
+       # section we do not know but has executable bit set, or error out.
+       handle_suspicious_reloc
+    done
+}
+
+function check_debug_info() {
+    objdump -hj .debug_info ${obj} 2> /dev/null > /dev/null ||
+       echo -e "${obj} does not contain debug information, the addr2line output will be limited.\n" \
+            "Recompile ${obj} with CONFIG_DEBUG_INFO to get a more useful output."
+}
+
+check_debug_info
+
+diagnose
+
+if [ "${error}" ]; then
+    exit 1
+fi
+
+exit 0
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c

index d439856..91ee1b2 100644 (file)
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -776,6 +776,7 @@ static const char *sech_name(struct elf_info *elf, Elf_Shdr *sechdr)
   * "foo" will match an exact string equal to "foo"
   * "*foo" will match a string that ends with "foo"
   * "foo*" will match a string that begins with "foo"
+ * "*foo*" will match a string that contains "foo"
   */
  static int match(const char *sym, const char * const pat[])
  {
@@ -784,8 +785,17 @@ static int match(const char *sym, const char * const pat[])
                 p = *pat++;
                 const char *endp = p + strlen(p) - 1;
  
+               /* "*foo*" */
+               if (*p == '*' && *endp == '*') {
+                       char *here, *bare = strndup(p + 1, strlen(p) - 2);
+
+                       here = strstr(sym, bare);
+                       free(bare);
+                       if (here != NULL)
+                               return 1;
+               }
                 /* "*foo" */
-               if (*p == '*') {
+               else if (*p == '*') {
                         if (strrcmp(sym, p + 1) == 0)
                                 return 1;
                 }
@@ -873,7 +883,10 @@ static void check_section(const char *modname, struct elf_info *elf,
  #define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS
  
  #define DATA_SECTIONS ".data", ".data.rel"
-#define TEXT_SECTIONS ".text", ".text.unlikely"
+#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \
+               ".kprobes.text"
+#define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
+               ".fixup", ".entry.text", ".exception.text", ".text.*"
  
  #define INIT_SECTIONS      ".init.*"
  #define MEM_INIT_SECTIONS  ".meminit.*"
@@ -881,6 +894,9 @@ static void check_section(const char *modname, struct elf_info *elf,
  #define EXIT_SECTIONS      ".exit.*"
  #define MEM_EXIT_SECTIONS  ".memexit.*"
  
+#define ALL_TEXT_SECTIONS  ALL_INIT_TEXT_SECTIONS, ALL_EXIT_TEXT_SECTIONS, \
+               TEXT_SECTIONS, OTHER_TEXT_SECTIONS
+
  /* init data sections */
  static const char *const init_data_sections[] =
         { ALL_INIT_DATA_SECTIONS, NULL };
@@ -892,6 +908,9 @@ static const char *const init_sections[] = { ALL_INIT_SECTIONS, NULL };
  static const char *const init_exit_sections[] =
         {ALL_INIT_SECTIONS, ALL_EXIT_SECTIONS, NULL };
  
+/* all text sections */
+static const char *const text_sections[] = { ALL_TEXT_SECTIONS, NULL };
+
  /* data section */
  static const char *const data_sections[] = { DATA_SECTIONS, NULL };
  
@@ -910,6 +929,7 @@ static const char *const data_sections[] = { DATA_SECTIONS, NULL };
  static const char *const head_sections[] = { ".head.text*", NULL };
  static const char *const linker_symbols[] =
         { "__init_begin", "_sinittext", "_einittext", NULL };
+static const char *const optim_symbols[] = { "*.constprop.*", NULL };
  
  enum mismatch {
         TEXT_TO_ANY_INIT,
@@ -921,34 +941,65 @@ enum mismatch {
         ANY_INIT_TO_ANY_EXIT,
         ANY_EXIT_TO_ANY_INIT,
         EXPORT_TO_INIT_EXIT,
+       EXTABLE_TO_NON_TEXT,
  };
  
+/**
+ * Describe how to match sections on different criterias:
+ *
+ * @fromsec: Array of sections to be matched.
+ *
+ * @bad_tosec: Relocations applied to a section in @fromsec to a section in
+ * this array is forbidden (black-list).  Can be empty.
+ *
+ * @good_tosec: Relocations applied to a section in @fromsec must be
+ * targetting sections in this array (white-list).  Can be empty.
+ *
+ * @mismatch: Type of mismatch.
+ *
+ * @symbol_white_list: Do not match a relocation to a symbol in this list
+ * even if it is targetting a section in @bad_to_sec.
+ *
+ * @handler: Specific handler to call when a match is found.  If NULL,
+ * default_mismatch_handler() will be called.
+ *
+ */
  struct sectioncheck {
         const char *fromsec[20];
-       const char *tosec[20];
+       const char *bad_tosec[20];
+       const char *good_tosec[20];
         enum mismatch mismatch;
         const char *symbol_white_list[20];
+       void (*handler)(const char *modname, struct elf_info *elf,
+                       const struct sectioncheck* const mismatch,
+                       Elf_Rela *r, Elf_Sym *sym, const char *fromsec);
+
  };
  
+static void extable_mismatch_handler(const char *modname, struct elf_info *elf,
+                                    const struct sectioncheck* const mismatch,
+                                    Elf_Rela *r, Elf_Sym *sym,
+                                    const char *fromsec);
+
  static const struct sectioncheck sectioncheck[] = {
  /* Do not reference init/exit code/data from
   * normal code and data
   */
  {
         .fromsec = { TEXT_SECTIONS, NULL },
-       .tosec   = { ALL_INIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_INIT_SECTIONS, NULL },
         .mismatch = TEXT_TO_ANY_INIT,
         .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
  },
  {
         .fromsec = { DATA_SECTIONS, NULL },
-       .tosec   = { ALL_XXXINIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_XXXINIT_SECTIONS, NULL },
         .mismatch = DATA_TO_ANY_INIT,
         .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
  },
  {
         .fromsec = { DATA_SECTIONS, NULL },
-       .tosec   = { INIT_SECTIONS, NULL },
+       .bad_tosec = { INIT_SECTIONS, NULL },
         .mismatch = DATA_TO_ANY_INIT,
         .symbol_white_list = {
                 "*_template", "*_timer", "*_sht", "*_ops",
@@ -957,56 +1008,66 @@ static const struct sectioncheck sectioncheck[] = {
  },
  {
         .fromsec = { TEXT_SECTIONS, NULL },
-       .tosec   = { ALL_EXIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_EXIT_SECTIONS, NULL },
         .mismatch = TEXT_TO_ANY_EXIT,
         .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
  },
  {
         .fromsec = { DATA_SECTIONS, NULL },
-       .tosec   = { ALL_EXIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_EXIT_SECTIONS, NULL },
         .mismatch = DATA_TO_ANY_EXIT,
         .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
  },
  /* Do not reference init code/data from meminit code/data */
  {
         .fromsec = { ALL_XXXINIT_SECTIONS, NULL },
-       .tosec   = { INIT_SECTIONS, NULL },
+       .bad_tosec = { INIT_SECTIONS, NULL },
         .mismatch = XXXINIT_TO_SOME_INIT,
         .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
  },
  /* Do not reference exit code/data from memexit code/data */
  {
         .fromsec = { ALL_XXXEXIT_SECTIONS, NULL },
-       .tosec   = { EXIT_SECTIONS, NULL },
+       .bad_tosec = { EXIT_SECTIONS, NULL },
         .mismatch = XXXEXIT_TO_SOME_EXIT,
         .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
  },
  /* Do not use exit code/data from init code */
  {
         .fromsec = { ALL_INIT_SECTIONS, NULL },
-       .tosec   = { ALL_EXIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_EXIT_SECTIONS, NULL },
         .mismatch = ANY_INIT_TO_ANY_EXIT,
         .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
  },
  /* Do not use init code/data from exit code */
  {
         .fromsec = { ALL_EXIT_SECTIONS, NULL },
-       .tosec   = { ALL_INIT_SECTIONS, NULL },
+       .bad_tosec = { ALL_INIT_SECTIONS, NULL },
         .mismatch = ANY_EXIT_TO_ANY_INIT,
         .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
  },
  {
         .fromsec = { ALL_PCI_INIT_SECTIONS, NULL },
-       .tosec   = { INIT_SECTIONS, NULL },
+       .bad_tosec = { INIT_SECTIONS, NULL },
         .mismatch = ANY_INIT_TO_ANY_EXIT,
         .symbol_white_list = { NULL },
  },
  /* Do not export init/exit functions or data */
  {
         .fromsec = { "__ksymtab*", NULL },
-       .tosec   = { INIT_SECTIONS, EXIT_SECTIONS, NULL },
+       .bad_tosec = { INIT_SECTIONS, EXIT_SECTIONS, NULL },
         .mismatch = EXPORT_TO_INIT_EXIT,
         .symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
+},
+{
+       .fromsec = { "__ex_table", NULL },
+       /* If you're adding any new black-listed sections in here, consider
+        * adding a special 'printer' for them in scripts/check_extable.
+        */
+       .bad_tosec = { ".altinstr_replacement", NULL },
+       .good_tosec = {ALL_TEXT_SECTIONS , NULL},
+       .mismatch = EXTABLE_TO_NON_TEXT,
+       .handler = extable_mismatch_handler,
  }
  };
  
@@ -1017,10 +1078,22 @@ static const struct sectioncheck *section_mismatch(
         int elems = sizeof(sectioncheck) / sizeof(struct sectioncheck);
         const struct sectioncheck *check = &sectioncheck[0];
  
+       /*
+        * The target section could be the SHT_NUL section when we're
+        * handling relocations to un-resolved symbols, trying to match it
+        * doesn't make much sense and causes build failures on parisc and
+        * mn10300 architectures.
+        */
+       if (*tosec == '\0')
+               return NULL;
+
         for (i = 0; i < elems; i++) {
-               if (match(fromsec, check->fromsec) &&
-                   match(tosec, check->tosec))
-                       return check;
+               if (match(fromsec, check->fromsec)) {
+                       if (check->bad_tosec[0] && match(tosec, check->bad_tosec))
+                               return check;
+                       if (check->good_tosec[0] && !match(tosec, check->good_tosec))
+                               return check;
+               }
                 check++;
         }
         return NULL;
@@ -1067,6 +1140,17 @@ static const struct sectioncheck *section_mismatch(
   *   This pattern is identified by
   *   refsymname = __init_begin, _sinittext, _einittext
   *
+ * Pattern 5:
+ *   GCC may optimize static inlines when fed constant arg(s) resulting
+ *   in functions like cpumask_empty() -- generating an associated symbol
+ *   cpumask_empty.constprop.3 that appears in the audit.  If the const that
+ *   is passed in comes from __init, like say nmi_ipi_mask, we get a
+ *   meaningless section warning.  May need to add isra symbols too...
+ *   This pattern is identified by
+ *   tosec   = init section
+ *   fromsec = text section
+ *   refsymname = *.constprop.*
+ *
   **/
  static int secref_whitelist(const struct sectioncheck *mismatch,
                             const char *fromsec, const char *fromsym,
@@ -1099,6 +1183,12 @@ static int secref_whitelist(const struct sectioncheck *mismatch,
         if (match(tosym, linker_symbols))
                 return 0;
  
+       /* Check for pattern 5 */
+       if (match(fromsec, text_sections) &&
+           match(tosec, init_sections) &&
+           match(fromsym, optim_symbols))
+               return 0;
+
         return 1;
  }
  
@@ -1261,6 +1351,15 @@ static void print_section_list(const char * const list[20])
         fprintf(stderr, "\n");
  }
  
+static inline void get_pretty_name(int is_func, const char** name, const char** name_p)
+{
+       switch (is_func) {
+       case 0: *name = "variable"; *name_p = ""; break;
+       case 1: *name = "function"; *name_p = "()"; break;
+       default: *name = "(unknown reference)"; *name_p = ""; break;
+       }
+}
+
  /*
   * Print a warning about a section mismatch.
   * Try to find symbols near it so user can find it.
@@ -1280,21 +1379,13 @@ static void report_sec_mismatch(const char *modname,
         char *prl_from;
         char *prl_to;
  
-       switch (from_is_func) {
-       case 0: from = "variable"; from_p = "";   break;
-       case 1: from = "function"; from_p = "()"; break;
-       default: from = "(unknown reference)"; from_p = ""; break;
-       }
-       switch (to_is_func) {
-       case 0: to = "variable"; to_p = "";   break;
-       case 1: to = "function"; to_p = "()"; break;
-       default: to = "(unknown reference)"; to_p = ""; break;
-       }
-
         sec_mismatch_count++;
         if (!sec_mismatch_verbose)
                 return;
  
+       get_pretty_name(from_is_func, &from, &from_p);
+       get_pretty_name(to_is_func, &to, &to_p);
+
         warn("%s(%s+0x%llx): Section mismatch in reference from the %s %s%s "
              "to the %s %s:%s%s\n",
              modname, fromsec, fromaddr, from, fromsym, from_p, to, tosec,
@@ -1408,41 +1499,179 @@ static void report_sec_mismatch(const char *modname,
                 tosym, prl_to, prl_to, tosym);
                 free(prl_to);
                 break;
+       case EXTABLE_TO_NON_TEXT:
+               fatal("There's a special handler for this mismatch type, "
+                     "we should never get here.");
+               break;
         }
         fprintf(stderr, "\n");
  }
  
-static void check_section_mismatch(const char *modname, struct elf_info *elf,
-                                  Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
+static void default_mismatch_handler(const char *modname, struct elf_info *elf,
+                                    const struct sectioncheck* const mismatch,
+                                    Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
  {
         const char *tosec;
-       const struct sectioncheck *mismatch;
+       Elf_Sym *to;
+       Elf_Sym *from;
+       const char *tosym;
+       const char *fromsym;
+
+       from = find_elf_symbol2(elf, r->r_offset, fromsec);
+       fromsym = sym_name(elf, from);
+
+       if (!strncmp(fromsym, "reference___initcall",
+                    sizeof("reference___initcall")-1))
+               return;
  
         tosec = sec_name(elf, get_secindex(elf, sym));
-       mismatch = section_mismatch(fromsec, tosec);
+       to = find_elf_symbol(elf, r->r_addend, sym);
+       tosym = sym_name(elf, to);
+
+       /* check whitelist - we may ignore it */
+       if (secref_whitelist(mismatch,
+                            fromsec, fromsym, tosec, tosym)) {
+               report_sec_mismatch(modname, mismatch,
+                                   fromsec, r->r_offset, fromsym,
+                                   is_function(from), tosec, tosym,
+                                   is_function(to));
+       }
+}
+
+static int is_executable_section(struct elf_info* elf, unsigned int section_index)
+{
+       if (section_index > elf->num_sections)
+               fatal("section_index is outside elf->num_sections!\n");
+
+       return ((elf->sechdrs[section_index].sh_flags & SHF_EXECINSTR) == SHF_EXECINSTR);
+}
+
+/*
+ * We rely on a gross hack in section_rel[a]() calling find_extable_entry_size()
+ * to know the sizeof(struct exception_table_entry) for the target architecture.
+ */
+static unsigned int extable_entry_size = 0;
+static void find_extable_entry_size(const char* const sec, const Elf_Rela* r)
+{
+       /*
+        * If we're currently checking the second relocation within __ex_table,
+        * that relocation offset tells us the offsetof(struct
+        * exception_table_entry, fixup) which is equal to sizeof(struct
+        * exception_table_entry) divided by two.  We use that to our advantage
+        * since there's no portable way to get that size as every architecture
+        * seems to go with different sized types.  Not pretty but better than
+        * hard-coding the size for every architecture..
+        */
+       if (!extable_entry_size)
+               extable_entry_size = r->r_offset * 2;
+}
+
+static inline bool is_extable_fault_address(Elf_Rela *r)
+{
+       /*
+        * extable_entry_size is only discovered after we've handled the
+        * _second_ relocation in __ex_table, so only abort when we're not
+        * handling the first reloc and extable_entry_size is zero.
+        */
+       if (r->r_offset && extable_entry_size == 0)
+               fatal("extable_entry size hasn't been discovered!\n");
+
+       return ((r->r_offset == 0) ||
+               (r->r_offset % extable_entry_size == 0));
+}
+
+#define is_second_extable_reloc(Start, Cur, Sec)                       \
+       (((Cur) == (Start) + 1) && (strcmp("__ex_table", (Sec)) == 0))
+
+static void report_extable_warnings(const char* modname, struct elf_info* elf,
+                                   const struct sectioncheck* const mismatch,
+                                   Elf_Rela* r, Elf_Sym* sym,
+                                   const char* fromsec, const char* tosec)
+{
+       Elf_Sym* fromsym = find_elf_symbol2(elf, r->r_offset, fromsec);
+       const char* fromsym_name = sym_name(elf, fromsym);
+       Elf_Sym* tosym = find_elf_symbol(elf, r->r_addend, sym);
+       const char* tosym_name = sym_name(elf, tosym);
+       const char* from_pretty_name;
+       const char* from_pretty_name_p;
+       const char* to_pretty_name;
+       const char* to_pretty_name_p;
+
+       get_pretty_name(is_function(fromsym),
+                       &from_pretty_name, &from_pretty_name_p);
+       get_pretty_name(is_function(tosym),
+                       &to_pretty_name, &to_pretty_name_p);
+
+       warn("%s(%s+0x%lx): Section mismatch in reference"
+            " from the %s %s%s to the %s %s:%s%s\n",
+            modname, fromsec, (long)r->r_offset, from_pretty_name,
+            fromsym_name, from_pretty_name_p,
+            to_pretty_name, tosec, tosym_name, to_pretty_name_p);
+
+       if (!match(tosec, mismatch->bad_tosec) &&
+           is_executable_section(elf, get_secindex(elf, sym)))
+               fprintf(stderr,
+                       "The relocation at %s+0x%lx references\n"
+                       "section \"%s\" which is not in the list of\n"
+                       "authorized sections.  If you're adding a new section\n"
+                       "and/or if this reference is valid, add \"%s\" to the\n"
+                       "list of authorized sections to jump to on fault.\n"
+                       "This can be achieved by adding \"%s\" to \n"
+                       "OTHER_TEXT_SECTIONS in scripts/mod/modpost.c.\n",
+                       fromsec, (long)r->r_offset, tosec, tosec, tosec);
+}
+
+static void extable_mismatch_handler(const char* modname, struct elf_info *elf,
+                                    const struct sectioncheck* const mismatch,
+                                    Elf_Rela* r, Elf_Sym* sym,
+                                    const char *fromsec)
+{
+       const char* tosec = sec_name(elf, get_secindex(elf, sym));
+
+       sec_mismatch_count++;
+
+       if (sec_mismatch_verbose)
+               report_extable_warnings(modname, elf, mismatch, r, sym,
+                                       fromsec, tosec);
+
+       if (match(tosec, mismatch->bad_tosec))
+               fatal("The relocation at %s+0x%lx references\n"
+                     "section \"%s\" which is black-listed.\n"
+                     "Something is seriously wrong and should be fixed.\n"
+                     "You might get more information about where this is\n"
+                     "coming from by using scripts/check_extable.sh %s\n",
+                     fromsec, (long)r->r_offset, tosec, modname);
+       else if (!is_executable_section(elf, get_secindex(elf, sym))) {
+               if (is_extable_fault_address(r))
+                       fatal("The relocation at %s+0x%lx references\n"
+                             "section \"%s\" which is not executable, IOW\n"
+                             "it is not possible for the kernel to fault\n"
+                             "at that address.  Something is seriously wrong\n"
+                             "and should be fixed.\n",
+                             fromsec, (long)r->r_offset, tosec);
+               else
+                       fatal("The relocation at %s+0x%lx references\n"
+                             "section \"%s\" which is not executable, IOW\n"
+                             "the kernel will fault if it ever tries to\n"
+                             "jump to it.  Something is seriously wrong\n"
+                             "and should be fixed.\n",
+                             fromsec, (long)r->r_offset, tosec);
+       }
+}
+
+static void check_section_mismatch(const char *modname, struct elf_info *elf,
+                                  Elf_Rela *r, Elf_Sym *sym, const char *fromsec)
+{
+       const char *tosec = sec_name(elf, get_secindex(elf, sym));;
+       const struct sectioncheck *mismatch = section_mismatch(fromsec, tosec);
+
         if (mismatch) {
-               Elf_Sym *to;
-               Elf_Sym *from;
-               const char *tosym;
-               const char *fromsym;
-
-               from = find_elf_symbol2(elf, r->r_offset, fromsec);
-               fromsym = sym_name(elf, from);
-               to = find_elf_symbol(elf, r->r_addend, sym);
-               tosym = sym_name(elf, to);
-
-               if (!strncmp(fromsym, "reference___initcall",
-                               sizeof("reference___initcall")-1))
-                       return;
-
-               /* check whitelist - we may ignore it */
-               if (secref_whitelist(mismatch,
-                                       fromsec, fromsym, tosec, tosym)) {
-                       report_sec_mismatch(modname, mismatch,
-                          fromsec, r->r_offset, fromsym,
-                          is_function(from), tosec, tosym,
-                          is_function(to));
-               }
+               if (mismatch->handler)
+                       mismatch->handler(modname, elf,  mismatch,
+                                         r, sym, fromsec);
+               else
+                       default_mismatch_handler(modname, elf, mismatch,
+                                                r, sym, fromsec);
         }
  }
  
@@ -1582,6 +1811,8 @@ static void section_rela(const char *modname, struct elf_info *elf,
                 /* Skip special sections */
                 if (is_shndx_special(sym->st_shndx))
                         continue;
+               if (is_second_extable_reloc(start, rela, fromsec))
+                       find_extable_entry_size(fromsec, &r);
                 check_section_mismatch(modname, elf, &r, sym, fromsec);
         }
  }
@@ -1640,6 +1871,8 @@ static void section_rel(const char *modname, struct elf_info *elf,
                 /* Skip special sections */
                 if (is_shndx_special(sym->st_shndx))
                         continue;
+               if (is_second_extable_reloc(start, rel, fromsec))
+                       find_extable_entry_size(fromsec, &r);
                 check_section_mismatch(modname, elf, &r, sym, fromsec);
         }
  }
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c

index 7db9954..ad4fa49 100644 (file)
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -365,7 +365,7 @@ void __aa_fs_profile_rmdir(struct aa_profile *profile)
                 if (!profile->dents[i])
                         continue;
  
-               r = profile->dents[i]->d_inode->i_private;
+               r = d_inode(profile->dents[i])->i_private;
                 securityfs_remove(profile->dents[i]);
                 aa_put_replacedby(r);
                 profile->dents[i] = NULL;
diff --git a/security/apparmor/file.c b/security/apparmor/file.c

index fdaa50c..913f377 100644 (file)
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -259,7 +259,7 @@ unsigned int aa_str_perms(struct aa_dfa *dfa, unsigned int start,
   */
  static inline bool is_deleted(struct dentry *dentry)
  {
-       if (d_unlinked(dentry) && dentry->d_inode->i_nlink == 0)
+       if (d_unlinked(dentry) && d_backing_inode(dentry)->i_nlink == 0)
                 return 1;
         return 0;
  }
@@ -351,8 +351,8 @@ int aa_path_link(struct aa_profile *profile, struct dentry *old_dentry,
         struct path link = { new_dir->mnt, new_dentry };
         struct path target = { new_dir->mnt, old_dentry };
         struct path_cond cond = {
-               old_dentry->d_inode->i_uid,
-               old_dentry->d_inode->i_mode
+               d_backing_inode(old_dentry)->i_uid,
+               d_backing_inode(old_dentry)->i_mode
         };
         char *buffer = NULL, *buffer2 = NULL;
         const char *lname, *tname = NULL, *info = NULL;
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c

index dd56bff..e5f1561 100644 (file)
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -204,8 +204,8 @@ static int common_perm_mnt_dentry(int op, struct vfsmount *mnt,
                                   struct dentry *dentry, u32 mask)
  {
         struct path path = { mnt, dentry };
-       struct path_cond cond = { dentry->d_inode->i_uid,
-                                 dentry->d_inode->i_mode
+       struct path_cond cond = { d_backing_inode(dentry)->i_uid,
+                                 d_backing_inode(dentry)->i_mode
         };
  
         return common_perm(op, &path, mask, &cond);
@@ -223,7 +223,7 @@ static int common_perm_mnt_dentry(int op, struct vfsmount *mnt,
  static int common_perm_rm(int op, struct path *dir,
                           struct dentry *dentry, u32 mask)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         struct path_cond cond = { };
  
         if (!inode || !dir->mnt || !mediated_filesystem(dentry))
@@ -281,8 +281,8 @@ static int apparmor_path_mknod(struct path *dir, struct dentry *dentry,
  
  static int apparmor_path_truncate(struct path *path)
  {
-       struct path_cond cond = { path->dentry->d_inode->i_uid,
-                                 path->dentry->d_inode->i_mode
+       struct path_cond cond = { d_backing_inode(path->dentry)->i_uid,
+                                 d_backing_inode(path->dentry)->i_mode
         };
  
         if (!path->mnt || !mediated_filesystem(path->dentry))
@@ -327,8 +327,8 @@ static int apparmor_path_rename(struct path *old_dir, struct dentry *old_dentry,
         if (!unconfined(profile)) {
                 struct path old_path = { old_dir->mnt, old_dentry };
                 struct path new_path = { new_dir->mnt, new_dentry };
-               struct path_cond cond = { old_dentry->d_inode->i_uid,
-                                         old_dentry->d_inode->i_mode
+               struct path_cond cond = { d_backing_inode(old_dentry)->i_uid,
+                                         d_backing_inode(old_dentry)->i_mode
                 };
  
                 error = aa_path_perm(OP_RENAME_SRC, profile, &old_path, 0,
@@ -354,8 +354,8 @@ static int apparmor_path_chmod(struct path *path, umode_t mode)
  
  static int apparmor_path_chown(struct path *path, kuid_t uid, kgid_t gid)
  {
-       struct path_cond cond =  { path->dentry->d_inode->i_uid,
-                                  path->dentry->d_inode->i_mode
+       struct path_cond cond =  { d_backing_inode(path->dentry)->i_uid,
+                                  d_backing_inode(path->dentry)->i_mode
         };
  
         if (!mediated_filesystem(path->dentry))
diff --git a/security/commoncap.c b/security/commoncap.c

index f66713b..f2875cd 100644 (file)
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -297,7 +297,7 @@ static inline void bprm_clear_caps(struct linux_binprm *bprm)
   */
  int cap_inode_need_killpriv(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         int error;
  
         if (!inode->i_op->getxattr)
@@ -319,7 +319,7 @@ int cap_inode_need_killpriv(struct dentry *dentry)
   */
  int cap_inode_killpriv(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
  
         if (!inode->i_op->removexattr)
                return 0;
@@ -375,7 +375,7 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
   */
  int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         __u32 magic_etc;
         unsigned tocopy, i;
         int size;
diff --git a/security/inode.c b/security/inode.c

index 131a3c4..91503b7 100644 (file)
--- a/security/inode.c
+++ b/security/inode.c
@@ -27,7 +27,7 @@ static int mount_count;
  
  static inline int positive(struct dentry *dentry)
  {
-       return dentry->d_inode && !d_unhashed(dentry);
+       return d_really_is_positive(dentry) && !d_unhashed(dentry);
  }
  
  static int fill_super(struct super_block *sb, void *data, int silent)
@@ -102,14 +102,14 @@ struct dentry *securityfs_create_file(const char *name, umode_t mode,
         if (!parent)
                 parent = mount->mnt_root;
  
-       dir = parent->d_inode;
+       dir = d_inode(parent);
  
         mutex_lock(&dir->i_mutex);
         dentry = lookup_one_len(name, parent, strlen(name));
         if (IS_ERR(dentry))
                 goto out;
  
-       if (dentry->d_inode) {
+       if (d_really_is_positive(dentry)) {
                 error = -EEXIST;
                 goto out1;
         }
@@ -197,20 +197,20 @@ void securityfs_remove(struct dentry *dentry)
                 return;
  
         parent = dentry->d_parent;
-       if (!parent || !parent->d_inode)
+       if (!parent || d_really_is_negative(parent))
                 return;
  
-       mutex_lock(&parent->d_inode->i_mutex);
+       mutex_lock(&d_inode(parent)->i_mutex);
         if (positive(dentry)) {
-               if (dentry->d_inode) {
+               if (d_really_is_positive(dentry)) {
                         if (d_is_dir(dentry))
-                               simple_rmdir(parent->d_inode, dentry);
+                               simple_rmdir(d_inode(parent), dentry);
                         else
-                               simple_unlink(parent->d_inode, dentry);
+                               simple_unlink(d_inode(parent), dentry);
                         dput(dentry);
                 }
         }
-       mutex_unlock(&parent->d_inode->i_mutex);
+       mutex_unlock(&d_inode(parent)->i_mutex);
         simple_release_fs(&mount, &mount_count);
  }
  EXPORT_SYMBOL_GPL(securityfs_remove);
diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c

index 5e9687f..159ef3e 100644 (file)
--- a/security/integrity/evm/evm_crypto.c
+++ b/security/integrity/evm/evm_crypto.c
@@ -131,7 +131,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
                                 size_t req_xattr_value_len,
                                 char type, char *digest)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         struct shash_desc *desc;
         char **xattrname;
         size_t xattr_size = 0;
@@ -199,7 +199,7 @@ int evm_calc_hash(struct dentry *dentry, const char *req_xattr_name,
  int evm_update_evmxattr(struct dentry *dentry, const char *xattr_name,
                         const char *xattr_value, size_t xattr_value_len)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         struct evm_ima_xattr_data xattr_data;
         int rc = 0;
  
diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c

index f589c9a..10f9943 100644 (file)
--- a/security/integrity/evm/evm_main.c
+++ b/security/integrity/evm/evm_main.c
@@ -72,7 +72,7 @@ static void __init evm_init_config(void)
  
  static int evm_find_protected_xattrs(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         char **xattr;
         int error;
         int count = 0;
@@ -165,8 +165,8 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
                         /* Replace RSA with HMAC if not mounted readonly and
                          * not immutable
                          */
-                       if (!IS_RDONLY(dentry->d_inode) &&
-                           !IS_IMMUTABLE(dentry->d_inode))
+                       if (!IS_RDONLY(d_backing_inode(dentry)) &&
+                           !IS_IMMUTABLE(d_backing_inode(dentry)))
                                 evm_update_evmxattr(dentry, xattr_name,
                                                     xattr_value,
                                                     xattr_value_len);
@@ -235,7 +235,7 @@ enum integrity_status evm_verifyxattr(struct dentry *dentry,
                 return INTEGRITY_UNKNOWN;
  
         if (!iint) {
-               iint = integrity_iint_find(dentry->d_inode);
+               iint = integrity_iint_find(d_backing_inode(dentry));
                 if (!iint)
                         return INTEGRITY_UNKNOWN;
         }
@@ -253,7 +253,7 @@ EXPORT_SYMBOL_GPL(evm_verifyxattr);
   */
  static enum integrity_status evm_verify_current_integrity(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
  
         if (!evm_initialized || !S_ISREG(inode->i_mode) || evm_fixmode)
                 return 0;
@@ -293,13 +293,13 @@ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name,
         if (evm_status == INTEGRITY_NOXATTRS) {
                 struct integrity_iint_cache *iint;
  
-               iint = integrity_iint_find(dentry->d_inode);
+               iint = integrity_iint_find(d_backing_inode(dentry));
                 if (iint && (iint->flags & IMA_NEW_FILE))
                         return 0;
         }
  out:
         if (evm_status != INTEGRITY_PASS)
-               integrity_audit_msg(AUDIT_INTEGRITY_METADATA, dentry->d_inode,
+               integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
                                     dentry->d_name.name, "appraise_metadata",
                                     integrity_status_msg[evm_status],
                                     -EPERM, 0);
@@ -379,7 +379,7 @@ void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name,
   */
  void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
  
         if (!evm_initialized || !evm_protected_xattr(xattr_name))
                 return;
@@ -404,7 +404,7 @@ int evm_inode_setattr(struct dentry *dentry, struct iattr *attr)
         if ((evm_status == INTEGRITY_PASS) ||
             (evm_status == INTEGRITY_NOXATTRS))
                 return 0;
-       integrity_audit_msg(AUDIT_INTEGRITY_METADATA, dentry->d_inode,
+       integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry),
                             dentry->d_name.name, "appraise_metadata",
                             integrity_status_msg[evm_status], -EPERM, 0);
         return -EPERM;
diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c

index fffcdb0..4df493e 100644 (file)
--- a/security/integrity/ima/ima_appraise.c
+++ b/security/integrity/ima/ima_appraise.c
@@ -165,7 +165,7 @@ void ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, int xattr_len,
  int ima_read_xattr(struct dentry *dentry,
                    struct evm_ima_xattr_data **xattr_value)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
  
         if (!inode->i_op->getxattr)
                 return 0;
@@ -190,7 +190,7 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint,
         static const char op[] = "appraise_data";
         char *cause = "unknown";
         struct dentry *dentry = file->f_path.dentry;
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         enum integrity_status status = INTEGRITY_UNKNOWN;
         int rc = xattr_len, hash_start = 0;
  
@@ -314,7 +314,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
   */
  void ima_inode_post_setattr(struct dentry *dentry)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         struct integrity_iint_cache *iint;
         int must_appraise, rc;
  
@@ -380,7 +380,7 @@ int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
         if (result == 1) {
                 if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST))
                         return -EINVAL;
-               ima_reset_appraise_flags(dentry->d_inode,
+               ima_reset_appraise_flags(d_backing_inode(dentry),
                          (xvalue->type == EVM_IMA_XATTR_DIGSIG) ? 1 : 0);
                 result = 0;
         }
@@ -393,7 +393,7 @@ int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name)
  
         result = ima_protect_xattr(dentry, xattr_name, NULL, 0);
         if (result == 1) {
-               ima_reset_appraise_flags(dentry->d_inode, 0);
+               ima_reset_appraise_flags(d_backing_inode(dentry), 0);
                 result = 0;
         }
         return result;
diff --git a/security/lsm_audit.c b/security/lsm_audit.c

index b526ddc..1d34277 100644 (file)
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -237,7 +237,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
  
                 audit_log_d_path(ab, " path=", &a->u.path);
  
-               inode = a->u.path.dentry->d_inode;
+               inode = d_backing_inode(a->u.path.dentry);
                 if (inode) {
                         audit_log_format(ab, " dev=");
                         audit_log_untrustedstring(ab, inode->i_sb->s_id);
@@ -251,7 +251,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
                 audit_log_format(ab, " name=");
                 audit_log_untrustedstring(ab, a->u.dentry->d_name.name);
  
-               inode = a->u.dentry->d_inode;
+               inode = d_backing_inode(a->u.dentry);
                 if (inode) {
                         audit_log_format(ab, " dev=");
                         audit_log_untrustedstring(ab, inode->i_sb->s_id);
diff --git a/security/security.c b/security/security.c

index 730ac65..8e9b1f4 100644 (file)
--- a/security/security.c
+++ b/security/security.c
@@ -410,7 +410,7 @@ EXPORT_SYMBOL(security_old_inode_init_security);
  int security_path_mknod(struct path *dir, struct dentry *dentry, umode_t mode,
                         unsigned int dev)
  {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                 return 0;
         return security_ops->path_mknod(dir, dentry, mode, dev);
  }
@@ -418,7 +418,7 @@ EXPORT_SYMBOL(security_path_mknod);
  
  int security_path_mkdir(struct path *dir, struct dentry *dentry, umode_t mode)
  {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                 return 0;
         return security_ops->path_mkdir(dir, dentry, mode);
  }
@@ -426,14 +426,14 @@ EXPORT_SYMBOL(security_path_mkdir);
  
  int security_path_rmdir(struct path *dir, struct dentry *dentry)
  {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                 return 0;
         return security_ops->path_rmdir(dir, dentry);
  }
  
  int security_path_unlink(struct path *dir, struct dentry *dentry)
  {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                 return 0;
         return security_ops->path_unlink(dir, dentry);
  }
@@ -442,7 +442,7 @@ EXPORT_SYMBOL(security_path_unlink);
  int security_path_symlink(struct path *dir, struct dentry *dentry,
                           const char *old_name)
  {
-       if (unlikely(IS_PRIVATE(dir->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dir->dentry))))
                 return 0;
         return security_ops->path_symlink(dir, dentry, old_name);
  }
@@ -450,7 +450,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
  int security_path_link(struct dentry *old_dentry, struct path *new_dir,
                        struct dentry *new_dentry)
  {
-       if (unlikely(IS_PRIVATE(old_dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry))))
                 return 0;
         return security_ops->path_link(old_dentry, new_dir, new_dentry);
  }
@@ -459,8 +459,8 @@ int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
                          struct path *new_dir, struct dentry *new_dentry,
                          unsigned int flags)
  {
-       if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
-                    (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
+       if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)) ||
+                    (d_is_positive(new_dentry) && IS_PRIVATE(d_backing_inode(new_dentry)))))
                 return 0;
  
         if (flags & RENAME_EXCHANGE) {
@@ -477,21 +477,21 @@ EXPORT_SYMBOL(security_path_rename);
  
  int security_path_truncate(struct path *path)
  {
-       if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
                 return 0;
         return security_ops->path_truncate(path);
  }
  
  int security_path_chmod(struct path *path, umode_t mode)
  {
-       if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
                 return 0;
         return security_ops->path_chmod(path, mode);
  }
  
  int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
  {
-       if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
                 return 0;
         return security_ops->path_chown(path, uid, gid);
  }
@@ -513,14 +513,14 @@ EXPORT_SYMBOL_GPL(security_inode_create);
  int security_inode_link(struct dentry *old_dentry, struct inode *dir,
                          struct dentry *new_dentry)
  {
-       if (unlikely(IS_PRIVATE(old_dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry))))
                 return 0;
         return security_ops->inode_link(old_dentry, dir, new_dentry);
  }
  
  int security_inode_unlink(struct inode *dir, struct dentry *dentry)
  {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return 0;
         return security_ops->inode_unlink(dir, dentry);
  }
@@ -543,7 +543,7 @@ EXPORT_SYMBOL_GPL(security_inode_mkdir);
  
  int security_inode_rmdir(struct inode *dir, struct dentry *dentry)
  {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return 0;
         return security_ops->inode_rmdir(dir, dentry);
  }
@@ -559,8 +559,8 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
                            struct inode *new_dir, struct dentry *new_dentry,
                            unsigned int flags)
  {
-        if (unlikely(IS_PRIVATE(old_dentry->d_inode) ||
-            (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode))))
+        if (unlikely(IS_PRIVATE(d_backing_inode(old_dentry)) ||
+            (d_is_positive(new_dentry) && IS_PRIVATE(d_backing_inode(new_dentry)))))
                 return 0;
  
         if (flags & RENAME_EXCHANGE) {
@@ -576,14 +576,14 @@ int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry,
  
  int security_inode_readlink(struct dentry *dentry)
  {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return 0;
         return security_ops->inode_readlink(dentry);
  }
  
  int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return 0;
         return security_ops->inode_follow_link(dentry, nd);
  }
@@ -599,7 +599,7 @@ int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
  {
         int ret;
  
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return 0;
         ret = security_ops->inode_setattr(dentry, attr);
         if (ret)
@@ -610,7 +610,7 @@ EXPORT_SYMBOL_GPL(security_inode_setattr);
  
  int security_inode_getattr(const struct path *path)
  {
-       if (unlikely(IS_PRIVATE(path->dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))
                 return 0;
         return security_ops->inode_getattr(path);
  }
@@ -620,7 +620,7 @@ int security_inode_setxattr(struct dentry *dentry, const char *name,
  {
         int ret;
  
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return 0;
         ret = security_ops->inode_setxattr(dentry, name, value, size, flags);
         if (ret)
@@ -634,7 +634,7 @@ int security_inode_setxattr(struct dentry *dentry, const char *name,
  void security_inode_post_setxattr(struct dentry *dentry, const char *name,
                                   const void *value, size_t size, int flags)
  {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return;
         security_ops->inode_post_setxattr(dentry, name, value, size, flags);
         evm_inode_post_setxattr(dentry, name, value, size);
@@ -642,14 +642,14 @@ void security_inode_post_setxattr(struct dentry *dentry, const char *name,
  
  int security_inode_getxattr(struct dentry *dentry, const char *name)
  {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return 0;
         return security_ops->inode_getxattr(dentry, name);
  }
  
  int security_inode_listxattr(struct dentry *dentry)
  {
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return 0;
         return security_ops->inode_listxattr(dentry);
  }
@@ -658,7 +658,7 @@ int security_inode_removexattr(struct dentry *dentry, const char *name)
  {
         int ret;
  
-       if (unlikely(IS_PRIVATE(dentry->d_inode)))
+       if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
                 return 0;
         ret = security_ops->inode_removexattr(dentry, name);
         if (ret)
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c

index c318b30..7dade28 100644 (file)
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -414,7 +414,7 @@ static int sb_finish_set_opts(struct super_block *sb)
  {
         struct superblock_security_struct *sbsec = sb->s_security;
         struct dentry *root = sb->s_root;
-       struct inode *root_inode = root->d_inode;
+       struct inode *root_inode = d_backing_inode(root);
         int rc = 0;
  
         if (sbsec->behavior == SECURITY_FS_USE_XATTR) {
@@ -552,7 +552,7 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
                 opts->mnt_opts_flags[i++] = DEFCONTEXT_MNT;
         }
         if (sbsec->flags & ROOTCONTEXT_MNT) {
-               struct inode *root = sbsec->sb->s_root->d_inode;
+               struct inode *root = d_backing_inode(sbsec->sb->s_root);
                 struct inode_security_struct *isec = root->i_security;
  
                 rc = security_sid_to_context(isec->sid, &context, &len);
@@ -608,7 +608,7 @@ static int selinux_set_mnt_opts(struct super_block *sb,
         int rc = 0, i;
         struct superblock_security_struct *sbsec = sb->s_security;
         const char *name = sb->s_type->name;
-       struct inode *inode = sbsec->sb->s_root->d_inode;
+       struct inode *inode = d_backing_inode(sbsec->sb->s_root);
         struct inode_security_struct *root_isec = inode->i_security;
         u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
         u32 defcontext_sid = 0;
@@ -835,8 +835,8 @@ static int selinux_cmp_sb_context(const struct super_block *oldsb,
         if ((oldflags & DEFCONTEXT_MNT) && old->def_sid != new->def_sid)
                 goto mismatch;
         if (oldflags & ROOTCONTEXT_MNT) {
-               struct inode_security_struct *oldroot = oldsb->s_root->d_inode->i_security;
-               struct inode_security_struct *newroot = newsb->s_root->d_inode->i_security;
+               struct inode_security_struct *oldroot = d_backing_inode(oldsb->s_root)->i_security;
+               struct inode_security_struct *newroot = d_backing_inode(newsb->s_root)->i_security;
                 if (oldroot->sid != newroot->sid)
                         goto mismatch;
         }
@@ -886,16 +886,16 @@ static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb,
                 if (!set_fscontext)
                         newsbsec->sid = sid;
                 if (!set_rootcontext) {
-                       struct inode *newinode = newsb->s_root->d_inode;
+                       struct inode *newinode = d_backing_inode(newsb->s_root);
                         struct inode_security_struct *newisec = newinode->i_security;
                         newisec->sid = sid;
                 }
                 newsbsec->mntpoint_sid = sid;
         }
         if (set_rootcontext) {
-               const struct inode *oldinode = oldsb->s_root->d_inode;
+               const struct inode *oldinode = d_backing_inode(oldsb->s_root);
                 const struct inode_security_struct *oldisec = oldinode->i_security;
-               struct inode *newinode = newsb->s_root->d_inode;
+               struct inode *newinode = d_backing_inode(newsb->s_root);
                 struct inode_security_struct *newisec = newinode->i_security;
  
                 newisec->sid = oldisec->sid;
@@ -1610,7 +1610,7 @@ static inline int dentry_has_perm(const struct cred *cred,
                                   struct dentry *dentry,
                                   u32 av)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         struct common_audit_data ad;
  
         ad.type = LSM_AUDIT_DATA_DENTRY;
@@ -1625,7 +1625,7 @@ static inline int path_has_perm(const struct cred *cred,
                                 const struct path *path,
                                 u32 av)
  {
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
         struct common_audit_data ad;
  
         ad.type = LSM_AUDIT_DATA_PATH;
@@ -1753,7 +1753,7 @@ static int may_link(struct inode *dir,
         int rc;
  
         dsec = dir->i_security;
-       isec = dentry->d_inode->i_security;
+       isec = d_backing_inode(dentry)->i_security;
  
         ad.type = LSM_AUDIT_DATA_DENTRY;
         ad.u.dentry = dentry;
@@ -1797,7 +1797,7 @@ static inline int may_rename(struct inode *old_dir,
         int rc;
  
         old_dsec = old_dir->i_security;
-       old_isec = old_dentry->d_inode->i_security;
+       old_isec = d_backing_inode(old_dentry)->i_security;
         old_is_dir = d_is_dir(old_dentry);
         new_dsec = new_dir->i_security;
  
@@ -1827,7 +1827,7 @@ static inline int may_rename(struct inode *old_dir,
         if (rc)
                 return rc;
         if (d_is_positive(new_dentry)) {
-               new_isec = new_dentry->d_inode->i_security;
+               new_isec = d_backing_inode(new_dentry)->i_security;
                 new_is_dir = d_is_dir(new_dentry);
                 rc = avc_has_perm(sid, new_isec->sid,
                                   new_isec->sclass,
@@ -1963,7 +1963,7 @@ static int selinux_binder_transfer_file(struct task_struct *from,
  {
         u32 sid = task_sid(to);
         struct file_security_struct *fsec = file->f_security;
-       struct inode *inode = file->f_path.dentry->d_inode;
+       struct inode *inode = d_backing_inode(file->f_path.dentry);
         struct inode_security_struct *isec = inode->i_security;
         struct common_audit_data ad;
         int rc;
@@ -2627,7 +2627,7 @@ static int selinux_sb_remount(struct super_block *sb, void *data)
                         break;
                 case ROOTCONTEXT_MNT: {
                         struct inode_security_struct *root_isec;
-                       root_isec = sb->s_root->d_inode->i_security;
+                       root_isec = d_backing_inode(sb->s_root)->i_security;
  
                         if (bad_option(sbsec, ROOTCONTEXT_MNT, root_isec->sid, sid))
                                 goto out_bad_option;
@@ -2727,7 +2727,7 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode,
         struct task_security_struct *tsec;
         struct inode_security_struct *dsec;
         struct superblock_security_struct *sbsec;
-       struct inode *dir = dentry->d_parent->d_inode;
+       struct inode *dir = d_backing_inode(dentry->d_parent);
         u32 newsid;
         int rc;
  
@@ -2982,7 +2982,7 @@ static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name)
  static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
                                   const void *value, size_t size, int flags)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         struct inode_security_struct *isec = inode->i_security;
         struct superblock_security_struct *sbsec;
         struct common_audit_data ad;
@@ -3059,7 +3059,7 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name,
                                         const void *value, size_t size,
                                         int flags)
  {
-       struct inode *inode = dentry->d_inode;
+       struct inode *inode = d_backing_inode(dentry);
         struct inode_security_struct *isec = inode->i_security;
         u32 newsid;
         int rc;
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c

index 5fde343..d2787cc 100644 (file)
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1737,7 +1737,7 @@ static struct dentry *sel_make_dir(struct dentry *dir, const char *name,
         inc_nlink(inode);
         d_add(dentry, inode);
         /* bump link count on parent directory, too */
-       inc_nlink(dir->d_inode);
+       inc_nlink(d_inode(dir));
  
         return dentry;
  }
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c

index 69fdc38..b644757 100644 (file)
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -593,7 +593,7 @@ static int smack_sb_copy_data(char *orig, char *smackopts)
  static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data)
  {
         struct dentry *root = sb->s_root;
-       struct inode *inode = root->d_inode;
+       struct inode *inode = d_backing_inode(root);
         struct superblock_smack *sp = sb->s_security;
         struct inode_smack *isp;
         struct smack_known *skp;
@@ -889,15 +889,15 @@ static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
         smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
  
-       isp = smk_of_inode(old_dentry->d_inode);
+       isp = smk_of_inode(d_backing_inode(old_dentry));
         rc = smk_curacc(isp, MAY_WRITE, &ad);
-       rc = smk_bu_inode(old_dentry->d_inode, MAY_WRITE, rc);
+       rc = smk_bu_inode(d_backing_inode(old_dentry), MAY_WRITE, rc);
  
         if (rc == 0 && d_is_positive(new_dentry)) {
-               isp = smk_of_inode(new_dentry->d_inode);
+               isp = smk_of_inode(d_backing_inode(new_dentry));
                 smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
                 rc = smk_curacc(isp, MAY_WRITE, &ad);
-               rc = smk_bu_inode(new_dentry->d_inode, MAY_WRITE, rc);
+               rc = smk_bu_inode(d_backing_inode(new_dentry), MAY_WRITE, rc);
         }
  
         return rc;
@@ -913,7 +913,7 @@ static int smack_inode_link(struct dentry *old_dentry, struct inode *dir,
   */
  static int smack_inode_unlink(struct inode *dir, struct dentry *dentry)
  {
-       struct inode *ip = dentry->d_inode;
+       struct inode *ip = d_backing_inode(dentry);
         struct smk_audit_info ad;
         int rc;
  
@@ -956,8 +956,8 @@ static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry)
         /*
          * You need write access to the thing you're removing
          */
-       rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
-       rc = smk_bu_inode(dentry->d_inode, MAY_WRITE, rc);
+       rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_WRITE, &ad);
+       rc = smk_bu_inode(d_backing_inode(dentry), MAY_WRITE, rc);
         if (rc == 0) {
                 /*
                  * You also need write access to the containing directory
@@ -995,15 +995,15 @@ static int smack_inode_rename(struct inode *old_inode,
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
         smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry);
  
-       isp = smk_of_inode(old_dentry->d_inode);
+       isp = smk_of_inode(d_backing_inode(old_dentry));
         rc = smk_curacc(isp, MAY_READWRITE, &ad);
-       rc = smk_bu_inode(old_dentry->d_inode, MAY_READWRITE, rc);
+       rc = smk_bu_inode(d_backing_inode(old_dentry), MAY_READWRITE, rc);
  
         if (rc == 0 && d_is_positive(new_dentry)) {
-               isp = smk_of_inode(new_dentry->d_inode);
+               isp = smk_of_inode(d_backing_inode(new_dentry));
                 smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry);
                 rc = smk_curacc(isp, MAY_READWRITE, &ad);
-               rc = smk_bu_inode(new_dentry->d_inode, MAY_READWRITE, rc);
+               rc = smk_bu_inode(d_backing_inode(new_dentry), MAY_READWRITE, rc);
         }
         return rc;
  }
@@ -1060,8 +1060,8 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
         smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
  
-       rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
-       rc = smk_bu_inode(dentry->d_inode, MAY_WRITE, rc);
+       rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_WRITE, &ad);
+       rc = smk_bu_inode(d_backing_inode(dentry), MAY_WRITE, rc);
         return rc;
  }
  
@@ -1075,7 +1075,7 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr)
  static int smack_inode_getattr(const struct path *path)
  {
         struct smk_audit_info ad;
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
         int rc;
  
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_PATH);
@@ -1142,8 +1142,8 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name,
         smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
  
         if (rc == 0) {
-               rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
-               rc = smk_bu_inode(dentry->d_inode, MAY_WRITE, rc);
+               rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_WRITE, &ad);
+               rc = smk_bu_inode(d_backing_inode(dentry), MAY_WRITE, rc);
         }
  
         return rc;
@@ -1164,7 +1164,7 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name,
                                       const void *value, size_t size, int flags)
  {
         struct smack_known *skp;
-       struct inode_smack *isp = dentry->d_inode->i_security;
+       struct inode_smack *isp = d_backing_inode(dentry)->i_security;
  
         if (strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) {
                 isp->smk_flags |= SMK_INODE_TRANSMUTE;
@@ -1209,8 +1209,8 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name)
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
         smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
  
-       rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad);
-       rc = smk_bu_inode(dentry->d_inode, MAY_READ, rc);
+       rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_READ, &ad);
+       rc = smk_bu_inode(d_backing_inode(dentry), MAY_READ, rc);
         return rc;
  }
  
@@ -1246,12 +1246,12 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
         smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_DENTRY);
         smk_ad_setfield_u_fs_path_dentry(&ad, dentry);
  
-       rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad);
-       rc = smk_bu_inode(dentry->d_inode, MAY_WRITE, rc);
+       rc = smk_curacc(smk_of_inode(d_backing_inode(dentry)), MAY_WRITE, &ad);
+       rc = smk_bu_inode(d_backing_inode(dentry), MAY_WRITE, rc);
         if (rc != 0)
                 return rc;
  
-       isp = dentry->d_inode->i_security;
+       isp = d_backing_inode(dentry)->i_security;
         /*
          * Don't do anything special for these.
          *      XATTR_NAME_SMACKIPIN
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c

index 06f719e..d968298 100644 (file)
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -2490,7 +2490,7 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent)
                 return rc;
         }
  
-       root_inode = sb->s_root->d_inode;
+       root_inode = d_inode(sb->s_root);
  
         return 0;
  }
diff --git a/security/tomoyo/condition.c b/security/tomoyo/condition.c

index 63681e8..6c4528d 100644 (file)
--- a/security/tomoyo/condition.c
+++ b/security/tomoyo/condition.c
@@ -714,7 +714,7 @@ void tomoyo_get_attributes(struct tomoyo_obj_info *obj)
                         dentry = dget_parent(dentry);
                         break;
                 }
-               inode = dentry->d_inode;
+               inode = d_backing_inode(dentry);
                 if (inode) {
                         struct tomoyo_mini_stat *stat = &obj->stat[i];
                         stat->uid  = inode->i_uid;
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c

index 1e0d480..5077f19 100644 (file)
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -97,7 +97,7 @@ static char *tomoyo_get_absolute_path(const struct path *path, char * const buff
                 /* go to whatever namespace root we are under */
                 pos = d_absolute_path(path, buffer, buflen - 1);
                 if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
-                       struct inode *inode = path->dentry->d_inode;
+                       struct inode *inode = d_backing_inode(path->dentry);
                         if (inode && S_ISDIR(inode->i_mode)) {
                                 buffer[buflen - 2] = '/';
                                 buffer[buflen - 1] = '\0';
@@ -125,7 +125,7 @@ static char *tomoyo_get_dentry_path(struct dentry *dentry, char * const buffer,
         if (buflen >= 256) {
                 pos = dentry_path_raw(dentry, buffer, buflen - 1);
                 if (!IS_ERR(pos) && *pos == '/' && pos[1]) {
-                       struct inode *inode = dentry->d_inode;
+                       struct inode *inode = d_backing_inode(dentry);
                         if (inode && S_ISDIR(inode->i_mode)) {
                                 buffer[buflen - 2] = '/';
                                 buffer[buflen - 1] = '\0';
@@ -168,7 +168,7 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer,
         if (!MAJOR(sb->s_dev))
                 goto prepend_filesystem_name;
         {
-               struct inode *inode = sb->s_root->d_inode;
+               struct inode *inode = d_backing_inode(sb->s_root);
                 /*
                  * Use filesystem name if filesystem does not support rename()
                  * operation.
@@ -219,7 +219,7 @@ out:
  static char *tomoyo_get_socket_name(const struct path *path, char * const buffer,
                                     const int buflen)
  {
-       struct inode *inode = path->dentry->d_inode;
+       struct inode *inode = d_backing_inode(path->dentry);
         struct socket *sock = inode ? SOCKET_I(inode) : NULL;
         struct sock *sk = sock ? sock->sk : NULL;
         if (sk) {
@@ -277,7 +277,7 @@ char *tomoyo_realpath_from_path(const struct path *path)
                         pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1);
                         goto encode;
                 }
-               inode = sb->s_root->d_inode;
+               inode = d_backing_inode(sb->s_root);
                 /*
                  * Get local name for filesystems without rename() operation
                  * or dentry without vfsmount.
diff --git a/sound/oss/sequencer.c b/sound/oss/sequencer.c

index c0eea1d..f19da4b 100644 (file)
--- a/sound/oss/sequencer.c
+++ b/sound/oss/sequencer.c
@@ -681,13 +681,8 @@ static int seq_timing_event(unsigned char *event_rec)
                         break;
  
                 case TMR_ECHO:
-                       if (seq_mode == SEQ_2)
-                               seq_copy_to_input(event_rec, 8);
-                       else
-                       {
-                               parm = (parm << 8 | SEQ_ECHO);
-                               seq_copy_to_input((unsigned char *) &parm, 4);
-                       }
+                       parm = (parm << 8 | SEQ_ECHO);
+                       seq_copy_to_input((unsigned char *) &parm, 4);
                         break;
  
                 default:;
@@ -1324,7 +1319,6 @@ int sequencer_ioctl(int dev, struct file *file, unsigned int cmd, void __user *a
         int mode = translate_mode(file);
         struct synth_info inf;
         struct seq_event_rec event_rec;
-       unsigned long flags;
         int __user *p = arg;
  
         orig_dev = dev = dev >> 4;
@@ -1479,9 +1473,7 @@ int sequencer_ioctl(int dev, struct file *file, unsigned int cmd, void __user *a
                 case SNDCTL_SEQ_OUTOFBAND:
                         if (copy_from_user(&event_rec, arg, sizeof(event_rec)))
                                 return -EFAULT;
-                       spin_lock_irqsave(&lock,flags);
                         play_event(event_rec.arr);
-                       spin_unlock_irqrestore(&lock,flags);
                         return 0;
  
                 case SNDCTL_MIDI_INFO:
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c

index e70a7fb..873ed1b 100644 (file)
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -2529,7 +2529,7 @@ static void set_dig_out(struct hda_codec *codec, hda_nid_t nid,
         if (!d)
                 return;
         for (; *d; d++)
-               snd_hdac_regmap_update(&codec->core, nid,
+               snd_hdac_regmap_update(&codec->core, *d,
                                        AC_VERB_SET_DIGI_CONVERT_1, mask, val);
  }
  
diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h

index be1b7de..0efdb09 100644 (file)
--- a/sound/pci/hda/hda_controller.h
+++ b/sound/pci/hda/hda_controller.h
@@ -404,7 +404,7 @@ struct azx {
         ((chip)->ops->reg_readb((dev)->sd_addr + AZX_REG_##reg))
  
  #define azx_has_pm_runtime(chip) \
-       (!AZX_DCAPS_PM_RUNTIME || ((chip)->driver_caps & AZX_DCAPS_PM_RUNTIME))
+       ((chip)->driver_caps & AZX_DCAPS_PM_RUNTIME)
  
  /* PCM setup */
  static inline struct azx_dev *get_azx_dev(struct snd_pcm_substream *substream)
diff --git a/sound/pci/hda/hda_i915.c b/sound/pci/hda/hda_i915.c

index 52a85d8..3052a2b 100644 (file)
--- a/sound/pci/hda/hda_i915.c
+++ b/sound/pci/hda/hda_i915.c
@@ -55,6 +55,12 @@ void haswell_set_bclk(struct hda_intel *hda)
         int cdclk_freq;
         unsigned int bclk_m, bclk_n;
         struct i915_audio_component *acomp = &hda->audio_component;
+       struct pci_dev *pci = hda->chip.pci;
+
+       /* Only Haswell/Broadwell need set BCLK */
+       if (pci->device != 0x0a0c && pci->device != 0x0c0c
+          && pci->device != 0x0d0c && pci->device != 0x160c)
+               return;
  
         if (!acomp->ops)
                 return;
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c

index e1c2105..34040d2 100644 (file)
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -297,6 +297,9 @@ enum {
          AZX_DCAPS_PM_RUNTIME | AZX_DCAPS_I915_POWERWELL |\
          AZX_DCAPS_SNOOP_TYPE(SCH))
  
+#define AZX_DCAPS_INTEL_BAYTRAIL \
+       (AZX_DCAPS_INTEL_PCH_NOPM | AZX_DCAPS_I915_POWERWELL)
+
  #define AZX_DCAPS_INTEL_BRASWELL \
         (AZX_DCAPS_INTEL_PCH | AZX_DCAPS_I915_POWERWELL)
  
@@ -1992,7 +1995,7 @@ static const struct pci_device_id azx_ids[] = {
           .driver_data = AZX_DRIVER_SCH | AZX_DCAPS_INTEL_PCH_NOPM },
         /* BayTrail */
         { PCI_DEVICE(0x8086, 0x0f04),
-         .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH_NOPM },
+         .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BAYTRAIL },
         /* Braswell */
         { PCI_DEVICE(0x8086, 0x2284),
           .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_BRASWELL },
diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c

index ee62307..baaf7ed 100644 (file)
--- a/sound/pci/hda/hda_proc.c
+++ b/sound/pci/hda/hda_proc.c
@@ -582,8 +582,8 @@ static void print_conn_list(struct snd_info_buffer *buffer,
  
         /* Get Cache connections info */
         cache_len = snd_hda_get_conn_list(codec, nid, &list);
-       if (cache_len != conn_len
-                       || memcmp(list, conn, conn_len)) {
+       if (cache_len >= 0 && (cache_len != conn_len ||
+                             memcmp(list, conn, conn_len) != 0)) {
                 snd_iprintf(buffer, "  In-driver Connection: %d\n", cache_len);
                 if (cache_len > 0) {
                         snd_iprintf(buffer, "    ");
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c

index b18b9c6..06199e4 100644 (file)
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -4176,17 +4176,15 @@ static void alc_fixup_disable_aamix(struct hda_codec *codec,
         }
  }
  
-static unsigned int alc_power_filter_xps13(struct hda_codec *codec,
-                               hda_nid_t nid,
-                               unsigned int power_state)
+static void alc_shutup_dell_xps13(struct hda_codec *codec)
  {
         struct alc_spec *spec = codec->spec;
+       int hp_pin = spec->gen.autocfg.hp_pins[0];
  
-       /* Avoid pop noises when headphones are plugged in */
-       if (spec->gen.hp_jack_present)
-               if (nid == codec->core.afg || nid == 0x02 || nid == 0x15)
-                       return AC_PWRST_D0;
-       return snd_hda_gen_path_power_filter(codec, nid, power_state);
+       /* Prevent pop noises when headphones are plugged in */
+       snd_hda_codec_write(codec, hp_pin, 0,
+                           AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
+       msleep(20);
  }
  
  static void alc_fixup_dell_xps13(struct hda_codec *codec,
@@ -4197,8 +4195,7 @@ static void alc_fixup_dell_xps13(struct hda_codec *codec,
                 struct hda_input_mux *imux = &spec->gen.input_mux;
                 int i;
  
-               spec->shutup = alc_no_shutup;
-               codec->power_filter = alc_power_filter_xps13;
+               spec->shutup = alc_shutup_dell_xps13;
  
                 /* Make the internal mic the default input source. */
                 for (i = 0; i < imux->num_items; i++) {
@@ -5231,6 +5228,16 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
         {0x1b, 0x411111f0}, \
         {0x1e, 0x411111f0}
  
+#define ALC256_STANDARD_PINS \
+       {0x12, 0x90a60140}, \
+       {0x14, 0x90170110}, \
+       {0x19, 0x411111f0}, \
+       {0x1a, 0x411111f0}, \
+       {0x1b, 0x411111f0}, \
+       {0x1d, 0x40700001}, \
+       {0x1e, 0x411111f0}, \
+       {0x21, 0x02211020}
+
  #define ALC282_STANDARD_PINS \
         {0x14, 0x90170110}, \
         {0x18, 0x411111f0}, \
@@ -5331,15 +5338,11 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                 {0x1d, 0x40700001},
                 {0x21, 0x02211050}),
         SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
-               {0x12, 0x90a60140},
-               {0x13, 0x40000000},
-               {0x14, 0x90170110},
-               {0x19, 0x411111f0},
-               {0x1a, 0x411111f0},
-               {0x1b, 0x411111f0},
-               {0x1d, 0x40700001},
-               {0x1e, 0x411111f0},
-               {0x21, 0x02211020}),
+               ALC256_STANDARD_PINS,
+               {0x13, 0x40000000}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               ALC256_STANDARD_PINS,
+               {0x13, 0x411111f0}),
         SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
                 {0x12, 0x90a60130},
                 {0x13, 0x40000000},
@@ -5667,6 +5670,8 @@ static int patch_alc269(struct hda_codec *codec)
                 break;
         case 0x10ec0256:
                 spec->codec_variant = ALC269_TYPE_ALC256;
+               spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
+               alc_update_coef_idx(codec, 0x36, 1 << 13, 1 << 5); /* Switch pcbeep path to Line in path*/
                 break;
         }
  
@@ -5680,8 +5685,8 @@ static int patch_alc269(struct hda_codec *codec)
         if (err < 0)
                 goto error;
  
-       if (!spec->gen.no_analog && spec->gen.beep_nid)
-               set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT);
+       if (!spec->gen.no_analog && spec->gen.beep_nid && spec->gen.mixer_nid)
+               set_beep_amp(spec, spec->gen.mixer_nid, 0x04, HDA_INPUT);
  
         codec->patch_ops = alc_patch_ops;
         codec->patch_ops.stream_pm = snd_hda_gen_stream_pm;
diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c

index 749069a..b120925 100644 (file)
--- a/sound/pci/intel8x0.c
+++ b/sound/pci/intel8x0.c
@@ -3101,13 +3101,13 @@ static int snd_intel8x0_create(struct snd_card *card,
                 chip->bmaddr = pci_iomap(pci, 3, 0);
         else
                 chip->bmaddr = pci_iomap(pci, 1, 0);
+
+ port_inited:
         if (!chip->bmaddr) {
                 dev_err(card->dev, "Controller space ioremap problem\n");
                 snd_intel8x0_free(chip);
                 return -EIO;
         }
-
- port_inited:
         chip->bdbars_count = bdbars[device_type];
  
         /* initialize offsets */
diff --git a/sound/soc/sh/fsi.c b/sound/soc/sh/fsi.c

index 0c2af21..142c066 100644 (file)
--- a/sound/soc/sh/fsi.c
+++ b/sound/soc/sh/fsi.c
@@ -250,6 +250,7 @@ struct fsi_clk {
  
  struct fsi_priv {
         void __iomem *base;
+       phys_addr_t phys;
         struct fsi_master *master;
  
         struct fsi_stream playback;
@@ -1371,13 +1372,18 @@ static int fsi_dma_probe(struct fsi_priv *fsi, struct fsi_stream *io, struct dev
                                 shdma_chan_filter, (void *)io->dma_id,
                                 dev, is_play ? "tx" : "rx");
         if (io->chan) {
-               struct dma_slave_config cfg;
+               struct dma_slave_config cfg = {};
                 int ret;
  
-               cfg.slave_id    = io->dma_id;
-               cfg.dst_addr    = 0; /* use default addr */
-               cfg.src_addr    = 0; /* use default addr */
-               cfg.direction   = is_play ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
+               if (is_play) {
+                       cfg.dst_addr            = fsi->phys + REG_DODT;
+                       cfg.dst_addr_width      = DMA_SLAVE_BUSWIDTH_4_BYTES;
+                       cfg.direction           = DMA_MEM_TO_DEV;
+               } else {
+                       cfg.src_addr            = fsi->phys + REG_DIDT;
+                       cfg.src_addr_width      = DMA_SLAVE_BUSWIDTH_4_BYTES;
+                       cfg.direction           = DMA_DEV_TO_MEM;
+               }
  
                 ret = dmaengine_slave_config(io->chan, &cfg);
                 if (ret < 0) {
@@ -1974,6 +1980,7 @@ static int fsi_probe(struct platform_device *pdev)
         /* FSI A setting */
         fsi             = &master->fsia;
         fsi->base       = master->base;
+       fsi->phys       = res->start;
         fsi->master     = master;
         fsi_port_info_init(fsi, &info.port_a);
         fsi_handler_init(fsi, &info.port_a);
@@ -1986,6 +1993,7 @@ static int fsi_probe(struct platform_device *pdev)
         /* FSI B setting */
         fsi             = &master->fsib;
         fsi->base       = master->base + 0x40;
+       fsi->phys       = res->start + 0x40;
         fsi->master     = master;
         fsi_port_info_init(fsi, &info.port_b);
         fsi_handler_init(fsi, &info.port_b);
diff --git a/sound/usb/format.c b/sound/usb/format.c

index 8bcc87c..789d19e 100644 (file)
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -79,7 +79,10 @@ static u64 parse_audio_format_i_type(struct snd_usb_audio *chip,
                 format = 1 << UAC_FORMAT_TYPE_I_PCM;
         }
         if (format & (1 << UAC_FORMAT_TYPE_I_PCM)) {
-               if (chip->usb_id == USB_ID(0x0582, 0x0016) /* Edirol SD-90 */ &&
+               if (((chip->usb_id == USB_ID(0x0582, 0x0016)) ||
+                    /* Edirol SD-90 */
+                    (chip->usb_id == USB_ID(0x0582, 0x000c))) &&
+                    /* Roland SC-D70 */
                     sample_width == 24 && sample_bytes == 2)
                         sample_bytes = 3;
                 else if (sample_width > sample_bytes * 8) {
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h

index 07f984d..2f6d3e9 100644 (file)
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -816,37 +816,11 @@ YAMAHA_DEVICE(0x7010, "UB99"),
                 .data = (const struct snd_usb_audio_quirk[]) {
                         {
                                 .ifnum = 0,
-                               .type = QUIRK_AUDIO_FIXED_ENDPOINT,
-                               .data = & (const struct audioformat) {
-                                       .formats = SNDRV_PCM_FMTBIT_S24_3LE,
-                                       .channels = 2,
-                                       .iface = 0,
-                                       .altsetting = 1,
-                                       .altset_idx = 1,
-                                       .attributes = 0,
-                                       .endpoint = 0x01,
-                                       .ep_attr = 0x01,
-                                       .rates = SNDRV_PCM_RATE_CONTINUOUS,
-                                       .rate_min = 44100,
-                                       .rate_max = 44100,
-                               }
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE
                         },
                         {
                                 .ifnum = 1,
-                               .type = QUIRK_AUDIO_FIXED_ENDPOINT,
-                               .data = & (const struct audioformat) {
-                                       .formats = SNDRV_PCM_FMTBIT_S24_3LE,
-                                       .channels = 2,
-                                       .iface = 1,
-                                       .altsetting = 1,
-                                       .altset_idx = 1,
-                                       .attributes = 0,
-                                       .endpoint = 0x81,
-                                       .ep_attr = 0x01,
-                                       .rates = SNDRV_PCM_RATE_CONTINUOUS,
-                                       .rate_min = 44100,
-                                       .rate_max = 44100,
-                               }
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE
                         },
                         {
                                 .ifnum = 2,
diff --git a/tools/power/cpupower/utils/helpers/pci.c b/tools/power/cpupower/utils/helpers/pci.c

index 9690798..8b27898 100644 (file)
--- a/tools/power/cpupower/utils/helpers/pci.c
+++ b/tools/power/cpupower/utils/helpers/pci.c
@@ -25,14 +25,21 @@
  struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, int bus,
                              int slot, int func, int vendor, int dev)
  {
-       struct pci_filter filter_nb_link = { domain, bus, slot, func,
-                                            vendor, dev };
+       struct pci_filter filter_nb_link;
         struct pci_dev *device;
  
         *pacc = pci_alloc();
         if (*pacc == NULL)
                 return NULL;
  
+       pci_filter_init(*pacc, &filter_nb_link);
+       filter_nb_link.domain   = domain;
+       filter_nb_link.bus      = bus;
+       filter_nb_link.slot     = slot;
+       filter_nb_link.func     = func;
+       filter_nb_link.vendor   = vendor;
+       filter_nb_link.device   = dev;
+
         pci_init(*pacc);
         pci_scan_bus(*pacc);
  
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c

index 8d550ff..78fb820 100644 (file)
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1561,6 +1561,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
                         goto out;
         }
  
+       if (irq_num >= kvm->arch.vgic.nr_irqs)
+               return -EINVAL;
+
         vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
         if (vcpu_id >= 0) {
                 /* kick the specified vcpu */
@@ -2141,7 +2144,7 @@ int kvm_irq_map_gsi(struct kvm *kvm,
                     struct kvm_kernel_irq_routing_entry *entries,
                     int gsi)
  {
-       return gsi;
+       return 0;
  }
  
  int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index d3fc939..9097741 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -89,6 +89,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
  static __read_mostly struct preempt_ops kvm_preempt_ops;
  
  struct dentry *kvm_debugfs_dir;
+EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
  
  static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
                            unsigned long arg);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 27 Apr 2015 21:05:19 +0000 (14:05 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Mon, 27 Apr 2015 21:05:19 +0000 (14:05 -0700)