Merge branch 'sysfs-fsdevices-4.2-part1' of git://git.kernel.org/pub/scm/linux/kernel...
authorChris Mason <clm@fb.com>
Tue, 23 Jun 2015 12:34:39 +0000 (05:34 -0700)
committerChris Mason <clm@fb.com>
Tue, 23 Jun 2015 12:34:39 +0000 (05:34 -0700)
231 files changed:
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/hwmon/tmp401
Documentation/target/tcmu-design.txt
MAINTAINERS
Makefile
arch/alpha/boot/Makefile
arch/alpha/boot/main.c
arch/alpha/boot/stdio.c [new file with mode: 0644]
arch/alpha/boot/tools/objstrip.c
arch/alpha/include/asm/types.h
arch/alpha/include/asm/unistd.h
arch/alpha/include/uapi/asm/unistd.h
arch/alpha/kernel/err_ev6.c
arch/alpha/kernel/irq.c
arch/alpha/kernel/osf_sys.c
arch/alpha/kernel/process.c
arch/alpha/kernel/smp.c
arch/alpha/kernel/srmcons.c
arch/alpha/kernel/sys_marvel.c
arch/alpha/kernel/systbls.S
arch/alpha/kernel/traps.c
arch/alpha/oprofile/op_model_ev4.c
arch/alpha/oprofile/op_model_ev5.c
arch/alpha/oprofile/op_model_ev6.c
arch/alpha/oprofile/op_model_ev67.c
arch/arm/boot/dts/Makefile
arch/arm/boot/dts/am335x-boneblack.dts
arch/arm/boot/dts/am335x-evmsk.dts
arch/arm/boot/dts/exynos4412-trats2.dts
arch/arm/boot/dts/imx27.dtsi
arch/arm/boot/dts/omap3-devkit8000.dts
arch/arm/configs/multi_v7_defconfig
arch/arm/kernel/entry-common.S
arch/arm/kernel/perf_event_cpu.c
arch/arm/mach-imx/gpc.c
arch/arm/mach-pxa/pxa_cplds_irqs.c
arch/arm/mm/mmu.c
arch/ia64/pci/pci.c
arch/mips/ath79/prom.c
arch/mips/configs/fuloong2e_defconfig
arch/mips/kernel/irq.c
arch/mips/kernel/smp-bmips.c
arch/mips/lib/strnlen_user.S
arch/x86/include/uapi/asm/msr-index.h
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/i387.c
arch/x86/net/bpf_jit_comp.c
arch/x86/pci/acpi.c
arch/xtensa/include/asm/dma-mapping.h
crypto/Kconfig
drivers/bus/mips_cdmm.c
drivers/gpio/gpio-kempld.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/drm_plane_helper.c
drivers/gpu/drm/nouveau/include/nvif/class.h
drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gf100.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm204.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
drivers/gpu/drm/radeon/atombios_crtc.c
drivers/gpu/drm/radeon/cik.c
drivers/gpu/drm/radeon/evergreen.c
drivers/gpu/drm/radeon/evergreen_hdmi.c
drivers/gpu/drm/radeon/ni.c
drivers/gpu/drm/radeon/r600.c
drivers/gpu/drm/radeon/radeon_audio.c
drivers/gpu/drm/radeon/radeon_connectors.c
drivers/gpu/drm/radeon/rv770.c
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/vgem/Makefile
drivers/gpu/drm/vgem/vgem_dma_buf.c [deleted file]
drivers/gpu/drm/vgem/vgem_drv.c
drivers/gpu/drm/vgem/vgem_drv.h
drivers/hwmon/nct6683.c
drivers/hwmon/nct6775.c
drivers/hwmon/ntc_thermistor.c
drivers/hwmon/tmp401.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/lguest/core.c
drivers/md/dm-mpath.c
drivers/md/dm-table.c
drivers/md/dm.c
drivers/md/md.c
drivers/md/raid5.c
drivers/md/raid5.h
drivers/mfd/da9052-core.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/ibm/emac/core.c
drivers/net/ethernet/ibm/emac/core.h
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac.h
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/phy/amd-xgbe-phy.c
drivers/net/phy/bcm7xxx.c
drivers/net/phy/dp83640.c
drivers/net/wireless/iwlwifi/Kconfig
drivers/net/wireless/iwlwifi/iwl-7000.c
drivers/net/wireless/iwlwifi/iwl-eeprom-parse.c
drivers/net/wireless/iwlwifi/iwl-eeprom-parse.h
drivers/net/wireless/iwlwifi/iwl-nvm-parse.c
drivers/net/wireless/iwlwifi/mvm/coex_legacy.c
drivers/net/wireless/iwlwifi/mvm/d3.c
drivers/net/wireless/iwlwifi/mvm/mac80211.c
drivers/net/wireless/iwlwifi/mvm/ops.c
drivers/net/wireless/iwlwifi/mvm/rs.c
drivers/net/wireless/iwlwifi/pcie/trans.c
drivers/net/xen-netback/xenbus.c
drivers/net/xen-netfront.c
drivers/pinctrl/bcm/pinctrl-cygnus-gpio.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/meson/pinctrl-meson.c
drivers/pinctrl/meson/pinctrl-meson8b.c
drivers/platform/x86/thinkpad_acpi.c
drivers/regulator/da9052-regulator.c
drivers/scsi/qla2xxx/tcm_qla2xxx.c
drivers/ssb/driver_pcicore.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_login.c
drivers/target/iscsi/iscsi_target_tpg.c
drivers/target/target_core_alua.c
drivers/target/target_core_configfs.c
drivers/target/target_core_device.c
drivers/target/target_core_file.c
drivers/target/target_core_iblock.c
drivers/target/target_core_internal.h
drivers/target/target_core_pr.c
drivers/target/target_core_pscsi.c
drivers/target/target_core_pscsi.h
drivers/target/target_core_rd.c
drivers/target/target_core_sbc.c
drivers/target/target_core_transport.c
drivers/target/target_core_user.c
drivers/target/target_core_xcopy.c
drivers/tty/mips_ejtag_fdc.c
drivers/vhost/scsi.c
drivers/video/backlight/pwm_bl.c
fs/binfmt_elf.c
fs/btrfs/async-thread.c
fs/btrfs/async-thread.h
fs/btrfs/backref.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent-tree.h [new file with mode: 0644]
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/tests/qgroup-tests.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-defrag.c
fs/btrfs/tree-log.c
fs/btrfs/ulist.c
fs/btrfs/ulist.h
fs/btrfs/volumes.c
fs/cifs/cifs_dfs_ref.c
fs/cifs/cifs_unicode.c
fs/cifs/cifsfs.c
fs/cifs/cifsproto.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/link.c
fs/cifs/readdir.c
fs/cifs/smb1ops.c
fs/cifs/smb2pdu.c
fs/dcache.c
fs/omfs/bitmap.c
fs/omfs/inode.c
fs/overlayfs/copy_up.c
fs/overlayfs/dir.c
fs/overlayfs/super.c
fs/seq_file.c
fs/xfs/libxfs/xfs_attr_leaf.c
fs/xfs/libxfs/xfs_attr_leaf.h
fs/xfs/libxfs/xfs_bmap.c
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/xfs_attr_inactive.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_mount.c
include/linux/brcmphy.h
include/linux/cpumask.h
include/linux/percpu_counter.h
include/net/sctp/sctp.h
include/target/target_core_backend.h
include/target/target_core_configfs.h
include/target/target_core_fabric.h
include/trace/events/btrfs.h
include/trace/events/kmem.h
kernel/module.c
lib/cpumask.c
lib/percpu_counter.c
net/caif/caif_socket.c
net/mac80211/cfg.c
net/mac80211/ieee80211_i.h
net/mac80211/iface.c
net/mac80211/key.c
net/mac80211/key.h
net/mac80211/rx.c
net/mac80211/util.c
net/sched/sch_api.c
net/unix/af_unix.c
scripts/gdb/linux/modules.py
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/patch_sigmatel.c
sound/pci/hda/thinkpad_helper.c
sound/usb/quirks.c
tools/net/bpf_jit_disasm.c
tools/power/x86/turbostat/turbostat.c

index 99983e6..da95513 100644 (file)
@@ -162,7 +162,7 @@ Description:        Discover CPUs in the same CPU frequency coordination domain
 What:          /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
 Date:          August 2008
 KernelVersion: 2.6.27
-Contact:       discuss@x86-64.org
+Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Disable L3 cache indices
 
                These files exist in every CPU's cache/index3 directory. Each
index 8eb88e9..711f75e 100644 (file)
@@ -20,7 +20,7 @@ Supported chips:
     Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp432.html
   * Texas Instruments TMP435
     Prefix: 'tmp435'
-    Addresses scanned: I2C 0x37, 0x48 - 0x4f
+    Addresses scanned: I2C 0x48 - 0x4f
     Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp435.html
 
 Authors:
index 43e94ea..263b907 100644 (file)
@@ -15,8 +15,7 @@ Contents:
   a) Discovering and configuring TCMU uio devices
   b) Waiting for events on the device(s)
   c) Managing the command ring
-3) Command filtering and pass_level
-4) A final note
+3) A final note
 
 
 TCM Userspace Design
@@ -324,7 +323,7 @@ int handle_device_events(int fd, void *map)
   /* Process events from cmd ring until we catch up with cmd_head */
   while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) {
 
-    if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) {
+    if (tcmu_hdr_get_op(ent->hdr.len_op) == TCMU_OP_CMD) {
       uint8_t *cdb = (void *)mb + ent->req.cdb_off;
       bool success = true;
 
@@ -339,8 +338,12 @@ int handle_device_events(int fd, void *map)
         ent->rsp.scsi_status = SCSI_CHECK_CONDITION;
       }
     }
+    else if (tcmu_hdr_get_op(ent->hdr.len_op) != TCMU_OP_PAD) {
+      /* Tell the kernel we didn't handle unknown opcodes */
+      ent->hdr.uflags |= TCMU_UFLAG_UNKNOWN_OP;
+    }
     else {
-      /* Do nothing for PAD entries */
+      /* Do nothing for PAD entries except update cmd_tail */
     }
 
     /* update cmd_tail */
@@ -360,28 +363,6 @@ int handle_device_events(int fd, void *map)
 }
 
 
-Command filtering and pass_level
---------------------------------
-
-TCMU supports a "pass_level" option with valid values of 0 or 1.  When
-the value is 0 (the default), nearly all SCSI commands received for
-the device are passed through to the handler. This allows maximum
-flexibility but increases the amount of code required by the handler,
-to support all mandatory SCSI commands. If pass_level is set to 1,
-then only IO-related commands are presented, and the rest are handled
-by LIO's in-kernel command emulation. The commands presented at level
-1 include all versions of:
-
-READ
-WRITE
-WRITE_VERIFY
-XDWRITEREAD
-WRITE_SAME
-COMPARE_AND_WRITE
-SYNCHRONIZE_CACHE
-UNMAP
-
-
 A final note
 ------------
 
index 474bcb6..af802b3 100644 (file)
@@ -2427,7 +2427,6 @@ L:        linux-security-module@vger.kernel.org
 S:     Supported
 F:     include/linux/capability.h
 F:     include/uapi/linux/capability.h
-F:     security/capability.c
 F:     security/commoncap.c
 F:     kernel/capability.c
 
index 92a7078..aee7e5c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
 NAME = Hurr durr I'ma sheep
 
 # *DOCUMENTATION*
index cd14388..8399bd0 100644 (file)
@@ -14,6 +14,9 @@ targets               := vmlinux.gz vmlinux \
                   tools/bootpzh bootloader bootpheader bootpzheader 
 OBJSTRIP       := $(obj)/tools/objstrip
 
+HOSTCFLAGS     := -Wall -I$(objtree)/usr/include
+BOOTCFLAGS     += -I$(obj) -I$(srctree)/$(obj)
+
 # SRM bootable image.  Copy to offset 512 of a partition.
 $(obj)/bootimage: $(addprefix $(obj)/tools/,mkbb lxboot bootlx) $(obj)/vmlinux.nh
        ( cat $(obj)/tools/lxboot $(obj)/tools/bootlx $(obj)/vmlinux.nh ) > $@ 
@@ -96,13 +99,14 @@ $(obj)/tools/bootph: $(obj)/bootpheader $(OBJSTRIP) FORCE
 $(obj)/tools/bootpzh: $(obj)/bootpzheader $(OBJSTRIP) FORCE
        $(call if_changed,objstrip)
 
-LDFLAGS_bootloader   := -static -uvsprintf -T  #-N -relax
-LDFLAGS_bootpheader  := -static -uvsprintf -T  #-N -relax
-LDFLAGS_bootpzheader := -static -uvsprintf -T  #-N -relax
+LDFLAGS_bootloader   := -static -T # -N -relax
+LDFLAGS_bootloader   := -static -T # -N -relax
+LDFLAGS_bootpheader  := -static -T # -N -relax
+LDFLAGS_bootpzheader := -static -T # -N -relax
 
-OBJ_bootlx   := $(obj)/head.o $(obj)/main.o
-OBJ_bootph   := $(obj)/head.o $(obj)/bootp.o
-OBJ_bootpzh  := $(obj)/head.o $(obj)/bootpz.o $(obj)/misc.o
+OBJ_bootlx   := $(obj)/head.o $(obj)/stdio.o $(obj)/main.o
+OBJ_bootph   := $(obj)/head.o $(obj)/stdio.o $(obj)/bootp.o
+OBJ_bootpzh  := $(obj)/head.o $(obj)/stdio.o $(obj)/bootpz.o $(obj)/misc.o
 
 $(obj)/bootloader: $(obj)/bootloader.lds $(OBJ_bootlx) $(LIBS_Y) FORCE
        $(call if_changed,ld)
index 3baf2d1..dd6eb4a 100644 (file)
@@ -19,7 +19,6 @@
 
 #include "ksize.h"
 
-extern int vsprintf(char *, const char *, va_list);
 extern unsigned long switch_to_osf_pal(unsigned long nr,
        struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa,
        unsigned long *vptb);
diff --git a/arch/alpha/boot/stdio.c b/arch/alpha/boot/stdio.c
new file mode 100644 (file)
index 0000000..f844dae
--- /dev/null
@@ -0,0 +1,306 @@
+/*
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <stdarg.h>
+#include <stddef.h>
+
+size_t strnlen(const char * s, size_t count)
+{
+       const char *sc;
+
+       for (sc = s; count-- && *sc != '\0'; ++sc)
+               /* nothing */;
+       return sc - s;
+}
+
+# define do_div(n, base) ({                                            \
+       unsigned int __base = (base);                                   \
+       unsigned int __rem;                                             \
+       __rem = ((unsigned long long)(n)) % __base;                     \
+       (n) = ((unsigned long long)(n)) / __base;                       \
+       __rem;                                                          \
+})
+
+
+static int skip_atoi(const char **s)
+{
+       int i, c;
+
+       for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s)
+               i = i*10 + c - '0';
+       return i;
+}
+
+#define ZEROPAD        1               /* pad with zero */
+#define SIGN   2               /* unsigned/signed long */
+#define PLUS   4               /* show plus */
+#define SPACE  8               /* space if plus */
+#define LEFT   16              /* left justified */
+#define SPECIAL        32              /* 0x */
+#define LARGE  64              /* use 'ABCDEF' instead of 'abcdef' */
+
+static char * number(char * str, unsigned long long num, int base, int size, int precision, int type)
+{
+       char c,sign,tmp[66];
+       const char *digits="0123456789abcdefghijklmnopqrstuvwxyz";
+       int i;
+
+       if (type & LARGE)
+               digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+       if (type & LEFT)
+               type &= ~ZEROPAD;
+       if (base < 2 || base > 36)
+               return 0;
+       c = (type & ZEROPAD) ? '0' : ' ';
+       sign = 0;
+       if (type & SIGN) {
+               if ((signed long long)num < 0) {
+                       sign = '-';
+                       num = - (signed long long)num;
+                       size--;
+               } else if (type & PLUS) {
+                       sign = '+';
+                       size--;
+               } else if (type & SPACE) {
+                       sign = ' ';
+                       size--;
+               }
+       }
+       if (type & SPECIAL) {
+               if (base == 16)
+                       size -= 2;
+               else if (base == 8)
+                       size--;
+       }
+       i = 0;
+       if (num == 0)
+               tmp[i++]='0';
+       else while (num != 0) {
+               tmp[i++] = digits[do_div(num, base)];
+       }
+       if (i > precision)
+               precision = i;
+       size -= precision;
+       if (!(type&(ZEROPAD+LEFT)))
+               while(size-->0)
+                       *str++ = ' ';
+       if (sign)
+               *str++ = sign;
+       if (type & SPECIAL) {
+               if (base==8)
+                       *str++ = '0';
+               else if (base==16) {
+                       *str++ = '0';
+                       *str++ = digits[33];
+               }
+       }
+       if (!(type & LEFT))
+               while (size-- > 0)
+                       *str++ = c;
+       while (i < precision--)
+               *str++ = '0';
+       while (i-- > 0)
+               *str++ = tmp[i];
+       while (size-- > 0)
+               *str++ = ' ';
+       return str;
+}
+
+int vsprintf(char *buf, const char *fmt, va_list args)
+{
+       int len;
+       unsigned long long num;
+       int i, base;
+       char * str;
+       const char *s;
+
+       int flags;              /* flags to number() */
+
+       int field_width;        /* width of output field */
+       int precision;          /* min. # of digits for integers; max
+                                  number of chars for from string */
+       int qualifier;          /* 'h', 'l', or 'L' for integer fields */
+                               /* 'z' support added 23/7/1999 S.H.    */
+                               /* 'z' changed to 'Z' --davidm 1/25/99 */
+
+
+       for (str=buf ; *fmt ; ++fmt) {
+               if (*fmt != '%') {
+                       *str++ = *fmt;
+                       continue;
+               }
+
+               /* process flags */
+               flags = 0;
+               repeat:
+                       ++fmt;          /* this also skips first '%' */
+                       switch (*fmt) {
+                               case '-': flags |= LEFT; goto repeat;
+                               case '+': flags |= PLUS; goto repeat;
+                               case ' ': flags |= SPACE; goto repeat;
+                               case '#': flags |= SPECIAL; goto repeat;
+                               case '0': flags |= ZEROPAD; goto repeat;
+                               }
+
+               /* get field width */
+               field_width = -1;
+               if ('0' <= *fmt && *fmt <= '9')
+                       field_width = skip_atoi(&fmt);
+               else if (*fmt == '*') {
+                       ++fmt;
+                       /* it's the next argument */
+                       field_width = va_arg(args, int);
+                       if (field_width < 0) {
+                               field_width = -field_width;
+                               flags |= LEFT;
+                       }
+               }
+
+               /* get the precision */
+               precision = -1;
+               if (*fmt == '.') {
+                       ++fmt;
+                       if ('0' <= *fmt && *fmt <= '9')
+                               precision = skip_atoi(&fmt);
+                       else if (*fmt == '*') {
+                               ++fmt;
+                               /* it's the next argument */
+                               precision = va_arg(args, int);
+                       }
+                       if (precision < 0)
+                               precision = 0;
+               }
+
+               /* get the conversion qualifier */
+               qualifier = -1;
+               if (*fmt == 'l' && *(fmt + 1) == 'l') {
+                       qualifier = 'q';
+                       fmt += 2;
+               } else if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L'
+                       || *fmt == 'Z') {
+                       qualifier = *fmt;
+                       ++fmt;
+               }
+
+               /* default base */
+               base = 10;
+
+               switch (*fmt) {
+               case 'c':
+                       if (!(flags & LEFT))
+                               while (--field_width > 0)
+                                       *str++ = ' ';
+                       *str++ = (unsigned char) va_arg(args, int);
+                       while (--field_width > 0)
+                               *str++ = ' ';
+                       continue;
+
+               case 's':
+                       s = va_arg(args, char *);
+                       if (!s)
+                               s = "<NULL>";
+
+                       len = strnlen(s, precision);
+
+                       if (!(flags & LEFT))
+                               while (len < field_width--)
+                                       *str++ = ' ';
+                       for (i = 0; i < len; ++i)
+                               *str++ = *s++;
+                       while (len < field_width--)
+                               *str++ = ' ';
+                       continue;
+
+               case 'p':
+                       if (field_width == -1) {
+                               field_width = 2*sizeof(void *);
+                               flags |= ZEROPAD;
+                       }
+                       str = number(str,
+                               (unsigned long) va_arg(args, void *), 16,
+                               field_width, precision, flags);
+                       continue;
+
+
+               case 'n':
+                       if (qualifier == 'l') {
+                               long * ip = va_arg(args, long *);
+                               *ip = (str - buf);
+                       } else if (qualifier == 'Z') {
+                               size_t * ip = va_arg(args, size_t *);
+                               *ip = (str - buf);
+                       } else {
+                               int * ip = va_arg(args, int *);
+                               *ip = (str - buf);
+                       }
+                       continue;
+
+               case '%':
+                       *str++ = '%';
+                       continue;
+
+               /* integer number formats - set up the flags and "break" */
+               case 'o':
+                       base = 8;
+                       break;
+
+               case 'X':
+                       flags |= LARGE;
+               case 'x':
+                       base = 16;
+                       break;
+
+               case 'd':
+               case 'i':
+                       flags |= SIGN;
+               case 'u':
+                       break;
+
+               default:
+                       *str++ = '%';
+                       if (*fmt)
+                               *str++ = *fmt;
+                       else
+                               --fmt;
+                       continue;
+               }
+               if (qualifier == 'l') {
+                       num = va_arg(args, unsigned long);
+                       if (flags & SIGN)
+                               num = (signed long) num;
+               } else if (qualifier == 'q') {
+                       num = va_arg(args, unsigned long long);
+                       if (flags & SIGN)
+                               num = (signed long long) num;
+               } else if (qualifier == 'Z') {
+                       num = va_arg(args, size_t);
+               } else if (qualifier == 'h') {
+                       num = (unsigned short) va_arg(args, int);
+                       if (flags & SIGN)
+                               num = (signed short) num;
+               } else {
+                       num = va_arg(args, unsigned int);
+                       if (flags & SIGN)
+                               num = (signed int) num;
+               }
+               str = number(str, num, base, field_width, precision, flags);
+       }
+       *str = '\0';
+       return str-buf;
+}
+
+int sprintf(char * buf, const char *fmt, ...)
+{
+       va_list args;
+       int i;
+
+       va_start(args, fmt);
+       i=vsprintf(buf,fmt,args);
+       va_end(args);
+       return i;
+}
index 367d53d..dee8269 100644 (file)
@@ -27,6 +27,9 @@
 #include <linux/param.h>
 #ifdef __ELF__
 # include <linux/elf.h>
+# define elfhdr elf64_hdr
+# define elf_phdr elf64_phdr
+# define elf_check_arch(x) ((x)->e_machine == EM_ALPHA)
 #endif
 
 /* bootfile size must be multiple of BLOCK_SIZE: */
index f61e1a5..4cb4b6d 100644 (file)
@@ -2,6 +2,5 @@
 #define _ALPHA_TYPES_H
 
 #include <asm-generic/int-ll64.h>
-#include <uapi/asm/types.h>
 
 #endif /* _ALPHA_TYPES_H */
index c509d30..a56e608 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define NR_SYSCALLS                    511
+#define NR_SYSCALLS                    514
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
index d214a03..aa33bf5 100644 (file)
 #define __NR_sched_setattr             508
 #define __NR_sched_getattr             509
 #define __NR_renameat2                 510
+#define __NR_getrandom                 511
+#define __NR_memfd_create              512
+#define __NR_execveat                  513
 
 #endif /* _UAPI_ALPHA_UNISTD_H */
index 253cf1a..51267ac 100644 (file)
@@ -6,7 +6,6 @@
  *     Error handling code supporting Alpha systems
  */
 
-#include <linux/init.h>
 #include <linux/sched.h>
 
 #include <asm/io.h>
index 7b2be25..51f2c86 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/ptrace.h>
 #include <linux/interrupt.h>
 #include <linux/random.h>
-#include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
index e51f578..36dc91a 100644 (file)
@@ -1019,14 +1019,13 @@ SYSCALL_DEFINE2(osf_settimeofday, struct timeval32 __user *, tv,
        if (tv) {
                if (get_tv32((struct timeval *)&kts, tv))
                        return -EFAULT;
+               kts.tv_nsec *= 1000;
        }
        if (tz) {
                if (copy_from_user(&ktz, tz, sizeof(*tz)))
                        return -EFAULT;
        }
 
-       kts.tv_nsec *= 1000;
-
        return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
 }
 
index 1941a07..84d1326 100644 (file)
@@ -236,12 +236,11 @@ release_thread(struct task_struct *dead_task)
 }
 
 /*
- * Copy an alpha thread..
+ * Copy architecture-specific thread state
  */
-
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-           unsigned long arg,
+           unsigned long kthread_arg,
            struct task_struct *p)
 {
        extern void ret_from_fork(void);
@@ -262,7 +261,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
                        sizeof(struct switch_stack) + sizeof(struct pt_regs));
                childstack->r26 = (unsigned long) ret_from_kernel_thread;
                childstack->r9 = usp;   /* function */
-               childstack->r10 = arg;
+               childstack->r10 = kthread_arg;
                childregs->hae = alpha_mv.hae_cache,
                childti->pcb.usp = 0;
                return 0;
index 99ac36d..2f24447 100644 (file)
@@ -63,7 +63,6 @@ static struct {
 enum ipi_message_type {
        IPI_RESCHEDULE,
        IPI_CALL_FUNC,
-       IPI_CALL_FUNC_SINGLE,
        IPI_CPU_STOP,
 };
 
@@ -506,7 +505,6 @@ setup_profiling_timer(unsigned int multiplier)
        return -EINVAL;
 }
 
-\f
 static void
 send_ipi_message(const struct cpumask *to_whom, enum ipi_message_type operation)
 {
@@ -552,10 +550,6 @@ handle_ipi(struct pt_regs *regs)
                        generic_smp_call_function_interrupt();
                        break;
 
-               case IPI_CALL_FUNC_SINGLE:
-                       generic_smp_call_function_single_interrupt();
-                       break;
-
                case IPI_CPU_STOP:
                        halt();
 
@@ -606,7 +600,7 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-       send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+       send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC);
 }
 
 static void
index 6f01d9a..72b5951 100644 (file)
@@ -237,8 +237,7 @@ srmcons_init(void)
 
        return -ENODEV;
 }
-
-module_init(srmcons_init);
+device_initcall(srmcons_init);
 
 \f
 /*
index f21d61f..24e41bd 100644 (file)
@@ -331,7 +331,7 @@ marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
        pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &intline);
        irq = intline;
 
-       msi_loc = pci_find_capability(dev, PCI_CAP_ID_MSI);
+       msi_loc = dev->msi_cap;
        msg_ctl = 0;
        if (msi_loc) 
                pci_read_config_word(dev, msi_loc + PCI_MSI_FLAGS, &msg_ctl);
index 2478971..9b62e3f 100644 (file)
@@ -529,6 +529,9 @@ sys_call_table:
        .quad sys_sched_setattr
        .quad sys_sched_getattr
        .quad sys_renameat2                     /* 510 */
+       .quad sys_getrandom
+       .quad sys_memfd_create
+       .quad sys_execveat
 
        .size sys_call_table, . - sys_call_table
        .type sys_call_table, @object
index 9c4c189..74aceea 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/tty.h>
 #include <linux/delay.h>
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/ratelimit.h>
 
index 18aa9b4..086a0d5 100644 (file)
@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
index c32f8a0..c300f5e 100644 (file)
@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
index 1c84cc2..02edf59 100644 (file)
@@ -8,7 +8,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
index 34a57a1..adb1744 100644 (file)
@@ -9,7 +9,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
 
index 86217db..992736b 100644 (file)
@@ -223,7 +223,7 @@ dtb-$(CONFIG_SOC_IMX25) += \
        imx25-eukrea-mbimxsd25-baseboard-dvi-vga.dtb \
        imx25-karo-tx25.dtb \
        imx25-pdk.dtb
-dtb-$(CONFIG_SOC_IMX31) += \
+dtb-$(CONFIG_SOC_IMX27) += \
        imx27-apf27.dtb \
        imx27-apf27dev.dtb \
        imx27-eukrea-mbimxsd27-baseboard.dtb \
index 5c42d25..901739f 100644 (file)
@@ -80,7 +80,3 @@
                status = "okay";
        };
 };
-
-&rtc {
-       system-power-controller;
-};
index 87fc7a3..156d05e 100644 (file)
        wlcore: wlcore@2 {
                compatible = "ti,wl1271";
                reg = <2>;
-               interrupt-parent = <&gpio1>;
+               interrupt-parent = <&gpio0>;
                interrupts = <31 IRQ_TYPE_LEVEL_HIGH>; /* gpio 31 */
                ref-clock-frequency = <38400000>;
        };
index 173ffa4..792394d 100644 (file)
 
                        display-timings {
                                timing-0 {
-                                       clock-frequency = <0>;
+                                       clock-frequency = <57153600>;
                                        hactive = <720>;
                                        vactive = <1280>;
                                        hfront-porch = <5>;
index 6951b66..bc215e4 100644 (file)
 
                        fec: ethernet@1002b000 {
                                compatible = "fsl,imx27-fec";
-                               reg = <0x1002b000 0x4000>;
+                               reg = <0x1002b000 0x1000>;
                                interrupts = <50>;
                                clocks = <&clks IMX27_CLK_FEC_IPG_GATE>,
                                         <&clks IMX27_CLK_FEC_AHB_GATE>;
index 134d3f2..921de66 100644 (file)
        nand@0,0 {
                reg = <0 0 4>; /* CS0, offset 0, IO size 4 */
                nand-bus-width = <16>;
+               gpmc,device-width = <2>;
+               ti,nand-ecc-opt = "sw";
 
                gpmc,sync-clk-ps = <0>;
                gpmc,cs-on-ns = <0>;
index 0ca4a3e..fbbb191 100644 (file)
@@ -429,7 +429,7 @@ CONFIG_USB_EHCI_EXYNOS=y
 CONFIG_USB_EHCI_TEGRA=y
 CONFIG_USB_EHCI_HCD_STI=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
-CONFIG_USB_ISP1760_HCD=y
+CONFIG_USB_ISP1760=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_STI=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
index f8ccc21..4e7f40c 100644 (file)
@@ -33,7 +33,9 @@ ret_fast_syscall:
  UNWIND(.fnstart       )
  UNWIND(.cantunwind    )
        disable_irq                             @ disable interrupts
-       ldr     r1, [tsk, #TI_FLAGS]
+       ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
+       tst     r1, #_TIF_SYSCALL_WORK
+       bne     __sys_trace_return
        tst     r1, #_TIF_WORK_MASK
        bne     fast_work_pending
        asm_trace_hardirqs_on
index 213919b..3b8c283 100644 (file)
@@ -304,16 +304,17 @@ static int probe_current_pmu(struct arm_pmu *pmu)
 static int of_pmu_irq_cfg(struct platform_device *pdev)
 {
        int i, irq;
-       int *irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
-
-       if (!irqs)
-               return -ENOMEM;
+       int *irqs;
 
        /* Don't bother with PPIs; they're already affine */
        irq = platform_get_irq(pdev, 0);
        if (irq >= 0 && irq_is_percpu(irq))
                return 0;
 
+       irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL);
+       if (!irqs)
+               return -ENOMEM;
+
        for (i = 0; i < pdev->num_resources; ++i) {
                struct device_node *dn;
                int cpu;
index 4d60005..6d0893a 100644 (file)
@@ -280,9 +280,15 @@ void __init imx_gpc_check_dt(void)
        struct device_node *np;
 
        np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-gpc");
-       if (WARN_ON(!np ||
-                   !of_find_property(np, "interrupt-controller", NULL)))
-               pr_warn("Outdated DT detected, system is about to crash!!!\n");
+       if (WARN_ON(!np))
+               return;
+
+       if (WARN_ON(!of_find_property(np, "interrupt-controller", NULL))) {
+               pr_warn("Outdated DT detected, suspend/resume will NOT work\n");
+
+               /* map GPC, so that at least CPUidle and WARs keep working */
+               gpc_base = of_iomap(np, 0);
+       }
 }
 
 #ifdef CONFIG_PM_GENERIC_DOMAINS
@@ -443,6 +449,10 @@ static int imx_gpc_probe(struct platform_device *pdev)
        struct regulator *pu_reg;
        int ret;
 
+       /* bail out if DT too old and doesn't provide the necessary info */
+       if (!of_property_read_bool(pdev->dev.of_node, "#power-domain-cells"))
+               return 0;
+
        pu_reg = devm_regulator_get_optional(&pdev->dev, "pu");
        if (PTR_ERR(pu_reg) == -ENODEV)
                pu_reg = NULL;
index f1aeb54..2385052 100644 (file)
@@ -107,7 +107,7 @@ static int cplds_probe(struct platform_device *pdev)
        struct resource *res;
        struct cplds *fpga;
        int ret;
-       unsigned int base_irq = 0;
+       int base_irq;
        unsigned long irqflags = 0;
 
        fpga = devm_kzalloc(&pdev->dev, sizeof(*fpga), GFP_KERNEL);
index 4e6ef89..7186382 100644 (file)
@@ -1112,22 +1112,22 @@ void __init sanity_check_meminfo(void)
                        }
 
                        /*
-                        * Find the first non-section-aligned page, and point
+                        * Find the first non-pmd-aligned page, and point
                         * memblock_limit at it. This relies on rounding the
-                        * limit down to be section-aligned, which happens at
-                        * the end of this function.
+                        * limit down to be pmd-aligned, which happens at the
+                        * end of this function.
                         *
                         * With this algorithm, the start or end of almost any
-                        * bank can be non-section-aligned. The only exception
-                        * is that the start of the bank 0 must be section-
+                        * bank can be non-pmd-aligned. The only exception is
+                        * that the start of the bank 0 must be section-
                         * aligned, since otherwise memory would need to be
                         * allocated when mapping the start of bank 0, which
                         * occurs before any free memory is mapped.
                         */
                        if (!memblock_limit) {
-                               if (!IS_ALIGNED(block_start, SECTION_SIZE))
+                               if (!IS_ALIGNED(block_start, PMD_SIZE))
                                        memblock_limit = block_start;
-                               else if (!IS_ALIGNED(block_end, SECTION_SIZE))
+                               else if (!IS_ALIGNED(block_end, PMD_SIZE))
                                        memblock_limit = arm_lowmem_limit;
                        }
 
@@ -1137,12 +1137,12 @@ void __init sanity_check_meminfo(void)
        high_memory = __va(arm_lowmem_limit - 1) + 1;
 
        /*
-        * Round the memblock limit down to a section size.  This
+        * Round the memblock limit down to a pmd size.  This
         * helps to ensure that we will allocate memory from the
-        * last full section, which should be mapped.
+        * last full pmd, which should be mapped.
         */
        if (memblock_limit)
-               memblock_limit = round_down(memblock_limit, SECTION_SIZE);
+               memblock_limit = round_down(memblock_limit, PMD_SIZE);
        if (!memblock_limit)
                memblock_limit = arm_lowmem_limit;
 
index d4e162d..7cc3be9 100644 (file)
@@ -478,9 +478,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-       struct pci_controller *controller = bridge->bus->sysdata;
-
-       ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+       /*
+        * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+        * here, pci_create_root_bus() has been called by someone else and
+        * sysdata is likely to be different from what we expect.  Let it go in
+        * that case.
+        */
+       if (!bridge->dev.parent) {
+               struct pci_controller *controller = bridge->bus->sysdata;
+               ACPI_COMPANION_SET(&bridge->dev, controller->companion);
+       }
        return 0;
 }
 
index e1fe630..597899a 100644 (file)
@@ -1,6 +1,7 @@
 /*
  *  Atheros AR71XX/AR724X/AR913X specific prom routines
  *
+ *  Copyright (C) 2015 Laurent Fasnacht <l@libres.ch>
  *  Copyright (C) 2008-2010 Gabor Juhos <juhosg@openwrt.org>
  *  Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
  *
@@ -25,12 +26,14 @@ void __init prom_init(void)
 {
        fw_init_cmdline();
 
+#ifdef CONFIG_BLK_DEV_INITRD
        /* Read the initrd address from the firmware environment */
        initrd_start = fw_getenvl("initrd_start");
        if (initrd_start) {
                initrd_start = KSEG0ADDR(initrd_start);
                initrd_end = initrd_start + fw_getenvl("initrd_size");
        }
+#endif
 }
 
 void __init prom_free_prom_memory(void)
index 0026806..b2a577e 100644 (file)
@@ -194,7 +194,7 @@ CONFIG_USB_WUSB_CBAF=m
 CONFIG_USB_C67X00_HCD=m
 CONFIG_USB_EHCI_HCD=y
 CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_ISP1760_HCD=m
+CONFIG_USB_ISP1760=m
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=m
 CONFIG_USB_R8A66597_HCD=m
index d2bfbc2..51f57d8 100644 (file)
@@ -29,7 +29,7 @@
 int kgdb_early_setup;
 #endif
 
-static unsigned long irq_map[NR_IRQS / BITS_PER_LONG];
+static DECLARE_BITMAP(irq_map, NR_IRQS);
 
 int allocate_irqno(void)
 {
index fd528d7..336708a 100644 (file)
@@ -444,7 +444,7 @@ struct plat_smp_ops bmips5000_smp_ops = {
 static void bmips_wr_vec(unsigned long dst, char *start, char *end)
 {
        memcpy((void *)dst, start, end - start);
-       dma_cache_wback((unsigned long)start, end - start);
+       dma_cache_wback(dst, end - start);
        local_flush_icache_range(dst, dst + (end - start));
        instruction_hazard();
 }
index 7d12c0d..77e6494 100644 (file)
@@ -34,7 +34,12 @@ LEAF(__strnlen_\func\()_asm)
 FEXPORT(__strnlen_\func\()_nocheck_asm)
        move            v0, a0
        PTR_ADDU        a1, a0                  # stop pointer
-1:     beq             v0, a1, 1f              # limit reached?
+1:
+#ifdef CONFIG_CPU_DADDI_WORKAROUNDS
+       .set            noat
+       li              AT, 1
+#endif
+       beq             v0, a1, 1f              # limit reached?
 .ifeqs "\func", "kernel"
        EX(lb, t0, (v0), .Lfault\@)
 .else
@@ -42,7 +47,13 @@ FEXPORT(__strnlen_\func\()_nocheck_asm)
 .endif
        .set            noreorder
        bnez            t0, 1b
-1:      PTR_ADDIU      v0, 1
+1:
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
+        PTR_ADDIU      v0, 1
+#else
+        PTR_ADDU       v0, AT
+       .set            at
+#endif
        .set            reorder
        PTR_SUBU        v0, a0
        jr              ra
index c469490..3c6bb34 100644 (file)
 #define MSR_CORE_C3_RESIDENCY          0x000003fc
 #define MSR_CORE_C6_RESIDENCY          0x000003fd
 #define MSR_CORE_C7_RESIDENCY          0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY      0x000003ff
 #define MSR_PKG_C2_RESIDENCY           0x0000060d
 #define MSR_PKG_C8_RESIDENCY           0x00000630
 #define MSR_PKG_C9_RESIDENCY           0x00000631
index e535533..20190bd 100644 (file)
@@ -708,6 +708,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
                          struct pt_regs *regs)
 {
        int i, ret = 0;
+       char *tmp;
 
        for (i = 0; i < mca_cfg.banks; i++) {
                m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
@@ -716,9 +717,11 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
                        if (quirk_no_way_out)
                                quirk_no_way_out(i, m, regs);
                }
-               if (mce_severity(m, mca_cfg.tolerant, msg, true) >=
-                   MCE_PANIC_SEVERITY)
+
+               if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+                       *msg = tmp;
                        ret = 1;
+               }
        }
        return ret;
 }
index 0091832..6185d31 100644 (file)
@@ -173,6 +173,21 @@ static void init_thread_xstate(void)
                xstate_size = sizeof(struct i387_fxsave_struct);
        else
                xstate_size = sizeof(struct i387_fsave_struct);
+
+       /*
+        * Quirk: we don't yet handle the XSAVES* instructions
+        * correctly, as we don't correctly convert between
+        * standard and compacted format when interfacing
+        * with user-space - so disable it for now.
+        *
+        * The difference is small: with recent CPUs the
+        * compacted format is only marginally smaller than
+        * the standard FPU state format.
+        *
+        * ( This is easy to backport while we are fixing
+        *   XSAVES* support. )
+        */
+       setup_clear_cpu_cap(X86_FEATURE_XSAVES);
 }
 
 /*
index 99f7610..ddeff48 100644 (file)
@@ -966,7 +966,12 @@ void bpf_int_jit_compile(struct bpf_prog *prog)
        }
        ctx.cleanup_addr = proglen;
 
-       for (pass = 0; pass < 10; pass++) {
+       /* JITed image shrinks with every pass and the loop iterates
+        * until the image stops shrinking. Very large bpf programs
+        * may converge on the last pass. In such case do one more
+        * pass to emit the final image
+        */
+       for (pass = 0; pass < 10 || image; pass++) {
                proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
                if (proglen <= 0) {
                        image = NULL;
index d939633..14a63ed 100644 (file)
@@ -482,9 +482,16 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-       struct pci_sysdata *sd = bridge->bus->sysdata;
-
-       ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+       /*
+        * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL
+        * here, pci_create_root_bus() has been called by someone else and
+        * sysdata is likely to be different from what we expect.  Let it go in
+        * that case.
+        */
+       if (!bridge->dev.parent) {
+               struct pci_sysdata *sd = bridge->bus->sysdata;
+               ACPI_COMPANION_SET(&bridge->dev, sd->companion);
+       }
        return 0;
 }
 
index 172a02a..ba78ccf 100644 (file)
@@ -185,4 +185,17 @@ static inline int dma_get_sgtable(struct device *dev, struct sg_table *sgt,
        return -EINVAL;
 }
 
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+                                   dma_addr_t *dma_handle, gfp_t flag,
+                                   struct dma_attrs *attrs)
+{
+       return NULL;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+                                 void *vaddr, dma_addr_t dma_handle,
+                                 struct dma_attrs *attrs)
+{
+}
+
 #endif /* _XTENSA_DMA_MAPPING_H */
index 8aaf298..362905e 100644 (file)
@@ -1512,15 +1512,6 @@ config CRYPTO_USER_API_RNG
          This option enables the user-spaces interface for random
          number generator algorithms.
 
-config CRYPTO_USER_API_AEAD
-       tristate "User-space interface for AEAD cipher algorithms"
-       depends on NET
-       select CRYPTO_AEAD
-       select CRYPTO_USER_API
-       help
-         This option enables the user-spaces interface for AEAD
-         cipher algorithms.
-
 config CRYPTO_HASH_INFO
        bool
 
index 5bd792c..ab3bde1 100644 (file)
@@ -453,7 +453,7 @@ void __iomem *mips_cdmm_early_probe(unsigned int dev_type)
 
        /* Look for a specific device type */
        for (; drb < bus->drbs; drb += size + 1) {
-               acsr = readl(cdmm + drb * CDMM_DRB_SIZE);
+               acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE);
                type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT;
                if (type == dev_type)
                        return cdmm + drb * CDMM_DRB_SIZE;
@@ -500,7 +500,7 @@ static void mips_cdmm_bus_discover(struct mips_cdmm_bus *bus)
        bus->discovered = true;
        pr_info("cdmm%u discovery (%u blocks)\n", cpu, bus->drbs);
        for (; drb < bus->drbs; drb += size + 1) {
-               acsr = readl(cdmm + drb * CDMM_DRB_SIZE);
+               acsr = __raw_readl(cdmm + drb * CDMM_DRB_SIZE);
                type = (acsr & CDMM_ACSR_DEVTYPE) >> CDMM_ACSR_DEVTYPE_SHIFT;
                size = (acsr & CDMM_ACSR_DEVSIZE) >> CDMM_ACSR_DEVSIZE_SHIFT;
                rev  = (acsr & CDMM_ACSR_DEVREV)  >> CDMM_ACSR_DEVREV_SHIFT;
index 6b8115f..83f281d 100644 (file)
@@ -117,7 +117,7 @@ static int kempld_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
                = container_of(chip, struct kempld_gpio_data, chip);
        struct kempld_device_data *pld = gpio->pld;
 
-       return kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset);
+       return !kempld_gpio_get_bit(pld, KEMPLD_GPIO_DIR_NUM(offset), offset);
 }
 
 static int kempld_gpio_pincount(struct kempld_device_data *pld)
index 59eaa23..6bc612b 100644 (file)
@@ -53,6 +53,11 @@ static DEFINE_MUTEX(gpio_lookup_lock);
 static LIST_HEAD(gpio_lookup_list);
 LIST_HEAD(gpio_chips);
 
+
+static void gpiochip_free_hogs(struct gpio_chip *chip);
+static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
+
+
 static inline void desc_set_label(struct gpio_desc *d, const char *label)
 {
        d->label = label;
@@ -297,6 +302,7 @@ int gpiochip_add(struct gpio_chip *chip)
 
 err_remove_chip:
        acpi_gpiochip_remove(chip);
+       gpiochip_free_hogs(chip);
        of_gpiochip_remove(chip);
        spin_lock_irqsave(&gpio_lock, flags);
        list_del(&chip->list);
@@ -313,10 +319,6 @@ err_free_descs:
 }
 EXPORT_SYMBOL_GPL(gpiochip_add);
 
-/* Forward-declaration */
-static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip);
-static void gpiochip_free_hogs(struct gpio_chip *chip);
-
 /**
  * gpiochip_remove() - unregister a gpio_chip
  * @chip: the chip to unregister
index 40c1db9..2f0ed11 100644 (file)
@@ -465,6 +465,9 @@ int drm_plane_helper_commit(struct drm_plane *plane,
                if (!crtc[i])
                        continue;
 
+               if (crtc[i]->cursor == plane)
+                       continue;
+
                /* There's no other way to figure out whether the crtc is running. */
                ret = drm_crtc_vblank_get(crtc[i]);
                if (ret == 0) {
index 0b5af0f..64f8b2f 100644 (file)
@@ -14,7 +14,7 @@
 
 #define FERMI_TWOD_A                                                 0x0000902d
 
-#define FERMI_MEMORY_TO_MEMORY_FORMAT_A                              0x0000903d
+#define FERMI_MEMORY_TO_MEMORY_FORMAT_A                              0x00009039
 
 #define KEPLER_INLINE_TO_MEMORY_A                                    0x0000a040
 #define KEPLER_INLINE_TO_MEMORY_B                                    0x0000a140
index 2f5eadd..fdb1dcf 100644 (file)
@@ -329,7 +329,6 @@ gm204_gr_init(struct nvkm_object *object)
        nv_mask(priv, 0x419cc0, 0x00000008, 0x00000008);
 
        for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
-       printk(KERN_ERR "ppc %d %d\n", gpc, priv->ppc_nr[gpc]);
                for (ppc = 0; ppc < priv->ppc_nr[gpc]; ppc++)
                        nv_wr32(priv, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);
                nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000);
index e8778c6..c61102f 100644 (file)
@@ -90,12 +90,14 @@ gf100_devinit_disable(struct nvkm_devinit *devinit)
        return disable;
 }
 
-static int
+int
 gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
                   struct nvkm_oclass *oclass, void *data, u32 size,
                   struct nvkm_object **pobject)
 {
+       struct nvkm_devinit_impl *impl = (void *)oclass;
        struct nv50_devinit_priv *priv;
+       u64 disable;
        int ret;
 
        ret = nvkm_devinit_create(parent, engine, oclass, &priv);
@@ -103,7 +105,8 @@ gf100_devinit_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
        if (ret)
                return ret;
 
-       if (nv_rd32(priv, 0x022500) & 0x00000001)
+       disable = impl->disable(&priv->base);
+       if (disable & (1ULL << NVDEV_ENGINE_DISP))
                priv->base.post = true;
 
        return 0;
index b345a53..87ca0ec 100644 (file)
@@ -48,7 +48,7 @@ struct nvkm_oclass *
 gm107_devinit_oclass = &(struct nvkm_devinit_impl) {
        .base.handle = NV_SUBDEV(DEVINIT, 0x07),
        .base.ofuncs = &(struct nvkm_ofuncs) {
-               .ctor = nv50_devinit_ctor,
+               .ctor = gf100_devinit_ctor,
                .dtor = _nvkm_devinit_dtor,
                .init = nv50_devinit_init,
                .fini = _nvkm_devinit_fini,
index 535172c..1076fcf 100644 (file)
@@ -161,7 +161,7 @@ struct nvkm_oclass *
 gm204_devinit_oclass = &(struct nvkm_devinit_impl) {
        .base.handle = NV_SUBDEV(DEVINIT, 0x07),
        .base.ofuncs = &(struct nvkm_ofuncs) {
-               .ctor = nv50_devinit_ctor,
+               .ctor = gf100_devinit_ctor,
                .dtor = _nvkm_devinit_dtor,
                .init = nv50_devinit_init,
                .fini = _nvkm_devinit_fini,
index b882b65..9243521 100644 (file)
@@ -15,6 +15,9 @@ int  nv50_devinit_pll_set(struct nvkm_devinit *, u32, u32);
 
 int  gt215_devinit_pll_set(struct nvkm_devinit *, u32, u32);
 
+int  gf100_devinit_ctor(struct nvkm_object *, struct nvkm_object *,
+                       struct nvkm_oclass *, void *, u32,
+                       struct nvkm_object **);
 int  gf100_devinit_pll_set(struct nvkm_devinit *, u32, u32);
 
 u64  gm107_devinit_disable(struct nvkm_devinit *);
index 42b2ea3..e597ffc 100644 (file)
@@ -1798,7 +1798,9 @@ static int radeon_get_shared_nondp_ppll(struct drm_crtc *crtc)
                        if ((crtc->mode.clock == test_crtc->mode.clock) &&
                            (adjusted_clock == test_adjusted_clock) &&
                            (radeon_crtc->ss_enabled == test_radeon_crtc->ss_enabled) &&
-                           (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID))
+                           (test_radeon_crtc->pll_id != ATOM_PPLL_INVALID) &&
+                           (drm_detect_monitor_audio(radeon_connector_edid(test_radeon_crtc->connector)) ==
+                            drm_detect_monitor_audio(radeon_connector_edid(radeon_crtc->connector))))
                                return test_radeon_crtc->pll_id;
                }
        }
index a0c35bb..ba50f3c 100644 (file)
@@ -5822,7 +5822,7 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev)
               L2_CACHE_BIGK_FRAGMENT_SIZE(4));
        /* setup context0 */
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(rdev->dummy_page.addr >> 12));
index 05e6d6e..f848acf 100644 (file)
@@ -2485,7 +2485,7 @@ static int evergreen_pcie_gart_enable(struct radeon_device *rdev)
        WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
        WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
index 0926739..9953356 100644 (file)
@@ -400,7 +400,7 @@ void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable)
        if (enable) {
                struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
 
-               if (drm_detect_monitor_audio(radeon_connector_edid(connector))) {
+               if (connector && drm_detect_monitor_audio(radeon_connector_edid(connector))) {
                        WREG32(HDMI_INFOFRAME_CONTROL0 + dig->afmt->offset,
                               HDMI_AVI_INFO_SEND | /* enable AVI info frames */
                               HDMI_AVI_INFO_CONT | /* required for audio info values to be updated */
@@ -438,7 +438,8 @@ void evergreen_dp_enable(struct drm_encoder *encoder, bool enable)
        if (!dig || !dig->afmt)
                return;
 
-       if (enable && drm_detect_monitor_audio(radeon_connector_edid(connector))) {
+       if (enable && connector &&
+           drm_detect_monitor_audio(radeon_connector_edid(connector))) {
                struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
                struct radeon_connector *radeon_connector = to_radeon_connector(connector);
                struct radeon_connector_atom_dig *dig_connector;
index aba2f42..64d3a77 100644 (file)
@@ -1282,7 +1282,7 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev)
               L2_CACHE_BIGK_FRAGMENT_SIZE(6));
        /* setup context0 */
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(rdev->dummy_page.addr >> 12));
index 25b4ac9..8f6d862 100644 (file)
@@ -1112,7 +1112,7 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev)
        WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
        WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
index dcb7796..25191f1 100644 (file)
@@ -460,9 +460,6 @@ void radeon_audio_detect(struct drm_connector *connector,
        if (!connector || !connector->encoder)
                return;
 
-       if (!radeon_encoder_is_digital(connector->encoder))
-               return;
-
        rdev = connector->encoder->dev->dev_private;
 
        if (!radeon_audio_chipset_supported(rdev))
@@ -471,26 +468,26 @@ void radeon_audio_detect(struct drm_connector *connector,
        radeon_encoder = to_radeon_encoder(connector->encoder);
        dig = radeon_encoder->enc_priv;
 
-       if (!dig->afmt)
-               return;
-
        if (status == connector_status_connected) {
-               struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+               struct radeon_connector *radeon_connector;
+               int sink_type;
+
+               if (!drm_detect_monitor_audio(radeon_connector_edid(connector))) {
+                       radeon_encoder->audio = NULL;
+                       return;
+               }
+
+               radeon_connector = to_radeon_connector(connector);
+               sink_type = radeon_dp_getsinktype(radeon_connector);
 
                if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort &&
-                   radeon_dp_getsinktype(radeon_connector) ==
-                   CONNECTOR_OBJECT_ID_DISPLAYPORT)
+                       sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT)
                        radeon_encoder->audio = rdev->audio.dp_funcs;
                else
                        radeon_encoder->audio = rdev->audio.hdmi_funcs;
 
                dig->afmt->pin = radeon_audio_get_pin(connector->encoder);
-               if (drm_detect_monitor_audio(radeon_connector_edid(connector))) {
-                       radeon_audio_enable(rdev, dig->afmt->pin, 0xf);
-               } else {
-                       radeon_audio_enable(rdev, dig->afmt->pin, 0);
-                       dig->afmt->pin = NULL;
-               }
+               radeon_audio_enable(rdev, dig->afmt->pin, 0xf);
        } else {
                radeon_audio_enable(rdev, dig->afmt->pin, 0);
                dig->afmt->pin = NULL;
index d17d251..cebb65e 100644 (file)
@@ -1379,10 +1379,8 @@ out:
        /* updated in get modes as well since we need to know if it's analog or digital */
        radeon_connector_update_scratch_regs(connector, ret);
 
-       if (radeon_audio != 0) {
-               radeon_connector_get_edid(connector);
+       if (radeon_audio != 0)
                radeon_audio_detect(connector, ret);
-       }
 
 exit:
        pm_runtime_mark_last_busy(connector->dev->dev);
@@ -1719,10 +1717,8 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
 
        radeon_connector_update_scratch_regs(connector, ret);
 
-       if (radeon_audio != 0) {
-               radeon_connector_get_edid(connector);
+       if (radeon_audio != 0)
                radeon_audio_detect(connector, ret);
-       }
 
 out:
        pm_runtime_mark_last_busy(connector->dev->dev);
index c54d631..01ee96a 100644 (file)
@@ -921,7 +921,7 @@ static int rv770_pcie_gart_enable(struct radeon_device *rdev)
        WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
        WREG32(MC_VM_MB_L1_TLB3_CNTL, tmp);
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
                                RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
index 5326f75..4c679b8 100644 (file)
@@ -4303,7 +4303,7 @@ static int si_pcie_gart_enable(struct radeon_device *rdev)
               L2_CACHE_BIGK_FRAGMENT_SIZE(4));
        /* setup context0 */
        WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
-       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, (rdev->mc.gtt_end >> 12) - 1);
+       WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
        WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
        WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
                        (u32)(rdev->dummy_page.addr >> 12));
index 1055cb7..3f4c7b8 100644 (file)
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o vgem_dma_buf.o
+vgem-y := vgem_drv.o
 
 obj-$(CONFIG_DRM_VGEM) += vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_dma_buf.c b/drivers/gpu/drm/vgem/vgem_dma_buf.c
deleted file mode 100644 (file)
index 0254438..0000000
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright Â© 2012 Intel Corporation
- * Copyright Â© 2014 The Chromium OS Authors
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Ben Widawsky <ben@bwidawsk.net>
- *
- */
-
-#include <linux/dma-buf.h>
-#include "vgem_drv.h"
-
-struct sg_table *vgem_gem_prime_get_sg_table(struct drm_gem_object *gobj)
-{
-       struct drm_vgem_gem_object *obj = to_vgem_bo(gobj);
-       BUG_ON(obj->pages == NULL);
-
-       return drm_prime_pages_to_sg(obj->pages, obj->base.size / PAGE_SIZE);
-}
-
-int vgem_gem_prime_pin(struct drm_gem_object *gobj)
-{
-       struct drm_vgem_gem_object *obj = to_vgem_bo(gobj);
-       return vgem_gem_get_pages(obj);
-}
-
-void vgem_gem_prime_unpin(struct drm_gem_object *gobj)
-{
-       struct drm_vgem_gem_object *obj = to_vgem_bo(gobj);
-       vgem_gem_put_pages(obj);
-}
-
-void *vgem_gem_prime_vmap(struct drm_gem_object *gobj)
-{
-       struct drm_vgem_gem_object *obj = to_vgem_bo(gobj);
-       BUG_ON(obj->pages == NULL);
-
-       return vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
-}
-
-void vgem_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
-{
-       vunmap(vaddr);
-}
-
-struct drm_gem_object *vgem_gem_prime_import(struct drm_device *dev,
-                                            struct dma_buf *dma_buf)
-{
-       struct drm_vgem_gem_object *obj = NULL;
-       int ret;
-
-       obj = kzalloc(sizeof(*obj), GFP_KERNEL);
-       if (obj == NULL) {
-               ret = -ENOMEM;
-               goto fail;
-       }
-
-       ret = drm_gem_object_init(dev, &obj->base, dma_buf->size);
-       if (ret) {
-               ret = -ENOMEM;
-               goto fail_free;
-       }
-
-       get_dma_buf(dma_buf);
-
-       obj->base.dma_buf = dma_buf;
-       obj->use_dma_buf = true;
-
-       return &obj->base;
-
-fail_free:
-       kfree(obj);
-fail:
-       return ERR_PTR(ret);
-}
index cb3b435..7a207ca 100644 (file)
@@ -302,22 +302,13 @@ static const struct file_operations vgem_driver_fops = {
 };
 
 static struct drm_driver vgem_driver = {
-       .driver_features                = DRIVER_GEM | DRIVER_PRIME,
+       .driver_features                = DRIVER_GEM,
        .gem_free_object                = vgem_gem_free_object,
        .gem_vm_ops                     = &vgem_gem_vm_ops,
        .ioctls                         = vgem_ioctls,
        .fops                           = &vgem_driver_fops,
        .dumb_create                    = vgem_gem_dumb_create,
        .dumb_map_offset                = vgem_gem_dumb_map,
-       .prime_handle_to_fd             = drm_gem_prime_handle_to_fd,
-       .prime_fd_to_handle             = drm_gem_prime_fd_to_handle,
-       .gem_prime_export               = drm_gem_prime_export,
-       .gem_prime_import               = vgem_gem_prime_import,
-       .gem_prime_pin                  = vgem_gem_prime_pin,
-       .gem_prime_unpin                = vgem_gem_prime_unpin,
-       .gem_prime_get_sg_table         = vgem_gem_prime_get_sg_table,
-       .gem_prime_vmap                 = vgem_gem_prime_vmap,
-       .gem_prime_vunmap               = vgem_gem_prime_vunmap,
        .name   = DRIVER_NAME,
        .desc   = DRIVER_DESC,
        .date   = DRIVER_DATE,
index 57ab4d8..e9f92f7 100644 (file)
@@ -43,15 +43,4 @@ struct drm_vgem_gem_object {
 extern void vgem_gem_put_pages(struct drm_vgem_gem_object *obj);
 extern int vgem_gem_get_pages(struct drm_vgem_gem_object *obj);
 
-/* vgem_dma_buf.c */
-extern struct sg_table *vgem_gem_prime_get_sg_table(
-                       struct drm_gem_object *gobj);
-extern int vgem_gem_prime_pin(struct drm_gem_object *gobj);
-extern void vgem_gem_prime_unpin(struct drm_gem_object *gobj);
-extern void *vgem_gem_prime_vmap(struct drm_gem_object *gobj);
-extern void vgem_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
-extern struct drm_gem_object *vgem_gem_prime_import(struct drm_device *dev,
-                                                   struct dma_buf *dma_buf);
-
-
 #endif
index f3830db..37f0170 100644 (file)
@@ -439,6 +439,7 @@ nct6683_create_attr_group(struct device *dev, struct sensor_template_group *tg,
                                 (*t)->dev_attr.attr.name, tg->base + i);
                        if ((*t)->s2) {
                                a2 = &su->u.a2;
+                               sysfs_attr_init(&a2->dev_attr.attr);
                                a2->dev_attr.attr.name = su->name;
                                a2->nr = (*t)->u.s.nr + i;
                                a2->index = (*t)->u.s.index;
@@ -449,6 +450,7 @@ nct6683_create_attr_group(struct device *dev, struct sensor_template_group *tg,
                                *attrs = &a2->dev_attr.attr;
                        } else {
                                a = &su->u.a1;
+                               sysfs_attr_init(&a->dev_attr.attr);
                                a->dev_attr.attr.name = su->name;
                                a->index = (*t)->u.index + i;
                                a->dev_attr.attr.mode =
index 4fcb481..bd1c99d 100644 (file)
@@ -995,6 +995,7 @@ nct6775_create_attr_group(struct device *dev, struct sensor_template_group *tg,
                                 (*t)->dev_attr.attr.name, tg->base + i);
                        if ((*t)->s2) {
                                a2 = &su->u.a2;
+                               sysfs_attr_init(&a2->dev_attr.attr);
                                a2->dev_attr.attr.name = su->name;
                                a2->nr = (*t)->u.s.nr + i;
                                a2->index = (*t)->u.s.index;
@@ -1005,6 +1006,7 @@ nct6775_create_attr_group(struct device *dev, struct sensor_template_group *tg,
                                *attrs = &a2->dev_attr.attr;
                        } else {
                                a = &su->u.a1;
+                               sysfs_attr_init(&a->dev_attr.attr);
                                a->dev_attr.attr.name = su->name;
                                a->index = (*t)->u.index + i;
                                a->dev_attr.attr.mode =
index 112e4d4..6880011 100644 (file)
@@ -239,8 +239,10 @@ static struct ntc_thermistor_platform_data *
 ntc_thermistor_parse_dt(struct platform_device *pdev)
 {
        struct iio_channel *chan;
+       enum iio_chan_type type;
        struct device_node *np = pdev->dev.of_node;
        struct ntc_thermistor_platform_data *pdata;
+       int ret;
 
        if (!np)
                return NULL;
@@ -253,6 +255,13 @@ ntc_thermistor_parse_dt(struct platform_device *pdev)
        if (IS_ERR(chan))
                return ERR_CAST(chan);
 
+       ret = iio_get_channel_type(chan, &type);
+       if (ret < 0)
+               return ERR_PTR(ret);
+
+       if (type != IIO_VOLTAGE)
+               return ERR_PTR(-EINVAL);
+
        if (of_property_read_u32(np, "pullup-uv", &pdata->pullup_uv))
                return ERR_PTR(-ENODEV);
        if (of_property_read_u32(np, "pullup-ohm", &pdata->pullup_ohm))
index 99664eb..ccf4cff 100644 (file)
@@ -44,7 +44,7 @@
 #include <linux/sysfs.h>
 
 /* Addresses to scan */
-static const unsigned short normal_i2c[] = { 0x37, 0x48, 0x49, 0x4a, 0x4c, 0x4d,
+static const unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4c, 0x4d,
        0x4e, 0x4f, I2C_CLIENT_END };
 
 enum chips { tmp401, tmp411, tmp431, tmp432, tmp435 };
index 327529e..3f40319 100644 (file)
@@ -547,11 +547,11 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
        return 0;
 
 err_prot_mr:
-       ib_dereg_mr(desc->pi_ctx->prot_mr);
+       ib_dereg_mr(pi_ctx->prot_mr);
 err_prot_frpl:
-       ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
+       ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
 err_pi_ctx:
-       kfree(desc->pi_ctx);
+       kfree(pi_ctx);
 
        return ret;
 }
index 7dc93aa..312ffd3 100644 (file)
@@ -173,7 +173,7 @@ static void unmap_switcher(void)
 bool lguest_address_ok(const struct lguest *lg,
                       unsigned long addr, unsigned long len)
 {
-       return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr);
+       return addr+len <= lg->pfn_limit * PAGE_SIZE && (addr+len >= addr);
 }
 
 /*
index 6395347..eff7bdd 100644 (file)
@@ -429,9 +429,11 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
                /* blk-mq request-based interface */
                *__clone = blk_get_request(bdev_get_queue(bdev),
                                           rq_data_dir(rq), GFP_ATOMIC);
-               if (IS_ERR(*__clone))
+               if (IS_ERR(*__clone)) {
                        /* ENOMEM, requeue */
+                       clear_mapinfo(m, map_context);
                        return r;
+               }
                (*__clone)->bio = (*__clone)->biotail = NULL;
                (*__clone)->rq_disk = bdev->bd_disk;
                (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
index d9b00b8..16ba55a 100644 (file)
@@ -820,6 +820,12 @@ void dm_consume_args(struct dm_arg_set *as, unsigned num_args)
 }
 EXPORT_SYMBOL(dm_consume_args);
 
+static bool __table_type_request_based(unsigned table_type)
+{
+       return (table_type == DM_TYPE_REQUEST_BASED ||
+               table_type == DM_TYPE_MQ_REQUEST_BASED);
+}
+
 static int dm_table_set_type(struct dm_table *t)
 {
        unsigned i;
@@ -852,8 +858,7 @@ static int dm_table_set_type(struct dm_table *t)
                 * Determine the type from the live device.
                 * Default to bio-based if device is new.
                 */
-               if (live_md_type == DM_TYPE_REQUEST_BASED ||
-                   live_md_type == DM_TYPE_MQ_REQUEST_BASED)
+               if (__table_type_request_based(live_md_type))
                        request_based = 1;
                else
                        bio_based = 1;
@@ -903,7 +908,7 @@ static int dm_table_set_type(struct dm_table *t)
                        }
                t->type = DM_TYPE_MQ_REQUEST_BASED;
 
-       } else if (hybrid && list_empty(devices) && live_md_type != DM_TYPE_NONE) {
+       } else if (list_empty(devices) && __table_type_request_based(live_md_type)) {
                /* inherit live MD type */
                t->type = live_md_type;
 
@@ -925,10 +930,7 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t)
 
 bool dm_table_request_based(struct dm_table *t)
 {
-       unsigned table_type = dm_table_get_type(t);
-
-       return (table_type == DM_TYPE_REQUEST_BASED ||
-               table_type == DM_TYPE_MQ_REQUEST_BASED);
+       return __table_type_request_based(dm_table_get_type(t));
 }
 
 bool dm_table_mq_request_based(struct dm_table *t)
index a930b72..2caf492 100644 (file)
@@ -1082,13 +1082,11 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
        dm_put(md);
 }
 
-static void free_rq_clone(struct request *clone, bool must_be_mapped)
+static void free_rq_clone(struct request *clone)
 {
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
 
-       WARN_ON_ONCE(must_be_mapped && !clone->q);
-
        blk_rq_unprep_clone(clone);
 
        if (md->type == DM_TYPE_MQ_REQUEST_BASED)
@@ -1132,7 +1130,7 @@ static void dm_end_request(struct request *clone, int error)
                        rq->sense_len = clone->sense_len;
        }
 
-       free_rq_clone(clone, true);
+       free_rq_clone(clone);
        if (!rq->q->mq_ops)
                blk_end_request_all(rq, error);
        else
@@ -1151,7 +1149,7 @@ static void dm_unprep_request(struct request *rq)
        }
 
        if (clone)
-               free_rq_clone(clone, false);
+               free_rq_clone(clone);
 }
 
 /*
@@ -1164,6 +1162,7 @@ static void old_requeue_request(struct request *rq)
 
        spin_lock_irqsave(q->queue_lock, flags);
        blk_requeue_request(q, rq);
+       blk_run_queue_async(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
@@ -1724,8 +1723,7 @@ static int dm_merge_bvec(struct request_queue *q,
        struct mapped_device *md = q->queuedata;
        struct dm_table *map = dm_get_live_table_fast(md);
        struct dm_target *ti;
-       sector_t max_sectors;
-       int max_size = 0;
+       sector_t max_sectors, max_size = 0;
 
        if (unlikely(!map))
                goto out;
@@ -1740,8 +1738,16 @@ static int dm_merge_bvec(struct request_queue *q,
        max_sectors = min(max_io_len(bvm->bi_sector, ti),
                          (sector_t) queue_max_sectors(q));
        max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-       if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
-               max_size = 0;
+
+       /*
+        * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
+        * to the targets' merge function since it holds sectors not bytes).
+        * Just doing this as an interim fix for stable@ because the more
+        * comprehensive cleanup of switching to sector_t will impact every
+        * DM target that implements a ->merge hook.
+        */
+       if (max_size > INT_MAX)
+               max_size = INT_MAX;
 
        /*
         * merge_bvec_fn() returns number of bytes
@@ -1749,7 +1755,7 @@ static int dm_merge_bvec(struct request_queue *q,
         * max is precomputed maximal io size
         */
        if (max_size && ti->type->merge)
-               max_size = ti->type->merge(ti, bvm, biovec, max_size);
+               max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
        /*
         * If the target doesn't support merge method and some of the devices
         * provided their merge_bvec method (we know this by looking for the
@@ -1971,8 +1977,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
                        dm_kill_unmapped_request(rq, r);
                        return r;
                }
-               if (IS_ERR(clone))
-                       return DM_MAPIO_REQUEUE;
+               if (r != DM_MAPIO_REMAPPED)
+                       return r;
                if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
                        /* -ENOMEM */
                        ti->type->release_clone_rq(clone);
@@ -2753,13 +2759,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
                /* clone request is allocated at the end of the pdu */
                tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
-               if (!clone_rq(rq, md, tio, GFP_ATOMIC))
-                       return BLK_MQ_RQ_QUEUE_BUSY;
+               (void) clone_rq(rq, md, tio, GFP_ATOMIC);
                queue_kthread_work(&md->kworker, &tio->work);
        } else {
                /* Direct call is fine since .queue_rq allows allocations */
-               if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-                       dm_requeue_unmapped_original_request(md, rq);
+               if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+                       /* Undo dm_start_request() before requeuing */
+                       rq_completed(md, rq_data_dir(rq), false);
+                       return BLK_MQ_RQ_QUEUE_BUSY;
+               }
        }
 
        return BLK_MQ_RQ_QUEUE_OK;
index 593a024..2750630 100644 (file)
@@ -4211,12 +4211,12 @@ action_store(struct mddev *mddev, const char *page, size_t len)
        if (!mddev->pers || !mddev->pers->sync_request)
                return -EINVAL;
 
-       if (cmd_match(page, "frozen"))
-               set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-       else
-               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 
        if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
+               if (cmd_match(page, "frozen"))
+                       set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+               else
+                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                flush_workqueue(md_misc_wq);
                if (mddev->sync_thread) {
                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -4229,16 +4229,17 @@ action_store(struct mddev *mddev, const char *page, size_t len)
                   test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
                return -EBUSY;
        else if (cmd_match(page, "resync"))
-               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        else if (cmd_match(page, "recover")) {
+               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
-               set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        } else if (cmd_match(page, "reshape")) {
                int err;
                if (mddev->pers->start_reshape == NULL)
                        return -EINVAL;
                err = mddev_lock(mddev);
                if (!err) {
+                       clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                        err = mddev->pers->start_reshape(mddev);
                        mddev_unlock(mddev);
                }
@@ -4250,6 +4251,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
                        set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
                else if (!cmd_match(page, "repair"))
                        return -EINVAL;
+               clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
                set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
                set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
        }
index b9f2b9c..553d54b 100644 (file)
@@ -749,6 +749,7 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2)
 static bool stripe_can_batch(struct stripe_head *sh)
 {
        return test_bit(STRIPE_BATCH_READY, &sh->state) &&
+               !test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
                is_full_stripe_write(sh);
 }
 
@@ -837,6 +838,15 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
                    < IO_THRESHOLD)
                        md_wakeup_thread(conf->mddev->thread);
 
+       if (test_and_clear_bit(STRIPE_BIT_DELAY, &sh->state)) {
+               int seq = sh->bm_seq;
+               if (test_bit(STRIPE_BIT_DELAY, &sh->batch_head->state) &&
+                   sh->batch_head->bm_seq > seq)
+                       seq = sh->batch_head->bm_seq;
+               set_bit(STRIPE_BIT_DELAY, &sh->batch_head->state);
+               sh->batch_head->bm_seq = seq;
+       }
+
        atomic_inc(&sh->count);
 unlock_out:
        unlock_two_stripes(head, sh);
@@ -2987,14 +2997,32 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
        pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
                (unsigned long long)(*bip)->bi_iter.bi_sector,
                (unsigned long long)sh->sector, dd_idx);
-       spin_unlock_irq(&sh->stripe_lock);
 
        if (conf->mddev->bitmap && firstwrite) {
+               /* Cannot hold spinlock over bitmap_startwrite,
+                * but must ensure this isn't added to a batch until
+                * we have added to the bitmap and set bm_seq.
+                * So set STRIPE_BITMAP_PENDING to prevent
+                * batching.
+                * If multiple add_stripe_bio() calls race here they
+                * much all set STRIPE_BITMAP_PENDING.  So only the first one
+                * to complete "bitmap_startwrite" gets to set
+                * STRIPE_BIT_DELAY.  This is important as once a stripe
+                * is added to a batch, STRIPE_BIT_DELAY cannot be changed
+                * any more.
+                */
+               set_bit(STRIPE_BITMAP_PENDING, &sh->state);
+               spin_unlock_irq(&sh->stripe_lock);
                bitmap_startwrite(conf->mddev->bitmap, sh->sector,
                                  STRIPE_SECTORS, 0);
-               sh->bm_seq = conf->seq_flush+1;
-               set_bit(STRIPE_BIT_DELAY, &sh->state);
+               spin_lock_irq(&sh->stripe_lock);
+               clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
+               if (!sh->batch_head) {
+                       sh->bm_seq = conf->seq_flush+1;
+                       set_bit(STRIPE_BIT_DELAY, &sh->state);
+               }
        }
+       spin_unlock_irq(&sh->stripe_lock);
 
        if (stripe_can_batch(sh))
                stripe_add_to_batch_list(conf, sh);
@@ -3392,6 +3420,8 @@ static void handle_stripe_fill(struct stripe_head *sh,
        set_bit(STRIPE_HANDLE, &sh->state);
 }
 
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+                                   unsigned long handle_flags);
 /* handle_stripe_clean_event
  * any written block on an uptodate or failed drive can be returned.
  * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but
@@ -3405,7 +3435,6 @@ static void handle_stripe_clean_event(struct r5conf *conf,
        int discard_pending = 0;
        struct stripe_head *head_sh = sh;
        bool do_endio = false;
-       int wakeup_nr = 0;
 
        for (i = disks; i--; )
                if (sh->dev[i].written) {
@@ -3494,44 +3523,8 @@ unhash:
                if (atomic_dec_and_test(&conf->pending_full_writes))
                        md_wakeup_thread(conf->mddev->thread);
 
-       if (!head_sh->batch_head || !do_endio)
-               return;
-       for (i = 0; i < head_sh->disks; i++) {
-               if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
-                       wakeup_nr++;
-       }
-       while (!list_empty(&head_sh->batch_list)) {
-               int i;
-               sh = list_first_entry(&head_sh->batch_list,
-                                     struct stripe_head, batch_list);
-               list_del_init(&sh->batch_list);
-
-               set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
-                             head_sh->state & ~((1 << STRIPE_ACTIVE) |
-                                                (1 << STRIPE_PREREAD_ACTIVE) |
-                                                STRIPE_EXPAND_SYNC_FLAG));
-               sh->check_state = head_sh->check_state;
-               sh->reconstruct_state = head_sh->reconstruct_state;
-               for (i = 0; i < sh->disks; i++) {
-                       if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
-                               wakeup_nr++;
-                       sh->dev[i].flags = head_sh->dev[i].flags;
-               }
-
-               spin_lock_irq(&sh->stripe_lock);
-               sh->batch_head = NULL;
-               spin_unlock_irq(&sh->stripe_lock);
-               if (sh->state & STRIPE_EXPAND_SYNC_FLAG)
-                       set_bit(STRIPE_HANDLE, &sh->state);
-               release_stripe(sh);
-       }
-
-       spin_lock_irq(&head_sh->stripe_lock);
-       head_sh->batch_head = NULL;
-       spin_unlock_irq(&head_sh->stripe_lock);
-       wake_up_nr(&conf->wait_for_overlap, wakeup_nr);
-       if (head_sh->state & STRIPE_EXPAND_SYNC_FLAG)
-               set_bit(STRIPE_HANDLE, &head_sh->state);
+       if (head_sh->batch_head && do_endio)
+               break_stripe_batch_list(head_sh, STRIPE_EXPAND_SYNC_FLAGS);
 }
 
 static void handle_stripe_dirtying(struct r5conf *conf,
@@ -4172,9 +4165,13 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 
 static int clear_batch_ready(struct stripe_head *sh)
 {
+       /* Return '1' if this is a member of batch, or
+        * '0' if it is a lone stripe or a head which can now be
+        * handled.
+        */
        struct stripe_head *tmp;
        if (!test_and_clear_bit(STRIPE_BATCH_READY, &sh->state))
-               return 0;
+               return (sh->batch_head && sh->batch_head != sh);
        spin_lock(&sh->stripe_lock);
        if (!sh->batch_head) {
                spin_unlock(&sh->stripe_lock);
@@ -4202,38 +4199,65 @@ static int clear_batch_ready(struct stripe_head *sh)
        return 0;
 }
 
-static void check_break_stripe_batch_list(struct stripe_head *sh)
+static void break_stripe_batch_list(struct stripe_head *head_sh,
+                                   unsigned long handle_flags)
 {
-       struct stripe_head *head_sh, *next;
+       struct stripe_head *sh, *next;
        int i;
-
-       if (!test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
-               return;
-
-       head_sh = sh;
+       int do_wakeup = 0;
 
        list_for_each_entry_safe(sh, next, &head_sh->batch_list, batch_list) {
 
                list_del_init(&sh->batch_list);
 
-               set_mask_bits(&sh->state, ~STRIPE_EXPAND_SYNC_FLAG,
-                             head_sh->state & ~((1 << STRIPE_ACTIVE) |
-                                                (1 << STRIPE_PREREAD_ACTIVE) |
-                                                (1 << STRIPE_DEGRADED) |
-                                                STRIPE_EXPAND_SYNC_FLAG));
+               WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
+                                         (1 << STRIPE_SYNCING) |
+                                         (1 << STRIPE_REPLACED) |
+                                         (1 << STRIPE_PREREAD_ACTIVE) |
+                                         (1 << STRIPE_DELAYED) |
+                                         (1 << STRIPE_BIT_DELAY) |
+                                         (1 << STRIPE_FULL_WRITE) |
+                                         (1 << STRIPE_BIOFILL_RUN) |
+                                         (1 << STRIPE_COMPUTE_RUN)  |
+                                         (1 << STRIPE_OPS_REQ_PENDING) |
+                                         (1 << STRIPE_DISCARD) |
+                                         (1 << STRIPE_BATCH_READY) |
+                                         (1 << STRIPE_BATCH_ERR) |
+                                         (1 << STRIPE_BITMAP_PENDING)));
+               WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
+                                             (1 << STRIPE_REPLACED)));
+
+               set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
+                                           (1 << STRIPE_DEGRADED)),
+                             head_sh->state & (1 << STRIPE_INSYNC));
+
                sh->check_state = head_sh->check_state;
                sh->reconstruct_state = head_sh->reconstruct_state;
-               for (i = 0; i < sh->disks; i++)
+               for (i = 0; i < sh->disks; i++) {
+                       if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+                               do_wakeup = 1;
                        sh->dev[i].flags = head_sh->dev[i].flags &
                                (~((1 << R5_WriteError) | (1 << R5_Overlap)));
-
+               }
                spin_lock_irq(&sh->stripe_lock);
                sh->batch_head = NULL;
                spin_unlock_irq(&sh->stripe_lock);
-
-               set_bit(STRIPE_HANDLE, &sh->state);
+               if (handle_flags == 0 ||
+                   sh->state & handle_flags)
+                       set_bit(STRIPE_HANDLE, &sh->state);
                release_stripe(sh);
        }
+       spin_lock_irq(&head_sh->stripe_lock);
+       head_sh->batch_head = NULL;
+       spin_unlock_irq(&head_sh->stripe_lock);
+       for (i = 0; i < head_sh->disks; i++)
+               if (test_and_clear_bit(R5_Overlap, &head_sh->dev[i].flags))
+                       do_wakeup = 1;
+       if (head_sh->state & handle_flags)
+               set_bit(STRIPE_HANDLE, &head_sh->state);
+
+       if (do_wakeup)
+               wake_up(&head_sh->raid_conf->wait_for_overlap);
 }
 
 static void handle_stripe(struct stripe_head *sh)
@@ -4258,7 +4282,8 @@ static void handle_stripe(struct stripe_head *sh)
                return;
        }
 
-       check_break_stripe_batch_list(sh);
+       if (test_and_clear_bit(STRIPE_BATCH_ERR, &sh->state))
+               break_stripe_batch_list(sh, 0);
 
        if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) {
                spin_lock(&sh->stripe_lock);
@@ -4312,6 +4337,7 @@ static void handle_stripe(struct stripe_head *sh)
        if (s.failed > conf->max_degraded) {
                sh->check_state = 0;
                sh->reconstruct_state = 0;
+               break_stripe_batch_list(sh, 0);
                if (s.to_read+s.to_write+s.written)
                        handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
                if (s.syncing + s.replacing)
index 7dc0dd8..896d603 100644 (file)
@@ -337,9 +337,12 @@ enum {
        STRIPE_ON_RELEASE_LIST,
        STRIPE_BATCH_READY,
        STRIPE_BATCH_ERR,
+       STRIPE_BITMAP_PENDING,  /* Being added to bitmap, don't add
+                                * to batch yet.
+                                */
 };
 
-#define STRIPE_EXPAND_SYNC_FLAG \
+#define STRIPE_EXPAND_SYNC_FLAGS \
        ((1 << STRIPE_EXPAND_SOURCE) |\
        (1 << STRIPE_EXPAND_READY) |\
        (1 << STRIPE_EXPANDING) |\
index ae498b5..46e3840 100644 (file)
@@ -431,6 +431,10 @@ int da9052_adc_read_temp(struct da9052 *da9052)
 EXPORT_SYMBOL_GPL(da9052_adc_read_temp);
 
 static const struct mfd_cell da9052_subdev_info[] = {
+       {
+               .name = "da9052-regulator",
+               .id = 0,
+       },
        {
                .name = "da9052-regulator",
                .id = 1,
@@ -483,10 +487,6 @@ static const struct mfd_cell da9052_subdev_info[] = {
                .name = "da9052-regulator",
                .id = 13,
        },
-       {
-               .name = "da9052-regulator",
-               .id = 14,
-       },
        {
                .name = "da9052-onkey",
        },
index a6dcbf8..6f9ffb9 100644 (file)
@@ -2358,11 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter)
                                    adapter->cfg_num_qs);
 
        for_all_evt_queues(adapter, eqo, i) {
+               int numa_node = dev_to_node(&adapter->pdev->dev);
                if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
                        return -ENOMEM;
-               cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev),
-                                           eqo->affinity_mask);
-
+               cpumask_set_cpu(cpumask_local_spread(i, numa_node),
+                               eqo->affinity_mask);
                netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
                               BE_NAPI_WEIGHT);
                napi_hash_add(&eqo->napi);
index de79193..b9df0cb 100644 (file)
@@ -2084,12 +2084,8 @@ static void emac_ethtool_get_pauseparam(struct net_device *ndev,
 
 static int emac_get_regs_len(struct emac_instance *dev)
 {
-       if (emac_has_feature(dev, EMAC_FTR_EMAC4))
-               return sizeof(struct emac_ethtool_regs_subhdr) +
-                       EMAC4_ETHTOOL_REGS_SIZE(dev);
-       else
                return sizeof(struct emac_ethtool_regs_subhdr) +
-                       EMAC_ETHTOOL_REGS_SIZE(dev);
+                       sizeof(struct emac_regs);
 }
 
 static int emac_ethtool_get_regs_len(struct net_device *ndev)
@@ -2114,15 +2110,15 @@ static void *emac_dump_regs(struct emac_instance *dev, void *buf)
        struct emac_ethtool_regs_subhdr *hdr = buf;
 
        hdr->index = dev->cell_index;
-       if (emac_has_feature(dev, EMAC_FTR_EMAC4)) {
+       if (emac_has_feature(dev, EMAC_FTR_EMAC4SYNC)) {
+               hdr->version = EMAC4SYNC_ETHTOOL_REGS_VER;
+       } else if (emac_has_feature(dev, EMAC_FTR_EMAC4)) {
                hdr->version = EMAC4_ETHTOOL_REGS_VER;
-               memcpy_fromio(hdr + 1, dev->emacp, EMAC4_ETHTOOL_REGS_SIZE(dev));
-               return (void *)(hdr + 1) + EMAC4_ETHTOOL_REGS_SIZE(dev);
        } else {
                hdr->version = EMAC_ETHTOOL_REGS_VER;
-               memcpy_fromio(hdr + 1, dev->emacp, EMAC_ETHTOOL_REGS_SIZE(dev));
-               return (void *)(hdr + 1) + EMAC_ETHTOOL_REGS_SIZE(dev);
        }
+       memcpy_fromio(hdr + 1, dev->emacp, sizeof(struct emac_regs));
+       return (void *)(hdr + 1) + sizeof(struct emac_regs);
 }
 
 static void emac_ethtool_get_regs(struct net_device *ndev,
index 67f342a..28df374 100644 (file)
@@ -461,10 +461,7 @@ struct emac_ethtool_regs_subhdr {
 };
 
 #define EMAC_ETHTOOL_REGS_VER          0
-#define EMAC_ETHTOOL_REGS_SIZE(dev)    ((dev)->rsrc_regs.end - \
-                                        (dev)->rsrc_regs.start + 1)
-#define EMAC4_ETHTOOL_REGS_VER         1
-#define EMAC4_ETHTOOL_REGS_SIZE(dev)   ((dev)->rsrc_regs.end - \
-                                        (dev)->rsrc_regs.start + 1)
+#define EMAC4_ETHTOOL_REGS_VER         1
+#define EMAC4SYNC_ETHTOOL_REGS_VER     2
 
 #endif /* __IBM_NEWEMAC_CORE_H */
index 4f7dc04..529ef05 100644 (file)
@@ -714,8 +714,13 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
                                         msecs_to_jiffies(timeout))) {
                mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n",
                          op);
-               err = -EIO;
-               goto out_reset;
+               if (op == MLX4_CMD_NOP) {
+                       err = -EBUSY;
+                       goto out;
+               } else {
+                       err = -EIO;
+                       goto out_reset;
+               }
        }
 
        err = context->result;
index 32f5ec7..cf467a9 100644 (file)
@@ -1501,17 +1501,13 @@ static int mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
 {
        struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
        int numa_node = priv->mdev->dev->numa_node;
-       int ret = 0;
 
        if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL))
                return -ENOMEM;
 
-       ret = cpumask_set_cpu_local_first(ring_idx, numa_node,
-                                         ring->affinity_mask);
-       if (ret)
-               free_cpumask_var(ring->affinity_mask);
-
-       return ret;
+       cpumask_set_cpu(cpumask_local_spread(ring_idx, numa_node),
+                       ring->affinity_mask);
+       return 0;
 }
 
 static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
index f7bf312..7bed3a8 100644 (file)
@@ -144,9 +144,9 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
        ring->queue_index = queue_index;
 
        if (queue_index < priv->num_tx_rings_p_up)
-               cpumask_set_cpu_local_first(queue_index,
-                                           priv->mdev->dev->numa_node,
-                                           &ring->affinity_mask);
+               cpumask_set_cpu(cpumask_local_spread(queue_index,
+                                                    priv->mdev->dev->numa_node),
+                               &ring->affinity_mask);
 
        *pring = ring;
        return 0;
index e0c31e3..6409a06 100644 (file)
@@ -3025,9 +3025,9 @@ netxen_sysfs_read_dimm(struct file *filp, struct kobject *kobj,
        u8 dw, rows, cols, banks, ranks;
        u32 val;
 
-       if (size != sizeof(struct netxen_dimm_cfg)) {
+       if (size < attr->size) {
                netdev_err(netdev, "Invalid size\n");
-               return -1;
+               return -EINVAL;
        }
 
        memset(&dimm, 0, sizeof(struct netxen_dimm_cfg));
@@ -3137,7 +3137,7 @@ out:
 
 static struct bin_attribute bin_attr_dimm = {
        .attr = { .name = "dimm", .mode = (S_IRUGO | S_IWUSR) },
-       .size = 0,
+       .size = sizeof(struct netxen_dimm_cfg),
        .read = netxen_sysfs_read_dimm,
 };
 
index 2ac9552..73bab98 100644 (file)
@@ -117,6 +117,12 @@ struct stmmac_priv {
        int use_riwt;
        int irq_wake;
        spinlock_t ptp_lock;
+
+#ifdef CONFIG_DEBUG_FS
+       struct dentry *dbgfs_dir;
+       struct dentry *dbgfs_rings_status;
+       struct dentry *dbgfs_dma_cap;
+#endif
 };
 
 int stmmac_mdio_unregister(struct net_device *ndev);
index 05c146f..2c5ce2b 100644 (file)
@@ -118,7 +118,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
 
 #ifdef CONFIG_DEBUG_FS
 static int stmmac_init_fs(struct net_device *dev);
-static void stmmac_exit_fs(void);
+static void stmmac_exit_fs(struct net_device *dev);
 #endif
 
 #define STMMAC_COAL_TIMER(x) (jiffies + usecs_to_jiffies(x))
@@ -1916,7 +1916,7 @@ static int stmmac_release(struct net_device *dev)
        netif_carrier_off(dev);
 
 #ifdef CONFIG_DEBUG_FS
-       stmmac_exit_fs();
+       stmmac_exit_fs(dev);
 #endif
 
        stmmac_release_ptp(priv);
@@ -2508,8 +2508,6 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
 #ifdef CONFIG_DEBUG_FS
 static struct dentry *stmmac_fs_dir;
-static struct dentry *stmmac_rings_status;
-static struct dentry *stmmac_dma_cap;
 
 static void sysfs_display_ring(void *head, int size, int extend_desc,
                               struct seq_file *seq)
@@ -2648,36 +2646,39 @@ static const struct file_operations stmmac_dma_cap_fops = {
 
 static int stmmac_init_fs(struct net_device *dev)
 {
-       /* Create debugfs entries */
-       stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL);
+       struct stmmac_priv *priv = netdev_priv(dev);
+
+       /* Create per netdev entries */
+       priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir);
 
-       if (!stmmac_fs_dir || IS_ERR(stmmac_fs_dir)) {
-               pr_err("ERROR %s, debugfs create directory failed\n",
-                      STMMAC_RESOURCE_NAME);
+       if (!priv->dbgfs_dir || IS_ERR(priv->dbgfs_dir)) {
+               pr_err("ERROR %s/%s, debugfs create directory failed\n",
+                      STMMAC_RESOURCE_NAME, dev->name);
 
                return -ENOMEM;
        }
 
        /* Entry to report DMA RX/TX rings */
-       stmmac_rings_status = debugfs_create_file("descriptors_status",
-                                                 S_IRUGO, stmmac_fs_dir, dev,
-                                                 &stmmac_rings_status_fops);
+       priv->dbgfs_rings_status =
+               debugfs_create_file("descriptors_status", S_IRUGO,
+                                   priv->dbgfs_dir, dev,
+                                   &stmmac_rings_status_fops);
 
-       if (!stmmac_rings_status || IS_ERR(stmmac_rings_status)) {
+       if (!priv->dbgfs_rings_status || IS_ERR(priv->dbgfs_rings_status)) {
                pr_info("ERROR creating stmmac ring debugfs file\n");
-               debugfs_remove(stmmac_fs_dir);
+               debugfs_remove_recursive(priv->dbgfs_dir);
 
                return -ENOMEM;
        }
 
        /* Entry to report the DMA HW features */
-       stmmac_dma_cap = debugfs_create_file("dma_cap", S_IRUGO, stmmac_fs_dir,
-                                            dev, &stmmac_dma_cap_fops);
+       priv->dbgfs_dma_cap = debugfs_create_file("dma_cap", S_IRUGO,
+                                           priv->dbgfs_dir,
+                                           dev, &stmmac_dma_cap_fops);
 
-       if (!stmmac_dma_cap || IS_ERR(stmmac_dma_cap)) {
+       if (!priv->dbgfs_dma_cap || IS_ERR(priv->dbgfs_dma_cap)) {
                pr_info("ERROR creating stmmac MMC debugfs file\n");
-               debugfs_remove(stmmac_rings_status);
-               debugfs_remove(stmmac_fs_dir);
+               debugfs_remove_recursive(priv->dbgfs_dir);
 
                return -ENOMEM;
        }
@@ -2685,11 +2686,11 @@ static int stmmac_init_fs(struct net_device *dev)
        return 0;
 }
 
-static void stmmac_exit_fs(void)
+static void stmmac_exit_fs(struct net_device *dev)
 {
-       debugfs_remove(stmmac_rings_status);
-       debugfs_remove(stmmac_dma_cap);
-       debugfs_remove(stmmac_fs_dir);
+       struct stmmac_priv *priv = netdev_priv(dev);
+
+       debugfs_remove_recursive(priv->dbgfs_dir);
 }
 #endif /* CONFIG_DEBUG_FS */
 
@@ -3149,6 +3150,35 @@ err:
 __setup("stmmaceth=", stmmac_cmdline_opt);
 #endif /* MODULE */
 
+static int __init stmmac_init(void)
+{
+#ifdef CONFIG_DEBUG_FS
+       /* Create debugfs main directory if it doesn't exist yet */
+       if (!stmmac_fs_dir) {
+               stmmac_fs_dir = debugfs_create_dir(STMMAC_RESOURCE_NAME, NULL);
+
+               if (!stmmac_fs_dir || IS_ERR(stmmac_fs_dir)) {
+                       pr_err("ERROR %s, debugfs create directory failed\n",
+                              STMMAC_RESOURCE_NAME);
+
+                       return -ENOMEM;
+               }
+       }
+#endif
+
+       return 0;
+}
+
+static void __exit stmmac_exit(void)
+{
+#ifdef CONFIG_DEBUG_FS
+       debugfs_remove_recursive(stmmac_fs_dir);
+#endif
+}
+
+module_init(stmmac_init)
+module_exit(stmmac_exit)
+
 MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet device driver");
 MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
 MODULE_LICENSE("GPL");
index fb276f6..34a75cb 100644 (file)
@@ -755,6 +755,45 @@ static int amd_xgbe_phy_set_mode(struct phy_device *phydev,
        return ret;
 }
 
+static bool amd_xgbe_phy_use_xgmii_mode(struct phy_device *phydev)
+{
+       if (phydev->autoneg == AUTONEG_ENABLE) {
+               if (phydev->advertising & ADVERTISED_10000baseKR_Full)
+                       return true;
+       } else {
+               if (phydev->speed == SPEED_10000)
+                       return true;
+       }
+
+       return false;
+}
+
+static bool amd_xgbe_phy_use_gmii_2500_mode(struct phy_device *phydev)
+{
+       if (phydev->autoneg == AUTONEG_ENABLE) {
+               if (phydev->advertising & ADVERTISED_2500baseX_Full)
+                       return true;
+       } else {
+               if (phydev->speed == SPEED_2500)
+                       return true;
+       }
+
+       return false;
+}
+
+static bool amd_xgbe_phy_use_gmii_mode(struct phy_device *phydev)
+{
+       if (phydev->autoneg == AUTONEG_ENABLE) {
+               if (phydev->advertising & ADVERTISED_1000baseKX_Full)
+                       return true;
+       } else {
+               if (phydev->speed == SPEED_1000)
+                       return true;
+       }
+
+       return false;
+}
+
 static int amd_xgbe_phy_set_an(struct phy_device *phydev, bool enable,
                               bool restart)
 {
@@ -1235,11 +1274,11 @@ static int amd_xgbe_phy_config_init(struct phy_device *phydev)
        /* Set initial mode - call the mode setting routines
         * directly to insure we are properly configured
         */
-       if (phydev->advertising & SUPPORTED_10000baseKR_Full)
+       if (amd_xgbe_phy_use_xgmii_mode(phydev))
                ret = amd_xgbe_phy_xgmii_mode(phydev);
-       else if (phydev->advertising & SUPPORTED_1000baseKX_Full)
+       else if (amd_xgbe_phy_use_gmii_mode(phydev))
                ret = amd_xgbe_phy_gmii_mode(phydev);
-       else if (phydev->advertising & SUPPORTED_2500baseX_Full)
+       else if (amd_xgbe_phy_use_gmii_2500_mode(phydev))
                ret = amd_xgbe_phy_gmii_2500_mode(phydev);
        else
                ret = -EINVAL;
index 64c74c6..b5dc59d 100644 (file)
@@ -404,7 +404,7 @@ static struct phy_driver bcm7xxx_driver[] = {
        .name           = "Broadcom BCM7425",
        .features       = PHY_GBIT_FEATURES |
                          SUPPORTED_Pause | SUPPORTED_Asym_Pause,
-       .flags          = 0,
+       .flags          = PHY_IS_INTERNAL,
        .config_init    = bcm7xxx_config_init,
        .config_aneg    = genphy_config_aneg,
        .read_status    = genphy_read_status,
index 496e02f..00cb41e 100644 (file)
@@ -47,7 +47,7 @@
 #define PSF_TX         0x1000
 #define EXT_EVENT      1
 #define CAL_EVENT      7
-#define CAL_TRIGGER    7
+#define CAL_TRIGGER    1
 #define DP83640_N_PINS 12
 
 #define MII_DP83640_MICR 0x11
@@ -496,7 +496,9 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
                        else
                                evnt |= EVNT_RISE;
                }
+               mutex_lock(&clock->extreg_lock);
                ext_write(0, phydev, PAGE5, PTP_EVNT, evnt);
+               mutex_unlock(&clock->extreg_lock);
                return 0;
 
        case PTP_CLK_REQ_PEROUT:
@@ -532,6 +534,8 @@ static u8 status_frame_src[6] = { 0x08, 0x00, 0x17, 0x0B, 0x6B, 0x0F };
 
 static void enable_status_frames(struct phy_device *phydev, bool on)
 {
+       struct dp83640_private *dp83640 = phydev->priv;
+       struct dp83640_clock *clock = dp83640->clock;
        u16 cfg0 = 0, ver;
 
        if (on)
@@ -539,9 +543,13 @@ static void enable_status_frames(struct phy_device *phydev, bool on)
 
        ver = (PSF_PTPVER & VERSIONPTP_MASK) << VERSIONPTP_SHIFT;
 
+       mutex_lock(&clock->extreg_lock);
+
        ext_write(0, phydev, PAGE5, PSF_CFG0, cfg0);
        ext_write(0, phydev, PAGE6, PSF_CFG1, ver);
 
+       mutex_unlock(&clock->extreg_lock);
+
        if (!phydev->attached_dev) {
                pr_warn("expected to find an attached netdevice\n");
                return;
@@ -838,7 +846,7 @@ static void decode_rxts(struct dp83640_private *dp83640,
        list_del_init(&rxts->list);
        phy2rxts(phy_rxts, rxts);
 
-       spin_lock_irqsave(&dp83640->rx_queue.lock, flags);
+       spin_lock(&dp83640->rx_queue.lock);
        skb_queue_walk(&dp83640->rx_queue, skb) {
                struct dp83640_skb_info *skb_info;
 
@@ -853,7 +861,7 @@ static void decode_rxts(struct dp83640_private *dp83640,
                        break;
                }
        }
-       spin_unlock_irqrestore(&dp83640->rx_queue.lock, flags);
+       spin_unlock(&dp83640->rx_queue.lock);
 
        if (!shhwtstamps)
                list_add_tail(&rxts->list, &dp83640->rxts);
@@ -1173,11 +1181,18 @@ static int dp83640_config_init(struct phy_device *phydev)
 
        if (clock->chosen && !list_empty(&clock->phylist))
                recalibrate(clock);
-       else
+       else {
+               mutex_lock(&clock->extreg_lock);
                enable_broadcast(phydev, clock->page, 1);
+               mutex_unlock(&clock->extreg_lock);
+       }
 
        enable_status_frames(phydev, true);
+
+       mutex_lock(&clock->extreg_lock);
        ext_write(0, phydev, PAGE4, PTP_CTL, PTP_ENABLE);
+       mutex_unlock(&clock->extreg_lock);
+
        return 0;
 }
 
index ab019b4..f89f446 100644 (file)
@@ -21,6 +21,7 @@ config IWLWIFI
                Intel 7260 Wi-Fi Adapter
                Intel 3160 Wi-Fi Adapter
                Intel 7265 Wi-Fi Adapter
+               Intel 3165 Wi-Fi Adapter
 
 
          This driver uses the kernel's mac80211 subsystem.
index 36e786f..74ad278 100644 (file)
 
 /* Highest firmware API version supported */
 #define IWL7260_UCODE_API_MAX  13
-#define IWL3160_UCODE_API_MAX  13
 
 /* Oldest version we won't warn about */
 #define IWL7260_UCODE_API_OK   12
-#define IWL3160_UCODE_API_OK   12
+#define IWL3165_UCODE_API_OK   13
 
 /* Lowest firmware API version supported */
 #define IWL7260_UCODE_API_MIN  10
-#define IWL3160_UCODE_API_MIN  10
+#define IWL3165_UCODE_API_MIN  13
 
 /* NVM versions */
 #define IWL7260_NVM_VERSION            0x0a1d
 #define IWL3160_FW_PRE "iwlwifi-3160-"
 #define IWL3160_MODULE_FIRMWARE(api) IWL3160_FW_PRE __stringify(api) ".ucode"
 
-#define IWL3165_FW_PRE "iwlwifi-3165-"
-#define IWL3165_MODULE_FIRMWARE(api) IWL3165_FW_PRE __stringify(api) ".ucode"
-
 #define IWL7265_FW_PRE "iwlwifi-7265-"
 #define IWL7265_MODULE_FIRMWARE(api) IWL7265_FW_PRE __stringify(api) ".ucode"
 
@@ -248,8 +244,13 @@ static const struct iwl_ht_params iwl7265_ht_params = {
 
 const struct iwl_cfg iwl3165_2ac_cfg = {
        .name = "Intel(R) Dual Band Wireless AC 3165",
-       .fw_name_pre = IWL3165_FW_PRE,
+       .fw_name_pre = IWL7265D_FW_PRE,
        IWL_DEVICE_7000,
+       /* sparse doesn't like the re-assignment but it is safe */
+#ifndef __CHECKER__
+       .ucode_api_ok = IWL3165_UCODE_API_OK,
+       .ucode_api_min = IWL3165_UCODE_API_MIN,
+#endif
        .ht_params = &iwl7000_ht_params,
        .nvm_ver = IWL3165_NVM_VERSION,
        .nvm_calib_ver = IWL3165_TX_POWER_VERSION,
@@ -325,6 +326,5 @@ const struct iwl_cfg iwl7265d_n_cfg = {
 
 MODULE_FIRMWARE(IWL7260_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
 MODULE_FIRMWARE(IWL3160_MODULE_FIRMWARE(IWL3160_UCODE_API_OK));
-MODULE_FIRMWARE(IWL3165_MODULE_FIRMWARE(IWL3160_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
 MODULE_FIRMWARE(IWL7265D_MODULE_FIRMWARE(IWL7260_UCODE_API_OK));
index 41ff85d..21302b6 100644 (file)
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,6 +32,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -748,6 +750,9 @@ void iwl_init_ht_hw_capab(const struct iwl_cfg *cfg,
                return;
        }
 
+       if (data->sku_cap_mimo_disabled)
+               rx_chains = 1;
+
        ht_info->ht_supported = true;
        ht_info->cap = IEEE80211_HT_CAP_DSSSCCK40;
 
index 5234a0b..750c8c9 100644 (file)
@@ -6,6 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,6 +32,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -84,6 +86,7 @@ struct iwl_nvm_data {
        bool sku_cap_11ac_enable;
        bool sku_cap_amt_enable;
        bool sku_cap_ipan_enable;
+       bool sku_cap_mimo_disabled;
 
        u16 radio_cfg_type;
        u8 radio_cfg_step;
index 83903a5..75e96db 100644 (file)
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -32,7 +32,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -116,10 +116,11 @@ enum family_8000_nvm_offsets {
 
 /* SKU Capabilities (actual values from NVM definition) */
 enum nvm_sku_bits {
-       NVM_SKU_CAP_BAND_24GHZ  = BIT(0),
-       NVM_SKU_CAP_BAND_52GHZ  = BIT(1),
-       NVM_SKU_CAP_11N_ENABLE  = BIT(2),
-       NVM_SKU_CAP_11AC_ENABLE = BIT(3),
+       NVM_SKU_CAP_BAND_24GHZ          = BIT(0),
+       NVM_SKU_CAP_BAND_52GHZ          = BIT(1),
+       NVM_SKU_CAP_11N_ENABLE          = BIT(2),
+       NVM_SKU_CAP_11AC_ENABLE         = BIT(3),
+       NVM_SKU_CAP_MIMO_DISABLE        = BIT(5),
 };
 
 /*
@@ -368,6 +369,11 @@ static void iwl_init_vht_hw_capab(const struct iwl_cfg *cfg,
        if (cfg->ht_params->ldpc)
                vht_cap->cap |= IEEE80211_VHT_CAP_RXLDPC;
 
+       if (data->sku_cap_mimo_disabled) {
+               num_rx_ants = 1;
+               num_tx_ants = 1;
+       }
+
        if (num_tx_ants > 1)
                vht_cap->cap |= IEEE80211_VHT_CAP_TXSTBC;
        else
@@ -527,6 +533,10 @@ static void iwl_set_hw_address_family_8000(struct device *dev,
        const u8 *hw_addr;
 
        if (mac_override) {
+               static const u8 reserved_mac[] = {
+                       0x02, 0xcc, 0xaa, 0xff, 0xee, 0x00
+               };
+
                hw_addr = (const u8 *)(mac_override +
                                 MAC_ADDRESS_OVERRIDE_FAMILY_8000);
 
@@ -538,7 +548,12 @@ static void iwl_set_hw_address_family_8000(struct device *dev,
                data->hw_addr[4] = hw_addr[5];
                data->hw_addr[5] = hw_addr[4];
 
-               if (is_valid_ether_addr(data->hw_addr))
+               /*
+                * Force the use of the OTP MAC address in case of reserved MAC
+                * address in the NVM, or if address is given but invalid.
+                */
+               if (is_valid_ether_addr(data->hw_addr) &&
+                   memcmp(reserved_mac, hw_addr, ETH_ALEN) != 0)
                        return;
 
                IWL_ERR_DEV(dev,
@@ -610,6 +625,7 @@ iwl_parse_nvm_data(struct device *dev, const struct iwl_cfg *cfg,
                data->sku_cap_11n_enable = false;
        data->sku_cap_11ac_enable = data->sku_cap_11n_enable &&
                                    (sku & NVM_SKU_CAP_11AC_ENABLE);
+       data->sku_cap_mimo_disabled = sku & NVM_SKU_CAP_MIMO_DISABLE;
 
        data->n_hw_addrs = iwl_get_n_hw_addrs(cfg, nvm_sw);
 
index d954591..6ac6de2 100644 (file)
@@ -776,7 +776,7 @@ static int iwl_mvm_bt_coex_reduced_txp(struct iwl_mvm *mvm, u8 sta_id,
        struct iwl_host_cmd cmd = {
                .id = BT_CONFIG,
                .len = { sizeof(*bt_cmd), },
-               .dataflags = { IWL_HCMD_DFL_NOCOPY, },
+               .dataflags = { IWL_HCMD_DFL_DUP, },
                .flags = CMD_ASYNC,
        };
        struct iwl_mvm_sta *mvmsta;
index 1b1b2bf..4310cf1 100644 (file)
@@ -1750,8 +1750,10 @@ static void iwl_mvm_query_netdetect_reasons(struct iwl_mvm *mvm,
        int i, j, n_matches, ret;
 
        fw_status = iwl_mvm_get_wakeup_status(mvm, vif);
-       if (!IS_ERR_OR_NULL(fw_status))
+       if (!IS_ERR_OR_NULL(fw_status)) {
                reasons = le32_to_cpu(fw_status->wakeup_reasons);
+               kfree(fw_status);
+       }
 
        if (reasons & IWL_WOWLAN_WAKEUP_BY_RFKILL_DEASSERTED)
                wakeup.rfkill_release = true;
@@ -1868,15 +1870,15 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
        /* get the BSS vif pointer again */
        vif = iwl_mvm_get_bss_vif(mvm);
        if (IS_ERR_OR_NULL(vif))
-               goto out_unlock;
+               goto err;
 
        ret = iwl_trans_d3_resume(mvm->trans, &d3_status, test);
        if (ret)
-               goto out_unlock;
+               goto err;
 
        if (d3_status != IWL_D3_STATUS_ALIVE) {
                IWL_INFO(mvm, "Device was reset during suspend\n");
-               goto out_unlock;
+               goto err;
        }
 
        /* query SRAM first in case we want event logging */
@@ -1902,7 +1904,8 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
                goto out_iterate;
        }
 
- out_unlock:
+err:
+       iwl_mvm_free_nd(mvm);
        mutex_unlock(&mvm->mutex);
 
 out_iterate:
@@ -1915,6 +1918,14 @@ out:
        /* return 1 to reconfigure the device */
        set_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status);
        set_bit(IWL_MVM_STATUS_D3_RECONFIG, &mvm->status);
+
+       /* We always return 1, which causes mac80211 to do a reconfig
+        * with IEEE80211_RECONFIG_TYPE_RESTART.  This type of
+        * reconfig calls iwl_mvm_restart_complete(), where we unref
+        * the IWL_MVM_REF_UCODE_DOWN, so we need to take the
+        * reference here.
+        */
+       iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN);
        return 1;
 }
 
@@ -2021,7 +2032,6 @@ static int iwl_mvm_d3_test_release(struct inode *inode, struct file *file)
        __iwl_mvm_resume(mvm, true);
        rtnl_unlock();
        iwl_abort_notification_waits(&mvm->notif_wait);
-       iwl_mvm_ref(mvm, IWL_MVM_REF_UCODE_DOWN);
        ieee80211_restart_hw(mvm->hw);
 
        /* wait for restart and disconnect all interfaces */
index 40265b9..dda9f7b 100644 (file)
@@ -3995,9 +3995,6 @@ static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw,
        if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_MLME))
                return;
 
-       if (event->u.mlme.status == MLME_SUCCESS)
-               return;
-
        trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_MLME);
        trig_mlme = (void *)trig->data;
        if (!iwl_fw_dbg_trigger_check_stop(mvm, vif, trig))
index 1c66297..2ea0123 100644 (file)
@@ -1263,11 +1263,13 @@ static void iwl_mvm_d0i3_exit_work(struct work_struct *wk)
                ieee80211_iterate_active_interfaces(
                        mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
                        iwl_mvm_d0i3_disconnect_iter, mvm);
-
-       iwl_free_resp(&get_status_cmd);
 out:
        iwl_mvm_d0i3_enable_tx(mvm, qos_seq);
 
+       /* qos_seq might point inside resp_pkt, so free it only now */
+       if (get_status_cmd.resp_pkt)
+               iwl_free_resp(&get_status_cmd);
+
        /* the FW might have updated the regdomain */
        iwl_mvm_update_changed_regdom(mvm);
 
index f9928f2..33cd68a 100644 (file)
@@ -180,6 +180,9 @@ static bool rs_mimo_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
        if (iwl_mvm_vif_low_latency(mvmvif) && mvmsta->vif->p2p)
                return false;
 
+       if (mvm->nvm_data->sku_cap_mimo_disabled)
+               return false;
+
        return true;
 }
 
index 47bbf57..d6f6515 100644 (file)
@@ -1049,9 +1049,11 @@ static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power)
                iwl_pcie_rx_stop(trans);
 
                /* Power-down device's busmaster DMA clocks */
-               iwl_write_prph(trans, APMG_CLK_DIS_REG,
-                              APMG_CLK_VAL_DMA_CLK_RQT);
-               udelay(5);
+               if (trans->cfg->device_family != IWL_DEVICE_FAMILY_8000) {
+                       iwl_write_prph(trans, APMG_CLK_DIS_REG,
+                                      APMG_CLK_VAL_DMA_CLK_RQT);
+                       udelay(5);
+               }
        }
 
        /* Make sure (redundant) we've released our request to stay awake */
index 3d8dbf5..fee0241 100644 (file)
@@ -793,6 +793,7 @@ static void connect(struct backend_info *be)
                        goto err;
                }
 
+               queue->credit_bytes = credit_bytes;
                queue->remaining_credit = credit_bytes;
                queue->credit_usec = credit_usec;
 
index 3f45afd..e031c94 100644 (file)
@@ -1698,6 +1698,7 @@ static void xennet_destroy_queues(struct netfront_info *info)
 
                if (netif_running(info->netdev))
                        napi_disable(&queue->napi);
+               del_timer_sync(&queue->rx_refill_timer);
                netif_napi_del(&queue->napi);
        }
 
@@ -2102,9 +2103,6 @@ static const struct attribute_group xennet_dev_group = {
 static int xennet_remove(struct xenbus_device *dev)
 {
        struct netfront_info *info = dev_get_drvdata(&dev->dev);
-       unsigned int num_queues = info->netdev->real_num_tx_queues;
-       struct netfront_queue *queue = NULL;
-       unsigned int i = 0;
 
        dev_dbg(&dev->dev, "%s\n", dev->nodename);
 
@@ -2112,16 +2110,7 @@ static int xennet_remove(struct xenbus_device *dev)
 
        unregister_netdev(info->netdev);
 
-       for (i = 0; i < num_queues; ++i) {
-               queue = &info->queues[i];
-               del_timer_sync(&queue->rx_refill_timer);
-       }
-
-       if (num_queues) {
-               kfree(info->queues);
-               info->queues = NULL;
-       }
-
+       xennet_destroy_queues(info);
        xennet_free_netdev(info->netdev);
 
        return 0;
index 4ad5c1a..e406e3d 100644 (file)
@@ -643,7 +643,9 @@ static const struct cygnus_gpio_pin_range cygnus_gpio_pintable[] = {
        CYGNUS_PINRANGE(87, 104, 12),
        CYGNUS_PINRANGE(99, 102, 2),
        CYGNUS_PINRANGE(101, 90, 4),
-       CYGNUS_PINRANGE(105, 116, 10),
+       CYGNUS_PINRANGE(105, 116, 6),
+       CYGNUS_PINRANGE(111, 100, 2),
+       CYGNUS_PINRANGE(113, 122, 4),
        CYGNUS_PINRANGE(123, 11, 1),
        CYGNUS_PINRANGE(124, 38, 4),
        CYGNUS_PINRANGE(128, 43, 1),
index 82f691e..732ff75 100644 (file)
@@ -1292,6 +1292,49 @@ static void chv_gpio_irq_unmask(struct irq_data *d)
        chv_gpio_irq_mask_unmask(d, false);
 }
 
+static unsigned chv_gpio_irq_startup(struct irq_data *d)
+{
+       /*
+        * Check if the interrupt has been requested with 0 as triggering
+        * type. In that case it is assumed that the current values
+        * programmed to the hardware are used (e.g BIOS configured
+        * defaults).
+        *
+        * In that case ->irq_set_type() will never be called so we need to
+        * read back the values from hardware now, set correct flow handler
+        * and update mappings before the interrupt is being used.
+        */
+       if (irqd_get_trigger_type(d) == IRQ_TYPE_NONE) {
+               struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+               struct chv_pinctrl *pctrl = gpiochip_to_pinctrl(gc);
+               unsigned offset = irqd_to_hwirq(d);
+               int pin = chv_gpio_offset_to_pin(pctrl, offset);
+               irq_flow_handler_t handler;
+               unsigned long flags;
+               u32 intsel, value;
+
+               intsel = readl(chv_padreg(pctrl, pin, CHV_PADCTRL0));
+               intsel &= CHV_PADCTRL0_INTSEL_MASK;
+               intsel >>= CHV_PADCTRL0_INTSEL_SHIFT;
+
+               value = readl(chv_padreg(pctrl, pin, CHV_PADCTRL1));
+               if (value & CHV_PADCTRL1_INTWAKECFG_LEVEL)
+                       handler = handle_level_irq;
+               else
+                       handler = handle_edge_irq;
+
+               spin_lock_irqsave(&pctrl->lock, flags);
+               if (!pctrl->intr_lines[intsel]) {
+                       __irq_set_handler_locked(d->irq, handler);
+                       pctrl->intr_lines[intsel] = offset;
+               }
+               spin_unlock_irqrestore(&pctrl->lock, flags);
+       }
+
+       chv_gpio_irq_unmask(d);
+       return 0;
+}
+
 static int chv_gpio_irq_type(struct irq_data *d, unsigned type)
 {
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
@@ -1357,6 +1400,7 @@ static int chv_gpio_irq_type(struct irq_data *d, unsigned type)
 
 static struct irq_chip chv_gpio_irqchip = {
        .name = "chv-gpio",
+       .irq_startup = chv_gpio_irq_startup,
        .irq_ack = chv_gpio_irq_ack,
        .irq_mask = chv_gpio_irq_mask,
        .irq_unmask = chv_gpio_irq_unmask,
index edcd140..a70a5fe 100644 (file)
@@ -569,7 +569,7 @@ static int meson_gpiolib_register(struct meson_pinctrl *pc)
                domain->chip.direction_output = meson_gpio_direction_output;
                domain->chip.get = meson_gpio_get;
                domain->chip.set = meson_gpio_set;
-               domain->chip.base = -1;
+               domain->chip.base = domain->data->pin_base;
                domain->chip.ngpio = domain->data->num_pins;
                domain->chip.can_sleep = false;
                domain->chip.of_node = domain->of_node;
index 2f7ea62..9677807 100644 (file)
@@ -876,13 +876,13 @@ static struct meson_domain_data meson8b_domain_data[] = {
                .banks          = meson8b_banks,
                .num_banks      = ARRAY_SIZE(meson8b_banks),
                .pin_base       = 0,
-               .num_pins       = 83,
+               .num_pins       = 130,
        },
        {
                .name           = "ao-bank",
                .banks          = meson8b_ao_banks,
                .num_banks      = ARRAY_SIZE(meson8b_ao_banks),
-               .pin_base       = 83,
+               .pin_base       = 130,
                .num_pins       = 16,
        },
 };
index 9bb9ad6..28f3281 100644 (file)
@@ -2897,7 +2897,7 @@ static ssize_t hotkey_wakeup_reason_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_wakeup_reason);
 }
 
-static DEVICE_ATTR_RO(hotkey_wakeup_reason);
+static DEVICE_ATTR(wakeup_reason, S_IRUGO, hotkey_wakeup_reason_show, NULL);
 
 static void hotkey_wakeup_reason_notify_change(void)
 {
@@ -2913,7 +2913,8 @@ static ssize_t hotkey_wakeup_hotunplug_complete_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%d\n", hotkey_autosleep_ack);
 }
 
-static DEVICE_ATTR_RO(hotkey_wakeup_hotunplug_complete);
+static DEVICE_ATTR(wakeup_hotunplug_complete, S_IRUGO,
+                  hotkey_wakeup_hotunplug_complete_show, NULL);
 
 static void hotkey_wakeup_hotunplug_complete_notify_change(void)
 {
@@ -2978,8 +2979,8 @@ static struct attribute *hotkey_attributes[] __initdata = {
        &dev_attr_hotkey_enable.attr,
        &dev_attr_hotkey_bios_enabled.attr,
        &dev_attr_hotkey_bios_mask.attr,
-       &dev_attr_hotkey_wakeup_reason.attr,
-       &dev_attr_hotkey_wakeup_hotunplug_complete.attr,
+       &dev_attr_wakeup_reason.attr,
+       &dev_attr_wakeup_hotunplug_complete.attr,
        &dev_attr_hotkey_mask.attr,
        &dev_attr_hotkey_all_mask.attr,
        &dev_attr_hotkey_recommended_mask.attr,
@@ -4393,12 +4394,13 @@ static ssize_t wan_enable_store(struct device *dev,
                        attr, buf, count);
 }
 
-static DEVICE_ATTR_RW(wan_enable);
+static DEVICE_ATTR(wwan_enable, S_IWUSR | S_IRUGO,
+                  wan_enable_show, wan_enable_store);
 
 /* --------------------------------------------------------------------- */
 
 static struct attribute *wan_attributes[] = {
-       &dev_attr_wan_enable.attr,
+       &dev_attr_wwan_enable.attr,
        NULL
 };
 
@@ -8138,7 +8140,8 @@ static ssize_t fan_pwm1_enable_store(struct device *dev,
        return count;
 }
 
-static DEVICE_ATTR_RW(fan_pwm1_enable);
+static DEVICE_ATTR(pwm1_enable, S_IWUSR | S_IRUGO,
+                  fan_pwm1_enable_show, fan_pwm1_enable_store);
 
 /* sysfs fan pwm1 ------------------------------------------------------ */
 static ssize_t fan_pwm1_show(struct device *dev,
@@ -8198,7 +8201,7 @@ static ssize_t fan_pwm1_store(struct device *dev,
        return (rc) ? rc : count;
 }
 
-static DEVICE_ATTR_RW(fan_pwm1);
+static DEVICE_ATTR(pwm1, S_IWUSR | S_IRUGO, fan_pwm1_show, fan_pwm1_store);
 
 /* sysfs fan fan1_input ------------------------------------------------ */
 static ssize_t fan_fan1_input_show(struct device *dev,
@@ -8215,7 +8218,7 @@ static ssize_t fan_fan1_input_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%u\n", speed);
 }
 
-static DEVICE_ATTR_RO(fan_fan1_input);
+static DEVICE_ATTR(fan1_input, S_IRUGO, fan_fan1_input_show, NULL);
 
 /* sysfs fan fan2_input ------------------------------------------------ */
 static ssize_t fan_fan2_input_show(struct device *dev,
@@ -8232,7 +8235,7 @@ static ssize_t fan_fan2_input_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%u\n", speed);
 }
 
-static DEVICE_ATTR_RO(fan_fan2_input);
+static DEVICE_ATTR(fan2_input, S_IRUGO, fan_fan2_input_show, NULL);
 
 /* sysfs fan fan_watchdog (hwmon driver) ------------------------------- */
 static ssize_t fan_fan_watchdog_show(struct device_driver *drv,
@@ -8265,8 +8268,8 @@ static DRIVER_ATTR(fan_watchdog, S_IWUSR | S_IRUGO,
 
 /* --------------------------------------------------------------------- */
 static struct attribute *fan_attributes[] = {
-       &dev_attr_fan_pwm1_enable.attr, &dev_attr_fan_pwm1.attr,
-       &dev_attr_fan_fan1_input.attr,
+       &dev_attr_pwm1_enable.attr, &dev_attr_pwm1.attr,
+       &dev_attr_fan1_input.attr,
        NULL, /* for fan2_input */
        NULL
 };
@@ -8400,7 +8403,7 @@ static int __init fan_init(struct ibm_init_struct *iibm)
                if (tp_features.second_fan) {
                        /* attach second fan tachometer */
                        fan_attributes[ARRAY_SIZE(fan_attributes)-2] =
-                                       &dev_attr_fan_fan2_input.attr;
+                                       &dev_attr_fan2_input.attr;
                }
                rc = sysfs_create_group(&tpacpi_sensors_pdev->dev.kobj,
                                         &fan_attr_group);
@@ -8848,7 +8851,7 @@ static ssize_t thinkpad_acpi_pdev_name_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%s\n", TPACPI_NAME);
 }
 
-static DEVICE_ATTR_RO(thinkpad_acpi_pdev_name);
+static DEVICE_ATTR(name, S_IRUGO, thinkpad_acpi_pdev_name_show, NULL);
 
 /* --------------------------------------------------------------------- */
 
@@ -9390,8 +9393,7 @@ static void thinkpad_acpi_module_exit(void)
                hwmon_device_unregister(tpacpi_hwmon);
 
        if (tp_features.sensors_pdev_attrs_registered)
-               device_remove_file(&tpacpi_sensors_pdev->dev,
-                                  &dev_attr_thinkpad_acpi_pdev_name);
+               device_remove_file(&tpacpi_sensors_pdev->dev, &dev_attr_name);
        if (tpacpi_sensors_pdev)
                platform_device_unregister(tpacpi_sensors_pdev);
        if (tpacpi_pdev)
@@ -9512,8 +9514,7 @@ static int __init thinkpad_acpi_module_init(void)
                thinkpad_acpi_module_exit();
                return ret;
        }
-       ret = device_create_file(&tpacpi_sensors_pdev->dev,
-                                &dev_attr_thinkpad_acpi_pdev_name);
+       ret = device_create_file(&tpacpi_sensors_pdev->dev, &dev_attr_name);
        if (ret) {
                pr_err("unable to create sysfs hwmon device attributes\n");
                thinkpad_acpi_module_exit();
index 8a4df7a..e628d4c 100644 (file)
@@ -394,6 +394,7 @@ static inline struct da9052_regulator_info *find_regulator_info(u8 chip_id,
 
 static int da9052_regulator_probe(struct platform_device *pdev)
 {
+       const struct mfd_cell *cell = mfd_get_cell(pdev);
        struct regulator_config config = { };
        struct da9052_regulator *regulator;
        struct da9052 *da9052;
@@ -409,7 +410,7 @@ static int da9052_regulator_probe(struct platform_device *pdev)
        regulator->da9052 = da9052;
 
        regulator->info = find_regulator_info(regulator->da9052->chip_id,
-                                             pdev->id);
+                                             cell->id);
        if (regulator->info == NULL) {
                dev_err(&pdev->dev, "invalid regulator ID specified\n");
                return -EINVAL;
@@ -419,7 +420,7 @@ static int da9052_regulator_probe(struct platform_device *pdev)
        config.driver_data = regulator;
        config.regmap = da9052->regmap;
        if (pdata && pdata->regulators) {
-               config.init_data = pdata->regulators[pdev->id];
+               config.init_data = pdata->regulators[cell->id];
        } else {
 #ifdef CONFIG_OF
                struct device_node *nproot = da9052->dev->of_node;
index 68c2002..5c9e680 100644 (file)
@@ -1020,8 +1020,7 @@ static void tcm_qla2xxx_depend_tpg(struct work_struct *work)
        struct se_portal_group *se_tpg = &base_tpg->se_tpg;
        struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha;
 
-       if (!configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys,
-                                 &se_tpg->tpg_group.cg_item)) {
+       if (!target_depend_item(&se_tpg->tpg_group.cg_item)) {
                atomic_set(&base_tpg->lport_tpg_enabled, 1);
                qlt_enable_vha(base_vha);
        }
@@ -1037,8 +1036,7 @@ static void tcm_qla2xxx_undepend_tpg(struct work_struct *work)
 
        if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) {
                atomic_set(&base_tpg->lport_tpg_enabled, 0);
-               configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys,
-                                      &se_tpg->tpg_group.cg_item);
+               target_undepend_item(&se_tpg->tpg_group.cg_item);
        }
        complete(&base_tpg->tpg_base_comp);
 }
index 15a7ee3..5fe1c22 100644 (file)
@@ -359,12 +359,13 @@ static void ssb_pcicore_init_hostmode(struct ssb_pcicore *pc)
 
        /*
         * Accessing PCI config without a proper delay after devices reset (not
-        * GPIO reset) was causing reboots on WRT300N v1.0.
+        * GPIO reset) was causing reboots on WRT300N v1.0 (BCM4704).
         * Tested delay 850 us lowered reboot chance to 50-80%, 1000 us fixed it
         * completely. Flushing all writes was also tested but with no luck.
+        * The same problem was reported for WRT350N v1 (BCM4705), so we just
+        * sleep here unconditionally.
         */
-       if (pc->dev->bus->chip_id == 0x4704)
-               usleep_range(1000, 2000);
+       usleep_range(1000, 2000);
 
        /* Enable PCI bridge BAR0 prefetch and burst */
        val = PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY;
index 34871a6..74e6114 100644 (file)
@@ -230,7 +230,7 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
         * Here we serialize access across the TIQN+TPG Tuple.
         */
        ret = down_interruptible(&tpg->np_login_sem);
-       if ((ret != 0) || signal_pending(current))
+       if (ret != 0)
                return -1;
 
        spin_lock_bh(&tpg->tpg_state_lock);
index 8ce94ff..70d799d 100644 (file)
@@ -346,6 +346,7 @@ static int iscsi_login_zero_tsih_s1(
        if (IS_ERR(sess->se_sess)) {
                iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
                                ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               kfree(sess->sess_ops);
                kfree(sess);
                return -ENOMEM;
        }
index e8a2408..5e3295f 100644 (file)
@@ -161,10 +161,7 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np(
 int iscsit_get_tpg(
        struct iscsi_portal_group *tpg)
 {
-       int ret;
-
-       ret = mutex_lock_interruptible(&tpg->tpg_access_lock);
-       return ((ret != 0) || signal_pending(current)) ? -1 : 0;
+       return mutex_lock_interruptible(&tpg->tpg_access_lock);
 }
 
 void iscsit_put_tpg(struct iscsi_portal_group *tpg)
index 75cbde1..4f8d4d4 100644 (file)
@@ -704,7 +704,7 @@ target_alua_state_check(struct se_cmd *cmd)
 
        if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
                return 0;
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        if (!port)
@@ -2377,7 +2377,7 @@ ssize_t core_alua_store_secondary_write_metadata(
 
 int core_setup_alua(struct se_device *dev)
 {
-       if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV &&
+       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
            !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) {
                struct t10_alua_lu_gp_member *lu_gp_mem;
 
index ddaf76a..e7b0430 100644 (file)
@@ -212,10 +212,6 @@ static struct config_group *target_core_register_fabric(
 
        pr_debug("Target_Core_ConfigFS: REGISTER -> Allocated Fabric:"
                        " %s\n", tf->tf_group.cg_item.ci_name);
-       /*
-        * Setup tf_ops.tf_subsys pointer for usage with configfs_depend_item()
-        */
-       tf->tf_ops.tf_subsys = tf->tf_subsys;
        tf->tf_fabric = &tf->tf_group.cg_item;
        pr_debug("Target_Core_ConfigFS: REGISTER -> Set tf->tf_fabric"
                        " for %s\n", name);
@@ -291,10 +287,17 @@ static struct configfs_subsystem target_core_fabrics = {
        },
 };
 
-struct configfs_subsystem *target_core_subsystem[] = {
-       &target_core_fabrics,
-       NULL,
-};
+int target_depend_item(struct config_item *item)
+{
+       return configfs_depend_item(&target_core_fabrics, item);
+}
+EXPORT_SYMBOL(target_depend_item);
+
+void target_undepend_item(struct config_item *item)
+{
+       return configfs_undepend_item(&target_core_fabrics, item);
+}
+EXPORT_SYMBOL(target_undepend_item);
 
 /*##############################################################################
 // Start functions called by external Target Fabrics Modules
@@ -467,7 +470,6 @@ int target_register_template(const struct target_core_fabric_ops *fo)
         * struct target_fabric_configfs->tf_cit_tmpl
         */
        tf->tf_module = fo->module;
-       tf->tf_subsys = target_core_subsystem[0];
        snprintf(tf->tf_name, TARGET_FABRIC_NAME_SIZE, "%s", fo->name);
 
        tf->tf_ops = *fo;
@@ -809,7 +811,7 @@ static ssize_t target_core_dev_pr_show_attr_res_holder(struct se_device *dev,
 {
        int ret;
 
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return sprintf(page, "Passthrough\n");
 
        spin_lock(&dev->dev_reservation_lock);
@@ -960,7 +962,7 @@ SE_DEV_PR_ATTR_RO(res_pr_type);
 static ssize_t target_core_dev_pr_show_attr_res_type(
                struct se_device *dev, char *page)
 {
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return sprintf(page, "SPC_PASSTHROUGH\n");
        else if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
                return sprintf(page, "SPC2_RESERVATIONS\n");
@@ -973,7 +975,7 @@ SE_DEV_PR_ATTR_RO(res_type);
 static ssize_t target_core_dev_pr_show_attr_res_aptpl_active(
                struct se_device *dev, char *page)
 {
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        return sprintf(page, "APTPL Bit Status: %s\n",
@@ -988,7 +990,7 @@ SE_DEV_PR_ATTR_RO(res_aptpl_active);
 static ssize_t target_core_dev_pr_show_attr_res_aptpl_metadata(
                struct se_device *dev, char *page)
 {
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        return sprintf(page, "Ready to process PR APTPL metadata..\n");
@@ -1035,7 +1037,7 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata(
        u16 port_rpti = 0, tpgt = 0;
        u8 type = 0, scope;
 
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
        if (dev->dev_reservation_flags & DRF_SPC2_RESERVATIONS)
                return 0;
@@ -2870,7 +2872,7 @@ static int __init target_core_init_configfs(void)
 {
        struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL;
        struct config_group *lu_gp_cg = NULL;
-       struct configfs_subsystem *subsys;
+       struct configfs_subsystem *subsys = &target_core_fabrics;
        struct t10_alua_lu_gp *lu_gp;
        int ret;
 
@@ -2878,7 +2880,6 @@ static int __init target_core_init_configfs(void)
                " Engine: %s on %s/%s on "UTS_RELEASE"\n",
                TARGET_CORE_VERSION, utsname()->sysname, utsname()->machine);
 
-       subsys = target_core_subsystem[0];
        config_group_init(&subsys->su_group);
        mutex_init(&subsys->su_mutex);
 
@@ -3008,13 +3009,10 @@ out_global:
 
 static void __exit target_core_exit_configfs(void)
 {
-       struct configfs_subsystem *subsys;
        struct config_group *hba_cg, *alua_cg, *lu_gp_cg;
        struct config_item *item;
        int i;
 
-       subsys = target_core_subsystem[0];
-
        lu_gp_cg = &alua_lu_gps_group;
        for (i = 0; lu_gp_cg->default_groups[i]; i++) {
                item = &lu_gp_cg->default_groups[i]->cg_item;
@@ -3045,8 +3043,8 @@ static void __exit target_core_exit_configfs(void)
         * We expect subsys->su_group.default_groups to be released
         * by configfs subsystem provider logic..
         */
-       configfs_unregister_subsystem(subsys);
-       kfree(subsys->su_group.default_groups);
+       configfs_unregister_subsystem(&target_core_fabrics);
+       kfree(target_core_fabrics.su_group.default_groups);
 
        core_alua_free_lu_gp(default_lu_gp);
        default_lu_gp = NULL;
index 7faa6ae..ce5f768 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/kthread.h>
 #include <linux/in.h>
 #include <linux/export.h>
+#include <asm/unaligned.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 #include <scsi/scsi.h>
@@ -527,7 +528,7 @@ static void core_export_port(
        list_add_tail(&port->sep_list, &dev->dev_sep_list);
        spin_unlock(&dev->se_port_lock);
 
-       if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV &&
+       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH) &&
            !(dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)) {
                tg_pt_gp_mem = core_alua_allocate_tg_pt_gp_mem(port);
                if (IS_ERR(tg_pt_gp_mem) || !tg_pt_gp_mem) {
@@ -1603,7 +1604,7 @@ int target_configure_device(struct se_device *dev)
         * anything virtual (IBLOCK, FILEIO, RAMDISK), but not for TCM/pSCSI
         * passthrough because this is being provided by the backend LLD.
         */
-       if (dev->transport->transport_type != TRANSPORT_PLUGIN_PHBA_PDEV) {
+       if (!(dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)) {
                strncpy(&dev->t10_wwn.vendor[0], "LIO-ORG", 8);
                strncpy(&dev->t10_wwn.model[0],
                        dev->transport->inquiry_prod, 16);
@@ -1707,3 +1708,76 @@ void core_dev_release_virtual_lun0(void)
                target_free_device(g_lun0_dev);
        core_delete_hba(hba);
 }
+
+/*
+ * Common CDB parsing for kernel and user passthrough.
+ */
+sense_reason_t
+passthrough_parse_cdb(struct se_cmd *cmd,
+       sense_reason_t (*exec_cmd)(struct se_cmd *cmd))
+{
+       unsigned char *cdb = cmd->t_task_cdb;
+
+       /*
+        * Clear a lun set in the cdb if the initiator talking to us spoke
+        * an old standards version, as we can't assume the underlying device
+        * won't choke up on it.
+        */
+       switch (cdb[0]) {
+       case READ_10: /* SBC - RDProtect */
+       case READ_12: /* SBC - RDProtect */
+       case READ_16: /* SBC - RDProtect */
+       case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */
+       case VERIFY: /* SBC - VRProtect */
+       case VERIFY_16: /* SBC - VRProtect */
+       case WRITE_VERIFY: /* SBC - VRProtect */
+       case WRITE_VERIFY_12: /* SBC - VRProtect */
+       case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */
+               break;
+       default:
+               cdb[1] &= 0x1f; /* clear logical unit number */
+               break;
+       }
+
+       /*
+        * For REPORT LUNS we always need to emulate the response, for everything
+        * else, pass it up.
+        */
+       if (cdb[0] == REPORT_LUNS) {
+               cmd->execute_cmd = spc_emulate_report_luns;
+               return TCM_NO_SENSE;
+       }
+
+       /* Set DATA_CDB flag for ops that should have it */
+       switch (cdb[0]) {
+       case READ_6:
+       case READ_10:
+       case READ_12:
+       case READ_16:
+       case WRITE_6:
+       case WRITE_10:
+       case WRITE_12:
+       case WRITE_16:
+       case WRITE_VERIFY:
+       case WRITE_VERIFY_12:
+       case 0x8e: /* WRITE_VERIFY_16 */
+       case COMPARE_AND_WRITE:
+       case XDWRITEREAD_10:
+               cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
+               break;
+       case VARIABLE_LENGTH_CMD:
+               switch (get_unaligned_be16(&cdb[8])) {
+               case READ_32:
+               case WRITE_32:
+               case 0x0c: /* WRITE_VERIFY_32 */
+               case XDWRITEREAD_32:
+                       cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
+                       break;
+               }
+       }
+
+       cmd->execute_cmd = exec_cmd;
+
+       return TCM_NO_SENSE;
+}
+EXPORT_SYMBOL(passthrough_parse_cdb);
index f7e6e51..3f27bfd 100644 (file)
@@ -958,7 +958,6 @@ static struct se_subsystem_api fileio_template = {
        .inquiry_prod           = "FILEIO",
        .inquiry_rev            = FD_VERSION,
        .owner                  = THIS_MODULE,
-       .transport_type         = TRANSPORT_PLUGIN_VHBA_PDEV,
        .attach_hba             = fd_attach_hba,
        .detach_hba             = fd_detach_hba,
        .alloc_device           = fd_alloc_device,
index 1b7947c..8c96568 100644 (file)
@@ -904,7 +904,6 @@ static struct se_subsystem_api iblock_template = {
        .inquiry_prod           = "IBLOCK",
        .inquiry_rev            = IBLOCK_VERSION,
        .owner                  = THIS_MODULE,
-       .transport_type         = TRANSPORT_PLUGIN_VHBA_PDEV,
        .attach_hba             = iblock_attach_hba,
        .detach_hba             = iblock_detach_hba,
        .alloc_device           = iblock_alloc_device,
index 874a9bc..68bd7f5 100644 (file)
@@ -4,9 +4,6 @@
 /* target_core_alua.c */
 extern struct t10_alua_lu_gp *default_lu_gp;
 
-/* target_core_configfs.c */
-extern struct configfs_subsystem *target_core_subsystem[];
-
 /* target_core_device.c */
 extern struct mutex g_device_mutex;
 extern struct list_head g_device_list;
index c1aa965..a15411c 100644 (file)
@@ -1367,41 +1367,26 @@ void core_scsi3_free_all_registrations(
 
 static int core_scsi3_tpg_depend_item(struct se_portal_group *tpg)
 {
-       return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &tpg->tpg_group.cg_item);
+       return target_depend_item(&tpg->tpg_group.cg_item);
 }
 
 static void core_scsi3_tpg_undepend_item(struct se_portal_group *tpg)
 {
-       configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &tpg->tpg_group.cg_item);
-
+       target_undepend_item(&tpg->tpg_group.cg_item);
        atomic_dec_mb(&tpg->tpg_pr_ref_count);
 }
 
 static int core_scsi3_nodeacl_depend_item(struct se_node_acl *nacl)
 {
-       struct se_portal_group *tpg = nacl->se_tpg;
-
        if (nacl->dynamic_node_acl)
                return 0;
-
-       return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &nacl->acl_group.cg_item);
+       return target_depend_item(&nacl->acl_group.cg_item);
 }
 
 static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl)
 {
-       struct se_portal_group *tpg = nacl->se_tpg;
-
-       if (nacl->dynamic_node_acl) {
-               atomic_dec_mb(&nacl->acl_pr_ref_count);
-               return;
-       }
-
-       configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &nacl->acl_group.cg_item);
-
+       if (!nacl->dynamic_node_acl)
+               target_undepend_item(&nacl->acl_group.cg_item);
        atomic_dec_mb(&nacl->acl_pr_ref_count);
 }
 
@@ -1419,8 +1404,7 @@ static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve)
        nacl = lun_acl->se_lun_nacl;
        tpg = nacl->se_tpg;
 
-       return configfs_depend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &lun_acl->se_lun_group.cg_item);
+       return target_depend_item(&lun_acl->se_lun_group.cg_item);
 }
 
 static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve)
@@ -1438,9 +1422,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve)
        nacl = lun_acl->se_lun_nacl;
        tpg = nacl->se_tpg;
 
-       configfs_undepend_item(tpg->se_tpg_tfo->tf_subsys,
-                       &lun_acl->se_lun_group.cg_item);
-
+       target_undepend_item(&lun_acl->se_lun_group.cg_item);
        atomic_dec_mb(&se_deve->pr_ref_count);
 }
 
@@ -4111,7 +4093,7 @@ target_check_reservation(struct se_cmd *cmd)
                return 0;
        if (dev->se_hba->hba_flags & HBA_FLAGS_INTERNAL_USE)
                return 0;
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        spin_lock(&dev->dev_reservation_lock);
index f6c954c..ecc5eae 100644 (file)
@@ -521,6 +521,7 @@ static int pscsi_configure_device(struct se_device *dev)
                                        " pdv_host_id: %d\n", pdv->pdv_host_id);
                                return -EINVAL;
                        }
+                       pdv->pdv_lld_host = sh;
                }
        } else {
                if (phv->phv_mode == PHV_VIRTUAL_HOST_ID) {
@@ -603,6 +604,8 @@ static void pscsi_free_device(struct se_device *dev)
                if ((phv->phv_mode == PHV_LLD_SCSI_HOST_NO) &&
                    (phv->phv_lld_host != NULL))
                        scsi_host_put(phv->phv_lld_host);
+               else if (pdv->pdv_lld_host)
+                       scsi_host_put(pdv->pdv_lld_host);
 
                if ((sd->type == TYPE_DISK) || (sd->type == TYPE_ROM))
                        scsi_device_put(sd);
@@ -970,64 +973,13 @@ fail:
        return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 }
 
-/*
- * Clear a lun set in the cdb if the initiator talking to use spoke
- * and old standards version, as we can't assume the underlying device
- * won't choke up on it.
- */
-static inline void pscsi_clear_cdb_lun(unsigned char *cdb)
-{
-       switch (cdb[0]) {
-       case READ_10: /* SBC - RDProtect */
-       case READ_12: /* SBC - RDProtect */
-       case READ_16: /* SBC - RDProtect */
-       case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */
-       case VERIFY: /* SBC - VRProtect */
-       case VERIFY_16: /* SBC - VRProtect */
-       case WRITE_VERIFY: /* SBC - VRProtect */
-       case WRITE_VERIFY_12: /* SBC - VRProtect */
-       case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */
-               break;
-       default:
-               cdb[1] &= 0x1f; /* clear logical unit number */
-               break;
-       }
-}
-
 static sense_reason_t
 pscsi_parse_cdb(struct se_cmd *cmd)
 {
-       unsigned char *cdb = cmd->t_task_cdb;
-
        if (cmd->se_cmd_flags & SCF_BIDI)
                return TCM_UNSUPPORTED_SCSI_OPCODE;
 
-       pscsi_clear_cdb_lun(cdb);
-
-       /*
-        * For REPORT LUNS we always need to emulate the response, for everything
-        * else the default for pSCSI is to pass the command to the underlying
-        * LLD / physical hardware.
-        */
-       switch (cdb[0]) {
-       case REPORT_LUNS:
-               cmd->execute_cmd = spc_emulate_report_luns;
-               return 0;
-       case READ_6:
-       case READ_10:
-       case READ_12:
-       case READ_16:
-       case WRITE_6:
-       case WRITE_10:
-       case WRITE_12:
-       case WRITE_16:
-       case WRITE_VERIFY:
-               cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               /* FALLTHROUGH*/
-       default:
-               cmd->execute_cmd = pscsi_execute_cmd;
-               return 0;
-       }
+       return passthrough_parse_cdb(cmd, pscsi_execute_cmd);
 }
 
 static sense_reason_t
@@ -1189,7 +1141,7 @@ static struct configfs_attribute *pscsi_backend_dev_attrs[] = {
 static struct se_subsystem_api pscsi_template = {
        .name                   = "pscsi",
        .owner                  = THIS_MODULE,
-       .transport_type         = TRANSPORT_PLUGIN_PHBA_PDEV,
+       .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH,
        .attach_hba             = pscsi_attach_hba,
        .detach_hba             = pscsi_detach_hba,
        .pmode_enable_hba       = pscsi_pmode_enable_hba,
index 1bd757d..820d305 100644 (file)
@@ -45,6 +45,7 @@ struct pscsi_dev_virt {
        int     pdv_lun_id;
        struct block_device *pdv_bd;
        struct scsi_device *pdv_sd;
+       struct Scsi_Host *pdv_lld_host;
 } ____cacheline_aligned;
 
 typedef enum phv_modes {
index a263bf5..d16489b 100644 (file)
@@ -733,7 +733,6 @@ static struct se_subsystem_api rd_mcp_template = {
        .name                   = "rd_mcp",
        .inquiry_prod           = "RAMDISK-MCP",
        .inquiry_rev            = RD_MCP_VERSION,
-       .transport_type         = TRANSPORT_PLUGIN_VHBA_VDEV,
        .attach_hba             = rd_attach_hba,
        .detach_hba             = rd_detach_hba,
        .alloc_device           = rd_alloc_device,
index 8855781..733824e 100644 (file)
@@ -568,7 +568,7 @@ sbc_compare_and_write(struct se_cmd *cmd)
         * comparision using SGLs at cmd->t_bidi_data_sg..
         */
        rc = down_interruptible(&dev->caw_sem);
-       if ((rc != 0) || signal_pending(current)) {
+       if (rc != 0) {
                cmd->transport_complete_callback = NULL;
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
        }
index 3fe5cb2..675f2d9 100644 (file)
@@ -1196,7 +1196,7 @@ transport_check_alloc_task_attr(struct se_cmd *cmd)
         * Check if SAM Task Attribute emulation is enabled for this
         * struct se_device storage object
         */
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return 0;
 
        if (cmd->sam_task_attr == TCM_ACA_TAG) {
@@ -1770,7 +1770,7 @@ static int target_write_prot_action(struct se_cmd *cmd)
                                                   sectors, 0, NULL, 0);
                if (unlikely(cmd->pi_err)) {
                        spin_lock_irq(&cmd->t_state_lock);
-                       cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT;
+                       cmd->transport_state &= ~(CMD_T_BUSY|CMD_T_SENT);
                        spin_unlock_irq(&cmd->t_state_lock);
                        transport_generic_request_failure(cmd, cmd->pi_err);
                        return -1;
@@ -1787,7 +1787,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
 
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return false;
 
        /*
@@ -1868,7 +1868,7 @@ void target_execute_cmd(struct se_cmd *cmd)
 
        if (target_handle_task_attr(cmd)) {
                spin_lock_irq(&cmd->t_state_lock);
-               cmd->transport_state &= ~CMD_T_BUSY|CMD_T_SENT;
+               cmd->transport_state &= ~(CMD_T_BUSY | CMD_T_SENT);
                spin_unlock_irq(&cmd->t_state_lock);
                return;
        }
@@ -1912,7 +1912,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
 {
        struct se_device *dev = cmd->se_dev;
 
-       if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
+       if (dev->transport->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                return;
 
        if (cmd->sam_task_attr == TCM_SIMPLE_TAG) {
@@ -1957,8 +1957,7 @@ static void transport_complete_qf(struct se_cmd *cmd)
        case DMA_TO_DEVICE:
                if (cmd->se_cmd_flags & SCF_BIDI) {
                        ret = cmd->se_tfo->queue_data_in(cmd);
-                       if (ret < 0)
-                               break;
+                       break;
                }
                /* Fall through for DMA_TO_DEVICE */
        case DMA_NONE:
index dbc872a..07d2996 100644 (file)
@@ -71,13 +71,6 @@ struct tcmu_hba {
        u32 host_id;
 };
 
-/* User wants all cmds or just some */
-enum passthru_level {
-       TCMU_PASS_ALL = 0,
-       TCMU_PASS_IO,
-       TCMU_PASS_INVALID,
-};
-
 #define TCMU_CONFIG_LEN 256
 
 struct tcmu_dev {
@@ -89,7 +82,6 @@ struct tcmu_dev {
 #define TCMU_DEV_BIT_OPEN 0
 #define TCMU_DEV_BIT_BROKEN 1
        unsigned long flags;
-       enum passthru_level pass_level;
 
        struct uio_info uio_info;
 
@@ -683,8 +675,6 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
        setup_timer(&udev->timeout, tcmu_device_timedout,
                (unsigned long)udev);
 
-       udev->pass_level = TCMU_PASS_ALL;
-
        return &udev->se_dev;
 }
 
@@ -948,13 +938,13 @@ static void tcmu_free_device(struct se_device *dev)
 }
 
 enum {
-       Opt_dev_config, Opt_dev_size, Opt_err, Opt_pass_level,
+       Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_err,
 };
 
 static match_table_t tokens = {
        {Opt_dev_config, "dev_config=%s"},
        {Opt_dev_size, "dev_size=%u"},
-       {Opt_pass_level, "pass_level=%u"},
+       {Opt_hw_block_size, "hw_block_size=%u"},
        {Opt_err, NULL}
 };
 
@@ -965,7 +955,7 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
        char *orig, *ptr, *opts, *arg_p;
        substring_t args[MAX_OPT_ARGS];
        int ret = 0, token;
-       int arg;
+       unsigned long tmp_ul;
 
        opts = kstrdup(page, GFP_KERNEL);
        if (!opts)
@@ -998,15 +988,23 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
                        if (ret < 0)
                                pr_err("kstrtoul() failed for dev_size=\n");
                        break;
-               case Opt_pass_level:
-                       match_int(args, &arg);
-                       if (arg >= TCMU_PASS_INVALID) {
-                               pr_warn("TCMU: Invalid pass_level: %d\n", arg);
+               case Opt_hw_block_size:
+                       arg_p = match_strdup(&args[0]);
+                       if (!arg_p) {
+                               ret = -ENOMEM;
                                break;
                        }
-
-                       pr_debug("TCMU: Setting pass_level to %d\n", arg);
-                       udev->pass_level = arg;
+                       ret = kstrtoul(arg_p, 0, &tmp_ul);
+                       kfree(arg_p);
+                       if (ret < 0) {
+                               pr_err("kstrtoul() failed for hw_block_size=\n");
+                               break;
+                       }
+                       if (!tmp_ul) {
+                               pr_err("hw_block_size must be nonzero\n");
+                               break;
+                       }
+                       dev->dev_attrib.hw_block_size = tmp_ul;
                        break;
                default:
                        break;
@@ -1024,8 +1022,7 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
 
        bl = sprintf(b + bl, "Config: %s ",
                     udev->dev_config[0] ? udev->dev_config : "NULL");
-       bl += sprintf(b + bl, "Size: %zu PassLevel: %u\n",
-                     udev->dev_size, udev->pass_level);
+       bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size);
 
        return bl;
 }
@@ -1038,20 +1035,6 @@ static sector_t tcmu_get_blocks(struct se_device *dev)
                       dev->dev_attrib.block_size);
 }
 
-static sense_reason_t
-tcmu_execute_rw(struct se_cmd *se_cmd, struct scatterlist *sgl, u32 sgl_nents,
-               enum dma_data_direction data_direction)
-{
-       int ret;
-
-       ret = tcmu_queue_cmd(se_cmd);
-
-       if (ret != 0)
-               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
-       else
-               return TCM_NO_SENSE;
-}
-
 static sense_reason_t
 tcmu_pass_op(struct se_cmd *se_cmd)
 {
@@ -1063,91 +1046,29 @@ tcmu_pass_op(struct se_cmd *se_cmd)
                return TCM_NO_SENSE;
 }
 
-static struct sbc_ops tcmu_sbc_ops = {
-       .execute_rw = tcmu_execute_rw,
-       .execute_sync_cache     = tcmu_pass_op,
-       .execute_write_same     = tcmu_pass_op,
-       .execute_write_same_unmap = tcmu_pass_op,
-       .execute_unmap          = tcmu_pass_op,
-};
-
 static sense_reason_t
 tcmu_parse_cdb(struct se_cmd *cmd)
 {
-       unsigned char *cdb = cmd->t_task_cdb;
-       struct tcmu_dev *udev = TCMU_DEV(cmd->se_dev);
-       sense_reason_t ret;
-
-       switch (udev->pass_level) {
-       case TCMU_PASS_ALL:
-               /* We're just like pscsi, then */
-               /*
-                * For REPORT LUNS we always need to emulate the response, for everything
-                * else, pass it up.
-                */
-               switch (cdb[0]) {
-               case REPORT_LUNS:
-                       cmd->execute_cmd = spc_emulate_report_luns;
-                       break;
-               case READ_6:
-               case READ_10:
-               case READ_12:
-               case READ_16:
-               case WRITE_6:
-               case WRITE_10:
-               case WRITE_12:
-               case WRITE_16:
-               case WRITE_VERIFY:
-                       cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-                       /* FALLTHROUGH */
-               default:
-                       cmd->execute_cmd = tcmu_pass_op;
-               }
-               ret = TCM_NO_SENSE;
-               break;
-       case TCMU_PASS_IO:
-               ret = sbc_parse_cdb(cmd, &tcmu_sbc_ops);
-               break;
-       default:
-               pr_err("Unknown tcm-user pass level %d\n", udev->pass_level);
-               ret = TCM_CHECK_CONDITION_ABORT_CMD;
-       }
-
-       return ret;
+       return passthrough_parse_cdb(cmd, tcmu_pass_op);
 }
 
-DEF_TB_DEFAULT_ATTRIBS(tcmu);
+DEF_TB_DEV_ATTRIB_RO(tcmu, hw_pi_prot_type);
+TB_DEV_ATTR_RO(tcmu, hw_pi_prot_type);
+
+DEF_TB_DEV_ATTRIB_RO(tcmu, hw_block_size);
+TB_DEV_ATTR_RO(tcmu, hw_block_size);
+
+DEF_TB_DEV_ATTRIB_RO(tcmu, hw_max_sectors);
+TB_DEV_ATTR_RO(tcmu, hw_max_sectors);
+
+DEF_TB_DEV_ATTRIB_RO(tcmu, hw_queue_depth);
+TB_DEV_ATTR_RO(tcmu, hw_queue_depth);
 
 static struct configfs_attribute *tcmu_backend_dev_attrs[] = {
-       &tcmu_dev_attrib_emulate_model_alias.attr,
-       &tcmu_dev_attrib_emulate_dpo.attr,
-       &tcmu_dev_attrib_emulate_fua_write.attr,
-       &tcmu_dev_attrib_emulate_fua_read.attr,
-       &tcmu_dev_attrib_emulate_write_cache.attr,
-       &tcmu_dev_attrib_emulate_ua_intlck_ctrl.attr,
-       &tcmu_dev_attrib_emulate_tas.attr,
-       &tcmu_dev_attrib_emulate_tpu.attr,
-       &tcmu_dev_attrib_emulate_tpws.attr,
-       &tcmu_dev_attrib_emulate_caw.attr,
-       &tcmu_dev_attrib_emulate_3pc.attr,
-       &tcmu_dev_attrib_pi_prot_type.attr,
        &tcmu_dev_attrib_hw_pi_prot_type.attr,
-       &tcmu_dev_attrib_pi_prot_format.attr,
-       &tcmu_dev_attrib_enforce_pr_isids.attr,
-       &tcmu_dev_attrib_is_nonrot.attr,
-       &tcmu_dev_attrib_emulate_rest_reord.attr,
-       &tcmu_dev_attrib_force_pr_aptpl.attr,
        &tcmu_dev_attrib_hw_block_size.attr,
-       &tcmu_dev_attrib_block_size.attr,
        &tcmu_dev_attrib_hw_max_sectors.attr,
-       &tcmu_dev_attrib_optimal_sectors.attr,
        &tcmu_dev_attrib_hw_queue_depth.attr,
-       &tcmu_dev_attrib_queue_depth.attr,
-       &tcmu_dev_attrib_max_unmap_lba_count.attr,
-       &tcmu_dev_attrib_max_unmap_block_desc_count.attr,
-       &tcmu_dev_attrib_unmap_granularity.attr,
-       &tcmu_dev_attrib_unmap_granularity_alignment.attr,
-       &tcmu_dev_attrib_max_write_same_len.attr,
        NULL,
 };
 
@@ -1156,7 +1077,7 @@ static struct se_subsystem_api tcmu_template = {
        .inquiry_prod           = "USER",
        .inquiry_rev            = TCMU_VERSION,
        .owner                  = THIS_MODULE,
-       .transport_type         = TRANSPORT_PLUGIN_VHBA_PDEV,
+       .transport_flags        = TRANSPORT_FLAG_PASSTHROUGH,
        .attach_hba             = tcmu_attach_hba,
        .detach_hba             = tcmu_detach_hba,
        .alloc_device           = tcmu_alloc_device,
index a600ff1..8fd680a 100644 (file)
@@ -58,7 +58,6 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op
                                        bool src)
 {
        struct se_device *se_dev;
-       struct configfs_subsystem *subsys = target_core_subsystem[0];
        unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn;
        int rc;
 
@@ -90,8 +89,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op
                                " se_dev\n", xop->src_dev);
                }
 
-               rc = configfs_depend_item(subsys,
-                               &se_dev->dev_group.cg_item);
+               rc = target_depend_item(&se_dev->dev_group.cg_item);
                if (rc != 0) {
                        pr_err("configfs_depend_item attempt failed:"
                                " %d for se_dev: %p\n", rc, se_dev);
@@ -99,8 +97,8 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op
                        return rc;
                }
 
-               pr_debug("Called configfs_depend_item for subsys: %p se_dev: %p"
-                       " se_dev->se_dev_group: %p\n", subsys, se_dev,
+               pr_debug("Called configfs_depend_item for se_dev: %p"
+                       " se_dev->se_dev_group: %p\n", se_dev,
                        &se_dev->dev_group);
 
                mutex_unlock(&g_device_mutex);
@@ -373,7 +371,6 @@ static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd)
 
 static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop)
 {
-       struct configfs_subsystem *subsys = target_core_subsystem[0];
        struct se_device *remote_dev;
 
        if (xop->op_origin == XCOL_SOURCE_RECV_OP)
@@ -381,11 +378,11 @@ static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop)
        else
                remote_dev = xop->src_dev;
 
-       pr_debug("Calling configfs_undepend_item for subsys: %p"
+       pr_debug("Calling configfs_undepend_item for"
                  " remote_dev: %p remote_dev->dev_group: %p\n",
-                 subsys, remote_dev, &remote_dev->dev_group.cg_item);
+                 remote_dev, &remote_dev->dev_group.cg_item);
 
-       configfs_undepend_item(subsys, &remote_dev->dev_group.cg_item);
+       target_undepend_item(&remote_dev->dev_group.cg_item);
 }
 
 static void xcopy_pt_release_cmd(struct se_cmd *se_cmd)
index 04d9e23..358323c 100644 (file)
@@ -174,13 +174,13 @@ struct mips_ejtag_fdc_tty {
 static inline void mips_ejtag_fdc_write(struct mips_ejtag_fdc_tty *priv,
                                        unsigned int offs, unsigned int data)
 {
-       iowrite32(data, priv->reg + offs);
+       __raw_writel(data, priv->reg + offs);
 }
 
 static inline unsigned int mips_ejtag_fdc_read(struct mips_ejtag_fdc_tty *priv,
                                               unsigned int offs)
 {
-       return ioread32(priv->reg + offs);
+       return __raw_readl(priv->reg + offs);
 }
 
 /* Encoding of byte stream in FDC words */
@@ -347,9 +347,9 @@ static void mips_ejtag_fdc_console_write(struct console *c, const char *s,
                s += inc[word.bytes - 1];
 
                /* Busy wait until there's space in fifo */
-               while (ioread32(regs + REG_FDSTAT) & REG_FDSTAT_TXF)
+               while (__raw_readl(regs + REG_FDSTAT) & REG_FDSTAT_TXF)
                        ;
-               iowrite32(word.word, regs + REG_FDTX(c->index));
+               __raw_writel(word.word, regs + REG_FDTX(c->index));
        }
 out:
        local_irq_restore(flags);
@@ -1227,7 +1227,7 @@ static int kgdbfdc_read_char(void)
 
                /* Read next word from KGDB channel */
                do {
-                       stat = ioread32(regs + REG_FDSTAT);
+                       stat = __raw_readl(regs + REG_FDSTAT);
 
                        /* No data waiting? */
                        if (stat & REG_FDSTAT_RXE)
@@ -1236,7 +1236,7 @@ static int kgdbfdc_read_char(void)
                        /* Read next word */
                        channel = (stat & REG_FDSTAT_RXCHAN) >>
                                        REG_FDSTAT_RXCHAN_SHIFT;
-                       data = ioread32(regs + REG_FDRX);
+                       data = __raw_readl(regs + REG_FDRX);
                } while (channel != CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN);
 
                /* Decode into rbuf */
@@ -1266,9 +1266,10 @@ static void kgdbfdc_push_one(void)
                return;
 
        /* Busy wait until there's space in fifo */
-       while (ioread32(regs + REG_FDSTAT) & REG_FDSTAT_TXF)
+       while (__raw_readl(regs + REG_FDSTAT) & REG_FDSTAT_TXF)
                ;
-       iowrite32(word.word, regs + REG_FDTX(CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN));
+       __raw_writel(word.word,
+                    regs + REG_FDTX(CONFIG_MIPS_EJTAG_FDC_KGDB_CHAN));
 }
 
 /* flush the whole write buffer to the TX FIFO */
index 5e19bb5..ea32b38 100644 (file)
@@ -1409,8 +1409,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
                         * dependency now.
                         */
                        se_tpg = &tpg->se_tpg;
-                       ret = configfs_depend_item(se_tpg->se_tpg_tfo->tf_subsys,
-                                                  &se_tpg->tpg_group.cg_item);
+                       ret = target_depend_item(&se_tpg->tpg_group.cg_item);
                        if (ret) {
                                pr_warn("configfs_depend_item() failed: %d\n", ret);
                                kfree(vs_tpg);
@@ -1513,8 +1512,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
                 * to allow vhost-scsi WWPN se_tpg->tpg_group shutdown to occur.
                 */
                se_tpg = &tpg->se_tpg;
-               configfs_undepend_item(se_tpg->se_tpg_tfo->tf_subsys,
-                                      &se_tpg->tpg_group.cg_item);
+               target_undepend_item(&se_tpg->tpg_group.cg_item);
        }
        if (match) {
                for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
index 3a145a6..6897f1c 100644 (file)
@@ -274,6 +274,10 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 
        pb->pwm = devm_pwm_get(&pdev->dev, NULL);
        if (IS_ERR(pb->pwm)) {
+               ret = PTR_ERR(pb->pwm);
+               if (ret == -EPROBE_DEFER)
+                       goto err_alloc;
+
                dev_err(&pdev->dev, "unable to request PWM, trying legacy API\n");
                pb->legacy = true;
                pb->pwm = pwm_request(data->pwm_id, "pwm-backlight");
index 241ef68..cd46e41 100644 (file)
@@ -918,7 +918,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
                        total_size = total_mapping_size(elf_phdata,
                                                        loc->elf_ex.e_phnum);
                        if (!total_size) {
-                               error = -EINVAL;
+                               retval = -EINVAL;
                                goto out_free_dentry;
                        }
                }
index df9932b..1ce06c8 100644 (file)
@@ -85,6 +85,7 @@ BTRFS_WORK_HELPER(extent_refs_helper);
 BTRFS_WORK_HELPER(scrub_helper);
 BTRFS_WORK_HELPER(scrubwrc_helper);
 BTRFS_WORK_HELPER(scrubnc_helper);
+BTRFS_WORK_HELPER(scrubparity_helper);
 
 static struct __btrfs_workqueue *
 __btrfs_alloc_workqueue(const char *name, unsigned int flags, int max_active,
index ec2ee47..b0b093b 100644 (file)
@@ -64,6 +64,8 @@ BTRFS_WORK_HELPER_PROTO(extent_refs_helper);
 BTRFS_WORK_HELPER_PROTO(scrub_helper);
 BTRFS_WORK_HELPER_PROTO(scrubwrc_helper);
 BTRFS_WORK_HELPER_PROTO(scrubnc_helper);
+BTRFS_WORK_HELPER_PROTO(scrubparity_helper);
+
 
 struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
                                              unsigned int flags,
index 614aaa1..802fabb 100644 (file)
@@ -250,8 +250,12 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
         * the first item to check. But sometimes, we may enter it with
         * slot==nritems. In that case, go to the next leaf before we continue.
         */
-       if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
-               ret = btrfs_next_old_leaf(root, path, time_seq);
+       if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+               if (time_seq == (u64)-1)
+                       ret = btrfs_next_leaf(root, path);
+               else
+                       ret = btrfs_next_old_leaf(root, path, time_seq);
+       }
 
        while (!ret && count < total_refs) {
                eb = path->nodes[0];
@@ -291,7 +295,10 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                        eie = NULL;
                }
 next:
-               ret = btrfs_next_old_item(root, path, time_seq);
+               if (time_seq == (u64)-1)
+                       ret = btrfs_next_item(root, path);
+               else
+                       ret = btrfs_next_old_item(root, path, time_seq);
        }
 
        if (ret > 0)
@@ -334,6 +341,8 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 
        if (path->search_commit_root)
                root_level = btrfs_header_level(root->commit_root);
+       else if (time_seq == (u64)-1)
+               root_level = btrfs_header_level(root->node);
        else
                root_level = btrfs_old_root_level(root, time_seq);
 
@@ -343,7 +352,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
        }
 
        path->lowest_level = level;
-       ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
+       if (time_seq == (u64)-1)
+               ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
+                                       0, 0);
+       else
+               ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
+                                           time_seq);
 
        /* root node has been locked, we can release @subvol_srcu safely here */
        srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -491,7 +505,9 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
                BUG_ON(!ref->wanted_disk_byte);
                eb = read_tree_block(fs_info->tree_root, ref->wanted_disk_byte,
                                     0);
-               if (!eb || !extent_buffer_uptodate(eb)) {
+               if (IS_ERR(eb)) {
+                       return PTR_ERR(eb);
+               } else if (!extent_buffer_uptodate(eb)) {
                        free_extent_buffer(eb);
                        return -EIO;
                }
@@ -507,7 +523,7 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info,
 }
 
 /*
- * merge two lists of backrefs and adjust counts accordingly
+ * merge backrefs and adjust counts accordingly
  *
  * mode = 1: merge identical keys, if key is set
  *    FIXME: if we add more keys in __add_prelim_ref, we can merge more here.
@@ -535,9 +551,9 @@ static void __merge_refs(struct list_head *head, int mode)
 
                        ref2 = list_entry(pos2, struct __prelim_ref, list);
 
+                       if (!ref_for_same_block(ref1, ref2))
+                               continue;
                        if (mode == 1) {
-                               if (!ref_for_same_block(ref1, ref2))
-                                       continue;
                                if (!ref1->parent && ref2->parent) {
                                        xchg = ref1;
                                        ref1 = ref2;
@@ -572,8 +588,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                              struct list_head *prefs, u64 *total_refs,
                              u64 inum)
 {
+       struct btrfs_delayed_ref_node *node;
        struct btrfs_delayed_extent_op *extent_op = head->extent_op;
-       struct rb_node *n = &head->node.rb_node;
        struct btrfs_key key;
        struct btrfs_key op_key = {0};
        int sgn;
@@ -583,12 +599,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                btrfs_disk_key_to_cpu(&op_key, &extent_op->key);
 
        spin_lock(&head->lock);
-       n = rb_first(&head->ref_root);
-       while (n) {
-               struct btrfs_delayed_ref_node *node;
-               node = rb_entry(n, struct btrfs_delayed_ref_node,
-                               rb_node);
-               n = rb_next(n);
+       list_for_each_entry(node, &head->ref_list, list) {
                if (node->seq > seq)
                        continue;
 
@@ -882,6 +893,11 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
  *
  * NOTE: This can return values > 0
  *
+ * If time_seq is set to (u64)-1, it will not search delayed_refs, and behave
+ * much like trans == NULL case, the difference only lies in it will not
+ * commit root.
+ * The special case is for qgroup to search roots in commit_transaction().
+ *
  * FIXME some caching might speed things up
  */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
@@ -920,6 +936,9 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
                path->skip_locking = 1;
        }
 
+       if (time_seq == (u64)-1)
+               path->skip_locking = 1;
+
        /*
         * grab both a lock on the path and a lock on the delayed ref head.
         * We need both to get a consistent picture of how the refs look
@@ -934,9 +953,10 @@ again:
        BUG_ON(ret == 0);
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-       if (trans && likely(trans->type != __TRANS_DUMMY)) {
+       if (trans && likely(trans->type != __TRANS_DUMMY) &&
+           time_seq != (u64)-1) {
 #else
-       if (trans) {
+       if (trans && time_seq != (u64)-1) {
 #endif
                /*
                 * look if there are updates for this ref queued and lock the
@@ -1034,7 +1054,10 @@ again:
 
                                eb = read_tree_block(fs_info->extent_root,
                                                           ref->parent, 0);
-                               if (!eb || !extent_buffer_uptodate(eb)) {
+                               if (IS_ERR(eb)) {
+                                       ret = PTR_ERR(eb);
+                                       goto out;
+                               } else if (!extent_buffer_uptodate(eb)) {
                                        free_extent_buffer(eb);
                                        ret = -EIO;
                                        goto out;
index 0f11ebc..54114b4 100644 (file)
@@ -1439,8 +1439,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
                btrfs_tree_read_unlock(eb_root);
                free_extent_buffer(eb_root);
                old = read_tree_block(root, logical, 0);
-               if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
-                       free_extent_buffer(old);
+               if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
+                       if (!IS_ERR(old))
+                               free_extent_buffer(old);
                        btrfs_warn(root->fs_info,
                                "failed to read tree block %llu from get_old_root", logical);
                } else {
@@ -1685,7 +1686,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                if (!cur || !uptodate) {
                        if (!cur) {
                                cur = read_tree_block(root, blocknr, gen);
-                               if (!cur || !extent_buffer_uptodate(cur)) {
+                               if (IS_ERR(cur)) {
+                                       return PTR_ERR(cur);
+                               } else if (!extent_buffer_uptodate(cur)) {
                                        free_extent_buffer(cur);
                                        return -EIO;
                                }
@@ -1864,8 +1867,9 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
 
        eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
                             btrfs_node_ptr_generation(parent, slot));
-       if (eb && !extent_buffer_uptodate(eb)) {
-               free_extent_buffer(eb);
+       if (IS_ERR(eb) || !extent_buffer_uptodate(eb)) {
+               if (!IS_ERR(eb))
+                       free_extent_buffer(eb);
                eb = NULL;
        }
 
@@ -2494,7 +2498,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 
        ret = -EAGAIN;
        tmp = read_tree_block(root, blocknr, 0);
-       if (tmp) {
+       if (!IS_ERR(tmp)) {
                /*
                 * If the read above didn't mark this buffer up to date,
                 * it will never end up being up to date.  Set ret to EIO now
index 3335245..80a9aef 100644 (file)
@@ -174,7 +174,7 @@ struct btrfs_ordered_sum;
 /* csum types */
 #define BTRFS_CSUM_TYPE_CRC32  0
 
-static int btrfs_csum_sizes[] = { 4, 0 };
+static int btrfs_csum_sizes[] = { 4 };
 
 /* four bytes for CRC32 */
 #define BTRFS_EMPTY_DIR_SIZE 0
@@ -1695,6 +1695,7 @@ struct btrfs_fs_info {
        struct btrfs_workqueue *scrub_workers;
        struct btrfs_workqueue *scrub_wr_completion_workers;
        struct btrfs_workqueue *scrub_nocow_workers;
+       struct btrfs_workqueue *scrub_parity_workers;
 
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        u32 check_integrity_print_mask;
@@ -1732,7 +1733,7 @@ struct btrfs_fs_info {
        /* list of dirty qgroups to be written at next commit */
        struct list_head dirty_qgroups;
 
-       /* used by btrfs_qgroup_record_ref for an efficient tree traversal */
+       /* used by qgroup for an efficient tree traversal */
        u64 qgroup_seq;
 
        /* qgroup rescan items */
@@ -3455,6 +3456,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes, u64 write_bytes)
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
                                  struct inode *inode);
 void btrfs_orphan_release_metadata(struct inode *inode);
@@ -3512,6 +3514,9 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
 int __get_raid_index(u64 flags);
 int btrfs_start_write_no_snapshoting(struct btrfs_root *root);
 void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
+void check_system_chunk(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       const u64 type);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@ -4047,6 +4052,7 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 
 #ifdef CONFIG_BTRFS_ASSERT
 
+__cold
 static inline void assfail(char *expr, char *file, int line)
 {
        pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
@@ -4062,10 +4068,12 @@ static inline void assfail(char *expr, char *file, int line)
 
 #define btrfs_assert()
 __printf(5, 6)
+__cold
 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                     unsigned int line, int errno, const char *fmt, ...);
 
 
+__cold
 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root, const char *function,
                               unsigned int line, int errno);
@@ -4108,11 +4116,17 @@ static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
  * Call btrfs_abort_transaction as early as possible when an error condition is
  * detected, that way the exact line number is reported.
  */
-
 #define btrfs_abort_transaction(trans, root, errno)            \
 do {                                                           \
-       __btrfs_abort_transaction(trans, root, __func__,        \
-                                 __LINE__, errno);             \
+       /* Report first abort since mount */                    \
+       if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,     \
+                       &((root)->fs_info->fs_state))) {        \
+               WARN(1, KERN_DEBUG                              \
+               "BTRFS: Transaction aborted (error %d)\n",      \
+               (errno));                                       \
+       }                                                       \
+       __btrfs_abort_transaction((trans), (root), __func__,    \
+                                 __LINE__, (errno));           \
 } while (0)
 
 #define btrfs_std_error(fs_info, errno)                                \
@@ -4129,6 +4143,7 @@ do {                                                              \
 } while (0)
 
 __printf(5, 6)
+__cold
 void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
                   unsigned int line, int errno, const char *fmt, ...);
 
index 8f8ed7d..fd64fd0 100644 (file)
@@ -22,6 +22,7 @@
 #include "ctree.h"
 #include "delayed-ref.h"
 #include "transaction.h"
+#include "qgroup.h"
 
 struct kmem_cache *btrfs_delayed_ref_head_cachep;
 struct kmem_cache *btrfs_delayed_tree_ref_cachep;
@@ -84,87 +85,6 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
        return 0;
 }
 
-/*
- * entries in the rb tree are ordered by the byte number of the extent,
- * type of the delayed backrefs and content of delayed backrefs.
- */
-static int comp_entry(struct btrfs_delayed_ref_node *ref2,
-                     struct btrfs_delayed_ref_node *ref1,
-                     bool compare_seq)
-{
-       if (ref1->bytenr < ref2->bytenr)
-               return -1;
-       if (ref1->bytenr > ref2->bytenr)
-               return 1;
-       if (ref1->is_head && ref2->is_head)
-               return 0;
-       if (ref2->is_head)
-               return -1;
-       if (ref1->is_head)
-               return 1;
-       if (ref1->type < ref2->type)
-               return -1;
-       if (ref1->type > ref2->type)
-               return 1;
-       if (ref1->no_quota > ref2->no_quota)
-               return 1;
-       if (ref1->no_quota < ref2->no_quota)
-               return -1;
-       /* merging of sequenced refs is not allowed */
-       if (compare_seq) {
-               if (ref1->seq < ref2->seq)
-                       return -1;
-               if (ref1->seq > ref2->seq)
-                       return 1;
-       }
-       if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
-           ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
-               return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
-                                     btrfs_delayed_node_to_tree_ref(ref1),
-                                     ref1->type);
-       } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
-                  ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
-               return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
-                                     btrfs_delayed_node_to_data_ref(ref1));
-       }
-       BUG();
-       return 0;
-}
-
-/*
- * insert a new ref into the rbtree.  This returns any existing refs
- * for the same (bytenr,parent) tuple, or NULL if the new node was properly
- * inserted.
- */
-static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
-                                                 struct rb_node *node)
-{
-       struct rb_node **p = &root->rb_node;
-       struct rb_node *parent_node = NULL;
-       struct btrfs_delayed_ref_node *entry;
-       struct btrfs_delayed_ref_node *ins;
-       int cmp;
-
-       ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-       while (*p) {
-               parent_node = *p;
-               entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
-                                rb_node);
-
-               cmp = comp_entry(entry, ins, 1);
-               if (cmp < 0)
-                       p = &(*p)->rb_left;
-               else if (cmp > 0)
-                       p = &(*p)->rb_right;
-               else
-                       return entry;
-       }
-
-       rb_link_node(node, parent_node, p);
-       rb_insert_color(node, root);
-       return NULL;
-}
-
 /* insert a new ref to head ref rbtree */
 static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
                                                   struct rb_node *node)
@@ -268,7 +188,7 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                rb_erase(&head->href_node, &delayed_refs->href_root);
        } else {
                assert_spin_locked(&head->lock);
-               rb_erase(&ref->rb_node, &head->ref_root);
+               list_del(&ref->list);
        }
        ref->in_tree = 0;
        btrfs_put_delayed_ref(ref);
@@ -277,99 +197,6 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                trans->delayed_ref_updates--;
 }
 
-static int merge_ref(struct btrfs_trans_handle *trans,
-                    struct btrfs_delayed_ref_root *delayed_refs,
-                    struct btrfs_delayed_ref_head *head,
-                    struct btrfs_delayed_ref_node *ref, u64 seq)
-{
-       struct rb_node *node;
-       int mod = 0;
-       int done = 0;
-
-       node = rb_next(&ref->rb_node);
-       while (!done && node) {
-               struct btrfs_delayed_ref_node *next;
-
-               next = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               node = rb_next(node);
-               if (seq && next->seq >= seq)
-                       break;
-               if (comp_entry(ref, next, 0))
-                       continue;
-
-               if (ref->action == next->action) {
-                       mod = next->ref_mod;
-               } else {
-                       if (ref->ref_mod < next->ref_mod) {
-                               struct btrfs_delayed_ref_node *tmp;
-
-                               tmp = ref;
-                               ref = next;
-                               next = tmp;
-                               done = 1;
-                       }
-                       mod = -next->ref_mod;
-               }
-
-               drop_delayed_ref(trans, delayed_refs, head, next);
-               ref->ref_mod += mod;
-               if (ref->ref_mod == 0) {
-                       drop_delayed_ref(trans, delayed_refs, head, ref);
-                       done = 1;
-               } else {
-                       /*
-                        * You can't have multiples of the same ref on a tree
-                        * block.
-                        */
-                       WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                               ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
-               }
-       }
-       return done;
-}
-
-void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
-                             struct btrfs_fs_info *fs_info,
-                             struct btrfs_delayed_ref_root *delayed_refs,
-                             struct btrfs_delayed_ref_head *head)
-{
-       struct rb_node *node;
-       u64 seq = 0;
-
-       assert_spin_locked(&head->lock);
-       /*
-        * We don't have too much refs to merge in the case of delayed data
-        * refs.
-        */
-       if (head->is_data)
-               return;
-
-       spin_lock(&fs_info->tree_mod_seq_lock);
-       if (!list_empty(&fs_info->tree_mod_seq_list)) {
-               struct seq_list *elem;
-
-               elem = list_first_entry(&fs_info->tree_mod_seq_list,
-                                       struct seq_list, list);
-               seq = elem->seq;
-       }
-       spin_unlock(&fs_info->tree_mod_seq_lock);
-
-       node = rb_first(&head->ref_root);
-       while (node) {
-               struct btrfs_delayed_ref_node *ref;
-
-               ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                              rb_node);
-               /* We can't merge refs that are outside of our seq count */
-               if (seq && ref->seq >= seq)
-                       break;
-               if (merge_ref(trans, delayed_refs, head, ref, seq))
-                       node = rb_first(&head->ref_root);
-               else
-                       node = rb_next(&ref->rb_node);
-       }
-}
-
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
                            struct btrfs_delayed_ref_root *delayed_refs,
                            u64 seq)
@@ -443,45 +270,71 @@ again:
 }
 
 /*
- * helper function to update an extent delayed ref in the
- * rbtree.  existing and update must both have the same
- * bytenr and parent
+ * Helper to insert the ref_node to the tail or merge with tail.
  *
- * This may free existing if the update cancels out whatever
- * operation it was doing.
+ * Return 0 for insert.
+ * Return >0 for merge.
  */
-static noinline void
-update_existing_ref(struct btrfs_trans_handle *trans,
-                   struct btrfs_delayed_ref_root *delayed_refs,
-                   struct btrfs_delayed_ref_head *head,
-                   struct btrfs_delayed_ref_node *existing,
-                   struct btrfs_delayed_ref_node *update)
+static int
+add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
+                          struct btrfs_delayed_ref_root *root,
+                          struct btrfs_delayed_ref_head *href,
+                          struct btrfs_delayed_ref_node *ref)
 {
-       if (update->action != existing->action) {
-               /*
-                * this is effectively undoing either an add or a
-                * drop.  We decrement the ref_mod, and if it goes
-                * down to zero we just delete the entry without
-                * every changing the extent allocation tree.
-                */
-               existing->ref_mod--;
-               if (existing->ref_mod == 0)
-                       drop_delayed_ref(trans, delayed_refs, head, existing);
-               else
-                       WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                               existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
+       struct btrfs_delayed_ref_node *exist;
+       int mod;
+       int ret = 0;
+
+       spin_lock(&href->lock);
+       /* Check whether we can merge the tail node with ref */
+       if (list_empty(&href->ref_list))
+               goto add_tail;
+       exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
+                          list);
+       /* No need to compare bytenr nor is_head */
+       if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
+           exist->seq != ref->seq)
+               goto add_tail;
+
+       if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
+            exist->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
+           comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist),
+                          btrfs_delayed_node_to_tree_ref(ref),
+                          ref->type))
+               goto add_tail;
+       if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY ||
+            exist->type == BTRFS_SHARED_DATA_REF_KEY) &&
+           comp_data_refs(btrfs_delayed_node_to_data_ref(exist),
+                          btrfs_delayed_node_to_data_ref(ref)))
+               goto add_tail;
+
+       /* Now we are sure we can merge */
+       ret = 1;
+       if (exist->action == ref->action) {
+               mod = ref->ref_mod;
        } else {
-               WARN_ON(existing->type == BTRFS_TREE_BLOCK_REF_KEY ||
-                       existing->type == BTRFS_SHARED_BLOCK_REF_KEY);
-               /*
-                * the action on the existing ref matches
-                * the action on the ref we're trying to add.
-                * Bump the ref_mod by one so the backref that
-                * is eventually added/removed has the correct
-                * reference count
-                */
-               existing->ref_mod += update->ref_mod;
+               /* Need to change action */
+               if (exist->ref_mod < ref->ref_mod) {
+                       exist->action = ref->action;
+                       mod = -exist->ref_mod;
+                       exist->ref_mod = ref->ref_mod;
+               } else
+                       mod = -ref->ref_mod;
        }
+       exist->ref_mod += mod;
+
+       /* remove existing tail if its ref_mod is zero */
+       if (exist->ref_mod == 0)
+               drop_delayed_ref(trans, root, href, exist);
+       spin_unlock(&href->lock);
+       return ret;
+
+add_tail:
+       list_add_tail(&ref->list, &href->ref_list);
+       atomic_inc(&root->num_entries);
+       trans->delayed_ref_updates++;
+       spin_unlock(&href->lock);
+       return ret;
 }
 
 /*
@@ -568,12 +421,14 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
 static noinline struct btrfs_delayed_ref_head *
 add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
-                    struct btrfs_delayed_ref_node *ref, u64 bytenr,
-                    u64 num_bytes, int action, int is_data)
+                    struct btrfs_delayed_ref_node *ref,
+                    struct btrfs_qgroup_extent_record *qrecord,
+                    u64 bytenr, u64 num_bytes, int action, int is_data)
 {
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_qgroup_extent_record *qexisting;
        int count_mod = 1;
        int must_insert_reserved = 0;
 
@@ -618,10 +473,22 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        head_ref = btrfs_delayed_node_to_head(ref);
        head_ref->must_insert_reserved = must_insert_reserved;
        head_ref->is_data = is_data;
-       head_ref->ref_root = RB_ROOT;
+       INIT_LIST_HEAD(&head_ref->ref_list);
        head_ref->processing = 0;
        head_ref->total_ref_mod = count_mod;
 
+       /* Record qgroup extent info if provided */
+       if (qrecord) {
+               qrecord->bytenr = bytenr;
+               qrecord->num_bytes = num_bytes;
+               qrecord->old_roots = NULL;
+
+               qexisting = btrfs_qgroup_insert_dirty_extent(delayed_refs,
+                                                            qrecord);
+               if (qexisting)
+                       kfree(qrecord);
+       }
+
        spin_lock_init(&head_ref->lock);
        mutex_init(&head_ref->mutex);
 
@@ -659,10 +526,10 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                     u64 num_bytes, u64 parent, u64 ref_root, int level,
                     int action, int no_quota)
 {
-       struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_tree_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
+       int ret;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -693,21 +560,14 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_tree_ref(ref, full_ref, action);
 
-       spin_lock(&head_ref->lock);
-       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
-       if (existing) {
-               update_existing_ref(trans, delayed_refs, head_ref, existing,
-                                   ref);
-               /*
-                * we've updated the existing ref, free the newly
-                * allocated ref
-                */
+       ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+       /*
+        * XXX: memory should be freed at the same level allocated.
+        * But bad practice is anywhere... Follow it now. Need cleanup.
+        */
+       if (ret > 0)
                kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
-       } else {
-               atomic_inc(&delayed_refs->num_entries);
-               trans->delayed_ref_updates++;
-       }
-       spin_unlock(&head_ref->lock);
 }
 
 /*
@@ -721,10 +581,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
                     u64 offset, int action, int no_quota)
 {
-       struct btrfs_delayed_ref_node *existing;
        struct btrfs_delayed_data_ref *full_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        u64 seq = 0;
+       int ret;
 
        if (action == BTRFS_ADD_DELAYED_EXTENT)
                action = BTRFS_ADD_DELAYED_REF;
@@ -758,21 +618,10 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
        trace_add_delayed_data_ref(ref, full_ref, action);
 
-       spin_lock(&head_ref->lock);
-       existing = tree_insert(&head_ref->ref_root, &ref->rb_node);
-       if (existing) {
-               update_existing_ref(trans, delayed_refs, head_ref, existing,
-                                   ref);
-               /*
-                * we've updated the existing ref, free the newly
-                * allocated ref
-                */
+       ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref);
+
+       if (ret > 0)
                kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
-       } else {
-               atomic_inc(&delayed_refs->num_entries);
-               trans->delayed_ref_updates++;
-       }
-       spin_unlock(&head_ref->lock);
 }
 
 /*
@@ -790,6 +639,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_tree_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_qgroup_extent_record *record = NULL;
 
        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                no_quota = 0;
@@ -805,6 +655,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                return -ENOMEM;
        }
 
+       if (fs_info->quota_enabled && is_fstree(ref_root)) {
+               record = kmalloc(sizeof(*record), GFP_NOFS);
+               if (!record) {
+                       kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+                       kmem_cache_free(btrfs_delayed_ref_head_cachep, ref);
+                       return -ENOMEM;
+               }
+       }
+
        head_ref->extent_op = extent_op;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -814,7 +673,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
                                        bytenr, num_bytes, action, 0);
 
        add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -839,6 +698,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
        struct btrfs_delayed_data_ref *ref;
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
+       struct btrfs_qgroup_extent_record *record = NULL;
 
        if (!is_fstree(ref_root) || !fs_info->quota_enabled)
                no_quota = 0;
@@ -854,6 +714,16 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                return -ENOMEM;
        }
 
+       if (fs_info->quota_enabled && is_fstree(ref_root)) {
+               record = kmalloc(sizeof(*record), GFP_NOFS);
+               if (!record) {
+                       kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+                       kmem_cache_free(btrfs_delayed_ref_head_cachep,
+                                       head_ref);
+                       return -ENOMEM;
+               }
+       }
+
        head_ref->extent_op = extent_op;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -863,7 +733,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         * insert both the head node and the new ref without dropping
         * the spin lock
         */
-       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node,
+       head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
                                        bytenr, num_bytes, action, 1);
 
        add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
@@ -891,9 +761,9 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
        delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
 
-       add_delayed_ref_head(fs_info, trans, &head_ref->node, bytenr,
-                                  num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
-                                  extent_op->is_data);
+       add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
+                            num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+                            extent_op->is_data);
 
        spin_unlock(&delayed_refs->lock);
        return 0;
index 5eb0892..13fb5e6 100644 (file)
 #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */
 #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */
 
+/*
+ * XXX: Qu: I really hate the design that ref_head and tree/data ref shares the
+ * same ref_node structure.
+ * Ref_head is in a higher logic level than tree/data ref, and duplicated
+ * bytenr/num_bytes in ref_node is really a waste or memory, they should be
+ * referred from ref_head.
+ * This gets more disgusting after we use list to store tree/data ref in
+ * ref_head. Must clean this mess up later.
+ */
 struct btrfs_delayed_ref_node {
+       /*
+        * ref_head use rb tree, stored in ref_root->href.
+        * indexed by bytenr
+        */
        struct rb_node rb_node;
 
+       /*data/tree ref use list, stored in ref_head->ref_list. */
+       struct list_head list;
+
        /* the starting bytenr of the extent */
        u64 bytenr;
 
@@ -83,7 +99,7 @@ struct btrfs_delayed_ref_head {
        struct mutex mutex;
 
        spinlock_t lock;
-       struct rb_root ref_root;
+       struct list_head ref_list;
 
        struct rb_node href_node;
 
@@ -132,6 +148,9 @@ struct btrfs_delayed_ref_root {
        /* head ref rbtree */
        struct rb_root href_root;
 
+       /* dirty extent records */
+       struct rb_root dirty_extent_root;
+
        /* this spin lock protects the rbtree and the entries inside */
        spinlock_t lock;
 
@@ -156,6 +175,14 @@ struct btrfs_delayed_ref_root {
        int flushing;
 
        u64 run_delayed_start;
+
+       /*
+        * To make qgroup to skip given root.
+        * This is for snapshot, as btrfs_qgroup_inherit() will manully
+        * modify counters for snapshot and its source, so we should skip
+        * the snapshot in new_root/old_roots or it will get calculated twice
+        */
+       u64 qgroup_to_skip;
 };
 
 extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
index d29a251..b977fc8 100644 (file)
@@ -1149,12 +1149,12 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 
        buf = btrfs_find_create_tree_block(root, bytenr);
        if (!buf)
-               return NULL;
+               return ERR_PTR(-ENOMEM);
 
        ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
        if (ret) {
                free_extent_buffer(buf);
-               return NULL;
+               return ERR_PTR(ret);
        }
        return buf;
 
@@ -1509,20 +1509,19 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
        generation = btrfs_root_generation(&root->root_item);
        root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                     generation);
-       if (!root->node) {
-               ret = -ENOMEM;
+       if (IS_ERR(root->node)) {
+               ret = PTR_ERR(root->node);
                goto find_fail;
        } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
                ret = -EIO;
-               goto read_fail;
+               free_extent_buffer(root->node);
+               goto find_fail;
        }
        root->commit_root = btrfs_root_node(root);
 out:
        btrfs_free_path(path);
        return root;
 
-read_fail:
-       free_extent_buffer(root->node);
 find_fail:
        kfree(root);
 alloc_fail:
@@ -2320,8 +2319,12 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
 
        log_tree_root->node = read_tree_block(tree_root, bytenr,
                        fs_info->generation + 1);
-       if (!log_tree_root->node ||
-           !extent_buffer_uptodate(log_tree_root->node)) {
+       if (IS_ERR(log_tree_root->node)) {
+               printk(KERN_ERR "BTRFS: failed to read log tree\n");
+               ret = PTR_ERR(log_tree_root->node);
+               kfree(log_tree_root);
+               return ret;
+       } else if (!extent_buffer_uptodate(log_tree_root->node)) {
                printk(KERN_ERR "BTRFS: failed to read log tree\n");
                free_extent_buffer(log_tree_root->node);
                kfree(log_tree_root);
@@ -2796,8 +2799,8 @@ int open_ctree(struct super_block *sb,
        chunk_root->node = read_tree_block(chunk_root,
                                           btrfs_super_chunk_root(disk_super),
                                           generation);
-       if (!chunk_root->node ||
-           !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+       if (IS_ERR(chunk_root->node) ||
+           !extent_buffer_uptodate(chunk_root->node)) {
                printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
@@ -2833,8 +2836,8 @@ retry_root_backup:
        tree_root->node = read_tree_block(tree_root,
                                          btrfs_super_root(disk_super),
                                          generation);
-       if (!tree_root->node ||
-           !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+       if (IS_ERR(tree_root->node) ||
+           !extent_buffer_uptodate(tree_root->node)) {
                printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                       sb->s_id);
 
@@ -4075,6 +4078,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 
        while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
                struct btrfs_delayed_ref_head *head;
+               struct btrfs_delayed_ref_node *tmp;
                bool pin_bytes = false;
 
                head = rb_entry(node, struct btrfs_delayed_ref_head,
@@ -4090,11 +4094,10 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
                        continue;
                }
                spin_lock(&head->lock);
-               while ((node = rb_first(&head->ref_root)) != NULL) {
-                       ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                                      rb_node);
+               list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list,
+                                                list) {
                        ref->in_tree = 0;
-                       rb_erase(&ref->rb_node, &head->ref_root);
+                       list_del(&ref->list);
                        atomic_dec(&delayed_refs->num_entries);
                        btrfs_put_delayed_ref(ref);
                }
index 0ec3acd..38b76cc 100644 (file)
@@ -79,11 +79,10 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                              u64 num_bytes, int alloc);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
-                               u64 bytenr, u64 num_bytes, u64 parent,
+                               struct btrfs_delayed_ref_node *node, u64 parent,
                                u64 root_objectid, u64 owner_objectid,
                                u64 owner_offset, int refs_to_drop,
-                               struct btrfs_delayed_extent_op *extra_op,
-                               int no_quota);
+                               struct btrfs_delayed_extent_op *extra_op);
 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
                                    struct extent_buffer *leaf,
                                    struct btrfs_extent_item *ei);
@@ -1967,10 +1966,9 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 
 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
-                                 u64 bytenr, u64 num_bytes,
+                                 struct btrfs_delayed_ref_node *node,
                                  u64 parent, u64 root_objectid,
                                  u64 owner, u64 offset, int refs_to_add,
-                                 int no_quota,
                                  struct btrfs_delayed_extent_op *extent_op)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1978,9 +1976,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        struct btrfs_extent_item *item;
        struct btrfs_key key;
+       u64 bytenr = node->bytenr;
+       u64 num_bytes = node->num_bytes;
        u64 refs;
        int ret;
-       enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
+       int no_quota = node->no_quota;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -1996,26 +1996,8 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                           bytenr, num_bytes, parent,
                                           root_objectid, owner, offset,
                                           refs_to_add, extent_op);
-       if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
+       if ((ret < 0 && ret != -EAGAIN) || !ret)
                goto out;
-       /*
-        * Ok we were able to insert an inline extent and it appears to be a new
-        * reference, deal with the qgroup accounting.
-        */
-       if (!ret && !no_quota) {
-               ASSERT(root->fs_info->quota_enabled);
-               leaf = path->nodes[0];
-               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-               item = btrfs_item_ptr(leaf, path->slots[0],
-                                     struct btrfs_extent_item);
-               if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
-                       type = BTRFS_QGROUP_OPER_ADD_SHARED;
-               btrfs_release_path(path);
-
-               ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
-                                             bytenr, num_bytes, type, 0);
-               goto out;
-       }
 
        /*
         * Ok we had -EAGAIN which means we didn't have space to insert and
@@ -2026,8 +2008,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, item);
-       if (refs)
-               type = BTRFS_QGROUP_OPER_ADD_SHARED;
        btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
        if (extent_op)
                __run_delayed_extent_op(extent_op, leaf, item);
@@ -2035,13 +2015,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);
 
-       if (!no_quota) {
-               ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
-                                             bytenr, num_bytes, type, 0);
-               if (ret)
-                       goto out;
-       }
-
        path->reada = 1;
        path->leave_spinning = 1;
        /* now insert the actual backref */
@@ -2087,17 +2060,15 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
                                                 ref->objectid, ref->offset,
                                                 &ins, node->ref_mod);
        } else if (node->action == BTRFS_ADD_DELAYED_REF) {
-               ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
-                                            node->num_bytes, parent,
+               ret = __btrfs_inc_extent_ref(trans, root, node, parent,
                                             ref_root, ref->objectid,
                                             ref->offset, node->ref_mod,
-                                            node->no_quota, extent_op);
+                                            extent_op);
        } else if (node->action == BTRFS_DROP_DELAYED_REF) {
-               ret = __btrfs_free_extent(trans, root, node->bytenr,
-                                         node->num_bytes, parent,
+               ret = __btrfs_free_extent(trans, root, node, parent,
                                          ref_root, ref->objectid,
                                          ref->offset, node->ref_mod,
-                                         extent_op, node->no_quota);
+                                         extent_op);
        } else {
                BUG();
        }
@@ -2255,15 +2226,14 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
                                                ref->level, &ins,
                                                node->no_quota);
        } else if (node->action == BTRFS_ADD_DELAYED_REF) {
-               ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
-                                            node->num_bytes, parent, ref_root,
-                                            ref->level, 0, 1, node->no_quota,
+               ret = __btrfs_inc_extent_ref(trans, root, node,
+                                            parent, ref_root,
+                                            ref->level, 0, 1,
                                             extent_op);
        } else if (node->action == BTRFS_DROP_DELAYED_REF) {
-               ret = __btrfs_free_extent(trans, root, node->bytenr,
-                                         node->num_bytes, parent, ref_root,
-                                         ref->level, 0, 1, extent_op,
-                                         node->no_quota);
+               ret = __btrfs_free_extent(trans, root, node,
+                                         parent, ref_root,
+                                         ref->level, 0, 1, extent_op);
        } else {
                BUG();
        }
@@ -2323,28 +2293,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static noinline struct btrfs_delayed_ref_node *
+static inline struct btrfs_delayed_ref_node *
 select_delayed_ref(struct btrfs_delayed_ref_head *head)
 {
-       struct rb_node *node;
-       struct btrfs_delayed_ref_node *ref, *last = NULL;;
+       if (list_empty(&head->ref_list))
+               return NULL;
 
-       /*
-        * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
-        * this prevents ref count from going down to zero when
-        * there still are pending delayed ref.
-        */
-       node = rb_first(&head->ref_root);
-       while (node) {
-               ref = rb_entry(node, struct btrfs_delayed_ref_node,
-                               rb_node);
-               if (ref->action == BTRFS_ADD_DELAYED_REF)
-                       return ref;
-               else if (last == NULL)
-                       last = ref;
-               node = rb_next(node);
-       }
-       return last;
+       return list_entry(head->ref_list.next, struct btrfs_delayed_ref_node,
+                         list);
 }
 
 /*
@@ -2396,16 +2352,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        }
                }
 
-               /*
-                * We need to try and merge add/drops of the same ref since we
-                * can run into issues with relocate dropping the implicit ref
-                * and then it being added back again before the drop can
-                * finish.  If we merged anything we need to re-loop so we can
-                * get a good ref.
-                */
                spin_lock(&locked_ref->lock);
-               btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
-                                        locked_ref);
 
                /*
                 * locked_ref is the head node, so we have to go one
@@ -2482,7 +2429,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        spin_unlock(&locked_ref->lock);
                        spin_lock(&delayed_refs->lock);
                        spin_lock(&locked_ref->lock);
-                       if (rb_first(&locked_ref->ref_root) ||
+                       if (!list_empty(&locked_ref->ref_list) ||
                            locked_ref->extent_op) {
                                spin_unlock(&locked_ref->lock);
                                spin_unlock(&delayed_refs->lock);
@@ -2496,7 +2443,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                } else {
                        actual_count++;
                        ref->in_tree = 0;
-                       rb_erase(&ref->rb_node, &locked_ref->ref_root);
+                       list_del(&ref->list);
                }
                atomic_dec(&delayed_refs->num_entries);
 
@@ -2864,9 +2811,6 @@ again:
                goto again;
        }
 out:
-       ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
-       if (ret)
-               return ret;
        assert_qgroups_uptodate(trans);
        return 0;
 }
@@ -2905,7 +2849,6 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_data_ref *data_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
-       struct rb_node *node;
        int ret = 0;
 
        delayed_refs = &trans->transaction->delayed_refs;
@@ -2934,11 +2877,7 @@ static noinline int check_delayed_ref(struct btrfs_trans_handle *trans,
        spin_unlock(&delayed_refs->lock);
 
        spin_lock(&head->lock);
-       node = rb_first(&head->ref_root);
-       while (node) {
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               node = rb_next(node);
-
+       list_for_each_entry(ref, &head->ref_list, list) {
                /* If it's a shared ref we know a cross reference exists */
                if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) {
                        ret = 1;
@@ -3693,7 +3632,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                found->disk_total += total_bytes * factor;
                found->bytes_used += bytes_used;
                found->disk_used += bytes_used * factor;
-               found->full = 0;
+               if (total_bytes > 0)
+                       found->full = 0;
                spin_unlock(&found->lock);
                *space_info = found;
                return 0;
@@ -3721,7 +3661,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->bytes_reserved = 0;
        found->bytes_readonly = 0;
        found->bytes_may_use = 0;
-       found->full = 0;
+       if (total_bytes > 0)
+               found->full = 0;
+       else
+               found->full = 1;
        found->force_alloc = CHUNK_ALLOC_NO_FORCE;
        found->chunk_alloc = 0;
        found->flush = 0;
@@ -3975,6 +3918,9 @@ commit_trans:
                    !atomic_read(&root->fs_info->open_ioctl_trans)) {
                        need_commit--;
 
+                       if (need_commit > 0)
+                               btrfs_wait_ordered_roots(fs_info, -1);
+
                        trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans))
                                return PTR_ERR(trans);
@@ -4088,7 +4034,7 @@ static int should_alloc_chunk(struct btrfs_root *root,
        return 1;
 }
 
-static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
+static u64 get_profile_num_devs(struct btrfs_root *root, u64 type)
 {
        u64 num_dev;
 
@@ -4102,24 +4048,43 @@ static u64 get_system_chunk_thresh(struct btrfs_root *root, u64 type)
        else
                num_dev = 1;    /* DUP or single */
 
-       /* metadata for updaing devices and chunk tree */
-       return btrfs_calc_trans_metadata_size(root, num_dev + 1);
+       return num_dev;
 }
 
-static void check_system_chunk(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *root, u64 type)
+/*
+ * If @is_allocation is true, reserve space in the system space info necessary
+ * for allocating a chunk, otherwise if it's false, reserve space necessary for
+ * removing a chunk.
+ */
+void check_system_chunk(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *root,
+                       u64 type)
 {
        struct btrfs_space_info *info;
        u64 left;
        u64 thresh;
+       int ret = 0;
+       u64 num_devs;
+
+       /*
+        * Needed because we can end up allocating a system chunk and for an
+        * atomic and race free space reservation in the chunk block reserve.
+        */
+       ASSERT(mutex_is_locked(&root->fs_info->chunk_mutex));
 
        info = __find_space_info(root->fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
        spin_lock(&info->lock);
        left = info->total_bytes - info->bytes_used - info->bytes_pinned -
-               info->bytes_reserved - info->bytes_readonly;
+               info->bytes_reserved - info->bytes_readonly -
+               info->bytes_may_use;
        spin_unlock(&info->lock);
 
-       thresh = get_system_chunk_thresh(root, type);
+       num_devs = get_profile_num_devs(root, type);
+
+       /* num_devs device items to update and 1 chunk item to add or remove */
+       thresh = btrfs_calc_trunc_metadata_size(root, num_devs) +
+               btrfs_calc_trans_metadata_size(root, 1);
+
        if (left < thresh && btrfs_test_opt(root, ENOSPC_DEBUG)) {
                btrfs_info(root->fs_info, "left=%llu, need=%llu, flags=%llu",
                        left, thresh, type);
@@ -4130,7 +4095,21 @@ static void check_system_chunk(struct btrfs_trans_handle *trans,
                u64 flags;
 
                flags = btrfs_get_alloc_profile(root->fs_info->chunk_root, 0);
-               btrfs_alloc_chunk(trans, root, flags);
+               /*
+                * Ignore failure to create system chunk. We might end up not
+                * needing it, as we might not need to COW all nodes/leafs from
+                * the paths we visit in the chunk tree (they were already COWed
+                * or created in the current transaction for example).
+                */
+               ret = btrfs_alloc_chunk(trans, root, flags);
+       }
+
+       if (!ret) {
+               ret = btrfs_block_rsv_add(root->fs_info->chunk_root,
+                                         &root->fs_info->chunk_block_rsv,
+                                         thresh, BTRFS_RESERVE_NO_FLUSH);
+               if (!ret)
+                       trans->chunk_bytes_reserved += thresh;
        }
 }
 
@@ -5188,6 +5167,24 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
        trans->bytes_reserved = 0;
 }
 
+/*
+ * To be called after all the new block groups attached to the transaction
+ * handle have been created (btrfs_create_pending_block_groups()).
+ */
+void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
+{
+       struct btrfs_fs_info *fs_info = trans->root->fs_info;
+
+       if (!trans->chunk_bytes_reserved)
+               return;
+
+       WARN_ON_ONCE(!list_empty(&trans->new_bgs));
+
+       block_rsv_release_bytes(fs_info, &fs_info->chunk_block_rsv, NULL,
+                               trans->chunk_bytes_reserved);
+       trans->chunk_bytes_reserved = 0;
+}
+
 /* Can only return 0 or -ENOSPC */
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
                                  struct inode *inode)
@@ -6092,11 +6089,10 @@ static void add_pinned_bytes(struct btrfs_fs_info *fs_info, u64 num_bytes,
 
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
-                               u64 bytenr, u64 num_bytes, u64 parent,
+                               struct btrfs_delayed_ref_node *node, u64 parent,
                                u64 root_objectid, u64 owner_objectid,
                                u64 owner_offset, int refs_to_drop,
-                               struct btrfs_delayed_extent_op *extent_op,
-                               int no_quota)
+                               struct btrfs_delayed_extent_op *extent_op)
 {
        struct btrfs_key key;
        struct btrfs_path *path;
@@ -6110,10 +6106,12 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        int extent_slot = 0;
        int found_extent = 0;
        int num_to_del = 1;
+       int no_quota = node->no_quota;
        u32 item_size;
        u64 refs;
+       u64 bytenr = node->bytenr;
+       u64 num_bytes = node->num_bytes;
        int last_ref = 0;
-       enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
        bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
                                                 SKINNY_METADATA);
 
@@ -6294,7 +6292,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        refs -= refs_to_drop;
 
        if (refs > 0) {
-               type = BTRFS_QGROUP_OPER_SUB_SHARED;
                if (extent_op)
                        __run_delayed_extent_op(extent_op, leaf, ei);
                /*
@@ -6356,18 +6353,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        }
        btrfs_release_path(path);
 
-       /* Deal with the quota accounting */
-       if (!ret && last_ref && !no_quota) {
-               int mod_seq = 0;
-
-               if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
-                   type == BTRFS_QGROUP_OPER_SUB_SHARED)
-                       mod_seq = 1;
-
-               ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
-                                             bytenr, num_bytes, type,
-                                             mod_seq);
-       }
 out:
        btrfs_free_path(path);
        return ret;
@@ -6393,7 +6378,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
                goto out_delayed_unlock;
 
        spin_lock(&head->lock);
-       if (rb_first(&head->ref_root))
+       if (!list_empty(&head->ref_list))
                goto out;
 
        if (head->extent_op) {
@@ -7303,13 +7288,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
-       /* Always set parent to 0 here since its exclusive anyway. */
-       ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
-                                     ins->objectid, ins->offset,
-                                     BTRFS_QGROUP_OPER_ADD_EXCL, 0);
-       if (ret)
-               return ret;
-
        ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -7391,14 +7369,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_free_path(path);
 
-       if (!no_quota) {
-               ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
-                                             ins->objectid, num_bytes,
-                                             BTRFS_QGROUP_OPER_ADD_EXCL, 0);
-               if (ret)
-                       return ret;
-       }
-
        ret = update_block_group(trans, root, ins->objectid, root->nodesize,
                                 1);
        if (ret) { /* -ENOENT, logic error */
@@ -7755,12 +7725,18 @@ reada:
        wc->reada_slot = slot;
 }
 
+/*
+ * TODO: Modify related function to add related node/leaf to dirty_extent_root,
+ * for later qgroup accounting.
+ *
+ * Current, this function does nothing.
+ */
 static int account_leaf_items(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *eb)
 {
        int nr = btrfs_header_nritems(eb);
-       int i, extent_type, ret;
+       int i, extent_type;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        u64 bytenr, num_bytes;
@@ -7783,13 +7759,6 @@ static int account_leaf_items(struct btrfs_trans_handle *trans,
                        continue;
 
                num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
-
-               ret = btrfs_qgroup_record_ref(trans, root->fs_info,
-                                             root->objectid,
-                                             bytenr, num_bytes,
-                                             BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
-               if (ret)
-                       return ret;
        }
        return 0;
 }
@@ -7858,6 +7827,8 @@ static int adjust_slots_upwards(struct btrfs_root *root,
 
 /*
  * root_eb is the subtree root and is locked before this function is called.
+ * TODO: Modify this function to mark all (including complete shared node)
+ * to dirty_extent_root to allow it get accounted in qgroup.
  */
 static int account_shared_subtree(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
@@ -7920,7 +7891,11 @@ walk_down:
                        child_gen = btrfs_node_ptr_generation(eb, parent_slot);
 
                        eb = read_tree_block(root, child_bytenr, child_gen);
-                       if (!eb || !extent_buffer_uptodate(eb)) {
+                       if (IS_ERR(eb)) {
+                               ret = PTR_ERR(eb);
+                               goto out;
+                       } else if (!extent_buffer_uptodate(eb)) {
+                               free_extent_buffer(eb);
                                ret = -EIO;
                                goto out;
                        }
@@ -7931,16 +7906,6 @@ walk_down:
                        btrfs_tree_read_lock(eb);
                        btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                        path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
-
-                       ret = btrfs_qgroup_record_ref(trans, root->fs_info,
-                                               root->objectid,
-                                               child_bytenr,
-                                               root->nodesize,
-                                               BTRFS_QGROUP_OPER_SUB_SUBTREE,
-                                               0);
-                       if (ret)
-                               goto out;
-
                }
 
                if (level == 0) {
@@ -8151,7 +8116,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
                if (reada && level == 1)
                        reada_walk_down(trans, root, wc, path);
                next = read_tree_block(root, bytenr, generation);
-               if (!next || !extent_buffer_uptodate(next)) {
+               if (IS_ERR(next)) {
+                       return PTR_ERR(next);
+               } else if (!extent_buffer_uptodate(next)) {
                        free_extent_buffer(next);
                        return -EIO;
                }
@@ -8533,24 +8500,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                                goto out_end_trans;
                        }
 
-                       /*
-                        * Qgroup update accounting is run from
-                        * delayed ref handling. This usually works
-                        * out because delayed refs are normally the
-                        * only way qgroup updates are added. However,
-                        * we may have added updates during our tree
-                        * walk so run qgroups here to make sure we
-                        * don't lose any updates.
-                        */
-                       ret = btrfs_delayed_qgroup_accounting(trans,
-                                                             root->fs_info);
-                       if (ret)
-                               printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
-                                                  "running qgroup updates "
-                                                  "during snapshot delete. "
-                                                  "Quota is out of sync, "
-                                                  "rescan required.\n", ret);
-
                        btrfs_end_transaction_throttle(trans, tree_root);
                        if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
                                pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8604,14 +8553,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
        }
        root_dropped = true;
 out_end_trans:
-       ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
-       if (ret)
-               printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
-                                  "running qgroup updates "
-                                  "during snapshot delete. "
-                                  "Quota is out of sync, "
-                                  "rescan required.\n", ret);
-
        btrfs_end_transaction_throttle(trans, tree_root);
 out_free:
        kfree(wc);
@@ -9562,6 +9503,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
        free_excluded_extents(root, cache);
 
+       /*
+        * Call to ensure the corresponding space_info object is created and
+        * assigned to our block group, but don't update its counters just yet.
+        * We want our bg to be added to the rbtree with its ->space_info set.
+        */
+       ret = update_space_info(root->fs_info, cache->flags, 0, 0,
+                               &cache->space_info);
+       if (ret) {
+               btrfs_remove_free_space_cache(cache);
+               btrfs_put_block_group(cache);
+               return ret;
+       }
+
        ret = btrfs_add_block_group_cache(root->fs_info, cache);
        if (ret) {
                btrfs_remove_free_space_cache(cache);
@@ -9569,6 +9523,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
                return ret;
        }
 
+       /*
+        * Now that our block group has its ->space_info set and is inserted in
+        * the rbtree, update the space info's counters.
+        */
        ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
                                &cache->space_info);
        if (ret) {
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
new file mode 100644 (file)
index 0000000..e69de29
index c32d226..a3ec2c8 100644 (file)
@@ -1277,7 +1277,12 @@ int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                      unsigned bits, gfp_t mask)
 {
-       return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
+       int wake = 0;
+
+       if (bits & EXTENT_LOCKED)
+               wake = 1;
+
+       return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
 }
 
 int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
@@ -4492,6 +4497,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                }
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
+               if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+                       flags |= FIEMAP_EXTENT_UNWRITTEN;
 
                free_extent_map(em);
                em = NULL;
index b072e17..795d754 100644 (file)
@@ -1868,6 +1868,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
        struct btrfs_log_ctx ctx;
        int ret = 0;
        bool full_sync = 0;
+       const u64 len = end - start + 1;
 
        trace_btrfs_sync_file(file, datasync);
 
@@ -1896,7 +1897,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
                 * all extents are persisted and the respective file extent
                 * items are in the fs/subvol btree.
                 */
-               ret = btrfs_wait_ordered_range(inode, start, end - start + 1);
+               ret = btrfs_wait_ordered_range(inode, start, len);
        } else {
                /*
                 * Start any new ordered operations before starting to log the
@@ -1968,8 +1969,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
         */
        smp_mb();
        if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
-           (full_sync && BTRFS_I(inode)->last_trans <=
-            root->fs_info->last_trans_committed)) {
+           (BTRFS_I(inode)->last_trans <=
+            root->fs_info->last_trans_committed &&
+            (full_sync ||
+             !btrfs_have_ordered_extents_in_range(inode, start, len)))) {
                /*
                 * We'v had everything committed since the last time we were
                 * modified so clear this flag in case it was set for whatever
index 9dbe5b5..fb5a6b1 100644 (file)
@@ -231,6 +231,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
 {
        int ret = 0;
        struct btrfs_path *path = btrfs_alloc_path();
+       bool locked = false;
 
        if (!path) {
                ret = -ENOMEM;
@@ -238,6 +239,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
        }
 
        if (block_group) {
+               locked = true;
                mutex_lock(&trans->transaction->cache_write_mutex);
                if (!list_empty(&block_group->io_list)) {
                        list_del_init(&block_group->io_list);
@@ -269,18 +271,14 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
         */
        ret = btrfs_truncate_inode_items(trans, root, inode,
                                         0, BTRFS_EXTENT_DATA_KEY);
-       if (ret) {
-               mutex_unlock(&trans->transaction->cache_write_mutex);
-               btrfs_abort_transaction(trans, root, ret);
-               return ret;
-       }
+       if (ret)
+               goto fail;
 
        ret = btrfs_update_inode(trans, root, inode);
 
-       if (block_group)
-               mutex_unlock(&trans->transaction->cache_write_mutex);
-
 fail:
+       if (locked)
+               mutex_unlock(&trans->transaction->cache_write_mutex);
        if (ret)
                btrfs_abort_transaction(trans, root, ret);
 
index 8bb0136..855935f 100644 (file)
@@ -4986,24 +4986,40 @@ static void evict_inode_truncate_pages(struct inode *inode)
        }
        write_unlock(&map_tree->lock);
 
+       /*
+        * Keep looping until we have no more ranges in the io tree.
+        * We can have ongoing bios started by readpages (called from readahead)
+        * that didn't get their end io callbacks called yet or they are still
+        * in progress ((extent_io.c:end_bio_extent_readpage()). This means some
+        * ranges can still be locked and eviction started because before
+        * submitting those bios, which are executed by a separate task (work
+        * queue kthread), inode references (inode->i_count) were not taken
+        * (which would be dropped in the end io callback of each bio).
+        * Therefore here we effectively end up waiting for those bios and
+        * anyone else holding locked ranges without having bumped the inode's
+        * reference count - if we don't do it, when they access the inode's
+        * io_tree to unlock a range it may be too late, leading to an
+        * use-after-free issue.
+        */
        spin_lock(&io_tree->lock);
        while (!RB_EMPTY_ROOT(&io_tree->state)) {
                struct extent_state *state;
                struct extent_state *cached_state = NULL;
+               u64 start;
+               u64 end;
 
                node = rb_first(&io_tree->state);
                state = rb_entry(node, struct extent_state, rb_node);
-               atomic_inc(&state->refs);
+               start = state->start;
+               end = state->end;
                spin_unlock(&io_tree->lock);
 
-               lock_extent_bits(io_tree, state->start, state->end,
-                                0, &cached_state);
-               clear_extent_bit(io_tree, state->start, state->end,
+               lock_extent_bits(io_tree, start, end, 0, &cached_state);
+               clear_extent_bit(io_tree, start, end,
                                 EXTENT_LOCKED | EXTENT_DIRTY |
                                 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
                                 EXTENT_DEFRAG, 1, 1,
                                 &cached_state, GFP_NOFS);
-               free_extent_state(state);
 
                cond_resched();
                spin_lock(&io_tree->lock);
index 1c22c65..c86b835 100644 (file)
@@ -553,8 +553,8 @@ static noinline int create_subvol(struct inode *dir,
        key.offset = (u64)-1;
        new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
        if (IS_ERR(new_root)) {
-               btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
                ret = PTR_ERR(new_root);
+               btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
 
@@ -1318,7 +1318,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                i = range->start >> PAGE_CACHE_SHIFT;
        }
        if (!max_to_defrag)
-               max_to_defrag = last_index + 1;
+               max_to_defrag = last_index - i + 1;
 
        /*
         * make writeback starts from i, so the defrag range can be
@@ -1368,7 +1368,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        ra_index = max(i, ra_index);
                        btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
                                       cluster);
-                       ra_index += max_cluster;
+                       ra_index += cluster;
                }
 
                mutex_lock(&inode->i_mutex);
@@ -2271,10 +2271,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 {
         struct btrfs_ioctl_ino_lookup_args *args;
         struct inode *inode;
-        int ret;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
+       int ret = 0;
 
        args = memdup_user(argp, sizeof(*args));
        if (IS_ERR(args))
@@ -2282,13 +2279,28 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 
        inode = file_inode(file);
 
+       /*
+        * Unprivileged query to obtain the containing subvolume root id. The
+        * path is reset so it's consistent with btrfs_search_path_in_tree.
+        */
        if (args->treeid == 0)
                args->treeid = BTRFS_I(inode)->root->root_key.objectid;
 
+       if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
+               args->name[0] = 0;
+               goto out;
+       }
+
+       if (!capable(CAP_SYS_ADMIN)) {
+               ret = -EPERM;
+               goto out;
+       }
+
        ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
                                        args->treeid, args->objectid,
                                        args->name);
 
+out:
        if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
                ret = -EFAULT;
 
@@ -2413,8 +2425,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                goto out_unlock_inode;
        }
 
-       d_invalidate(dentry);
-
        down_write(&root->fs_info->subvol_sem);
 
        err = may_destroy_subvol(dest);
@@ -2508,7 +2518,7 @@ out_up_write:
 out_unlock_inode:
        mutex_unlock(&inode->i_mutex);
        if (!err) {
-               shrink_dcache_sb(root->fs_info->sb);
+               d_invalidate(dentry);
                btrfs_invalidate_inodes(dest);
                d_delete(dentry);
                ASSERT(dest->send_in_progress == 0);
@@ -2879,12 +2889,19 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
        return ret;
 }
 
-static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
+static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen,
+                                    u64 olen)
 {
+       u64 len = *plen;
        u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
 
-       if (off + len > inode->i_size || off + len < off)
+       if (off + olen > inode->i_size || off + olen < off)
                return -EINVAL;
+
+       /* if we extend to eof, continue to block boundary */
+       if (off + len == inode->i_size)
+               *plen = len = ALIGN(inode->i_size, bs) - off;
+
        /* Check that we are block aligned - btrfs_clone() requires this */
        if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
                return -EINVAL;
@@ -2892,10 +2909,11 @@ static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
        return 0;
 }
 
-static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
                             struct inode *dst, u64 dst_loff)
 {
        int ret;
+       u64 len = olen;
 
        /*
         * btrfs_clone() can't handle extents in the same file
@@ -2910,11 +2928,11 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
 
        btrfs_double_lock(src, loff, dst, dst_loff, len);
 
-       ret = extent_same_check_offsets(src, loff, len);
+       ret = extent_same_check_offsets(src, loff, &len, olen);
        if (ret)
                goto out_unlock;
 
-       ret = extent_same_check_offsets(dst, dst_loff, len);
+       ret = extent_same_check_offsets(dst, dst_loff, &len, olen);
        if (ret)
                goto out_unlock;
 
@@ -2927,7 +2945,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
 
        ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
        if (ret == 0)
-               ret = btrfs_clone(src, dst, loff, len, len, dst_loff);
+               ret = btrfs_clone(src, dst, loff, olen, len, dst_loff);
 
 out_unlock:
        btrfs_double_unlock(src, loff, dst, dst_loff, len);
index 760c4a5..89656d7 100644 (file)
@@ -198,9 +198,6 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        entry->file_offset = file_offset;
        entry->start = start;
        entry->len = len;
-       if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
-           !(type == BTRFS_ORDERED_NOCOW))
-               entry->csum_bytes_left = disk_len;
        entry->disk_len = disk_len;
        entry->bytes_left = len;
        entry->inode = igrab(inode);
@@ -286,10 +283,6 @@ void btrfs_add_ordered_sum(struct inode *inode,
        tree = &BTRFS_I(inode)->ordered_tree;
        spin_lock_irq(&tree->lock);
        list_add_tail(&sum->list, &entry->list);
-       WARN_ON(entry->csum_bytes_left < sum->len);
-       entry->csum_bytes_left -= sum->len;
-       if (entry->csum_bytes_left == 0)
-               wake_up(&entry->wait);
        spin_unlock_irq(&tree->lock);
 }
 
@@ -509,7 +502,21 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
                wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
                                                   &ordered->flags));
 
-               list_add_tail(&ordered->trans_list, &trans->ordered);
+               /*
+                * If our ordered extent completed it means it updated the
+                * fs/subvol and csum trees already, so no need to make the
+                * current transaction's commit wait for it, as we end up
+                * holding memory unnecessarily and delaying the inode's iput
+                * until the transaction commit (we schedule an iput for the
+                * inode when the ordered extent's refcount drops to 0), which
+                * prevents it from being evictable until the transaction
+                * commits.
+                */
+               if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
+                       btrfs_put_ordered_extent(ordered);
+               else
+                       list_add_tail(&ordered->trans_list, &trans->ordered);
+
                spin_lock_irq(&log->log_extents_lock[index]);
        }
        spin_unlock_irq(&log->log_extents_lock[index]);
@@ -844,6 +851,20 @@ out:
        return entry;
 }
 
+bool btrfs_have_ordered_extents_in_range(struct inode *inode,
+                                        u64 file_offset,
+                                        u64 len)
+{
+       struct btrfs_ordered_extent *oe;
+
+       oe = btrfs_lookup_ordered_range(inode, file_offset, len);
+       if (oe) {
+               btrfs_put_ordered_extent(oe);
+               return true;
+       }
+       return false;
+}
+
 /*
  * lookup and return any extent before 'file_offset'.  NULL is returned
  * if none is found
index e96cd4c..7176cc0 100644 (file)
@@ -89,9 +89,6 @@ struct btrfs_ordered_extent {
        /* number of bytes that still need writing */
        u64 bytes_left;
 
-       /* number of bytes that still need csumming */
-       u64 csum_bytes_left;
-
        /*
         * the end of the ordered extent which is behind it but
         * didn't update disk_i_size. Please see the comment of
@@ -191,6 +188,9 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
                                                        u64 file_offset,
                                                        u64 len);
+bool btrfs_have_ordered_extents_in_range(struct inode *inode,
+                                        u64 file_offset,
+                                        u64 len);
 int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
                                struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
index 3d65465..d5f1f03 100644 (file)
@@ -34,6 +34,7 @@
 #include "extent_io.h"
 #include "qgroup.h"
 
+
 /* TODO XXX FIXME
  *  - subvol delete -> delete when ref goes to 0? delete limits also?
  *  - reorganize keys
@@ -84,11 +85,42 @@ struct btrfs_qgroup {
 
        /*
         * temp variables for accounting operations
+        * Refer to qgroup_shared_accounting() for details.
         */
        u64 old_refcnt;
        u64 new_refcnt;
 };
 
+static void btrfs_qgroup_update_old_refcnt(struct btrfs_qgroup *qg, u64 seq,
+                                          int mod)
+{
+       if (qg->old_refcnt < seq)
+               qg->old_refcnt = seq;
+       qg->old_refcnt += mod;
+}
+
+static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
+                                          int mod)
+{
+       if (qg->new_refcnt < seq)
+               qg->new_refcnt = seq;
+       qg->new_refcnt += mod;
+}
+
+static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+       if (qg->old_refcnt < seq)
+               return 0;
+       return qg->old_refcnt - seq;
+}
+
+static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
+{
+       if (qg->new_refcnt < seq)
+               return 0;
+       return qg->new_refcnt - seq;
+}
+
 /*
  * glue structure to represent the relations between qgroups.
  */
@@ -1115,14 +1147,14 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
        struct ulist *tmp;
        int ret = 0;
 
-       tmp = ulist_alloc(GFP_NOFS);
-       if (!tmp)
-               return -ENOMEM;
-
        /* Check the level of src and dst first */
        if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
                return -EINVAL;
 
+       tmp = ulist_alloc(GFP_NOFS);
+       if (!tmp)
+               return -ENOMEM;
+
        mutex_lock(&fs_info->qgroup_ioctl_lock);
        quota_root = fs_info->quota_root;
        if (!quota_root) {
@@ -1356,239 +1388,86 @@ out:
        return ret;
 }
 
-static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
-                          struct btrfs_qgroup_operation *oper2)
+int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
+                                        struct btrfs_fs_info *fs_info)
 {
-       /*
-        * Ignore seq and type here, we're looking for any operation
-        * at all related to this extent on that root.
-        */
-       if (oper1->bytenr < oper2->bytenr)
-               return -1;
-       if (oper1->bytenr > oper2->bytenr)
-               return 1;
-       if (oper1->ref_root < oper2->ref_root)
-               return -1;
-       if (oper1->ref_root > oper2->ref_root)
-               return 1;
-       return 0;
-}
+       struct btrfs_qgroup_extent_record *record;
+       struct btrfs_delayed_ref_root *delayed_refs;
+       struct rb_node *node;
+       u64 qgroup_to_skip;
+       int ret = 0;
 
-static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
-                             struct btrfs_qgroup_operation *oper)
-{
-       struct rb_node *n;
-       struct btrfs_qgroup_operation *cur;
-       int cmp;
+       delayed_refs = &trans->transaction->delayed_refs;
+       qgroup_to_skip = delayed_refs->qgroup_to_skip;
 
-       spin_lock(&fs_info->qgroup_op_lock);
-       n = fs_info->qgroup_op_tree.rb_node;
-       while (n) {
-               cur = rb_entry(n, struct btrfs_qgroup_operation, n);
-               cmp = comp_oper_exist(cur, oper);
-               if (cmp < 0) {
-                       n = n->rb_right;
-               } else if (cmp) {
-                       n = n->rb_left;
-               } else {
-                       spin_unlock(&fs_info->qgroup_op_lock);
-                       return -EEXIST;
-               }
+       /*
+        * No need to take the lock, since this function will only be called
+        * in btrfs_commit_transaction().
+        */
+       node = rb_first(&delayed_refs->dirty_extent_root);
+       while (node) {
+               record = rb_entry(node, struct btrfs_qgroup_extent_record,
+                                 node);
+               ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0,
+                                          &record->old_roots);
+               if (ret < 0)
+                       break;
+               if (qgroup_to_skip)
+                       ulist_del(record->old_roots, qgroup_to_skip, 0);
+               node = rb_next(node);
        }
-       spin_unlock(&fs_info->qgroup_op_lock);
-       return 0;
-}
-
-static int comp_oper(struct btrfs_qgroup_operation *oper1,
-                    struct btrfs_qgroup_operation *oper2)
-{
-       if (oper1->bytenr < oper2->bytenr)
-               return -1;
-       if (oper1->bytenr > oper2->bytenr)
-               return 1;
-       if (oper1->ref_root < oper2->ref_root)
-               return -1;
-       if (oper1->ref_root > oper2->ref_root)
-               return 1;
-       if (oper1->seq < oper2->seq)
-               return -1;
-       if (oper1->seq > oper2->seq)
-               return 1;
-       if (oper1->type < oper2->type)
-               return -1;
-       if (oper1->type > oper2->type)
-               return 1;
-       return 0;
+       return ret;
 }
 
-static int insert_qgroup_oper(struct btrfs_fs_info *fs_info,
-                             struct btrfs_qgroup_operation *oper)
+struct btrfs_qgroup_extent_record
+*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
+                                 struct btrfs_qgroup_extent_record *record)
 {
-       struct rb_node **p;
-       struct rb_node *parent = NULL;
-       struct btrfs_qgroup_operation *cur;
-       int cmp;
+       struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node;
+       struct rb_node *parent_node = NULL;
+       struct btrfs_qgroup_extent_record *entry;
+       u64 bytenr = record->bytenr;
 
-       spin_lock(&fs_info->qgroup_op_lock);
-       p = &fs_info->qgroup_op_tree.rb_node;
        while (*p) {
-               parent = *p;
-               cur = rb_entry(parent, struct btrfs_qgroup_operation, n);
-               cmp = comp_oper(cur, oper);
-               if (cmp < 0) {
-                       p = &(*p)->rb_right;
-               } else if (cmp) {
+               parent_node = *p;
+               entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record,
+                                node);
+               if (bytenr < entry->bytenr)
                        p = &(*p)->rb_left;
-               } else {
-                       spin_unlock(&fs_info->qgroup_op_lock);
-                       return -EEXIST;
-               }
-       }
-       rb_link_node(&oper->n, parent, p);
-       rb_insert_color(&oper->n, &fs_info->qgroup_op_tree);
-       spin_unlock(&fs_info->qgroup_op_lock);
-       return 0;
-}
-
-/*
- * Record a quota operation for processing later on.
- * @trans: the transaction we are adding the delayed op to.
- * @fs_info: the fs_info for this fs.
- * @ref_root: the root of the reference we are acting on,
- * @bytenr: the bytenr we are acting on.
- * @num_bytes: the number of bytes in the reference.
- * @type: the type of operation this is.
- * @mod_seq: do we need to get a sequence number for looking up roots.
- *
- * We just add it to our trans qgroup_ref_list and carry on and process these
- * operations in order at some later point.  If the reference root isn't a fs
- * root then we don't bother with doing anything.
- *
- * MUST BE HOLDING THE REF LOCK.
- */
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-                           struct btrfs_fs_info *fs_info, u64 ref_root,
-                           u64 bytenr, u64 num_bytes,
-                           enum btrfs_qgroup_operation_type type, int mod_seq)
-{
-       struct btrfs_qgroup_operation *oper;
-       int ret;
-
-       if (!is_fstree(ref_root) || !fs_info->quota_enabled)
-               return 0;
-
-       oper = kmalloc(sizeof(*oper), GFP_NOFS);
-       if (!oper)
-               return -ENOMEM;
-
-       oper->ref_root = ref_root;
-       oper->bytenr = bytenr;
-       oper->num_bytes = num_bytes;
-       oper->type = type;
-       oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
-       INIT_LIST_HEAD(&oper->elem.list);
-       oper->elem.seq = 0;
-
-       trace_btrfs_qgroup_record_ref(oper);
-
-       if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
-               /*
-                * If any operation for this bytenr/ref_root combo
-                * exists, then we know it's not exclusively owned and
-                * shouldn't be queued up.
-                *
-                * This also catches the case where we have a cloned
-                * extent that gets queued up multiple times during
-                * drop snapshot.
-                */
-               if (qgroup_oper_exists(fs_info, oper)) {
-                       kfree(oper);
-                       return 0;
-               }
-       }
-
-       ret = insert_qgroup_oper(fs_info, oper);
-       if (ret) {
-               /* Shouldn't happen so have an assert for developers */
-               ASSERT(0);
-               kfree(oper);
-               return ret;
+               else if (bytenr > entry->bytenr)
+                       p = &(*p)->rb_right;
+               else
+                       return entry;
        }
-       list_add_tail(&oper->list, &trans->qgroup_ref_list);
 
-       if (mod_seq)
-               btrfs_get_tree_mod_seq(fs_info, &oper->elem);
-
-       return 0;
-}
-
-static int qgroup_excl_accounting(struct btrfs_fs_info *fs_info,
-                                 struct btrfs_qgroup_operation *oper)
-{
-       struct ulist *tmp;
-       int sign = 0;
-       int ret = 0;
-
-       tmp = ulist_alloc(GFP_NOFS);
-       if (!tmp)
-               return -ENOMEM;
-
-       spin_lock(&fs_info->qgroup_lock);
-       if (!fs_info->quota_root)
-               goto out;
-
-       switch (oper->type) {
-       case BTRFS_QGROUP_OPER_ADD_EXCL:
-               sign = 1;
-               break;
-       case BTRFS_QGROUP_OPER_SUB_EXCL:
-               sign = -1;
-               break;
-       default:
-               ASSERT(0);
-       }
-       ret = __qgroup_excl_accounting(fs_info, tmp, oper->ref_root,
-                                      oper->num_bytes, sign);
-out:
-       spin_unlock(&fs_info->qgroup_lock);
-       ulist_free(tmp);
-       return ret;
+       rb_link_node(&record->node, parent_node, p);
+       rb_insert_color(&record->node, &delayed_refs->dirty_extent_root);
+       return NULL;
 }
 
+#define UPDATE_NEW     0
+#define UPDATE_OLD     1
 /*
- * Walk all of the roots that pointed to our bytenr and adjust their refcnts as
- * properly.
+ * Walk all of the roots that point to the bytenr and adjust their refcnts.
  */
-static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
-                                 u64 root_to_skip, struct ulist *tmp,
-                                 struct ulist *roots, struct ulist *qgroups,
-                                 u64 seq, int *old_roots, int rescan)
+static int qgroup_update_refcnt(struct btrfs_fs_info *fs_info,
+                               struct ulist *roots, struct ulist *tmp,
+                               struct ulist *qgroups, u64 seq, int update_old)
 {
        struct ulist_node *unode;
        struct ulist_iterator uiter;
        struct ulist_node *tmp_unode;
        struct ulist_iterator tmp_uiter;
        struct btrfs_qgroup *qg;
-       int ret;
+       int ret = 0;
 
+       if (!roots)
+               return 0;
        ULIST_ITER_INIT(&uiter);
        while ((unode = ulist_next(roots, &uiter))) {
-               /* We don't count our current root here */
-               if (unode->val == root_to_skip)
-                       continue;
                qg = find_qgroup_rb(fs_info, unode->val);
                if (!qg)
                        continue;
-               /*
-                * We could have a pending removal of this same ref so we may
-                * not have actually found our ref root when doing
-                * btrfs_find_all_roots, so we need to keep track of how many
-                * old roots we find in case we removed ours and added a
-                * different one at the same time.  I don't think this could
-                * happen in practice but that sort of thinking leads to pain
-                * and suffering and to the dark side.
-                */
-               (*old_roots)++;
 
                ulist_reinit(tmp);
                ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
@@ -1603,29 +1482,10 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
                        struct btrfs_qgroup_list *glist;
 
                        qg = u64_to_ptr(tmp_unode->aux);
-                       /*
-                        * We use this sequence number to keep from having to
-                        * run the whole list and 0 out the refcnt every time.
-                        * We basically use sequnce as the known 0 count and
-                        * then add 1 everytime we see a qgroup.  This is how we
-                        * get how many of the roots actually point up to the
-                        * upper level qgroups in order to determine exclusive
-                        * counts.
-                        *
-                        * For rescan we want to set old_refcnt to seq so our
-                        * exclusive calculations end up correct.
-                        */
-                       if (rescan)
-                               qg->old_refcnt = seq;
-                       else if (qg->old_refcnt < seq)
-                               qg->old_refcnt = seq + 1;
+                       if (update_old)
+                               btrfs_qgroup_update_old_refcnt(qg, seq, 1);
                        else
-                               qg->old_refcnt++;
-
-                       if (qg->new_refcnt < seq)
-                               qg->new_refcnt = seq + 1;
-                       else
-                               qg->new_refcnt++;
+                               btrfs_qgroup_update_new_refcnt(qg, seq, 1);
                        list_for_each_entry(glist, &qg->groups, next_group) {
                                ret = ulist_add(qgroups, glist->group->qgroupid,
                                                ptr_to_u64(glist->group),
@@ -1644,161 +1504,46 @@ static int qgroup_calc_old_refcnt(struct btrfs_fs_info *fs_info,
 }
 
 /*
- * We need to walk forward in our operation tree and account for any roots that
- * were deleted after we made this operation.
- */
-static int qgroup_account_deleted_refs(struct btrfs_fs_info *fs_info,
-                                      struct btrfs_qgroup_operation *oper,
-                                      struct ulist *tmp,
-                                      struct ulist *qgroups, u64 seq,
-                                      int *old_roots)
-{
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
-       struct btrfs_qgroup *qg;
-       struct btrfs_qgroup_operation *tmp_oper;
-       struct rb_node *n;
-       int ret;
-
-       ulist_reinit(tmp);
-
-       /*
-        * We only walk forward in the tree since we're only interested in
-        * removals that happened _after_  our operation.
-        */
-       spin_lock(&fs_info->qgroup_op_lock);
-       n = rb_next(&oper->n);
-       spin_unlock(&fs_info->qgroup_op_lock);
-       if (!n)
-               return 0;
-       tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
-       while (tmp_oper->bytenr == oper->bytenr) {
-               /*
-                * If it's not a removal we don't care, additions work out
-                * properly with our refcnt tracking.
-                */
-               if (tmp_oper->type != BTRFS_QGROUP_OPER_SUB_SHARED &&
-                   tmp_oper->type != BTRFS_QGROUP_OPER_SUB_EXCL)
-                       goto next;
-               qg = find_qgroup_rb(fs_info, tmp_oper->ref_root);
-               if (!qg)
-                       goto next;
-               ret = ulist_add(qgroups, qg->qgroupid, ptr_to_u64(qg),
-                               GFP_ATOMIC);
-               if (ret) {
-                       if (ret < 0)
-                               return ret;
-                       /*
-                        * We only want to increase old_roots if this qgroup is
-                        * not already in the list of qgroups.  If it is already
-                        * there then that means it must have been re-added or
-                        * the delete will be discarded because we had an
-                        * existing ref that we haven't looked up yet.  In this
-                        * case we don't want to increase old_roots.  So if ret
-                        * == 1 then we know that this is the first time we've
-                        * seen this qgroup and we can bump the old_roots.
-                        */
-                       (*old_roots)++;
-                       ret = ulist_add(tmp, qg->qgroupid, ptr_to_u64(qg),
-                                       GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-               }
-next:
-               spin_lock(&fs_info->qgroup_op_lock);
-               n = rb_next(&tmp_oper->n);
-               spin_unlock(&fs_info->qgroup_op_lock);
-               if (!n)
-                       break;
-               tmp_oper = rb_entry(n, struct btrfs_qgroup_operation, n);
-       }
-
-       /* Ok now process the qgroups we found */
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(tmp, &uiter))) {
-               struct btrfs_qgroup_list *glist;
-
-               qg = u64_to_ptr(unode->aux);
-               if (qg->old_refcnt < seq)
-                       qg->old_refcnt = seq + 1;
-               else
-                       qg->old_refcnt++;
-               if (qg->new_refcnt < seq)
-                       qg->new_refcnt = seq + 1;
-               else
-                       qg->new_refcnt++;
-               list_for_each_entry(glist, &qg->groups, next_group) {
-                       ret = ulist_add(qgroups, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-                       ret = ulist_add(tmp, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-               }
-       }
-       return 0;
-}
-
-/* Add refcnt for the newly added reference. */
-static int qgroup_calc_new_refcnt(struct btrfs_fs_info *fs_info,
-                                 struct btrfs_qgroup_operation *oper,
-                                 struct btrfs_qgroup *qgroup,
-                                 struct ulist *tmp, struct ulist *qgroups,
-                                 u64 seq)
-{
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
-       struct btrfs_qgroup *qg;
-       int ret;
-
-       ulist_reinit(tmp);
-       ret = ulist_add(qgroups, qgroup->qgroupid, ptr_to_u64(qgroup),
-                       GFP_ATOMIC);
-       if (ret < 0)
-               return ret;
-       ret = ulist_add(tmp, qgroup->qgroupid, ptr_to_u64(qgroup),
-                       GFP_ATOMIC);
-       if (ret < 0)
-               return ret;
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(tmp, &uiter))) {
-               struct btrfs_qgroup_list *glist;
-
-               qg = u64_to_ptr(unode->aux);
-               if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
-                       if (qg->new_refcnt < seq)
-                               qg->new_refcnt = seq + 1;
-                       else
-                               qg->new_refcnt++;
-               } else {
-                       if (qg->old_refcnt < seq)
-                               qg->old_refcnt = seq + 1;
-                       else
-                               qg->old_refcnt++;
-               }
-               list_for_each_entry(glist, &qg->groups, next_group) {
-                       ret = ulist_add(tmp, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-                       ret = ulist_add(qgroups, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (ret < 0)
-                               return ret;
-               }
-       }
-       return 0;
-}
-
-/*
- * This adjusts the counters for all referenced qgroups if need be.
+ * Update qgroup rfer/excl counters.
+ * Rfer update is easy, codes can explain themselves.
+ *
+ * Excl update is tricky, the update is split into 2 part.
+ * Part 1: Possible exclusive <-> sharing detect:
+ *     |       A       |       !A      |
+ *  -------------------------------------
+ *  B  |       *       |       -       |
+ *  -------------------------------------
+ *  !B |       +       |       **      |
+ *  -------------------------------------
+ *
+ * Conditions:
+ * A:  cur_old_roots < nr_old_roots    (not exclusive before)
+ * !A: cur_old_roots == nr_old_roots   (possible exclusive before)
+ * B:  cur_new_roots < nr_new_roots    (not exclusive now)
+ * !B: cur_new_roots == nr_new_roots   (possible exclusive now)
+ *
+ * Results:
+ * +: Possible sharing -> exclusive    -: Possible exclusive -> sharing
+ * *: Definitely not changed.          **: Possible unchanged.
+ *
+ * For !A and !B condition, the exception is cur_old/new_roots == 0 case.
+ *
+ * To make the logic clear, we first use condition A and B to split
+ * combination into 4 results.
+ *
+ * Then, for results "+" and "-", check the old/new_roots == 0 case, as in
+ * them only one variant may be 0.
+ *
+ * Lastly, check result **, since there are 2 variants that may be 0, split
+ * them again (2x2).
+ * But this time we don't need to consider other things; the code and logic
+ * are easy to understand now.
  */
-static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
-                                 u64 root_to_skip, u64 num_bytes,
-                                 struct ulist *qgroups, u64 seq,
-                                 int old_roots, int new_roots, int rescan)
+static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
+                                 struct ulist *qgroups,
+                                 u64 nr_old_roots,
+                                 u64 nr_new_roots,
+                                 u64 num_bytes, u64 seq)
 {
        struct ulist_node *unode;
        struct ulist_iterator uiter;
@@ -1810,423 +1555,191 @@ static int qgroup_adjust_counters(struct btrfs_fs_info *fs_info,
                bool dirty = false;
 
                qg = u64_to_ptr(unode->aux);
-               /*
-                * Wasn't referenced before but is now, add to the reference
-                * counters.
-                */
-               if (qg->old_refcnt <= seq && qg->new_refcnt > seq) {
+               cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
+               cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
+
+               /* Rfer update part */
+               if (cur_old_count == 0 && cur_new_count > 0) {
                        qg->rfer += num_bytes;
                        qg->rfer_cmpr += num_bytes;
                        dirty = true;
                }
-
-               /*
-                * Was referenced before but isn't now, subtract from the
-                * reference counters.
-                */
-               if (qg->old_refcnt > seq && qg->new_refcnt <= seq) {
+               if (cur_old_count > 0 && cur_new_count == 0) {
                        qg->rfer -= num_bytes;
                        qg->rfer_cmpr -= num_bytes;
                        dirty = true;
                }
 
-               if (qg->old_refcnt < seq)
-                       cur_old_count = 0;
-               else
-                       cur_old_count = qg->old_refcnt - seq;
-               if (qg->new_refcnt < seq)
-                       cur_new_count = 0;
-               else
-                       cur_new_count = qg->new_refcnt - seq;
-
-               /*
-                * If our refcount was the same as the roots previously but our
-                * new count isn't the same as the number of roots now then we
-                * went from having a exclusive reference on this range to not.
-                */
-               if (old_roots && cur_old_count == old_roots &&
-                   (cur_new_count != new_roots || new_roots == 0)) {
-                       WARN_ON(cur_new_count != new_roots && new_roots == 0);
-                       qg->excl -= num_bytes;
-                       qg->excl_cmpr -= num_bytes;
-                       dirty = true;
+               /* Excl update part */
+               /* Exclusive/none -> shared case */
+               if (cur_old_count == nr_old_roots &&
+                   cur_new_count < nr_new_roots) {
+                       /* Exclusive -> shared */
+                       if (cur_old_count != 0) {
+                               qg->excl -= num_bytes;
+                               qg->excl_cmpr -= num_bytes;
+                               dirty = true;
+                       }
                }
 
-               /*
-                * If we didn't reference all the roots before but now we do we
-                * have an exclusive reference to this range.
-                */
-               if ((!old_roots || (old_roots && cur_old_count != old_roots))
-                   && cur_new_count == new_roots) {
-                       qg->excl += num_bytes;
-                       qg->excl_cmpr += num_bytes;
-                       dirty = true;
+               /* Shared -> exclusive/none case */
+               if (cur_old_count < nr_old_roots &&
+                   cur_new_count == nr_new_roots) {
+                       /* Shared->exclusive */
+                       if (cur_new_count != 0) {
+                               qg->excl += num_bytes;
+                               qg->excl_cmpr += num_bytes;
+                               dirty = true;
+                       }
                }
 
+               /* Exclusive/none -> exclusive/none case */
+               if (cur_old_count == nr_old_roots &&
+                   cur_new_count == nr_new_roots) {
+                       if (cur_old_count == 0) {
+                               /* None -> exclusive/none */
+
+                               if (cur_new_count != 0) {
+                                       /* None -> exclusive */
+                                       qg->excl += num_bytes;
+                                       qg->excl_cmpr += num_bytes;
+                                       dirty = true;
+                               }
+                               /* None -> none, nothing changed */
+                       } else {
+                               /* Exclusive -> exclusive/none */
+
+                               if (cur_new_count == 0) {
+                                       /* Exclusive -> none */
+                                       qg->excl -= num_bytes;
+                                       qg->excl_cmpr -= num_bytes;
+                                       dirty = true;
+                               }
+                               /* Exclusive -> exclusive, nothing changed */
+                       }
+               }
                if (dirty)
                        qgroup_dirty(fs_info, qg);
        }
        return 0;
 }
 
-/*
- * If we removed a data extent and there were other references for that bytenr
- * then we need to lookup all referenced roots to make sure we still don't
- * reference this bytenr.  If we do then we can just discard this operation.
- */
-static int check_existing_refs(struct btrfs_trans_handle *trans,
-                              struct btrfs_fs_info *fs_info,
-                              struct btrfs_qgroup_operation *oper)
-{
-       struct ulist *roots = NULL;
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
-       int ret = 0;
-
-       ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
-                                  oper->elem.seq, &roots);
-       if (ret < 0)
-               return ret;
-       ret = 0;
-
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(roots, &uiter))) {
-               if (unode->val == oper->ref_root) {
-                       ret = 1;
-                       break;
-               }
-       }
-       ulist_free(roots);
-       btrfs_put_tree_mod_seq(fs_info, &oper->elem);
-
-       return ret;
-}
-
-/*
- * If we share a reference across multiple roots then we may need to adjust
- * various qgroups referenced and exclusive counters.  The basic premise is this
- *
- * 1) We have seq to represent a 0 count.  Instead of looping through all of the
- * qgroups and resetting their refcount to 0 we just constantly bump this
- * sequence number to act as the base reference count.  This means that if
- * anybody is equal to or below this sequence they were never referenced.  We
- * jack this sequence up by the number of roots we found each time in order to
- * make sure we don't have any overlap.
- *
- * 2) We first search all the roots that reference the area _except_ the root
- * we're acting on currently.  This makes up the old_refcnt of all the qgroups
- * before.
- *
- * 3) We walk all of the qgroups referenced by the root we are currently acting
- * on, and will either adjust old_refcnt in the case of a removal or the
- * new_refcnt in the case of an addition.
- *
- * 4) Finally we walk all the qgroups that are referenced by this range
- * including the root we are acting on currently.  We will adjust the counters
- * based on the number of roots we had and will have after this operation.
- *
- * Take this example as an illustration
- *
- *                     [qgroup 1/0]
- *                  /         |          \
- *             [qg 0/0]   [qg 0/1]     [qg 0/2]
- *                \          |            /
- *               [        extent           ]
- *
- * Say we are adding a reference that is covered by qg 0/0.  The first step
- * would give a refcnt of 1 to qg 0/1 and 0/2 and a refcnt of 2 to qg 1/0 with
- * old_roots being 2.  Because it is adding new_roots will be 1.  We then go
- * through qg 0/0 which will get the new_refcnt set to 1 and add 1 to qg 1/0's
- * new_refcnt, bringing it to 3.  We then walk through all of the qgroups, we
- * notice that the old refcnt for qg 0/0 < the new refcnt, so we added a
- * reference and thus must add the size to the referenced bytes.  Everything
- * else is the same so nothing else changes.
- */
-static int qgroup_shared_accounting(struct btrfs_trans_handle *trans,
-                                   struct btrfs_fs_info *fs_info,
-                                   struct btrfs_qgroup_operation *oper)
+int
+btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+                           struct btrfs_fs_info *fs_info,
+                           u64 bytenr, u64 num_bytes,
+                           struct ulist *old_roots, struct ulist *new_roots)
 {
-       struct ulist *roots = NULL;
-       struct ulist *qgroups, *tmp;
-       struct btrfs_qgroup *qgroup;
-       struct seq_list elem = SEQ_LIST_INIT(elem);
+       struct ulist *qgroups = NULL;
+       struct ulist *tmp = NULL;
        u64 seq;
-       int old_roots = 0;
-       int new_roots = 0;
+       u64 nr_new_roots = 0;
+       u64 nr_old_roots = 0;
        int ret = 0;
 
-       if (oper->elem.seq) {
-               ret = check_existing_refs(trans, fs_info, oper);
-               if (ret < 0)
-                       return ret;
-               if (ret)
-                       return 0;
-       }
+       if (new_roots)
+               nr_new_roots = new_roots->nnodes;
+       if (old_roots)
+               nr_old_roots = old_roots->nnodes;
 
-       qgroups = ulist_alloc(GFP_NOFS);
-       if (!qgroups)
-               return -ENOMEM;
+       if (!fs_info->quota_enabled)
+               goto out_free;
+       BUG_ON(!fs_info->quota_root);
 
+       qgroups = ulist_alloc(GFP_NOFS);
+       if (!qgroups) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
        tmp = ulist_alloc(GFP_NOFS);
        if (!tmp) {
-               ulist_free(qgroups);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out_free;
        }
 
-       btrfs_get_tree_mod_seq(fs_info, &elem);
-       ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq,
-                                  &roots);
-       btrfs_put_tree_mod_seq(fs_info, &elem);
-       if (ret < 0) {
-               ulist_free(qgroups);
-               ulist_free(tmp);
-               return ret;
+       mutex_lock(&fs_info->qgroup_rescan_lock);
+       if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+               if (fs_info->qgroup_rescan_progress.objectid <= bytenr) {
+                       mutex_unlock(&fs_info->qgroup_rescan_lock);
+                       ret = 0;
+                       goto out_free;
+               }
        }
+       mutex_unlock(&fs_info->qgroup_rescan_lock);
+
        spin_lock(&fs_info->qgroup_lock);
-       qgroup = find_qgroup_rb(fs_info, oper->ref_root);
-       if (!qgroup)
-               goto out;
        seq = fs_info->qgroup_seq;
 
-       /*
-        * So roots is the list of all the roots currently pointing at the
-        * bytenr, including the ref we are adding if we are adding, or not if
-        * we are removing a ref.  So we pass in the ref_root to skip that root
-        * in our calculations.  We set old_refnct and new_refcnt cause who the
-        * hell knows what everything looked like before, and it doesn't matter
-        * except...
-        */
-       ret = qgroup_calc_old_refcnt(fs_info, oper->ref_root, tmp, roots, qgroups,
-                                    seq, &old_roots, 0);
+       /* Update old refcnts using old_roots */
+       ret = qgroup_update_refcnt(fs_info, old_roots, tmp, qgroups, seq,
+                                  UPDATE_OLD);
        if (ret < 0)
                goto out;
 
-       /*
-        * Now adjust the refcounts of the qgroups that care about this
-        * reference, either the old_count in the case of removal or new_count
-        * in the case of an addition.
-        */
-       ret = qgroup_calc_new_refcnt(fs_info, oper, qgroup, tmp, qgroups,
-                                    seq);
+       /* Update new refcnts using new_roots */
+       ret = qgroup_update_refcnt(fs_info, new_roots, tmp, qgroups, seq,
+                                  UPDATE_NEW);
        if (ret < 0)
                goto out;
 
-       /*
-        * ...in the case of removals.  If we had a removal before we got around
-        * to processing this operation then we need to find that guy and count
-        * his references as if they really existed so we don't end up screwing
-        * up the exclusive counts.  Then whenever we go to process the delete
-        * everything will be grand and we can account for whatever exclusive
-        * changes need to be made there.  We also have to pass in old_roots so
-        * we have an accurate count of the roots as it pertains to this
-        * operations view of the world.
-        */
-       ret = qgroup_account_deleted_refs(fs_info, oper, tmp, qgroups, seq,
-                                         &old_roots);
-       if (ret < 0)
-               goto out;
+       qgroup_update_counters(fs_info, qgroups, nr_old_roots, nr_new_roots,
+                              num_bytes, seq);
 
        /*
-        * We are adding our root, need to adjust up the number of roots,
-        * otherwise old_roots is the number of roots we want.
+        * Bump qgroup_seq to avoid seq overlap
         */
-       if (oper->type == BTRFS_QGROUP_OPER_ADD_SHARED) {
-               new_roots = old_roots + 1;
-       } else {
-               new_roots = old_roots;
-               old_roots++;
-       }
-       fs_info->qgroup_seq += old_roots + 1;
-
-
-       /*
-        * And now the magic happens, bless Arne for having a pretty elegant
-        * solution for this.
-        */
-       qgroup_adjust_counters(fs_info, oper->ref_root, oper->num_bytes,
-                              qgroups, seq, old_roots, new_roots, 0);
+       fs_info->qgroup_seq += max(nr_old_roots, nr_new_roots) + 1;
 out:
        spin_unlock(&fs_info->qgroup_lock);
-       ulist_free(qgroups);
-       ulist_free(roots);
+out_free:
        ulist_free(tmp);
+       ulist_free(qgroups);
+       ulist_free(old_roots);
+       ulist_free(new_roots);
        return ret;
 }
 
-/*
- * Process a reference to a shared subtree. This type of operation is
- * queued during snapshot removal when we encounter extents which are
- * shared between more than one root.
- */
-static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
-                                    struct btrfs_fs_info *fs_info,
-                                    struct btrfs_qgroup_operation *oper)
-{
-       struct ulist *roots = NULL;
-       struct ulist_node *unode;
-       struct ulist_iterator uiter;
-       struct btrfs_qgroup_list *glist;
-       struct ulist *parents;
-       int ret = 0;
-       int err;
-       struct btrfs_qgroup *qg;
-       u64 root_obj = 0;
-       struct seq_list elem = SEQ_LIST_INIT(elem);
-
-       parents = ulist_alloc(GFP_NOFS);
-       if (!parents)
-               return -ENOMEM;
-
-       btrfs_get_tree_mod_seq(fs_info, &elem);
-       ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
-                                  elem.seq, &roots);
-       btrfs_put_tree_mod_seq(fs_info, &elem);
-       if (ret < 0)
-               goto out;
-
-       if (roots->nnodes != 1)
-               goto out;
-
-       ULIST_ITER_INIT(&uiter);
-       unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
-       /*
-        * If we find our ref root then that means all refs
-        * this extent has to the root have not yet been
-        * deleted. In that case, we do nothing and let the
-        * last ref for this bytenr drive our update.
-        *
-        * This can happen for example if an extent is
-        * referenced multiple times in a snapshot (clone,
-        * etc). If we are in the middle of snapshot removal,
-        * queued updates for such an extent will find the
-        * root if we have not yet finished removing the
-        * snapshot.
-        */
-       if (unode->val == oper->ref_root)
-               goto out;
-
-       root_obj = unode->val;
-       BUG_ON(!root_obj);
-
-       spin_lock(&fs_info->qgroup_lock);
-       qg = find_qgroup_rb(fs_info, root_obj);
-       if (!qg)
-               goto out_unlock;
-
-       qg->excl += oper->num_bytes;
-       qg->excl_cmpr += oper->num_bytes;
-       qgroup_dirty(fs_info, qg);
-
-       /*
-        * Adjust counts for parent groups. First we find all
-        * parents, then in the 2nd loop we do the adjustment
-        * while adding parents of the parents to our ulist.
-        */
-       list_for_each_entry(glist, &qg->groups, next_group) {
-               err = ulist_add(parents, glist->group->qgroupid,
-                               ptr_to_u64(glist->group), GFP_ATOMIC);
-               if (err < 0) {
-                       ret = err;
-                       goto out_unlock;
-               }
-       }
-
-       ULIST_ITER_INIT(&uiter);
-       while ((unode = ulist_next(parents, &uiter))) {
-               qg = u64_to_ptr(unode->aux);
-               qg->excl += oper->num_bytes;
-               qg->excl_cmpr += oper->num_bytes;
-               qgroup_dirty(fs_info, qg);
-
-               /* Add any parents of the parents */
-               list_for_each_entry(glist, &qg->groups, next_group) {
-                       err = ulist_add(parents, glist->group->qgroupid,
-                                       ptr_to_u64(glist->group), GFP_ATOMIC);
-                       if (err < 0) {
-                               ret = err;
-                               goto out_unlock;
-                       }
-               }
-       }
-
-out_unlock:
-       spin_unlock(&fs_info->qgroup_lock);
-
-out:
-       ulist_free(roots);
-       ulist_free(parents);
-       return ret;
-}
-
-/*
- * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
- * from the fs. First, all roots referencing the extent are searched, and
- * then the space is accounted accordingly to the different roots. The
- * accounting algorithm works in 3 steps documented inline.
- */
-static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
-                               struct btrfs_fs_info *fs_info,
-                               struct btrfs_qgroup_operation *oper)
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
+                                struct btrfs_fs_info *fs_info)
 {
+       struct btrfs_qgroup_extent_record *record;
+       struct btrfs_delayed_ref_root *delayed_refs;
+       struct ulist *new_roots = NULL;
+       struct rb_node *node;
+       u64 qgroup_to_skip;
        int ret = 0;
 
-       if (!fs_info->quota_enabled)
-               return 0;
-
-       BUG_ON(!fs_info->quota_root);
+       delayed_refs = &trans->transaction->delayed_refs;
+       qgroup_to_skip = delayed_refs->qgroup_to_skip;
+       while ((node = rb_first(&delayed_refs->dirty_extent_root))) {
+               record = rb_entry(node, struct btrfs_qgroup_extent_record,
+                                 node);
 
-       mutex_lock(&fs_info->qgroup_rescan_lock);
-       if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
-               if (fs_info->qgroup_rescan_progress.objectid <= oper->bytenr) {
-                       mutex_unlock(&fs_info->qgroup_rescan_lock);
-                       return 0;
+               if (!ret) {
+                       /*
+                        * Use (u64)-1 as time_seq to do special search, which
+                        * doesn't lock tree or delayed_refs and search current
+                        * root. It's safe inside commit_transaction().
+                        */
+                       ret = btrfs_find_all_roots(trans, fs_info,
+                                       record->bytenr, (u64)-1, &new_roots);
+                       if (ret < 0)
+                               goto cleanup;
+                       if (qgroup_to_skip)
+                               ulist_del(new_roots, qgroup_to_skip, 0);
+                       ret = btrfs_qgroup_account_extent(trans, fs_info,
+                                       record->bytenr, record->num_bytes,
+                                       record->old_roots, new_roots);
+                       record->old_roots = NULL;
+                       new_roots = NULL;
                }
-       }
-       mutex_unlock(&fs_info->qgroup_rescan_lock);
+cleanup:
+               ulist_free(record->old_roots);
+               ulist_free(new_roots);
+               new_roots = NULL;
+               rb_erase(node, &delayed_refs->dirty_extent_root);
+               kfree(record);
 
-       ASSERT(is_fstree(oper->ref_root));
-
-       trace_btrfs_qgroup_account(oper);
-
-       switch (oper->type) {
-       case BTRFS_QGROUP_OPER_ADD_EXCL:
-       case BTRFS_QGROUP_OPER_SUB_EXCL:
-               ret = qgroup_excl_accounting(fs_info, oper);
-               break;
-       case BTRFS_QGROUP_OPER_ADD_SHARED:
-       case BTRFS_QGROUP_OPER_SUB_SHARED:
-               ret = qgroup_shared_accounting(trans, fs_info, oper);
-               break;
-       case BTRFS_QGROUP_OPER_SUB_SUBTREE:
-               ret = qgroup_subtree_accounting(trans, fs_info, oper);
-               break;
-       default:
-               ASSERT(0);
-       }
-       return ret;
-}
-
-/*
- * Needs to be called everytime we run delayed refs, even if there is an error
- * in order to cleanup outstanding operations.
- */
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
-                                   struct btrfs_fs_info *fs_info)
-{
-       struct btrfs_qgroup_operation *oper;
-       int ret = 0;
-
-       while (!list_empty(&trans->qgroup_ref_list)) {
-               oper = list_first_entry(&trans->qgroup_ref_list,
-                                       struct btrfs_qgroup_operation, list);
-               list_del_init(&oper->list);
-               if (!ret || !trans->aborted)
-                       ret = btrfs_qgroup_account(trans, fs_info, oper);
-               spin_lock(&fs_info->qgroup_op_lock);
-               rb_erase(&oper->n, &fs_info->qgroup_op_tree);
-               spin_unlock(&fs_info->qgroup_op_lock);
-               btrfs_put_tree_mod_seq(fs_info, &oper->elem);
-               kfree(oper);
        }
        return ret;
 }
@@ -2637,15 +2150,13 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
  */
 static int
 qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
-                  struct btrfs_trans_handle *trans, struct ulist *qgroups,
-                  struct ulist *tmp, struct extent_buffer *scratch_leaf)
+                  struct btrfs_trans_handle *trans,
+                  struct extent_buffer *scratch_leaf)
 {
        struct btrfs_key found;
        struct ulist *roots = NULL;
        struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
        u64 num_bytes;
-       u64 seq;
-       int new_roots;
        int slot;
        int ret;
 
@@ -2695,33 +2206,15 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
                else
                        num_bytes = found.offset;
 
-               ulist_reinit(qgroups);
                ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0,
                                           &roots);
                if (ret < 0)
                        goto out;
-               spin_lock(&fs_info->qgroup_lock);
-               seq = fs_info->qgroup_seq;
-               fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
-
-               new_roots = 0;
-               ret = qgroup_calc_old_refcnt(fs_info, 0, tmp, roots, qgroups,
-                                            seq, &new_roots, 1);
-               if (ret < 0) {
-                       spin_unlock(&fs_info->qgroup_lock);
-                       ulist_free(roots);
-                       goto out;
-               }
-
-               ret = qgroup_adjust_counters(fs_info, 0, num_bytes, qgroups,
-                                            seq, 0, new_roots, 1);
-               if (ret < 0) {
-                       spin_unlock(&fs_info->qgroup_lock);
-                       ulist_free(roots);
+               /* For rescan, just pass old_roots as NULL */
+               ret = btrfs_qgroup_account_extent(trans, fs_info,
+                               found.objectid, num_bytes, NULL, roots);
+               if (ret < 0)
                        goto out;
-               }
-               spin_unlock(&fs_info->qgroup_lock);
-               ulist_free(roots);
        }
 out:
        btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
@@ -2735,7 +2228,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
                                                     qgroup_rescan_work);
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans = NULL;
-       struct ulist *tmp = NULL, *qgroups = NULL;
        struct extent_buffer *scratch_leaf = NULL;
        int err = -ENOMEM;
        int ret = 0;
@@ -2743,12 +2235,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
        path = btrfs_alloc_path();
        if (!path)
                goto out;
-       qgroups = ulist_alloc(GFP_NOFS);
-       if (!qgroups)
-               goto out;
-       tmp = ulist_alloc(GFP_NOFS);
-       if (!tmp)
-               goto out;
        scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
        if (!scratch_leaf)
                goto out;
@@ -2764,7 +2250,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
                        err = -EINTR;
                } else {
                        err = qgroup_rescan_leaf(fs_info, path, trans,
-                                                qgroups, tmp, scratch_leaf);
+                                                scratch_leaf);
                }
                if (err > 0)
                        btrfs_commit_transaction(trans, fs_info->fs_root);
@@ -2774,8 +2260,6 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 
 out:
        kfree(scratch_leaf);
-       ulist_free(qgroups);
-       ulist_free(tmp);
        btrfs_free_path(path);
 
        mutex_lock(&fs_info->qgroup_rescan_lock);
index c5242aa..6387dcf 100644 (file)
 #ifndef __BTRFS_QGROUP__
 #define __BTRFS_QGROUP__
 
+#include "ulist.h"
+#include "delayed-ref.h"
+
 /*
- * A description of the operations, all of these operations only happen when we
- * are adding the 1st reference for that subvolume in the case of adding space
- * or on the last reference delete in the case of subtraction.  The only
- * exception is the last one, which is added for confusion.
- *
- * BTRFS_QGROUP_OPER_ADD_EXCL: adding bytes where this subvolume is the only
- * one pointing at the bytes we are adding.  This is called on the first
- * allocation.
- *
- * BTRFS_QGROUP_OPER_ADD_SHARED: adding bytes where this bytenr is going to be
- * shared between subvols.  This is called on the creation of a ref that already
- * has refs from a different subvolume, so basically reflink.
- *
- * BTRFS_QGROUP_OPER_SUB_EXCL: removing bytes where this subvolume is the only
- * one referencing the range.
- *
- * BTRFS_QGROUP_OPER_SUB_SHARED: removing bytes where this subvolume shares with
- * refs with other subvolumes.
+ * Record a dirty extent, and info qgroup to update quota on it
+ * TODO: Use kmem cache to alloc it.
  */
-enum btrfs_qgroup_operation_type {
-       BTRFS_QGROUP_OPER_ADD_EXCL,
-       BTRFS_QGROUP_OPER_ADD_SHARED,
-       BTRFS_QGROUP_OPER_SUB_EXCL,
-       BTRFS_QGROUP_OPER_SUB_SHARED,
-       BTRFS_QGROUP_OPER_SUB_SUBTREE,
-};
-
-struct btrfs_qgroup_operation {
-       u64 ref_root;
+struct btrfs_qgroup_extent_record {
+       struct rb_node node;
        u64 bytenr;
        u64 num_bytes;
-       u64 seq;
-       enum btrfs_qgroup_operation_type type;
-       struct seq_list elem;
-       struct rb_node n;
-       struct list_head list;
+       struct ulist *old_roots;
 };
 
 int btrfs_quota_enable(struct btrfs_trans_handle *trans,
@@ -79,16 +54,18 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
 int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
 struct btrfs_delayed_extent_op;
-int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
-                           struct btrfs_fs_info *fs_info, u64 ref_root,
+int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans,
+                                        struct btrfs_fs_info *fs_info);
+struct btrfs_qgroup_extent_record
+*btrfs_qgroup_insert_dirty_extent(struct btrfs_delayed_ref_root *delayed_refs,
+                                 struct btrfs_qgroup_extent_record *record);
+int
+btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
+                           struct btrfs_fs_info *fs_info,
                            u64 bytenr, u64 num_bytes,
-                           enum btrfs_qgroup_operation_type type,
-                           int mod_seq);
-int btrfs_delayed_qgroup_accounting(struct btrfs_trans_handle *trans,
-                                   struct btrfs_fs_info *fs_info);
-void btrfs_remove_qgroup_operation(struct btrfs_trans_handle *trans,
-                                  struct btrfs_fs_info *fs_info,
-                                  struct btrfs_qgroup_operation *oper);
+                           struct ulist *old_roots, struct ulist *new_roots);
+int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
+                                struct btrfs_fs_info *fs_info);
 int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
                      struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
index 74b24b0..827951f 100644 (file)
@@ -1847,8 +1847,10 @@ again:
                        }
 
                        eb = read_tree_block(dest, old_bytenr, old_ptr_gen);
-                       if (!eb || !extent_buffer_uptodate(eb)) {
-                               ret = (!eb) ? -ENOMEM : -EIO;
+                       if (IS_ERR(eb)) {
+                               ret = PTR_ERR(eb);
+                       } else if (!extent_buffer_uptodate(eb)) {
+                               ret = -EIO;
                                free_extent_buffer(eb);
                                break;
                        }
@@ -2002,7 +2004,9 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
 
                bytenr = btrfs_node_blockptr(eb, path->slots[i]);
                eb = read_tree_block(root, bytenr, ptr_gen);
-               if (!eb || !extent_buffer_uptodate(eb)) {
+               if (IS_ERR(eb)) {
+                       return PTR_ERR(eb);
+               } else if (!extent_buffer_uptodate(eb)) {
                        free_extent_buffer(eb);
                        return -EIO;
                }
@@ -2710,7 +2714,10 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                blocksize = root->nodesize;
                generation = btrfs_node_ptr_generation(upper->eb, slot);
                eb = read_tree_block(root, bytenr, generation);
-               if (!eb || !extent_buffer_uptodate(eb)) {
+               if (IS_ERR(eb)) {
+                       err = PTR_ERR(eb);
+                       goto next;
+               } else if (!extent_buffer_uptodate(eb)) {
                        free_extent_buffer(eb);
                        err = -EIO;
                        goto next;
@@ -2873,7 +2880,9 @@ static int get_tree_block_key(struct reloc_control *rc,
        BUG_ON(block->key_ready);
        eb = read_tree_block(rc->extent_root, block->bytenr,
                             block->key.offset);
-       if (!eb || !extent_buffer_uptodate(eb)) {
+       if (IS_ERR(eb)) {
+               return PTR_ERR(eb);
+       } else if (!extent_buffer_uptodate(eb)) {
                free_extent_buffer(eb);
                return -EIO;
        }
index ab58115..9f2feab 100644 (file)
@@ -2662,18 +2662,30 @@ static void scrub_free_parity(struct scrub_parity *sparity)
        kfree(sparity);
 }
 
+static void scrub_parity_bio_endio_worker(struct btrfs_work *work)
+{
+       struct scrub_parity *sparity = container_of(work, struct scrub_parity,
+                                                   work);
+       struct scrub_ctx *sctx = sparity->sctx;
+
+       scrub_free_parity(sparity);
+       scrub_pending_bio_dec(sctx);
+}
+
 static void scrub_parity_bio_endio(struct bio *bio, int error)
 {
        struct scrub_parity *sparity = (struct scrub_parity *)bio->bi_private;
-       struct scrub_ctx *sctx = sparity->sctx;
 
        if (error)
                bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap,
                          sparity->nsectors);
 
-       scrub_free_parity(sparity);
-       scrub_pending_bio_dec(sctx);
        bio_put(bio);
+
+       btrfs_init_work(&sparity->work, btrfs_scrubparity_helper,
+                       scrub_parity_bio_endio_worker, NULL, NULL);
+       btrfs_queue_work(sparity->sctx->dev_root->fs_info->scrub_parity_workers,
+                        &sparity->work);
 }
 
 static void scrub_parity_check_and_repair(struct scrub_parity *sparity)
@@ -3589,6 +3601,13 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
                        ret = -ENOMEM;
                        goto out;
                }
+               fs_info->scrub_parity_workers =
+                       btrfs_alloc_workqueue("btrfs-scrubparity", flags,
+                                             max_active, 2);
+               if (!fs_info->scrub_parity_workers) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
        }
        ++fs_info->scrub_workers_refcnt;
 out:
@@ -3601,6 +3620,7 @@ static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
                btrfs_destroy_workqueue(fs_info->scrub_workers);
                btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
                btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
+               btrfs_destroy_workqueue(fs_info->scrub_parity_workers);
        }
        WARN_ON(fs_info->scrub_workers_refcnt < 0);
 }
index a1216f9..aa72bfd 100644 (file)
@@ -243,6 +243,7 @@ struct waiting_dir_move {
         * after this directory is moved, we can try to rmdir the ino rmdir_ino.
         */
        u64 rmdir_ino;
+       bool orphanized;
 };
 
 struct orphan_dir_info {
@@ -1158,6 +1159,9 @@ struct backref_ctx {
        /* may be truncated in case it's the last extent in a file */
        u64 extent_len;
 
+       /* data offset in the file extent item */
+       u64 data_offset;
+
        /* Just to check for bugs in backref resolving */
        int found_itself;
 };
@@ -1221,7 +1225,7 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
        if (ret < 0)
                return ret;
 
-       if (offset + bctx->extent_len > i_size)
+       if (offset + bctx->data_offset + bctx->extent_len > i_size)
                return 0;
 
        /*
@@ -1363,6 +1367,19 @@ static int find_extent_clone(struct send_ctx *sctx,
        backref_ctx->cur_offset = data_offset;
        backref_ctx->found_itself = 0;
        backref_ctx->extent_len = num_bytes;
+       /*
+        * For non-compressed extents iterate_extent_inodes() gives us extent
+        * offsets that already take into account the data offset, but not for
+        * compressed extents, since the offset is logical and not relative to
+        * the physical extent locations. We must take this into account to
+        * avoid sending clone offsets that go beyond the source file's size,
+        * which would result in the clone ioctl failing with -EINVAL on the
+        * receiving end.
+        */
+       if (compressed == BTRFS_COMPRESS_NONE)
+               backref_ctx->data_offset = 0;
+       else
+               backref_ctx->data_offset = btrfs_file_extent_offset(eb, fi);
 
        /*
         * The last extent of a file may be too large due to page alignment.
@@ -1900,8 +1917,13 @@ static int did_overwrite_ref(struct send_ctx *sctx,
                goto out;
        }
 
-       /* we know that it is or will be overwritten. check this now */
-       if (ow_inode < sctx->send_progress)
+       /*
+        * We know that it is or will be overwritten. Check this now.
+        * The current inode being processed might have been the one that caused
+        * inode 'ino' to be orphanized, therefore ow_inode can actually be the
+        * same as sctx->send_progress.
+        */
+       if (ow_inode <= sctx->send_progress)
                ret = 1;
        else
                ret = 0;
@@ -2223,6 +2245,8 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
        fs_path_reset(dest);
 
        while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) {
+               struct waiting_dir_move *wdm;
+
                fs_path_reset(name);
 
                if (is_waiting_for_rm(sctx, ino)) {
@@ -2233,7 +2257,11 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen,
                        break;
                }
 
-               if (is_waiting_for_move(sctx, ino)) {
+               wdm = get_waiting_dir_move(sctx, ino);
+               if (wdm && wdm->orphanized) {
+                       ret = gen_unique_name(sctx, ino, gen, name);
+                       stop = 1;
+               } else if (wdm) {
                        ret = get_first_ref(sctx->parent_root, ino,
                                            &parent_inode, &parent_gen, name);
                } else {
@@ -2328,8 +2356,12 @@ static int send_subvol_begin(struct send_ctx *sctx)
        TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
                    le64_to_cpu(sctx->send_root->root_item.ctransid));
        if (parent_root) {
-               TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
-                               sctx->parent_root->root_item.uuid);
+               if (!btrfs_is_empty_uuid(parent_root->root_item.received_uuid))
+                       TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+                                    parent_root->root_item.received_uuid);
+               else
+                       TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+                                    parent_root->root_item.uuid);
                TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
                            le64_to_cpu(sctx->parent_root->root_item.ctransid));
        }
@@ -2923,7 +2955,7 @@ static int is_waiting_for_move(struct send_ctx *sctx, u64 ino)
        return entry != NULL;
 }
 
-static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
+static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
 {
        struct rb_node **p = &sctx->waiting_dir_moves.rb_node;
        struct rb_node *parent = NULL;
@@ -2934,6 +2966,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino)
                return -ENOMEM;
        dm->ino = ino;
        dm->rmdir_ino = 0;
+       dm->orphanized = orphanized;
 
        while (*p) {
                parent = *p;
@@ -3030,7 +3063,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
                        goto out;
        }
 
-       ret = add_waiting_dir_move(sctx, pm->ino);
+       ret = add_waiting_dir_move(sctx, pm->ino, is_orphan);
        if (ret)
                goto out;
 
@@ -3353,8 +3386,40 @@ out:
        return ret;
 }
 
+/*
+ * Check if inode ino1 is an ancestor of inode ino2 in the given root.
+ * Return 1 if true, 0 if false and < 0 on error.
+ */
+static int is_ancestor(struct btrfs_root *root,
+                      const u64 ino1,
+                      const u64 ino1_gen,
+                      const u64 ino2,
+                      struct fs_path *fs_path)
+{
+       u64 ino = ino2;
+
+       while (ino > BTRFS_FIRST_FREE_OBJECTID) {
+               int ret;
+               u64 parent;
+               u64 parent_gen;
+
+               fs_path_reset(fs_path);
+               ret = get_first_ref(root, ino, &parent, &parent_gen, fs_path);
+               if (ret < 0) {
+                       if (ret == -ENOENT && ino == ino2)
+                               ret = 0;
+                       return ret;
+               }
+               if (parent == ino1)
+                       return parent_gen == ino1_gen ? 1 : 0;
+               ino = parent;
+       }
+       return 0;
+}
+
 static int wait_for_parent_move(struct send_ctx *sctx,
-                               struct recorded_ref *parent_ref)
+                               struct recorded_ref *parent_ref,
+                               const bool is_orphan)
 {
        int ret = 0;
        u64 ino = parent_ref->dir;
@@ -3374,11 +3439,24 @@ static int wait_for_parent_move(struct send_ctx *sctx,
         * Our current directory inode may not yet be renamed/moved because some
         * ancestor (immediate or not) has to be renamed/moved first. So find if
         * such ancestor exists and make sure our own rename/move happens after
-        * that ancestor is processed.
+        * that ancestor is processed to avoid path build infinite loops (done
+        * at get_cur_path()).
         */
        while (ino > BTRFS_FIRST_FREE_OBJECTID) {
                if (is_waiting_for_move(sctx, ino)) {
-                       ret = 1;
+                       /*
+                        * If the current inode is an ancestor of ino in the
+                        * parent root, we need to delay the rename of the
+                        * current inode, otherwise don't delay the rename
+                        * because we can end up with a circular dependency
+                        * of renames, resulting in some directories never
+                        * getting the respective rename operations issued in
+                        * the send stream or getting into infinite path build
+                        * loops.
+                        */
+                       ret = is_ancestor(sctx->parent_root,
+                                         sctx->cur_ino, sctx->cur_inode_gen,
+                                         ino, path_before);
                        break;
                }
 
@@ -3420,7 +3498,7 @@ out:
                                           ino,
                                           &sctx->new_refs,
                                           &sctx->deleted_refs,
-                                          false);
+                                          is_orphan);
                if (!ret)
                        ret = 1;
        }
@@ -3589,6 +3667,17 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                        }
                }
 
+               if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root &&
+                   can_rename) {
+                       ret = wait_for_parent_move(sctx, cur, is_orphan);
+                       if (ret < 0)
+                               goto out;
+                       if (ret == 1) {
+                               can_rename = false;
+                               *pending_move = 1;
+                       }
+               }
+
                /*
                 * link/move the ref to the new place. If we have an orphan
                 * inode, move it and update valid_path. If not, link or move
@@ -3609,18 +3698,11 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                 * dirs, we always have one new and one deleted
                                 * ref. The deleted ref is ignored later.
                                 */
-                               ret = wait_for_parent_move(sctx, cur);
-                               if (ret < 0)
-                                       goto out;
-                               if (ret) {
-                                       *pending_move = 1;
-                               } else {
-                                       ret = send_rename(sctx, valid_path,
-                                                         cur->full_path);
-                                       if (!ret)
-                                               ret = fs_path_copy(valid_path,
-                                                              cur->full_path);
-                               }
+                               ret = send_rename(sctx, valid_path,
+                                                 cur->full_path);
+                               if (!ret)
+                                       ret = fs_path_copy(valid_path,
+                                                          cur->full_path);
                                if (ret < 0)
                                        goto out;
                        } else {
@@ -4508,8 +4590,21 @@ verbose_printk("btrfs: send_clone offset=%llu, len=%d, clone_root=%llu, "
        if (ret < 0)
                goto out;
 
-       TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
-                       clone_root->root->root_item.uuid);
+       /*
+        * If the parent we're using has a received_uuid set then use that as
+        * our clone source as that is what we will look for when doing a
+        * receive.
+        *
+        * This covers the case that we create a snapshot off of a received
+        * subvolume and then use that as the parent and try to receive on a
+        * different host.
+        */
+       if (!btrfs_is_empty_uuid(clone_root->root->root_item.received_uuid))
+               TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+                            clone_root->root->root_item.received_uuid);
+       else
+               TLV_PUT_UUID(sctx, BTRFS_SEND_A_CLONE_UUID,
+                            clone_root->root->root_item.uuid);
        TLV_PUT_U64(sctx, BTRFS_SEND_A_CLONE_CTRANSID,
                    le64_to_cpu(clone_root->root->root_item.ctransid));
        TLV_PUT_PATH(sctx, BTRFS_SEND_A_CLONE_PATH, p);
index 9e66f5e..cd7ef34 100644 (file)
@@ -135,6 +135,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
  * __btrfs_std_error decodes expected errors from the caller and
  * invokes the approciate error response.
  */
+__cold
 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                       unsigned int line, int errno, const char *fmt, ...)
 {
@@ -247,18 +248,11 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
  * We'll complete the cleanup in btrfs_end_transaction and
  * btrfs_commit_transaction.
  */
+__cold
 void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root, const char *function,
                               unsigned int line, int errno)
 {
-       /*
-        * Report first abort since mount
-        */
-       if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED,
-                               &root->fs_info->fs_state)) {
-               WARN(1, KERN_DEBUG "BTRFS: Transaction aborted (error %d)\n",
-                               errno);
-       }
        trans->aborted = errno;
        /* Nothing used. The other threads that have joined this
         * transaction may be able to continue. */
@@ -281,6 +275,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
  * __btrfs_panic decodes unexpected, fatal errors from the caller,
  * issues an alert, and either panics or BUGs, depending on mount options.
  */
+__cold
 void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
                   unsigned int line, int errno, const char *fmt, ...)
 {
@@ -841,33 +836,153 @@ out:
        return error;
 }
 
-static struct dentry *get_default_root(struct super_block *sb,
-                                      u64 subvol_objectid)
+static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
+                                          u64 subvol_objectid)
 {
-       struct btrfs_fs_info *fs_info = btrfs_sb(sb);
        struct btrfs_root *root = fs_info->tree_root;
-       struct btrfs_root *new_root;
-       struct btrfs_dir_item *di;
-       struct btrfs_path *path;
-       struct btrfs_key location;
-       struct inode *inode;
-       u64 dir_id;
-       int new = 0;
+       struct btrfs_root *fs_root;
+       struct btrfs_root_ref *root_ref;
+       struct btrfs_inode_ref *inode_ref;
+       struct btrfs_key key;
+       struct btrfs_path *path = NULL;
+       char *name = NULL, *ptr;
+       u64 dirid;
+       int len;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto err;
+       }
+       path->leave_spinning = 1;
+
+       name = kmalloc(PATH_MAX, GFP_NOFS);
+       if (!name) {
+               ret = -ENOMEM;
+               goto err;
+       }
+       ptr = name + PATH_MAX - 1;
+       ptr[0] = '\0';
 
        /*
-        * We have a specific subvol we want to mount, just setup location and
-        * go look up the root.
+        * Walk up the subvolume trees in the tree of tree roots by root
+        * backrefs until we hit the top-level subvolume.
         */
-       if (subvol_objectid) {
-               location.objectid = subvol_objectid;
-               location.type = BTRFS_ROOT_ITEM_KEY;
-               location.offset = (u64)-1;
-               goto find_root;
+       while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
+               key.objectid = subvol_objectid;
+               key.type = BTRFS_ROOT_BACKREF_KEY;
+               key.offset = (u64)-1;
+
+               ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+               if (ret < 0) {
+                       goto err;
+               } else if (ret > 0) {
+                       ret = btrfs_previous_item(root, path, subvol_objectid,
+                                                 BTRFS_ROOT_BACKREF_KEY);
+                       if (ret < 0) {
+                               goto err;
+                       } else if (ret > 0) {
+                               ret = -ENOENT;
+                               goto err;
+                       }
+               }
+
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               subvol_objectid = key.offset;
+
+               root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                         struct btrfs_root_ref);
+               len = btrfs_root_ref_name_len(path->nodes[0], root_ref);
+               ptr -= len + 1;
+               if (ptr < name) {
+                       ret = -ENAMETOOLONG;
+                       goto err;
+               }
+               read_extent_buffer(path->nodes[0], ptr + 1,
+                                  (unsigned long)(root_ref + 1), len);
+               ptr[0] = '/';
+               dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref);
+               btrfs_release_path(path);
+
+               key.objectid = subvol_objectid;
+               key.type = BTRFS_ROOT_ITEM_KEY;
+               key.offset = (u64)-1;
+               fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
+               if (IS_ERR(fs_root)) {
+                       ret = PTR_ERR(fs_root);
+                       goto err;
+               }
+
+               /*
+                * Walk up the filesystem tree by inode refs until we hit the
+                * root directory.
+                */
+               while (dirid != BTRFS_FIRST_FREE_OBJECTID) {
+                       key.objectid = dirid;
+                       key.type = BTRFS_INODE_REF_KEY;
+                       key.offset = (u64)-1;
+
+                       ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+                       if (ret < 0) {
+                               goto err;
+                       } else if (ret > 0) {
+                               ret = btrfs_previous_item(fs_root, path, dirid,
+                                                         BTRFS_INODE_REF_KEY);
+                               if (ret < 0) {
+                                       goto err;
+                               } else if (ret > 0) {
+                                       ret = -ENOENT;
+                                       goto err;
+                               }
+                       }
+
+                       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+                       dirid = key.offset;
+
+                       inode_ref = btrfs_item_ptr(path->nodes[0],
+                                                  path->slots[0],
+                                                  struct btrfs_inode_ref);
+                       len = btrfs_inode_ref_name_len(path->nodes[0],
+                                                      inode_ref);
+                       ptr -= len + 1;
+                       if (ptr < name) {
+                               ret = -ENAMETOOLONG;
+                               goto err;
+                       }
+                       read_extent_buffer(path->nodes[0], ptr + 1,
+                                          (unsigned long)(inode_ref + 1), len);
+                       ptr[0] = '/';
+                       btrfs_release_path(path);
+               }
        }
 
+       btrfs_free_path(path);
+       if (ptr == name + PATH_MAX - 1) {
+               name[0] = '/';
+               name[1] = '\0';
+       } else {
+               memmove(name, ptr, name + PATH_MAX - ptr);
+       }
+       return name;
+
+err:
+       btrfs_free_path(path);
+       kfree(name);
+       return ERR_PTR(ret);
+}
+
+static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid)
+{
+       struct btrfs_root *root = fs_info->tree_root;
+       struct btrfs_dir_item *di;
+       struct btrfs_path *path;
+       struct btrfs_key location;
+       u64 dir_id;
+
        path = btrfs_alloc_path();
        if (!path)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
        path->leave_spinning = 1;
 
        /*
@@ -879,58 +994,23 @@ static struct dentry *get_default_root(struct super_block *sb,
        di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
        if (IS_ERR(di)) {
                btrfs_free_path(path);
-               return ERR_CAST(di);
+               return PTR_ERR(di);
        }
        if (!di) {
                /*
                 * Ok the default dir item isn't there.  This is weird since
                 * it's always been there, but don't freak out, just try and
-                * mount to root most subvolume.
+                * mount the top-level subvolume.
                 */
                btrfs_free_path(path);
-               dir_id = BTRFS_FIRST_FREE_OBJECTID;
-               new_root = fs_info->fs_root;
-               goto setup_root;
+               *objectid = BTRFS_FS_TREE_OBJECTID;
+               return 0;
        }
 
        btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
        btrfs_free_path(path);
-
-find_root:
-       new_root = btrfs_read_fs_root_no_name(fs_info, &location);
-       if (IS_ERR(new_root))
-               return ERR_CAST(new_root);
-
-       if (!(sb->s_flags & MS_RDONLY)) {
-               int ret;
-               down_read(&fs_info->cleanup_work_sem);
-               ret = btrfs_orphan_cleanup(new_root);
-               up_read(&fs_info->cleanup_work_sem);
-               if (ret)
-                       return ERR_PTR(ret);
-       }
-
-       dir_id = btrfs_root_dirid(&new_root->root_item);
-setup_root:
-       location.objectid = dir_id;
-       location.type = BTRFS_INODE_ITEM_KEY;
-       location.offset = 0;
-
-       inode = btrfs_iget(sb, &location, new_root, &new);
-       if (IS_ERR(inode))
-               return ERR_CAST(inode);
-
-       /*
-        * If we're just mounting the root most subvol put the inode and return
-        * a reference to the dentry.  We will have already gotten a reference
-        * to the inode in btrfs_fill_super so we're good to go.
-        */
-       if (!new && d_inode(sb->s_root) == inode) {
-               iput(inode);
-               return dget(sb->s_root);
-       }
-
-       return d_obtain_root(inode);
+       *objectid = location.objectid;
+       return 0;
 }
 
 static int btrfs_fill_super(struct super_block *sb,
@@ -1108,6 +1188,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",fatal_errors=panic");
        if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
                seq_printf(seq, ",commit=%d", info->commit_interval);
+       seq_printf(seq, ",subvolid=%llu",
+                 BTRFS_I(d_inode(dentry))->root->root_key.objectid);
+       seq_puts(seq, ",subvol=");
+       seq_dentry(seq, dentry, " \t\n\\");
        return 0;
 }
 
@@ -1138,107 +1222,139 @@ static inline int is_subvolume_inode(struct inode *inode)
 }
 
 /*
- * This will strip out the subvol=%s argument for an argument string and add
- * subvolid=0 to make sure we get the actual tree root for path walking to the
- * subvol we want.
+ * This will add subvolid=0 to the argument string while removing any subvol=
+ * and subvolid= arguments to make sure we get the top-level root for path
+ * walking to the subvol we want.
  */
 static char *setup_root_args(char *args)
 {
-       unsigned len = strlen(args) + 2 + 1;
-       char *src, *dst, *buf;
+       char *buf, *dst, *sep;
 
-       /*
-        * We need the same args as before, but with this substitution:
-        * s!subvol=[^,]+!subvolid=0!
-        *
-        * Since the replacement string is up to 2 bytes longer than the
-        * original, allocate strlen(args) + 2 + 1 bytes.
-        */
+       if (!args)
+               return kstrdup("subvolid=0", GFP_NOFS);
 
-       src = strstr(args, "subvol=");
-       /* This shouldn't happen, but just in case.. */
-       if (!src)
-               return NULL;
-
-       buf = dst = kmalloc(len, GFP_NOFS);
+       /* The worst case is that we add ",subvolid=0" to the end. */
+       buf = dst = kmalloc(strlen(args) + strlen(",subvolid=0") + 1, GFP_NOFS);
        if (!buf)
                return NULL;
 
-       /*
-        * If the subvol= arg is not at the start of the string,
-        * copy whatever precedes it into buf.
-        */
-       if (src != args) {
-               *src++ = '\0';
-               strcpy(buf, args);
-               dst += strlen(args);
+       while (1) {
+               sep = strchrnul(args, ',');
+               if (!strstarts(args, "subvol=") &&
+                   !strstarts(args, "subvolid=")) {
+                       memcpy(dst, args, sep - args);
+                       dst += sep - args;
+                       *dst++ = ',';
+               }
+               if (*sep)
+                       args = sep + 1;
+               else
+                       break;
        }
-
        strcpy(dst, "subvolid=0");
-       dst += strlen("subvolid=0");
-
-       /*
-        * If there is a "," after the original subvol=... string,
-        * copy that suffix into our buffer.  Otherwise, we're done.
-        */
-       src = strchr(src, ',');
-       if (src)
-               strcpy(dst, src);
 
        return buf;
 }
 
-static struct dentry *mount_subvol(const char *subvol_name, int flags,
-                                  const char *device_name, char *data)
+static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
+                                  int flags, const char *device_name,
+                                  char *data)
 {
        struct dentry *root;
-       struct vfsmount *mnt;
+       struct vfsmount *mnt = NULL;
        char *newargs;
+       int ret;
 
        newargs = setup_root_args(data);
-       if (!newargs)
-               return ERR_PTR(-ENOMEM);
-       mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
-                            newargs);
+       if (!newargs) {
+               root = ERR_PTR(-ENOMEM);
+               goto out;
+       }
 
-       if (PTR_RET(mnt) == -EBUSY) {
+       mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name, newargs);
+       if (PTR_ERR_OR_ZERO(mnt) == -EBUSY) {
                if (flags & MS_RDONLY) {
-                       mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name,
-                                            newargs);
+                       mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY,
+                                            device_name, newargs);
                } else {
-                       int r;
-                       mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
-                                            newargs);
+                       mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY,
+                                            device_name, newargs);
                        if (IS_ERR(mnt)) {
-                               kfree(newargs);
-                               return ERR_CAST(mnt);
+                               root = ERR_CAST(mnt);
+                               mnt = NULL;
+                               goto out;
                        }
 
-                       r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
-                       if (r < 0) {
-                               /* FIXME: release vfsmount mnt ??*/
-                               kfree(newargs);
-                               return ERR_PTR(r);
+                       down_write(&mnt->mnt_sb->s_umount);
+                       ret = btrfs_remount(mnt->mnt_sb, &flags, NULL);
+                       up_write(&mnt->mnt_sb->s_umount);
+                       if (ret < 0) {
+                               root = ERR_PTR(ret);
+                               goto out;
                        }
                }
        }
+       if (IS_ERR(mnt)) {
+               root = ERR_CAST(mnt);
+               mnt = NULL;
+               goto out;
+       }
 
-       kfree(newargs);
+       if (!subvol_name) {
+               if (!subvol_objectid) {
+                       ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb),
+                                                         &subvol_objectid);
+                       if (ret) {
+                               root = ERR_PTR(ret);
+                               goto out;
+                       }
+               }
+               subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
+                                                           subvol_objectid);
+               if (IS_ERR(subvol_name)) {
+                       root = ERR_CAST(subvol_name);
+                       subvol_name = NULL;
+                       goto out;
+               }
 
-       if (IS_ERR(mnt))
-               return ERR_CAST(mnt);
+       }
 
        root = mount_subtree(mnt, subvol_name);
+       /* mount_subtree() drops our reference on the vfsmount. */
+       mnt = NULL;
 
-       if (!IS_ERR(root) && !is_subvolume_inode(d_inode(root))) {
+       if (!IS_ERR(root)) {
                struct super_block *s = root->d_sb;
-               dput(root);
-               root = ERR_PTR(-EINVAL);
-               deactivate_locked_super(s);
-               printk(KERN_ERR "BTRFS: '%s' is not a valid subvolume\n",
-                               subvol_name);
+               struct inode *root_inode = d_inode(root);
+               u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid;
+
+               ret = 0;
+               if (!is_subvolume_inode(root_inode)) {
+                       pr_err("BTRFS: '%s' is not a valid subvolume\n",
+                              subvol_name);
+                       ret = -EINVAL;
+               }
+               if (subvol_objectid && root_objectid != subvol_objectid) {
+                       /*
+                        * This will also catch a race condition where a
+                        * subvolume which was passed by ID is renamed and
+                        * another subvolume is renamed over the old location.
+                        */
+                       pr_err("BTRFS: subvol '%s' does not match subvolid %llu\n",
+                              subvol_name, subvol_objectid);
+                       ret = -EINVAL;
+               }
+               if (ret) {
+                       dput(root);
+                       root = ERR_PTR(ret);
+                       deactivate_locked_super(s);
+               }
        }
 
+out:
+       mntput(mnt);
+       kfree(newargs);
+       kfree(subvol_name);
        return root;
 }
 
@@ -1303,7 +1419,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 {
        struct block_device *bdev = NULL;
        struct super_block *s;
-       struct dentry *root;
        struct btrfs_fs_devices *fs_devices = NULL;
        struct btrfs_fs_info *fs_info = NULL;
        struct security_mnt_opts new_sec_opts;
@@ -1323,10 +1438,10 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                return ERR_PTR(error);
        }
 
-       if (subvol_name) {
-               root = mount_subvol(subvol_name, flags, device_name, data);
-               kfree(subvol_name);
-               return root;
+       if (subvol_name || subvol_objectid != BTRFS_FS_TREE_OBJECTID) {
+               /* mount_subvol() will free subvol_name. */
+               return mount_subvol(subvol_name, subvol_objectid, flags,
+                                   device_name, data);
        }
 
        security_init_mnt_opts(&new_sec_opts);
@@ -1392,23 +1507,19 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                error = btrfs_fill_super(s, fs_devices, data,
                                         flags & MS_SILENT ? 1 : 0);
        }
-
-       root = !error ? get_default_root(s, subvol_objectid) : ERR_PTR(error);
-       if (IS_ERR(root)) {
+       if (error) {
                deactivate_locked_super(s);
-               error = PTR_ERR(root);
                goto error_sec_opts;
        }
 
        fs_info = btrfs_sb(s);
        error = setup_security_options(fs_info, s, &new_sec_opts);
        if (error) {
-               dput(root);
                deactivate_locked_super(s);
                goto error_sec_opts;
        }
 
-       return root;
+       return dget(s->s_root);
 
 error_close_devices:
        btrfs_close_devices(fs_devices);
index c32a7ba..846d277 100644 (file)
@@ -21,6 +21,7 @@
 #include "../transaction.h"
 #include "../disk-io.h"
 #include "../qgroup.h"
+#include "../backref.h"
 
 static void init_dummy_trans(struct btrfs_trans_handle *trans)
 {
@@ -227,6 +228,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
 {
        struct btrfs_trans_handle trans;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct ulist *old_roots = NULL;
+       struct ulist *new_roots = NULL;
        int ret;
 
        init_dummy_trans(&trans);
@@ -238,10 +241,15 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
                return ret;
        }
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+       /*
+        * Since the test trans doesn't have the complicated delayed refs,
+        * we can only call btrfs_qgroup_account_extent() directly to test
+        * quota.
+        */
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
        if (ret) {
-               test_msg("Couldn't add space to a qgroup %d\n", ret);
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
                return ret;
        }
 
@@ -249,9 +257,18 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
        if (ret)
                return ret;
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Delayed qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return ret;
        }
 
@@ -259,21 +276,32 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
                test_msg("Qgroup counts didn't match expected values\n");
                return -EINVAL;
        }
+       old_roots = NULL;
+       new_roots = NULL;
+
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
+       }
 
        ret = remove_extent_item(root, 4096, 4096);
        if (ret)
                return -EINVAL;
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_SUB_EXCL, 0);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
        if (ret) {
-               test_msg("Couldn't remove space from the qgroup %d\n", ret);
-               return -EINVAL;
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
        }
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return -EINVAL;
        }
 
@@ -294,6 +322,8 @@ static int test_multiple_refs(struct btrfs_root *root)
 {
        struct btrfs_trans_handle trans;
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct ulist *old_roots = NULL;
+       struct ulist *new_roots = NULL;
        int ret;
 
        init_dummy_trans(&trans);
@@ -307,20 +337,29 @@ static int test_multiple_refs(struct btrfs_root *root)
                return ret;
        }
 
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
+       }
+
        ret = insert_normal_tree_ref(root, 4096, 4096, 0, 5);
        if (ret)
                return ret;
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 5, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
        if (ret) {
-               test_msg("Couldn't add space to a qgroup %d\n", ret);
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
                return ret;
        }
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Delayed qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return ret;
        }
 
@@ -329,20 +368,29 @@ static int test_multiple_refs(struct btrfs_root *root)
                return -EINVAL;
        }
 
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
+       }
+
        ret = add_tree_ref(root, 4096, 4096, 0, 256);
        if (ret)
                return ret;
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_ADD_SHARED, 0);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
        if (ret) {
-               test_msg("Qgroup record ref failed %d\n", ret);
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find new roots: %d\n", ret);
                return ret;
        }
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return ret;
        }
 
@@ -356,20 +404,29 @@ static int test_multiple_refs(struct btrfs_root *root)
                return -EINVAL;
        }
 
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &old_roots);
+       if (ret) {
+               ulist_free(old_roots);
+               test_msg("Couldn't find old roots: %d\n", ret);
+               return ret;
+       }
+
        ret = remove_extent_ref(root, 4096, 4096, 0, 256);
        if (ret)
                return ret;
 
-       ret = btrfs_qgroup_record_ref(&trans, fs_info, 256, 4096, 4096,
-                                     BTRFS_QGROUP_OPER_SUB_SHARED, 0);
+       ret = btrfs_find_all_roots(&trans, fs_info, 4096, 0, &new_roots);
        if (ret) {
-               test_msg("Qgroup record ref failed %d\n", ret);
+               ulist_free(old_roots);
+               ulist_free(new_roots);
+               test_msg("Couldn't find new roots: %d\n", ret);
                return ret;
        }
 
-       ret = btrfs_delayed_qgroup_accounting(&trans, fs_info);
+       ret = btrfs_qgroup_account_extent(&trans, fs_info, 4096, 4096,
+                                         old_roots, new_roots);
        if (ret) {
-               test_msg("Qgroup accounting failed %d\n", ret);
+               test_msg("Couldn't account space for a qgroup %d\n", ret);
                return ret;
        }
 
index 5628e25..c0f18e7 100644 (file)
@@ -225,12 +225,14 @@ loop:
        cur_trans->dirty_bg_run = 0;
 
        cur_trans->delayed_refs.href_root = RB_ROOT;
+       cur_trans->delayed_refs.dirty_extent_root = RB_ROOT;
        atomic_set(&cur_trans->delayed_refs.num_entries, 0);
        cur_trans->delayed_refs.num_heads_ready = 0;
        cur_trans->delayed_refs.pending_csums = 0;
        cur_trans->delayed_refs.num_heads = 0;
        cur_trans->delayed_refs.flushing = 0;
        cur_trans->delayed_refs.run_delayed_start = 0;
+       cur_trans->delayed_refs.qgroup_to_skip = 0;
 
        /*
         * although the tree mod log is per file system and not per transaction,
@@ -509,6 +511,7 @@ again:
        h->transaction = cur_trans;
        h->blocks_used = 0;
        h->bytes_reserved = 0;
+       h->chunk_bytes_reserved = 0;
        h->root = root;
        h->delayed_ref_updates = 0;
        h->use_count = 1;
@@ -792,6 +795,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        if (!list_empty(&trans->new_bgs))
                btrfs_create_pending_block_groups(trans, root);
 
+       btrfs_trans_release_chunk_metadata(trans);
+
        if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
            should_end_transaction(trans, root) &&
            ACCESS_ONCE(cur_trans->state) == TRANS_STATE_RUNNING) {
@@ -1290,6 +1295,12 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        if (pending->error)
                goto no_free_objectid;
 
+       /*
+        * Make the qgroup code skip the new snapshot's qgroupid, as it is
+        * accounted for later by btrfs_qgroup_inherit().
+        */
+       btrfs_set_skip_qgroup(trans, objectid);
+
        btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);
 
        if (to_reserve > 0) {
@@ -1298,7 +1309,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                                     to_reserve,
                                                     BTRFS_RESERVE_NO_FLUSH);
                if (pending->error)
-                       goto no_free_objectid;
+                       goto clear_skip_qgroup;
        }
 
        key.objectid = objectid;
@@ -1396,25 +1407,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                btrfs_abort_transaction(trans, root, ret);
                goto fail;
        }
-
-       /*
-        * We need to flush delayed refs in order to make sure all of our quota
-        * operations have been done before we call btrfs_qgroup_inherit.
-        */
-       ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-       if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
-               goto fail;
-       }
-
-       ret = btrfs_qgroup_inherit(trans, fs_info,
-                                  root->root_key.objectid,
-                                  objectid, pending->inherit);
-       if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
-               goto fail;
-       }
-
        /* see comments in should_cow_block() */
        set_bit(BTRFS_ROOT_FORCE_COW, &root->state);
        smp_wmb();
@@ -1497,11 +1489,37 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                        goto fail;
                }
        }
+
+       ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
+
+       /*
+        * account qgroup counters before qgroup_inherit()
+        */
+       ret = btrfs_qgroup_prepare_account_extents(trans, fs_info);
+       if (ret)
+               goto fail;
+       ret = btrfs_qgroup_account_extents(trans, fs_info);
+       if (ret)
+               goto fail;
+       ret = btrfs_qgroup_inherit(trans, fs_info,
+                                  root->root_key.objectid,
+                                  objectid, pending->inherit);
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
+
 fail:
        pending->error = ret;
 dir_item_existed:
        trans->block_rsv = rsv;
        trans->bytes_reserved = 0;
+clear_skip_qgroup:
+       btrfs_clear_skip_qgroup(trans);
 no_free_objectid:
        kfree(new_root_item);
 root_item_alloc_fail:
@@ -1963,6 +1981,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                goto scrub_continue;
        }
 
+       /* Record old roots for later qgroup accounting */
+       ret = btrfs_qgroup_prepare_account_extents(trans, root->fs_info);
+       if (ret) {
+               mutex_unlock(&root->fs_info->reloc_mutex);
+               goto scrub_continue;
+       }
+
        /*
         * make sure none of the code above managed to slip in a
         * delayed item
@@ -2004,6 +2029,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         */
        btrfs_free_log_root_tree(trans, root->fs_info);
 
+       /*
+        * Since fs roots are all committed, we can get a quite accurate
+        * new_roots. So let's do quota accounting.
+        */
+       ret = btrfs_qgroup_account_extents(trans, root->fs_info);
+       if (ret < 0) {
+               mutex_unlock(&root->fs_info->tree_log_mutex);
+               mutex_unlock(&root->fs_info->reloc_mutex);
+               goto scrub_continue;
+       }
+
        ret = commit_cowonly_roots(trans, root);
        if (ret) {
                mutex_unlock(&root->fs_info->tree_log_mutex);
@@ -2054,6 +2090,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        clear_bit(BTRFS_INODE_BTREE_LOG1_ERR, &btree_ino->runtime_flags);
        clear_bit(BTRFS_INODE_BTREE_LOG2_ERR, &btree_ino->runtime_flags);
 
+       btrfs_trans_release_chunk_metadata(trans);
+
        spin_lock(&root->fs_info->trans_lock);
        cur_trans->state = TRANS_STATE_UNBLOCKED;
        root->fs_info->running_transaction = NULL;
@@ -2123,6 +2161,7 @@ scrub_continue:
        btrfs_scrub_continue(root);
 cleanup_transaction:
        btrfs_trans_release_metadata(trans, root);
+       btrfs_trans_release_chunk_metadata(trans);
        trans->block_rsv = NULL;
        if (trans->qgroup_reserved) {
                btrfs_qgroup_free(root, trans->qgroup_reserved);
index 0b24755..eb09c20 100644 (file)
@@ -102,6 +102,7 @@ struct btrfs_transaction {
 struct btrfs_trans_handle {
        u64 transid;
        u64 bytes_reserved;
+       u64 chunk_bytes_reserved;
        u64 qgroup_reserved;
        unsigned long use_count;
        unsigned long blocks_reserved;
@@ -153,6 +154,29 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
        spin_unlock(&BTRFS_I(inode)->lock);
 }
 
+/*
+ * Make the qgroup code skip the given qgroupid, which means the old/new_roots
+ * for the qgroup won't contain that qgroupid.
+ */
+static inline void btrfs_set_skip_qgroup(struct btrfs_trans_handle *trans,
+                                        u64 qgroupid)
+{
+       struct btrfs_delayed_ref_root *delayed_refs;
+
+       delayed_refs = &trans->transaction->delayed_refs;
+       WARN_ON(delayed_refs->qgroup_to_skip);
+       delayed_refs->qgroup_to_skip = qgroupid;
+}
+
+static inline void btrfs_clear_skip_qgroup(struct btrfs_trans_handle *trans)
+{
+       struct btrfs_delayed_ref_root *delayed_refs;
+
+       delayed_refs = &trans->transaction->delayed_refs;
+       WARN_ON(!delayed_refs->qgroup_to_skip);
+       delayed_refs->qgroup_to_skip = 0;
+}
+
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
index a63719c..a4b9c8b 100644 (file)
@@ -52,9 +52,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                goto out;
 
-       if (btrfs_test_opt(root, SSD))
-               goto out;
-
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
index d049683..1ce80c1 100644 (file)
@@ -3881,12 +3881,6 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
                                     &ordered->flags))
                        continue;
 
-               if (ordered->csum_bytes_left) {
-                       btrfs_start_ordered_extent(inode, ordered, 0);
-                       wait_event(ordered->wait,
-                                  ordered->csum_bytes_left == 0);
-               }
-
                list_for_each_entry(sum, &ordered->list, list) {
                        ret = btrfs_csum_file_blocks(trans, log, sum);
                        if (ret)
index 840a38b..91feb2b 100644 (file)
@@ -132,6 +132,15 @@ static struct ulist_node *ulist_rbtree_search(struct ulist *ulist, u64 val)
        return NULL;
 }
 
+static void ulist_rbtree_erase(struct ulist *ulist, struct ulist_node *node)
+{
+       rb_erase(&node->rb_node, &ulist->root);
+       list_del(&node->list);
+       kfree(node);
+       BUG_ON(ulist->nnodes == 0);
+       ulist->nnodes--;
+}
+
 static int ulist_rbtree_insert(struct ulist *ulist, struct ulist_node *ins)
 {
        struct rb_node **p = &ulist->root.rb_node;
@@ -197,9 +206,6 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
 
        node->val = val;
        node->aux = aux;
-#ifdef CONFIG_BTRFS_DEBUG
-       node->seqnum = ulist->nnodes;
-#endif
 
        ret = ulist_rbtree_insert(ulist, node);
        ASSERT(!ret);
@@ -209,6 +215,33 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
        return 1;
 }
 
+/*
+ * ulist_del - delete one node from ulist
+ * @ulist:     ulist to remove node from
+ * @val:       value to delete
+ * @aux:       aux to delete
+ *
+ * The deletion will only be done when *BOTH* val and aux matches.
+ * Return 0 for successful delete.
+ * Return > 0 for not found.
+ */
+int ulist_del(struct ulist *ulist, u64 val, u64 aux)
+{
+       struct ulist_node *node;
+
+       node = ulist_rbtree_search(ulist, val);
+       /* Not found */
+       if (!node)
+               return 1;
+
+       if (node->aux != aux)
+               return 1;
+
+       /* Found and delete */
+       ulist_rbtree_erase(ulist, node);
+       return 0;
+}
+
 /**
  * ulist_next - iterate ulist
  * @ulist:     ulist to iterate
@@ -237,15 +270,7 @@ struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator *uiter)
                uiter->cur_list = uiter->cur_list->next;
        } else {
                uiter->cur_list = ulist->nodes.next;
-#ifdef CONFIG_BTRFS_DEBUG
-               uiter->i = 0;
-#endif
        }
        node = list_entry(uiter->cur_list, struct ulist_node, list);
-#ifdef CONFIG_BTRFS_DEBUG
-       ASSERT(node->seqnum == uiter->i);
-       ASSERT(uiter->i >= 0 && uiter->i < ulist->nnodes);
-       uiter->i++;
-#endif
        return node;
 }
index 4c29db6..a01a2c4 100644 (file)
@@ -57,6 +57,7 @@ void ulist_free(struct ulist *ulist);
 int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
 int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
                    u64 *old_aux, gfp_t gfp_mask);
+int ulist_del(struct ulist *ulist, u64 val, u64 aux);
 
 /* just like ulist_add_merge() but take a pointer for the aux data */
 static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
index 2d9061f..8396699 100644 (file)
@@ -758,13 +758,13 @@ static void free_device(struct rcu_head *head)
 
 static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 {
-       struct btrfs_device *device;
+       struct btrfs_device *device, *tmp;
 
        if (--fs_devices->opened > 0)
                return 0;
 
        mutex_lock(&fs_devices->device_list_mutex);
-       list_for_each_entry(device, &fs_devices->devices, dev_list) {
+       list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
                struct btrfs_device *new_device;
                struct rcu_string *name;
 
@@ -1132,15 +1132,31 @@ again:
 
                map = (struct map_lookup *)em->bdev;
                for (i = 0; i < map->num_stripes; i++) {
+                       u64 end;
+
                        if (map->stripes[i].dev != device)
                                continue;
                        if (map->stripes[i].physical >= physical_start + len ||
                            map->stripes[i].physical + em->orig_block_len <=
                            physical_start)
                                continue;
-                       *start = map->stripes[i].physical +
-                               em->orig_block_len;
-                       ret = 1;
+                       /*
+                        * Make sure that while processing the pinned list we do
+                        * not override our *start with a lower value, because
+                        * we can have pinned chunks that fall within this
+                        * device hole and that have lower physical addresses
+                        * than the pending chunks we processed before. If we
+                        * do not take this special care we can end up getting
+                        * 2 pending chunks that start at the same physical
+                        * device offsets because the end offset of a pinned
+                        * chunk can be equal to the start offset of some
+                        * pending chunk.
+                        */
+                       end = map->stripes[i].physical + em->orig_block_len;
+                       if (end > *start) {
+                               *start = end;
+                               ret = 1;
+                       }
                }
        }
        if (search_list == &trans->transaction->pending_chunks) {
@@ -2678,6 +2694,9 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                return -EINVAL;
        }
        map = (struct map_lookup *)em->bdev;
+       lock_chunks(root->fs_info->chunk_root);
+       check_system_chunk(trans, extent_root, map->type);
+       unlock_chunks(root->fs_info->chunk_root);
 
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_device *device = map->stripes[i].dev;
@@ -3977,9 +3996,9 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
        uuid_root = btrfs_create_tree(trans, fs_info,
                                      BTRFS_UUID_TREE_OBJECTID);
        if (IS_ERR(uuid_root)) {
-               btrfs_abort_transaction(trans, tree_root,
-                                       PTR_ERR(uuid_root));
-               return PTR_ERR(uuid_root);
+               ret = PTR_ERR(uuid_root);
+               btrfs_abort_transaction(trans, tree_root, ret);
+               return ret;
        }
 
        fs_info->uuid_root = uuid_root;
@@ -4034,6 +4053,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        int slot;
        int failed = 0;
        bool retried = false;
+       bool checked_pending_chunks = false;
        struct extent_buffer *l;
        struct btrfs_key key;
        struct btrfs_super_block *super_copy = root->fs_info->super_copy;
@@ -4114,15 +4134,6 @@ again:
                goto again;
        } else if (failed && retried) {
                ret = -ENOSPC;
-               lock_chunks(root);
-
-               btrfs_device_set_total_bytes(device, old_size);
-               if (device->writeable)
-                       device->fs_devices->total_rw_bytes += diff;
-               spin_lock(&root->fs_info->free_chunk_lock);
-               root->fs_info->free_chunk_space += diff;
-               spin_unlock(&root->fs_info->free_chunk_lock);
-               unlock_chunks(root);
                goto done;
        }
 
@@ -4134,6 +4145,35 @@ again:
        }
 
        lock_chunks(root);
+
+       /*
+        * We checked in the above loop all device extents that were already in
+        * the device tree. However before we have updated the device's
+        * total_bytes to the new size, we might have had chunk allocations that
+        * have not completed yet (new block groups attached to transaction
+        * handles), and therefore their device extents were not yet in the
+        * device tree and we missed them in the loop above. So if we have any
+        * pending chunk using a device extent that overlaps the device range
+        * that we can not use anymore, commit the current transaction and
+        * repeat the search on the device tree - this way we guarantee we will
+        * not have chunks using device extents that end beyond 'new_size'.
+        */
+       if (!checked_pending_chunks) {
+               u64 start = new_size;
+               u64 len = old_size - new_size;
+
+               if (contains_pending_extent(trans, device, &start, len)) {
+                       unlock_chunks(root);
+                       checked_pending_chunks = true;
+                       failed = 0;
+                       retried = false;
+                       ret = btrfs_commit_transaction(trans, root);
+                       if (ret)
+                               goto done;
+                       goto again;
+               }
+       }
+
        btrfs_device_set_disk_total_bytes(device, new_size);
        if (list_empty(&device->resized_list))
                list_add_tail(&device->resized_list,
@@ -4148,6 +4188,16 @@ again:
        btrfs_end_transaction(trans, root);
 done:
        btrfs_free_path(path);
+       if (ret) {
+               lock_chunks(root);
+               btrfs_device_set_total_bytes(device, old_size);
+               if (device->writeable)
+                       device->fs_devices->total_rw_bytes += diff;
+               spin_lock(&root->fs_info->free_chunk_lock);
+               root->fs_info->free_chunk_space += diff;
+               spin_unlock(&root->fs_info->free_chunk_lock);
+               unlock_chunks(root);
+       }
        return ret;
 }
 
@@ -6147,6 +6197,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                                free_extent_map(em);
                                return -EIO;
                        }
+                       btrfs_warn(root->fs_info, "devid %llu uuid %pU is missing",
+                                               devid, uuid);
                }
                map->stripes[i].dev->in_fs_metadata = 1;
        }
@@ -6266,10 +6318,11 @@ static int read_one_dev(struct btrfs_root *root,
                if (!btrfs_test_opt(root, DEGRADED))
                        return -EIO;
 
-               btrfs_warn(root->fs_info, "devid %llu missing", devid);
                device = add_missing_dev(root, fs_devices, devid, dev_uuid);
                if (!device)
                        return -ENOMEM;
+               btrfs_warn(root->fs_info, "devid %llu uuid %pU missing",
+                               devid, dev_uuid);
        } else {
                if (!device->bdev && !btrfs_test_opt(root, DEGRADED))
                        return -EIO;
index 430e034..7dc886c 100644 (file)
@@ -24,6 +24,7 @@
 #include "cifsfs.h"
 #include "dns_resolve.h"
 #include "cifs_debug.h"
+#include "cifs_unicode.h"
 
 static LIST_HEAD(cifs_dfs_automount_list);
 
@@ -312,7 +313,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
        xid = get_xid();
        rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls,
                &num_referrals, &referrals,
-               cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+               cifs_remap(cifs_sb));
        free_xid(xid);
 
        cifs_put_tlink(tlink);
index 0303c67..5a53ac6 100644 (file)
 #include "cifsglob.h"
 #include "cifs_debug.h"
 
-/*
- * cifs_utf16_bytes - how long will a string be after conversion?
- * @utf16 - pointer to input string
- * @maxbytes - don't go past this many bytes of input string
- * @codepage - destination codepage
- *
- * Walk a utf16le string and return the number of bytes that the string will
- * be after being converted to the given charset, not including any null
- * termination required. Don't walk past maxbytes in the source buffer.
- */
-int
-cifs_utf16_bytes(const __le16 *from, int maxbytes,
-               const struct nls_table *codepage)
-{
-       int i;
-       int charlen, outlen = 0;
-       int maxwords = maxbytes / 2;
-       char tmp[NLS_MAX_CHARSET_SIZE];
-       __u16 ftmp;
-
-       for (i = 0; i < maxwords; i++) {
-               ftmp = get_unaligned_le16(&from[i]);
-               if (ftmp == 0)
-                       break;
-
-               charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
-               if (charlen > 0)
-                       outlen += charlen;
-               else
-                       outlen++;
-       }
-
-       return outlen;
-}
-
 int cifs_remap(struct cifs_sb_info *cifs_sb)
 {
        int map_type;
@@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target)
  * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
  */
 static int
-cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
+cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp,
             int maptype)
 {
        int len = 1;
+       __u16 src_char;
+
+       src_char = *from;
 
        if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target))
                return len;
@@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
 
        /* if character not one of seven in special remap set */
        len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
-       if (len <= 0) {
-               *target = '?';
-               len = 1;
-       }
+       if (len <= 0)
+               goto surrogate_pair;
+
+       return len;
+
+surrogate_pair:
+       /* convert SURROGATE_PAIR and IVS */
+       if (strcmp(cp->charset, "utf8"))
+               goto unknown;
+       len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6);
+       if (len <= 0)
+               goto unknown;
+       return len;
+
+unknown:
+       *target = '?';
+       len = 1;
        return len;
 }
 
@@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
        int nullsize = nls_nullsize(codepage);
        int fromwords = fromlen / 2;
        char tmp[NLS_MAX_CHARSET_SIZE];
-       __u16 ftmp;
+       __u16 ftmp[3];          /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */
 
        /*
         * because the chars can be of varying widths, we need to take care
@@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
        safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
 
        for (i = 0; i < fromwords; i++) {
-               ftmp = get_unaligned_le16(&from[i]);
-               if (ftmp == 0)
+               ftmp[0] = get_unaligned_le16(&from[i]);
+               if (ftmp[0] == 0)
                        break;
+               if (i + 1 < fromwords)
+                       ftmp[1] = get_unaligned_le16(&from[i + 1]);
+               else
+                       ftmp[1] = 0;
+               if (i + 2 < fromwords)
+                       ftmp[2] = get_unaligned_le16(&from[i + 2]);
+               else
+                       ftmp[2] = 0;
 
                /*
                 * check to see if converting this character might make the
@@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
                /* put converted char into 'to' buffer */
                charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type);
                outlen += charlen;
+
+               /* charlen (=bytes of UTF-8 for 1 character)
+                * 4bytes UTF-8(surrogate pair) is charlen=4
+                *   (4bytes UTF-16 code)
+                * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4
+                *   (2 UTF-8 pairs divided to 2 UTF-16 pairs) */
+               if (charlen == 4)
+                       i++;
+               else if (charlen >= 5)
+                       /* 5-6bytes UTF-8 */
+                       i += 2;
        }
 
        /* properly null-terminate string */
@@ -295,6 +295,46 @@ success:
        return i;
 }
 
+/*
+ * cifs_utf16_bytes - how long will a string be after conversion?
+ * @utf16 - pointer to input string
+ * @maxbytes - don't go past this many bytes of input string
+ * @codepage - destination codepage
+ *
+ * Walk a utf16le string and return the number of bytes that the string will
+ * be after being converted to the given charset, not including any null
+ * termination required. Don't walk past maxbytes in the source buffer.
+ */
+int
+cifs_utf16_bytes(const __le16 *from, int maxbytes,
+               const struct nls_table *codepage)
+{
+       int i;
+       int charlen, outlen = 0;
+       int maxwords = maxbytes / 2;
+       char tmp[NLS_MAX_CHARSET_SIZE];
+       __u16 ftmp[3];
+
+       for (i = 0; i < maxwords; i++) {
+               ftmp[0] = get_unaligned_le16(&from[i]);
+               if (ftmp[0] == 0)
+                       break;
+               if (i + 1 < maxwords)
+                       ftmp[1] = get_unaligned_le16(&from[i + 1]);
+               else
+                       ftmp[1] = 0;
+               if (i + 2 < maxwords)
+                       ftmp[2] = get_unaligned_le16(&from[i + 2]);
+               else
+                       ftmp[2] = 0;
+
+               charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD);
+               outlen += charlen;
+       }
+
+       return outlen;
+}
+
 /*
  * cifs_strndup_from_utf16 - copy a string from wire format to the local
  * codepage
@@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
        char src_char;
        __le16 dst_char;
        wchar_t tmp;
+       wchar_t *wchar_to;      /* UTF-16 */
+       int ret;
+       unicode_t u;
 
        if (map_chars == NO_MAP_UNI_RSVD)
                return cifs_strtoUTF16(target, source, PATH_MAX, cp);
 
+       wchar_to = kzalloc(6, GFP_KERNEL);
+
        for (i = 0; i < srclen; j++) {
                src_char = source[i];
                charlen = 1;
@@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
                         * if no match, use question mark, which at least in
                         * some cases serves as wild card
                         */
-                       if (charlen < 1) {
-                               dst_char = cpu_to_le16(0x003f);
-                               charlen = 1;
+                       if (charlen > 0)
+                               goto ctoUTF16;
+
+                       /* convert SURROGATE_PAIR */
+                       if (strcmp(cp->charset, "utf8") || !wchar_to)
+                               goto unknown;
+                       if (*(source + i) & 0x80) {
+                               charlen = utf8_to_utf32(source + i, 6, &u);
+                               if (charlen < 0)
+                                       goto unknown;
+                       } else
+                               goto unknown;
+                       ret  = utf8s_to_utf16s(source + i, charlen,
+                                              UTF16_LITTLE_ENDIAN,
+                                              wchar_to, 6);
+                       if (ret < 0)
+                               goto unknown;
+
+                       i += charlen;
+                       dst_char = cpu_to_le16(*wchar_to);
+                       if (charlen <= 3)
+                               /* 1-3bytes UTF-8 to 2bytes UTF-16 */
+                               put_unaligned(dst_char, &target[j]);
+                       else if (charlen == 4) {
+                               /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16
+                                * 7-8bytes UTF-8(IVS) divided to 2 UTF-16
+                                *   (charlen=3+4 or 4+4) */
+                               put_unaligned(dst_char, &target[j]);
+                               dst_char = cpu_to_le16(*(wchar_to + 1));
+                               j++;
+                               put_unaligned(dst_char, &target[j]);
+                       } else if (charlen >= 5) {
+                               /* 5-6bytes UTF-8 to 6bytes UTF-16 */
+                               put_unaligned(dst_char, &target[j]);
+                               dst_char = cpu_to_le16(*(wchar_to + 1));
+                               j++;
+                               put_unaligned(dst_char, &target[j]);
+                               dst_char = cpu_to_le16(*(wchar_to + 2));
+                               j++;
+                               put_unaligned(dst_char, &target[j]);
                        }
+                       continue;
+
+unknown:
+                       dst_char = cpu_to_le16(0x003f);
+                       charlen = 1;
                }
+
+ctoUTF16:
                /*
                 * character may take more than one byte in the source string,
                 * but will take exactly two bytes in the target string
@@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
 
 ctoUTF16_out:
        put_unaligned(0, &target[j]); /* Null terminate target unicode string */
+       kfree(wchar_to);
        return j;
 }
 
index f5089bd..0a9fb6b 100644 (file)
@@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
                seq_puts(s, ",nouser_xattr");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR)
                seq_puts(s, ",mapchars");
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR)
+               seq_puts(s, ",mapposix");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
                seq_puts(s, ",sfu");
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
index c31ce98..c63fd1d 100644 (file)
@@ -361,11 +361,11 @@ extern int CIFSUnixCreateHardLink(const unsigned int xid,
 extern int CIFSUnixCreateSymLink(const unsigned int xid,
                        struct cifs_tcon *tcon,
                        const char *fromName, const char *toName,
-                       const struct nls_table *nls_codepage);
+                       const struct nls_table *nls_codepage, int remap);
 extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
                        struct cifs_tcon *tcon,
                        const unsigned char *searchName, char **syminfo,
-                       const struct nls_table *nls_codepage);
+                       const struct nls_table *nls_codepage, int remap);
 extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
                               __u16 fid, char **symlinkinfo,
                               const struct nls_table *nls_codepage);
index 84650a5..f26ffbf 100644 (file)
@@ -2784,7 +2784,7 @@ copyRetry:
 int
 CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon,
                      const char *fromName, const char *toName,
-                     const struct nls_table *nls_codepage)
+                     const struct nls_table *nls_codepage, int remap)
 {
        TRANSACTION2_SPI_REQ *pSMB = NULL;
        TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -2804,9 +2804,9 @@ createSymLinkRetry:
 
        if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
                name_len =
-                   cifs_strtoUTF16((__le16 *) pSMB->FileName, fromName,
-                                   /* find define for this maxpathcomponent */
-                                   PATH_MAX, nls_codepage);
+                   cifsConvertToUTF16((__le16 *) pSMB->FileName, fromName,
+                               /* find define for this maxpathcomponent */
+                                       PATH_MAX, nls_codepage, remap);
                name_len++;     /* trailing null */
                name_len *= 2;
 
@@ -2828,9 +2828,9 @@ createSymLinkRetry:
        data_offset = (char *) (&pSMB->hdr.Protocol) + offset;
        if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
                name_len_target =
-                   cifs_strtoUTF16((__le16 *) data_offset, toName, PATH_MAX
-                                   /* find define for this maxpathcomponent */
-                                   , nls_codepage);
+                   cifsConvertToUTF16((__le16 *) data_offset, toName,
+                               /* find define for this maxpathcomponent */
+                                       PATH_MAX, nls_codepage, remap);
                name_len_target++;      /* trailing null */
                name_len_target *= 2;
        } else {        /* BB improve the check for buffer overruns BB */
@@ -3034,7 +3034,7 @@ winCreateHardLinkRetry:
 int
 CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
                        const unsigned char *searchName, char **symlinkinfo,
-                       const struct nls_table *nls_codepage)
+                       const struct nls_table *nls_codepage, int remap)
 {
 /* SMB_QUERY_FILE_UNIX_LINK */
        TRANSACTION2_QPI_REQ *pSMB = NULL;
@@ -3055,8 +3055,9 @@ querySymLinkRetry:
 
        if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
                name_len =
-                       cifs_strtoUTF16((__le16 *) pSMB->FileName, searchName,
-                                       PATH_MAX, nls_codepage);
+                       cifsConvertToUTF16((__le16 *) pSMB->FileName,
+                                          searchName, PATH_MAX, nls_codepage,
+                                          remap);
                name_len++;     /* trailing null */
                name_len *= 2;
        } else {        /* BB improve the check for buffer overruns BB */
@@ -4917,7 +4918,7 @@ getDFSRetry:
                strncpy(pSMB->RequestFileName, search_name, name_len);
        }
 
-       if (ses->server && ses->server->sign)
+       if (ses->server->sign)
                pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE;
 
        pSMB->hdr.Uid = ses->Suid;
index f3bfe08..8383d5e 100644 (file)
@@ -386,6 +386,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
                rc = generic_ip_connect(server);
                if (rc) {
                        cifs_dbg(FYI, "reconnect error %d\n", rc);
+                       mutex_unlock(&server->srv_mutex);
                        msleep(3000);
                } else {
                        atomic_inc(&tcpSesReconnectCount);
@@ -393,8 +394,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
                        if (server->tcpStatus != CifsExiting)
                                server->tcpStatus = CifsNeedNegotiate;
                        spin_unlock(&GlobalMid_Lock);
+                       mutex_unlock(&server->srv_mutex);
                }
-               mutex_unlock(&server->srv_mutex);
        } while (server->tcpStatus == CifsNeedReconnect);
 
        return rc;
index 338d569..c3eb998 100644 (file)
@@ -620,8 +620,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
                }
                rc = CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
                                            cifs_sb->local_nls,
-                                           cifs_sb->mnt_cifs_flags &
-                                               CIFS_MOUNT_MAP_SPECIAL_CHR);
+                                           cifs_remap(cifs_sb));
                if (rc)
                        goto mknod_out;
 
index cafbf10..3f50cee 100644 (file)
@@ -140,8 +140,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
-                            cifs_sb->mnt_cifs_flags &
-                                       CIFS_MOUNT_MAP_SPECIAL_CHR);
+                            cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);
 
        if (rc)
@@ -1553,8 +1552,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
                rc = server->ops->mand_unlock_range(cfile, flock, xid);
 
 out:
-       if (flock->fl_flags & FL_POSIX)
-               posix_lock_file_wait(file, flock);
+       if (flock->fl_flags & FL_POSIX && !rc)
+               rc = posix_lock_file_wait(file, flock);
        return rc;
 }
 
index 55b5811..f621b44 100644 (file)
@@ -373,8 +373,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
 
        /* could have done a find first instead but this returns more info */
        rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data,
-                                 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
-                                       CIFS_MOUNT_MAP_SPECIAL_CHR);
+                                 cifs_sb->local_nls, cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);
 
        if (!rc) {
@@ -402,9 +401,25 @@ int cifs_get_inode_info_unix(struct inode **pinode,
                        rc = -ENOMEM;
        } else {
                /* we already have inode, update it */
+
+               /* if uniqueid is different, return error */
+               if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM &&
+                   CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) {
+                       rc = -ESTALE;
+                       goto cgiiu_exit;
+               }
+
+               /* if filetype is different, return error */
+               if (unlikely(((*pinode)->i_mode & S_IFMT) !=
+                   (fattr.cf_mode & S_IFMT))) {
+                       rc = -ESTALE;
+                       goto cgiiu_exit;
+               }
+
                cifs_fattr_to_inode(*pinode, &fattr);
        }
 
+cgiiu_exit:
        return rc;
 }
 
@@ -839,6 +854,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
                if (!*inode)
                        rc = -ENOMEM;
        } else {
+               /* we already have inode, update it */
+
+               /* if filetype is different, return error */
+               if (unlikely(((*inode)->i_mode & S_IFMT) !=
+                   (fattr.cf_mode & S_IFMT))) {
+                       rc = -ESTALE;
+                       goto cgii_exit;
+               }
+
                cifs_fattr_to_inode(*inode, &fattr);
        }
 
@@ -2215,8 +2239,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
                pTcon = tlink_tcon(tlink);
                rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
                                    cifs_sb->local_nls,
-                                   cifs_sb->mnt_cifs_flags &
-                                       CIFS_MOUNT_MAP_SPECIAL_CHR);
+                                   cifs_remap(cifs_sb));
                cifs_put_tlink(tlink);
        }
 
index 252e672..e6c707c 100644 (file)
@@ -717,7 +717,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
                rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname);
        else if (pTcon->unix_ext)
                rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname,
-                                          cifs_sb->local_nls);
+                                          cifs_sb->local_nls,
+                                          cifs_remap(cifs_sb));
        /* else
           rc = CIFSCreateReparseSymLink(xid, pTcon, fromName, toName,
                                        cifs_sb_target->local_nls); */
index b4a4723..b1eede3 100644 (file)
@@ -90,6 +90,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
        if (dentry) {
                inode = d_inode(dentry);
                if (inode) {
+                       if (d_mountpoint(dentry))
+                               goto out;
                        /*
                         * If we're generating inode numbers, then we don't
                         * want to clobber the existing one with the one that
index 7bfdd60..fc537c2 100644 (file)
@@ -960,7 +960,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
        /* Check for unix extensions */
        if (cap_unix(tcon->ses)) {
                rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path,
-                                            cifs_sb->local_nls);
+                                            cifs_sb->local_nls,
+                                            cifs_remap(cifs_sb));
                if (rc == -EREMOTE)
                        rc = cifs_unix_dfs_readlink(xid, tcon, full_path,
                                                    target_path,
index 65cd7a8..54cbe19 100644 (file)
@@ -110,7 +110,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
 
        /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */
        /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */
-       if ((tcon->ses) &&
+       if ((tcon->ses) && (tcon->ses->server) &&
            (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
                hdr->CreditCharge = cpu_to_le16(1);
        /* else CreditCharge MBZ */
index 656ce52..37b5afd 100644 (file)
@@ -1239,13 +1239,13 @@ ascend:
                /* might go back up the wrong parent if we have had a rename. */
                if (need_seqretry(&rename_lock, seq))
                        goto rename_retry;
-               next = child->d_child.next;
-               while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+               /* go into the first sibling still alive */
+               do {
+                       next = child->d_child.next;
                        if (next == &this_parent->d_subdirs)
                                goto ascend;
                        child = list_entry(next, struct dentry, d_child);
-                       next = next->next;
-               }
+               } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
                rcu_read_unlock();
                goto resume;
        }
index 0822345..83f4e76 100644 (file)
@@ -159,7 +159,7 @@ int omfs_allocate_range(struct super_block *sb,
        goto out;
 
 found:
-       *return_block = i * bits_per_entry + bit;
+       *return_block = (u64) i * bits_per_entry + bit;
        *return_size = run;
        ret = set_run(sb, i, bits_per_entry, bit, run, 1);
 
index 138321b..3d935c8 100644 (file)
@@ -306,7 +306,8 @@ static const struct super_operations omfs_sops = {
  */
 static int omfs_get_imap(struct super_block *sb)
 {
-       unsigned int bitmap_size, count, array_size;
+       unsigned int bitmap_size, array_size;
+       int count;
        struct omfs_sb_info *sbi = OMFS_SB(sb);
        struct buffer_head *bh;
        unsigned long **ptr;
@@ -359,7 +360,7 @@ nomem:
 }
 
 enum {
-       Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask
+       Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err
 };
 
 static const match_table_t tokens = {
@@ -368,6 +369,7 @@ static const match_table_t tokens = {
        {Opt_umask, "umask=%o"},
        {Opt_dmask, "dmask=%o"},
        {Opt_fmask, "fmask=%o"},
+       {Opt_err, NULL},
 };
 
 static int parse_options(char *options, struct omfs_sb_info *sbi)
@@ -548,8 +550,10 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent)
        }
 
        sb->s_root = d_make_root(root);
-       if (!sb->s_root)
+       if (!sb->s_root) {
+               ret = -ENOMEM;
                goto out_brelse_bh2;
+       }
        printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
 
        ret = 0;
index 24f6404..84d693d 100644 (file)
@@ -299,6 +299,9 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
        struct cred *override_cred;
        char *link = NULL;
 
+       if (WARN_ON(!workdir))
+               return -EROFS;
+
        ovl_path_upper(parent, &parentpath);
        upperdir = parentpath.dentry;
 
index d139405..692ceda 100644 (file)
@@ -222,6 +222,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry,
        struct kstat stat;
        int err;
 
+       if (WARN_ON(!workdir))
+               return ERR_PTR(-EROFS);
+
        err = ovl_lock_rename_workdir(workdir, upperdir);
        if (err)
                goto out;
@@ -322,6 +325,9 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
        struct dentry *newdentry;
        int err;
 
+       if (WARN_ON(!workdir))
+               return -EROFS;
+
        err = ovl_lock_rename_workdir(workdir, upperdir);
        if (err)
                goto out;
@@ -506,11 +512,28 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
        struct dentry *opaquedir = NULL;
        int err;
 
-       if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
-               opaquedir = ovl_check_empty_and_clear(dentry);
-               err = PTR_ERR(opaquedir);
-               if (IS_ERR(opaquedir))
-                       goto out;
+       if (WARN_ON(!workdir))
+               return -EROFS;
+
+       if (is_dir) {
+               if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
+                       opaquedir = ovl_check_empty_and_clear(dentry);
+                       err = PTR_ERR(opaquedir);
+                       if (IS_ERR(opaquedir))
+                               goto out;
+               } else {
+                       LIST_HEAD(list);
+
+                       /*
+                        * When removing an empty opaque directory, then it
+                        * makes no sense to replace it with an exact replica of
+                        * itself.  But emptiness still needs to be checked.
+                        */
+                       err = ovl_check_empty_dir(dentry, &list);
+                       ovl_cache_free(&list);
+                       if (err)
+                               goto out;
+               }
        }
 
        err = ovl_lock_rename_workdir(workdir, upperdir);
index 5f0d199..bf8537c 100644 (file)
@@ -529,7 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 {
        struct ovl_fs *ufs = sb->s_fs_info;
 
-       if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
+       if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir))
                return -EROFS;
 
        return 0;
@@ -925,9 +925,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
                ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry);
                err = PTR_ERR(ufs->workdir);
                if (IS_ERR(ufs->workdir)) {
-                       pr_err("overlayfs: failed to create directory %s/%s\n",
-                              ufs->config.workdir, OVL_WORKDIR_NAME);
-                       goto out_put_upper_mnt;
+                       pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n",
+                               ufs->config.workdir, OVL_WORKDIR_NAME, -err);
+                       sb->s_flags |= MS_RDONLY;
+                       ufs->workdir = NULL;
                }
        }
 
@@ -997,7 +998,6 @@ out_put_lower_mnt:
        kfree(ufs->lower_mnt);
 out_put_workdir:
        dput(ufs->workdir);
-out_put_upper_mnt:
        mntput(ufs->upper_mnt);
 out_put_lowerpath:
        for (i = 0; i < numlower; i++)
index 555f821..52b4927 100644 (file)
@@ -538,6 +538,7 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc)
 
        return res;
 }
+EXPORT_SYMBOL(seq_dentry);
 
 static void *single_start(struct seq_file *p, loff_t *pos)
 {
index 04e79d5..e9d401c 100644 (file)
@@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
  * After the last attribute is removed revert to original inode format,
  * making all literal area available to the data fork once more.
  */
-STATIC void
-xfs_attr_fork_reset(
+void
+xfs_attr_fork_remove(
        struct xfs_inode        *ip,
        struct xfs_trans        *tp)
 {
@@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
            (mp->m_flags & XFS_MOUNT_ATTR2) &&
            (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
            !(args->op_flags & XFS_DA_OP_ADDNAME)) {
-               xfs_attr_fork_reset(dp, args->trans);
+               xfs_attr_fork_remove(dp, args->trans);
        } else {
                xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
                dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
@@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform(
        if (forkoff == -1) {
                ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
                ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
-               xfs_attr_fork_reset(dp, args->trans);
+               xfs_attr_fork_remove(dp, args->trans);
                goto out;
        }
 
index 025c4b8..882c8d3 100644 (file)
@@ -53,7 +53,7 @@ int   xfs_attr_shortform_remove(struct xfs_da_args *args);
 int    xfs_attr_shortform_list(struct xfs_attr_list_context *context);
 int    xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int    xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
-
+void   xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
 
 /*
  * Internal routines when attribute fork size == XFS_LBSIZE(mp).
index aeffeaa..f1026e8 100644 (file)
@@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align(
                align_alen += temp;
                align_off -= temp;
        }
+
+       /* Same adjustment for the end of the requested area. */
+       temp = (align_alen % extsz);
+       if (temp)
+               align_alen += extsz - temp;
+
        /*
-        * Same adjustment for the end of the requested area.
+        * For large extent hint sizes, the aligned extent might be larger than
+        * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
+        * the length back under MAXEXTLEN. The outer allocation loops handle
+        * short allocation just fine, so it is safe to do this. We only want to
+        * do it when we are forced to, though, because it means more allocation
+        * operations are required.
         */
-       if ((temp = (align_alen % extsz))) {
-               align_alen += extsz - temp;
-       }
+       while (align_alen > MAXEXTLEN)
+               align_alen -= extsz;
+       ASSERT(align_alen <= MAXEXTLEN);
+
        /*
         * If the previous block overlaps with this proposed allocation
         * then move the start forward without adjusting the length.
@@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align(
                        return -EINVAL;
        } else {
                ASSERT(orig_off >= align_off);
-               ASSERT(orig_end <= align_off + align_alen);
+               /* see MAXEXTLEN handling above */
+               ASSERT(orig_end <= align_off + align_alen ||
+                      align_alen + extsz > MAXEXTLEN);
        }
 
 #ifdef DEBUG
@@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc(
        /* Figure out the extent size, adjust alen */
        extsz = xfs_get_extsz_hint(ip);
        if (extsz) {
-               /*
-                * Make sure we don't exceed a single extent length when we
-                * align the extent by reducing length we are going to
-                * allocate by the maximum amount extent size aligment may
-                * require.
-                */
-               alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
                error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
                                               1, 0, &aoff, &alen);
                ASSERT(!error);
index 07349a1..1c9e755 100644 (file)
@@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc(
         */
        newlen = args.mp->m_ialloc_inos;
        if (args.mp->m_maxicount &&
-           percpu_counter_read(&args.mp->m_icount) + newlen >
+           percpu_counter_read_positive(&args.mp->m_icount) + newlen >
                                                        args.mp->m_maxicount)
                return -ENOSPC;
        args.minlen = args.maxlen = args.mp->m_ialloc_blks;
@@ -1339,10 +1339,13 @@ xfs_dialloc(
         * If we have already hit the ceiling of inode blocks then clear
         * okalloc so we scan all available agi structures for a free
         * inode.
+        *
+        * Read rough value of mp->m_icount by percpu_counter_read_positive,
+        * which will sacrifice the preciseness but improve the performance.
         */
        if (mp->m_maxicount &&
-           percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
-                                                       mp->m_maxicount) {
+           percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
+                                               > mp->m_maxicount) {
                noroom = 1;
                okalloc = 0;
        }
index f9c1c64..3fbf167 100644 (file)
@@ -380,23 +380,31 @@ xfs_attr3_root_inactive(
        return error;
 }
 
+/*
+ * xfs_attr_inactive kills all traces of an attribute fork on an inode. It
+ * removes both the on-disk and in-memory inode fork. Note that this also has to
+ * handle the condition of inodes without attributes but with an attribute fork
+ * configured, so we can't use xfs_inode_hasattr() here.
+ *
+ * The in-memory attribute fork is removed even on error.
+ */
 int
-xfs_attr_inactive(xfs_inode_t *dp)
+xfs_attr_inactive(
+       struct xfs_inode        *dp)
 {
-       xfs_trans_t *trans;
-       xfs_mount_t *mp;
-       int error;
+       struct xfs_trans        *trans;
+       struct xfs_mount        *mp;
+       int                     cancel_flags = 0;
+       int                     lock_mode = XFS_ILOCK_SHARED;
+       int                     error = 0;
 
        mp = dp->i_mount;
        ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
 
-       xfs_ilock(dp, XFS_ILOCK_SHARED);
-       if (!xfs_inode_hasattr(dp) ||
-           dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-               xfs_iunlock(dp, XFS_ILOCK_SHARED);
-               return 0;
-       }
-       xfs_iunlock(dp, XFS_ILOCK_SHARED);
+       xfs_ilock(dp, lock_mode);
+       if (!XFS_IFORK_Q(dp))
+               goto out_destroy_fork;
+       xfs_iunlock(dp, lock_mode);
 
        /*
         * Start our first transaction of the day.
@@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp)
         * the inode in every transaction to let it float upward through
         * the log.
         */
+       lock_mode = 0;
        trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
        error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
-       if (error) {
-               xfs_trans_cancel(trans, 0);
-               return error;
-       }
-       xfs_ilock(dp, XFS_ILOCK_EXCL);
+       if (error)
+               goto out_cancel;
+
+       lock_mode = XFS_ILOCK_EXCL;
+       cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
+       xfs_ilock(dp, lock_mode);
+
+       if (!XFS_IFORK_Q(dp))
+               goto out_cancel;
 
        /*
         * No need to make quota reservations here. We expect to release some
@@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp)
         */
        xfs_trans_ijoin(trans, dp, 0);
 
-       /*
-        * Decide on what work routines to call based on the inode size.
-        */
-       if (!xfs_inode_hasattr(dp) ||
-           dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-               error = 0;
-               goto out;
+       /* invalidate and truncate the attribute fork extents */
+       if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+               error = xfs_attr3_root_inactive(&trans, dp);
+               if (error)
+                       goto out_cancel;
+
+               error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
+               if (error)
+                       goto out_cancel;
        }
-       error = xfs_attr3_root_inactive(&trans, dp);
-       if (error)
-               goto out;
 
-       error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
-       if (error)
-               goto out;
+       /* Reset the attribute fork - this also destroys the in-core fork */
+       xfs_attr_fork_remove(dp, trans);
 
        error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
-       xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
+       xfs_iunlock(dp, lock_mode);
        return error;
 
-out:
-       xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-       xfs_iunlock(dp, XFS_ILOCK_EXCL);
+out_cancel:
+       xfs_trans_cancel(trans, cancel_flags);
+out_destroy_fork:
+       /* kill the in-core attr fork before we drop the inode lock */
+       if (dp->i_afp)
+               xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+       if (lock_mode)
+               xfs_iunlock(dp, lock_mode);
        return error;
 }
index 8121e75..3b75912 100644 (file)
@@ -124,7 +124,7 @@ xfs_iozero(
                status = 0;
        } while (count);
 
-       return (-status);
+       return status;
 }
 
 int
index d6ebc85..539a85f 100644 (file)
@@ -1946,21 +1946,17 @@ xfs_inactive(
        /*
         * If there are attributes associated with the file then blow them away
         * now.  The code calls a routine that recursively deconstructs the
-        * attribute fork.  We need to just commit the current transaction
-        * because we can't use it for xfs_attr_inactive().
+        * attribute fork. It also blows away the in-core attribute fork.
         */
-       if (ip->i_d.di_anextents > 0) {
-               ASSERT(ip->i_d.di_forkoff != 0);
-
+       if (XFS_IFORK_Q(ip)) {
                error = xfs_attr_inactive(ip);
                if (error)
                        return;
        }
 
-       if (ip->i_afp)
-               xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
+       ASSERT(!ip->i_afp);
        ASSERT(ip->i_d.di_anextents == 0);
+       ASSERT(ip->i_d.di_forkoff == 0);
 
        /*
         * Free the inode.
@@ -2883,7 +2879,13 @@ xfs_rename_alloc_whiteout(
        if (error)
                return error;
 
-       /* Satisfy xfs_bumplink that this is a real tmpfile */
+       /*
+        * Prepare the tmpfile inode as if it were created through the VFS.
+        * Otherwise, the link increment paths will complain about nlink 0->1.
+        * Drop the link count as done by d_tmpfile(), complete the inode setup
+        * and flag it as linkable.
+        */
+       drop_nlink(VFS_I(tmpfile));
        xfs_finish_inode_setup(tmpfile);
        VFS_I(tmpfile)->i_state |= I_LINKABLE;
 
@@ -3151,7 +3153,7 @@ xfs_rename(
         * intermediate state on disk.
         */
        if (wip) {
-               ASSERT(wip->i_d.di_nlink == 0);
+               ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
                error = xfs_bumplink(tp, wip);
                if (error)
                        goto out_trans_abort;
index 2ce7ee3..6f23fbd 100644 (file)
@@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp)
        return xfs_sync_sb(mp, true);
 }
 
+/*
+ * Deltas for the inode count are +/-64, hence we use a large batch size
+ * of 128 so we don't need to take the counter lock on every update.
+ */
+#define XFS_ICOUNT_BATCH       128
 int
 xfs_mod_icount(
        struct xfs_mount        *mp,
        int64_t                 delta)
 {
-       /* deltas are +/-64, hence the large batch size of 128. */
-       __percpu_counter_add(&mp->m_icount, delta, 128);
-       if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
+       __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+       if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
                ASSERT(0);
                percpu_counter_add(&mp->m_icount, -delta);
                return -EINVAL;
@@ -1113,6 +1117,14 @@ xfs_mod_ifree(
        return 0;
 }
 
+/*
+ * Deltas for the block count can vary from 1 to very large, but lock contention
+ * only occurs on frequent small block count updates such as in the delayed
+ * allocation path for buffered writes (page a time updates). Hence we set
+ * a large batch count (1024) to minimise global counter updates except when
+ * we get near to ENOSPC and we have to be very accurate with our updates.
+ */
+#define XFS_FDBLOCKS_BATCH     1024
 int
 xfs_mod_fdblocks(
        struct xfs_mount        *mp,
@@ -1151,25 +1163,19 @@ xfs_mod_fdblocks(
         * Taking blocks away, need to be more accurate the closer we
         * are to zero.
         *
-        * batch size is set to a maximum of 1024 blocks - if we are
-        * allocating of freeing extents larger than this then we aren't
-        * going to be hammering the counter lock so a lock per update
-        * is not a problem.
-        *
         * If the counter has a value of less than 2 * max batch size,
         * then make everything serialise as we are real close to
         * ENOSPC.
         */
-#define __BATCH        1024
-       if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+       if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
+                                    XFS_FDBLOCKS_BATCH) < 0)
                batch = 1;
        else
-               batch = __BATCH;
-#undef __BATCH
+               batch = XFS_FDBLOCKS_BATCH;
 
        __percpu_counter_add(&mp->m_fdblocks, delta, batch);
-       if (percpu_counter_compare(&mp->m_fdblocks,
-                                  XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+       if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp),
+                                    XFS_FDBLOCKS_BATCH) >= 0) {
                /* we had space! */
                return 0;
        }
index ae2982c..656da2a 100644 (file)
@@ -17,7 +17,7 @@
 #define PHY_ID_BCM7250                 0xae025280
 #define PHY_ID_BCM7364                 0xae025260
 #define PHY_ID_BCM7366                 0x600d8490
-#define PHY_ID_BCM7425                 0x03625e60
+#define PHY_ID_BCM7425                 0x600d86b0
 #define PHY_ID_BCM7429                 0x600d8730
 #define PHY_ID_BCM7439                 0x600d8480
 #define PHY_ID_BCM7439_2               0xae025080
index 27e285b..59915ea 100644 (file)
@@ -151,10 +151,8 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
        return 1;
 }
 
-static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+static inline unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-       set_bit(0, cpumask_bits(dstp));
-
        return 0;
 }
 
@@ -208,7 +206,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
 
 int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
 int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
+unsigned int cpumask_local_spread(unsigned int i, int node);
 
 /**
  * for_each_cpu - iterate over every cpu in a mask
index 50e5009..84a1094 100644 (file)
@@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs);
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
+
+static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+       return __percpu_counter_compare(fbc, rhs, percpu_counter_batch);
+}
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
                return 0;
 }
 
+static inline int
+__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
+{
+       return percpu_counter_compare(fbc, rhs);
+}
+
 static inline void
 percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
index c56a438..ce13cf2 100644 (file)
@@ -574,11 +574,14 @@ static inline void sctp_v6_map_v4(union sctp_addr *addr)
 /* Map v4 address to v4-mapped v6 address */
 static inline void sctp_v4_map_v6(union sctp_addr *addr)
 {
+       __be16 port;
+
+       port = addr->v4.sin_port;
+       addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
+       addr->v6.sin6_port = port;
        addr->v6.sin6_family = AF_INET6;
        addr->v6.sin6_flowinfo = 0;
        addr->v6.sin6_scope_id = 0;
-       addr->v6.sin6_port = addr->v4.sin_port;
-       addr->v6.sin6_addr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
        addr->v6.sin6_addr.s6_addr32[0] = 0;
        addr->v6.sin6_addr.s6_addr32[1] = 0;
        addr->v6.sin6_addr.s6_addr32[2] = htonl(0x0000ffff);
index d61be72..5f12257 100644 (file)
@@ -1,9 +1,7 @@
 #ifndef TARGET_CORE_BACKEND_H
 #define TARGET_CORE_BACKEND_H
 
-#define TRANSPORT_PLUGIN_PHBA_PDEV             1
-#define TRANSPORT_PLUGIN_VHBA_PDEV             2
-#define TRANSPORT_PLUGIN_VHBA_VDEV             3
+#define TRANSPORT_FLAG_PASSTHROUGH             1
 
 struct target_backend_cits {
        struct config_item_type tb_dev_cit;
@@ -22,7 +20,7 @@ struct se_subsystem_api {
        char inquiry_rev[4];
        struct module *owner;
 
-       u8 transport_type;
+       u8 transport_flags;
 
        int (*attach_hba)(struct se_hba *, u32);
        void (*detach_hba)(struct se_hba *);
@@ -138,5 +136,7 @@ int se_dev_set_queue_depth(struct se_device *, u32);
 int    se_dev_set_max_sectors(struct se_device *, u32);
 int    se_dev_set_optimal_sectors(struct se_device *, u32);
 int    se_dev_set_block_size(struct se_device *, u32);
+sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd,
+       sense_reason_t (*exec_cmd)(struct se_cmd *cmd));
 
 #endif /* TARGET_CORE_BACKEND_H */
index 25bb04c..b99c011 100644 (file)
@@ -40,8 +40,6 @@ struct target_fabric_configfs {
        struct config_item      *tf_fabric;
        /* Passed from fabric modules */
        struct config_item_type *tf_fabric_cit;
-       /* Pointer to target core subsystem */
-       struct configfs_subsystem *tf_subsys;
        /* Pointer to fabric's struct module */
        struct module *tf_module;
        struct target_core_fabric_ops tf_ops;
index 17c7f5a..0f4dc37 100644 (file)
@@ -4,7 +4,6 @@
 struct target_core_fabric_ops {
        struct module *module;
        const char *name;
-       struct configfs_subsystem *tf_subsys;
        char *(*get_fabric_name)(void);
        u8 (*get_fabric_proto_ident)(struct se_portal_group *);
        char *(*tpg_get_wwn)(struct se_portal_group *);
@@ -109,6 +108,9 @@ struct target_core_fabric_ops {
 int target_register_template(const struct target_core_fabric_ops *fo);
 void target_unregister_template(const struct target_core_fabric_ops *fo);
 
+int target_depend_item(struct config_item *item);
+void target_undepend_item(struct config_item *item);
+
 struct se_session *transport_init_session(enum target_prot_op);
 int transport_alloc_session_tags(struct se_session *, unsigned int,
                unsigned int);
index 7f79cf4..0b73af9 100644 (file)
@@ -1117,61 +1117,6 @@ DEFINE_EVENT(btrfs__workqueue_done, btrfs_workqueue_destroy,
        TP_ARGS(wq)
 );
 
-#define show_oper_type(type)                                           \
-       __print_symbolic(type,                                          \
-               { BTRFS_QGROUP_OPER_ADD_EXCL,   "OPER_ADD_EXCL" },      \
-               { BTRFS_QGROUP_OPER_ADD_SHARED, "OPER_ADD_SHARED" },    \
-               { BTRFS_QGROUP_OPER_SUB_EXCL,   "OPER_SUB_EXCL" },      \
-               { BTRFS_QGROUP_OPER_SUB_SHARED, "OPER_SUB_SHARED" })
-
-DECLARE_EVENT_CLASS(btrfs_qgroup_oper,
-
-       TP_PROTO(struct btrfs_qgroup_operation *oper),
-
-       TP_ARGS(oper),
-
-       TP_STRUCT__entry(
-               __field(        u64,  ref_root          )
-               __field(        u64,  bytenr            )
-               __field(        u64,  num_bytes         )
-               __field(        u64,  seq               )
-               __field(        int,  type              )
-               __field(        u64,  elem_seq          )
-       ),
-
-       TP_fast_assign(
-               __entry->ref_root       = oper->ref_root;
-               __entry->bytenr         = oper->bytenr,
-               __entry->num_bytes      = oper->num_bytes;
-               __entry->seq            = oper->seq;
-               __entry->type           = oper->type;
-               __entry->elem_seq       = oper->elem.seq;
-       ),
-
-       TP_printk("ref_root = %llu, bytenr = %llu, num_bytes = %llu, "
-                 "seq = %llu, elem.seq = %llu, type = %s",
-                 (unsigned long long)__entry->ref_root,
-                 (unsigned long long)__entry->bytenr,
-                 (unsigned long long)__entry->num_bytes,
-                 (unsigned long long)__entry->seq,
-                 (unsigned long long)__entry->elem_seq,
-                 show_oper_type(__entry->type))
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_account,
-
-       TP_PROTO(struct btrfs_qgroup_operation *oper),
-
-       TP_ARGS(oper)
-);
-
-DEFINE_EVENT(btrfs_qgroup_oper, btrfs_qgroup_record_ref,
-
-       TP_PROTO(struct btrfs_qgroup_operation *oper),
-
-       TP_ARGS(oper)
-);
-
 #endif /* _TRACE_BTRFS_H */
 
 /* This part must be outside protection */
index 81ea598..f7554fd 100644 (file)
@@ -140,19 +140,42 @@ DEFINE_EVENT(kmem_free, kfree,
        TP_ARGS(call_site, ptr)
 );
 
-DEFINE_EVENT(kmem_free, kmem_cache_free,
+DEFINE_EVENT_CONDITION(kmem_free, kmem_cache_free,
 
        TP_PROTO(unsigned long call_site, const void *ptr),
 
-       TP_ARGS(call_site, ptr)
+       TP_ARGS(call_site, ptr),
+
+       /*
+        * This trace can be potentially called from an offlined cpu.
+        * Since trace points use RCU and RCU should not be used from
+        * offline cpus, filter such calls out.
+        * While this trace can be called from a preemptable section,
+        * it has no impact on the condition since tasks can migrate
+        * only from online cpus to other online cpus. Thus its safe
+        * to use raw_smp_processor_id.
+        */
+       TP_CONDITION(cpu_online(raw_smp_processor_id()))
 );
 
-TRACE_EVENT(mm_page_free,
+TRACE_EVENT_CONDITION(mm_page_free,
 
        TP_PROTO(struct page *page, unsigned int order),
 
        TP_ARGS(page, order),
 
+
+       /*
+        * This trace can be potentially called from an offlined cpu.
+        * Since trace points use RCU and RCU should not be used from
+        * offline cpus, filter such calls out.
+        * While this trace can be called from a preemptable section,
+        * it has no impact on the condition since tasks can migrate
+        * only from online cpus to other online cpus. Thus its safe
+        * to use raw_smp_processor_id.
+        */
+       TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
        TP_STRUCT__entry(
                __field(        unsigned long,  pfn             )
                __field(        unsigned int,   order           )
@@ -253,12 +276,35 @@ DEFINE_EVENT(mm_page, mm_page_alloc_zone_locked,
        TP_ARGS(page, order, migratetype)
 );
 
-DEFINE_EVENT_PRINT(mm_page, mm_page_pcpu_drain,
+TRACE_EVENT_CONDITION(mm_page_pcpu_drain,
 
        TP_PROTO(struct page *page, unsigned int order, int migratetype),
 
        TP_ARGS(page, order, migratetype),
 
+       /*
+        * This trace can be potentially called from an offlined cpu.
+        * Since trace points use RCU and RCU should not be used from
+        * offline cpus, filter such calls out.
+        * While this trace can be called from a preemptable section,
+        * it has no impact on the condition since tasks can migrate
+        * only from online cpus to other online cpus. Thus its safe
+        * to use raw_smp_processor_id.
+        */
+       TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  pfn             )
+               __field(        unsigned int,   order           )
+               __field(        int,            migratetype     )
+       ),
+
+       TP_fast_assign(
+               __entry->pfn            = page ? page_to_pfn(page) : -1UL;
+               __entry->order          = order;
+               __entry->migratetype    = migratetype;
+       ),
+
        TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
                pfn_to_page(__entry->pfn), __entry->pfn,
                __entry->order, __entry->migratetype)
index 42a1d2a..cfc9e84 100644 (file)
@@ -3370,6 +3370,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
        module_bug_cleanup(mod);
        mutex_unlock(&module_mutex);
 
+       blocking_notifier_call_chain(&module_notify_list,
+                                    MODULE_STATE_GOING, mod);
+
        /* we can't deallocate the module until we clear memory protection */
        unset_module_init_ro_nx(mod);
        unset_module_core_ro_nx(mod);
index 830dd5d..5f62708 100644 (file)
@@ -139,64 +139,42 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 #endif
 
 /**
- * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
- *
+ * cpumask_local_spread - select the i'th cpu with local numa cpu's first
  * @i: index number
- * @numa_node: local numa_node
- * @dstp: cpumask with the relevant cpu bit set according to the policy
+ * @node: local numa_node
  *
- * This function sets the cpumask according to a numa aware policy.
- * cpumask could be used as an affinity hint for the IRQ related to a
- * queue. When the policy is to spread queues across cores - local cores
- * first.
+ * This function selects an online CPU according to a numa aware policy;
+ * local cpus are returned first, followed by non-local ones, then it
+ * wraps around.
  *
- * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
- * the cpu bit and need to re-call the function.
+ * It's not very efficient, but useful for setup.
  */
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-       cpumask_var_t mask;
        int cpu;
-       int ret = 0;
-
-       if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
-               return -ENOMEM;
 
+       /* Wrap: we always want a cpu. */
        i %= num_online_cpus();
 
-       if (numa_node == -1 || !cpumask_of_node(numa_node)) {
-               /* Use all online cpu's for non numa aware system */
-               cpumask_copy(mask, cpu_online_mask);
+       if (node == -1) {
+               for_each_cpu(cpu, cpu_online_mask)
+                       if (i-- == 0)
+                               return cpu;
        } else {
-               int n;
-
-               cpumask_and(mask,
-                           cpumask_of_node(numa_node), cpu_online_mask);
-
-               n = cpumask_weight(mask);
-               if (i >= n) {
-                       i -= n;
-
-                       /* If index > number of local cpu's, mask out local
-                        * cpu's
-                        */
-                       cpumask_andnot(mask, cpu_online_mask, mask);
+               /* NUMA first. */
+               for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
+                       if (i-- == 0)
+                               return cpu;
+
+               for_each_cpu(cpu, cpu_online_mask) {
+                       /* Skip NUMA nodes, done above. */
+                       if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
+                               continue;
+
+                       if (i-- == 0)
+                               return cpu;
                }
        }
-
-       for_each_cpu(cpu, mask) {
-               if (--i < 0)
-                       goto out;
-       }
-
-       ret = -EAGAIN;
-
-out:
-       free_cpumask_var(mask);
-
-       if (!ret)
-               cpumask_set_cpu(cpu, dstp);
-
-       return ret;
+       BUG();
 }
-EXPORT_SYMBOL(cpumask_set_cpu_local_first);
+EXPORT_SYMBOL(cpumask_local_spread);
index 48144cd..f051d69 100644 (file)
@@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
  * Compare counter against given value.
  * Return 1 if greater, 0 if equal and -1 if less
  */
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
 {
        s64     count;
 
        count = percpu_counter_read(fbc);
        /* Check to see if rough count will be sufficient for comparison */
-       if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+       if (abs(count - rhs) > (batch * num_online_cpus())) {
                if (count > rhs)
                        return 1;
                else
@@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
        else
                return 0;
 }
-EXPORT_SYMBOL(percpu_counter_compare);
+EXPORT_SYMBOL(__percpu_counter_compare);
 
 static int __init percpu_counter_startup(void)
 {
index 4ec0c80..112ad78 100644 (file)
@@ -330,6 +330,10 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
                release_sock(sk);
                timeo = schedule_timeout(timeo);
                lock_sock(sk);
+
+               if (sock_flag(sk, SOCK_DEAD))
+                       break;
+
                clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        }
 
@@ -373,6 +377,10 @@ static int caif_stream_recvmsg(struct socket *sock, struct msghdr *msg,
                struct sk_buff *skb;
 
                lock_sock(sk);
+               if (sock_flag(sk, SOCK_DEAD)) {
+                       err = -ECONNRESET;
+                       goto unlock;
+               }
                skb = skb_dequeue(&sk->sk_receive_queue);
                caif_check_flow_release(sk);
 
index 265e427..ff347a0 100644 (file)
@@ -2495,51 +2495,22 @@ static bool ieee80211_coalesce_started_roc(struct ieee80211_local *local,
                                           struct ieee80211_roc_work *new_roc,
                                           struct ieee80211_roc_work *cur_roc)
 {
-       unsigned long j = jiffies;
-       unsigned long cur_roc_end = cur_roc->hw_start_time +
-                                   msecs_to_jiffies(cur_roc->duration);
-       struct ieee80211_roc_work *next_roc;
-       int new_dur;
+       unsigned long now = jiffies;
+       unsigned long remaining = cur_roc->hw_start_time +
+                                 msecs_to_jiffies(cur_roc->duration) -
+                                 now;
 
        if (WARN_ON(!cur_roc->started || !cur_roc->hw_begun))
                return false;
 
-       if (time_after(j + IEEE80211_ROC_MIN_LEFT, cur_roc_end))
+       /* if it doesn't fit entirely, schedule a new one */
+       if (new_roc->duration > jiffies_to_msecs(remaining))
                return false;
 
        ieee80211_handle_roc_started(new_roc);
 
-       new_dur = new_roc->duration - jiffies_to_msecs(cur_roc_end - j);
-
-       /* cur_roc is long enough - add new_roc to the dependents list. */
-       if (new_dur <= 0) {
-               list_add_tail(&new_roc->list, &cur_roc->dependents);
-               return true;
-       }
-
-       new_roc->duration = new_dur;
-
-       /*
-        * if cur_roc was already coalesced before, we might
-        * want to extend the next roc instead of adding
-        * a new one.
-        */
-       next_roc = list_entry(cur_roc->list.next,
-                             struct ieee80211_roc_work, list);
-       if (&next_roc->list != &local->roc_list &&
-           next_roc->chan == new_roc->chan &&
-           next_roc->sdata == new_roc->sdata &&
-           !WARN_ON(next_roc->started)) {
-               list_add_tail(&new_roc->list, &next_roc->dependents);
-               next_roc->duration = max(next_roc->duration,
-                                        new_roc->duration);
-               next_roc->type = max(next_roc->type, new_roc->type);
-               return true;
-       }
-
-       /* add right after cur_roc */
-       list_add(&new_roc->list, &cur_roc->list);
-
+       /* add to dependents so we send the expired event properly */
+       list_add_tail(&new_roc->list, &cur_roc->dependents);
        return true;
 }
 
@@ -2652,17 +2623,9 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
                         * In the offloaded ROC case, if it hasn't begun, add
                         * this new one to the dependent list to be handled
                         * when the master one begins. If it has begun,
-                        * check that there's still a minimum time left and
-                        * if so, start this one, transmitting the frame, but
-                        * add it to the list directly after this one with
-                        * a reduced time so we'll ask the driver to execute
-                        * it right after finishing the previous one, in the
-                        * hope that it'll also be executed right afterwards,
-                        * effectively extending the old one.
-                        * If there's no minimum time left, just add it to the
-                        * normal list.
-                        * TODO: the ROC type is ignored here, assuming that it
-                        * is better to immediately use the current ROC.
+                        * check if it fits entirely within the existing one,
+                        * in which case it will just be dependent as well.
+                        * Otherwise, schedule it by itself.
                         */
                        if (!tmp->hw_begun) {
                                list_add_tail(&roc->list, &tmp->dependents);
index ab46ab4..c0a9187 100644 (file)
@@ -205,6 +205,8 @@ enum ieee80211_packet_rx_flags {
  * @IEEE80211_RX_CMNTR: received on cooked monitor already
  * @IEEE80211_RX_BEACON_REPORTED: This frame was already reported
  *     to cfg80211_report_obss_beacon().
+ * @IEEE80211_RX_REORDER_TIMER: this frame is released by the
+ *     reorder buffer timeout timer, not the normal RX path
  *
  * These flags are used across handling multiple interfaces
  * for a single frame.
@@ -212,6 +214,7 @@ enum ieee80211_packet_rx_flags {
 enum ieee80211_rx_flags {
        IEEE80211_RX_CMNTR              = BIT(0),
        IEEE80211_RX_BEACON_REPORTED    = BIT(1),
+       IEEE80211_RX_REORDER_TIMER      = BIT(2),
 };
 
 struct ieee80211_rx_data {
@@ -325,12 +328,6 @@ struct mesh_preq_queue {
        u8 flags;
 };
 
-#if HZ/100 == 0
-#define IEEE80211_ROC_MIN_LEFT 1
-#else
-#define IEEE80211_ROC_MIN_LEFT (HZ/100)
-#endif
-
 struct ieee80211_roc_work {
        struct list_head list;
        struct list_head dependents;
index bab5c63..84cef60 100644 (file)
@@ -522,6 +522,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
                memcpy(sdata->vif.hw_queue, master->vif.hw_queue,
                       sizeof(sdata->vif.hw_queue));
                sdata->vif.bss_conf.chandef = master->vif.bss_conf.chandef;
+
+               mutex_lock(&local->key_mtx);
+               sdata->crypto_tx_tailroom_needed_cnt +=
+                       master->crypto_tx_tailroom_needed_cnt;
+               mutex_unlock(&local->key_mtx);
+
                break;
                }
        case NL80211_IFTYPE_AP:
index 2291cd7..a907f2d 100644 (file)
@@ -58,6 +58,22 @@ static void assert_key_lock(struct ieee80211_local *local)
        lockdep_assert_held(&local->key_mtx);
 }
 
+static void
+update_vlan_tailroom_need_count(struct ieee80211_sub_if_data *sdata, int delta)
+{
+       struct ieee80211_sub_if_data *vlan;
+
+       if (sdata->vif.type != NL80211_IFTYPE_AP)
+               return;
+
+       mutex_lock(&sdata->local->mtx);
+
+       list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+               vlan->crypto_tx_tailroom_needed_cnt += delta;
+
+       mutex_unlock(&sdata->local->mtx);
+}
+
 static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
 {
        /*
@@ -79,6 +95,8 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
         * http://mid.gmane.org/1308590980.4322.19.camel@jlt3.sipsolutions.net
         */
 
+       update_vlan_tailroom_need_count(sdata, 1);
+
        if (!sdata->crypto_tx_tailroom_needed_cnt++) {
                /*
                 * Flush all XMIT packets currently using HW encryption or no
@@ -88,6 +106,15 @@ static void increment_tailroom_need_count(struct ieee80211_sub_if_data *sdata)
        }
 }
 
+static void decrease_tailroom_need_count(struct ieee80211_sub_if_data *sdata,
+                                        int delta)
+{
+       WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt < delta);
+
+       update_vlan_tailroom_need_count(sdata, -delta);
+       sdata->crypto_tx_tailroom_needed_cnt -= delta;
+}
+
 static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 {
        struct ieee80211_sub_if_data *sdata;
@@ -144,7 +171,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 
                if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) ||
                      (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM)))
-                       sdata->crypto_tx_tailroom_needed_cnt--;
+                       decrease_tailroom_need_count(sdata, 1);
 
                WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) &&
                        (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV));
@@ -541,7 +568,7 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key,
                        schedule_delayed_work(&sdata->dec_tailroom_needed_wk,
                                              HZ/2);
                } else {
-                       sdata->crypto_tx_tailroom_needed_cnt--;
+                       decrease_tailroom_need_count(sdata, 1);
                }
        }
 
@@ -631,6 +658,7 @@ void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom)
 void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
 {
        struct ieee80211_key *key;
+       struct ieee80211_sub_if_data *vlan;
 
        ASSERT_RTNL();
 
@@ -639,7 +667,14 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
 
        mutex_lock(&sdata->local->key_mtx);
 
-       sdata->crypto_tx_tailroom_needed_cnt = 0;
+       WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
+                    sdata->crypto_tx_tailroom_pending_dec);
+
+       if (sdata->vif.type == NL80211_IFTYPE_AP) {
+               list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+                       WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt ||
+                                    vlan->crypto_tx_tailroom_pending_dec);
+       }
 
        list_for_each_entry(key, &sdata->key_list, list) {
                increment_tailroom_need_count(sdata);
@@ -649,6 +684,22 @@ void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata)
        mutex_unlock(&sdata->local->key_mtx);
 }
 
+void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata)
+{
+       struct ieee80211_sub_if_data *vlan;
+
+       mutex_lock(&sdata->local->key_mtx);
+
+       sdata->crypto_tx_tailroom_needed_cnt = 0;
+
+       if (sdata->vif.type == NL80211_IFTYPE_AP) {
+               list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
+                       vlan->crypto_tx_tailroom_needed_cnt = 0;
+       }
+
+       mutex_unlock(&sdata->local->key_mtx);
+}
+
 void ieee80211_iter_keys(struct ieee80211_hw *hw,
                         struct ieee80211_vif *vif,
                         void (*iter)(struct ieee80211_hw *hw,
@@ -688,8 +739,8 @@ static void ieee80211_free_keys_iface(struct ieee80211_sub_if_data *sdata,
 {
        struct ieee80211_key *key, *tmp;
 
-       sdata->crypto_tx_tailroom_needed_cnt -=
-               sdata->crypto_tx_tailroom_pending_dec;
+       decrease_tailroom_need_count(sdata,
+                                    sdata->crypto_tx_tailroom_pending_dec);
        sdata->crypto_tx_tailroom_pending_dec = 0;
 
        ieee80211_debugfs_key_remove_mgmt_default(sdata);
@@ -709,6 +760,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
 {
        struct ieee80211_local *local = sdata->local;
        struct ieee80211_sub_if_data *vlan;
+       struct ieee80211_sub_if_data *master;
        struct ieee80211_key *key, *tmp;
        LIST_HEAD(keys);
 
@@ -728,8 +780,20 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
        list_for_each_entry_safe(key, tmp, &keys, list)
                __ieee80211_key_destroy(key, false);
 
-       WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
-                    sdata->crypto_tx_tailroom_pending_dec);
+       if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+               if (sdata->bss) {
+                       master = container_of(sdata->bss,
+                                             struct ieee80211_sub_if_data,
+                                             u.ap);
+
+                       WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt !=
+                                    master->crypto_tx_tailroom_needed_cnt);
+               }
+       } else {
+               WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt ||
+                            sdata->crypto_tx_tailroom_pending_dec);
+       }
+
        if (sdata->vif.type == NL80211_IFTYPE_AP) {
                list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list)
                        WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt ||
@@ -793,8 +857,8 @@ void ieee80211_delayed_tailroom_dec(struct work_struct *wk)
         */
 
        mutex_lock(&sdata->local->key_mtx);
-       sdata->crypto_tx_tailroom_needed_cnt -=
-               sdata->crypto_tx_tailroom_pending_dec;
+       decrease_tailroom_need_count(sdata,
+                                    sdata->crypto_tx_tailroom_pending_dec);
        sdata->crypto_tx_tailroom_pending_dec = 0;
        mutex_unlock(&sdata->local->key_mtx);
 }
index c5a3183..96557dd 100644 (file)
@@ -161,6 +161,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata,
 void ieee80211_free_sta_keys(struct ieee80211_local *local,
                             struct sta_info *sta);
 void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
+void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata);
 
 #define key_mtx_dereference(local, ref) \
        rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx)))
index 260eed4..5793f75 100644 (file)
@@ -2121,7 +2121,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
                /* deliver to local stack */
                skb->protocol = eth_type_trans(skb, dev);
                memset(skb->cb, 0, sizeof(skb->cb));
-               if (rx->local->napi)
+               if (!(rx->flags & IEEE80211_RX_REORDER_TIMER) &&
+                   rx->local->napi)
                        napi_gro_receive(rx->local->napi, skb);
                else
                        netif_receive_skb(skb);
@@ -3231,7 +3232,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
                /* This is OK -- must be QoS data frame */
                .security_idx = tid,
                .seqno_idx = tid,
-               .flags = 0,
+               .flags = IEEE80211_RX_REORDER_TIMER,
        };
        struct tid_ampdu_rx *tid_agg_rx;
 
index 79412f1..b864ebc 100644 (file)
@@ -2022,6 +2022,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
        mutex_unlock(&local->sta_mtx);
 
        /* add back keys */
+       list_for_each_entry(sdata, &local->interfaces, list)
+               ieee80211_reset_crypto_tx_tailroom(sdata);
+
        list_for_each_entry(sdata, &local->interfaces, list)
                if (ieee80211_sdata_running(sdata))
                        ieee80211_enable_keys(sdata);
index ad9eed7..1e1c89e 100644 (file)
@@ -815,10 +815,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                if (dev->flags & IFF_UP)
                        dev_deactivate(dev);
 
-               if (new && new->ops->attach) {
-                       new->ops->attach(new);
-                       num_q = 0;
-               }
+               if (new && new->ops->attach)
+                       goto skip;
 
                for (i = 0; i < num_q; i++) {
                        struct netdev_queue *dev_queue = dev_ingress_queue(dev);
@@ -834,12 +832,16 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                                qdisc_destroy(old);
                }
 
+skip:
                if (!ingress) {
                        notify_and_destroy(net, skb, n, classid,
                                           dev->qdisc, new);
                        if (new && !new->ops->attach)
                                atomic_inc(&new->refcnt);
                        dev->qdisc = new ? : &noop_qdisc;
+
+                       if (new && new->ops->attach)
+                               new->ops->attach(new);
                } else {
                        notify_and_destroy(net, skb, n, classid, old, new);
                }
index 5266ea7..0643059 100644 (file)
@@ -1880,6 +1880,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
                unix_state_unlock(sk);
                timeo = freezable_schedule_timeout(timeo);
                unix_state_lock(sk);
+
+               if (sock_flag(sk, SOCK_DEAD))
+                       break;
+
                clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
        }
 
@@ -1939,6 +1943,10 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
                struct sk_buff *skb, *last;
 
                unix_state_lock(sk);
+               if (sock_flag(sk, SOCK_DEAD)) {
+                       err = -ECONNRESET;
+                       goto unlock;
+               }
                last = skb = skb_peek(&sk->sk_receive_queue);
 again:
                if (skb == NULL) {
index a1504c4..25db8cf 100644 (file)
@@ -73,18 +73,11 @@ class LxLsmod(gdb.Command):
                 "        " if utils.get_long_type().sizeof == 8 else ""))
 
         for module in module_list():
-            ref = 0
-            module_refptr = module['refptr']
-            for cpu in cpus.cpu_list("cpu_possible_mask"):
-                refptr = cpus.per_cpu(module_refptr, cpu)
-                ref += refptr['incs']
-                ref -= refptr['decs']
-
             gdb.write("{address} {name:<19} {size:>8}  {ref}".format(
                 address=str(module['module_core']).split()[0],
                 name=module['name'].string(),
                 size=str(module['core_size']),
-                ref=str(ref)))
+                ref=str(module['refcnt']['counter'])))
 
             source_list = module['source_list']
             t = self._module_use_type.get_type().pointer()
index 1c86787..ac0db16 100644 (file)
@@ -4926,9 +4926,12 @@ int snd_hda_gen_parse_auto_config(struct hda_codec *codec,
  dig_only:
        parse_digital(codec);
 
-       if (spec->power_down_unused || codec->power_save_node)
+       if (spec->power_down_unused || codec->power_save_node) {
                if (!codec->power_filter)
                        codec->power_filter = snd_hda_gen_path_power_filter;
+               if (!codec->patch_ops.stream_pm)
+                       codec->patch_ops.stream_pm = snd_hda_gen_stream_pm;
+       }
 
        if (!spec->no_analog && spec->beep_nid) {
                err = snd_hda_attach_beep_device(codec, spec->beep_nid);
index 34040d2..fea198c 100644 (file)
@@ -2089,6 +2089,8 @@ static const struct pci_device_id azx_ids[] = {
          .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
        { PCI_DEVICE(0x1002, 0xaab0),
          .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
+       { PCI_DEVICE(0x1002, 0xaac8),
+         .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS },
        /* VIA VT8251/VT8237A */
        { PCI_DEVICE(0x1106, 0x3288),
          .driver_data = AZX_DRIVER_VIA | AZX_DCAPS_POSFIX_VIA },
index 31f8f13..4641684 100644 (file)
@@ -884,6 +884,7 @@ static struct alc_codec_rename_pci_table rename_pci_tbl[] = {
        { 0x10ec0275, 0x1028, 0, "ALC3260" },
        { 0x10ec0899, 0x1028, 0, "ALC3861" },
        { 0x10ec0298, 0x1028, 0, "ALC3266" },
+       { 0x10ec0256, 0x1028, 0, "ALC3246" },
        { 0x10ec0670, 0x1025, 0, "ALC669X" },
        { 0x10ec0676, 0x1025, 0, "ALC679X" },
        { 0x10ec0282, 0x1043, 0, "ALC3229" },
@@ -4227,6 +4228,11 @@ static void alc_fixup_headset_mode_alc662(struct hda_codec *codec,
        if (action == HDA_FIXUP_ACT_PRE_PROBE) {
                spec->parse_flags |= HDA_PINCFG_HEADSET_MIC;
                spec->gen.hp_mic = 1; /* Mic-in is same pin as headphone */
+
+               /* Disable boost for mic-in permanently. (This code is only called
+                  from quirks that guarantee that the headphone is at NID 0x1b.) */
+               snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000);
+               snd_hda_override_wcaps(codec, 0x1b, get_wcaps(codec, 0x1b) & ~AC_WCAP_IN_AMP);
        } else
                alc_fixup_headset_mode(codec, fix, action);
 }
index 43c99ce..6833c74 100644 (file)
@@ -4403,7 +4403,6 @@ static const struct hda_codec_ops stac_patch_ops = {
 #ifdef CONFIG_PM
        .suspend = stac_suspend,
 #endif
-       .stream_pm = snd_hda_gen_stream_pm,
        .reboot_notify = stac_shutup,
 };
 
@@ -4697,7 +4696,8 @@ static int patch_stac92hd71bxx(struct hda_codec *codec)
                return err;
 
        spec = codec->spec;
-       codec->power_save_node = 1;
+       /* disabled power_save_node since it causes noises on a Dell machine */
+       /* codec->power_save_node = 1; */
        spec->linear_tone_beep = 0;
        spec->gen.own_eapd_ctl = 1;
        spec->gen.power_down_unused = 1;
index d51703e..0a4ad5f 100644 (file)
@@ -72,7 +72,6 @@ static void hda_fixup_thinkpad_acpi(struct hda_codec *codec,
                if (led_set_func(TPACPI_LED_MUTE, false) >= 0) {
                        old_vmaster_hook = spec->vmaster_mute.hook;
                        spec->vmaster_mute.hook = update_tpacpi_mute_led;
-                       spec->vmaster_mute_enum = 1;
                        removefunc = false;
                }
                if (led_set_func(TPACPI_LED_MICMUTE, false) >= 0) {
index 46facfc..2917534 100644 (file)
@@ -1118,6 +1118,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
        case USB_ID(0x045E, 0x075D): /* MS Lifecam Cinema  */
        case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
        case USB_ID(0x045E, 0x0772): /* MS Lifecam Studio */
+       case USB_ID(0x045E, 0x0779): /* MS Lifecam HD-3000 */
        case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
                return true;
        }
index c5baf9c..618c2bc 100644 (file)
@@ -123,6 +123,8 @@ static int get_last_jit_image(char *haystack, size_t hlen,
        assert(ret == 0);
 
        ptr = haystack;
+       memset(pmatch, 0, sizeof(pmatch));
+
        while (1) {
                ret = regexec(&regex, ptr, 1, pmatch, 0);
                if (ret == 0) {
index bac98ca..323b65e 100644 (file)
@@ -52,6 +52,7 @@ unsigned int skip_c0;
 unsigned int skip_c1;
 unsigned int do_nhm_cstates;
 unsigned int do_snb_cstates;
+unsigned int do_knl_cstates;
 unsigned int do_pc2;
 unsigned int do_pc3;
 unsigned int do_pc6;
@@ -91,6 +92,7 @@ unsigned int do_gfx_perf_limit_reasons;
 unsigned int do_ring_perf_limit_reasons;
 unsigned int crystal_hz;
 unsigned long long tsc_hz;
+int base_cpu;
 
 #define RAPL_PKG               (1 << 0)
                                        /* 0x610 MSR_PKG_POWER_LIMIT */
@@ -316,7 +318,7 @@ void print_header(void)
 
        if (do_nhm_cstates)
                outp += sprintf(outp, "  CPU%%c1");
-       if (do_nhm_cstates && !do_slm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
                outp += sprintf(outp, "  CPU%%c3");
        if (do_nhm_cstates)
                outp += sprintf(outp, "  CPU%%c6");
@@ -546,7 +548,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                goto done;
 
-       if (do_nhm_cstates && !do_slm_cstates)
+       if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
                outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc);
        if (do_nhm_cstates)
                outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc);
@@ -1018,14 +1020,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
                return 0;
 
-       if (do_nhm_cstates && !do_slm_cstates) {
+       if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) {
                if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
                        return -6;
        }
 
-       if (do_nhm_cstates) {
+       if (do_nhm_cstates && !do_knl_cstates) {
                if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
                        return -7;
+       } else if (do_knl_cstates) {
+               if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
+                       return -7;
        }
 
        if (do_snb_cstates)
@@ -1150,7 +1155,7 @@ dump_nhm_platform_info(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
+       get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
 
        fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
 
@@ -1162,7 +1167,7 @@ dump_nhm_platform_info(void)
        fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
                ratio, bclk, ratio * bclk);
 
-       get_msr(0, MSR_IA32_POWER_CTL, &msr);
+       get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
        fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
                msr, msr & 0x2 ? "EN" : "DIS");
 
@@ -1175,7 +1180,7 @@ dump_hsw_turbo_ratio_limits(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr);
+       get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
 
        fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr);
 
@@ -1197,7 +1202,7 @@ dump_ivt_turbo_ratio_limits(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr);
+       get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
 
        fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr);
 
@@ -1249,7 +1254,7 @@ dump_nhm_turbo_ratio_limits(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr);
+       get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
 
        fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
 
@@ -1295,12 +1300,73 @@ dump_nhm_turbo_ratio_limits(void)
        return;
 }
 
+static void
+dump_knl_turbo_ratio_limits(void)
+{
+       int cores;
+       unsigned int ratio;
+       unsigned long long msr;
+       int delta_cores;
+       int delta_ratio;
+       int i;
+
+       get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
+
+       fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n",
+       msr);
+
+       /**
+        * Turbo encoding in KNL is as follows:
+        * [7:0] -- Base value of number of active cores of bucket 1.
+        * [15:8] -- Base value of freq ratio of bucket 1.
+        * [20:16] -- +ve delta of number of active cores of bucket 2.
+        * i.e. active cores of bucket 2 =
+        * active cores of bucket 1 + delta
+        * [23:21] -- Negative delta of freq ratio of bucket 2.
+        * i.e. freq ratio of bucket 2 =
+        * freq ratio of bucket 1 - delta
+        * [28:24]-- +ve delta of number of active cores of bucket 3.
+        * [31:29]-- -ve delta of freq ratio of bucket 3.
+        * [36:32]-- +ve delta of number of active cores of bucket 4.
+        * [39:37]-- -ve delta of freq ratio of bucket 4.
+        * [44:40]-- +ve delta of number of active cores of bucket 5.
+        * [47:45]-- -ve delta of freq ratio of bucket 5.
+        * [52:48]-- +ve delta of number of active cores of bucket 6.
+        * [55:53]-- -ve delta of freq ratio of bucket 6.
+        * [60:56]-- +ve delta of number of active cores of bucket 7.
+        * [63:61]-- -ve delta of freq ratio of bucket 7.
+        */
+       cores = msr & 0xFF;
+       ratio = (msr >> 8) && 0xFF;
+       if (ratio > 0)
+               fprintf(stderr,
+                       "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+                       ratio, bclk, ratio * bclk, cores);
+
+       for (i = 16; i < 64; i = i + 8) {
+               delta_cores = (msr >> i) & 0x1F;
+               delta_ratio = (msr >> (i + 5)) && 0x7;
+               if (!delta_cores || !delta_ratio)
+                       return;
+               cores = cores + delta_cores;
+               ratio = ratio - delta_ratio;
+
+               /** -ve ratios will make successive ratio calculations
+                * negative. Hence return instead of carrying on.
+                */
+               if (ratio > 0)
+                       fprintf(stderr,
+                               "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+                               ratio, bclk, ratio * bclk, cores);
+       }
+}
+
 static void
 dump_nhm_cst_cfg(void)
 {
        unsigned long long msr;
 
-       get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+       get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
 
 #define SNB_C1_AUTO_UNDEMOTE              (1UL << 27)
 #define SNB_C3_AUTO_UNDEMOTE              (1UL << 28)
@@ -1381,12 +1447,41 @@ int parse_int_file(const char *fmt, ...)
 }
 
 /*
- * cpu_is_first_sibling_in_core(cpu)
- * return 1 if given CPU is 1st HT sibling in the core
+ * get_cpu_position_in_core(cpu)
+ * return the position of the CPU among its HT siblings in the core
+ * return -1 if the sibling is not in list
  */
-int cpu_is_first_sibling_in_core(int cpu)
+int get_cpu_position_in_core(int cpu)
 {
-       return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
+       char path[64];
+       FILE *filep;
+       int this_cpu;
+       char character;
+       int i;
+
+       sprintf(path,
+               "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list",
+               cpu);
+       filep = fopen(path, "r");
+       if (filep == NULL) {
+               perror(path);
+               exit(1);
+       }
+
+       for (i = 0; i < topo.num_threads_per_core; i++) {
+               fscanf(filep, "%d", &this_cpu);
+               if (this_cpu == cpu) {
+                       fclose(filep);
+                       return i;
+               }
+
+               /* Account for no separator after last thread*/
+               if (i != (topo.num_threads_per_core - 1))
+                       fscanf(filep, "%c", &character);
+       }
+
+       fclose(filep);
+       return -1;
 }
 
 /*
@@ -1412,25 +1507,31 @@ int get_num_ht_siblings(int cpu)
 {
        char path[80];
        FILE *filep;
-       int sib1, sib2;
-       int matches;
+       int sib1;
+       int matches = 0;
        char character;
+       char str[100];
+       char *ch;
 
        sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu);
        filep = fopen_or_die(path, "r");
+
        /*
         * file format:
-        * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4)
-        * otherwinse 1 sibling (self).
+        * A ',' separated or '-' separated set of numbers
+        * (eg 1-2 or 1,3,4,5)
         */
-       matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2);
+       fscanf(filep, "%d%c\n", &sib1, &character);
+       fseek(filep, 0, SEEK_SET);
+       fgets(str, 100, filep);
+       ch = strchr(str, character);
+       while (ch != NULL) {
+               matches++;
+               ch = strchr(ch+1, character);
+       }
 
        fclose(filep);
-
-       if (matches == 3)
-               return 2;
-       else
-               return 1;
+       return matches+1;
 }
 
 /*
@@ -1594,8 +1695,10 @@ restart:
 void check_dev_msr()
 {
        struct stat sb;
+       char pathname[32];
 
-       if (stat("/dev/cpu/0/msr", &sb))
+       sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+       if (stat(pathname, &sb))
                if (system("/sbin/modprobe msr > /dev/null 2>&1"))
                        err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
 }
@@ -1608,6 +1711,7 @@ void check_permissions()
        cap_user_data_t cap_data = &cap_data_data;
        extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
        int do_exit = 0;
+       char pathname[32];
 
        /* check for CAP_SYS_RAWIO */
        cap_header->pid = getpid();
@@ -1622,7 +1726,8 @@ void check_permissions()
        }
 
        /* test file permissions */
-       if (euidaccess("/dev/cpu/0/msr", R_OK)) {
+       sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+       if (euidaccess(pathname, R_OK)) {
                do_exit++;
                warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
        }
@@ -1704,7 +1809,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
        default:
                return 0;
        }
-       get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
+       get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
 
        pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
 
@@ -1753,6 +1858,21 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
        }
 }
 
+int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+       if (!genuine_intel)
+               return 0;
+
+       if (family != 6)
+               return 0;
+
+       switch (model) {
+       case 0x57:      /* Knights Landing */
+               return 1;
+       default:
+               return 0;
+       }
+}
 static void
 dump_cstate_pstate_config_info(family, model)
 {
@@ -1770,6 +1890,9 @@ dump_cstate_pstate_config_info(family, model)
        if (has_nhm_turbo_ratio_limit(family, model))
                dump_nhm_turbo_ratio_limits();
 
+       if (has_knl_turbo_ratio_limit(family, model))
+               dump_knl_turbo_ratio_limits();
+
        dump_nhm_cst_cfg();
 }
 
@@ -1801,7 +1924,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
                return 0;
 
-       switch (msr & 0x7) {
+       switch (msr & 0xF) {
        case ENERGY_PERF_BIAS_PERFORMANCE:
                epb_string = "performance";
                break;
@@ -1925,7 +2048,7 @@ double get_tdp(model)
        unsigned long long msr;
 
        if (do_rapl & RAPL_PKG_POWER_INFO)
-               if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+               if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
                        return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
 
        switch (model) {
@@ -1950,6 +2073,7 @@ rapl_dram_energy_units_probe(int  model, double rapl_energy_units)
        case 0x3F:      /* HSX */
        case 0x4F:      /* BDX */
        case 0x56:      /* BDX-DE */
+       case 0x57:      /* KNL */
                return (rapl_dram_energy_units = 15.3 / 1000000);
        default:
                return (rapl_energy_units);
@@ -1991,6 +2115,7 @@ void rapl_probe(unsigned int family, unsigned int model)
        case 0x3F:      /* HSX */
        case 0x4F:      /* BDX */
        case 0x56:      /* BDX-DE */
+       case 0x57:      /* KNL */
                do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
                break;
        case 0x2D:
@@ -2006,7 +2131,7 @@ void rapl_probe(unsigned int family, unsigned int model)
        }
 
        /* units on package 0, verify later other packages match */
-       if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
+       if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
                return;
 
        rapl_power_units = 1.0 / (1 << (msr & 0xF));
@@ -2331,6 +2456,17 @@ int is_slm(unsigned int family, unsigned int model)
        return 0;
 }
 
+int is_knl(unsigned int family, unsigned int model)
+{
+       if (!genuine_intel)
+               return 0;
+       switch (model) {
+       case 0x57:      /* KNL */
+               return 1;
+       }
+       return 0;
+}
+
 #define SLM_BCLK_FREQS 5
 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
 
@@ -2340,7 +2476,7 @@ double slm_bclk(void)
        unsigned int i;
        double freq;
 
-       if (get_msr(0, MSR_FSB_FREQ, &msr))
+       if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
                fprintf(stderr, "SLM BCLK: unknown\n");
 
        i = msr & 0xf;
@@ -2408,7 +2544,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        if (!do_nhm_platform_info)
                goto guess;
 
-       if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
+       if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
                goto guess;
 
        target_c_local = (msr >> 16) & 0xFF;
@@ -2541,6 +2677,7 @@ void process_cpuid()
        do_c8_c9_c10 = has_hsw_msrs(family, model);
        do_skl_residency = has_skl_msrs(family, model);
        do_slm_cstates = is_slm(family, model);
+       do_knl_cstates  = is_knl(family, model);
        bclk = discover_bclk(family, model);
 
        rapl_probe(family, model);
@@ -2755,13 +2892,9 @@ int initialize_counters(int cpu_id)
 
        my_package_id = get_physical_package_id(cpu_id);
        my_core_id = get_core_id(cpu_id);
-
-       if (cpu_is_first_sibling_in_core(cpu_id)) {
-               my_thread_id = 0;
+       my_thread_id = get_cpu_position_in_core(cpu_id);
+       if (!my_thread_id)
                topo.num_cores++;
-       } else {
-               my_thread_id = 1;
-       }
 
        init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
        init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id);
@@ -2785,13 +2918,24 @@ void setup_all_buffers(void)
        for_all_proc_cpus(initialize_counters);
 }
 
+void set_base_cpu(void)
+{
+       base_cpu = sched_getcpu();
+       if (base_cpu < 0)
+               err(-ENODEV, "No valid cpus found");
+
+       if (debug > 1)
+               fprintf(stderr, "base_cpu = %d\n", base_cpu);
+}
+
 void turbostat_init()
 {
+       setup_all_buffers();
+       set_base_cpu();
        check_dev_msr();
        check_permissions();
        process_cpuid();
 
-       setup_all_buffers();
 
        if (debug)
                for_all_cpus(print_epb, ODD_COUNTERS);
@@ -2870,7 +3014,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-       fprintf(stderr, "turbostat version 4.5 2 Apr, 2015"
+       fprintf(stderr, "turbostat version 4.7 27-May, 2015"
                " - Len Brown <lenb@kernel.org>\n");
 }