Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris...

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 28 Oct 2016 16:23:59 +0000 (09:23 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 28 Oct 2016 16:23:59 +0000 (09:23 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Oct 2016 16:23:59 +0000 (09:23 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 28 Oct 2016 16:23:59 +0000 (09:23 -0700)
diff --git a/CREDITS b/CREDITS

index 513aaa3..8373676 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -1864,10 +1864,11 @@ S: The Netherlands
  
  N: Martin Kepplinger
  E: martink@posteo.de
-E: martin.kepplinger@theobroma-systems.com
+E: martin.kepplinger@ginzinger.com
  W: http://www.martinkepplinger.com
  D: mma8452 accelerators iio driver
-D: Kernel cleanups
+D: pegasus_notetaker input driver
+D: Kernel fixes and cleanups
  S: Garnisonstraße 26
  S: 4020 Linz
  S: Austria
diff --git a/MAINTAINERS b/MAINTAINERS

index c447953..f30b8ea 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8100,6 +8100,7 @@ S:        Maintained
  F:     drivers/media/dvb-frontends/mn88473*
  
  MODULE SUPPORT
+M:     Jessica Yu <jeyu@redhat.com>
  M:     Rusty Russell <rusty@rustcorp.com.au>
  S:     Maintained
  F:     include/linux/module.h
diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c

index 099e170..0068fd4 100644 (file)
--- a/arch/cris/arch-v32/drivers/cryptocop.c
+++ b/arch/cris/arch-v32/drivers/cryptocop.c
@@ -3149,7 +3149,7 @@ static void print_dma_descriptors(struct cryptocop_int_operation *iop)
         printk("print_dma_descriptors start\n");
  
         printk("iop:\n");
-       printk("\tsid: 0x%lld\n", iop->sid);
+       printk("\tsid: 0x%llx\n", iop->sid);
  
         printk("\tcdesc_out: 0x%p\n", iop->cdesc_out);
         printk("\tcdesc_in: 0x%p\n", iop->cdesc_in);
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h

index b408fe6..3cef068 100644 (file)
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -31,7 +31,6 @@ struct thread_info {
         int                cpu;                 /* cpu we're on */
         int                preempt_count;       /* 0 => preemptable, <0 => BUG */
         mm_segment_t            addr_limit;
-       struct restart_block restart_block;
  };
  
  /*
@@ -44,9 +43,6 @@ struct thread_info {
         .cpu =          0,                      \
         .preempt_count = INIT_PREEMPT_COUNT,    \
         .addr_limit     = KERNEL_DS,            \
-       .restart_block  = {                     \
-               .fn = do_no_restart_syscall,    \
-       },                                      \
  }
  
  #define init_thread_info       (init_thread_union.thread_info)
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c

index ad1f81f..7138303 100644 (file)
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -79,7 +79,7 @@ restore_sigcontext(struct sigcontext *usc, int *pd0)
         unsigned int er0;
  
         /* Always make any pending restarted system calls return -EINTR */
-       current_thread_info()->restart_block.fn = do_no_restart_syscall;
+       current->restart_block.fn = do_no_restart_syscall;
  
         /* restore passed registers */
  #define COPY(r)  do { err |= get_user(regs->r, &usc->sc_##r); } while (0)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h

index 64053d9..836c562 100644 (file)
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -12,9 +12,7 @@
  
  #ifndef __ASSEMBLY__
  
-unsigned long return_address(int depth);
-
-#define ftrace_return_address(n) return_address(n)
+#define ftrace_return_address(n) __builtin_return_address(n)
  
  void _mcount(void);
  void ftrace_caller(void);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h

index 0332317..602af69 100644 (file)
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -192,7 +192,7 @@ struct task_struct;
  struct mm_struct;
  struct seq_file;
  
-typedef int (*dump_trace_func_t)(void *data, unsigned long address);
+typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
  void dump_trace(dump_trace_func_t func, void *data,
                 struct task_struct *task, unsigned long sp);
  
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h

index 02613ba..3066031 100644 (file)
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -9,6 +9,9 @@
  #include <uapi/asm/unistd.h>
  
  #define __IGNORE_time
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
  
  #define __ARCH_WANT_OLD_READDIR
  #define __ARCH_WANT_SYS_ALARM
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c

index 43446fa..c74c592 100644 (file)
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -2014,12 +2014,12 @@ void show_code(struct pt_regs *regs)
                         *ptr++ = '\t';
                 ptr += print_insn(ptr, code + start, addr);
                 start += opsize;
-               printk("%s", buffer);
+               pr_cont("%s", buffer);
                 ptr = buffer;
                 ptr += sprintf(ptr, "\n          ");
                 hops++;
         }
-       printk("\n");
+       pr_cont("\n");
  }
  
  void print_fn_code(unsigned char *code, unsigned long len)
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c

index 6693383..55d4fe1 100644 (file)
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -38,10 +38,10 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
                 if (sp < low || sp > high - sizeof(*sf))
                         return sp;
                 sf = (struct stack_frame *) sp;
+               if (func(data, sf->gprs[8], 0))
+                       return sp;
                 /* Follow the backchain. */
                 while (1) {
-                       if (func(data, sf->gprs[8]))
-                               return sp;
                         low = sp;
                         sp = sf->back_chain;
                         if (!sp)
@@ -49,6 +49,8 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
                         if (sp <= low || sp > high - sizeof(*sf))
                                 return sp;
                         sf = (struct stack_frame *) sp;
+                       if (func(data, sf->gprs[8], 1))
+                               return sp;
                 }
                 /* Zero backchain detected, check for interrupt frame. */
                 sp = (unsigned long) (sf + 1);
@@ -56,7 +58,7 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
                         return sp;
                 regs = (struct pt_regs *) sp;
                 if (!user_mode(regs)) {
-                       if (func(data, regs->psw.addr))
+                       if (func(data, regs->psw.addr, 1))
                                 return sp;
                 }
                 low = sp;
@@ -85,33 +87,12 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
  }
  EXPORT_SYMBOL_GPL(dump_trace);
  
-struct return_address_data {
-       unsigned long address;
-       int depth;
-};
-
-static int __return_address(void *data, unsigned long address)
-{
-       struct return_address_data *rd = data;
-
-       if (rd->depth--)
-               return 0;
-       rd->address = address;
-       return 1;
-}
-
-unsigned long return_address(int depth)
-{
-       struct return_address_data rd = { .depth = depth + 2 };
-
-       dump_trace(__return_address, &rd, NULL, current_stack_pointer());
-       return rd.address;
-}
-EXPORT_SYMBOL_GPL(return_address);
-
-static int show_address(void *data, unsigned long address)
+static int show_address(void *data, unsigned long address, int reliable)
  {
-       printk("([<%016lx>] %pSR)\n", address, (void *)address);
+       if (reliable)
+               printk(" [<%016lx>] %pSR \n", address, (void *)address);
+       else
+               printk("([<%016lx>] %pSR)\n", address, (void *)address);
         return 0;
  }
  
@@ -138,14 +119,14 @@ void show_stack(struct task_struct *task, unsigned long *sp)
                 else
                         stack = (unsigned long *)task->thread.ksp;
         }
+       printk(KERN_DEFAULT "Stack:\n");
         for (i = 0; i < 20; i++) {
                 if (((addr_t) stack & (THREAD_SIZE-1)) == 0)
                         break;
-               if ((i * sizeof(long) % 32) == 0)
-                       printk("%s       ", i == 0 ? "" : "\n");
-               printk("%016lx ", *stack++);
+               if (i % 4 == 0)
+                       printk(KERN_DEFAULT "       ");
+               pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' ');
         }
-       printk("\n");
         show_trace(task, (unsigned long)sp);
  }
  
@@ -163,13 +144,13 @@ void show_registers(struct pt_regs *regs)
         mode = user_mode(regs) ? "User" : "Krnl";
         printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
         if (!user_mode(regs))
-               printk(" (%pSR)", (void *)regs->psw.addr);
-       printk("\n");
+               pr_cont(" (%pSR)", (void *)regs->psw.addr);
+       pr_cont("\n");
         printk("           R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
                "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e,
                psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm);
-       printk(" RI:%x EA:%x", psw->ri, psw->eaba);
-       printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
+       pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba);
+       printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode,
                regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]);
         printk("           %016lx %016lx %016lx %016lx\n",
                regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]);
@@ -205,14 +186,14 @@ void die(struct pt_regs *regs, const char *str)
         printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff,
                regs->int_code >> 17, ++die_counter);
  #ifdef CONFIG_PREEMPT
-       printk("PREEMPT ");
+       pr_cont("PREEMPT ");
  #endif
  #ifdef CONFIG_SMP
-       printk("SMP ");
+       pr_cont("SMP ");
  #endif
         if (debug_pagealloc_enabled())
-               printk("DEBUG_PAGEALLOC");
-       printk("\n");
+               pr_cont("DEBUG_PAGEALLOC");
+       pr_cont("\n");
         notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV);
         print_modules();
         show_regs(regs);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c

index 17431f6..955a7b6 100644 (file)
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -222,7 +222,7 @@ static int __init service_level_perf_register(void)
  }
  arch_initcall(service_level_perf_register);
  
-static int __perf_callchain_kernel(void *data, unsigned long address)
+static int __perf_callchain_kernel(void *data, unsigned long address, int reliable)
  {
         struct perf_callchain_entry_ctx *entry = data;
  
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c

index 44f84b2..355db9d 100644 (file)
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -27,12 +27,12 @@ static int __save_address(void *data, unsigned long address, int nosched)
         return 1;
  }
  
-static int save_address(void *data, unsigned long address)
+static int save_address(void *data, unsigned long address, int reliable)
  {
         return __save_address(data, address, 0);
  }
  
-static int save_address_nosched(void *data, unsigned long address)
+static int save_address_nosched(void *data, unsigned long address, int reliable)
  {
         return __save_address(data, address, 1);
  }
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c

index cd404aa..4a0c5bc 100644 (file)
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -217,6 +217,7 @@ static __init int setup_hugepagesz(char *opt)
         } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
                 hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
         } else {
+               hugetlb_bad_size();
                 pr_err("hugepagesz= specifies an unsupported page size %s\n",
                         string);
                 return 0;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c

index f56a39b..b3e9d18 100644 (file)
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -151,36 +151,40 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
  #ifdef CONFIG_MEMORY_HOTPLUG
  int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
  {
-       unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
-       unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
+       unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
         unsigned long start_pfn = PFN_DOWN(start);
         unsigned long size_pages = PFN_DOWN(size);
-       unsigned long nr_pages;
-       int rc, zone_enum;
+       pg_data_t *pgdat = NODE_DATA(nid);
+       struct zone *zone;
+       int rc, i;
  
         rc = vmem_add_mapping(start, size);
         if (rc)
                 return rc;
  
-       while (size_pages > 0) {
-               if (start_pfn < dma_end_pfn) {
-                       nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
-                                  dma_end_pfn - start_pfn : size_pages;
-                       zone_enum = ZONE_DMA;
-               } else if (start_pfn < normal_end_pfn) {
-                       nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
-                                  normal_end_pfn - start_pfn : size_pages;
-                       zone_enum = ZONE_NORMAL;
+       for (i = 0; i < MAX_NR_ZONES; i++) {
+               zone = pgdat->node_zones + i;
+               if (zone_idx(zone) != ZONE_MOVABLE) {
+                       /* Add range within existing zone limits, if possible */
+                       zone_start_pfn = zone->zone_start_pfn;
+                       zone_end_pfn = zone->zone_start_pfn +
+                                      zone->spanned_pages;
                 } else {
-                       nr_pages = size_pages;
-                       zone_enum = ZONE_MOVABLE;
+                       /* Add remaining range to ZONE_MOVABLE */
+                       zone_start_pfn = start_pfn;
+                       zone_end_pfn = start_pfn + size_pages;
                 }
-               rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
-                                start_pfn, size_pages);
+               if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
+                       continue;
+               nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
+                          zone_end_pfn - start_pfn : size_pages;
+               rc = __add_pages(nid, zone, start_pfn, nr_pages);
                 if (rc)
                         break;
                 start_pfn += nr_pages;
                 size_pages -= nr_pages;
+               if (!size_pages)
+                       break;
         }
         if (rc)
                 vmem_remove_mapping(start, size);
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c

index 16f4c39..9a4de45 100644 (file)
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -13,7 +13,7 @@
  #include <linux/init.h>
  #include <asm/processor.h>
  
-static int __s390_backtrace(void *data, unsigned long address)
+static int __s390_backtrace(void *data, unsigned long address, int reliable)
  {
         unsigned int *depth = data;
  
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c

index ddd2661..887e571 100644 (file)
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -104,10 +104,10 @@ void __init kernel_randomize_memory(void)
          * consistent with the vaddr_start/vaddr_end variables.
          */
         BUILD_BUG_ON(vaddr_start >= vaddr_end);
-       BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
+       BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
                      vaddr_end >= EFI_VA_START);
-       BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
-                     config_enabled(CONFIG_EFI)) &&
+       BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
+                     IS_ENABLED(CONFIG_EFI)) &&
                      vaddr_end >= __START_KERNEL_map);
         BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
  
diff --git a/block/badblocks.c b/block/badblocks.c

index 6610e28..6ebcef2 100644 (file)
--- a/block/badblocks.c
+++ b/block/badblocks.c
@@ -133,6 +133,26 @@ retry:
  }
  EXPORT_SYMBOL_GPL(badblocks_check);
  
+static void badblocks_update_acked(struct badblocks *bb)
+{
+       u64 *p = bb->page;
+       int i;
+       bool unacked = false;
+
+       if (!bb->unacked_exist)
+               return;
+
+       for (i = 0; i < bb->count ; i++) {
+               if (!BB_ACK(p[i])) {
+                       unacked = true;
+                       break;
+               }
+       }
+
+       if (!unacked)
+               bb->unacked_exist = 0;
+}
+
  /**
   * badblocks_set() - Add a range of bad blocks to the table.
   * @bb:                the badblocks structure that holds all badblock information
@@ -294,6 +314,8 @@ int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
         bb->changed = 1;
         if (!acknowledged)
                 bb->unacked_exist = 1;
+       else
+               badblocks_update_acked(bb);
         write_sequnlock_irqrestore(&bb->lock, flags);
  
         return rv;
@@ -401,6 +423,7 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
                 }
         }
  
+       badblocks_update_acked(bb);
         bb->changed = 1;
  out:
         write_sequnlock_irq(&bb->lock);
diff --git a/block/blk-flush.c b/block/blk-flush.c

index 6a14b68..3c882cb 100644 (file)
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -342,6 +342,34 @@ static void flush_data_end_io(struct request *rq, int error)
         struct request_queue *q = rq->q;
         struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
  
+       /*
+        * Updating q->in_flight[] here for making this tag usable
+        * early. Because in blk_queue_start_tag(),
+        * q->in_flight[BLK_RW_ASYNC] is used to limit async I/O and
+        * reserve tags for sync I/O.
+        *
+        * More importantly this way can avoid the following I/O
+        * deadlock:
+        *
+        * - suppose there are 40 fua requests coming to flush queue
+        *   and queue depth is 31
+        * - 30 rqs are scheduled then blk_queue_start_tag() can't alloc
+        *   tag for async I/O any more
+        * - all the 30 rqs are completed before FLUSH_PENDING_TIMEOUT
+        *   and flush_data_end_io() is called
+        * - the other rqs still can't go ahead if not updating
+        *   q->in_flight[BLK_RW_ASYNC] here, meantime these rqs
+        *   are held in flush data queue and make no progress of
+        *   handling post flush rq
+        * - only after the post flush rq is handled, all these rqs
+        *   can be completed
+        */
+
+       elv_completed_request(q, rq);
+
+       /* for avoiding double accounting */
+       rq->cmd_flags &= ~REQ_STARTED;
+
         /*
          * After populating an empty queue, kick it to avoid stall.  Read
          * the comment in flush_end_io().
diff --git a/block/blk-mq.c b/block/blk-mq.c

index ddc2eed..f3d27a6 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1217,9 +1217,9 @@ static struct request *blk_mq_map_request(struct request_queue *q,
         blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
         rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
  
-       hctx->queued++;
-       data->hctx = hctx;
-       data->ctx = ctx;
+       data->hctx = alloc_data.hctx;
+       data->ctx = alloc_data.ctx;
+       data->hctx->queued++;
         return rq;
  }
  
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c

index ba5f11c..9669fc7 100644 (file)
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1418,30 +1418,33 @@ static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports,
          * Message mode could be enforced. In this case assume that advantage
          * of multipe MSIs is negated and use single MSI mode instead.
          */
-       nvec = pci_alloc_irq_vectors(pdev, n_ports, INT_MAX,
-                       PCI_IRQ_MSIX | PCI_IRQ_MSI);
-       if (nvec > 0) {
-               if (!(readl(hpriv->mmio + HOST_CTL) & HOST_MRSM)) {
-                       hpriv->get_irq_vector = ahci_get_irq_vector;
-                       hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
-                       return nvec;
+       if (n_ports > 1) {
+               nvec = pci_alloc_irq_vectors(pdev, n_ports, INT_MAX,
+                               PCI_IRQ_MSIX | PCI_IRQ_MSI);
+               if (nvec > 0) {
+                       if (!(readl(hpriv->mmio + HOST_CTL) & HOST_MRSM)) {
+                               hpriv->get_irq_vector = ahci_get_irq_vector;
+                               hpriv->flags |= AHCI_HFLAG_MULTI_MSI;
+                               return nvec;
+                       }
+
+                       /*
+                        * Fallback to single MSI mode if the controller
+                        * enforced MRSM mode.
+                        */
+                       printk(KERN_INFO
+                               "ahci: MRSM is on, fallback to single MSI\n");
+                       pci_free_irq_vectors(pdev);
                 }
  
                 /*
-                * Fallback to single MSI mode if the controller enforced MRSM
-                * mode.
+                * -ENOSPC indicated we don't have enough vectors.  Don't bother
+                * trying a single vectors for any other error:
                  */
-               printk(KERN_INFO "ahci: MRSM is on, fallback to single MSI\n");
-               pci_free_irq_vectors(pdev);
+               if (nvec < 0 && nvec != -ENOSPC)
+                       return nvec;
         }
  
-       /*
-        * -ENOSPC indicated we don't have enough vectors.  Don't bother trying
-        * a single vectors for any other error:
-        */
-       if (nvec < 0 && nvec != -ENOSPC)
-               return nvec;
-
         /*
          * If the host is not capable of supporting per-port vectors, fall
          * back to single MSI before finally attempting single MSI-X.
@@ -1617,7 +1620,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                 /* legacy intx interrupts */
                 pci_intx(pdev, 1);
         }
-       hpriv->irq = pdev->irq;
+       hpriv->irq = pci_irq_vector(pdev, 0);
  
         if (!(hpriv->cap & HOST_CAP_SSS) || ahci_ignore_sss)
                 host->flags |= ATA_HOST_PARALLEL_SCAN;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c

index 811e11c..0809cda 100644 (file)
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -2954,7 +2954,7 @@ DAC960_DetectController(struct pci_dev *PCI_Device,
         case DAC960_PD_Controller:
           if (!request_region(Controller->IO_Address, 0x80,
                               Controller->FullModelName)) {
-               DAC960_Error("IO port 0x%d busy for Controller at\n",
+               DAC960_Error("IO port 0x%lx busy for Controller at\n",
                              Controller, Controller->IO_Address);
                 goto Failure;
           }
@@ -2990,7 +2990,7 @@ DAC960_DetectController(struct pci_dev *PCI_Device,
         case DAC960_P_Controller:
           if (!request_region(Controller->IO_Address, 0x80,
                               Controller->FullModelName)){
-               DAC960_Error("IO port 0x%d busy for Controller at\n",
+               DAC960_Error("IO port 0x%lx busy for Controller at\n",
                              Controller, Controller->IO_Address);
                 goto Failure;
           }
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c

index ba405b5..19a16b2 100644 (file)
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -164,7 +164,7 @@ static void sock_shutdown(struct nbd_device *nbd)
         spin_lock(&nbd->sock_lock);
  
         if (!nbd->sock) {
-               spin_unlock_irq(&nbd->sock_lock);
+               spin_unlock(&nbd->sock_lock);
                 return;
         }
  
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig

index 6d94e2e..d252276 100644 (file)
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -79,12 +79,12 @@ config I2C_AMD8111
  
  config I2C_HIX5HD2
         tristate "Hix5hd2 high-speed I2C driver"
-       depends on ARCH_HIX5HD2 || COMPILE_TEST
+       depends on ARCH_HISI || ARCH_HIX5HD2 || COMPILE_TEST
         help
-         Say Y here to include support for high-speed I2C controller in the
-         Hisilicon based hix5hd2 SoCs.
+         Say Y here to include support for the high-speed I2C controller
+         used in HiSilicon hix5hd2 SoCs.
  
-         This driver can also be built as a module.  If so, the module
+         This driver can also be built as a module. If so, the module
           will be called i2c-hix5hd2.
  
  config I2C_I801
@@ -589,10 +589,10 @@ config I2C_IMG
  
  config I2C_IMX
         tristate "IMX I2C interface"
-       depends on ARCH_MXC || ARCH_LAYERSCAPE
+       depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE
         help
           Say Y here if you want to use the IIC bus controller on
-         the Freescale i.MX/MXC or Layerscape processors.
+         the Freescale i.MX/MXC, Layerscape or ColdFire processors.
  
           This driver can also be built as a module.  If so, the module
           will be called i2c-imx.
diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c

index 1fe93c4..11e866d 100644 (file)
--- a/drivers/i2c/busses/i2c-designware-core.c
+++ b/drivers/i2c/busses/i2c-designware-core.c
@@ -95,6 +95,9 @@
  #define DW_IC_STATUS_TFE               BIT(2)
  #define DW_IC_STATUS_MST_ACTIVITY      BIT(5)
  
+#define DW_IC_SDA_HOLD_RX_SHIFT                16
+#define DW_IC_SDA_HOLD_RX_MASK         GENMASK(23, DW_IC_SDA_HOLD_RX_SHIFT)
+
  #define DW_IC_ERR_TX_ABRT      0x1
  
  #define DW_IC_TAR_10BITADDR_MASTER BIT(12)
@@ -420,12 +423,20 @@ int i2c_dw_init(struct dw_i2c_dev *dev)
         /* Configure SDA Hold Time if required */
         reg = dw_readl(dev, DW_IC_COMP_VERSION);
         if (reg >= DW_IC_SDA_HOLD_MIN_VERS) {
-               if (dev->sda_hold_time) {
-                       dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
-               } else {
+               if (!dev->sda_hold_time) {
                         /* Keep previous hold time setting if no one set it */
                         dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD);
                 }
+               /*
+                * Workaround for avoiding TX arbitration lost in case I2C
+                * slave pulls SDA down "too quickly" after falling edge of
+                * SCL by enabling non-zero SDA RX hold. Specification says it
+                * extends incoming SDA low to high transition while SCL is
+                * high but it appears to help also above issue.
+                */
+               if (!(dev->sda_hold_time & DW_IC_SDA_HOLD_RX_MASK))
+                       dev->sda_hold_time |= 1 << DW_IC_SDA_HOLD_RX_SHIFT;
+               dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD);
         } else {
                 dev_warn(dev->dev,
                         "Hardware too old to adjust SDA hold time.\n");
diff --git a/drivers/i2c/busses/i2c-digicolor.c b/drivers/i2c/busses/i2c-digicolor.c

index 9604024..49f2084 100644 (file)
--- a/drivers/i2c/busses/i2c-digicolor.c
+++ b/drivers/i2c/busses/i2c-digicolor.c
@@ -368,6 +368,7 @@ static const struct of_device_id dc_i2c_match[] = {
         { .compatible = "cnxt,cx92755-i2c" },
         { },
  };
+MODULE_DEVICE_TABLE(of, dc_i2c_match);
  
  static struct platform_driver dc_i2c_driver = {
         .probe   = dc_i2c_probe,
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c

index 08847e8..eb3627f 100644 (file)
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -146,6 +146,7 @@
  #define SMBHSTCFG_HST_EN       1
  #define SMBHSTCFG_SMB_SMI_EN   2
  #define SMBHSTCFG_I2C_EN       4
+#define SMBHSTCFG_SPD_WD       0x10
  
  /* TCO configuration bits for TCOCTL */
  #define TCOCTL_EN              0x0100
@@ -865,9 +866,16 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
                 block = 1;
                 break;
         case I2C_SMBUS_I2C_BLOCK_DATA:
-               /* NB: page 240 of ICH5 datasheet shows that the R/#W
-                * bit should be cleared here, even when reading */
-               outb_p((addr & 0x7f) << 1, SMBHSTADD(priv));
+               /*
+                * NB: page 240 of ICH5 datasheet shows that the R/#W
+                * bit should be cleared here, even when reading.
+                * However if SPD Write Disable is set (Lynx Point and later),
+                * the read will fail if we don't set the R/#W bit.
+                */
+               outb_p(((addr & 0x7f) << 1) |
+                      ((priv->original_hstcfg & SMBHSTCFG_SPD_WD) ?
+                       (read_write & 0x01) : 0),
+                      SMBHSTADD(priv));
                 if (read_write == I2C_SMBUS_READ) {
                         /* NB: page 240 of ICH5 datasheet also shows
                          * that DATA1 is the cmd field when reading */
@@ -1573,6 +1581,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
                 /* Disable SMBus interrupt feature if SMBus using SMI# */
                 priv->features &= ~FEATURE_IRQ;
         }
+       if (temp & SMBHSTCFG_SPD_WD)
+               dev_info(&dev->dev, "SPD Write Disable is set\n");
  
         /* Clear special mode bits */
         if (priv->features & (FEATURE_SMBUS_PEC | FEATURE_BLOCK_BUFFER))
diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c

index 592a8f2..47fc1f1 100644 (file)
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -1009,10 +1009,13 @@ static int i2c_imx_init_recovery_info(struct imx_i2c_struct *i2c_imx,
         rinfo->sda_gpio = of_get_named_gpio(pdev->dev.of_node, "sda-gpios", 0);
         rinfo->scl_gpio = of_get_named_gpio(pdev->dev.of_node, "scl-gpios", 0);
  
-       if (!gpio_is_valid(rinfo->sda_gpio) ||
-           !gpio_is_valid(rinfo->scl_gpio) ||
-           IS_ERR(i2c_imx->pinctrl_pins_default) ||
-           IS_ERR(i2c_imx->pinctrl_pins_gpio)) {
+       if (rinfo->sda_gpio == -EPROBE_DEFER ||
+           rinfo->scl_gpio == -EPROBE_DEFER) {
+               return -EPROBE_DEFER;
+       } else if (!gpio_is_valid(rinfo->sda_gpio) ||
+                  !gpio_is_valid(rinfo->scl_gpio) ||
+                  IS_ERR(i2c_imx->pinctrl_pins_default) ||
+                  IS_ERR(i2c_imx->pinctrl_pins_gpio)) {
                 dev_dbg(&pdev->dev, "recovery information incomplete\n");
                 return 0;
         }
diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c

index b8ea621..30132c3 100644 (file)
--- a/drivers/i2c/busses/i2c-jz4780.c
+++ b/drivers/i2c/busses/i2c-jz4780.c
@@ -729,6 +729,7 @@ static const struct of_device_id jz4780_i2c_of_matches[] = {
         { .compatible = "ingenic,jz4780-i2c", },
         { /* sentinel */ }
  };
+MODULE_DEVICE_TABLE(of, jz4780_i2c_of_matches);
  
  static int jz4780_i2c_probe(struct platform_device *pdev)
  {
diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c

index 50702c7..df22066 100644 (file)
--- a/drivers/i2c/busses/i2c-rk3x.c
+++ b/drivers/i2c/busses/i2c-rk3x.c
@@ -694,6 +694,8 @@ static int rk3x_i2c_v0_calc_timings(unsigned long clk_rate,
         t_calc->div_low--;
         t_calc->div_high--;
  
+       /* Give the tuning value 0, that would not update con register */
+       t_calc->tuning = 0;
         /* Maximum divider supported by hw is 0xffff */
         if (t_calc->div_low > 0xffff) {
                 t_calc->div_low = 0xffff;
diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c

index 263685c..05cf192 100644 (file)
--- a/drivers/i2c/busses/i2c-xgene-slimpro.c
+++ b/drivers/i2c/busses/i2c-xgene-slimpro.c
@@ -105,7 +105,7 @@ struct slimpro_i2c_dev {
         struct mbox_chan *mbox_chan;
         struct mbox_client mbox_client;
         struct completion rd_complete;
-       u8 dma_buffer[I2C_SMBUS_BLOCK_MAX];
+       u8 dma_buffer[I2C_SMBUS_BLOCK_MAX + 1]; /* dma_buffer[0] is used for length */
         u32 *resp_msg;
  };
  
diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c

index 2a972ed..e29ff37 100644 (file)
--- a/drivers/i2c/busses/i2c-xlp9xx.c
+++ b/drivers/i2c/busses/i2c-xlp9xx.c
@@ -426,6 +426,7 @@ static const struct of_device_id xlp9xx_i2c_of_match[] = {
         { .compatible = "netlogic,xlp980-i2c", },
         { /* sentinel */ },
  };
+MODULE_DEVICE_TABLE(of, xlp9xx_i2c_of_match);
  
  #ifdef CONFIG_ACPI
  static const struct acpi_device_id xlp9xx_i2c_acpi_ids[] = {
diff --git a/drivers/i2c/busses/i2c-xlr.c b/drivers/i2c/busses/i2c-xlr.c

index 0968f59..ad17d88 100644 (file)
--- a/drivers/i2c/busses/i2c-xlr.c
+++ b/drivers/i2c/busses/i2c-xlr.c
@@ -358,6 +358,7 @@ static const struct of_device_id xlr_i2c_dt_ids[] = {
         },
         { }
  };
+MODULE_DEVICE_TABLE(of, xlr_i2c_dt_ids);
  
  static int xlr_i2c_probe(struct platform_device *pdev)
  {
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c

index 5ab6721..1704fc8 100644 (file)
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -1681,6 +1681,7 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap,
  static void of_i2c_register_devices(struct i2c_adapter *adap)
  {
         struct device_node *bus, *node;
+       struct i2c_client *client;
  
         /* Only register child devices if the adapter has a node pointer set */
         if (!adap->dev.of_node)
@@ -1695,7 +1696,14 @@ static void of_i2c_register_devices(struct i2c_adapter *adap)
         for_each_available_child_of_node(bus, node) {
                 if (of_node_test_and_set_flag(node, OF_POPULATED))
                         continue;
-               of_i2c_register_device(adap, node);
+
+               client = of_i2c_register_device(adap, node);
+               if (IS_ERR(client)) {
+                       dev_warn(&adap->dev,
+                                "Failed to create I2C device for %s\n",
+                                node->full_name);
+                       of_node_clear_flag(node, OF_POPULATED);
+               }
         }
  
         of_node_put(bus);
@@ -2299,6 +2307,7 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action,
                 if (IS_ERR(client)) {
                         dev_err(&adap->dev, "failed to create client for '%s'\n",
                                  rd->dn->full_name);
+                       of_node_clear_flag(rd->dn, OF_POPULATED);
                         return notifier_from_errno(PTR_ERR(client));
                 }
                 break;
diff --git a/drivers/ipack/ipack.c b/drivers/ipack/ipack.c

index c0e7b62..1210244 100644 (file)
--- a/drivers/ipack/ipack.c
+++ b/drivers/ipack/ipack.c
@@ -178,7 +178,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
                        idev->id_vendor, idev->id_device);
  }
  
-ipack_device_attr(id_format, "0x%hhu\n");
+ipack_device_attr(id_format, "0x%hhx\n");
  
  static DEVICE_ATTR_RO(id);
  static DEVICE_ATTR_RO(id_device);
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c

index 1525870..33741ad 100644 (file)
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -283,7 +283,7 @@ static void gru_unload_mm_tracker(struct gru_state *gru,
         spin_lock(&gru->gs_asid_lock);
         BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
         asids->mt_ctxbitmap ^= ctxbitmap;
-       gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum 0x%d, asidmap 0x%lx\n",
+       gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
                 gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
         spin_unlock(&gru->gs_asid_lock);
         spin_unlock(&gms->ms_asid_lock);
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c

index 831935a..a7a8847 100644 (file)
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1205,7 +1205,7 @@ static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm)
                                  mdc, lpm);
                         return mdc;
                 }
-               fcx_max_data = mdc * FCX_MAX_DATA_FACTOR;
+               fcx_max_data = (u32)mdc * FCX_MAX_DATA_FACTOR;
                 if (fcx_max_data < private->fcx_max_data) {
                         dev_warn(&device->cdev->dev,
                                  "The maximum data size for zHPF requests %u "
@@ -1675,7 +1675,7 @@ static u32 get_fcx_max_data(struct dasd_device *device)
                          " data size for zHPF requests failed\n");
                 return 0;
         } else
-               return mdc * FCX_MAX_DATA_FACTOR;
+               return (u32)mdc * FCX_MAX_DATA_FACTOR;
  }
  
  /*
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c

index 46be25c..876c7e6 100644 (file)
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -780,7 +780,7 @@ static int cfg_wait_idle(void)
  static int __init chp_init(void)
  {
         struct chp_id chpid;
-       int ret;
+       int state, ret;
  
         ret = crw_register_handler(CRW_RSC_CPATH, chp_process_crw);
         if (ret)
@@ -791,7 +791,9 @@ static int __init chp_init(void)
                 return 0;
         /* Register available channel-paths. */
         chp_id_for_each(&chpid) {
-               if (chp_info_get_status(chpid) != CHP_STATUS_NOT_RECOGNIZED)
+               state = chp_info_get_status(chpid);
+               if (state == CHP_STATUS_CONFIGURED ||
+                   state == CHP_STATUS_STANDBY)
                         chp_new(chpid);
         }
  
diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c

index db27390..790babc 100644 (file)
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -353,7 +353,7 @@ static void NCR5380_print_phase(struct Scsi_Host *instance)
  #endif
  
  
-static int probe_irq __initdata;
+static int probe_irq;
  
  /**
   * probe_intr  -       helper for IRQ autoprobe
@@ -365,7 +365,7 @@ static int probe_irq __initdata;
   * used by the IRQ probe code.
   */
  
-static irqreturn_t __init probe_intr(int irq, void *dev_id)
+static irqreturn_t probe_intr(int irq, void *dev_id)
  {
         probe_irq = irq;
         return IRQ_HANDLED;
@@ -380,7 +380,7 @@ static irqreturn_t __init probe_intr(int irq, void *dev_id)
   * and then looking to see what interrupt actually turned up.
   */
  
-static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
+static int __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
                                                 int possible)
  {
         struct NCR5380_hostdata *hostdata = shost_priv(instance);
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c

index 68138a6..d9239c2 100644 (file)
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -900,8 +900,9 @@ void hwi_ring_cq_db(struct beiscsi_hba *phba,
  static struct sgl_handle *alloc_io_sgl_handle(struct beiscsi_hba *phba)
  {
         struct sgl_handle *psgl_handle;
+       unsigned long flags;
  
-       spin_lock_bh(&phba->io_sgl_lock);
+       spin_lock_irqsave(&phba->io_sgl_lock, flags);
         if (phba->io_sgl_hndl_avbl) {
                 beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_IO,
                             "BM_%d : In alloc_io_sgl_handle,"
@@ -919,14 +920,16 @@ static struct sgl_handle *alloc_io_sgl_handle(struct beiscsi_hba *phba)
                         phba->io_sgl_alloc_index++;
         } else
                 psgl_handle = NULL;
-       spin_unlock_bh(&phba->io_sgl_lock);
+       spin_unlock_irqrestore(&phba->io_sgl_lock, flags);
         return psgl_handle;
  }
  
  static void
  free_io_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
  {
-       spin_lock_bh(&phba->io_sgl_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&phba->io_sgl_lock, flags);
         beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_IO,
                     "BM_%d : In free_,io_sgl_free_index=%d\n",
                     phba->io_sgl_free_index);
@@ -941,7 +944,7 @@ free_io_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
                              "value there=%p\n", phba->io_sgl_free_index,
                              phba->io_sgl_hndl_base
                              [phba->io_sgl_free_index]);
-                spin_unlock_bh(&phba->io_sgl_lock);
+                spin_unlock_irqrestore(&phba->io_sgl_lock, flags);
                 return;
         }
         phba->io_sgl_hndl_base[phba->io_sgl_free_index] = psgl_handle;
@@ -950,7 +953,7 @@ free_io_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
                 phba->io_sgl_free_index = 0;
         else
                 phba->io_sgl_free_index++;
-       spin_unlock_bh(&phba->io_sgl_lock);
+       spin_unlock_irqrestore(&phba->io_sgl_lock, flags);
  }
  
  static inline struct wrb_handle *
@@ -958,15 +961,16 @@ beiscsi_get_wrb_handle(struct hwi_wrb_context *pwrb_context,
                        unsigned int wrbs_per_cxn)
  {
         struct wrb_handle *pwrb_handle;
+       unsigned long flags;
  
-       spin_lock_bh(&pwrb_context->wrb_lock);
+       spin_lock_irqsave(&pwrb_context->wrb_lock, flags);
         pwrb_handle = pwrb_context->pwrb_handle_base[pwrb_context->alloc_index];
         pwrb_context->wrb_handles_available--;
         if (pwrb_context->alloc_index == (wrbs_per_cxn - 1))
                 pwrb_context->alloc_index = 0;
         else
                 pwrb_context->alloc_index++;
-       spin_unlock_bh(&pwrb_context->wrb_lock);
+       spin_unlock_irqrestore(&pwrb_context->wrb_lock, flags);
  
         if (pwrb_handle)
                 memset(pwrb_handle->pwrb, 0, sizeof(*pwrb_handle->pwrb));
@@ -1001,14 +1005,16 @@ beiscsi_put_wrb_handle(struct hwi_wrb_context *pwrb_context,
                        struct wrb_handle *pwrb_handle,
                        unsigned int wrbs_per_cxn)
  {
-       spin_lock_bh(&pwrb_context->wrb_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&pwrb_context->wrb_lock, flags);
         pwrb_context->pwrb_handle_base[pwrb_context->free_index] = pwrb_handle;
         pwrb_context->wrb_handles_available++;
         if (pwrb_context->free_index == (wrbs_per_cxn - 1))
                 pwrb_context->free_index = 0;
         else
                 pwrb_context->free_index++;
-       spin_unlock_bh(&pwrb_context->wrb_lock);
+       spin_unlock_irqrestore(&pwrb_context->wrb_lock, flags);
  }
  
  /**
@@ -1037,8 +1043,9 @@ free_wrb_handle(struct beiscsi_hba *phba, struct hwi_wrb_context *pwrb_context,
  static struct sgl_handle *alloc_mgmt_sgl_handle(struct beiscsi_hba *phba)
  {
         struct sgl_handle *psgl_handle;
+       unsigned long flags;
  
-       spin_lock_bh(&phba->mgmt_sgl_lock);
+       spin_lock_irqsave(&phba->mgmt_sgl_lock, flags);
         if (phba->eh_sgl_hndl_avbl) {
                 psgl_handle = phba->eh_sgl_hndl_base[phba->eh_sgl_alloc_index];
                 phba->eh_sgl_hndl_base[phba->eh_sgl_alloc_index] = NULL;
@@ -1056,14 +1063,16 @@ static struct sgl_handle *alloc_mgmt_sgl_handle(struct beiscsi_hba *phba)
                         phba->eh_sgl_alloc_index++;
         } else
                 psgl_handle = NULL;
-       spin_unlock_bh(&phba->mgmt_sgl_lock);
+       spin_unlock_irqrestore(&phba->mgmt_sgl_lock, flags);
         return psgl_handle;
  }
  
  void
  free_mgmt_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
  {
-       spin_lock_bh(&phba->mgmt_sgl_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&phba->mgmt_sgl_lock, flags);
         beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG,
                     "BM_%d : In  free_mgmt_sgl_handle,"
                     "eh_sgl_free_index=%d\n",
@@ -1078,7 +1087,7 @@ free_mgmt_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
                             "BM_%d : Double Free in eh SGL ,"
                             "eh_sgl_free_index=%d\n",
                             phba->eh_sgl_free_index);
-               spin_unlock_bh(&phba->mgmt_sgl_lock);
+               spin_unlock_irqrestore(&phba->mgmt_sgl_lock, flags);
                 return;
         }
         phba->eh_sgl_hndl_base[phba->eh_sgl_free_index] = psgl_handle;
@@ -1088,7 +1097,7 @@ free_mgmt_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle)
                 phba->eh_sgl_free_index = 0;
         else
                 phba->eh_sgl_free_index++;
-       spin_unlock_bh(&phba->mgmt_sgl_lock);
+       spin_unlock_irqrestore(&phba->mgmt_sgl_lock, flags);
  }
  
  static void
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

index c051694..f9b6fba 100644 (file)
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -791,9 +791,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
  
  free_task:
         /* regular RX path uses back_lock */
-       spin_lock_bh(&session->back_lock);
+       spin_lock(&session->back_lock);
         __iscsi_put_task(task);
-       spin_unlock_bh(&session->back_lock);
+       spin_unlock(&session->back_lock);
         return NULL;
  }
  
diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c

index 9b4815e..19bf202 100644 (file)
--- a/drivers/thermal/intel_pch_thermal.c
+++ b/drivers/thermal/intel_pch_thermal.c
@@ -20,10 +20,13 @@
  #include <linux/types.h>
  #include <linux/init.h>
  #include <linux/pci.h>
+#include <linux/acpi.h>
  #include <linux/thermal.h>
  #include <linux/pm.h>
  
  /* Intel PCH thermal Device IDs */
+#define PCH_THERMAL_DID_HSW_1  0x9C24 /* Haswell PCH */
+#define PCH_THERMAL_DID_HSW_2  0x8C24 /* Haswell PCH */
  #define PCH_THERMAL_DID_WPT    0x9CA4 /* Wildcat Point */
  #define PCH_THERMAL_DID_SKL    0x9D31 /* Skylake PCH */
  
@@ -66,9 +69,53 @@ struct pch_thermal_device {
         unsigned long crt_temp;
         int hot_trip_id;
         unsigned long hot_temp;
+       int psv_trip_id;
+       unsigned long psv_temp;
         bool bios_enabled;
  };
  
+#ifdef CONFIG_ACPI
+
+/*
+ * On some platforms, there is a companion ACPI device, which adds
+ * passive trip temperature using _PSV method. There is no specific
+ * passive temperature setting in MMIO interface of this PCI device.
+ */
+static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd,
+                                     int *nr_trips)
+{
+       struct acpi_device *adev;
+
+       ptd->psv_trip_id = -1;
+
+       adev = ACPI_COMPANION(&ptd->pdev->dev);
+       if (adev) {
+               unsigned long long r;
+               acpi_status status;
+
+               status = acpi_evaluate_integer(adev->handle, "_PSV", NULL,
+                                              &r);
+               if (ACPI_SUCCESS(status)) {
+                       unsigned long trip_temp;
+
+                       trip_temp = DECI_KELVIN_TO_MILLICELSIUS(r);
+                       if (trip_temp) {
+                               ptd->psv_temp = trip_temp;
+                               ptd->psv_trip_id = *nr_trips;
+                               ++(*nr_trips);
+                       }
+               }
+       }
+}
+#else
+static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd,
+                                     int *nr_trips)
+{
+       ptd->psv_trip_id = -1;
+
+}
+#endif
+
  static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips)
  {
         u8 tsel;
@@ -119,6 +166,8 @@ read_trips:
                 ++(*nr_trips);
         }
  
+       pch_wpt_add_acpi_psv_trip(ptd, nr_trips);
+
         return 0;
  }
  
@@ -194,6 +243,8 @@ static int pch_get_trip_type(struct thermal_zone_device *tzd, int trip,
                 *type = THERMAL_TRIP_CRITICAL;
         else if (ptd->hot_trip_id == trip)
                 *type = THERMAL_TRIP_HOT;
+       else if (ptd->psv_trip_id == trip)
+               *type = THERMAL_TRIP_PASSIVE;
         else
                 return -EINVAL;
  
@@ -208,6 +259,8 @@ static int pch_get_trip_temp(struct thermal_zone_device *tzd, int trip, int *tem
                 *temp = ptd->crt_temp;
         else if (ptd->hot_trip_id == trip)
                 *temp = ptd->hot_temp;
+       else if (ptd->psv_trip_id == trip)
+               *temp = ptd->psv_temp;
         else
                 return -EINVAL;
  
@@ -242,6 +295,11 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev,
                 ptd->ops = &pch_dev_ops_wpt;
                 dev_name = "pch_skylake";
                 break;
+       case PCH_THERMAL_DID_HSW_1:
+       case PCH_THERMAL_DID_HSW_2:
+               ptd->ops = &pch_dev_ops_wpt;
+               dev_name = "pch_haswell";
+               break;
         default:
                 dev_err(&pdev->dev, "unknown pch thermal device\n");
                 return -ENODEV;
@@ -324,6 +382,8 @@ static int intel_pch_thermal_resume(struct device *device)
  static struct pci_device_id intel_pch_thermal_id[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) },
         { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1) },
+       { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2) },
         { 0, },
  };
  MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c

index 0e4dc0a..7a22307 100644 (file)
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c
@@ -669,20 +669,10 @@ static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
         .set_cur_state = powerclamp_set_cur_state,
  };
  
-static const struct x86_cpu_id intel_powerclamp_ids[] __initconst = {
-       { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT },
-       { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_ARAT },
-       { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_NONSTOP_TSC },
-       { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_CONSTANT_TSC},
-       {}
-};
-MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
-
  static int __init powerclamp_probe(void)
  {
-       if (!x86_match_cpu(intel_powerclamp_ids)) {
-               pr_err("Intel powerclamp does not run on family %d model %d\n",
-                               boot_cpu_data.x86, boot_cpu_data.x86_model);
+       if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
+               pr_err("CPU does not support MWAIT");
                 return -ENODEV;
         }
  
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c

index 7910165..42f9a0a 100644 (file)
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -137,7 +137,7 @@ Espan:
  bad_entry:
         EXOFS_ERR(
                 "ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
-               "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
+               "offset=%lu, inode=0x%llx, rec_len=%d, name_len=%d\n",
                 dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
                 _LLU(le64_to_cpu(p->inode_no)),
                 rec_len, p->name_len);
diff --git a/fs/iomap.c b/fs/iomap.c

index 013d1d3..a8ee8c3 100644 (file)
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
         struct page *page = data;
         int ret;
  
-       ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length,
-                       NULL, iomap);
+       ret = __block_write_begin_int(page, pos, length, NULL, iomap);
         if (ret)
                 return ret;
  
@@ -561,7 +560,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
         }
  
         while (len > 0) {
-               ret = iomap_apply(inode, start, len, 0, ops, &ctx,
+               ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
                                 iomap_fiemap_actor);
                 /* inode with no (attribute) mapping will give ENOENT */
                 if (ret == -ENOENT)
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c

index 1e8fe84..5355efb 100644 (file)
--- a/fs/orangefs/dcache.c
+++ b/fs/orangefs/dcache.c
@@ -73,7 +73,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry)
                 }
         }
  
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
         ret = 1;
  out_release_op:
         op_release(new_op);
@@ -94,8 +94,9 @@ out_drop:
  static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
  {
         int ret;
+       unsigned long time = (unsigned long) dentry->d_fsdata;
  
-       if (time_before(jiffies, dentry->d_time))
+       if (time_before(jiffies, time))
                 return 1;
  
         if (flags & LOOKUP_RCU)
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c

index 66ea0cc..02cc613 100644 (file)
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -621,9 +621,9 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
          * readahead cache (if any); this forces an expensive refresh of
          * data for the next caller of mmap (or 'get_block' accesses)
          */
-       if (file->f_path.dentry->d_inode &&
-           file->f_path.dentry->d_inode->i_mapping &&
-           mapping_nrpages(&file->f_path.dentry->d_inode->i_data)) {
+       if (file_inode(file) &&
+           file_inode(file)->i_mapping &&
+           mapping_nrpages(&file_inode(file)->i_data)) {
                 if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
                         gossip_debug(GOSSIP_INODE_DEBUG,
                             "calling flush_racache on %pU\n",
@@ -632,7 +632,7 @@ static int orangefs_file_release(struct inode *inode, struct file *file)
                         gossip_debug(GOSSIP_INODE_DEBUG,
                             "flush_racache finished\n");
                 }
-               truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
+               truncate_inode_pages(file_inode(file)->i_mapping,
                                      0);
         }
         return 0;
@@ -648,7 +648,7 @@ static int orangefs_fsync(struct file *file,
  {
         int ret = -EINVAL;
         struct orangefs_inode_s *orangefs_inode =
-               ORANGEFS_I(file->f_path.dentry->d_inode);
+               ORANGEFS_I(file_inode(file));
         struct orangefs_kernel_op_s *new_op = NULL;
  
         /* required call */
@@ -661,7 +661,7 @@ static int orangefs_fsync(struct file *file,
  
         ret = service_operation(new_op,
                         "orangefs_fsync",
-                       get_interruptible_flag(file->f_path.dentry->d_inode));
+                       get_interruptible_flag(file_inode(file)));
  
         gossip_debug(GOSSIP_FILE_DEBUG,
                      "orangefs_fsync got return value of %d\n",
@@ -669,7 +669,7 @@ static int orangefs_fsync(struct file *file,
  
         op_release(new_op);
  
-       orangefs_flush_inode(file->f_path.dentry->d_inode);
+       orangefs_flush_inode(file_inode(file));
         return ret;
  }
  
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c

index d15d3d2..a290ff6 100644 (file)
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -72,7 +72,7 @@ static int orangefs_create(struct inode *dir,
  
         d_instantiate(dentry, inode);
         unlock_new_inode(inode);
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
         ORANGEFS_I(inode)->getattr_time = jiffies - 1;
  
         gossip_debug(GOSSIP_NAME_DEBUG,
@@ -183,7 +183,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
                 goto out;
         }
  
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
  
         inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
         if (IS_ERR(inode)) {
@@ -322,7 +322,7 @@ static int orangefs_symlink(struct inode *dir,
  
         d_instantiate(dentry, inode);
         unlock_new_inode(inode);
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
         ORANGEFS_I(inode)->getattr_time = jiffies - 1;
  
         gossip_debug(GOSSIP_NAME_DEBUG,
@@ -386,7 +386,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
  
         d_instantiate(dentry, inode);
         unlock_new_inode(inode);
-       dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+       orangefs_set_timeout(dentry);
         ORANGEFS_I(inode)->getattr_time = jiffies - 1;
  
         gossip_debug(GOSSIP_NAME_DEBUG,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h

index 0a82048..3bf803d 100644 (file)
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -580,4 +580,11 @@ static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
  #endif
  }
  
+static inline void orangefs_set_timeout(struct dentry *dentry)
+{
+       unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
+
+       dentry->d_fsdata = (void *) time;
+}
+
  #endif /* __ORANGEFSKERNEL_H */
diff --git a/fs/proc/base.c b/fs/proc/base.c

index adfc5b4..ca651ac 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1012,6 +1012,9 @@ static ssize_t auxv_read(struct file *file, char __user *buf,
  {
         struct mm_struct *mm = file->private_data;
         unsigned int nwords = 0;
+
+       if (!mm)
+               return 0;
         do {
                 nwords += 2;
         } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c

index c27344c..c6eb219 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3974,9 +3974,6 @@ xfs_bmap_remap_alloc(
          * allocating, so skip that check by pretending to be freeing.
          */
         error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
-       if (error)
-               goto error0;
-error0:
         xfs_perag_put(args.pag);
         if (error)
                 trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
@@ -3999,6 +3996,39 @@ xfs_bmap_alloc(
         return xfs_bmap_btalloc(ap);
  }
  
+/* Trim extent to fit a logical block range. */
+void
+xfs_trim_extent(
+       struct xfs_bmbt_irec    *irec,
+       xfs_fileoff_t           bno,
+       xfs_filblks_t           len)
+{
+       xfs_fileoff_t           distance;
+       xfs_fileoff_t           end = bno + len;
+
+       if (irec->br_startoff + irec->br_blockcount <= bno ||
+           irec->br_startoff >= end) {
+               irec->br_blockcount = 0;
+               return;
+       }
+
+       if (irec->br_startoff < bno) {
+               distance = bno - irec->br_startoff;
+               if (isnullstartblock(irec->br_startblock))
+                       irec->br_startblock = DELAYSTARTBLOCK;
+               if (irec->br_startblock != DELAYSTARTBLOCK &&
+                   irec->br_startblock != HOLESTARTBLOCK)
+                       irec->br_startblock += distance;
+               irec->br_startoff += distance;
+               irec->br_blockcount -= distance;
+       }
+
+       if (end < irec->br_startoff + irec->br_blockcount) {
+               distance = irec->br_startoff + irec->br_blockcount - end;
+               irec->br_blockcount -= distance;
+       }
+}
+
  /*
   * Trim the returned map to the required bounds
   */
@@ -4829,6 +4859,219 @@ xfs_bmap_split_indlen(
         return stolen;
  }
  
+int
+xfs_bmap_del_extent_delay(     /* cut range @del out of delalloc extent @got */
+       struct xfs_inode        *ip,            /* inode owning the fork */
+       int                     whichfork,      /* fork holding @got */
+       xfs_extnum_t            *idx,           /* in/out: index of @got */
+       struct xfs_bmbt_irec    *got,           /* in/out: extent being cut */
+       struct xfs_bmbt_irec    *del)           /* range to remove, within @got */
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
+       struct xfs_bmbt_irec    new;
+       int64_t                 da_old, da_new, da_diff = 0;
+       xfs_fileoff_t           del_endoff, got_endoff;
+       xfs_filblks_t           got_indlen, new_indlen, stolen;
+       int                     error = 0, state = 0;
+       bool                    isrt;
+
+       XFS_STATS_INC(mp, xs_del_exlist);
+
+       isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+       del_endoff = del->br_startoff + del->br_blockcount;
+       got_endoff = got->br_startoff + got->br_blockcount;
+       da_old = startblockval(got->br_startblock);     /* indlen reserved so far */
+       da_new = 0;
+
+       ASSERT(*idx >= 0);
+       ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(del->br_blockcount > 0);
+       ASSERT(got->br_startoff <= del->br_startoff);
+       ASSERT(got_endoff >= del_endoff);
+
+       if (isrt) {
+               uint64_t rtexts = del->br_blockcount;   /* blocks; do_div wants u64 */
+
+               do_div(rtexts, mp->m_sb.sb_rextsize);   /* blocks / blocks per rt extent */
+               xfs_mod_frextents(mp, rtexts);
+       }
+
+       /*
+        * Update the inode delalloc counter now and wait to update the
+        * sb counters as we might have to borrow some blocks for the
+        * indirect block accounting.
+        */
+       xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0,
+                       isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+       ip->i_delayed_blks -= del->br_blockcount;
+
+       if (whichfork == XFS_COW_FORK)
+               state |= BMAP_COWFORK;
+
+       if (got->br_startoff == del->br_startoff)
+               state |= BMAP_LEFT_CONTIG;      /* del starts where got starts */
+       if (got_endoff == del_endoff)
+               state |= BMAP_RIGHT_CONTIG;     /* del ends where got ends */
+
+       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+               /*
+                * Matches the whole extent.  Delete the entry.
+                */
+               xfs_iext_remove(ip, *idx, 1, state);
+               --*idx;         /* entry is gone; report the one before it */
+               break;
+       case BMAP_LEFT_CONTIG:
+               /*
+                * Deleting the first part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_startoff = del_endoff;
+               got->br_blockcount -= del->br_blockcount;
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+                               got->br_blockcount), da_old);   /* capped at da_old */
+               got->br_startblock = nullstartblock((int)da_new);
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * Deleting the last part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_blockcount = got->br_blockcount - del->br_blockcount;
+               da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
+                               got->br_blockcount), da_old);   /* capped at da_old */
+               got->br_startblock = nullstartblock((int)da_new);
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case 0:
+               /*
+                * Deleting the middle of the extent.
+                *
+                * Distribute the original indlen reservation across the two new
+                * extents.  Steal blocks from the deleted extent if necessary.
+                * Stealing blocks simply fudges the fdblocks accounting below.
+                * Warn if either of the new indlen reservations is zero as this
+                * can lead to delalloc problems.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+
+               got->br_blockcount = del->br_startoff - got->br_startoff;
+               got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);
+
+               new.br_blockcount = got_endoff - del_endoff;
+               new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);
+
+               WARN_ON_ONCE(!got_indlen || !new_indlen);
+               stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
+                                                      del->br_blockcount);
+
+               got->br_startblock = nullstartblock((int)got_indlen);
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); /* state keeps the fork flag */
+
+               new.br_startoff = del_endoff;
+               new.br_state = got->br_state;
+               new.br_startblock = nullstartblock((int)new_indlen);
+
+               ++*idx;         /* the right-hand remainder goes after @got */
+               xfs_iext_insert(ip, *idx, 1, &new, state);
+
+               da_new = got_indlen + new_indlen - stolen;
+               del->br_blockcount -= stolen;   /* stolen blocks are not freed below */
+               break;
+       }
+
+       ASSERT(da_old >= da_new);
+       da_diff = da_old - da_new;      /* indlen reservation no longer needed */
+       if (!isrt)
+               da_diff += del->br_blockcount;  /* plus the deleted blocks themselves */
+       if (da_diff)
+               xfs_mod_fdblocks(mp, da_diff, false);
+       return error;   /* error is never assigned above, so this is always 0 */
+}
+
+void
+xfs_bmap_del_extent_cow(       /* cut range @del out of CoW fork extent @got */
+       struct xfs_inode        *ip,            /* inode owning the CoW fork */
+       xfs_extnum_t            *idx,           /* in/out: index of @got */
+       struct xfs_bmbt_irec    *got,           /* in/out: extent being cut */
+       struct xfs_bmbt_irec    *del)           /* range to remove, within @got */
+{
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec    new;
+       xfs_fileoff_t           del_endoff, got_endoff;
+       int                     state = BMAP_COWFORK;
+
+       XFS_STATS_INC(mp, xs_del_exlist);
+
+       del_endoff = del->br_startoff + del->br_blockcount;
+       got_endoff = got->br_startoff + got->br_blockcount;
+
+       ASSERT(*idx >= 0);
+       ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+       ASSERT(del->br_blockcount > 0);
+       ASSERT(got->br_startoff <= del->br_startoff);
+       ASSERT(got_endoff >= del_endoff);
+       ASSERT(!isnullstartblock(got->br_startblock));
+
+       if (got->br_startoff == del->br_startoff)
+               state |= BMAP_LEFT_CONTIG;      /* del starts where got starts */
+       if (got_endoff == del_endoff)
+               state |= BMAP_RIGHT_CONTIG;     /* del ends where got ends */
+
+       switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
+       case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
+               /*
+                * Matches the whole extent.  Delete the entry.
+                */
+               xfs_iext_remove(ip, *idx, 1, state);
+               --*idx;         /* entry is gone; report the one before it */
+               break;
+       case BMAP_LEFT_CONTIG:
+               /*
+                * Deleting the first part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_startoff = del_endoff;
+               got->br_blockcount -= del->br_blockcount;
+               got->br_startblock = del->br_startblock + del->br_blockcount; /* block after del */
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case BMAP_RIGHT_CONTIG:
+               /*
+                * Deleting the last part of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_blockcount -= del->br_blockcount;       /* start is unchanged */
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+               break;
+       case 0:
+               /*
+                * Deleting the middle of the extent.
+                */
+               trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+               got->br_blockcount = del->br_startoff - got->br_startoff; /* left remainder */
+               xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
+               trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+
+               new.br_startoff = del_endoff;   /* right remainder starts after del */
+               new.br_blockcount = got_endoff - del_endoff;
+               new.br_state = got->br_state;
+               new.br_startblock = del->br_startblock + del->br_blockcount;
+
+               ++*idx;         /* the right-hand remainder goes after @got */
+               xfs_iext_insert(ip, *idx, 1, &new, state);
+               break;
+       }
+}
+
  /*
   * Called by xfs_bmapi to update file extent records and the btree
   * after removing space (or undoing a delayed allocation).
@@ -5171,175 +5414,6 @@ done:
         return error;
  }
  
-/* Remove an extent from the CoW fork.  Similar to xfs_bmap_del_extent. */
-int
-xfs_bunmapi_cow(
-       struct xfs_inode                *ip,
-       struct xfs_bmbt_irec            *del)
-{
-       xfs_filblks_t                   da_new;
-       xfs_filblks_t                   da_old;
-       xfs_fsblock_t                   del_endblock = 0;
-       xfs_fileoff_t                   del_endoff;
-       int                             delay;
-       struct xfs_bmbt_rec_host        *ep;
-       int                             error;
-       struct xfs_bmbt_irec            got;
-       xfs_fileoff_t                   got_endoff;
-       struct xfs_ifork                *ifp;
-       struct xfs_mount                *mp;
-       xfs_filblks_t                   nblks;
-       struct xfs_bmbt_irec            new;
-       /* REFERENCED */
-       uint                            qfield;
-       xfs_filblks_t                   temp;
-       xfs_filblks_t                   temp2;
-       int                             state = BMAP_COWFORK;
-       int                             eof;
-       xfs_extnum_t                    eidx;
-
-       mp = ip->i_mount;
-       XFS_STATS_INC(mp, xs_del_exlist);
-
-       ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof,
-                       &eidx, &got, &new);
-
-       ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp;
-       ASSERT((eidx >= 0) && (eidx < ifp->if_bytes /
-               (uint)sizeof(xfs_bmbt_rec_t)));
-       ASSERT(del->br_blockcount > 0);
-       ASSERT(got.br_startoff <= del->br_startoff);
-       del_endoff = del->br_startoff + del->br_blockcount;
-       got_endoff = got.br_startoff + got.br_blockcount;
-       ASSERT(got_endoff >= del_endoff);
-       delay = isnullstartblock(got.br_startblock);
-       ASSERT(isnullstartblock(del->br_startblock) == delay);
-       qfield = 0;
-       error = 0;
-       /*
-        * If deleting a real allocation, must free up the disk space.
-        */
-       if (!delay) {
-               nblks = del->br_blockcount;
-               qfield = XFS_TRANS_DQ_BCOUNT;
-               /*
-                * Set up del_endblock and cur for later.
-                */
-               del_endblock = del->br_startblock + del->br_blockcount;
-               da_old = da_new = 0;
-       } else {
-               da_old = startblockval(got.br_startblock);
-               da_new = 0;
-               nblks = 0;
-       }
-       qfield = qfield;
-       nblks = nblks;
-
-       /*
-        * Set flag value to use in switch statement.
-        * Left-contig is 2, right-contig is 1.
-        */
-       switch (((got.br_startoff == del->br_startoff) << 1) |
-               (got_endoff == del_endoff)) {
-       case 3:
-               /*
-                * Matches the whole extent.  Delete the entry.
-                */
-               xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK);
-               --eidx;
-               break;
-
-       case 2:
-               /*
-                * Deleting the first part of the extent.
-                */
-               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-               xfs_bmbt_set_startoff(ep, del_endoff);
-               temp = got.br_blockcount - del->br_blockcount;
-               xfs_bmbt_set_blockcount(ep, temp);
-               if (delay) {
-                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                               da_old);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-                       da_new = temp;
-                       break;
-               }
-               xfs_bmbt_set_startblock(ep, del_endblock);
-               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-               break;
-
-       case 1:
-               /*
-                * Deleting the last part of the extent.
-                */
-               temp = got.br_blockcount - del->br_blockcount;
-               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               if (delay) {
-                       temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
-                               da_old);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-                       da_new = temp;
-                       break;
-               }
-               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-               break;
-
-       case 0:
-               /*
-                * Deleting the middle of the extent.
-                */
-               temp = del->br_startoff - got.br_startoff;
-               trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
-               xfs_bmbt_set_blockcount(ep, temp);
-               new.br_startoff = del_endoff;
-               temp2 = got_endoff - del_endoff;
-               new.br_blockcount = temp2;
-               new.br_state = got.br_state;
-               if (!delay) {
-                       new.br_startblock = del_endblock;
-               } else {
-                       temp = xfs_bmap_worst_indlen(ip, temp);
-                       xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
-                       temp2 = xfs_bmap_worst_indlen(ip, temp2);
-                       new.br_startblock = nullstartblock((int)temp2);
-                       da_new = temp + temp2;
-                       while (da_new > da_old) {
-                               if (temp) {
-                                       temp--;
-                                       da_new--;
-                                       xfs_bmbt_set_startblock(ep,
-                                               nullstartblock((int)temp));
-                               }
-                               if (da_new == da_old)
-                                       break;
-                               if (temp2) {
-                                       temp2--;
-                                       da_new--;
-                                       new.br_startblock =
-                                               nullstartblock((int)temp2);
-                               }
-                       }
-               }
-               trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
-               xfs_iext_insert(ip, eidx + 1, 1, &new, state);
-               ++eidx;
-               break;
-       }
-
-       /*
-        * Account for change in delayed indirect blocks.
-        * Nothing to do for disk quota accounting here.
-        */
-       ASSERT(da_old >= da_new);
-       if (da_old > da_new)
-               xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
-
-       return error;
-}
-
  /*
   * Unmap (remove) blocks from a file.
   * If nexts is nonzero then the number of extents to remove is limited to
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h

index f97db71..7cae6ec 100644 (file)
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -190,6 +190,8 @@ void        xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
  #define        XFS_BMAP_TRACE_EXLIST(ip,c,w)
  #endif
  
+void   xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
+               xfs_filblks_t len);
  int    xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
  void   xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
  void   xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
@@ -221,7 +223,11 @@ int        xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
                 xfs_fileoff_t bno, xfs_filblks_t len, int flags,
                 xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
                 struct xfs_defer_ops *dfops, int *done);
-int    xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del);
+int    xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
+               xfs_extnum_t *idx, struct xfs_bmbt_irec *got,
+               struct xfs_bmbt_irec *del);
+void   xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
+               struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
  int    xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
                 xfs_extnum_t num);
  uint   xfs_default_attroffset(struct xfs_inode *ip);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c

index 5c8e6f2..0e80993 100644 (file)
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4826,7 +4826,7 @@ xfs_btree_calc_size(
         return rval;
  }
  
-int
+static int
  xfs_btree_count_blocks_helper(
         struct xfs_btree_cur    *cur,
         int                     level,
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c

index 3cc3cf7..ac9a003 100644 (file)
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc(
         if (mp->m_quotainfo)
                 ndquots = mp->m_quotainfo->qi_dqperchunk;
         else
-               ndquots = xfs_calc_dquots_per_chunk(
-                                       XFS_BB_TO_FSB(mp, bp->b_length));
+               ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
  
         for (i = 0; i < ndquots; i++, d++) {
                 if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h

index f6547fc..6b7579e 100644 (file)
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -865,7 +865,6 @@ typedef struct xfs_timestamp {
   * padding field for v3 inodes.
   */
  #define        XFS_DINODE_MAGIC                0x494e  /* 'IN' */
-#define XFS_DINODE_GOOD_VERSION(v)     ((v) >= 1 && (v) <= 3)
  typedef struct xfs_dinode {
         __be16          di_magic;       /* inode magic # = XFS_DINODE_MAGIC */
         __be16          di_mode;        /* mode and type of file */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c

index 8de9a3a..134424f 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -57,6 +57,17 @@ xfs_inobp_check(
  }
  #endif
  
+bool
+xfs_dinode_good_version(       /* is inode @version acceptable for mount @mp? */
+       struct xfs_mount *mp,
+       __u8            version)
+{
+       if (xfs_sb_version_hascrc(&mp->m_sb))
+               return version == 3;    /* hascrc superblock: only version 3 */
+
+       return version == 1 || version == 2;    /* otherwise only versions 1 and 2 */
+}
+
  /*
   * If we are doing readahead on an inode buffer, we might be in log recovery
   * reading an inode allocation buffer that hasn't yet been replayed, and hence
@@ -91,7 +102,7 @@ xfs_inode_buf_verify(
  
                 dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
                 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
-                           XFS_DINODE_GOOD_VERSION(dip->di_version);
+                       xfs_dinode_good_version(mp, dip->di_version);
                 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
                                                 XFS_ERRTAG_ITOBP_INOTOBP,
                                                 XFS_RANDOM_ITOBP_INOTOBP))) {
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h

index 62d9d46..3cfe12a 100644 (file)
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -74,6 +74,8 @@ void  xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
  void   xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
                                struct xfs_dinode *to);
  
+bool   xfs_dinode_good_version(struct xfs_mount *mp, __u8 version);
+
  #if defined(DEBUG)
  void   xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
  #else
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c

index a314fc7..6e4f7f9 100644 (file)
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -249,6 +249,7 @@ xfs_file_dio_aio_read(
         struct xfs_inode        *ip = XFS_I(inode);
         loff_t                  isize = i_size_read(inode);
         size_t                  count = iov_iter_count(to);
+       loff_t                  end = iocb->ki_pos + count - 1;
         struct iov_iter         data;
         struct xfs_buftarg      *target;
         ssize_t                 ret = 0;
@@ -272,49 +273,21 @@ xfs_file_dio_aio_read(
  
         file_accessed(iocb->ki_filp);
  
-       /*
-        * Locking is a bit tricky here. If we take an exclusive lock for direct
-        * IO, we effectively serialise all new concurrent read IO to this file
-        * and block it behind IO that is currently in progress because IO in
-        * progress holds the IO lock shared. We only need to hold the lock
-        * exclusive to blow away the page cache, so only take lock exclusively
-        * if the page cache needs invalidation. This allows the normal direct
-        * IO case of no page cache pages to proceeed concurrently without
-        * serialisation.
-        */
         xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
         if (mapping->nrpages) {
-               xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
-               xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
+               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
+               if (ret)
+                       goto out_unlock;
  
                 /*
-                * The generic dio code only flushes the range of the particular
-                * I/O. Because we take an exclusive lock here, this whole
-                * sequence is considerably more expensive for us. This has a
-                * noticeable performance impact for any file with cached pages,
-                * even when outside of the range of the particular I/O.
-                *
-                * Hence, amortize the cost of the lock against a full file
-                * flush and reduce the chances of repeated iolock cycles going
-                * forward.
+                * Invalidate whole pages. This can return an error if we fail
+                * to invalidate a page, but this should never happen on XFS.
+                * Warn if it does fail.
                  */
-               if (mapping->nrpages) {
-                       ret = filemap_write_and_wait(mapping);
-                       if (ret) {
-                               xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
-                               return ret;
-                       }
-
-                       /*
-                        * Invalidate whole pages. This can return an error if
-                        * we fail to invalidate a page, but this should never
-                        * happen on XFS. Warn if it does fail.
-                        */
-                       ret = invalidate_inode_pages2(mapping);
-                       WARN_ON_ONCE(ret);
-                       ret = 0;
-               }
-               xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
+               WARN_ON_ONCE(ret);
+               ret = 0;
         }
  
         data = *to;
@@ -324,8 +297,9 @@ xfs_file_dio_aio_read(
                 iocb->ki_pos += ret;
                 iov_iter_advance(to, ret);
         }
-       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
  
+out_unlock:
+       xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
         return ret;
  }
  
@@ -570,61 +544,49 @@ xfs_file_dio_aio_write(
         if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
                 return -EINVAL;
  
-       /* "unaligned" here means not aligned to a filesystem block */
-       if ((iocb->ki_pos & mp->m_blockmask) ||
-           ((iocb->ki_pos + count) & mp->m_blockmask))
-               unaligned_io = 1;
-
         /*
-        * We don't need to take an exclusive lock unless there page cache needs
-        * to be invalidated or unaligned IO is being executed. We don't need to
-        * consider the EOF extension case here because
-        * xfs_file_aio_write_checks() will relock the inode as necessary for
-        * EOF zeroing cases and fill out the new inode size as appropriate.
+        * Don't take the exclusive iolock here unless the I/O is unaligned to
+        * the file system block size.  We don't need to consider the EOF
+        * extension case here because xfs_file_aio_write_checks() will relock
+        * the inode as necessary for EOF zeroing cases and fill out the new
+        * inode size as appropriate.
          */
-       if (unaligned_io || mapping->nrpages)
+       if ((iocb->ki_pos & mp->m_blockmask) ||
+           ((iocb->ki_pos + count) & mp->m_blockmask)) {
+               unaligned_io = 1;
                 iolock = XFS_IOLOCK_EXCL;
-       else
+       } else {
                 iolock = XFS_IOLOCK_SHARED;
-       xfs_rw_ilock(ip, iolock);
-
-       /*
-        * Recheck if there are cached pages that need invalidate after we got
-        * the iolock to protect against other threads adding new pages while
-        * we were waiting for the iolock.
-        */
-       if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
-               xfs_rw_iunlock(ip, iolock);
-               iolock = XFS_IOLOCK_EXCL;
-               xfs_rw_ilock(ip, iolock);
         }
  
+       xfs_rw_ilock(ip, iolock);
+
         ret = xfs_file_aio_write_checks(iocb, from, &iolock);
         if (ret)
                 goto out;
         count = iov_iter_count(from);
         end = iocb->ki_pos + count - 1;
  
-       /*
-        * See xfs_file_dio_aio_read() for why we do a full-file flush here.
-        */
         if (mapping->nrpages) {
-               ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+               ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
                 if (ret)
                         goto out;
+
                 /*
                  * Invalidate whole pages. This can return an error if we fail
                  * to invalidate a page, but this should never happen on XFS.
                  * Warn if it does fail.
                  */
-               ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
+               ret = invalidate_inode_pages2_range(mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
                 WARN_ON_ONCE(ret);
                 ret = 0;
         }
  
         /*
          * If we are doing unaligned IO, wait for all other IO to drain,
-        * otherwise demote the lock if we had to flush cached pages
+        * otherwise demote the lock if we had to take the exclusive lock
+        * for other reasons in xfs_file_aio_write_checks.
          */
         if (unaligned_io)
                 inode_dio_wait(inode);
@@ -947,134 +909,6 @@ out_unlock:
         return error;
  }
  
-/*
- * Flush all file writes out to disk.
- */
-static int
-xfs_file_wait_for_io(
-       struct inode    *inode,
-       loff_t          offset,
-       size_t          len)
-{
-       loff_t          rounding;
-       loff_t          ioffset;
-       loff_t          iendoffset;
-       loff_t          bs;
-       int             ret;
-
-       bs = inode->i_sb->s_blocksize;
-       inode_dio_wait(inode);
-
-       rounding = max_t(xfs_off_t, bs, PAGE_SIZE);
-       ioffset = round_down(offset, rounding);
-       iendoffset = round_up(offset + len, rounding) - 1;
-       ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
-                                          iendoffset);
-       return ret;
-}
-
-/* Hook up to the VFS reflink function */
-STATIC int
-xfs_file_share_range(
-       struct file     *file_in,
-       loff_t          pos_in,
-       struct file     *file_out,
-       loff_t          pos_out,
-       u64             len,
-       bool            is_dedupe)
-{
-       struct inode    *inode_in;
-       struct inode    *inode_out;
-       ssize_t         ret;
-       loff_t          bs;
-       loff_t          isize;
-       int             same_inode;
-       loff_t          blen;
-       unsigned int    flags = 0;
-
-       inode_in = file_inode(file_in);
-       inode_out = file_inode(file_out);
-       bs = inode_out->i_sb->s_blocksize;
-
-       /* Don't touch certain kinds of inodes */
-       if (IS_IMMUTABLE(inode_out))
-               return -EPERM;
-       if (IS_SWAPFILE(inode_in) ||
-           IS_SWAPFILE(inode_out))
-               return -ETXTBSY;
-
-       /* Reflink only works within this filesystem. */
-       if (inode_in->i_sb != inode_out->i_sb)
-               return -EXDEV;
-       same_inode = (inode_in->i_ino == inode_out->i_ino);
-
-       /* Don't reflink dirs, pipes, sockets... */
-       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
-               return -EISDIR;
-       if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
-               return -EINVAL;
-       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
-               return -EINVAL;
-
-       /* Don't share DAX file data for now. */
-       if (IS_DAX(inode_in) || IS_DAX(inode_out))
-               return -EINVAL;
-
-       /* Are we going all the way to the end? */
-       isize = i_size_read(inode_in);
-       if (isize == 0)
-               return 0;
-       if (len == 0)
-               len = isize - pos_in;
-
-       /* Ensure offsets don't wrap and the input is inside i_size */
-       if (pos_in + len < pos_in || pos_out + len < pos_out ||
-           pos_in + len > isize)
-               return -EINVAL;
-
-       /* Don't allow dedupe past EOF in the dest file */
-       if (is_dedupe) {
-               loff_t  disize;
-
-               disize = i_size_read(inode_out);
-               if (pos_out >= disize || pos_out + len > disize)
-                       return -EINVAL;
-       }
-
-       /* If we're linking to EOF, continue to the block boundary. */
-       if (pos_in + len == isize)
-               blen = ALIGN(isize, bs) - pos_in;
-       else
-               blen = len;
-
-       /* Only reflink if we're aligned to block boundaries */
-       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-           !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-               return -EINVAL;
-
-       /* Don't allow overlapped reflink within the same file */
-       if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen)
-               return -EINVAL;
-
-       /* Wait for the completion of any pending IOs on srcfile */
-       ret = xfs_file_wait_for_io(inode_in, pos_in, len);
-       if (ret)
-               goto out;
-       ret = xfs_file_wait_for_io(inode_out, pos_out, len);
-       if (ret)
-               goto out;
-
-       if (is_dedupe)
-               flags |= XFS_REFLINK_DEDUPE;
-       ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out),
-                       pos_out, len, flags);
-       if (ret < 0)
-               goto out;
-
-out:
-       return ret;
-}
-
  STATIC ssize_t
  xfs_file_copy_range(
         struct file     *file_in,
@@ -1086,7 +920,7 @@ xfs_file_copy_range(
  {
         int             error;
  
-       error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+       error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
                                      len, false);
         if (error)
                 return error;
@@ -1101,7 +935,7 @@ xfs_file_clone_range(
         loff_t          pos_out,
         u64             len)
  {
-       return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
+       return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
                                      len, false);
  }
  
@@ -1124,7 +958,7 @@ xfs_file_dedupe_range(
         if (len > XFS_MAX_DEDUPE_LEN)
                 len = XFS_MAX_DEDUPE_LEN;
  
-       error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
+       error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
                                      len, true);
         if (error)
                 return error;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c

index 14796b7..f295049 100644 (file)
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -1656,9 +1656,9 @@ void
  xfs_inode_set_cowblocks_tag(
         xfs_inode_t     *ip)
  {
-       trace_xfs_inode_set_eofblocks_tag(ip);
+       trace_xfs_inode_set_cowblocks_tag(ip);
         return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
-                       trace_xfs_perag_set_eofblocks,
+                       trace_xfs_perag_set_cowblocks,
                         XFS_ICI_COWBLOCKS_TAG);
  }
  
@@ -1666,7 +1666,7 @@ void
  xfs_inode_clear_cowblocks_tag(
         xfs_inode_t     *ip)
  {
-       trace_xfs_inode_clear_eofblocks_tag(ip);
+       trace_xfs_inode_clear_cowblocks_tag(ip);
         return __xfs_inode_clear_eofblocks_tag(ip,
-                       trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG);
+                       trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
  }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c

index d907eb9..436e109 100644 (file)
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay(
         xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
                         &got, &prev);
         if (!eof && got.br_startoff <= offset_fsb) {
+               if (xfs_is_reflink_inode(ip)) {
+                       bool            shared;
+
+                       end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
+                                       maxbytes_fsb);
+                       xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
+                       error = xfs_reflink_reserve_cow(ip, &got, &shared);
+                       if (error)
+                               goto out_unlock;
+               }
+
                 trace_xfs_iomap_found(ip, offset, count, 0, &got);
                 goto done;
         }
@@ -961,19 +972,13 @@ xfs_file_iomap_begin(
         struct xfs_mount        *mp = ip->i_mount;
         struct xfs_bmbt_irec    imap;
         xfs_fileoff_t           offset_fsb, end_fsb;
-       bool                    shared, trimmed;
         int                     nimaps = 1, error = 0;
+       bool                    shared = false, trimmed = false;
         unsigned                lockmode;
  
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
  
-       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
-               error = xfs_reflink_reserve_cow_range(ip, offset, length);
-               if (error < 0)
-                       return error;
-       }
-
         if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
                    !xfs_get_extsz_hint(ip)) {
                 /* Reserve delalloc blocks for regular writeback. */
@@ -981,7 +986,16 @@ xfs_file_iomap_begin(
                                 iomap);
         }
  
-       lockmode = xfs_ilock_data_map_shared(ip);
+       /*
+        * COW writes will allocate delalloc space, so we need to make sure
+        * to take the lock exclusively here.
+        */
+       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+               lockmode = XFS_ILOCK_EXCL;
+               xfs_ilock(ip, XFS_ILOCK_EXCL);
+       } else {
+               lockmode = xfs_ilock_data_map_shared(ip);
+       }
  
         ASSERT(offset <= mp->m_super->s_maxbytes);
         if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
@@ -991,16 +1005,24 @@ xfs_file_iomap_begin(
  
         error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
                                &nimaps, 0);
-       if (error) {
-               xfs_iunlock(ip, lockmode);
-               return error;
+       if (error)
+               goto out_unlock;
+
+       if (flags & IOMAP_REPORT) {
+               /* Trim the mapping to the nearest shared extent boundary. */
+               error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
+                               &trimmed);
+               if (error)
+                       goto out_unlock;
         }
  
-       /* Trim the mapping to the nearest shared extent boundary. */
-       error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
-       if (error) {
-               xfs_iunlock(ip, lockmode);
-               return error;
+       if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
+               error = xfs_reflink_reserve_cow(ip, &imap, &shared);
+               if (error)
+                       goto out_unlock;
+
+               end_fsb = imap.br_startoff + imap.br_blockcount;
+               length = XFS_FSB_TO_B(mp, end_fsb) - offset;
         }
  
         if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
@@ -1039,6 +1061,9 @@ xfs_file_iomap_begin(
         if (shared)
                 iomap->flags |= IOMAP_F_SHARED;
         return 0;
+out_unlock:
+       xfs_iunlock(ip, lockmode);
+       return error;
  }
  
  static int
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index fc78739..b341f10 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1009,6 +1009,7 @@ xfs_mountfs(
   out_quota:
         xfs_qm_unmount_quotas(mp);
   out_rtunmount:
+       mp->m_super->s_flags &= ~MS_ACTIVE;
         xfs_rtunmount_inodes(mp);
   out_rele_rip:
         IRELE(rip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c

index 5965e94..a279b4e 100644 (file)
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared(
         if (!xfs_is_reflink_inode(ip) ||
             ISUNWRITTEN(irec) ||
             irec->br_startblock == HOLESTARTBLOCK ||
-           irec->br_startblock == DELAYSTARTBLOCK) {
+           irec->br_startblock == DELAYSTARTBLOCK ||
+           isnullstartblock(irec->br_startblock)) {
                 *shared = false;
                 return 0;
         }
@@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared(
         }
  }
  
-/* Create a CoW reservation for a range of blocks within a file. */
-static int
-__xfs_reflink_reserve_cow(
+/*
+ * Trim the passed in imap to the next shared/unshared extent boundary, and
+ * if imap->br_startoff points to a shared extent reserve space for it in the
+ * COW fork.  In this case *shared is set to true, else to false.
+ *
+ * Note that imap will always contain the block numbers for the existing blocks
+ * in the data fork, as the upper layers need them for read-modify-write
+ * operations.
+ */
+int
+xfs_reflink_reserve_cow(
         struct xfs_inode        *ip,
-       xfs_fileoff_t           *offset_fsb,
-       xfs_fileoff_t           end_fsb,
-       bool                    *skipped)
+       struct xfs_bmbt_irec    *imap,
+       bool                    *shared)
  {
-       struct xfs_bmbt_irec    got, prev, imap;
-       xfs_fileoff_t           orig_end_fsb;
-       int                     nimaps, eof = 0, error = 0;
-       bool                    shared = false, trimmed = false;
+       struct xfs_bmbt_irec    got, prev;
+       xfs_fileoff_t           end_fsb, orig_end_fsb;
+       int                     eof = 0, error = 0;
+       bool                    trimmed;
         xfs_extnum_t            idx;
         xfs_extlen_t            align;
  
-       /* Already reserved?  Skip the refcount btree access. */
-       xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx,
+       /*
+        * Search the COW fork extent list first.  This serves two purposes:
+        * first this implements the speculative preallocation using cowextsize,
+        * so that we also unshare blocks adjacent to shared blocks instead
+        * of just the shared blocks themselves.  Second the lookup in the
+        * extent list is generally faster than going out to the shared extent
+        * tree.
+        */
+       xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
                         &got, &prev);
-       if (!eof && got.br_startoff <= *offset_fsb) {
-               end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount;
-               trace_xfs_reflink_cow_found(ip, &got);
-               goto done;
-       }
+       if (!eof && got.br_startoff <= imap->br_startoff) {
+               trace_xfs_reflink_cow_found(ip, imap);
+               xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
  
-       /* Read extent from the source file. */
-       nimaps = 1;
-       error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
-                       &imap, &nimaps, 0);
-       if (error)
-               goto out_unlock;
-       ASSERT(nimaps == 1);
+               *shared = true;
+               return 0;
+       }
  
         /* Trim the mapping to the nearest shared extent boundary. */
-       error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
+       error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
         if (error)
-               goto out_unlock;
-
-       end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
+               return error;
  
         /* Not shared?  Just report the (potentially capped) extent. */
-       if (!shared) {
-               *skipped = true;
-               goto done;
-       }
+       if (!*shared)
+               return 0;
  
         /*
          * Fork all the shared blocks from our write offset until the end of
@@ -278,72 +283,38 @@ __xfs_reflink_reserve_cow(
          */
         error = xfs_qm_dqattach_locked(ip, 0);
         if (error)
-               goto out_unlock;
+               return error;
+
+       end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
  
         align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
         if (align)
                 end_fsb = roundup_64(end_fsb, align);
  
  retry:
-       error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb,
-                       end_fsb - *offset_fsb, &got,
-                       &prev, &idx, eof);
+       error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
+                       end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
         switch (error) {
         case 0:
                 break;
         case -ENOSPC:
         case -EDQUOT:
                 /* retry without any preallocation */
-               trace_xfs_reflink_cow_enospc(ip, &imap);
+               trace_xfs_reflink_cow_enospc(ip, imap);
                 if (end_fsb != orig_end_fsb) {
                         end_fsb = orig_end_fsb;
                         goto retry;
                 }
                 /*FALLTHRU*/
         default:
-               goto out_unlock;
+               return error;
         }
  
         if (end_fsb != orig_end_fsb)
                 xfs_inode_set_cowblocks_tag(ip);
  
         trace_xfs_reflink_cow_alloc(ip, &got);
-done:
-       *offset_fsb = end_fsb;
-out_unlock:
-       return error;
-}
-
-/* Create a CoW reservation for part of a file. */
-int
-xfs_reflink_reserve_cow_range(
-       struct xfs_inode        *ip,
-       xfs_off_t               offset,
-       xfs_off_t               count)
-{
-       struct xfs_mount        *mp = ip->i_mount;
-       xfs_fileoff_t           offset_fsb, end_fsb;
-       bool                    skipped = false;
-       int                     error;
-
-       trace_xfs_reflink_reserve_cow_range(ip, offset, count);
-
-       offset_fsb = XFS_B_TO_FSBT(mp, offset);
-       end_fsb = XFS_B_TO_FSB(mp, offset + count);
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-       while (offset_fsb < end_fsb) {
-               error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
-                               &skipped);
-               if (error) {
-                       trace_xfs_reflink_reserve_cow_range_error(ip, error,
-                               _RET_IP_);
-                       break;
-               }
-       }
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       return error;
+       return 0;
  }
  
  /* Allocate all CoW reservations covering a range of blocks in a file. */
@@ -358,9 +329,8 @@ __xfs_reflink_allocate_cow(
         struct xfs_defer_ops    dfops;
         struct xfs_trans        *tp;
         xfs_fsblock_t           first_block;
-       xfs_fileoff_t           next_fsb;
         int                     nimaps = 1, error;
-       bool                    skipped = false;
+       bool                    shared;
  
         xfs_defer_init(&dfops, &first_block);
  
@@ -371,33 +341,38 @@ __xfs_reflink_allocate_cow(
  
         xfs_ilock(ip, XFS_ILOCK_EXCL);
  
-       next_fsb = *offset_fsb;
-       error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
+       /* Read extent from the source file. */
+       nimaps = 1;
+       error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
+                       &imap, &nimaps, 0);
+       if (error)
+               goto out_unlock;
+       ASSERT(nimaps == 1);
+
+       error = xfs_reflink_reserve_cow(ip, &imap, &shared);
         if (error)
                 goto out_trans_cancel;
  
-       if (skipped) {
-               *offset_fsb = next_fsb;
+       if (!shared) {
+               *offset_fsb = imap.br_startoff + imap.br_blockcount;
                 goto out_trans_cancel;
         }
  
         xfs_trans_ijoin(tp, ip, 0);
-       error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb,
+       error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
                         XFS_BMAPI_COWFORK, &first_block,
                         XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
                         &imap, &nimaps, &dfops);
         if (error)
                 goto out_trans_cancel;
  
-       /* We might not have been able to map the whole delalloc extent */
-       *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
-
         error = xfs_defer_finish(&tp, &dfops, NULL);
         if (error)
                 goto out_trans_cancel;
  
         error = xfs_trans_commit(tp);
  
+       *offset_fsb = imap.br_startoff + imap.br_blockcount;
  out_unlock:
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
         return error;
@@ -536,58 +511,49 @@ xfs_reflink_cancel_cow_blocks(
         xfs_fileoff_t                   offset_fsb,
         xfs_fileoff_t                   end_fsb)
  {
-       struct xfs_bmbt_irec            irec;
-       xfs_filblks_t                   count_fsb;
+       struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec            got, prev, del;
+       xfs_extnum_t                    idx;
         xfs_fsblock_t                   firstfsb;
         struct xfs_defer_ops            dfops;
-       int                             error = 0;
-       int                             nimaps;
+       int                             error = 0, eof = 0;
  
         if (!xfs_is_reflink_inode(ip))
                 return 0;
  
-       /* Go find the old extent in the CoW fork. */
-       while (offset_fsb < end_fsb) {
-               nimaps = 1;
-               count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
-               error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
-                               &nimaps, XFS_BMAPI_COWFORK);
-               if (error)
-                       break;
-               ASSERT(nimaps == 1);
-
-               trace_xfs_reflink_cancel_cow(ip, &irec);
+       xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
+                       &got, &prev);
+       if (eof)
+               return 0;
  
-               if (irec.br_startblock == DELAYSTARTBLOCK) {
-                       /* Free a delayed allocation. */
-                       xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount,
-                                       false);
-                       ip->i_delayed_blks -= irec.br_blockcount;
+       while (got.br_startoff < end_fsb) {
+               del = got;
+               xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+               trace_xfs_reflink_cancel_cow(ip, &del);
  
-                       /* Remove the mapping from the CoW fork. */
-                       error = xfs_bunmapi_cow(ip, &irec);
+               if (isnullstartblock(del.br_startblock)) {
+                       error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
+                                       &idx, &got, &del);
                         if (error)
                                 break;
-               } else if (irec.br_startblock == HOLESTARTBLOCK) {
-                       /* empty */
                 } else {
                         xfs_trans_ijoin(*tpp, ip, 0);
                         xfs_defer_init(&dfops, &firstfsb);
  
                         /* Free the CoW orphan record. */
                         error = xfs_refcount_free_cow_extent(ip->i_mount,
-                                       &dfops, irec.br_startblock,
-                                       irec.br_blockcount);
+                                       &dfops, del.br_startblock,
+                                       del.br_blockcount);
                         if (error)
                                 break;
  
                         xfs_bmap_add_free(ip->i_mount, &dfops,
-                                       irec.br_startblock, irec.br_blockcount,
+                                       del.br_startblock, del.br_blockcount,
                                         NULL);
  
                         /* Update quota accounting */
                         xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
-                                       -(long)irec.br_blockcount);
+                                       -(long)del.br_blockcount);
  
                         /* Roll the transaction */
                         error = xfs_defer_finish(tpp, &dfops, ip);
@@ -597,15 +563,18 @@ xfs_reflink_cancel_cow_blocks(
                         }
  
                         /* Remove the mapping from the CoW fork. */
-                       error = xfs_bunmapi_cow(ip, &irec);
-                       if (error)
-                               break;
+                       xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
                 }
  
-               /* Roll on... */
-               offset_fsb = irec.br_startoff + irec.br_blockcount;
+               if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
+                       break;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
         }
  
+       /* clear tag if cow fork is emptied */
+       if (!ifp->if_bytes)
+               xfs_inode_clear_cowblocks_tag(ip);
+
         return error;
  }
  
@@ -668,25 +637,26 @@ xfs_reflink_end_cow(
         xfs_off_t                       offset,
         xfs_off_t                       count)
  {
-       struct xfs_bmbt_irec            irec;
-       struct xfs_bmbt_irec            uirec;
+       struct xfs_ifork                *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+       struct xfs_bmbt_irec            got, prev, del;
         struct xfs_trans                *tp;
         xfs_fileoff_t                   offset_fsb;
         xfs_fileoff_t                   end_fsb;
-       xfs_filblks_t                   count_fsb;
         xfs_fsblock_t                   firstfsb;
         struct xfs_defer_ops            dfops;
-       int                             error;
+       int                             error, eof = 0;
         unsigned int                    resblks;
-       xfs_filblks_t                   ilen;
         xfs_filblks_t                   rlen;
-       int                             nimaps;
+       xfs_extnum_t                    idx;
  
         trace_xfs_reflink_end_cow(ip, offset, count);
  
+       /* No COW extents?  That's easy! */
+       if (ifp->if_bytes == 0)
+               return 0;
+
         offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
         end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
-       count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
  
         /* Start a rolling transaction to switch the mappings */
         resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
@@ -698,72 +668,65 @@ xfs_reflink_end_cow(
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         xfs_trans_ijoin(tp, ip, 0);
  
-       /* Go find the old extent in the CoW fork. */
-       while (offset_fsb < end_fsb) {
-               /* Read extent from the source file */
-               nimaps = 1;
-               count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
-               error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
-                               &nimaps, XFS_BMAPI_COWFORK);
-               if (error)
-                       goto out_cancel;
-               ASSERT(nimaps == 1);
+       xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
+                       &got, &prev);
  
-               ASSERT(irec.br_startblock != DELAYSTARTBLOCK);
-               trace_xfs_reflink_cow_remap(ip, &irec);
+       /* If there is a hole at end_fsb - 1 go to the previous extent */
+       if (eof || got.br_startoff > end_fsb) {
+               ASSERT(idx > 0);
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
+       }
  
-               /*
-                * We can have a hole in the CoW fork if part of a directio
-                * write is CoW but part of it isn't.
-                */
-               rlen = ilen = irec.br_blockcount;
-               if (irec.br_startblock == HOLESTARTBLOCK)
+       /* Walk backwards until we're out of the I/O range... */
+       while (got.br_startoff + got.br_blockcount > offset_fsb) {
+               del = got;
+               xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
+
+               /* Extent delete may have bumped idx forward */
+               if (!del.br_blockcount) {
+                       idx--;
                         goto next_extent;
+               }
+
+               ASSERT(!isnullstartblock(got.br_startblock));
  
                 /* Unmap the old blocks in the data fork. */
-               while (rlen) {
-                       xfs_defer_init(&dfops, &firstfsb);
-                       error = __xfs_bunmapi(tp, ip, irec.br_startoff,
-                                       &rlen, 0, 1, &firstfsb, &dfops);
-                       if (error)
-                               goto out_defer;
-
-                       /*
-                        * Trim the extent to whatever got unmapped.
-                        * Remember, bunmapi works backwards.
-                        */
-                       uirec.br_startblock = irec.br_startblock + rlen;
-                       uirec.br_startoff = irec.br_startoff + rlen;
-                       uirec.br_blockcount = irec.br_blockcount - rlen;
-                       irec.br_blockcount = rlen;
-                       trace_xfs_reflink_cow_remap_piece(ip, &uirec);
+               xfs_defer_init(&dfops, &firstfsb);
+               rlen = del.br_blockcount;
+               error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
+                               &firstfsb, &dfops);
+               if (error)
+                       goto out_defer;
  
-                       /* Free the CoW orphan record. */
-                       error = xfs_refcount_free_cow_extent(tp->t_mountp,
-                                       &dfops, uirec.br_startblock,
-                                       uirec.br_blockcount);
-                       if (error)
-                               goto out_defer;
+               /* Trim the extent to whatever got unmapped. */
+               if (rlen) {
+                       xfs_trim_extent(&del, del.br_startoff + rlen,
+                               del.br_blockcount - rlen);
+               }
+               trace_xfs_reflink_cow_remap(ip, &del);
  
-                       /* Map the new blocks into the data fork. */
-                       error = xfs_bmap_map_extent(tp->t_mountp, &dfops,
-                                       ip, &uirec);
-                       if (error)
-                               goto out_defer;
+               /* Free the CoW orphan record. */
+               error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
+                               del.br_startblock, del.br_blockcount);
+               if (error)
+                       goto out_defer;
  
-                       /* Remove the mapping from the CoW fork. */
-                       error = xfs_bunmapi_cow(ip, &uirec);
-                       if (error)
-                               goto out_defer;
+               /* Map the new blocks into the data fork. */
+               error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
+               if (error)
+                       goto out_defer;
  
-                       error = xfs_defer_finish(&tp, &dfops, ip);
-                       if (error)
-                               goto out_defer;
-               }
+               /* Remove the mapping from the CoW fork. */
+               xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
+
+               error = xfs_defer_finish(&tp, &dfops, ip);
+               if (error)
+                       goto out_defer;
  
  next_extent:
-               /* Roll on... */
-               offset_fsb = irec.br_startoff + ilen;
+               if (idx < 0)
+                       break;
+               xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
         }
  
         error = xfs_trans_commit(tp);
@@ -774,7 +737,6 @@ next_extent:
  
  out_defer:
         xfs_defer_cancel(&dfops);
-out_cancel:
         xfs_trans_cancel(tp);
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
  out:
@@ -1312,19 +1274,26 @@ out_error:
   */
  int
  xfs_reflink_remap_range(
-       struct xfs_inode        *src,
-       xfs_off_t               srcoff,
-       struct xfs_inode        *dest,
-       xfs_off_t               destoff,
-       xfs_off_t               len,
-       unsigned int            flags)
+       struct file             *file_in,
+       loff_t                  pos_in,
+       struct file             *file_out,
+       loff_t                  pos_out,
+       u64                     len,
+       bool                    is_dedupe)
  {
+       struct inode            *inode_in = file_inode(file_in);
+       struct xfs_inode        *src = XFS_I(inode_in);
+       struct inode            *inode_out = file_inode(file_out);
+       struct xfs_inode        *dest = XFS_I(inode_out);
         struct xfs_mount        *mp = src->i_mount;
+       loff_t                  bs = inode_out->i_sb->s_blocksize;
+       bool                    same_inode = (inode_in == inode_out);
         xfs_fileoff_t           sfsbno, dfsbno;
         xfs_filblks_t           fsblen;
-       int                     error;
         xfs_extlen_t            cowextsize;
-       bool                    is_same;
+       loff_t                  isize;
+       ssize_t                 ret;
+       loff_t                  blen;
  
         if (!xfs_sb_version_hasreflink(&mp->m_sb))
                 return -EOPNOTSUPP;
@@ -1332,17 +1301,8 @@ xfs_reflink_remap_range(
         if (XFS_FORCED_SHUTDOWN(mp))
                 return -EIO;
  
-       /* Don't reflink realtime inodes */
-       if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
-               return -EINVAL;
-
-       if (flags & ~XFS_REFLINK_ALL)
-               return -EINVAL;
-
-       trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
-
         /* Lock both files against IO */
-       if (src->i_ino == dest->i_ino) {
+       if (same_inode) {
                 xfs_ilock(src, XFS_IOLOCK_EXCL);
                 xfs_ilock(src, XFS_MMAPLOCK_EXCL);
         } else {
@@ -1350,39 +1310,126 @@ xfs_reflink_remap_range(
                 xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
         }
  
+       /* Don't touch certain kinds of inodes */
+       ret = -EPERM;
+       if (IS_IMMUTABLE(inode_out))
+               goto out_unlock;
+
+       ret = -ETXTBSY;
+       if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
+               goto out_unlock;
+
+
+       /* Don't reflink dirs, pipes, sockets... */
+       ret = -EISDIR;
+       if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
+               goto out_unlock;
+       ret = -EINVAL;
+       if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
+               goto out_unlock;
+       if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
+               goto out_unlock;
+
+       /* Don't reflink realtime inodes */
+       if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
+               goto out_unlock;
+
+       /* Don't share DAX file data for now. */
+       if (IS_DAX(inode_in) || IS_DAX(inode_out))
+               goto out_unlock;
+
+       /* Are we going all the way to the end? */
+       isize = i_size_read(inode_in);
+       if (isize == 0) {
+               ret = 0;
+               goto out_unlock;
+       }
+
+       if (len == 0)
+               len = isize - pos_in;
+
+       /* Ensure offsets don't wrap and the input is inside i_size */
+       if (pos_in + len < pos_in || pos_out + len < pos_out ||
+           pos_in + len > isize)
+               goto out_unlock;
+
+       /* Don't allow dedupe past EOF in the dest file */
+       if (is_dedupe) {
+               loff_t  disize;
+
+               disize = i_size_read(inode_out);
+               if (pos_out >= disize || pos_out + len > disize)
+                       goto out_unlock;
+       }
+
+       /* If we're linking to EOF, continue to the block boundary. */
+       if (pos_in + len == isize)
+               blen = ALIGN(isize, bs) - pos_in;
+       else
+               blen = len;
+
+       /* Only reflink if we're aligned to block boundaries */
+       if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
+           !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
+               goto out_unlock;
+
+       /* Don't allow overlapped reflink within the same file */
+       if (same_inode) {
+               if (pos_out + blen > pos_in && pos_out < pos_in + blen)
+                       goto out_unlock;
+       }
+
+       /* Wait for the completion of any pending IOs on both files */
+       inode_dio_wait(inode_in);
+       if (!same_inode)
+               inode_dio_wait(inode_out);
+
+       ret = filemap_write_and_wait_range(inode_in->i_mapping,
+                       pos_in, pos_in + len - 1);
+       if (ret)
+               goto out_unlock;
+
+       ret = filemap_write_and_wait_range(inode_out->i_mapping,
+                       pos_out, pos_out + len - 1);
+       if (ret)
+               goto out_unlock;
+
+       trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
+
         /*
          * Check that the extents are the same.
          */
-       if (flags & XFS_REFLINK_DEDUPE) {
-               is_same = false;
-               error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
-                               destoff, len, &is_same);
-               if (error)
-                       goto out_error;
+       if (is_dedupe) {
+               bool            is_same = false;
+
+               ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
+                               len, &is_same);
+               if (ret)
+                       goto out_unlock;
                 if (!is_same) {
-                       error = -EBADE;
-                       goto out_error;
+                       ret = -EBADE;
+                       goto out_unlock;
                 }
         }
  
-       error = xfs_reflink_set_inode_flag(src, dest);
-       if (error)
-               goto out_error;
+       ret = xfs_reflink_set_inode_flag(src, dest);
+       if (ret)
+               goto out_unlock;
  
         /*
          * Invalidate the page cache so that we can clear any CoW mappings
          * in the destination file.
          */
-       truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff,
-                                  PAGE_ALIGN(destoff + len) - 1);
+       truncate_inode_pages_range(&inode_out->i_data, pos_out,
+                                  PAGE_ALIGN(pos_out + len) - 1);
  
-       dfsbno = XFS_B_TO_FSBT(mp, destoff);
-       sfsbno = XFS_B_TO_FSBT(mp, srcoff);
+       dfsbno = XFS_B_TO_FSBT(mp, pos_out);
+       sfsbno = XFS_B_TO_FSBT(mp, pos_in);
         fsblen = XFS_B_TO_FSB(mp, len);
-       error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
-                       destoff + len);
-       if (error)
-               goto out_error;
+       ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
+                       pos_out + len);
+       if (ret)
+               goto out_unlock;
  
         /*
          * Carry the cowextsize hint from src to dest if we're sharing the
@@ -1390,26 +1437,24 @@ xfs_reflink_remap_range(
          * has a cowextsize hint, and the destination file does not.
          */
         cowextsize = 0;
-       if (srcoff == 0 && len == i_size_read(VFS_I(src)) &&
+       if (pos_in == 0 && len == i_size_read(inode_in) &&
             (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
-           destoff == 0 && len >= i_size_read(VFS_I(dest)) &&
+           pos_out == 0 && len >= i_size_read(inode_out) &&
             !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
                 cowextsize = src->i_d.di_cowextsize;
  
-       error = xfs_reflink_update_dest(dest, destoff + len, cowextsize);
-       if (error)
-               goto out_error;
+       ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
  
-out_error:
+out_unlock:
         xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
         xfs_iunlock(src, XFS_IOLOCK_EXCL);
         if (src->i_ino != dest->i_ino) {
                 xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
                 xfs_iunlock(dest, XFS_IOLOCK_EXCL);
         }
-       if (error)
-               trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
-       return error;
+       if (ret)
+               trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
+       return ret;
  }
  
  /*
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h

index 5dc3c8a..fad1160 100644 (file)
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
  extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
                 struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
  
-extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
-               xfs_off_t offset, xfs_off_t count);
+extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
+               struct xfs_bmbt_irec *imap, bool *shared);
  extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
                 xfs_off_t offset, xfs_off_t count);
  extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
@@ -43,11 +43,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
  extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
                 xfs_off_t count);
  extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
-#define XFS_REFLINK_DEDUPE     1       /* only reflink if contents match */
-#define XFS_REFLINK_ALL                (XFS_REFLINK_DEDUPE)
-extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
-               struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
-               unsigned int flags);
+extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
+               struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
  extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
                 struct xfs_trans **tpp);
  extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c

index 5f8d55d..276d302 100644 (file)
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = {
  };
  
  
-struct kobj_type xfs_error_cfg_ktype = {
+static struct kobj_type xfs_error_cfg_ktype = {
         .release = xfs_sysfs_release,
         .sysfs_ops = &xfs_sysfs_ops,
         .default_attrs = xfs_error_attrs,
  };
  
-struct kobj_type xfs_error_ktype = {
+static struct kobj_type xfs_error_ktype = {
         .release = xfs_sysfs_release,
         .sysfs_ops = &xfs_sysfs_ops,
  };
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h

index ad188d3..0907752 100644 (file)
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
  DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
  DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
  
-DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range);
+DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
  DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
  
  DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
@@ -3356,9 +3356,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
  DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
  DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
  DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
-DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
  
-DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
  DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
  DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
  DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h

index 43199a0..63554e9 100644 (file)
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -70,7 +70,7 @@ KSYM(__kcrctab_\name):
  #include <generated/autoksyms.h>
  
  #define __EXPORT_SYMBOL(sym, val, sec)                         \
-       __cond_export_sym(sym, val, sec, config_enabled(__KSYM_##sym))
+       __cond_export_sym(sym, val, sec, __is_defined(__KSYM_##sym))
  #define __cond_export_sym(sym, val, sec, conf)                 \
         ___cond_export_sym(sym, val, sec, conf)
  #define ___cond_export_sym(sym, val, sec, enabled)             \
diff --git a/include/linux/iomap.h b/include/linux/iomap.h

index e63e288..7892f55 100644 (file)
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -19,11 +19,15 @@ struct vm_fault;
  #define IOMAP_UNWRITTEN        0x04    /* blocks allocated @blkno in unwritten state */
  
  /*
- * Flags for iomap mappings:
+ * Flags for all iomap mappings:
   */
-#define IOMAP_F_MERGED 0x01    /* contains multiple blocks/extents */
-#define IOMAP_F_SHARED 0x02    /* block shared with another file */
-#define IOMAP_F_NEW    0x04    /* blocks have been newly allocated */
+#define IOMAP_F_NEW    0x01    /* blocks have been newly allocated */
+
+/*
+ * Flags that only need to be reported for IOMAP_REPORT requests:
+ */
+#define IOMAP_F_MERGED 0x10    /* contains multiple blocks/extents */
+#define IOMAP_F_SHARED 0x20    /* block shared with another file */
  
  /*
   * Magic value for blkno:
@@ -42,8 +46,9 @@ struct iomap {
  /*
   * Flags for iomap_begin / iomap_end.  No flag implies a read.
   */
-#define IOMAP_WRITE            (1 << 0)
-#define IOMAP_ZERO             (1 << 1)
+#define IOMAP_WRITE            (1 << 0) /* writing, must allocate blocks */
+#define IOMAP_ZERO             (1 << 1) /* zeroing operation, may skip holes */
+#define IOMAP_REPORT           (1 << 2) /* report extent status, e.g. FIEMAP */
  
  struct iomap_ops {
         /*
diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h

index 15ec117..8f2e059 100644 (file)
--- a/include/linux/kconfig.h
+++ b/include/linux/kconfig.h
@@ -31,7 +31,6 @@
   * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
   * the last step cherry picks the 2nd arg, we get a zero.
   */
-#define config_enabled(cfg)            ___is_defined(cfg)
  #define __is_defined(x)                        ___is_defined(x)
  #define ___is_defined(val)             ____is_defined(__ARG_PLACEHOLDER_##val)
  #define ____is_defined(arg1_or_junk)   __take_second_arg(arg1_or_junk 1, 0)
@@ -41,13 +40,13 @@
   * otherwise. For boolean options, this is equivalent to
   * IS_ENABLED(CONFIG_FOO).
   */
-#define IS_BUILTIN(option) config_enabled(option)
+#define IS_BUILTIN(option) __is_defined(option)
  
  /*
   * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0
   * otherwise.
   */
-#define IS_MODULE(option) config_enabled(option##_MODULE)
+#define IS_MODULE(option) __is_defined(option##_MODULE)
  
  /*
   * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h

index 7f2ae99..0f088f3 100644 (file)
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -440,33 +440,7 @@ struct zone {
         seqlock_t               span_seqlock;
  #endif
  
-       /*
-        * wait_table           -- the array holding the hash table
-        * wait_table_hash_nr_entries   -- the size of the hash table array
-        * wait_table_bits      -- wait_table_size == (1 << wait_table_bits)
-        *
-        * The purpose of all these is to keep track of the people
-        * waiting for a page to become available and make them
-        * runnable again when possible. The trouble is that this
-        * consumes a lot of space, especially when so few things
-        * wait on pages at a given time. So instead of using
-        * per-page waitqueues, we use a waitqueue hash table.
-        *
-        * The bucket discipline is to sleep on the same queue when
-        * colliding and wake all in that wait queue when removing.
-        * When something wakes, it must check to be sure its page is
-        * truly available, a la thundering herd. The cost of a
-        * collision is great, but given the expected load of the
-        * table, they should be so rare as to be outweighed by the
-        * benefits from the saved space.
-        *
-        * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
-        * primary users of these fields, and in mm/page_alloc.c
-        * free_area_init_core() performs the initialization of them.
-        */
-       wait_queue_head_t       *wait_table;
-       unsigned long           wait_table_hash_nr_entries;
-       unsigned long           wait_table_bits;
+       int initialized;
  
         /* Write-intensive fields used from the page allocator */
         ZONE_PADDING(_pad1_)
@@ -546,7 +520,7 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn)
  
  static inline bool zone_is_initialized(struct zone *zone)
  {
-       return !!zone->wait_table;
+       return zone->initialized;
  }
  
  static inline bool zone_is_empty(struct zone *zone)
diff --git a/ipc/msgutil.c b/ipc/msgutil.c

index a521999..bf74eaa 100644 (file)
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -53,7 +53,7 @@ static struct msg_msg *alloc_msg(size_t len)
         size_t alen;
  
         alen = min(len, DATALEN_MSG);
-       msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
+       msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT);
         if (msg == NULL)
                 return NULL;
  
@@ -65,7 +65,7 @@ static struct msg_msg *alloc_msg(size_t len)
         while (len > 0) {
                 struct msg_msgseg *seg;
                 alen = min(len, DATALEN_SEG);
-               seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL);
+               seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT);
                 if (seg == NULL)
                         goto out_err;
                 *pseg = seg;
diff --git a/kernel/kcov.c b/kernel/kcov.c

index 8d44b3f..30e6d05 100644 (file)
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -53,8 +53,15 @@ void notrace __sanitizer_cov_trace_pc(void)
         /*
          * We are interested in code coverage as a function of a syscall inputs,
          * so we ignore code executed in interrupts.
+        * The checks for whether we are in an interrupt are open-coded, because
+        * 1. We can't use in_interrupt() here, since it also returns true
+        *    when we are inside local_bh_disable() section.
+        * 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()),
+        *    since that leads to slower generated code (three separate tests,
+        *    one for each of the flags).
          */
-       if (!t || in_interrupt())
+       if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET
+                                                       | NMI_MASK)))
                 return;
         mode = READ_ONCE(t->kcov_mode);
         if (mode == KCOV_MODE_TRACE) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 94732d1..42d4027 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7515,11 +7515,27 @@ static struct kmem_cache *task_group_cache __read_mostly;
  DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
  DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
  
+#define WAIT_TABLE_BITS 8
+#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS)
+static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned;
+
+wait_queue_head_t *bit_waitqueue(void *word, int bit)
+{
+       const int shift = BITS_PER_LONG == 32 ? 5 : 6;
+       unsigned long val = (unsigned long)word << shift | bit;
+
+       return bit_wait_table + hash_long(val, WAIT_TABLE_BITS);
+}
+EXPORT_SYMBOL(bit_waitqueue);
+
  void __init sched_init(void)
  {
         int i, j;
         unsigned long alloc_size = 0, ptr;
  
+       for (i = 0; i < WAIT_TABLE_SIZE; i++)
+               init_waitqueue_head(bit_wait_table + i);
+
  #ifdef CONFIG_FAIR_GROUP_SCHED
         alloc_size += 2 * nr_cpu_ids * sizeof(void **);
  #endif
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c

index 4f70535..9453efe 100644 (file)
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -480,16 +480,6 @@ void wake_up_bit(void *word, int bit)
  }
  EXPORT_SYMBOL(wake_up_bit);
  
-wait_queue_head_t *bit_waitqueue(void *word, int bit)
-{
-       const int shift = BITS_PER_LONG == 32 ? 5 : 6;
-       const struct zone *zone = page_zone(virt_to_page(word));
-       unsigned long val = (unsigned long)word << shift | bit;
-
-       return &zone->wait_table[hash_long(val, zone->wait_table_bits)];
-}
-EXPORT_SYMBOL(bit_waitqueue);
-
  /*
   * Manipulate the atomic_t address to produce a better bit waitqueue table hash
   * index (we're keying off bit -1, but that would produce a horrible hash
diff --git a/kernel/softirq.c b/kernel/softirq.c

index 1bf81ef..744fa61 100644 (file)
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -58,7 +58,7 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp
  DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
  
  const char * const softirq_to_name[NR_SOFTIRQS] = {
-       "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
+       "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
         "TASKLET", "SCHED", "HRTIMER", "RCU"
  };
  
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug

index 33bc56c..b01e547 100644 (file)
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -198,6 +198,7 @@ config FRAME_WARN
         int "Warn for stack frames larger than (needs gcc 4.4)"
         range 0 8192
         default 0 if KASAN
+       default 2048 if GCC_PLUGIN_LATENT_ENTROPY
         default 1024 if !64BIT
         default 2048 if 64BIT
         help
diff --git a/lib/genalloc.c b/lib/genalloc.c

index 0a11396..144fe6b 100644 (file)
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -292,7 +292,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
         struct gen_pool_chunk *chunk;
         unsigned long addr = 0;
         int order = pool->min_alloc_order;
-       int nbits, start_bit = 0, end_bit, remain;
+       int nbits, start_bit, end_bit, remain;
  
  #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
         BUG_ON(in_nmi());
@@ -307,6 +307,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size,
                 if (size > atomic_read(&chunk->avail))
                         continue;
  
+               start_bit = 0;
                 end_bit = chunk_size(chunk) >> order;
  retry:
                 start_bit = algo(chunk->bits, end_bit, start_bit,
diff --git a/lib/stackdepot.c b/lib/stackdepot.c

index 60f77f1..4d830e2 100644 (file)
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -50,7 +50,7 @@
                                         STACK_ALLOC_ALIGN)
  #define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \
                 STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS)
-#define STACK_ALLOC_SLABS_CAP 1024
+#define STACK_ALLOC_SLABS_CAP 8192
  #define STACK_ALLOC_MAX_SLABS \
         (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \
          (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP)
diff --git a/mm/Kconfig b/mm/Kconfig

index be0ee11..86e3e0e 100644 (file)
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -187,7 +187,7 @@ config MEMORY_HOTPLUG
         bool "Allow for memory hot-add"
         depends on SPARSEMEM || X86_64_ACPI_NUMA
         depends on ARCH_ENABLE_MEMORY_HOTPLUG
-       depends on !KASAN
+       depends on COMPILE_TEST || !KASAN
  
  config MEMORY_HOTPLUG_SPARSE
         def_bool y
diff --git a/mm/filemap.c b/mm/filemap.c

index 849f459..c7fe2f1 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -790,9 +790,7 @@ EXPORT_SYMBOL(__page_cache_alloc);
   */
  wait_queue_head_t *page_waitqueue(struct page *page)
  {
-       const struct zone *zone = page_zone(page);
-
-       return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)];
+       return bit_waitqueue(page, 0);
  }
  EXPORT_SYMBOL(page_waitqueue);
  
diff --git a/mm/kmemleak.c b/mm/kmemleak.c

index a5e453c..e5355a5 100644 (file)
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1453,8 +1453,11 @@ static void kmemleak_scan(void)
  
                 read_lock(&tasklist_lock);
                 do_each_thread(g, p) {
-                       scan_block(task_stack_page(p), task_stack_page(p) +
-                                  THREAD_SIZE, NULL);
+                       void *stack = try_get_task_stack(p);
+                       if (stack) {
+                               scan_block(stack, stack + THREAD_SIZE, NULL);
+                               put_task_stack(p);
+                       }
                 } while_each_thread(g, p);
                 read_unlock(&tasklist_lock);
         }
diff --git a/mm/list_lru.c b/mm/list_lru.c

index 1d05cb9..234676e 100644 (file)
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -554,6 +554,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware,
         err = memcg_init_list_lru(lru, memcg_aware);
         if (err) {
                 kfree(lru->node);
+               /* Do this so a list_lru_destroy() doesn't crash: */
+               lru->node = NULL;
                 goto out;
         }
  
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index ae052b5..0f870ba 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1917,6 +1917,15 @@ retry:
                      current->flags & PF_EXITING))
                 goto force;
  
+       /*
+        * Prevent unbounded recursion when reclaim operations need to
+        * allocate memory. This might exceed the limits temporarily,
+        * but we prefer facilitating memory reclaim and getting back
+        * under the limit over triggering OOM kills in these cases.
+        */
+       if (unlikely(current->flags & PF_MEMALLOC))
+               goto force;
+
         if (unlikely(task_in_memcg_oom(current)))
                 goto nomem;
  
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c

index 9629273..cad4b91 100644 (file)
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -268,7 +268,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
         unsigned long i, pfn, end_pfn, nr_pages;
         int node = pgdat->node_id;
         struct page *page;
-       struct zone *zone;
  
         nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
         page = virt_to_page(pgdat);
@@ -276,19 +275,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
         for (i = 0; i < nr_pages; i++, page++)
                 get_page_bootmem(node, page, NODE_INFO);
  
-       zone = &pgdat->node_zones[0];
-       for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
-               if (zone_is_initialized(zone)) {
-                       nr_pages = zone->wait_table_hash_nr_entries
-                               * sizeof(wait_queue_head_t);
-                       nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
-                       page = virt_to_page(zone->wait_table);
-
-                       for (i = 0; i < nr_pages; i++, page++)
-                               get_page_bootmem(node, page, NODE_INFO);
-               }
-       }
-
         pfn = pgdat->node_start_pfn;
         end_pfn = pgdat_end_pfn(pgdat);
  
@@ -2131,7 +2117,6 @@ void try_offline_node(int nid)
         unsigned long start_pfn = pgdat->node_start_pfn;
         unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
         unsigned long pfn;
-       int i;
  
         for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
                 unsigned long section_nr = pfn_to_section_nr(pfn);
@@ -2158,20 +2143,6 @@ void try_offline_node(int nid)
          */
         node_set_offline(nid);
         unregister_one_node(nid);
-
-       /* free waittable in each zone */
-       for (i = 0; i < MAX_NR_ZONES; i++) {
-               struct zone *zone = pgdat->node_zones + i;
-
-               /*
-                * wait_table may be allocated from boot memory,
-                * here only free if it's allocated by vmalloc.
-                */
-               if (is_vmalloc_addr(zone->wait_table)) {
-                       vfree(zone->wait_table);
-                       zone->wait_table = NULL;
-               }
-       }
  }
  EXPORT_SYMBOL(try_offline_node);
  
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 2b3bf67..8fd42aa 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4224,7 +4224,7 @@ static void show_migration_types(unsigned char type)
         }
  
         *p = '\0';
-       printk("(%s) ", tmp);
+       printk(KERN_CONT "(%s) ", tmp);
  }
  
  /*
@@ -4335,7 +4335,8 @@ void show_free_areas(unsigned int filter)
                         free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count;
  
                 show_node(zone);
-               printk("%s"
+               printk(KERN_CONT
+                       "%s"
                         " free:%lukB"
                         " min:%lukB"
                         " low:%lukB"
@@ -4382,8 +4383,8 @@ void show_free_areas(unsigned int filter)
                         K(zone_page_state(zone, NR_FREE_CMA_PAGES)));
                 printk("lowmem_reserve[]:");
                 for (i = 0; i < MAX_NR_ZONES; i++)
-                       printk(" %ld", zone->lowmem_reserve[i]);
-               printk("\n");
+                       printk(KERN_CONT " %ld", zone->lowmem_reserve[i]);
+               printk(KERN_CONT "\n");
         }
  
         for_each_populated_zone(zone) {
@@ -4394,7 +4395,7 @@ void show_free_areas(unsigned int filter)
                 if (skip_free_areas_node(filter, zone_to_nid(zone)))
                         continue;
                 show_node(zone);
-               printk("%s: ", zone->name);
+               printk(KERN_CONT "%s: ", zone->name);
  
                 spin_lock_irqsave(&zone->lock, flags);
                 for (order = 0; order < MAX_ORDER; order++) {
@@ -4412,11 +4413,12 @@ void show_free_areas(unsigned int filter)
                 }
                 spin_unlock_irqrestore(&zone->lock, flags);
                 for (order = 0; order < MAX_ORDER; order++) {
-                       printk("%lu*%lukB ", nr[order], K(1UL) << order);
+                       printk(KERN_CONT "%lu*%lukB ",
+                              nr[order], K(1UL) << order);
                         if (nr[order])
                                 show_migration_types(types[order]);
                 }
-               printk("= %lukB\n", K(total));
+               printk(KERN_CONT "= %lukB\n", K(total));
         }
  
         hugetlb_show_meminfo();
@@ -4976,72 +4978,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
  #endif
  }
  
-/*
- * Helper functions to size the waitqueue hash table.
- * Essentially these want to choose hash table sizes sufficiently
- * large so that collisions trying to wait on pages are rare.
- * But in fact, the number of active page waitqueues on typical
- * systems is ridiculously low, less than 200. So this is even
- * conservative, even though it seems large.
- *
- * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
- * waitqueues, i.e. the size of the waitq table given the number of pages.
- */
-#define PAGES_PER_WAITQUEUE    256
-
-#ifndef CONFIG_MEMORY_HOTPLUG
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
-       unsigned long size = 1;
-
-       pages /= PAGES_PER_WAITQUEUE;
-
-       while (size < pages)
-               size <<= 1;
-
-       /*
-        * Once we have dozens or even hundreds of threads sleeping
-        * on IO we've got bigger problems than wait queue collision.
-        * Limit the size of the wait table to a reasonable size.
-        */
-       size = min(size, 4096UL);
-
-       return max(size, 4UL);
-}
-#else
-/*
- * A zone's size might be changed by hot-add, so it is not possible to determine
- * a suitable size for its wait_table.  So we use the maximum size now.
- *
- * The max wait table size = 4096 x sizeof(wait_queue_head_t).   ie:
- *
- *    i386 (preemption config)    : 4096 x 16 = 64Kbyte.
- *    ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte.
- *    ia64, x86-64 (preemption)   : 4096 x 24 = 96Kbyte.
- *
- * The maximum entries are prepared when a zone's memory is (512K + 256) pages
- * or more by the traditional way. (See above).  It equals:
- *
- *    i386, x86-64, powerpc(4K page size) : =  ( 2G + 1M)byte.
- *    ia64(16K page size)                 : =  ( 8G + 4M)byte.
- *    powerpc (64K page size)             : =  (32G +16M)byte.
- */
-static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
-{
-       return 4096UL;
-}
-#endif
-
-/*
- * This is an integer logarithm so that shifts can be used later
- * to extract the more random high bits from the multiplicative
- * hash function before the remainder is taken.
- */
-static inline unsigned long wait_table_bits(unsigned long size)
-{
-       return ffz(~size);
-}
-
  /*
   * Initially all pages are reserved - free ones are freed
   * up by free_all_bootmem() once the early boot process is
@@ -5304,49 +5240,6 @@ void __init setup_per_cpu_pageset(void)
                         alloc_percpu(struct per_cpu_nodestat);
  }
  
-static noinline __ref
-int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
-{
-       int i;
-       size_t alloc_size;
-
-       /*
-        * The per-page waitqueue mechanism uses hashed waitqueues
-        * per zone.
-        */
-       zone->wait_table_hash_nr_entries =
-                wait_table_hash_nr_entries(zone_size_pages);
-       zone->wait_table_bits =
-               wait_table_bits(zone->wait_table_hash_nr_entries);
-       alloc_size = zone->wait_table_hash_nr_entries
-                                       * sizeof(wait_queue_head_t);
-
-       if (!slab_is_available()) {
-               zone->wait_table = (wait_queue_head_t *)
-                       memblock_virt_alloc_node_nopanic(
-                               alloc_size, zone->zone_pgdat->node_id);
-       } else {
-               /*
-                * This case means that a zone whose size was 0 gets new memory
-                * via memory hot-add.
-                * But it may be the case that a new node was hot-added.  In
-                * this case vmalloc() will not be able to use this new node's
-                * memory - this wait_table must be initialized to use this new
-                * node itself as well.
-                * To use this new node's memory, further consideration will be
-                * necessary.
-                */
-               zone->wait_table = vmalloc(alloc_size);
-       }
-       if (!zone->wait_table)
-               return -ENOMEM;
-
-       for (i = 0; i < zone->wait_table_hash_nr_entries; ++i)
-               init_waitqueue_head(zone->wait_table + i);
-
-       return 0;
-}
-
  static __meminit void zone_pcp_init(struct zone *zone)
  {
         /*
@@ -5367,10 +5260,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
                                         unsigned long size)
  {
         struct pglist_data *pgdat = zone->zone_pgdat;
-       int ret;
-       ret = zone_wait_table_init(zone, size);
-       if (ret)
-               return ret;
+
         pgdat->nr_zones = zone_idx(zone) + 1;
  
         zone->zone_start_pfn = zone_start_pfn;
@@ -5382,6 +5272,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
                         zone_start_pfn, (zone_start_pfn + size));
  
         zone_init_free_lists(zone);
+       zone->initialized = 1;
  
         return 0;
  }
diff --git a/mm/slab.c b/mm/slab.c

index 090fb26..0b0550c 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -233,6 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
         spin_lock_init(&parent->list_lock);
         parent->free_objects = 0;
         parent->free_touched = 0;
+       parent->num_slabs = 0;
  }
  
  #define MAKE_LIST(cachep, listp, slab, nodeid)                         \
@@ -966,7 +967,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
          * guaranteed to be valid until irq is re-enabled, because it will be
          * freed after synchronize_sched().
          */
-       if (force_change)
+       if (old_shared && force_change)
                 synchronize_sched();
  
  fail:
@@ -1382,24 +1383,27 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
         for_each_kmem_cache_node(cachep, node, n) {
                 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
                 unsigned long active_slabs = 0, num_slabs = 0;
+               unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+               unsigned long num_slabs_full;
  
                 spin_lock_irqsave(&n->list_lock, flags);
-               list_for_each_entry(page, &n->slabs_full, lru) {
-                       active_objs += cachep->num;
-                       active_slabs++;
-               }
+               num_slabs = n->num_slabs;
                 list_for_each_entry(page, &n->slabs_partial, lru) {
                         active_objs += page->active;
-                       active_slabs++;
+                       num_slabs_partial++;
                 }
                 list_for_each_entry(page, &n->slabs_free, lru)
-                       num_slabs++;
+                       num_slabs_free++;
  
                 free_objects += n->free_objects;
                 spin_unlock_irqrestore(&n->list_lock, flags);
  
-               num_slabs += active_slabs;
                 num_objs = num_slabs * cachep->num;
+               active_slabs = num_slabs - num_slabs_free;
+               num_slabs_full = num_slabs -
+                       (num_slabs_partial + num_slabs_free);
+               active_objs += (num_slabs_full * cachep->num);
+
                 pr_warn("  node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
                         node, active_slabs, num_slabs, active_objs, num_objs,
                         free_objects);
@@ -2314,6 +2318,7 @@ static int drain_freelist(struct kmem_cache *cache,
  
                 page = list_entry(p, struct page, lru);
                 list_del(&page->lru);
+               n->num_slabs--;
                 /*
                  * Safe to drop the lock. The slab is no longer linked
                  * to the cache.
@@ -2752,6 +2757,8 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
                 list_add_tail(&page->lru, &(n->slabs_free));
         else
                 fixup_slab_list(cachep, n, page, &list);
+
+       n->num_slabs++;
         STATS_INC_GROWN(cachep);
         n->free_objects += cachep->num - page->active;
         spin_unlock(&n->list_lock);
@@ -3443,6 +3450,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
  
                 page = list_last_entry(&n->slabs_free, struct page, lru);
                 list_move(&page->lru, list);
+               n->num_slabs--;
         }
  }
  
@@ -4099,6 +4107,8 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
         unsigned long num_objs;
         unsigned long active_slabs = 0;
         unsigned long num_slabs, free_objects = 0, shared_avail = 0;
+       unsigned long num_slabs_partial = 0, num_slabs_free = 0;
+       unsigned long num_slabs_full = 0;
         const char *name;
         char *error = NULL;
         int node;
@@ -4111,33 +4121,34 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
                 check_irq_on();
                 spin_lock_irq(&n->list_lock);
  
-               list_for_each_entry(page, &n->slabs_full, lru) {
-                       if (page->active != cachep->num && !error)
-                               error = "slabs_full accounting error";
-                       active_objs += cachep->num;
-                       active_slabs++;
-               }
+               num_slabs += n->num_slabs;
+
                 list_for_each_entry(page, &n->slabs_partial, lru) {
                         if (page->active == cachep->num && !error)
                                 error = "slabs_partial accounting error";
                         if (!page->active && !error)
                                 error = "slabs_partial accounting error";
                         active_objs += page->active;
-                       active_slabs++;
+                       num_slabs_partial++;
                 }
+
                 list_for_each_entry(page, &n->slabs_free, lru) {
                         if (page->active && !error)
                                 error = "slabs_free accounting error";
-                       num_slabs++;
+                       num_slabs_free++;
                 }
+
                 free_objects += n->free_objects;
                 if (n->shared)
                         shared_avail += n->shared->avail;
  
                 spin_unlock_irq(&n->list_lock);
         }
-       num_slabs += active_slabs;
         num_objs = num_slabs * cachep->num;
+       active_slabs = num_slabs - num_slabs_free;
+       num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free);
+       active_objs += (num_slabs_full * cachep->num);
+
         if (num_objs - active_objs != free_objects && !error)
                 error = "free_objects accounting error";
  
diff --git a/mm/slab.h b/mm/slab.h

index 9653f2e..bc05fdc 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -432,6 +432,7 @@ struct kmem_cache_node {
         struct list_head slabs_partial; /* partial list first, better asm code */
         struct list_head slabs_full;
         struct list_head slabs_free;
+       unsigned long num_slabs;
         unsigned long free_objects;
         unsigned int free_limit;
         unsigned int colour_next;       /* Per-node cache coloring */
diff --git a/mm/vmscan.c b/mm/vmscan.c

index 744f926..76fda22 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3043,7 +3043,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                             sc.gfp_mask,
                                             sc.reclaim_idx);
  
+       current->flags |= PF_MEMALLOC;
         nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+       current->flags &= ~PF_MEMALLOC;
  
         trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 28 Oct 2016 16:23:59 +0000 (09:23 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 28 Oct 2016 16:23:59 +0000 (09:23 -0700)
CREDITS		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
arch/cris/arch-v32/drivers/cryptocop.c		patch \| blob \| history
arch/h8300/include/asm/thread_info.h		patch \| blob \| history
arch/h8300/kernel/signal.c		patch \| blob \| history
arch/s390/include/asm/ftrace.h		patch \| blob \| history
arch/s390/include/asm/processor.h		patch \| blob \| history
arch/s390/include/asm/unistd.h		patch \| blob \| history
arch/s390/kernel/dis.c		patch \| blob \| history
arch/s390/kernel/dumpstack.c		patch \| blob \| history
arch/s390/kernel/perf_event.c		patch \| blob \| history
arch/s390/kernel/stacktrace.c		patch \| blob \| history
arch/s390/mm/hugetlbpage.c		patch \| blob \| history
arch/s390/mm/init.c		patch \| blob \| history
arch/s390/oprofile/init.c		patch \| blob \| history
arch/x86/mm/kaslr.c		patch \| blob \| history
block/badblocks.c		patch \| blob \| history
block/blk-flush.c		patch \| blob \| history
block/blk-mq.c		patch \| blob \| history
drivers/ata/ahci.c		patch \| blob \| history
drivers/block/DAC960.c		patch \| blob \| history
drivers/block/nbd.c		patch \| blob \| history
drivers/i2c/busses/Kconfig		patch \| blob \| history
drivers/i2c/busses/i2c-designware-core.c		patch \| blob \| history
drivers/i2c/busses/i2c-digicolor.c		patch \| blob \| history
drivers/i2c/busses/i2c-i801.c		patch \| blob \| history
drivers/i2c/busses/i2c-imx.c		patch \| blob \| history
drivers/i2c/busses/i2c-jz4780.c		patch \| blob \| history
drivers/i2c/busses/i2c-rk3x.c		patch \| blob \| history
drivers/i2c/busses/i2c-xgene-slimpro.c		patch \| blob \| history
drivers/i2c/busses/i2c-xlp9xx.c		patch \| blob \| history
drivers/i2c/busses/i2c-xlr.c		patch \| blob \| history
drivers/i2c/i2c-core.c		patch \| blob \| history
drivers/ipack/ipack.c		patch \| blob \| history
drivers/misc/sgi-gru/grumain.c		patch \| blob \| history
drivers/s390/block/dasd_eckd.c		patch \| blob \| history
drivers/s390/cio/chp.c		patch \| blob \| history
drivers/scsi/NCR5380.c		patch \| blob \| history
drivers/scsi/be2iscsi/be_main.c		patch \| blob \| history
drivers/scsi/libiscsi.c		patch \| blob \| history
drivers/thermal/intel_pch_thermal.c		patch \| blob \| history
drivers/thermal/intel_powerclamp.c		patch \| blob \| history
fs/exofs/dir.c		patch \| blob \| history
fs/iomap.c		patch \| blob \| history
fs/orangefs/dcache.c		patch \| blob \| history
fs/orangefs/file.c		patch \| blob \| history
fs/orangefs/namei.c		patch \| blob \| history
fs/orangefs/orangefs-kernel.h		patch \| blob \| history
fs/proc/base.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bmap.c		patch \| blob \| history
fs/xfs/libxfs/xfs_bmap.h		patch \| blob \| history
fs/xfs/libxfs/xfs_btree.c		patch \| blob \| history
fs/xfs/libxfs/xfs_dquot_buf.c		patch \| blob \| history
fs/xfs/libxfs/xfs_format.h		patch \| blob \| history
fs/xfs/libxfs/xfs_inode_buf.c		patch \| blob \| history
fs/xfs/libxfs/xfs_inode_buf.h		patch \| blob \| history
fs/xfs/xfs_file.c		patch \| blob \| history
fs/xfs/xfs_icache.c		patch \| blob \| history
fs/xfs/xfs_iomap.c		patch \| blob \| history
fs/xfs/xfs_mount.c		patch \| blob \| history
fs/xfs/xfs_reflink.c		patch \| blob \| history
fs/xfs/xfs_reflink.h		patch \| blob \| history
fs/xfs/xfs_sysfs.c		patch \| blob \| history
fs/xfs/xfs_trace.h		patch \| blob \| history
include/asm-generic/export.h		patch \| blob \| history
include/linux/iomap.h		patch \| blob \| history
include/linux/kconfig.h		patch \| blob \| history
include/linux/mmzone.h		patch \| blob \| history
ipc/msgutil.c		patch \| blob \| history
kernel/kcov.c		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history
kernel/sched/wait.c		patch \| blob \| history
kernel/softirq.c		patch \| blob \| history
lib/Kconfig.debug		patch \| blob \| history
lib/genalloc.c		patch \| blob \| history
lib/stackdepot.c		patch \| blob \| history
mm/Kconfig		patch \| blob \| history
mm/filemap.c		patch \| blob \| history
mm/kmemleak.c		patch \| blob \| history
mm/list_lru.c		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history
mm/memory_hotplug.c		patch \| blob \| history
mm/page_alloc.c		patch \| blob \| history
mm/slab.c		patch \| blob \| history
mm/slab.h		patch \| blob \| history
mm/vmscan.c		patch \| blob \| history