#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+ #include <linux/iocontext.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
+ #include <linux/cgroup.h>
#include <scsi/sg.h> /* for struct sg_iovec */
#include <trace/events/block.h>
* last put frees it
*/
if (atomic_dec_and_test(&bio->bi_cnt)) {
+ bio_disassociate_task(bio);
bio->bi_next = NULL;
bio->bi_destructor(bio);
}
int bio_get_nr_vecs(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
- return min_t(unsigned,
+ int nr_pages;
+
+ nr_pages = min_t(unsigned,
queue_max_segments(q),
queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
+
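+ /* never report more segments than a single bio can hold */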
+ return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
}
EXPORT_SYMBOL(bio_get_nr_vecs);
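
/*
 * Illustrative caller (a sketch, not part of this patch): with the clamp
 * above, the result of bio_get_nr_vecs() can be passed directly to
 * bio_alloc() without overflowing a bio's vector table.
 */
static struct bio *example_alloc_bio(struct block_device *bdev, gfp_t gfp)
{
	int nr_pages = bio_get_nr_vecs(bdev);

	return bio_alloc(gfp, nr_pages);	/* nr_pages <= BIO_MAX_PAGES */
}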
}
EXPORT_SYMBOL(bioset_create);
+ #ifdef CONFIG_BLK_CGROUP
+ /**
+ * bio_associate_current - associate a bio with %current
+ * @bio: target bio
+ *
+ * Associate @bio with %current if it hasn't been associated yet. The
+ * block layer will treat @bio as if it were issued by %current no matter
+ * which task actually issues it.
+ *
+ * This function takes an extra reference on %current's io_context and
+ * blkcg which will be put when @bio is released. The caller must own
+ * @bio, ensure %current->io_context exists, and is responsible for
+ * synchronizing calls to this function.
+ */
+ int bio_associate_current(struct bio *bio)
+ {
+ struct io_context *ioc;
+ struct cgroup_subsys_state *css;
+
+ if (bio->bi_ioc)
+ return -EBUSY;
+
+ ioc = current->io_context;
+ if (!ioc)
+ return -ENOENT;
+
+ /* acquire active ref on @ioc and associate */
+ get_io_context_active(ioc);
+ bio->bi_ioc = ioc;
+
+ /* associate blkcg if one exists */
+ rcu_read_lock();
+ css = task_subsys_state(current, blkio_subsys_id);
+ if (css && css_tryget(css))
+ bio->bi_css = css;
+ rcu_read_unlock();
+
+ return 0;
+ }
+
+ /**
+ * bio_disassociate_task - undo bio_associate_current()
+ * @bio: target bio
+ */
+ void bio_disassociate_task(struct bio *bio)
+ {
+ if (bio->bi_ioc) {
+ put_io_context(bio->bi_ioc);
+ bio->bi_ioc = NULL;
+ }
+ if (bio->bi_css) {
+ css_put(bio->bi_css);
+ bio->bi_css = NULL;
+ }
+ }
+
+ #endif /* CONFIG_BLK_CGROUP */
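
/*
 * Caller sketch (illustrative, not part of this patch): a driver that
 * punts bios to a worker thread but wants them charged to the submitting
 * task could associate each bio before the handoff.  Runs in the
 * submitter's context and assumes CONFIG_BLK_CGROUP.
 */
static int example_punt_prepare(struct bio *bio)
{
	struct io_context *ioc;
	int ret;

	/* bio_associate_current() requires %current->io_context to exist */
	ioc = get_task_io_context(current, GFP_NOIO, NUMA_NO_NODE);
	if (!ioc)
		return -ENOMEM;

	ret = bio_associate_current(bio);	/* takes its own refs */
	put_io_context(ioc);			/* drop the creation ref */
	return ret;
}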
+
static void __init biovec_init_slabs(void)
{
int i;
rcu_read_lock();
tcred = __task_cred(task);
- if (tcred->uid != cred->euid &&
- tcred->uid != cred->uid && !capable(CAP_SYS_NICE)) {
+ if (!uid_eq(tcred->uid, cred->euid) &&
+ !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
rcu_read_unlock();
return -EPERM;
}
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
if (ioc) {
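+ /* a plain store suffices: ioprio users (e.g. cfq) re-read ioc->ioprio when setting up each request */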
- ioc_ioprio_changed(ioc, ioprio);
+ ioc->ioprio = ioprio;
put_io_context(ioc);
}
struct task_struct *p, *g;
struct user_struct *user;
struct pid *pgrp;
+ kuid_t uid;
int ret;
switch (class) {
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case IOPRIO_WHO_USER:
+ uid = make_kuid(current_user_ns(), who);
+ if (!uid_valid(uid))
+ break;
if (!who)
user = current_user();
else
- user = find_user(who);
+ user = find_user(uid);
if (!user)
break;
do_each_thread(g, p) {
- if (__task_cred(p)->uid != who)
+ if (!uid_eq(task_uid(p), uid))
continue;
ret = set_task_ioprio(p, ioprio);
if (ret)
struct task_struct *g, *p;
struct user_struct *user;
struct pid *pgrp;
+ kuid_t uid;
int ret = -ESRCH;
int tmpio;
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case IOPRIO_WHO_USER:
+ uid = make_kuid(current_user_ns(), who);
if (!who)
user = current_user();
else
- user = find_user(who);
+ user = find_user(uid);
if (!user)
break;
do_each_thread(g, p) {
- if (__task_cred(p)->uid != user->uid)
+ if (!uid_eq(task_uid(p), user->uid))
continue;
tmpio = get_task_ioprio(p);
if (tmpio < 0)
#ifndef _LINUX_BLKDEV_H
#define _LINUX_BLKDEV_H
+#include <linux/sched.h>
+
#ifdef CONFIG_BLOCK
-#include <linux/sched.h>
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
struct request;
struct sg_io_hdr;
struct bsg_job;
+ struct blkcg_gq;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
+ /*
+ * Maximum number of blkcg policies allowed to be registered concurrently.
+ * Defined here to simplify include dependency.
+ */
+ #define BLKCG_MAX_POLS 2
+
struct request;
typedef void (rq_end_io_fn)(struct request *, int);
struct list_head timeout_list;
struct list_head icq_list;
+ #ifdef CONFIG_BLK_CGROUP
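+ /* bitmap of blkcg policies enabled on this queue, indexed by policy plid */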
+ DECLARE_BITMAP(blkcg_pols, BLKCG_MAX_POLS);
+ struct blkcg_gq *root_blkg;
+ struct list_head blkg_list;
+ #endif
struct queue_limits limits;
struct mutex sysfs_lock;
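+ /* count of outstanding bypass requests; the queue stays in bypass mode while > 0 */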
+ int bypass_depth;
+
#if defined(CONFIG_BLK_DEV_BSG)
bsg_job_fn *bsg_job_fn;
int bsg_job_size;
struct bsg_class_device bsg_dev;
#endif
+ #ifdef CONFIG_BLK_CGROUP
+ struct list_head all_q_node;
+ #endif
#ifdef CONFIG_BLK_DEV_THROTTLING
/* Throttle data */
struct throtl_data *td;
#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
#define QUEUE_FLAG_DEAD 5 /* queue being torn down */
- #define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */
+ #define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */
#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
+ #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
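
/*
 * Sketch of the intended bypass protocol (illustrative, not part of this
 * patch): blk_queue_bypass_start()/end() are assumed to be the helpers
 * from the same series that manipulate bypass_depth and QUEUE_FLAG_BYPASS
 * around updates that must not race with the elevator.
 */
static void example_update_queue(struct request_queue *q)
{
	blk_queue_bypass_start(q);	/* bump bypass_depth, set flag, drain */
	/* ... queue now acts as a dumb FIFO; safe to switch policies ... */
	blk_queue_bypass_end(q);	/* drop bypass_depth, clear flag at zero */
}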
bool
depends on HAVE_IRQ_WORK
+config BUILDTIME_EXTABLE_SORT
+ bool
+
menu "General setup"
config EXPERIMENTAL
but may not be backwards compatible with older init systems.
source "kernel/irq/Kconfig"
+source "kernel/time/Kconfig"
menu "RCU Subsystem"
Select a specific number if testing RCU itself.
Take the default if unsure.
+config RCU_FANOUT_LEAF
+ int "Tree-based hierarchical RCU leaf-level fanout value"
+ range 2 RCU_FANOUT if 64BIT
+ range 2 RCU_FANOUT if !64BIT
+ depends on TREE_RCU || TREE_PREEMPT_RCU
+ default 16
+ help
+ This option controls the leaf-level fanout of hierarchical
+ implementations of RCU, and allows trading off cache misses
+ against lock contention. Systems that synchronize their
+ scheduling-clock interrupts for energy-efficiency reasons will
+ want the default because the smaller leaf-level fanout keeps
+ lock contention levels acceptably low. Very large systems
+ (hundreds or thousands of CPUs) will instead want to set this
+ value to the maximum value possible in order to reduce the
+ number of cache misses incurred during RCU's grace-period
+ initialization. These systems tend to run CPU-bound, so they are
+ not helped by synchronized interrupts and instead tend to skew
+ them, which reduces lock contention enough that large leaf-level
+ fanouts work well.
+
+ Select a specific number if testing RCU itself.
+
+ Select the maximum permissible value for large systems.
+
+ Take the default if unsure.
+
config RCU_FANOUT_EXACT
bool "Disable tree-based hierarchical RCU auto-balancing"
depends on TREE_RCU || TREE_PREEMPT_RCU
depends on RCU_BOOST
default 1
help
- This option specifies the real-time priority to which preempted
- RCU readers are to be boosted. If you are working with CPU-bound
- real-time applications, you should specify a priority higher then
- the highest-priority CPU-bound application.
+ This option specifies the real-time priority to which long-term
+ preempted RCU readers are to be boosted. If you are working
+ with a real-time application that has one or more CPU-bound
+ threads running at a real-time priority level, you should set
+ RCU_BOOST_PRIO to a priority higher than the highest-priority
+ real-time CPU-bound thread. The default RCU_BOOST_PRIO value
+ of 1 is appropriate in the common case, which is real-time
+ applications that do not have any CPU-bound threads.
+
+ Some real-time applications might not have a single real-time
+ thread that saturates a given CPU, but instead might have
+ multiple real-time threads that, taken together, fully utilize
+ that CPU. In this case, you should set RCU_BOOST_PRIO to
+ a priority higher than the lowest-priority thread that is
+ conspiring to prevent the CPU from running any non-real-time
+ tasks. For example, if one thread at priority 10 and another
+ thread at priority 5 are between themselves fully consuming
+ the CPU time on a given CPU, then RCU_BOOST_PRIO should be
+ set to priority 6 or higher.
Specify the real-time priority, or take the default if unsure.
endif #CGROUP_SCHED
config BLK_CGROUP
- tristate "Block IO controller"
+ bool "Block IO controller"
depends on BLOCK
default n
---help---
config USER_NS
bool "User namespace (EXPERIMENTAL)"
depends on EXPERIMENTAL
- default y
+ depends on UIDGID_CONVERTED
+ select UIDGID_STRICT_TYPE_CHECKS
+
+ default n
help
This allows containers, i.e. vservers, to use user namespaces
to provide different user info for different servers.
endif # NAMESPACES
+config UIDGID_CONVERTED
+ # True if all of the selected software components are known
+ # to have uid_t and gid_t converted to kuid_t and kgid_t
+ # where appropriate and are otherwise safe to use with
+ # the user namespace.
+ bool
+ default y
+
+ # List of kernel pieces that need user namespace work
+ # Features
+ depends on SYSVIPC = n
+ depends on IMA = n
+ depends on EVM = n
+ depends on KEYS = n
+ depends on AUDIT = n
+ depends on AUDITSYSCALL = n
+ depends on TASKSTATS = n
+ depends on TRACING = n
+ depends on FS_POSIX_ACL = n
+ depends on QUOTA = n
+ depends on QUOTACTL = n
+ depends on DEBUG_CREDENTIALS = n
+ depends on BSD_PROCESS_ACCT = n
+ depends on DRM = n
+ depends on PROC_EVENTS = n
+
+ # Networking
+ depends on NET = n
+ depends on NET_9P = n
+ depends on IPX = n
+ depends on PHONET = n
+ depends on NET_CLS_FLOW = n
+ depends on NETFILTER_XT_MATCH_OWNER = n
+ depends on NETFILTER_XT_MATCH_RECENT = n
+ depends on NETFILTER_XT_TARGET_LOG = n
+ depends on NETFILTER_NETLINK_LOG = n
+ depends on INET = n
+ depends on IPV6 = n
+ depends on IP_SCTP = n
+ depends on AF_RXRPC = n
+ depends on LLC2 = n
+ depends on NET_KEY = n
+ depends on INET_DIAG = n
+ depends on DNS_RESOLVER = n
+ depends on AX25 = n
+ depends on ATALK = n
+
+ # Filesystems
+ depends on USB_DEVICEFS = n
+ depends on USB_GADGETFS = n
+ depends on USB_FUNCTIONFS = n
+ depends on DEVTMPFS = n
+ depends on XENFS = n
+
+ depends on 9P_FS = n
+ depends on ADFS_FS = n
+ depends on AFFS_FS = n
+ depends on AFS_FS = n
+ depends on AUTOFS4_FS = n
+ depends on BEFS_FS = n
+ depends on BFS_FS = n
+ depends on BTRFS_FS = n
+ depends on CEPH_FS = n
+ depends on CIFS = n
+ depends on CODA_FS = n
+ depends on CONFIGFS_FS = n
+ depends on CRAMFS = n
+ depends on DEBUG_FS = n
+ depends on ECRYPT_FS = n
+ depends on EFS_FS = n
+ depends on EXOFS_FS = n
+ depends on FAT_FS = n
+ depends on FUSE_FS = n
+ depends on GFS2_FS = n
+ depends on HFS_FS = n
+ depends on HFSPLUS_FS = n
+ depends on HPFS_FS = n
+ depends on HUGETLBFS = n
+ depends on ISO9660_FS = n
+ depends on JFFS2_FS = n
+ depends on JFS_FS = n
+ depends on LOGFS = n
+ depends on MINIX_FS = n
+ depends on NCP_FS = n
+ depends on NFSD = n
+ depends on NFS_FS = n
+ depends on NILFS2_FS = n
+ depends on NTFS_FS = n
+ depends on OCFS2_FS = n
+ depends on OMFS_FS = n
+ depends on QNX4FS_FS = n
+ depends on QNX6FS_FS = n
+ depends on REISERFS_FS = n
+ depends on SQUASHFS = n
+ depends on SYSV_FS = n
+ depends on UBIFS_FS = n
+ depends on UDF_FS = n
+ depends on UFS_FS = n
+ depends on VXFS_FS = n
+ depends on XFS_FS = n
+
+ depends on !UML || HOSTFS = n
+
+ # The rare drivers that won't build
+ depends on AIRO = n
+ depends on AIRO_CS = n
+ depends on TUN = n
+ depends on INFINIBAND_QIB = n
+ depends on BLK_DEV_LOOP = n
+ depends on ANDROID_BINDER_IPC = n
+
+ # Security modules
+ depends on SECURITY_TOMOYO = n
+ depends on SECURITY_APPARMOR = n
+
+config UIDGID_STRICT_TYPE_CHECKS
+ bool "Require conversions between uid/gids and their internal representation"
+ depends on UIDGID_CONVERTED
+ default n
+ help
+ While the necessary conversions are being added to all subsystems this
+ option allows the code to continue to build for unconverted subsystems.
+
+ Say Y here if you want the strict type checking enabled
+
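+ # The conversion pattern the strict checks enforce, in C (sketch): with
+ # UIDGID_STRICT_TYPE_CHECKS enabled, kuid_t becomes a one-member struct,
+ # so a missed conversion fails to compile instead of silently comparing
+ # raw integers:
+ #
+ #	kuid_t kuid = make_kuid(current_user_ns(), who);
+ #	if (!uid_valid(kuid))
+ #		return -EINVAL;
+ #	if (uid_eq(kuid, task_uid(p)))
+ #		...
+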
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select EVENTFD
config PERF_EVENTS
bool "Kernel performance events and counters"
- default y if (PROFILING || PERF_COUNTERS)
+ default y if PROFILING
depends on HAVE_PERF_EVENTS
select ANON_INODES
select IRQ_WORK
Say Y if unsure.
-config PERF_COUNTERS
- bool "Kernel performance counters (old config option)"
- depends on HAVE_PERF_EVENTS
- help
- This config has been obsoleted by the PERF_EVENTS
- config option - please see that one for details.
-
- It has no effect on the kernel whether you enable
- it or not, it is a compatibility placeholder.
-
- Say N if unsure.
-
config DEBUG_PERF_USE_VMALLOC
default n
bool "Debug: use vmalloc to back perf mmap() buffers"
#include <linux/cgroup.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
+#include <linux/seccomp.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/audit.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
+#include <linux/proc_fs.h>
#include <linux/profile.h>
#include <linux/rmap.h>
#include <linux/ksm.h>
#include <linux/oom.h>
#include <linux/khugepaged.h>
#include <linux/signalfd.h>
+#include <linux/uprobes.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
return total;
}
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct_node(node) \
- kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
-# define free_task_struct(tsk) \
- kmem_cache_free(task_struct_cachep, (tsk))
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
static struct kmem_cache *task_struct_cachep;
+
+static inline struct task_struct *alloc_task_struct_node(int node)
+{
+ return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
+}
+
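+/* weak default; architectures with extra per-task state can override this hook */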
+void __weak arch_release_task_struct(struct task_struct *tsk) { }
+
+static inline void free_task_struct(struct task_struct *tsk)
+{
+ arch_release_task_struct(tsk);
+ kmem_cache_free(task_struct_cachep, tsk);
+}
#endif
-#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
+void __weak arch_release_thread_info(struct thread_info *ti) { }
+
+/*
+ * Allocate pages if THREAD_SIZE >= PAGE_SIZE; otherwise use a
+ * kmem_cache based allocator.
+ */
+# if THREAD_SIZE >= PAGE_SIZE
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
-#ifdef CONFIG_DEBUG_STACK_USAGE
- gfp_t mask = GFP_KERNEL | __GFP_ZERO;
-#else
- gfp_t mask = GFP_KERNEL;
-#endif
- struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+ struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
}
static inline void free_thread_info(struct thread_info *ti)
{
+ arch_release_thread_info(ti);
free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
+# else
+static struct kmem_cache *thread_info_cache;
+
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+ int node)
+{
+ return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
+}
+
+static void free_thread_info(struct thread_info *ti)
+{
+ arch_release_thread_info(ti);
+ kmem_cache_free(thread_info_cache, ti);
+}
+
+void thread_info_cache_init(void)
+{
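+ /* size == align == THREAD_SIZE keeps each stack naturally aligned */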
+ thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+ THREAD_SIZE, 0, NULL);
+ BUG_ON(thread_info_cache == NULL);
+}
+# endif
#endif
/* SLAB cache for signal_struct structures (tsk->signal) */
free_thread_info(tsk->stack);
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
+ put_seccomp_filter(tsk);
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
}
EXPORT_SYMBOL_GPL(__put_task_struct);
-/*
- * macro override instead of weak attribute alias, to workaround
- * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
- */
-#ifndef arch_task_cache_init
-#define arch_task_cache_init()
-#endif
+void __init __weak arch_task_cache_init(void) { }
void __init fork_init(unsigned long mempages)
{
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
#endif
int node = tsk_fork_get_node(orig);
int err;
- prepare_to_copy(orig);
-
tsk = alloc_task_struct_node(node);
if (!tsk)
return NULL;
}
charge = 0;
if (mpnt->vm_flags & VM_ACCOUNT) {
- unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ unsigned long len;
+ len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
goto fail_nomem;
charge = len;
if (retval)
goto out;
+
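+ /* let uprobes hook the new file-backed vma; abort dup_mmap() on failure */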
+ if (file && uprobe_mmap(tmp))
+ goto out;
}
/* a new mm has just been created */
arch_dup_mmap(oldmm, mm);
might_sleep();
if (atomic_dec_and_test(&mm->mm_users)) {
+ uprobe_clear_state(mm);
exit_aio(mm);
ksm_exit(mm);
khugepaged_exit(mm); /* must run before exit_mmap */
list_del(&mm->mmlist);
spin_unlock(&mmlist_lock);
}
- put_swap_token(mm);
if (mm->binfmt)
module_put(mm->binfmt->module);
mmdrop(mm);
exit_pi_state_list(tsk);
#endif
+ uprobe_free_utask(tsk);
+
/* Get rid of any cached register state */
deactivate_mm(tsk, mm);
memcpy(mm, oldmm, sizeof(*mm));
mm_init_cpumask(mm);
- /* Initializing for Swap token stuff */
- mm->token_priority = 0;
- mm->last_interval = 0;
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
mm->pmd_huge_pte = NULL;
#endif
+ uprobe_reset_state(mm);
if (!mm_init(mm, tsk))
goto fail_nomem;
goto fail_nomem;
good_mm:
- /* Initializing for Swap token stuff */
- mm->token_priority = 0;
- mm->last_interval = 0;
-
tsk->mm = mm;
tsk->active_mm = mm;
return 0;
* Share io context with parent, if CLONE_IO is set
*/
if (clone_flags & CLONE_IO) {
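+ /* the parent pins its io_context, so taking the extra task reference cannot fail */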
- tsk->io_context = ioc_task_link(ioc);
- if (unlikely(!tsk->io_context))
- return -ENOMEM;
+ ioc_task_link(ioc);
+ tsk->io_context = ioc;
} else if (ioprio_valid(ioc->ioprio)) {
new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
if (unlikely(!new_ioc))
goto fork_out;
ftrace_graph_init_task(p);
+ get_seccomp_filter(p);
rt_mutex_init_task(p);
INIT_LIST_HEAD(&p->pi_state_list);
p->pi_state_cache = NULL;
#endif
+ uprobe_copy_process(p);
/*
* sigaltstack should be cleared when sharing the same VM
*/
if (p->io_context)
exit_io_context(p);
bad_fork_cleanup_namespaces:
+ if (unlikely(clone_flags & CLONE_NEWPID))
+ pid_ns_release_proc(p->nsproxy->pid_ns);
exit_task_namespaces(p);
bad_fork_cleanup_mm:
if (p->mm)