#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+ #include <linux/iocontext.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
+ #include <linux/cgroup.h>
#include <scsi/sg.h> /* for struct sg_iovec */
#include <trace/events/block.h>
* last put frees it
*/
if (atomic_dec_and_test(&bio->bi_cnt)) {
+ bio_disassociate_task(bio);
bio->bi_next = NULL;
bio->bi_destructor(bio);
}
int bio_get_nr_vecs(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
- return min_t(unsigned,
+ int nr_pages;
+
+ nr_pages = min_t(unsigned,
queue_max_segments(q),
queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
+
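+ /* never report more segments than a single bio can hold */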
+ return min_t(unsigned, nr_pages, BIO_MAX_PAGES);
}
EXPORT_SYMBOL(bio_get_nr_vecs);
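
/*
 * Illustrative caller (a sketch, not part of this patch): with the clamp
 * above, the result of bio_get_nr_vecs() can be passed directly to
 * bio_alloc() without overflowing a bio's vector table.
 */
static struct bio *example_alloc_bio(struct block_device *bdev, gfp_t gfp)
{
	int nr_pages = bio_get_nr_vecs(bdev);

	return bio_alloc(gfp, nr_pages);	/* nr_pages <= BIO_MAX_PAGES */
}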
}
EXPORT_SYMBOL(bioset_create);
+ #ifdef CONFIG_BLK_CGROUP
+ /**
+ * bio_associate_current - associate a bio with %current
+ * @bio: target bio
+ *
+ * Associate @bio with %current if it hasn't been associated yet. The
+ * block layer will treat @bio as if it were issued by %current no matter
+ * which task actually issues it.
+ *
+ * This function takes an extra reference on %current's io_context and
+ * blkcg which will be put when @bio is released. The caller must own
+ * @bio, ensure %current->io_context exists, and is responsible for
+ * synchronizing calls to this function.
+ */
+ int bio_associate_current(struct bio *bio)
+ {
+ struct io_context *ioc;
+ struct cgroup_subsys_state *css;
+
+ if (bio->bi_ioc)
+ return -EBUSY;
+
+ ioc = current->io_context;
+ if (!ioc)
+ return -ENOENT;
+
+ /* acquire active ref on @ioc and associate */
+ get_io_context_active(ioc);
+ bio->bi_ioc = ioc;
+
+ /* associate blkcg if one exists */
+ rcu_read_lock();
+ css = task_subsys_state(current, blkio_subsys_id);
+ if (css && css_tryget(css))
+ bio->bi_css = css;
+ rcu_read_unlock();
+
+ return 0;
+ }
+
+ /**
+ * bio_disassociate_task - undo bio_associate_current()
+ * @bio: target bio
+ */
+ void bio_disassociate_task(struct bio *bio)
+ {
+ if (bio->bi_ioc) {
+ put_io_context(bio->bi_ioc);
+ bio->bi_ioc = NULL;
+ }
+ if (bio->bi_css) {
+ css_put(bio->bi_css);
+ bio->bi_css = NULL;
+ }
+ }
+
+ #endif /* CONFIG_BLK_CGROUP */
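
/*
 * Caller sketch (illustrative, not part of this patch): a driver that
 * punts bios to a worker thread but wants them charged to the submitting
 * task could associate each bio before the handoff.  Runs in the
 * submitter's context and assumes CONFIG_BLK_CGROUP.
 */
static int example_punt_prepare(struct bio *bio)
{
	struct io_context *ioc;
	int ret;

	/* bio_associate_current() requires %current->io_context to exist */
	ioc = get_task_io_context(current, GFP_NOIO, NUMA_NO_NODE);
	if (!ioc)
		return -ENOMEM;

	ret = bio_associate_current(bio);	/* takes its own refs */
	put_io_context(ioc);			/* drop the creation ref */
	return ret;
}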
+
static void __init biovec_init_slabs(void)
{
int i;
rcu_read_lock();
tcred = __task_cred(task);
- if (tcred->uid != cred->euid &&
- tcred->uid != cred->uid && !capable(CAP_SYS_NICE)) {
+ if (!uid_eq(tcred->uid, cred->euid) &&
+ !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
rcu_read_unlock();
return -EPERM;
}
ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
if (ioc) {
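+ /* a plain store suffices: ioprio users (e.g. cfq) re-read ioc->ioprio when setting up each request */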
- ioc_ioprio_changed(ioc, ioprio);
+ ioc->ioprio = ioprio;
put_io_context(ioc);
}
struct task_struct *p, *g;
struct user_struct *user;
struct pid *pgrp;
+ kuid_t uid;
int ret;
switch (class) {
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case IOPRIO_WHO_USER:
+ uid = make_kuid(current_user_ns(), who);
+ if (!uid_valid(uid))
+ break;
if (!who)
user = current_user();
else
- user = find_user(who);
+ user = find_user(uid);
if (!user)
break;
do_each_thread(g, p) {
- if (__task_cred(p)->uid != who)
+ if (!uid_eq(task_uid(p), uid))
continue;
ret = set_task_ioprio(p, ioprio);
if (ret)
struct task_struct *g, *p;
struct user_struct *user;
struct pid *pgrp;
+ kuid_t uid;
int ret = -ESRCH;
int tmpio;
} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
break;
case IOPRIO_WHO_USER:
+ uid = make_kuid(current_user_ns(), who);
if (!who)
user = current_user();
else
- user = find_user(who);
+ user = find_user(uid);
if (!user)
break;
do_each_thread(g, p) {
- if (__task_cred(p)->uid != user->uid)
+ if (!uid_eq(task_uid(p), user->uid))
continue;
tmpio = get_task_ioprio(p);
if (tmpio < 0)
#ifndef _LINUX_BLKDEV_H
#define _LINUX_BLKDEV_H
+#include <linux/sched.h>
+
#ifdef CONFIG_BLOCK
-#include <linux/sched.h>
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
struct request;
struct sg_io_hdr;
struct bsg_job;
+ struct blkcg_gq;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
+ /*
+ * Maximum number of blkcg policies allowed to be registered concurrently.
+ * Defined here to simplify include dependency.
+ */
+ #define BLKCG_MAX_POLS 2
+
struct request;
typedef void (rq_end_io_fn)(struct request *, int);
struct list_head timeout_list;
struct list_head icq_list;
+ #ifdef CONFIG_BLK_CGROUP
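+ /* bitmap of blkcg policies enabled on this queue, indexed by policy plid */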
+ DECLARE_BITMAP(blkcg_pols, BLKCG_MAX_POLS);
+ struct blkcg_gq *root_blkg;
+ struct list_head blkg_list;
+ #endif
struct queue_limits limits;
struct mutex sysfs_lock;
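+ /* count of outstanding bypass requests; the queue stays in bypass mode while > 0 */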
+ int bypass_depth;
+
#if defined(CONFIG_BLK_DEV_BSG)
bsg_job_fn *bsg_job_fn;
int bsg_job_size;
struct bsg_class_device bsg_dev;
#endif
+ #ifdef CONFIG_BLK_CGROUP
+ struct list_head all_q_node;
+ #endif
#ifdef CONFIG_BLK_DEV_THROTTLING
/* Throttle data */
struct throtl_data *td;
#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
#define QUEUE_FLAG_DEAD 5 /* queue being torn down */
- #define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */
+ #define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */
#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 9 /* complete on same CPU-group */
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
+ #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
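
/*
 * Sketch of the intended bypass protocol (illustrative, not part of this
 * patch): blk_queue_bypass_start()/end() are assumed to be the helpers
 * from the same series that manipulate bypass_depth and QUEUE_FLAG_BYPASS
 * around updates that must not race with the elevator.
 */
static void example_update_queue(struct request_queue *q)
{
	blk_queue_bypass_start(q);	/* bump bypass_depth, set flag, drain */
	/* ... queue now acts as a dumb FIFO; safe to switch policies ... */
	blk_queue_bypass_end(q);	/* drop bypass_depth, clear flag at zero */
}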
bool
depends on HAVE_IRQ_WORK
+config BUILDTIME_EXTABLE_SORT
+ bool
+
menu "General setup"
config EXPERIMENTAL
but may not be backwards compatible with older init systems.
source "kernel/irq/Kconfig"
+source "kernel/time/Kconfig"
menu "RCU Subsystem"
Select a specific number if testing RCU itself.
Take the default if unsure.
+config RCU_FANOUT_LEAF
+ int "Tree-based hierarchical RCU leaf-level fanout value"
+ range 2 RCU_FANOUT if 64BIT
+ range 2 RCU_FANOUT if !64BIT
+ depends on TREE_RCU || TREE_PREEMPT_RCU
+ default 16
+ help
+ This option controls the leaf-level fanout of hierarchical
+ implementations of RCU, and allows trading off cache misses
+ against lock contention. Systems that synchronize their
+ scheduling-clock interrupts for energy-efficiency reasons will
+ want the default because the smaller leaf-level fanout keeps
+ lock contention levels acceptably low. Very large systems
+ (hundreds or thousands of CPUs) will instead want to set this
+ value to the maximum value possible in order to reduce the
+ number of cache misses incurred during RCU's grace-period
+ initialization. These systems tend to run CPU-bound, so they are
+ not helped by synchronized interrupts and instead tend to skew
+ them, which reduces lock contention enough that large leaf-level
+ fanouts work well.
+
+ Select a specific number if testing RCU itself.
+
+ Select the maximum permissible value for large systems.
+
+ Take the default if unsure.
+
config RCU_FANOUT_EXACT
bool "Disable tree-based hierarchical RCU auto-balancing"
depends on TREE_RCU || TREE_PREEMPT_RCU
depends on RCU_BOOST
default 1
help
- This option specifies the real-time priority to which preempted
- RCU readers are to be boosted. If you are working with CPU-bound
- real-time applications, you should specify a priority higher then
- the highest-priority CPU-bound application.
+ This option specifies the real-time priority to which long-term
+ preempted RCU readers are to be boosted. If you are working
+ with a real-time application that has one or more CPU-bound
+ threads running at a real-time priority level, you should set
+ RCU_BOOST_PRIO to a priority higher than the highest-priority
+ real-time CPU-bound thread. The default RCU_BOOST_PRIO value
+ of 1 is appropriate in the common case, which is real-time
+ applications that do not have any CPU-bound threads.
+
+ Some real-time applications might not have a single real-time
+ thread that saturates a given CPU, but instead might have
+ multiple real-time threads that, taken together, fully utilize
+ that CPU. In this case, you should set RCU_BOOST_PRIO to
+ a priority higher than the lowest-priority thread that is
+ conspiring to prevent the CPU from running any non-real-time
+ tasks. For example, if one thread at priority 10 and another
+ thread at priority 5 are between themselves fully consuming
+ the CPU time on a given CPU, then RCU_BOOST_PRIO should be
+ set to priority 6 or higher.
Specify the real-time priority, or take the default if unsure.
endif #CGROUP_SCHED
config BLK_CGROUP
- tristate "Block IO controller"
+ bool "Block IO controller"
depends on BLOCK
default n
---help---
config USER_NS
bool "User namespace (EXPERIMENTAL)"
depends on EXPERIMENTAL
- default y
+ depends on UIDGID_CONVERTED
+ select UIDGID_STRICT_TYPE_CHECKS
+
+ default n
help
This allows containers, i.e. vservers, to use user namespaces
to provide different user info for different servers.
endif # NAMESPACES
+config UIDGID_CONVERTED
+ # True if all of the selected software components are known
+ # to have uid_t and gid_t converted to kuid_t and kgid_t
+ # where appropriate and are otherwise safe to use with
+ # the user namespace.
+ bool
+ default y
+
+ # List of kernel pieces that need user namespace work
+ # Features
+ depends on SYSVIPC = n
+ depends on IMA = n
+ depends on EVM = n
+ depends on KEYS = n
+ depends on AUDIT = n
+ depends on AUDITSYSCALL = n
+ depends on TASKSTATS = n
+ depends on TRACING = n
+ depends on FS_POSIX_ACL = n
+ depends on QUOTA = n
+ depends on QUOTACTL = n
+ depends on DEBUG_CREDENTIALS = n
+ depends on BSD_PROCESS_ACCT = n
+ depends on DRM = n
+ depends on PROC_EVENTS = n
+
+ # Networking
+ depends on NET = n
+ depends on NET_9P = n
+ depends on IPX = n
+ depends on PHONET = n
+ depends on NET_CLS_FLOW = n
+ depends on NETFILTER_XT_MATCH_OWNER = n
+ depends on NETFILTER_XT_MATCH_RECENT = n
+ depends on NETFILTER_XT_TARGET_LOG = n
+ depends on NETFILTER_NETLINK_LOG = n
+ depends on INET = n
+ depends on IPV6 = n
+ depends on IP_SCTP = n
+ depends on AF_RXRPC = n
+ depends on LLC2 = n
+ depends on NET_KEY = n
+ depends on INET_DIAG = n
+ depends on DNS_RESOLVER = n
+ depends on AX25 = n
+ depends on ATALK = n
+
+ # Filesystems
+ depends on USB_DEVICEFS = n
+ depends on USB_GADGETFS = n
+ depends on USB_FUNCTIONFS = n
+ depends on DEVTMPFS = n
+ depends on XENFS = n
+
+ depends on 9P_FS = n
+ depends on ADFS_FS = n
+ depends on AFFS_FS = n
+ depends on AFS_FS = n
+ depends on AUTOFS4_FS = n
+ depends on BEFS_FS = n
+ depends on BFS_FS = n
+ depends on BTRFS_FS = n
+ depends on CEPH_FS = n
+ depends on CIFS = n
+ depends on CODA_FS = n
+ depends on CONFIGFS_FS = n
+ depends on CRAMFS = n
+ depends on DEBUG_FS = n
+ depends on ECRYPT_FS = n
+ depends on EFS_FS = n
+ depends on EXOFS_FS = n
+ depends on FAT_FS = n
+ depends on FUSE_FS = n
+ depends on GFS2_FS = n
+ depends on HFS_FS = n
+ depends on HFSPLUS_FS = n
+ depends on HPFS_FS = n
+ depends on HUGETLBFS = n
+ depends on ISO9660_FS = n
+ depends on JFFS2_FS = n
+ depends on JFS_FS = n
+ depends on LOGFS = n
+ depends on MINIX_FS = n
+ depends on NCP_FS = n
+ depends on NFSD = n
+ depends on NFS_FS = n
+ depends on NILFS2_FS = n
+ depends on NTFS_FS = n
+ depends on OCFS2_FS = n
+ depends on OMFS_FS = n
+ depends on QNX4FS_FS = n
+ depends on QNX6FS_FS = n
+ depends on REISERFS_FS = n
+ depends on SQUASHFS = n
+ depends on SYSV_FS = n
+ depends on UBIFS_FS = n
+ depends on UDF_FS = n
+ depends on UFS_FS = n
+ depends on VXFS_FS = n
+ depends on XFS_FS = n
+
+ depends on !UML || HOSTFS = n
+
+ # The rare drivers that won't build
+ depends on AIRO = n
+ depends on AIRO_CS = n
+ depends on TUN = n
+ depends on INFINIBAND_QIB = n
+ depends on BLK_DEV_LOOP = n
+ depends on ANDROID_BINDER_IPC = n
+
+ # Security modules
+ depends on SECURITY_TOMOYO = n
+ depends on SECURITY_APPARMOR = n
+
+config UIDGID_STRICT_TYPE_CHECKS
+ bool "Require conversions between uid/gids and their internal representation"
+ depends on UIDGID_CONVERTED
+ default n
+ help
+ While the necessary conversions are being added to all subsystems this
+ option allows the code to continue to build for unconverted subsystems.
+
+ Say Y here if you want the strict type checking enabled
+
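+ # The conversion pattern the strict checks enforce, in C (sketch): with
+ # UIDGID_STRICT_TYPE_CHECKS enabled, kuid_t becomes a one-member struct,
+ # so a missed conversion fails to compile instead of silently comparing
+ # raw integers:
+ #
+ #	kuid_t kuid = make_kuid(current_user_ns(), who);
+ #	if (!uid_valid(kuid))
+ #		return -EINVAL;
+ #	if (uid_eq(kuid, task_uid(p)))
+ #		...
+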
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select EVENTFD
config PERF_EVENTS
bool "Kernel performance events and counters"
- default y if (PROFILING || PERF_COUNTERS)
+ default y if PROFILING
depends on HAVE_PERF_EVENTS
select ANON_INODES
select IRQ_WORK
Say Y if unsure.
-config PERF_COUNTERS
- bool "Kernel performance counters (old config option)"
- depends on HAVE_PERF_EVENTS
- help
- This config has been obsoleted by the PERF_EVENTS
- config option - please see that one for details.
-
- It has no effect on the kernel whether you enable
- it or not, it is a compatibility placeholder.
-
- Say N if unsure.
-
config DEBUG_PERF_USE_VMALLOC
default n
bool "Debug: use vmalloc to back perf mmap() buffers"
#include <linux/cgroup.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
+#include <linux/seccomp.h>
#include <linux/swap.h>
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/audit.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
+#include <linux/proc_fs.h>
#include <linux/profile.h>
#include <linux/rmap.h>
#include <linux/ksm.h>
#include <linux/oom.h>
#include <linux/khugepaged.h>
#include <linux/signalfd.h>
+#include <linux/uprobes.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
return total;
}
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct_node(node) \
- kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
-# define free_task_struct(tsk) \
- kmem_cache_free(task_struct_cachep, (tsk))
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
static struct kmem_cache *task_struct_cachep;
+
+static inline struct task_struct *alloc_task_struct_node(int node)
+{
+ return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
+}
+
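+/* weak default; architectures with extra per-task state can override this hook */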
+void __weak arch_release_task_struct(struct task_struct *tsk) { }
+
+static inline void free_task_struct(struct task_struct *tsk)
+{
+ arch_release_task_struct(tsk);
+ kmem_cache_free(task_struct_cachep, tsk);
+}
#endif
-#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
+void __weak arch_release_thread_info(struct thread_info *ti) { }
+
+/*
+ * Allocate pages if THREAD_SIZE >= PAGE_SIZE; otherwise use a
+ * kmem_cache based allocator.
+ */
+# if THREAD_SIZE >= PAGE_SIZE
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
-#ifdef CONFIG_DEBUG_STACK_USAGE
- gfp_t mask = GFP_KERNEL | __GFP_ZERO;
-#else
- gfp_t mask = GFP_KERNEL;
-#endif
- struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+ struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+ THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
}
static inline void free_thread_info(struct thread_info *ti)
{
+ arch_release_thread_info(ti);
free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
+# else
+static struct kmem_cache *thread_info_cache;
+
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+ int node)
+{
+ return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
+}
+
+static void free_thread_info(struct thread_info *ti)
+{
+ arch_release_thread_info(ti);
+ kmem_cache_free(thread_info_cache, ti);
+}
+
+void thread_info_cache_init(void)
+{
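+ /* size == align == THREAD_SIZE keeps each stack naturally aligned */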
+ thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+ THREAD_SIZE, 0, NULL);
+ BUG_ON(thread_info_cache == NULL);
+}
+# endif
#endif
/* SLAB cache for signal_struct structures (tsk->signal) */
free_thread_info(tsk->stack);
rt_mutex_debug_task_free(tsk);
ftrace_graph_exit_task(tsk);
+ put_seccomp_filter(tsk);
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
}
EXPORT_SYMBOL_GPL(__put_task_struct);
-/*
- * macro override instead of weak attribute alias, to workaround
- * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
- */
-#ifndef arch_task_cache_init
-#define arch_task_cache_init()
-#endif
+void __init __weak arch_task_cache_init(void) { }
void __init fork_init(unsigned long mempages)
{
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
#ifndef ARCH_MIN_TASKALIGN
#define ARCH_MIN_TASKALIGN L1_CACHE_BYTES
#endif
int node = tsk_fork_get_node(orig);
int err;
- prepare_to_copy(orig);
-
tsk = alloc_task_struct_node(node);
if (!tsk)
return NULL;
}
charge = 0;
if (mpnt->vm_flags & VM_ACCOUNT) {
- unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+ unsigned long len;
+ len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
goto fail_nomem;
charge = len;
if (retval)
goto out;
+
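+ /* let uprobes hook the new file-backed vma; abort dup_mmap() on failure */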
+ if (file && uprobe_mmap(tmp))
+ goto out;
}
/* a new mm has just been created */
arch_dup_mmap(oldmm, mm);
might_sleep();
if (atomic_dec_and_test(&mm->mm_users)) {
+ uprobe_clear_state(mm);
exit_aio(mm);
ksm_exit(mm);
khugepaged_exit(mm); /* must run before exit_mmap */
list_del(&mm->mmlist);
spin_unlock(&mmlist_lock);
}
- put_swap_token(mm);
if (mm->binfmt)
module_put(mm->binfmt->module);
mmdrop(mm);
exit_pi_state_list(tsk);
#endif
+ uprobe_free_utask(tsk);
+
/* Get rid of any cached register state */
deactivate_mm(tsk, mm);
memcpy(mm, oldmm, sizeof(*mm));
mm_init_cpumask(mm);
- /* Initializing for Swap token stuff */
- mm->token_priority = 0;
- mm->last_interval = 0;
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
mm->pmd_huge_pte = NULL;
#endif
+ uprobe_reset_state(mm);
if (!mm_init(mm, tsk))
goto fail_nomem;
goto fail_nomem;
good_mm:
- /* Initializing for Swap token stuff */
- mm->token_priority = 0;
- mm->last_interval = 0;
-
tsk->mm = mm;
tsk->active_mm = mm;
return 0;
* Share io context with parent, if CLONE_IO is set
*/
if (clone_flags & CLONE_IO) {
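+ /* the parent pins its io_context, so taking the extra task reference cannot fail */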
- tsk->io_context = ioc_task_link(ioc);
- if (unlikely(!tsk->io_context))
- return -ENOMEM;
+ ioc_task_link(ioc);
+ tsk->io_context = ioc;
} else if (ioprio_valid(ioc->ioprio)) {
new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
if (unlikely(!new_ioc))
goto fork_out;
ftrace_graph_init_task(p);
+ get_seccomp_filter(p);
rt_mutex_init_task(p);
INIT_LIST_HEAD(&p->pi_state_list);
p->pi_state_cache = NULL;
#endif
+ uprobe_copy_process(p);
/*
* sigaltstack should be cleared when sharing the same VM
*/
if (p->io_context)
exit_io_context(p);
bad_fork_cleanup_namespaces:
+ if (unlikely(clone_flags & CLONE_NEWPID))
+ pid_ns_release_proc(p->nsproxy->pid_ns);
exit_task_namespaces(p);
bad_fork_cleanup_mm:
if (p->mm)