aboutsummaryrefslogtreecommitdiff
path: root/include/linux/sched.h
AgeCommit message (Collapse)Author
2006-12-07[PATCH] SysRq-X: show blocked tasksIngo Molnar
Add SysRq-X support: show blocked (TASK_UNINTERRUPTIBLE) tasks only. Useful for debugging IO stalls. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07[PATCH] Add include/linux/freezer.h and move definitions from sched.hNigel Cunningham
Move process freezing functions from include/linux/sched.h to freezer.h, so that modifications to the freezer or the kernel configuration don't require recompiling just about everything. [akpm@osdl.org: fix ueagle driver] Signed-off-by: Nigel Cunningham <nigel@suspend2.net> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: Pavel Machek <pavel@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07[PATCH] Save some bytes in struct mm_structArnaldo Carvalho de Melo
Before: [acme@newtoy net-2.6.20]$ pahole --cacheline 32 kernel/sched.o mm_struct /* include2/asm/processor.h:542 */ struct mm_struct { struct vm_area_struct * mmap; /* 0 4 */ struct rb_root mm_rb; /* 4 4 */ struct vm_area_struct * mmap_cache; /* 8 4 */ long unsigned int (*get_unmapped_area)(); /* 12 4 */ void (*unmap_area)(); /* 16 4 */ long unsigned int mmap_base; /* 20 4 */ long unsigned int task_size; /* 24 4 */ long unsigned int cached_hole_size; /* 28 4 */ /* ---------- cacheline 1 boundary ---------- */ long unsigned int free_area_cache; /* 32 4 */ pgd_t * pgd; /* 36 4 */ atomic_t mm_users; /* 40 4 */ atomic_t mm_count; /* 44 4 */ int map_count; /* 48 4 */ struct rw_semaphore mmap_sem; /* 52 64 */ spinlock_t page_table_lock; /* 116 40 */ struct list_head mmlist; /* 156 8 */ mm_counter_t _file_rss; /* 164 4 */ mm_counter_t _anon_rss; /* 168 4 */ long unsigned int hiwater_rss; /* 172 4 */ long unsigned int hiwater_vm; /* 176 4 */ long unsigned int total_vm; /* 180 4 */ long unsigned int locked_vm; /* 184 4 */ long unsigned int shared_vm; /* 188 4 */ /* ---------- cacheline 6 boundary ---------- */ long unsigned int exec_vm; /* 192 4 */ long unsigned int stack_vm; /* 196 4 */ long unsigned int reserved_vm; /* 200 4 */ long unsigned int def_flags; /* 204 4 */ long unsigned int nr_ptes; /* 208 4 */ long unsigned int start_code; /* 212 4 */ long unsigned int end_code; /* 216 4 */ long unsigned int start_data; /* 220 4 */ /* ---------- cacheline 7 boundary ---------- */ long unsigned int end_data; /* 224 4 */ long unsigned int start_brk; /* 228 4 */ long unsigned int brk; /* 232 4 */ long unsigned int start_stack; /* 236 4 */ long unsigned int arg_start; /* 240 4 */ long unsigned int arg_end; /* 244 4 */ long unsigned int env_start; /* 248 4 */ long unsigned int env_end; /* 252 4 */ /* ---------- cacheline 8 boundary ---------- */ long unsigned int saved_auxv[44]; /* 256 176 */ unsigned int dumpable:2; /* 432 4 */ cpumask_t cpu_vm_mask; /* 436 4 */ mm_context_t context; /* 440 68 */ long unsigned int swap_token_time; /* 508 4 */ /* ---------- cacheline 16 boundary ---------- */ char recent_pagein; /* 512 1 */ /* XXX 3 bytes hole, try to pack */ int core_waiters; /* 516 4 */ struct completion * core_startup_done; /* 520 4 */ struct completion core_done; /* 524 52 */ rwlock_t ioctx_list_lock; /* 576 36 */ struct kioctx * ioctx_list; /* 612 4 */ }; /* size: 616, sum members: 613, holes: 1, sum holes: 3, cachelines: 20, last cacheline: 8 bytes */ After: [acme@newtoy net-2.6.20]$ pahole --cacheline 32 kernel/sched.o mm_struct /* include2/asm/processor.h:542 */ struct mm_struct { struct vm_area_struct * mmap; /* 0 4 */ struct rb_root mm_rb; /* 4 4 */ struct vm_area_struct * mmap_cache; /* 8 4 */ long unsigned int (*get_unmapped_area)(); /* 12 4 */ void (*unmap_area)(); /* 16 4 */ long unsigned int mmap_base; /* 20 4 */ long unsigned int task_size; /* 24 4 */ long unsigned int cached_hole_size; /* 28 4 */ /* ---------- cacheline 1 boundary ---------- */ long unsigned int free_area_cache; /* 32 4 */ pgd_t * pgd; /* 36 4 */ atomic_t mm_users; /* 40 4 */ atomic_t mm_count; /* 44 4 */ int map_count; /* 48 4 */ struct rw_semaphore mmap_sem; /* 52 64 */ spinlock_t page_table_lock; /* 116 40 */ struct list_head mmlist; /* 156 8 */ mm_counter_t _file_rss; /* 164 4 */ mm_counter_t _anon_rss; /* 168 4 */ long unsigned int hiwater_rss; /* 172 4 */ long unsigned int hiwater_vm; /* 176 4 */ long unsigned int total_vm; /* 180 4 */ long unsigned int locked_vm; /* 184 4 */ long unsigned int shared_vm; /* 188 4 */ /* ---------- cacheline 6 boundary ---------- */ long unsigned int exec_vm; /* 192 4 */ long unsigned int stack_vm; /* 196 4 */ long unsigned int reserved_vm; /* 200 4 */ long unsigned int def_flags; /* 204 4 */ long unsigned int nr_ptes; /* 208 4 */ long unsigned int start_code; /* 212 4 */ long unsigned int end_code; /* 216 4 */ long unsigned int start_data; /* 220 4 */ /* ---------- cacheline 7 boundary ---------- */ long unsigned int end_data; /* 224 4 */ long unsigned int start_brk; /* 228 4 */ long unsigned int brk; /* 232 4 */ long unsigned int start_stack; /* 236 4 */ long unsigned int arg_start; /* 240 4 */ long unsigned int arg_end; /* 244 4 */ long unsigned int env_start; /* 248 4 */ long unsigned int env_end; /* 252 4 */ /* ---------- cacheline 8 boundary ---------- */ long unsigned int saved_auxv[44]; /* 256 176 */ cpumask_t cpu_vm_mask; /* 432 4 */ mm_context_t context; /* 436 68 */ long unsigned int swap_token_time; /* 504 4 */ char recent_pagein; /* 508 1 */ unsigned char dumpable:2; /* 509 1 */ /* XXX 2 bytes hole, try to pack */ int core_waiters; /* 512 4 */ struct completion * core_startup_done; /* 516 4 */ struct completion core_done; /* 520 52 */ rwlock_t ioctx_list_lock; /* 572 36 */ struct kioctx * ioctx_list; /* 608 4 */ }; /* size: 612, sum members: 610, holes: 1, sum holes: 2, cachelines: 20, last cacheline: 4 bytes */ [acme@newtoy net-2.6.20]$ codiff -V /tmp/sched.o.before kernel/sched.o /pub/scm/linux/kernel/git/acme/net-2.6.20/kernel/sched.c: struct mm_struct | -4 dumpable:2; from: unsigned int /* 432(30) 4(2) */ to: unsigned char /* 509(6) 1(2) */ < SNIP other offset changes > 1 struct changed [acme@newtoy net-2.6.20]$ I'm not aware of any problem about using 2 byte wide bitfields where previously a 4 byte wide one was, holler if there is any, I wouldn't be surprised, bitfields are things from hell. For the curious, 432(30) means: at offset 432 from the struct start, at offset 30 in the bitfield (yeah, it comes backwards, hellish, huh?) ditto for 509(6), while 4(2) and 1(2) means "struct field size(bitfield size)". Now we have a 2 bytes hole and are using only 4 bytes of the last 32 bytes cacheline, any takers? :-) Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-07[PATCH] new scheme to preempt swap tokenAshwin Chaugule
The new swap token patches replace the current token traversal algo. The old algo had a crude timeout parameter that was used to handover the token from one task to another. This algo, transfers the token to the tasks that are in need of the token. The urgency for the token is based on the number of times a task is required to swap-in pages. Accordingly, the priority of a task is incremented if it has been badly affected due to swap-outs. To ensure that the token doesnt bounce around rapidly, the token holders are given a priority boost. The priority of tasks is also decremented, if their rate of swap-in's keeps reducing. This way, the condition to check whether to pre-empt the swap token, is a matter of comparing two task's priority fields. [akpm@osdl.org: cleanups] Signed-off-by: Ashwin Chaugule <ashwin.chaugule@celunite.com> Cc: Rik van Riel <riel@redhat.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-28[PATCH] taskstats: kill ->taskstats_lock in favor of ->siglockOleg Nesterov
signal_struct is (mostly) protected by ->sighand->siglock, I think we don't need ->taskstats_lock to protect ->stats. This also allows us to simplify the locking in fill_tgid(). Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Shailabh Nagar <nagar@watson.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Jay Lan <jlan@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-06[PATCH] sched: fix a kerneldoc error on is_init()Henne
Fix a kerneldoc warning and reorderd the description for is_init(). Signed-off-by: Henrik Kretzschmar <henne@nachtwindheim.de> Cc: "Randy.Dunlap" <rdunlap@xenotime.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-03[PATCH] sched: cleanup sched_group cpu_power setupSiddha, Suresh B
Up to now sched group's cpu_power for each sched domain is initialized independently. This made the setup code ugly as the new sched domains are getting added. Make the sched group cpu_power setup code generic, by using domain child field and new domain flag in sched_domain. For most of the sched domains(except NUMA), sched group's cpu_power is now computed generically using the domain properties of itself and of the child domain. sched groups in NUMA domains are setup little differently and hence they don't use this generic mechanism. Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Acked-by: Ingo Molnar <mingo@elte.hu> Acked-by: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-03[PATCH] sched: introduce child field in sched_domainSiddha, Suresh B
Introduce the child field in sched_domain struct and use it in sched_balance_self(). We will also use this field in cleaning up the sched group cpu_power setup(done in a different patch) code. Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Acked-by: Ingo Molnar <mingo@elte.hu> Acked-by: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] replace cad_pid by a struct pidCedric Le Goater
There are a few places in the kernel where the init task is signaled. The ctrl+alt+del sequence is one them. It kills a task, usually init, using a cached pid (cad_pid). This patch replaces the pid_t by a struct pid to avoid pid wrap around problem. The struct pid is initialized at boot time in init() and can be modified through systctl with /proc/sys/kernel/cad_pid [ I haven't found any distro using it ? ] It also introduces a small helper routine kill_cad_pid() which is used where it seemed ok to use cad_pid instead of pid 1. [akpm@osdl.org: cleanups, build fix] Signed-off-by: Cedric Le Goater <clg@fr.ibm.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mackerras <paulus@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] IPC namespace coreKirill Korotaev
This patch set allows to unshare IPCs and have a private set of IPC objects (sem, shm, msg) inside namespace. Basically, it is another building block of containers functionality. This patch implements core IPC namespace changes: - ipc_namespace structure - new config option CONFIG_IPC_NS - adds CLONE_NEWIPC flag - unshare support [clg@fr.ibm.com: small fix for unshare of ipc namespace] [akpm@osdl.org: build fix] Signed-off-by: Pavel Emelianov <xemul@openvz.org> Signed-off-by: Kirill Korotaev <dev@openvz.org> Signed-off-by: Cedric Le Goater <clg@fr.ibm.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] namespaces: utsname: implement CLONE_NEWUTS flagSerge E. Hallyn
Implement a CLONE_NEWUTS flag, and use it at clone and sys_unshare. [clg@fr.ibm.com: IPC unshare fix] [bunk@stusta.de: cleanup] Signed-off-by: Serge Hallyn <serue@us.ibm.com> Cc: Kirill Korotaev <dev@openvz.org> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Andrey Savochkin <saw@sw.ru> Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: Cedric Le Goater <clg@fr.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] namespaces: utsname: implement utsname namespacesSerge E. Hallyn
This patch defines the uts namespace and some manipulators. Adds the uts namespace to task_struct, and initializes a system-wide init namespace. It leaves a #define for system_utsname so sysctl will compile. This define will be removed in a separate patch. [akpm@osdl.org: build fix, cleanup] Signed-off-by: Serge Hallyn <serue@us.ibm.com> Cc: Kirill Korotaev <dev@openvz.org> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Andrey Savochkin <saw@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] namespaces: incorporate fs namespace into nsproxySerge E. Hallyn
This moves the mount namespace into the nsproxy. The mount namespace count now refers to the number of nsproxies point to it, rather than the number of tasks. As a result, the unshare_namespace() function in kernel/fork.c no longer checks whether it is being shared. Signed-off-by: Serge Hallyn <serue@us.ibm.com> Cc: Kirill Korotaev <dev@openvz.org> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Andrey Savochkin <saw@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] namespaces: add nsproxySerge E. Hallyn
This patch adds a nsproxy structure to the task struct. Later patches will move the fs namespace pointer into this structure, and introduce a new utsname namespace into the nsproxy. The vserver and openvz functionality, then, would be implemented in large part by virtualizing/isolating more and more resources into namespaces, each contained in the nsproxy. [akpm@osdl.org: build fix] Signed-off-by: Serge Hallyn <serue@us.ibm.com> Cc: Kirill Korotaev <dev@openvz.org> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: Andrey Savochkin <saw@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] usb: fixup usb so it uses struct pidEric W. Biederman
The problem with remembering a user space process by its pid is that it is possible that the process will exit, pid wrap around will occur. Converting to a struct pid avoid that problem, and paves the way for implementing a pid namespace. Also since usb is the only user of kill_proc_info_as_uid rename kill_proc_info_as_uid to kill_pid_info_as_uid and have the new version take a struct pid. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Acked-by: Greg Kroah-Hartman <gregkh@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] Define struct pspaceSukadev Bhattiprolu
Define a per-container pid space object. And create one instance of this object, init_pspace, to define the entire pid space. Subsequent patches will provide/use interfaces to create/destroy pid spaces. Its a subset/rework of Eric Biederman's patch http://lkml.org/lkml/2006/2/6/285 . Signed-off-by: Eric Biederman <ebiederm@xmission.com> Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Serge Hallyn <serue@us.ibm.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: Kirill Korotaev <dev@sw.ru> Cc: Andrey Savochkin <saw@sw.ru> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] pid: implement signal functions that take a struct pid *Eric W. Biederman
Currently the signal functions all either take a task or a pid_t argument. This patch implements variants that take a struct pid *. After all of the users have been update it is my intention to remove the variants that take a pid_t as using pid_t can be more work (an extra hash table lookup) and difficult to get right in the presence of multiple pid namespaces. There are two kinds of functions introduced in this patch. The are the general use functions kill_pgrp and kill_pid which take a priv argument that is ultimately used to create the appropriate siginfo information, Then there are _kill_pgrp_info, kill_pgrp_info, kill_pid_info the internal implementation helpers that take an explicit siginfo. The distinction is made because filling out an explcit siginfo is tricky, and will be even more tricky when pid namespaces are introduced. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] pid: implement access helpers for a tacks various process groupsEric W. Biederman
In the last round of cleaning up the pid hash table a more general struct pid was introduced, that can be referenced counted. With the more general struct pid most if not all places where we store a pid_t we can now store a struct pid * and remove the need for a hash table lookup, and avoid any possible problems with pid roll over. Looking forward to the pid namespaces struct pid * gives us an absolute form a pid so we can compare and use them without caring which pid namespace we are in. This patchset introduces the infrastructure needed to use struct pid instead of pid_t, and then it goes on to convert two different kernel users that currently store a pid_t value. There are a lot more places to go but this is enough to get the basic idea. Before we can merge a pid namespace patch all of the kernel pid_t users need to be examined. Those that deal with user space processes need to be converted to using a struct pid *. Those that deal with kernel processes need to converted to using the kthread api. A rare few that only use their current processes pid values get to be left alone. This patch: task_session returns the struct pid of a tasks session. task_pgrp returns the struct pid of a tasks process group. task_tgid returns the struct pid of a tasks thread group. task_pid returns the struct pid of a tasks process id. These can be used to avoid unnecessary hash table lookups, and to implement safe pid comparisions in the face of a pid namespace. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-02[PATCH] proc: readdir race fix (take 3)Eric W. Biederman
The problem: An opendir, readdir, closedir sequence can fail to report process ids that are continually in use throughout the sequence of system calls. For this race to trigger the process that proc_pid_readdir stops at must exit before readdir is called again. This can cause ps to fail to report processes, and it is in violation of posix guarantees and normal application expectations with respect to readdir. Currently there is no way to work around this problem in user space short of providing a gargantuan buffer to user space so the directory read all happens in on system call. This patch implements the normal directory semantics for proc, that guarantee that a directory entry that is neither created nor destroyed while reading the directory entry will be returned. For directory that are either created or destroyed during the readdir you may or may not see them. Furthermore you may seek to a directory offset you have previously seen. These are the guarantee that ext[23] provides and that posix requires, and more importantly that user space expects. Plus it is a simple semantic to implement reliable service. It is just a matter of calling readdir a second time if you are wondering if something new has show up. These better semantics are implemented by scanning through the pids in numerical order and by making the file offset a pid plus a fixed offset. The pid scan happens on the pid bitmap, which when you look at it is remarkably efficient for a brute force algorithm. Given that a typical cache line is 64 bytes and thus covers space for 64*8 == 200 pids. There are only 40 cache lines for the entire 32K pid space. A typical system will have 100 pids or more so this is actually fewer cache lines we have to look at to scan a linked list, and the worst case of having to scan the entire pid bitmap is pretty reasonable. If we need something more efficient we can go to a more efficient data structure for indexing the pids, but for now what we have should be sufficient. In addition this takes no additional locks and is actually less code than what we are doing now. Also another very subtle bug in this area has been fixed. It is possible to catch a task in the middle of de_thread where a thread is assuming the thread of it's thread group leader. This patch carefully handles that case so if we hit it we don't fail to return the pid, that is undergoing the de_thread dance. Thanks to KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> for providing the first fix, pointing this out and working on it. [oleg@tv-sign.ru: fix it] Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Jean Delvare <jdelvare@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01[PATCH] csa accounting taskstats updateJay Lan
ChangeLog: Feedbacks from Andrew Morton: - define TS_COMM_LEN to 32 - change acct_stimexpd field of task_struct to be of cputime_t, which is to be used to save the tsk->stime of last timer interrupt update. - a new Documentation/accounting/taskstats-struct.txt to describe fields of taskstats struct. Feedback from Balbir Singh: - keep the stime of a task to be zero when both stime and utime are zero as recoreded in task_struct. Misc: - convert accumulated RSS/VM from platform dependent pages-ticks to MBytes-usecs in the kernel Cc: Shailabh Nagar <nagar@watson.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Chris Sturtivant <csturtiv@sgi.com> Cc: Tony Ernst <tee@sgi.com> Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-10-01[PATCH] csa: convert CONFIG tag for extended accounting routinesJay Lan
There were a few accounting data/macros that are used in CSA but are #ifdef'ed inside CONFIG_BSD_PROCESS_ACCT. This patch is to change those ifdef's from CONFIG_BSD_PROCESS_ACCT to CONFIG_TASK_XACCT. A few defines are moved from kernel/acct.c and include/linux/acct.h to kernel/tsacct.c and include/linux/tsacct_kern.h. Signed-off-by: Jay Lan <jlan@sgi.com> Cc: Shailabh Nagar <nagar@watson.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Chris Sturtivant <csturtiv@sgi.com> Cc: Tony Ernst <tee@sgi.com> Cc: Guillaume Thouvenin <guillaume.thouvenin@bull.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-30[PATCH] BLOCK: Remove duplicate declaration of exit_io_context() [try #6]David Howells
Remove the duplicate declaration of exit_io_context() from linux/sched.h. Signed-Off-By: David Howells <dhowells@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
2006-09-29[PATCH] introduce TASK_DEAD stateOleg Nesterov
I am not sure about this patch, I am asking Ingo to take a decision. task_struct->state == EXIT_DEAD is a very special case, to avoid a confusion it makes sense to introduce a new state, TASK_DEAD, while EXIT_DEAD should live only in ->exit_state as documented in sched.h. Note that this state is not visible to user-space, get_task_state() masks off unsuitable states. Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-29[PATCH] kill PF_DEAD flagOleg Nesterov
After the previous change (->flags & PF_DEAD) <=> (->state == EXIT_DEAD), we don't need PF_DEAD any longer. Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-29[PATCH] introduce is_rt_policy() helperOleg Nesterov
Imho, makes the code a bit easier to read. Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-29[PATCH] simplify update_times (avoid jiffies/jiffies_64 aliasing problem)Atsushi Nemoto
Pass ticks to do_timer() and update_times(), and adjust x86_64 and s390 timer interrupt handler with this change. Currently update_times() calculates ticks by "jiffies - wall_jiffies", but callers of do_timer() should know how many ticks to update. Passing ticks get rid of this redundant calculation. Also there are another redundancy pointed out by Martin Schwidefsky. This cleanup make a barrier added by 5aee405c662ca644980c184774277fc6d0769a84 needless. So this patch removes it. As a bonus, this cleanup make wall_jiffies can be removed easily, since now wall_jiffies is always synced with jiffies. (This patch does not really remove wall_jiffies. It would be another cleanup patch) Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: john stultz <johnstul@us.ibm.com> Cc: Andi Kleen <ak@muc.de> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Richard Henderson <rth@twiddle.net> Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Acked-by: Russell King <rmk@arm.linux.org.uk> Cc: Ian Molton <spyro@f2s.com> Cc: Mikael Starvik <starvik@axis.com> Acked-by: David Howells <dhowells@redhat.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: Hirokazu Takata <takata.hirokazu@renesas.com> Acked-by: Ralf Baechle <ralf@linux-mips.org> Cc: Kyle McMartin <kyle@mcmartin.ca> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: Kazumoto Kojima <kkojima@rr.iij4u.or.jp> Cc: Richard Curnow <rc@rc0.org.uk> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Miles Bader <uclinux-v850@lsi.nec.co.jp> Cc: Chris Zankel <chris@zankel.net> Acked-by: "Luck, Tony" <tony.luck@intel.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Roman Zippel <zippel@linux-m68k.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-29[PATCH] pidspace: is_init()Sukadev Bhattiprolu
This is an updated version of Eric Biederman's is_init() patch. (http://lkml.org/lkml/2006/2/6/280). It applies cleanly to 2.6.18-rc3 and replaces a few more instances of ->pid == 1 with is_init(). Further, is_init() checks pid and thus removes dependency on Eric's other patches for now. Eric's original description: There are a lot of places in the kernel where we test for init because we give it special properties. Most significantly init must not die. This results in code all over the kernel test ->pid == 1. Introduce is_init to capture this case. With multiple pid spaces for all of the cases affected we are looking for only the first process on the system, not some other process that has pid == 1. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Sukadev Bhattiprolu <sukadev@us.ibm.com> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: Serge Hallyn <serue@us.ibm.com> Cc: Cedric Le Goater <clg@fr.ibm.com> Cc: <lxc-devel@lists.sourceforge.net> Acked-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-29[PATCH] ifdef blktrace debugging fieldsAlexey Dobriyan
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Acked-by: Jens Axboe <axboe@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-29[PATCH] task_struct: ifdef Missed'em V IPCAlexey Dobriyan
ipc/sem.c only. $ agrep sysvsem -w -n ipc/sem.c:912: undo_list = current->sysvsem.undo_list; ipc/sem.c:932: undo_list = current->sysvsem.undo_list; ipc/sem.c:954: undo_list = current->sysvsem.undo_list; ipc/sem.c:963: current->sysvsem.undo_list = undo_list; ipc/sem.c:1247: tsk->sysvsem.undo_list = undo_list; ipc/sem.c:1249: tsk->sysvsem.undo_list = NULL; ipc/sem.c:1271: undo_list = tsk->sysvsem.undo_list; include/linux/sched.h:876: struct sysv_sem sysvsem; Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-09-26[PATCH] Add the canary field to the PDA area and the task structArjan van de Ven
This patch adds the per thread cookie field to the task struct and the PDA. Also it makes sure that the PDA value gets the new cookie value at context switch, and that a new task gets a new cookie at task creation time. Signed-off-by: Arjan van Ven <arjan@linux.intel.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andi Kleen <ak@suse.de> CC: Andi Kleen <ak@suse.de>
2006-09-26[PATCH] non lazy "sleazy" fpu implementationArjan van de Ven
Right now the kernel on x86-64 has a 100% lazy fpu behavior: after *every* context switch a trap is taken for the first FPU use to restore the FPU context lazily. This is of course great for applications that have very sporadic or no FPU use (since then you avoid doing the expensive save/restore all the time). However for very frequent FPU users... you take an extra trap every context switch. The patch below adds a simple heuristic to this code: After 5 consecutive context switches of FPU use, the lazy behavior is disabled and the context gets restored every context switch. If the app indeed uses the FPU, the trap is avoided. (the chance of the 6th time slice using FPU after the previous 5 having done so are quite high obviously). After 256 switches, this is reset and lazy behavior is returned (until there are 5 consecutive ones again). The reason for this is to give apps that do longer bursts of FPU use still the lazy behavior back after some time. [akpm@osdl.org: place new task_struct field next to jit_keyring to save space] Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Andi Kleen <ak@suse.de> Cc: Andi Kleen <ak@muc.de> Signed-off-by: Andrew Morton <akpm@osdl.org>
2006-09-01[PATCH] task delay accounting fixesShailabh Nagar
Cleanup allocation and freeing of tsk->delays used by delay accounting. This solves two problems reported for delay accounting: 1. oops in __delayacct_blkio_ticks http://www.uwsg.indiana.edu/hypermail/linux/kernel/0608.2/1844.html Currently tsk->delays is getting freed too early in task exit which can cause a NULL tsk->delays to get accessed via reading of /proc/<tgid>/stats. The patch fixes this problem by freeing tsk->delays closer to when task_struct itself is freed up. As a result, it also eliminates the use of tsk->delays_lock which was only being used (inadequately) to safeguard access to tsk->delays while a task was exiting. 2. Possible memory leak in kernel/delayacct.c http://www.uwsg.indiana.edu/hypermail/linux/kernel/0608.2/1389.html The patch cleans up tsk->delays allocations after a bad fork which was missing earlier. The patch has been tested to fix the problems listed above and stress tested with rapid calls to delay accounting's taskstats command interface (which is the other path that can access the same data, besides the /proc interface causing the oops above). Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-08-06[PATCH] Make suspend possible with a traced process at a breakpointRafael J. Wysocki
It should be possible to suspend, either to RAM or to disk, if there's a traced process that has just reached a breakpoint. However, this is a special case, because its parent process might have been frozen already and then we are unable to deliver the "freeze" signal to the traced process. If this happens, it's better to cancel the freezing of the traced process. Ref. http://bugzilla.kernel.org/show_bug.cgi?id=6787 Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl> Acked-by: Pavel Machek <pavel@ucw.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-14[PATCH] delay accounting taskstats interface send tgid onceShailabh Nagar
Send per-tgid data only once during exit of a thread group instead of once with each member thread exit. Currently, when a thread exits, besides its per-tid data, the per-tgid data of its thread group is also sent out, if its thread group is non-empty. The per-tgid data sent consists of the sum of per-tid stats for all *remaining* threads of the thread group. This patch modifies this sending in two ways: - the per-tgid data is sent only when the last thread of a thread group exits. This cuts down heavily on the overhead of sending/receiving per-tgid data, especially when other exploiters of the taskstats interface aren't interested in per-tgid stats - the semantics of the per-tgid data sent are changed. Instead of being the sum of per-tid data for remaining threads, the value now sent is the true total accumalated statistics for all threads that are/were part of the thread group. The patch also addresses a minor issue where failure of one accounting subsystem to fill in the taskstats structure was causing the send of taskstats to not be sent at all. The patch has been tested for stability and run cerberus for over 4 hours on an SMP. [akpm@osdl.org: bugfixes] Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-14[PATCH] per-task-delay-accounting: delay accounting usage of taskstats interfaceShailabh Nagar
Usage of taskstats interface by delay accounting. Signed-off-by: Shailabh Nagar <nagar@us.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Peter Chubb <peterc@gelato.unsw.edu.au> Cc: Erich Focht <efocht@ess.nec.de> Cc: Levent Serinol <lserinol@gmail.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-14[PATCH] per-task-delay-accounting: cpu delay collection via schedstatsChandra Seetharaman
Make the task-related schedstats functions callable by delay accounting even if schedstats collection isn't turned on. This removes the dependency of delay accounting on schedstats. Signed-off-by: Chandra Seetharaman <sekharan@us.ibm.com> Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Peter Chubb <peterc@gelato.unsw.edu.au> Cc: Erich Focht <efocht@ess.nec.de> Cc: Levent Serinol <lserinol@gmail.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-14[PATCH] per-task-delay-accounting: sync block I/O and swapin delay collectionShailabh Nagar
Unlike earlier iterations of the delay accounting patches, now delays are only collected for the actual I/O waits rather than try and cover the delays seen in I/O submission paths. Account separately for block I/O delays incurred as a result of swapin page faults whose frequency can be affected by the task/process' rss limit. Hence swapin delays can act as feedback for rss limit changes independent of I/O priority changes. Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Peter Chubb <peterc@gelato.unsw.edu.au> Cc: Erich Focht <efocht@ess.nec.de> Cc: Levent Serinol <lserinol@gmail.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-14[PATCH] per-task-delay-accounting: setupShailabh Nagar
Initialization code related to collection of per-task "delay" statistics which measure how long it had to wait for cpu, sync block io, swapping etc. The collection of statistics and the interface are in other patches. This patch sets up the data structures and allows the statistics collection to be disabled through a kernel boot parameter. Signed-off-by: Shailabh Nagar <nagar@watson.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Peter Chubb <peterc@gelato.unsw.edu.au> Cc: Erich Focht <efocht@ess.nec.de> Cc: Levent Serinol <lserinol@gmail.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-03[PATCH] sched: cleanup, convert sched.c-internal typedefs to structIngo Molnar
convert: - runqueue_t to 'struct rq' - prio_array_t to 'struct prio_array' - migration_req_t to 'struct migration_req' I was the one who added these but they are both against the kernel coding style and also were used inconsistently at places. So just get rid of them at once, now that we are flushing the scheduler patch-queue anyway. Conversion was mostly scripted, the result was reviewed and all secondary whitespace and style impact (if any) was fixed up by hand. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-03[PATCH] sched: cleanup, remove task_t, convert to struct task_structIngo Molnar
cleanup: remove task_t and convert all the uses to struct task_struct. I introduced it for the scheduler anno and it was a mistake. Conversion was mostly scripted, the result was reviewed and all secondary whitespace and style impact (if any) was fixed up by hand. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-03[PATCH] lockdep: coreIngo Molnar
Do 'make oldconfig' and accept all the defaults for new config options - reboot into the kernel and if everything goes well it should boot up fine and you should have /proc/lockdep and /proc/lockdep_stats files. Typically if the lock validator finds some problem it will print out voluminous debug output that begins with "BUG: ..." and which syslog output can be used by kernel developers to figure out the precise locking scenario. What does the lock validator do? It "observes" and maps all locking rules as they occur dynamically (as triggered by the kernel's natural use of spinlocks, rwlocks, mutexes and rwsems). Whenever the lock validator subsystem detects a new locking scenario, it validates this new rule against the existing set of rules. If this new rule is consistent with the existing set of rules then the new rule is added transparently and the kernel continues as normal. If the new rule could create a deadlock scenario then this condition is printed out. When determining validity of locking, all possible "deadlock scenarios" are considered: assuming arbitrary number of CPUs, arbitrary irq context and task context constellations, running arbitrary combinations of all the existing locking scenarios. In a typical system this means millions of separate scenarios. This is why we call it a "locking correctness" validator - for all rules that are observed the lock validator proves it with mathematical certainty that a deadlock could not occur (assuming that the lock validator implementation itself is correct and its internal data structures are not corrupted by some other kernel subsystem). [see more details and conditionals of this statement in include/linux/lockdep.h and Documentation/lockdep-design.txt] Furthermore, this "all possible scenarios" property of the validator also enables the finding of complex, highly unlikely multi-CPU multi-context races via single single-context rules, increasing the likelyhood of finding bugs drastically. In practical terms: the lock validator already found a bug in the upstream kernel that could only occur on systems with 3 or more CPUs, and which needed 3 very unlikely code sequences to occur at once on the 3 CPUs. That bug was found and reported on a single-CPU system (!). So in essence a race will be found "piecemail-wise", triggering all the necessary components for the race, without having to reproduce the race scenario itself! In its short existence the lock validator found and reported many bugs before they actually caused a real deadlock. To further increase the efficiency of the validator, the mapping is not per "lock instance", but per "lock-class". For example, all struct inode objects in the kernel have inode->inotify_mutex. If there are 10,000 inodes cached, then there are 10,000 lock objects. But ->inotify_mutex is a single "lock type", and all locking activities that occur against ->inotify_mutex are "unified" into this single lock-class. The advantage of the lock-class approach is that all historical ->inotify_mutex uses are mapped into a single (and as narrow as possible) set of locking rules - regardless of how many different tasks or inode structures it took to build this set of rules. The set of rules persist during the lifetime of the kernel. To see the rough magnitude of checking that the lock validator does, here's a portion of /proc/lockdep_stats, fresh after bootup: lock-classes: 694 [max: 2048] direct dependencies: 1598 [max: 8192] indirect dependencies: 17896 all direct dependencies: 16206 dependency chains: 1910 [max: 8192] in-hardirq chains: 17 in-softirq chains: 105 in-process chains: 1065 stack-trace entries: 38761 [max: 131072] combined max dependencies: 2033928 hardirq-safe locks: 24 hardirq-unsafe locks: 176 softirq-safe locks: 53 softirq-unsafe locks: 137 irq-safe locks: 59 irq-unsafe locks: 176 The lock validator has observed 1598 actual single-thread locking patterns, and has validated all possible 2033928 distinct locking scenarios. More details about the design of the lock validator can be found in Documentation/lockdep-design.txt, which can also found at: http://redhat.com/~mingo/lockdep-patches/lockdep-design.txt [bunk@stusta.de: cleanups] Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Adrian Bunk <bunk@stusta.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-03[PATCH] lockdep: irqtrace subsystem, coreIngo Molnar
Accurate hard-IRQ-flags and softirq-flags state tracing. This allows us to attach extra functionality to IRQ flags on/off events (such as trace-on/off). Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-07-03[PATCH] lockdep: better lock debuggingIngo Molnar
Generic lock debugging: - generalized lock debugging framework. For example, a bug in one lock subsystem turns off debugging in all lock subsystems. - got rid of the caller address passing (__IP__/__IP_DECL__/etc.) from the mutex/rtmutex debugging code: it caused way too much prototype hackery, and lockdep will give the same information anyway. - ability to do silent tests - check lock freeing in vfree too. - more finegrained debugging options, to allow distributions to turn off more expensive debugging features. There's no separate 'held mutexes' list anymore - but there's a 'held locks' stack within lockdep, which unifies deadlock detection across all lock classes. (this is independent of the lockdep validation stuff - lockdep first checks whether we are holding a lock already) Here are the current debugging options: CONFIG_DEBUG_MUTEXES=y CONFIG_DEBUG_LOCK_ALLOC=y which do: config DEBUG_MUTEXES bool "Mutex debugging, basic checks" config DEBUG_LOCK_ALLOC bool "Detect incorrect freeing of live mutexes" Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-30[PATCH] SELinux: add security hook call to kill_proc_info_as_uidDavid Quigley
This patch adds a call to the extended security_task_kill hook introduced by the prior patch to the kill_proc_info_as_uid function so that these signals can be properly mediated by security modules. It also updates the existing hook call in check_kill_permission. Signed-off-by: David Quigley <dpquigl@tycho.nsa.gov> Signed-off-by: James Morris <jmorris@namei.org> Cc: Stephen Smalley <sds@tycho.nsa.gov> Cc: Chris Wright <chrisw@sous-sol.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27[PATCH] rtmutex: Propagate priority settings into PI lock chainsThomas Gleixner
When the priority of a task, which is blocked on a lock, changes we must propagate this change into the PI lock chain. Therefor the chain walk code is changed to get rid of the references to current to avoid false positives in the deadlock detector, as setscheduler might be called by a task which holds the lock on which the task whose priority is changed is blocked. Also add some comments about the get/put_task_struct usage to avoid confusion. Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27[PATCH] pi-futex: futex_lock_pi/futex_unlock_pi supportIngo Molnar
This adds the actual pi-futex implementation, based on rt-mutexes. [dino@in.ibm.com: fix an oops-causing race] Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Dinakar Guniguntala <dino@in.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27[PATCH] pi-futex: rt mutex testerThomas Gleixner
RT-mutex tester: scriptable tester for rt mutexes, which allows userspace scripting of mutex unit-tests (and dynamic tests as well), using the actual rt-mutex implementation of the kernel. [akpm@osdl.org: fixlet] Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27[PATCH] pi-futex: rt mutex coreIngo Molnar
Core functions for the rt-mutex subsystem. Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27[PATCH] pi-futex: scheduler support for piIngo Molnar
Add framework to boost/unboost the priority of RT tasks. This consists of: - caching the 'normal' priority in ->normal_prio - providing a functions to set/get the priority of the task - make sched_setscheduler() aware of boosting The effective_prio() cleanups also fix a priority-calculation bug pointed out by Andrey Gelman, in set_user_nice(). has_rt_policy() fix: Peter Williams <pwil3058@bigpond.net.au> Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Cc: Andrey Gelman <agelman@012.net.il> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-27[PATCH] sched: mc/smt power savings sched policySiddha, Suresh B
sysfs entries 'sched_mc_power_savings' and 'sched_smt_power_savings' in /sys/devices/system/cpu/ control the MC/SMT power savings policy for the scheduler. Based on the values (1-enable, 0-disable) for these controls, sched groups cpu power will be determined for different domains. When power savings policy is enabled and under light load conditions, scheduler will minimize the physical packages/cpu cores carrying the load and thus conserving power(with a perf impact based on the workload characteristics... see OLS 2005 CMP kernel scheduler paper for more details..) Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Con Kolivas <kernel@kolivas.org> Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com> Cc: "David S. Miller" <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>