From 6859a8402945cf1d74af75a2e1aa4e327a506ab4 Mon Sep 17 00:00:00 2001 From: Alan Mayer Date: Wed, 26 Mar 2008 16:11:31 -0500 Subject: x86: resize NR_IRQS for large machines On machines with very large numbers of cpus, tables that are dimensioned by NR_IRQS get very large, especially the irq_desc table. They are also very sparsely used. When the cpu count is > MAX_IO_APICS, use MAX_IO_APICS to set NR_IRQS, otherwise use NR_CPUS. Signed-off-by: Alan Mayer Reviewed-by: Christoph Lameter Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/kernel_stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index e8ffce898bf..cf9f40a91c9 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -1,11 +1,11 @@ #ifndef _LINUX_KERNEL_STAT_H #define _LINUX_KERNEL_STAT_H -#include #include #include #include #include +#include #include /* -- cgit v1.2.3 From 988f7b5789ccf5cfed14c72e28573a49f0cb4809 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:22 -0700 Subject: x86: PAT export resource_wc in pci sysfs For the ranges with IORESOURCE_PREFETCH, export a new resource_wc interface in pci /sysfs along with resource (which is uncached). Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Acked-by: Jesse Barnes Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 509159bcd4e..d18b1dd49fa 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -206,6 +206,7 @@ struct pci_dev { struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? 
*/ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ + struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */ #ifdef CONFIG_PCI_MSI struct list_head msi_list; #endif -- cgit v1.2.3 From 09e67ca2c523544e6b38aa570a5f62a0cf20b87b Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 16 May 2008 11:57:45 +1000 Subject: [POWERPC] Move of_device_get_modalias to drivers/of Commit 140b932f8cb6cced10b96860651a198b1b89cbb9 ("Create modalias file in sysfs for of_platform bus") needs this to avoid breaking the sparc builds. Just move the code and add whitespace around some binary operators. Signed-off-by: Stephen Rothwell Acked-by: David S. Miller Signed-off-by: Paul Mackerras --- include/linux/of_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index afe338217d9..d3a74e00a3e 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -24,4 +24,7 @@ static inline void of_device_free(struct of_device *dev) of_release_dev(&dev->dev); } +extern ssize_t of_device_get_modalias(struct of_device *ofdev, + char *str, ssize_t len); + #endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.3 From 8b09dee67f484e9b42114b1a1f068e080fd7aa56 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcupreempt: remove duplicate prototypes rcu_batches_completed and rcu_batches_completed_bh are both declared in rcuclassic.h and rcupreempt.h. This patch removes the extra prototypes for them from rcupdate.h. rcu_batches_completed_bh is defined as a static inline in the rcupreempt.h header file. Trying to export this as EXPORT_SYMBOL_GPL causes linking problems with the powerpc linker. There's no need to export a static inlined function. Modules must be compiled with the same type of RCU implementation as the kernel they are for.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d42dbec0608..ec2fc5b3264 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -224,8 +224,6 @@ extern void call_rcu_bh(struct rcu_head *head, /* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); /* Internal to kernel */ extern void rcu_init(void); -- cgit v1.2.3 From 4446a36ff8c74ac3b32feb009b651048e129c6af Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add call_rcu_sched() Fourth cut of patch to provide the call_rcu_sched(). This is akin to synchronize_sched() as call_rcu() is to synchronize_rcu(). Should be fine for experimental and -rt use, but not ready for inclusion. With some luck, I will be able to tell Andrew to come out of hiding on the next round. Passes multi-day rcutorture sessions with concurrent CPU hotplugging. Fixes since the first version include a bug that could result in indefinite blocking (spotted by Gautham Shenoy), better resiliency against CPU-hotplug operations, and other minor fixes. Fixes since the second version include reworking grace-period detection to avoid deadlocks that could happen when running concurrently with CPU hotplug, adding Mathieu's fix to avoid the softlockup messages, as well as Mathieu's fix to allow use earlier in boot.
Fixes since the third version include a wrong-CPU bug spotted by Andrew, getting rid of the obsolete synchronize_kernel API that somehow snuck back in, merging spin_unlock() and local_irq_restore() in a few places, commenting the code that checks for quiescent states based on interrupting from user-mode execution or the idle loop, removing some inline attributes, and some code-style changes. Known/suspected shortcomings: o I still do not entirely trust the sleep/wakeup logic. Next step will be to use a private snapshot of the CPU online mask in rcu_sched_grace_period() -- if the CPU wasn't there at the start of the grace period, we don't need to hear from it. And the bit about accounting for changes in online CPUs inside of rcu_sched_grace_period() is ugly anyway. o It might be good for rcu_sched_grace_period() to invoke resched_cpu() when a given CPU wasn't responding quickly, but resched_cpu() is declared static... This patch also fixes a long-standing bug in the earlier preemptable-RCU implementation of synchronize_rcu() that could result in loss of concurrent external changes to a task's CPU affinity mask. I still cannot remember who reported this... Signed-off-by: Paul E. 
McKenney Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/rcuclassic.h | 3 +++ include/linux/rcupdate.h | 22 ++++++++++++++++++++++ include/linux/rcupreempt.h | 42 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index b3aa05baab8..8c774905dcf 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -151,7 +151,10 @@ extern struct lockdep_map rcu_lock_map; #define __synchronize_sched() synchronize_rcu() +#define call_rcu_sched(head, func) call_rcu(head, func) + extern void __rcu_init(void); +#define rcu_init_sched() do { } while (0) extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ec2fc5b3264..411969cb524 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,6 +40,7 @@ #include #include #include +#include /** * struct rcu_head - callback structure for use with RCU @@ -168,6 +169,27 @@ struct rcu_head { (p) = (v); \ }) +/* Infrastructure to implement the synchronize_() primitives. */ + +struct rcu_synchronize { + struct rcu_head head; + struct completion completion; +}; + +extern void wakeme_after_rcu(struct rcu_head *head); + +#define synchronize_rcu_xxx(name, func) \ +void name(void) \ +{ \ + struct rcu_synchronize rcu; \ + \ + init_completion(&rcu.completion); \ + /* Will wake me after RCU finished. */ \ + func(&rcu.head, wakeme_after_rcu); \ + /* Wait for it. */ \ + wait_for_completion(&rcu.completion); \ +} + /** * synchronize_sched - block until all CPUs have exited any non-preemptive * kernel code sequences. 
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 8a05c7e20bc..f04b64eca63 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -40,10 +40,39 @@ #include #include -#define rcu_qsctr_inc(cpu) +struct rcu_dyntick_sched { + int dynticks; + int dynticks_snap; + int sched_qs; + int sched_qs_snap; + int sched_dynticks_snap; +}; + +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); + +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); + + rdssp->sched_qs++; +} #define rcu_bh_qsctr_inc(cpu) #define call_rcu_bh(head, rcu) call_rcu(head, rcu) +/** + * call_rcu_sched - Queue RCU callback for invocation after sched grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual update function to be invoked after the grace period + * + * The update function will be invoked some time after a full + * synchronize_sched()-style grace period elapses, in other words after + * all currently executing preempt-disabled sections of code (including + * hardirq handlers, NMI handlers, and local_irq_save() blocks) have + * completed. 
+ */ +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + extern void __rcu_read_lock(void) __acquires(RCU); extern void __rcu_read_unlock(void) __releases(RCU); extern int rcu_pending(int cpu); @@ -55,6 +84,7 @@ extern int rcu_needs_cpu(int cpu); extern void __synchronize_sched(void); extern void __rcu_init(void); +extern void rcu_init_sched(void); extern void rcu_check_callbacks(int cpu, int user); extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); @@ -81,20 +111,20 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; #ifdef CONFIG_NO_HZ -DECLARE_PER_CPU(long, dynticks_progress_counter); +DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); static inline void rcu_enter_nohz(void) { smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - __get_cpu_var(dynticks_progress_counter)++; - WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1); } static inline void rcu_exit_nohz(void) { - __get_cpu_var(dynticks_progress_counter)++; smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ - WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1)); + __get_cpu_var(rcu_dyntick_sched).dynticks++; + WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1)); } #else /* CONFIG_NO_HZ */ -- cgit v1.2.3 From 70f12f848d3e981479b4f6f751e73c14f7c13e5b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: add rcu_barrier_sched() and rcu_barrier_bh() Add rcu_barrier_sched() and rcu_barrier_bh(). With these in place, rcutorture no longer gives the occasional oops when repeatedly starting and stopping torturing rcu_bh. Also adds the API needed to flush out pre-existing call_rcu_sched() callbacks. Signed-off-by: Paul E. 
McKenney Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/rcupdate.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 411969cb524..e8b4039cfb2 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -246,6 +246,8 @@ extern void call_rcu_bh(struct rcu_head *head, /* Exported common interfaces */ extern void synchronize_rcu(void); extern void rcu_barrier(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); /* Internal to kernel */ extern void rcu_init(void); -- cgit v1.2.3 From 82524746c27fa418c250a56dd7606b9d3fc79826 Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 12 May 2008 21:21:05 +0200 Subject: rcu: split list.h and move rcu-protected lists into rculist.h Move rcu-protected lists from list.h into a new header file rculist.h. This is done because lists are a very widely used primitive structure all over the kernel and it's currently impossible to include other header files in list.h without creating circular dependencies. For example, list.h implements rcu-protected lists and uses rcu_dereference() without including rcupdate.h. It actually compiles because users of rcu_dereference() are macros. Other RCU functions could be used too, but probably aren't because of this. Therefore this patch creates rculist.h, which includes rcupdate.h, without too many changes/troubles. Signed-off-by: Franck Bui-Huu Acked-by: Paul E.
McKenney Acked-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/dcache.h | 1 + include/linux/list.h | 367 -------------------------------------------- include/linux/rculist.h | 396 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 397 insertions(+), 367 deletions(-) create mode 100644 include/linux/rculist.h (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 2a6639407c8..1f5cebf10a2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/include/linux/list.h b/include/linux/list.h index 08cf4f65188..139ec41d9c2 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -84,65 +84,6 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) __list_add(new, head->prev, head); } -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add_rcu(struct list_head * new, - struct list_head * prev, struct list_head * next) -{ - new->next = next; - new->prev = prev; - smp_wmb(); - next->prev = new; - prev->next = new; -} - -/** - * list_add_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). 
- */ -static inline void list_add_rcu(struct list_head *new, struct list_head *head) -{ - __list_add_rcu(new, head, head->next); -} - -/** - * list_add_tail_rcu - add a new entry to rcu-protected list - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_add_tail_rcu() - * or list_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - */ -static inline void list_add_tail_rcu(struct list_head *new, - struct list_head *head) -{ - __list_add_rcu(new, head->prev, head); -} - /* * Delete a list entry by making the prev/next entries * point to each other. @@ -173,36 +114,6 @@ static inline void list_del(struct list_head *entry) extern void list_del(struct list_head *entry); #endif -/** - * list_del_rcu - deletes entry from list without re-initialization - * @entry: the element to delete from the list. - * - * Note: list_empty() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as list_del_rcu() - * or list_add_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * list_for_each_entry_rcu(). - * - * Note that the caller is not permitted to immediately free - * the newly deleted entry. 
Instead, either synchronize_rcu() - * or call_rcu() must be used to defer freeing until an RCU - * grace period has elapsed. - */ -static inline void list_del_rcu(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->prev = LIST_POISON2; -} - /** * list_replace - replace old entry by new one * @old : the element to be replaced @@ -226,25 +137,6 @@ static inline void list_replace_init(struct list_head *old, INIT_LIST_HEAD(old); } -/** - * list_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - * Note: @old should not be empty. - */ -static inline void list_replace_rcu(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->prev = old->prev; - smp_wmb(); - new->next->prev = new; - new->prev->next = new; - old->prev = LIST_POISON2; -} - /** * list_del_init - deletes entry from list and reinitialize it. * @entry: the element to delete from the list. @@ -368,62 +260,6 @@ static inline void list_splice_init(struct list_head *list, } } -/** - * list_splice_init_rcu - splice an RCU-protected list into an existing list. - * @list: the RCU-protected list to splice - * @head: the place in the list to splice the first list into - * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... - * - * @head can be RCU-read traversed concurrently with this function. - * - * Note that this function blocks. - * - * Important note: the caller must take whatever action is necessary to - * prevent any other updates to @head. In principle, it is possible - * to modify the list as soon as sync() begins execution. - * If this sort of thing becomes necessary, an alternative version - * based on call_rcu() could be created. But only if -really- - * needed -- there is no shortage of RCU API members. 
- */ -static inline void list_splice_init_rcu(struct list_head *list, - struct list_head *head, - void (*sync)(void)) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - if (list_empty(head)) - return; - - /* "first" and "last" tracking list, so initialize it. */ - - INIT_LIST_HEAD(list); - - /* - * At this point, the list body still points to the source list. - * Wait for any readers to finish using the list before splicing - * the list body into the new list. Any new readers will see - * an empty list. - */ - - sync(); - - /* - * Readers are finished with the source list, so perform splice. - * The order is important if the new list is global and accessible - * to concurrent RCU readers. Note that RCU readers are not - * permitted to traverse the prev pointers without excluding - * this function. - */ - - last->next = at; - smp_wmb(); - head->next = first; - first->prev = head; - at->prev = last; -} - /** * list_entry - get the struct for this entry * @ptr: the &struct list_head pointer. @@ -629,57 +465,6 @@ static inline void list_splice_init_rcu(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.prev, typeof(*n), member)) -/** - * list_for_each_rcu - iterate over an rcu-protected list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). 
- */ -#define list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - prefetch(pos->next), pos != (head); \ - pos = rcu_dereference(pos->next)) - -#define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->next); \ - pos != (head); \ - pos = rcu_dereference(pos->next)) - -/** - * list_for_each_entry_rcu - iterate over rcu list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ - prefetch(pos->member.next), &pos->member != (head); \ - pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) - - -/** - * list_for_each_continue_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * Iterate over an rcu-protected list, continuing after current point. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). - */ -#define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference((pos)->next); \ - prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference((pos)->next)) - /* * Double linked lists with a single pointer list head. * Mostly useful for hash tables where the two pointer list head is @@ -730,31 +515,6 @@ static inline void hlist_del(struct hlist_node *n) n->pprev = LIST_POISON2; } -/** - * hlist_del_rcu - deletes entry from hash list without re-initialization - * @n: the element to delete from the hash list. 
- * - * Note: list_unhashed() on entry does not return true after this, - * the entry is in an undefined state. It is useful for RCU based - * lockfree traversal. - * - * In particular, it means that we can not poison the forward - * pointers that may still be used for walking the hash list. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry(). - */ -static inline void hlist_del_rcu(struct hlist_node *n) -{ - __hlist_del(n); - n->pprev = LIST_POISON2; -} - static inline void hlist_del_init(struct hlist_node *n) { if (!hlist_unhashed(n)) { @@ -763,27 +523,6 @@ static inline void hlist_del_init(struct hlist_node *n) } } -/** - * hlist_replace_rcu - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * - * The @old entry will be replaced with the @new entry atomically. - */ -static inline void hlist_replace_rcu(struct hlist_node *old, - struct hlist_node *new) -{ - struct hlist_node *next = old->next; - - new->next = next; - new->pprev = old->pprev; - smp_wmb(); - if (next) - new->next->pprev = &new->next; - *new->pprev = new; - old->pprev = LIST_POISON2; -} - static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; @@ -794,38 +533,6 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) n->pprev = &h->first; } - -/** - * hlist_add_head_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the specified hlist, - * while permitting racing traversals. 
- * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_add_head_rcu(struct hlist_node *n, - struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - n->pprev = &h->first; - smp_wmb(); - if (first) - first->pprev = &n->next; - h->first = n; -} - /* next must be != NULL */ static inline void hlist_add_before(struct hlist_node *n, struct hlist_node *next) @@ -847,63 +554,6 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } -/** - * hlist_add_before_rcu - * @n: the new element to add to the hash list. - * @next: the existing element to add the new element before. - * - * Description: - * Adds the specified element to the specified hlist - * before the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. 
- */ -static inline void hlist_add_before_rcu(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - n->next = next; - smp_wmb(); - next->pprev = &n->next; - *(n->pprev) = n; -} - -/** - * hlist_add_after_rcu - * @prev: the existing element to add the new element after. - * @n: the new element to add to the hash list. - * - * Description: - * Adds the specified element to the specified hlist - * after the specified node while permitting racing traversals. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_add_head_rcu() - * or hlist_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. - */ -static inline void hlist_add_after_rcu(struct hlist_node *prev, - struct hlist_node *n) -{ - n->next = prev->next; - n->pprev = &prev->next; - smp_wmb(); - prev->next = n; - if (n->next) - n->next->pprev = &n->next; -} - #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ @@ -964,21 +614,4 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ pos = n) -/** - * hlist_for_each_entry_rcu - iterate over rcu list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as hlist_add_head_rcu() - * as long as the traversal is guarded by rcu_read_lock(). 
- */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1;}) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = rcu_dereference(pos->next)) - #endif diff --git a/include/linux/rculist.h b/include/linux/rculist.h new file mode 100644 index 00000000000..aa9b3eb1568 --- /dev/null +++ b/include/linux/rculist.h @@ -0,0 +1,396 @@ +#ifndef _LINUX_RCULIST_H +#define _LINUX_RCULIST_H + +#ifdef __KERNEL__ + +/* + * RCU-protected list version + */ +#include + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next) +{ + new->next = next; + new->prev = prev; + smp_wmb(); + next->prev = new; + prev->next = new; +} + +/** + * list_add_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head, head->next); +} + +/** + * list_add_tail_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. 
+ * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_tail_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_tail_rcu(struct list_head *new, + struct list_head *head) +{ + __list_add_rcu(new, head->prev, head); +} + +/** + * list_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * Note: list_empty() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_del_rcu() + * or list_add_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_del_rcu(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->prev = LIST_POISON2; +} + +/** + * list_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + * Note: @old should not be empty. 
+ */ +static inline void list_replace_rcu(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->prev = old->prev; + smp_wmb(); + new->next->prev = new; + new->prev->next = new; + old->prev = LIST_POISON2; +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list. + * @list: the RCU-protected list to splice + * @head: the place in the list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + * + * @head can be RCU-read traversed concurrently with this function. + * + * Note that this function blocks. + * + * Important note: the caller must take whatever action is necessary to + * prevent any other updates to @head. In principle, it is possible + * to modify the list as soon as sync() begins execution. + * If this sort of thing becomes necessary, an alternative version + * based on call_rcu() could be created. But only if -really- + * needed -- there is no shortage of RCU API members. + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + struct list_head *at = head->next; + + if (list_empty(head)) + return; + + /* "first" and "last" tracking list, so initialize it. */ + + INIT_LIST_HEAD(list); + + /* + * At this point, the list body still points to the source list. + * Wait for any readers to finish using the list before splicing + * the list body into the new list. Any new readers will see + * an empty list. + */ + + sync(); + + /* + * Readers are finished with the source list, so perform splice. + * The order is important if the new list is global and accessible + * to concurrent RCU readers. Note that RCU readers are not + * permitted to traverse the prev pointers without excluding + * this function. 
+ */ + + last->next = at; + smp_wmb(); + head->next = first; + first->prev = head; + at->prev = last; +} + +/** + * list_for_each_rcu - iterate over an rcu-protected list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + prefetch(rcu_dereference(pos)->next), pos != (head); \ + pos = pos->next) + +#define __list_for_each_rcu(pos, head) \ + for (pos = (head)->next; \ + rcu_dereference(pos) != (head); \ + pos = pos->next) + +/** + * list_for_each_safe_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + * + * Iterate over an rcu-protected list, safe against removal of list entry. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_safe_rcu(pos, n, head) \ + for (pos = (head)->next; \ + n = rcu_dereference(pos)->next, pos != (head); \ + pos = n) + +/** + * list_for_each_entry_rcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). 
+ */ +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry((head)->next, typeof(*pos), member); \ + prefetch(rcu_dereference(pos)->member.next), \ + &pos->member != (head); \ + pos = list_entry(pos->member.next, typeof(*pos), member)) + + +/** + * list_for_each_continue_rcu + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + * + * Iterate over an rcu-protected list, continuing after current point. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_continue_rcu(pos, head) \ + for ((pos) = (pos)->next; \ + prefetch(rcu_dereference((pos))->next), (pos) != (head); \ + (pos) = (pos)->next) + +/** + * hlist_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry(). + */ +static inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. 
+ */ +static inline void hlist_replace_rcu(struct hlist_node *old, + struct hlist_node *new) +{ + struct hlist_node *next = old->next; + + new->next = next; + new->pprev = old->pprev; + smp_wmb(); + if (next) + new->next->pprev = &new->next; + *new->pprev = new; + old->pprev = LIST_POISON2; +} + +/** + * hlist_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_add_head_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + n->pprev = &h->first; + smp_wmb(); + if (first) + first->pprev = &n->next; + h->first = n; +} + +/** + * hlist_add_before_rcu + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * Description: + * Adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. 
+ * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + smp_wmb(); + next->pprev = &n->next; + *(n->pprev) = n; +} + +/** + * hlist_add_after_rcu + * @prev: the existing element to add the new element after. + * @n: the new element to add to the hash list. + * + * Description: + * Adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + smp_wmb(); + prev->next = n; + if (n->next) + n->next->pprev = &n->next; +} + +/** + * hlist_for_each_entry_rcu - iterate over rcu list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). 
+ */ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = (head)->first; \ + rcu_dereference(pos) && ({ prefetch(pos->next); 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ + pos = pos->next) + +#endif /* __KERNEL__ */ +#endif -- cgit v1.2.3 From 10aa9d2cf9878757b003023d33ff90a37aa3044b Mon Sep 17 00:00:00 2001 From: Franck Bui-Huu Date: Mon, 12 May 2008 21:21:06 +0200 Subject: rculist.h: use the rcu API Make almost all list mutation primitives use rcu_assign_pointer(). The main point of this being readability improvement. Signed-off-by: Franck Bui-Huu Cc: "Paul E. McKenney" Cc: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index aa9b3eb1568..8d2c81fccfe 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -7,6 +7,7 @@ * RCU-protected list version */ #include +#include /* * Insert a new entry between two known consecutive entries. 
@@ -19,9 +20,8 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - smp_wmb(); + rcu_assign_pointer(prev->next, new); next->prev = new; - prev->next = new; } /** @@ -110,9 +110,8 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - smp_wmb(); + rcu_assign_pointer(new->prev->next, new); new->next->prev = new; - new->prev->next = new; old->prev = LIST_POISON2; } @@ -166,8 +165,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - smp_wmb(); - head->next = first; + rcu_assign_pointer(head->next, first); first->prev = head; at->prev = last; } @@ -280,10 +278,9 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - smp_wmb(); + rcu_assign_pointer(*new->pprev, new); if (next) new->next->pprev = &new->next; - *new->pprev = new; old->pprev = LIST_POISON2; } @@ -310,12 +307,12 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; + n->next = first; n->pprev = &h->first; - smp_wmb(); + rcu_assign_pointer(h->first, n); if (first) first->pprev = &n->next; - h->first = n; } /** @@ -341,9 +338,8 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - smp_wmb(); + rcu_assign_pointer(*(n->pprev), n); next->pprev = &n->next; - *(n->pprev) = n; } /** @@ -369,8 +365,7 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - smp_wmb(); - prev->next = n; + rcu_assign_pointer(prev->next, n); if (n->next) n->next->pprev = &n->next; } -- cgit v1.2.3 From 78b0e0e9b27b62c4b22f05a147f7a80fa58b1ae3 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 12 May 2008 21:21:06 +0200 Subject: RCU, rculist.h: fix list iterators RCU list iterators: should prefetch ever be optimised out with no side-effects, the current version will lose the barrier completely. Pointed-out-by: Linus Torvalds Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 48 +++++++++++++++--------------------------------- 1 file changed, 15 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 8d2c81fccfe..b0f39be08b6 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -180,31 +180,14 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_rcu(pos, head) \ - for (pos = (head)->next; \ - prefetch(rcu_dereference(pos)->next), pos != (head); \ - pos = pos->next) + for (pos = rcu_dereference((head)->next); \ + prefetch(pos->next), pos != (head); \ + pos = rcu_dereference(pos->next)) #define __list_for_each_rcu(pos, head) \ - for (pos = (head)->next; \ - rcu_dereference(pos) != (head); \ - pos = pos->next) - -/** - * list_for_each_safe_rcu - * @pos: the &struct list_head to use as a loop cursor. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - * - * Iterate over an rcu-protected list, safe against removal of list entry. - * - * This list-traversal primitive may safely run concurrently with - * the _rcu list-mutation primitives such as list_add_rcu() - * as long as the traversal is guarded by rcu_read_lock(). 
- */ -#define list_for_each_safe_rcu(pos, n, head) \ - for (pos = (head)->next; \ - n = rcu_dereference(pos)->next, pos != (head); \ - pos = n) + for (pos = rcu_dereference((head)->next); \ + pos != (head); \ + pos = rcu_dereference(pos->next)) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -217,10 +200,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - prefetch(rcu_dereference(pos)->member.next), \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) + for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \ + prefetch(pos->member.next), &pos->member != (head); \ + pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member)) /** @@ -235,9 +217,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = (pos)->next; \ - prefetch(rcu_dereference((pos))->next), (pos) != (head); \ - (pos) = (pos)->next) + for ((pos) = rcu_dereference((pos)->next); \ + prefetch((pos)->next), (pos) != (head); \ + (pos) = rcu_dereference((pos)->next)) /** * hlist_del_rcu - deletes entry from hash list without re-initialization @@ -382,10 +364,10 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * as long as the traversal is guarded by rcu_read_lock(). 
*/ #define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = (head)->first; \ - rcu_dereference(pos) && ({ prefetch(pos->next); 1; }) && \ + for (pos = rcu_dereference((head)->first); \ + pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = pos->next) + pos = rcu_dereference(pos->next)) #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 1a189b97190d3f0f8cf0379a799d3555b2d648bb Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 13 Apr 2008 21:41:55 +0100 Subject: [ARM] pxa: Add bare bones PWM API Signed-off-by: Russell King --- include/linux/pwm.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/pwm.h (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h new file mode 100644 index 00000000000..3945f803d51 --- /dev/null +++ b/include/linux/pwm.h @@ -0,0 +1,31 @@ +#ifndef __LINUX_PWM_H +#define __LINUX_PWM_H + +struct pwm_device; + +/* + * pwm_request - request a PWM device + */ +struct pwm_device *pwm_request(int pwm_id, const char *label); + +/* + * pwm_free - free a PWM device + */ +void pwm_free(struct pwm_device *pwm); + +/* + * pwm_config - change a PWM device configuration + */ +int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns); + +/* + * pwm_enable - start a PWM output toggling + */ +int pwm_enable(struct pwm_device *pwm); + +/* + * pwm_disable - stop a PWM output toggling + */ +void pwm_disable(struct pwm_device *pwm); + +#endif /* __ASM_ARCH_PWM_H */ -- cgit v1.2.3 From bd3bff9e20f454b242d979ec2f9a4dca0d5fa06f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: sched: add latency tracer callbacks to the scheduler add 3 lightweight callbacks to the tracer backend. zero impact if tracing is turned off. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4..717cab8a0c8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2117,6 +2117,32 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +#else +static inline void +ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +{ +} +#endif + +#ifdef CONFIG_SCHED_TRACER +extern void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); +extern void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); +#else +static inline void +ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +static inline void +ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) +{ +} +#endif + extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); -- cgit v1.2.3 From 7c731e0a495e25e79dc1e9e68772a67a55721a65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: make the task state char-string visible to all The tracer wants to be able to convert the state number into a user visible character. This patch pulls that conversion string out of the scheduler into the header. This way if it were to ever change, other parts of the kernel will know.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 717cab8a0c8..6e26f1fdbfe 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2237,6 +2237,8 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +#define TASK_STATE_TO_CHAR_STR "RSDTtZX" + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 502825282e6f79c975a644afc124432ec1744de4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:41 +0200 Subject: ftrace: add preempt_enable/disable notrace macros The tracer may need to call preempt_enable and disable functions for time keeping and such. The trace gets ugly when we see these functions show up for all traces. To make the output cleaner this patch adds preempt_enable_notrace and preempt_disable_notrace to be used by tracer (and debugging) functions. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/preempt.h | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 23f0c54175c..36b03d50bf4 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -52,6 +52,34 @@ do { \ preempt_check_resched(); \ } while (0) +/* For debugging and tracer internals only! 
*/ +#define add_preempt_count_notrace(val) \ + do { preempt_count() += (val); } while (0) +#define sub_preempt_count_notrace(val) \ + do { preempt_count() -= (val); } while (0) +#define inc_preempt_count_notrace() add_preempt_count_notrace(1) +#define dec_preempt_count_notrace() sub_preempt_count_notrace(1) + +#define preempt_disable_notrace() \ +do { \ + inc_preempt_count_notrace(); \ + barrier(); \ +} while (0) + +#define preempt_enable_no_resched_notrace() \ +do { \ + barrier(); \ + dec_preempt_count_notrace(); \ +} while (0) + +/* preempt_check_resched is OK to trace */ +#define preempt_enable_notrace() \ +do { \ + preempt_enable_no_resched_notrace(); \ + barrier(); \ + preempt_check_resched(); \ +} while (0) + #else #define preempt_disable() do { } while (0) @@ -59,6 +87,10 @@ do { \ #define preempt_enable() do { } while (0) #define preempt_check_resched() do { } while (0) +#define preempt_disable_notrace() do { } while (0) +#define preempt_enable_no_resched_notrace() do { } while (0) +#define preempt_enable_notrace() do { } while (0) + #endif #ifdef CONFIG_PREEMPT_NOTIFIERS -- cgit v1.2.3 From ffdc1a09ae7e2cbd714a446ee38a27f625b5f1c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:41 +0200 Subject: tracing: add notrace to linkage.h notrace signals that a function should not be traced. Most of the time this is used by tracers to annotate code that cannot be traced - it's in a volatile state (such as in user vdso context or NMI context) or it's in the tracer internals. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/linkage.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610b24f..14f329c64ba 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -3,6 +3,8 @@ #include +#define notrace __attribute__((no_instrument_function)) + #ifdef __cplusplus #define CPP_ASMLINKAGE extern "C" #else -- cgit v1.2.3 From 16444a8a40d4c7b4f6de34af0cae1f76a4f6c901 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: add basic support for gcc profiler instrumentation If CONFIG_FTRACE is selected and /proc/sys/kernel/ftrace_enabled is set to a non-zero value the ftrace routine will be called every time we enter a kernel function that is not marked with the "notrace" attribute. The ftrace routine will then call a registered function if a function happens to be registered. [ This code has been highly hacked by Steven Rostedt and Ingo Molnar, so don't blame Arnaldo for all of this ;-) ] Update: It is now possible to register more than one ftrace function. If only one ftrace function is registered, that will be the function that ftrace calls directly. If more than one function is registered, then ftrace will call a function that will loop through the functions to call.
Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 include/linux/ftrace.h (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h new file mode 100644 index 00000000000..b96ef14c249 --- /dev/null +++ b/include/linux/ftrace.h @@ -0,0 +1,38 @@ +#ifndef _LINUX_FTRACE_H +#define _LINUX_FTRACE_H + +#ifdef CONFIG_FTRACE + +#include + +#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) + +typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); + +struct ftrace_ops { + ftrace_func_t func; + struct ftrace_ops *next; +}; + +/* + * The ftrace_ops must be a static and should also + * be read_mostly. These functions do modify read_mostly variables + * so use them sparely. Never free an ftrace_op or modify the + * next pointer after it has been registered. Even after unregistering + * it, the next pointer may still be used internally. 
+ */ +int register_ftrace_function(struct ftrace_ops *ops); +int unregister_ftrace_function(struct ftrace_ops *ops); +void clear_ftrace_function(void); + +extern void ftrace_stub(unsigned long a0, unsigned long a1); +extern void mcount(void); + +#else /* !CONFIG_FTRACE */ +# define register_ftrace_function(ops) do { } while (0) +# define unregister_ftrace_function(ops) do { } while (0) +# define clear_ftrace_function(ops) do { } while (0) +#endif /* CONFIG_FTRACE */ +#endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From 352ad25aa4a189c667cb2af333948d34692a2d27 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: tracer for scheduler wakeup latency This patch adds the tracer that tracks the wakeup latency of the highest priority waking task. "wakeup" is added to /debugfs/tracing/available_tracers Also added to /debugfs/tracing tracing_max_latency holds the current max latency for the wakeup wakeup_thresh if set to other than zero, a log will be recorded for every wakeup that takes longer than the number entered in here (usecs for all counters) (deletes previous trace) Examples: (with ftrace_enabled = 0) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 26 us, #2/2, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/0-3 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / quilt-8551 0d..3 0us+: wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) quilt-8551 0d..4 26us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ (with ftrace_enabled = 1) ============ preemption latency trace v1.1.5 on 2.6.24-rc8 -------------------------------------------------------------------- latency: 36 
us, #45/45, CPU#0 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: migration/1-5 (uid:0 nice:-5 policy:1 rt_prio:99) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / bash-10653 1d..3 0us : wake_up_process+0x15/0x17 (sched_exec+0xc9/0x100 ) bash-10653 1d..3 1us : try_to_wake_up+0x271/0x2e7 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..2 2us : try_to_wake_up+0x296/0x2e7 (update_rq_clock+0x9/0x20 ) bash-10653 1d..2 2us : update_rq_clock+0x1e/0x20 (__update_rq_clock+0xc/0x90 ) bash-10653 1d..2 3us : __update_rq_clock+0x1b/0x90 (sched_clock+0x9/0x29 ) bash-10653 1d..2 4us : try_to_wake_up+0x2a6/0x2e7 (activate_task+0xc/0x3f ) bash-10653 1d..2 4us : activate_task+0x2d/0x3f (enqueue_task+0xe/0x66 ) bash-10653 1d..2 5us : enqueue_task+0x5b/0x66 (enqueue_task_rt+0x9/0x3c ) bash-10653 1d..2 6us : try_to_wake_up+0x2ba/0x2e7 (check_preempt_wakeup+0x12/0x99 ) [...] bash-10653 1d..5 33us : tracing_record_cmdline+0xcf/0xd4 (_spin_unlock+0x9/0x33 ) bash-10653 1d..5 34us : _spin_unlock+0x19/0x33 (sub_preempt_count+0xc/0x7a ) bash-10653 1d..4 35us : wakeup_sched_switch+0x65/0x2ff (_spin_lock_irqsave+0xc/0xa9 ) bash-10653 1d..4 35us : _spin_lock_irqsave+0x19/0xa9 (add_preempt_count+0xe/0x77 ) bash-10653 1d..4 36us : sched_switch_callback+0x73/0x81 (schedule+0x483/0x6d5 ) vim:ft=help ============ The [...] was added here to not waste your email box space. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b96ef14c249..db8a5e7abe4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,10 +5,6 @@ #include -#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -#define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) -#define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) - typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { @@ -35,4 +31,23 @@ extern void mcount(void); # define unregister_ftrace_function(ops) do { } while (0) # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ + + +#ifdef CONFIG_FRAME_POINTER +/* TODO: need to fix this for ARM */ +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) +# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) +# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) +# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +#else +# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +# define CALLER_ADDR1 0UL +# define CALLER_ADDR2 0UL +# define CALLER_ADDR3 0UL +# define CALLER_ADDR4 0UL +# define CALLER_ADDR5 0UL +#endif + #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From 81d68a96a39844853b37f20cc8282d9b65b78ef3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace irq disabled critical timings This patch adds latency tracing for critical timings (how long interrupts are disabled for). 
"irqsoff" is added to /debugfs/tracing/available_tracers Note: tracing_max_latency also holds the max latency for irqsoff (in usecs). (default to large number so one must start latency tracing) tracing_thresh threshold (in usecs) to always print out if irqs off is detected to be longer than stated here. If irq_thresh is non-zero, then max_irq_latency is ignored. Here's an example of a trace with ftrace_enabled = 0 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 Signed-off-by: Ingo Molnar -------------------------------------------------------------------- latency: 100 us, #3/3, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1d.s3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1d.s3 100us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1d.s3 100us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= And this is a trace with ftrace_enabled == 1 ======= preemption latency trace v1.1.5 on 2.6.24-rc7 -------------------------------------------------------------------- latency: 102 us, #12/12, CPU#1 | (M:rt VP:0, KP:0, SP:0 HP:0 #P:2) ----------------- | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- => started at: _spin_lock_irqsave+0x2a/0xb7 => ended at: _spin_unlock_irqrestore+0x32/0x5f _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1dNs3 0us+: _spin_lock_irqsave+0x2a/0xb7 (e1000_update_stats+0x47/0x64c [e1000]) swapper-0 1dNs3 46us : 
e1000_read_phy_reg+0x16/0x225 [e1000] (e1000_update_stats+0x5e2/0x64c [e1000]) swapper-0 1dNs3 46us : e1000_swfw_sync_acquire+0x10/0x99 [e1000] (e1000_read_phy_reg+0x49/0x225 [e1000]) swapper-0 1dNs3 46us : e1000_get_hw_eeprom_semaphore+0x12/0xa6 [e1000] (e1000_swfw_sync_acquire+0x36/0x99 [e1000]) swapper-0 1dNs3 47us : __const_udelay+0x9/0x47 (e1000_read_phy_reg+0x116/0x225 [e1000]) swapper-0 1dNs3 47us+: __delay+0x9/0x50 (__const_udelay+0x45/0x47) swapper-0 1dNs3 97us : preempt_schedule+0xc/0x84 (__delay+0x4e/0x50) swapper-0 1dNs3 98us : e1000_swfw_sync_release+0xc/0x55 [e1000] (e1000_read_phy_reg+0x211/0x225 [e1000]) swapper-0 1dNs3 99us+: e1000_put_hw_eeprom_semaphore+0x9/0x35 [e1000] (e1000_swfw_sync_release+0x50/0x55 [e1000]) swapper-0 1dNs3 101us : _spin_unlock_irqrestore+0xe/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : _spin_unlock_irqrestore+0x32/0x5f (e1000_update_stats+0x641/0x64c [e1000]) swapper-0 1dNs3 102us : trace_hardirqs_on_caller+0x75/0x89 (_spin_unlock_irqrestore+0x32/0x5f) vim:ft=help ======= Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ include/linux/irqflags.h | 12 ++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db8a5e7abe4..0a20445dcbc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -50,4 +50,12 @@ extern void mcount(void); # define CALLER_ADDR5 0UL #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); +#else +# define time_hardirqs_on(a0, a1) do { } while (0) +# define time_hardirqs_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index e600c4e9b8c..5b711d4e9fd 100644 --- 
a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -12,10 +12,10 @@ #define _LINUX_TRACE_IRQFLAGS_H #ifdef CONFIG_TRACE_IRQFLAGS - extern void trace_hardirqs_on(void); - extern void trace_hardirqs_off(void); extern void trace_softirqs_on(unsigned long ip); extern void trace_softirqs_off(unsigned long ip); + extern void trace_hardirqs_on(void); + extern void trace_hardirqs_off(void); # define trace_hardirq_context(p) ((p)->hardirq_context) # define trace_softirq_context(p) ((p)->softirq_context) # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) @@ -41,6 +41,14 @@ # define INIT_TRACE_IRQFLAGS #endif +#ifdef CONFIG_IRQSOFF_TRACER + extern void stop_critical_timings(void); + extern void start_critical_timings(void); +#else +# define stop_critical_timings() do { } while (0) +# define start_critical_timings() do { } while (0) +#endif + #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT #include -- cgit v1.2.3 From 6cd8a4bb2f97527a9ceb30bc77ea4e959c6a95e3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: trace preempt off critical timings Add preempt off timings. A lot of kernel core code is taken from the RT patch latency trace that was written by Ingo Molnar. This adds "preemptoff" and "preemptirqsoff" to /debugfs/tracing/available_tracers Now instead of just tracing irqs off, preemption off can be selected to be recorded. When this is selected, it shares the same files as irqs off timings. One can either trace preemption off, irqs off, or one or the other off. By echoing "preemptoff" into /debugfs/tracing/current_tracer, recording of preempt off only is performed. "irqsoff" will only record the time irqs are disabled, but "preemptirqsoff" will take the total time irqs or preemption are disabled. Runtime switching of these options is now supported by simply echoing in the appropriate trace name into /debugfs/tracing/current_tracer.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ include/linux/irqflags.h | 3 ++- include/linux/preempt.h | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0a20445dcbc..740c97dcf9c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,4 +58,12 @@ extern void mcount(void); # define time_hardirqs_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_PREEMPT_TRACER + extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); + extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); +#else +# define trace_preempt_on(a0, a1) do { } while (0) +# define trace_preempt_off(a0, a1) do { } while (0) +#endif + #endif /* _LINUX_FTRACE_H */ diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5b711d4e9fd..2b1c2e58566 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -41,7 +41,8 @@ # define INIT_TRACE_IRQFLAGS #endif -#ifdef CONFIG_IRQSOFF_TRACER +#if defined(CONFIG_IRQSOFF_TRACER) || \ + defined(CONFIG_PREEMPT_TRACER) extern void stop_critical_timings(void); extern void start_critical_timings(void); #else diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 36b03d50bf4..72b1a10a59b 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -10,7 +10,7 @@ #include #include -#ifdef CONFIG_DEBUG_PREEMPT +#if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void add_preempt_count(int val); extern void sub_preempt_count(int val); #else -- cgit v1.2.3 From 3d0833953e1b98b79ddf491dd49229eef9baeac1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:42 +0200 Subject: ftrace: dynamic enabling/disabling of function calls This patch adds a feature to dynamically replace the ftrace code with the jmps to allow a kernel with ftrace configured to run as 
fast as it can without it configured. The way this works, is on bootup (if ftrace is enabled), a ftrace function is registered to record the instruction pointer of all places that call the function. Later, if there's still any code to patch, a kthread is awoken (rate limited to at most once a second) that performs a stop_machine, and replaces all the code that was called with a jmp over the call to ftrace. It only replaces what was found the previous time. Typically the system reaches equilibrium quickly after bootup and there's no code patching needed at all. e.g. call ftrace /* 5 bytes */ is replaced with jmp 3f /* jmp is 2 bytes and we jump 3 forward */ 3: When we want to enable ftrace for function tracing, the IP recording is removed, and stop_machine is called again to replace all the locations that were recorded back to the call of ftrace. When it is disabled, we replace the code back to the jmp. Allocation is done by the kthread. If the ftrace recording function is called, and we don't have any record slots available, then we simply skip that call. Once a second a new page (if needed) is allocated for recording new ftrace function calls. A large batch is allocated at boot up to get most of the calls there. Because we do this via stop_machine, we don't have to worry about another CPU executing a ftrace call as we modify it. But we do need to worry about NMIs, so all functions that might be called via nmi must be annotated with notrace_nmi. When this code is configured in, the NMI code will not call notrace.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 740c97dcf9c..90dbc0ee204 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -32,6 +32,24 @@ extern void mcount(void); # define clear_ftrace_function(ops) do { } while (0) #endif /* CONFIG_FTRACE */ +#ifdef CONFIG_DYNAMIC_FTRACE +# define FTRACE_HASHBITS 10 +# define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add ftrace_enabled sysctl to disable mcount function This patch adds back the sysctl ftrace_enabled. This time it is defaulted to on, if DYNAMIC_FTRACE is configured. When ftrace_enabled is disabled, the ftrace function is set to the stub return. If DYNAMIC_FTRACE is also configured, on ftrace_enabled = 0, the registered ftrace functions will all be set to jmps, but no more new calls to ftrace recording (used to find the ftrace calling sites) will be called. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 90dbc0ee204..ccd8537dbdb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -5,6 +5,12 @@ #include +extern int ftrace_enabled; +extern int +ftrace_enable_sysctl(struct ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, + loff_t *ppos); + typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip); struct ftrace_ops { -- cgit v1.2.3 From 3c1720f00bb619302ba19d55986ab565e74d06db Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: move memory management out of arch code This patch moves the memory management of the ftrace records out of the arch code and into the generic code making the arch code simpler. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ccd8537dbdb..d509ad6c9cb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -42,19 +42,23 @@ extern void mcount(void); # define FTRACE_HASHBITS 10 # define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: use dynamic patching for updating mcount calls This patch replaces the indirect call to the mcount function pointer with a direct call that will be patched by the dynamic ftrace routines. On boot up, the mcount function calls the ftrace_stub function. When the dynamic ftrace code is initialized, the ftrace_stub is replaced with a call to the ftrace_record_ip, which records the instruction pointers of the locations that call it.
Later, the ftraced daemon will call kstop_machine and patch all the locations to nops. When a ftrace is enabled, the original calls to mcount will now be set to call ftrace_caller, which will do a direct call to the registered ftrace function. This direct call is also patched when the function that should be called is updated. All patching is performed by a kstop_machine routine to prevent any type of race condition that is associated with modifying code on the fly. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d509ad6c9cb..b0dd0093058 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,9 +56,14 @@ struct dyn_ftrace { extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); -extern int ftrace_dyn_arch_init(void); +extern int ftrace_dyn_arch_init(void *data); +extern int ftrace_mcount_set(unsigned long *data); extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code); +extern int ftrace_update_ftrace_func(ftrace_func_t func); +extern void ftrace_caller(void); +extern void ftrace_call(void); +extern void mcount_call(void); #endif #ifdef CONFIG_FRAME_POINTER -- cgit v1.2.3 From 5072c59fd45e9976d02ee6f18c7336ef97623cbc Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: add filter select functions to trace This patch adds two files to the debugfs system: /debugfs/tracing/available_filter_functions and /debugfs/tracing/set_ftrace_filter The available_filter_functions lists all functions that have been recorded by the ftraced that have called the ftrace_record_ip function.
This is to allow users to see what functions have been converted to nops and can be enabled for tracing. To enable functions, simply echo the names (whitespace delimited) into set_ftrace_filter. Simple wildcards are also allowed. echo 'scheduler' > /debugfs/tracing/set_ftrace_filter Will have only the scheduler be activated when tracing is enabled. echo 'sched_*' > /debugfs/tracing/set_ftrace_filter Will have only the functions starting with 'sched_' be activated. echo '*lock' > /debugfs/tracing/set_ftrace_filter Will have only functions ending with 'lock' be activated. echo '*lock*' > /debugfs/tracing/set_ftrace_filter Will have only functions with 'lock' in their names be activated. Note: 'sched*lock' will not work. The only wildcard that is allowed is an asterisk at the beginning and/or end of the string passed in. Multiple names can be passed in with whitespace delimited: echo 'scheduler *lock *acpi*' > /debugfs/tracing/set_ftrace_filter is also the same as: echo 'scheduler' > /debugfs/tracing/set_ftrace_filter echo '*lock' >> /debugfs/tracing/set_ftrace_filter echo '*acpi*' >> /debugfs/tracing/set_ftrace_filter Appending does just that. It appends to the list. To disable all filters simply echo an empty line in: echo > /debugfs/tracing/set_ftrace_filter Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b0dd0093058..f5911d2d42c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,7 +43,9 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:43 +0200 Subject: ftrace: fix kexec disable the tracer while kexec pulls the rug from under the old kernel.
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f5911d2d42c..a42390c1d6e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -68,6 +68,13 @@ extern void ftrace_call(void); extern void mcount_call(void); #endif +static inline void tracer_disable(void) +{ +#ifdef CONFIG_FTRACE + ftrace_enabled = 0; +#endif +} + #ifdef CONFIG_FRAME_POINTER /* TODO: need to fix this for ARM */ # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) -- cgit v1.2.3 From e1c08bdd9fa73e44096e5a82c0d5928b04ab02c8 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:44 +0200 Subject: ftrace: force recording Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a42390c1d6e..2c1670c6523 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -54,6 +54,8 @@ struct dyn_ftrace { unsigned long flags; }; +int ftrace_force_update(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -66,6 +68,8 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +#else +# define ftrace_force_update() do { } while (0) #endif static inline void tracer_disable(void) -- cgit v1.2.3 From c7aafc549766b87819285d3480648fc652a47bc4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: cleanups factor out code and clean it up. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2c1670c6523..953a36d6a19 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -69,7 +69,7 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() do { } while (0) +# define ftrace_force_update() ({ 0; }) #endif static inline void tracer_disable(void) -- cgit v1.2.3 From 77a2b37d227483fe52aead242652aee406c25bf0 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:45 +0200 Subject: ftrace: startup tester on dynamic tracing. This patch adds a startup self test on dynamic code modification and filters. The test filters on a specific function, makes sure that no other function is traced, executes the function, then makes sure that the function is traced. This patch also fixes a slight bug with the ftrace selftest, where tracer_enabled was not being set.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 953a36d6a19..a842d96c634 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -55,6 +55,7 @@ struct dyn_ftrace { }; int ftrace_force_update(void); +void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); @@ -70,6 +71,7 @@ extern void ftrace_call(void); extern void mcount_call(void); #else # define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif static inline void tracer_disable(void) -- cgit v1.2.3 From 37ad508419f0fdfda7b378756eb1f35cfd26d96d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace - fix dynamic ftrace memory leak The ftrace dynamic function update allocates a record to store the instruction pointers that are being modified. If the modified instruction pointer fails to update, then the record is marked as failed and nothing more is done. Worse, if the modification fails, but the record ip function is still called, it will allocate a new record and try again. In just a matter of time, will this cause a serious memory leak and crash the system. This patch plugs this memory leak. When a record fails, it is included back into the pool of records to be used. Now a record may fail over and over again, but the number of allocated records will not increase. 
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index a842d96c634..61e757bd235 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -43,9 +43,10 @@ extern void mcount(void); # define FTRACE_HASHSIZE (1< Date: Mon, 12 May 2008 21:20:48 +0200 Subject: ftrace: disable tracing on failure Since ftrace touches practically every function, if we detect any anomaly, we want to fully disable ftrace. This patch adds code to try to shut down ftrace as much as possible without doing any more harm if something is detected not quite correct. This only kills ftrace, this patch does have checks for other parts of the tracer (irqsoff, wakeup, etc.). Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 61e757bd235..4650a3160b7 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,6 +58,9 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); -- cgit v1.2.3 From aeaee8a2c9cb4489f166ca0e39c568e8254faaa6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:49 +0200 Subject: ftrace: build fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4650a3160b7..08fbef1744c 100644 ---
a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -58,9 +58,6 @@ struct dyn_ftrace { int ftrace_force_update(void); void ftrace_set_filter(unsigned char *buf, int len, int reset); -/* totally disable ftrace - can not re-enable after this */ -void ftrace_kill(void); - /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern unsigned char *ftrace_nop_replace(void); @@ -74,10 +71,13 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); #else -# define ftrace_force_update() ({ 0; }) -# define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_force_update() ({ 0; }) +# define ftrace_set_filter(buf, len, reset) do { } while (0) #endif +/* totally disable ftrace - can not re-enable after this */ +void ftrace_kill(void); + static inline void tracer_disable(void) { #ifdef CONFIG_FTRACE -- cgit v1.2.3 From 86387f7ee5d3273ff4859e2c64ce656639b6ca65 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: add stack tracing Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 08fbef1744c..0d3714e7110 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -93,6 +93,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) # define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) # define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) #else # define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) # define CALLER_ADDR1 0UL @@ -100,6 +101,7 @@ static inline void tracer_disable(void) # define CALLER_ADDR3 0UL # define CALLER_ADDR4 0UL # define CALLER_ADDR5 0UL +# define CALLER_ADDR6 0UL #endif #ifdef CONFIG_IRQSOFF_TRACER -- 
cgit v1.2.3 From 8ac0fca4ccb355ce50471d7aa3f10f5900b28b95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:51 +0200 Subject: ftrace: sched tracer fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e26f1fdbfe..05744f9cb09 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2130,17 +2130,11 @@ ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) #ifdef CONFIG_SCHED_TRACER extern void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -extern void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr); #else static inline void ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) { } -static inline void -ftrace_wake_up_new_task(struct task_struct *wakee, struct task_struct *curr) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.3 From 4e65551905fb0300ae7e667cbaa41ee2e3f29a13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: sched tracer, trace full rbtree Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 05744f9cb09..652d380ae56 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,20 +2119,34 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next); +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); +extern void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr); 
+extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void -ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next) +ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) +{ +} +static inline void +sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) +{ +} +static inline void +ftrace_wake_up_task(void *rq, struct task_struct *wakee, + struct task_struct *curr) +{ +} +static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -#endif - -#ifdef CONFIG_SCHED_TRACER -extern void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr); -#else static inline void -ftrace_wake_up_task(struct task_struct *wakee, struct task_struct *curr) +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) { } #endif -- cgit v1.2.3 From 017730c11241e26577673eb9d957cfc66172ea91 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 +0200 Subject: ftrace: fix wakeups Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 652d380ae56..a3970b56375 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -246,6 +246,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); +extern int runqueue_is_locked(void); + extern cpumask_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern int select_nohz_load_balancer(int cpu); -- cgit v1.2.3 From 1a3c3034336320554a3342572dae98d69e054fc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:52 
+0200 Subject: ftrace: fix __trace_special() Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a3970b56375..5b186bed54b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2119,6 +2119,18 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +#ifdef CONFIG_TRACING +extern void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +__trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} +#endif + #ifdef CONFIG_CONTEXT_SWITCH_TRACER extern void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); @@ -2126,9 +2138,6 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2146,11 +2155,6 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } -static inline void -__trace_special(void *__tr, void *__data, - unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.3 From 88a4216c3ec4281fc7e6725cc3a3ccd01fb1aa14 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:20:53 +0200 Subject: ftrace: sched special Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 
'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5b186bed54b..360ca99033d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2138,6 +2138,8 @@ extern void ftrace_wake_up_task(void *rq, struct task_struct *wakee, struct task_struct *curr); extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else static inline void ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) @@ -2155,6 +2157,10 @@ ftrace_wake_up_task(void *rq, struct task_struct *wakee, static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) { } +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) +{ +} #endif extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); -- cgit v1.2.3 From 3eefae994d9224fb7771a3ddb683868363c23510 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 12 May 2008 21:21:04 +0200 Subject: ftrace: limit trace entries Currently there is no protection from the root user to use up all of memory for trace buffers. If the root user allocates too many entries, the OOM killer might start killing off all tasks. This patch adds an algorithm to check the following condition: pages_requested > (freeable_memory + current_trace_buffer_pages) / 4 If the above is met then the allocation fails. The above prevents more than 1/4th of freeable memory from being used by trace buffers. To determine the freeable_memory, I made determine_dirtyable_memory in mm/page-writeback.c global. Special thanks goes to Peter Zijlstra for suggesting the above calculation.
Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439cc28..bd91987c065 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -105,6 +105,8 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; +extern unsigned long determine_dirtyable_memory(void); + extern int dirty_ratio_handler(struct ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); -- cgit v1.2.3 From dc102a8fae2d0d6bf5223fc549247f2e23959ae6 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:09 +0200 Subject: Markers - remove extra format argument Denys Vlasenko : > Not in this patch, but I noticed: > > #define __trace_mark(name, call_private, format, args...) \ > do { \ > static const char __mstrtab_##name[] \ > __attribute__((section("__markers_strings"))) \ > = #name "\0" format; \ > static struct marker __mark_##name \ > __attribute__((section("__markers"), aligned(8))) = \ > { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ > 0, 0, marker_probe_cb, \ > { __mark_empty_function, NULL}, NULL }; \ > __mark_check_format(format, ## args); \ > if (unlikely(__mark_##name.state)) { \ > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > } \ > } while (0) > > In this call: > > (*__mark_##name.call) \ > (&__mark_##name, call_private, \ > format, ## args); \ > > you make gcc allocate duplicate format string. You can use > &__mstrtab_##name[sizeof(#name)] instead since it holds the same string, > or drop ", format," above and "const char *fmt" from here: > > void (*call)(const struct marker *mdata, /* Probe wrapper */ > void *call_private, const char *fmt, ...); > > since mdata->format is the same and all callees which need it can take it there. 
Very good point. I actually thought about dropping it, since it would remove an unnecessary argument from the stack. And actually, since I now have the marker_probe_cb sitting between the marker site and the callbacks, there is no API change required. Thanks :) Mathieu Signed-off-by: Mathieu Desnoyers CC: Denys Vlasenko Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/marker.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 430f6adf976..338533abb47 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -44,8 +44,8 @@ struct marker { */ char state; /* Marker state. */ char ptype; /* probe type : 0 : single, 1 : multi */ - void (*call)(const struct marker *mdata, /* Probe wrapper */ - void *call_private, const char *fmt, ...); + /* Probe wrapper */ + void (*call)(const struct marker *mdata, void *call_private, ...); struct marker_probe_closure single; struct marker_probe_closure *multi; } __attribute__((aligned(8))); @@ -72,8 +72,7 @@ struct marker { __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ (*__mark_##name.call) \ - (&__mark_##name, call_private, \ - format, ## args); \ + (&__mark_##name, call_private, ## args);\ } \ } while (0) @@ -117,9 +116,9 @@ static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, const char *fmt, ...); + void *call_private, ...); /* * Connect a probe to a marker. 
-- cgit v1.2.3 From 0aa977f592f17004f9d1d545f2e1bb9ea71896c3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Markers - define non optimized marker To support the forthcoming "immediate values" marker optimization, we must have a way to declare markers in a few code paths that do not use instruction modification based enable. This will be the case of printk(), some traps and eventually lockdep instrumentation. Changelog : - Fix reversed boolean logic of "generic". Signed-off-by: Mathieu Desnoyers Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/marker.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/marker.h b/include/linux/marker.h index 338533abb47..1290653f924 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -58,8 +58,12 @@ struct marker { * Make sure the alignment of the structure in the __markers section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * + * The "generic" argument controls which marker enabling mechanism must be used. + * If generic is true, a variable read is used. + * If generic is false, immediate values are used. */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...) \ do { \ static const char __mstrtab_##name[] \ __attribute__((section("__markers_strings"))) \ @@ -79,7 +83,7 @@ struct marker { extern void marker_update_probe_range(struct marker *begin, struct marker *end); #else /* !CONFIG_MARKERS */ -#define __trace_mark(name, call_private, format, args...) \ +#define __trace_mark(generic, name, call_private, format, args...)
\ __mark_check_format(format, ## args) static inline void marker_update_probe_range(struct marker *begin, struct marker *end) @@ -87,15 +91,30 @@ static inline void marker_update_probe_range(struct marker *begin, #endif /* CONFIG_MARKERS */ /** - * trace_mark - Marker + * trace_mark - Marker using code patching * @name: marker name, not quoted. * @format: format string * @args...: variable argument list * - * Places a marker. + * Places a marker using optimized code patching technique (imv_read()) + * to be enabled when immediate values are present. */ #define trace_mark(name, format, args...) \ - __trace_mark(name, NULL, format, ## args) + __trace_mark(0, name, NULL, format, ## args) + +/** + * _trace_mark - Marker using variable read + * @name: marker name, not quoted. + * @format: format string + * @args...: variable argument list + * + * Places a marker using a standard memory read (_imv_read()) to be + * enabled. Should be used for markers in code paths where instruction + * modification based enabling is not welcome. (__init and __exit functions, + * lockdep, some traps, printk). + */ +#define _trace_mark(name, format, args...) \ + __trace_mark(1, name, NULL, format, ## args) /** * MARK_NOARGS - Format string for a marker with no argument. -- cgit v1.2.3 From 5b82a1b08a00b2adca3d9dd9777efff40b7aaaa1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 12 May 2008 21:21:10 +0200 Subject: Port ftrace to markers Porting ftrace to the marker infrastructure. Don't need to chain to the wakeup tracer from the sched tracer, because markers support multiple probes connected. 
Signed-off-by: Mathieu Desnoyers CC: Steven Rostedt Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 32 -------------------------------- 1 file changed, 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 360ca99033d..c0b1c69b55c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2131,38 +2131,6 @@ __trace_special(void *__tr, void *__data, } #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER -extern void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next); -extern void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr); -extern void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data); -extern void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); -#else -static inline void -ftrace_ctx_switch(void *rq, struct task_struct *prev, struct task_struct *next) -{ -} -static inline void -sched_trace_special(unsigned long p1, unsigned long p2, unsigned long p3) -{ -} -static inline void -ftrace_wake_up_task(void *rq, struct task_struct *wakee, - struct task_struct *curr) -{ -} -static inline void ftrace_all_fair_tasks(void *__rq, void *__tr, void *__data) -{ -} -static inline void -ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) -{ -} -#endif - extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); extern long sched_getaffinity(pid_t pid, cpumask_t *mask); -- cgit v1.2.3 From 74f4e369fc5b52433ad824cef32d3bf1304549be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:15 +0200 Subject: ftrace: stacktrace fix Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0d3714e7110..017ab44d572 100644 --- a/include/linux/ftrace.h +++ 
b/include/linux/ftrace.h @@ -120,4 +120,12 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif +#ifdef CONFIG_CONTEXT_SWITCH_TRACER +extern void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +#else +static inline void +ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +#endif + #endif /* _LINUX_FTRACE_H */ -- cgit v1.2.3 From d49dbf33f0bf8748ee3662b973eb57e60525d622 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 May 2008 10:41:53 +0200 Subject: ftrace: fix include file dependency Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 017ab44d572..911d5d80b49 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -4,6 +4,7 @@ #ifdef CONFIG_FTRACE #include +#include extern int ftrace_enabled; extern int -- cgit v1.2.3 From 489f139614596cbc956a06f5e4bb41288e276fe3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 Feb 2008 13:38:05 +0100 Subject: ftrace: fix build bug Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 911d5d80b49..922e23d0196 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -106,16 +106,16 @@ static inline void tracer_disable(void) #endif #ifdef CONFIG_IRQSOFF_TRACER - extern void notrace time_hardirqs_on(unsigned long a0, unsigned long a1); - extern void notrace time_hardirqs_off(unsigned long a0, unsigned long a1); + extern void time_hardirqs_on(unsigned long a0, unsigned long a1); + extern void time_hardirqs_off(unsigned long a0, unsigned long a1); #else # define time_hardirqs_on(a0, a1) do { } while (0) # define time_hardirqs_off(a0, a1) do { } 
while (0) #endif #ifdef CONFIG_PREEMPT_TRACER - extern void notrace trace_preempt_on(unsigned long a0, unsigned long a1); - extern void notrace trace_preempt_off(unsigned long a0, unsigned long a1); + extern void trace_preempt_on(unsigned long a0, unsigned long a1); + extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else # define trace_preempt_on(a0, a1) do { } while (0) # define trace_preempt_off(a0, a1) do { } while (0) -- cgit v1.2.3 From 8b7d89d02ef3c6a7c73d6596f28cea7632850af4 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:56 +0200 Subject: x86: mmiotrace - trace memory mapped IO Mmiotrace is a tool for trapping memory mapped IO (MMIO) accesses within the kernel. It is used for debugging and especially for reverse engineering evil binary drivers. Mmiotrace works by wrapping the ioremap family of kernel functions and marking the returned pages as not present. Access to the IO memory triggers a page fault, which will be handled by mmiotrace's custom page fault handler. This will single-step the faulted instruction with the MMIO page marked as present. Access logs are directed to user space via relay and debug_fs. This page fault approach is necessary, because binary drivers have readl/writel etc. calls inlined and therefore extremely difficult to trap with e.g. kprobes. This patch depends on the custom page fault handlers patch.
Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 include/linux/mmiotrace.h (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h new file mode 100644 index 00000000000..cb247825f3e --- /dev/null +++ b/include/linux/mmiotrace.h @@ -0,0 +1,62 @@ +#ifndef MMIOTRACE_H +#define MMIOTRACE_H + +#include + +#define MMIO_VERSION 0x04 + +/* mm_io_header.type */ +#define MMIO_OPCODE_MASK 0xff +#define MMIO_OPCODE_SHIFT 0 +#define MMIO_WIDTH_MASK 0xff00 +#define MMIO_WIDTH_SHIFT 8 +#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) +#define MMIO_MAGIC_MASK 0xffff0000 + +enum mm_io_opcode { /* payload type: */ + MMIO_READ = 0x1, /* struct mm_io_rw */ + MMIO_WRITE = 0x2, /* struct mm_io_rw */ + MMIO_PROBE = 0x3, /* struct mm_io_map */ + MMIO_UNPROBE = 0x4, /* struct mm_io_map */ + MMIO_MARKER = 0x5, /* raw char data */ + MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ +}; + +struct mm_io_header { + __u32 type; + __u32 sec; /* timestamp */ + __u32 nsec; + __u32 pid; /* PID of the process, or 0 for kernel core */ + __u16 data_len; /* length of the following payload */ +}; + +struct mm_io_rw { + __u64 address; /* virtual address of register */ + __u64 value; + __u64 pc; /* optional program counter */ +}; + +struct mm_io_map { + __u64 phys; /* base address in PCI space */ + __u64 addr; /* base virtual address */ + __u64 len; /* mapping size */ + __u64 pc; /* optional program counter */ +}; + + +/* + * These structures are used to allow a single relay_write() + * call to write a full packet. 
+ */ + +struct mm_io_header_rw { + struct mm_io_header header; + struct mm_io_rw rw; +} __attribute__((packed)); + +struct mm_io_header_map { + struct mm_io_header header; + struct mm_io_map map; +} __attribute__((packed)); + +#endif /* MMIOTRACE_H */ -- cgit v1.2.3 From 63ffa3e456c1a9884a3ebac997d91e3fdae18d78 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86 mmiotrace: comment about user space ABI Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb247825f3e..6ec288f1fe2 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,10 @@ #include +/* + * If you change anything here, you must bump MMIO_VERSION. + * This is the relay data format for user space. + */ #define MMIO_VERSION 0x04 /* mm_io_header.type */ @@ -23,7 +27,7 @@ enum mm_io_opcode { /* payload type: */ }; struct mm_io_header { - __u32 type; + __u32 type; /* see MMIO_* macros above */ __u32 sec; /* timestamp */ __u32 nsec; __u32 pid; /* PID of the process, or 0 for kernel core */ -- cgit v1.2.3 From 0fd0e3da4557c479b820b9a4a7afa25b4637ddf2 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace full patch, preview 1 kmmio.c handles the list of mmio probes with callbacks, list of traced pages, and attaching into the page fault handler and die notifier. It arms, traps and disarms the given pages, this is the core of mmiotrace. mmio-mod.c is a user interface, hooking into ioremap functions and registering the mmio probes. It also decodes the required information from trapped mmio accesses via the pre and post callbacks in each probe. 
Currently, hooking into ioremap functions works by redefining the symbols of the target (binary) kernel module, so that it calls the traced versions of the functions. The most notable changes done since the last discussion are: - kmmio.c is a built-in, not part of the module - direct call from fault.c to kmmio.c, removing all dynamic hooks - prepare for unregistering probes at any time - make kmmio re-initializable and accessible to more than one user - rewrite kmmio locking to remove all spinlocks from page fault path Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe() or is there a better way? The function called via call_rcu() itself calls call_rcu() again, will this work or break? There I need a second grace period for RCU after the first grace period for page faults. Mmiotrace itself (mmio-mod.c) is still a module, I am going to attack that next. At some point I will start looking into how to make mmiotrace a tracer component of ftrace (thanks for the hint, Ingo). Ftrace should make the user space part of mmiotracing as simple as 'cat /debug/trace/mmio > dump.txt'. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 6ec288f1fe2..d87a6cd8b68 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -3,6 +3,44 @@ #include +#ifdef __KERNEL__ + +#include + +struct kmmio_probe; +struct pt_regs; + +typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *, + struct pt_regs *, unsigned long addr); +typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, + unsigned long condition, struct pt_regs *); + +struct kmmio_probe { + struct list_head list; + unsigned long addr; /* start location of the probe point */ + unsigned long len; /* length of the probe region */ + kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ + kmmio_post_handler_t post_handler; /* Called after addr is executed */ +}; + +/* kmmio is active by some kmmio_probes? */ +static inline int is_kmmio_active(void) +{ + extern unsigned int kmmio_count; + return kmmio_count; +} + +extern void reference_kmmio(void); +extern void unreference_kmmio(void); +extern int register_kmmio_probe(struct kmmio_probe *p); +extern void unregister_kmmio_probe(struct kmmio_probe *p); + +/* Called from page fault handler. */ +extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); + +#endif /* __KERNEL__ */ + + /* * If you change anything here, you must bump MMIO_VERSION. * This is the relay data format for user space. -- cgit v1.2.3 From d61fc44853f46fb002228b18aa5f30db21fcd4ac Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: x86: mmiotrace, preview 2 Kconfig.debug, Makefile and testmmiotrace.c style fixes. Use real mutex instead of mutex. Fix failure path in register probe func. kmmio: RCU read-locked over single stepping. Generate mapping id's. 
Make mmio-mod.c built-in and rewrite its locking. Add debugfs file to enable/disable mmiotracing. kmmio: use irqsave spinlocks. Lots of cleanups in mmio-mod.c Marker file moved from /proc into debugfs. Call mmiotrace entrypoints directly from ioremap.c. Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index d87a6cd8b68..cb5efd0c7f5 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -16,11 +16,12 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, unsigned long condition, struct pt_regs *); struct kmmio_probe { - struct list_head list; + struct list_head list; /* kmmio internal list */ unsigned long addr; /* start location of the probe point */ unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ + void *user_data; }; /* kmmio is active by some kmmio_probes? */ @@ -38,6 +39,21 @@ extern void unregister_kmmio_probe(struct kmmio_probe *p); /* Called from page fault handler. 
*/ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); +/* Called from ioremap.c */ +#ifdef CONFIG_MMIOTRACE +extern void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_iounmap(volatile void __iomem *addr); +#else +static inline void +mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +{ +} +static inline void mmiotrace_iounmap(volatile void __iomem *addr) +{ +} +#endif /* CONFIG_MMIOTRACE_HOOKS */ + #endif /* __KERNEL__ */ -- cgit v1.2.3 From f984b51e0779a6dd30feedc41404013ca54e5d05 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: add mmiotrace plugin On Sat, 22 Mar 2008 13:07:47 +0100 Ingo Molnar wrote: > > > i'd suggest the following: pull x86.git and sched-devel.git into a > > > single tree [the two will combine without rejects]. Then try to add a > > > kernel/tracing/trace_mmiotrace.c ftrace plugin. The trace_sysprof.c > > > plugin might be a good example. > > > > I did this and now I have mmiotrace enabled/disabled via the tracing > > framework (what do we call this, since ftrace is one of the tracers?). > > cool! could you send the patches for that? (even if they are not fully > functional yet) Patch attached in the end. Nice to see how much code disappeared. I tried to mark all the features I had to break with XXX-comments. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index cb5efd0c7f5..579b3b06c90 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,6 +54,12 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ +/* in kernel/trace/trace_mmiotrace.c */ +extern int __init init_mmiotrace(void); +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); + #endif /* __KERNEL__ */ -- cgit v1.2.3 From bd8ac686c73c7e925fcfe0b02dc4e7b947127864 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:57 +0200 Subject: ftrace: mmiotrace, updates here is a patch that makes mmiotrace work almost well within the tracing framework. The patch applies on top of my previous patch. I have my own output formatting in place now. Summary of changes: - fix the NULL dereference that was due to not calling tracing_reset() - add print_line() callback into struct tracer - implement print_line() for mmiotrace, producing up-to-spec text - add my output header, but that is not really called in the right place - rewrote the main structs in mmiotrace - added two new trace entry types: TRACE_MMIO_RW and TRACE_MMIO_MAP - made some functions in trace.c non-static - check current==NULL in tracing_generic_entry_update() - fix(?) comparison in trace_seq_printf() Things seem to work fine except a few issues. Markers (text lines injected into mmiotrace log) are missing, I did not feel hacking them in before we have variable length entries. My output header is printed only for 'trace' file, but not 'trace_pipe'. 
For some reason, despite my quick fix, iter->trace is NULL in print_trace_line() when called from 'trace_pipe' file, which means I don't get proper output formatting. I only tried by loading nouveau.ko, which just detects the card, and that is traced fine. I didn't try further. Map, two reads and unmap. Works perfectly. I am missing the information about overflows, I'd prefer to have a counter for lost events. I didn't try, but I guess currently there is no way of knowing when it overflows? So, not too far from being fully operational, it seems :-) And looking at the diffstat, there also is some 700-900 lines of user space code that just became obsolete. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 85 ++++++++++++++--------------------------------- 1 file changed, 25 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 579b3b06c90..c88a9c197d2 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -54,73 +54,38 @@ static inline void mmiotrace_iounmap(volatile void __iomem *addr) } #endif /* CONFIG_MMIOTRACE_HOOKS */ -/* in kernel/trace/trace_mmiotrace.c */ -extern int __init init_mmiotrace(void); -extern void enable_mmiotrace(void); -extern void disable_mmiotrace(void); -extern void mmio_trace_record(u32 type, unsigned long addr, unsigned long arg); - -#endif /* __KERNEL__ */ - - -/* - * If you change anything here, you must bump MMIO_VERSION. - * This is the relay data format for user space.
- */ -#define MMIO_VERSION 0x04 - -/* mm_io_header.type */ -#define MMIO_OPCODE_MASK 0xff -#define MMIO_OPCODE_SHIFT 0 -#define MMIO_WIDTH_MASK 0xff00 -#define MMIO_WIDTH_SHIFT 8 -#define MMIO_MAGIC (0x6f000000 | (MMIO_VERSION<<16)) -#define MMIO_MAGIC_MASK 0xffff0000 - -enum mm_io_opcode { /* payload type: */ - MMIO_READ = 0x1, /* struct mm_io_rw */ - MMIO_WRITE = 0x2, /* struct mm_io_rw */ - MMIO_PROBE = 0x3, /* struct mm_io_map */ - MMIO_UNPROBE = 0x4, /* struct mm_io_map */ +enum mm_io_opcode { + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ MMIO_MARKER = 0x5, /* raw char data */ - MMIO_UNKNOWN_OP = 0x6, /* struct mm_io_rw */ + MMIO_UNKNOWN_OP = 0x6, /* struct mmiotrace_rw */ }; -struct mm_io_header { - __u32 type; /* see MMIO_* macros above */ - __u32 sec; /* timestamp */ - __u32 nsec; - __u32 pid; /* PID of the process, or 0 for kernel core */ - __u16 data_len; /* length of the following payload */ +struct mmiotrace_rw { + unsigned long phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ }; -struct mm_io_rw { - __u64 address; /* virtual address of register */ - __u64 value; - __u64 pc; /* optional program counter */ +struct mmiotrace_map { + unsigned long phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ }; -struct mm_io_map { - __u64 phys; /* base address in PCI space */ - __u64 addr; /* base virtual address */ - __u64 len; /* mapping size */ - __u64 pc; /* optional program counter */ -}; - - -/* - * These structures are used to allow a single relay_write() - * call to write a 
full packet. - */ - -struct mm_io_header_rw { - struct mm_io_header header; - struct mm_io_rw rw; -} __attribute__((packed)); +/* in kernel/trace/trace_mmiotrace.c */ +extern void enable_mmiotrace(void); +extern void disable_mmiotrace(void); +extern void mmio_trace_rw(struct mmiotrace_rw *rw); +extern void mmio_trace_mapping(struct mmiotrace_map *map); -struct mm_io_header_map { - struct mm_io_header header; - struct mm_io_map map; -} __attribute__((packed)); +#endif /* __KERNEL__ */ #endif /* MMIOTRACE_H */ -- cgit v1.2.3 From 138295373ccf7625fcb0218dfea114837983bc39 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:20:58 +0200 Subject: ftrace: mmiotrace update, #2 another weekend, another patch. This should apply on top of my previous patch from March 23rd. Summary of changes: - Print PCI device list in output header - work around recursive probe hits on SMP - refactor dis/arm_kmmio_fault_page() and add check for page levels - remove un/reference_kmmio(), the die notifier hook is registered permanently into the list - explicitly check for single stepping in die notifier callback I have tested this version on my UP Athlon64 desktop with Nouveau, and SMP Core 2 Duo laptop with the proprietary nvidia driver. Both systems are 64-bit. One previously unknown bug crept into daylight: the ftrace framework's output routines print the first entry last after buffer has wrapped around. The most important regressions compared to non-ftrace mmiotrace at this time are: - failure of trace_pipe file - illegal lines in output file - unaware of losing data due to buffer full Personally I'd like to see these three solved before submitting to mainline. Other issues may come up once we know when we lose events. 
Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mmiotrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index c88a9c197d2..dd6b64b160f 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -31,8 +31,6 @@ static inline int is_kmmio_active(void) return kmmio_count; } -extern void reference_kmmio(void); -extern void unreference_kmmio(void); extern int register_kmmio_probe(struct kmmio_probe *p); extern void unregister_kmmio_probe(struct kmmio_probe *p); -- cgit v1.2.3 From 970e6fa03885f32cc43e42cb08c73a5f54cd8bd9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: code style cleanups From c2da03771e29159627c5c7b9509ec70bce9f91ee Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 28 Apr 2008 21:25:22 +0300 Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index dd6b64b160f..de8e91258da 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -1,9 +1,7 @@ #ifndef MMIOTRACE_H #define MMIOTRACE_H -#include - -#ifdef __KERNEL__ +#include #include @@ -84,6 +82,4 @@ extern void disable_mmiotrace(void); extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); -#endif /* __KERNEL__ */ - #endif /* MMIOTRACE_H */ -- cgit v1.2.3 From dee310d0adf41019aca476052ac3085ff286d9be Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: x86 mmiotrace: use resource_size_t for phys addresses Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') 
diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index de8e91258da..5cbbc374e94 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -2,7 +2,6 @@ #define MMIOTRACE_H #include - #include struct kmmio_probe; @@ -37,14 +36,15 @@ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); /* Called from ioremap.c */ #ifdef CONFIG_MMIOTRACE -extern void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr); +extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr); extern void mmiotrace_iounmap(volatile void __iomem *addr); #else -static inline void -mmiotrace_ioremap(unsigned long offset, unsigned long size, void __iomem *addr) +static inline void mmiotrace_ioremap(resource_size_t offset, + unsigned long size, void __iomem *addr) { } + static inline void mmiotrace_iounmap(volatile void __iomem *addr) { } @@ -60,7 +60,7 @@ enum mm_io_opcode { }; struct mmiotrace_rw { - unsigned long phys; /* PCI address of register */ + resource_size_t phys; /* PCI address of register */ unsigned long value; unsigned long pc; /* optional program counter */ int map_id; @@ -69,7 +69,7 @@ struct mmiotrace_rw { }; struct mmiotrace_map { - unsigned long phys; /* base address in PCI space */ + resource_size_t phys; /* base address in PCI space */ unsigned long virt; /* base virtual address */ unsigned long len; /* mapping size */ int map_id; -- cgit v1.2.3 From a50445d76c22a34ae149704ea5adaef171c8acb7 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 12 May 2008 21:21:03 +0200 Subject: mmiotrace: rename kmmio_probe::user_data to :private. 
Signed-off-by: Pekka Paalanen Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 5cbbc374e94..61d19e1b7a0 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -18,7 +18,7 @@ struct kmmio_probe { unsigned long len; /* length of the probe region */ kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ kmmio_post_handler_t post_handler; /* Called after addr is executed */ - void *user_data; + void *private; }; /* kmmio is active by some kmmio_probes? */ -- cgit v1.2.3 From 42fdfa238a23643226910acf922ea930b3286032 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 24 May 2008 23:14:51 +0200 Subject: namespacecheck: more kernel/printk.c fixes [ Stephen Rothwell : build fix ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/kernel.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 792bf0aa779..f2a668c195b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -184,9 +184,6 @@ asmlinkage int vprintk(const char *fmt, va_list args) __attribute__ ((format (printf, 1, 0))); asmlinkage int printk(const char * fmt, ...) __attribute__ ((format (printf, 1, 2))) __cold; -extern int log_buf_get_len(void); -extern int log_buf_read(int idx); -extern int log_buf_copy(char *dest, int idx, int len); extern int printk_ratelimit_jiffies; extern int printk_ratelimit_burst; @@ -202,9 +199,6 @@ static inline int vprintk(const char *s, va_list args) { return 0; } static inline int printk(const char *s, ...) __attribute__ ((format (printf, 1, 2))); static inline int __cold printk(const char *s, ...) 
{ return 0; } -static inline int log_buf_get_len(void) { return 0; } -static inline int log_buf_read(int idx) { return 0; } -static inline int log_buf_copy(char *dest, int idx, int len) { return 0; } static inline int printk_ratelimit(void) { return 0; } static inline int __printk_ratelimit(int ratelimit_jiffies, \ int ratelimit_burst) { return 0; } -- cgit v1.2.3 From 63687a528c39a67c1a213cdffa09feb0e6af9dbe Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 12 May 2008 15:44:41 +0200 Subject: x86: move tracedata to RODATA .. allowing it to be write-protected just as other read-only data under CONFIG_DEBUG_RODATA. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/resume-trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index f3f4f28c696..c9ba2fdf807 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -8,7 +8,7 @@ extern int pm_trace_enabled; struct device; extern void set_trace_device(struct device *); -extern void generate_resume_trace(void *tracedata, unsigned int user); +extern void generate_resume_trace(const void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) do { \ if (pm_trace_enabled) \ -- cgit v1.2.3 From 962cf36c5bf6d2840b8d66ee9a606fae2f540bbd Mon Sep 17 00:00:00 2001 From: "Carlos R. 
Mafra" Date: Thu, 15 May 2008 11:15:37 -0300 Subject: Remove argument from open_softirq which is always NULL As git-grep shows, open_softirq() is always called with the last argument being NULL block/blk-core.c: open_softirq(BLOCK_SOFTIRQ, blk_done_softirq, NULL); kernel/hrtimer.c: open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq, NULL); kernel/rcuclassic.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/rcupreempt.c: open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); kernel/sched.c: open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); kernel/softirq.c: open_softirq(TASKLET_SOFTIRQ, tasklet_action, NULL); kernel/softirq.c: open_softirq(HI_SOFTIRQ, tasklet_hi_action, NULL); kernel/timer.c: open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL); net/core/dev.c: open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); net/core/dev.c: open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); This observation has already been made by Matthew Wilcox in June 2002 (http://www.cs.helsinki.fi/linux/linux-kernel/2002-25/0687.html) "I notice that none of the current softirq routines use the data element passed to them." and the situation hasn't changed since then. So it appears we can safely remove that extra argument to save 128 (54) bytes of kernel data (text). Signed-off-by: Carlos R.
Mafra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f1fc7470d26..a86186dd047 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -285,12 +285,11 @@ enum struct softirq_action { void (*action)(struct softirq_action *); - void *data; }; asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); -extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); +extern void open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) extern void raise_softirq_irqoff(unsigned int nr); -- cgit v1.2.3 From e9197bf0114661195bee35e7795cfc42164d9b2c Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:10 -0700 Subject: x86 boot: remove some unused extern function declarations Remove three extern declarations for routines that don't exist. Fix a typo in a comment. 
Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index a5f359a7ad0..807373d467f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -287,7 +287,6 @@ efi_guid_unparse(efi_guid_t *guid, char *out) extern void efi_init (void); extern void *efi_get_pal_addr (void); extern void efi_map_pal_code (void); -extern void efi_map_memmap(void); extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg); extern void efi_gettimeofday (struct timespec *ts); extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ @@ -295,14 +294,11 @@ extern u64 efi_get_iobase (void); extern u32 efi_mem_type (unsigned long phys_addr); extern u64 efi_mem_attributes (unsigned long phys_addr); extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size); -extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, - u64 attr); extern int __init efi_uart_console_only (void); extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); -extern int is_available_memory(efi_memory_desc_t * md); extern struct efi_memory_map memmap; /** -- cgit v1.2.3 From c801ed3860fe2f84281d4cae4c3e6ca87e81e890 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Wed, 14 May 2008 08:15:23 -0700 Subject: x86 boot: simplify pageblock_bits enum declaration The use of #defines with '##' pre-processor concatenation is a useful way to form several symbol names with a common pattern. But when there is just a single name obtained from that #define, it's just obfuscation. Better to just write the plain symbol name, as is. 
The following patch is a result of my wasting ten minutes looking through the kernel to figure out what 'PB_migrate_end' meant, and forgetting what I came to do, by the time I figured out that the #define PB_range macro defined it. Signed-off-by: Paul Jackson Signed-off-by: Ingo Molnar --- include/linux/pageblock-flags.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e875905f7b1..e8c06122be3 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -25,13 +25,11 @@ #include -/* Macro to aid the definition of ranges of bits */ -#define PB_range(name, required_bits) \ - name, name ## _end = (name + required_bits) - 1 - /* Bit indices that affect a whole block of pages */ enum pageblock_bits { - PB_range(PB_migrate, 3), /* 3 bits required for migrate types */ + PB_migrate, + PB_migrate_end = PB_migrate + 3 - 1, + /* 3 bits required for migrate types */ NR_PAGEBLOCK_BITS }; -- cgit v1.2.3 From 41c52c0db9607e59f90da7da5309489fa06e887f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 22 May 2008 11:46:33 -0400 Subject: ftrace: set_ftrace_notrace feature While debugging latencies in the RT kernel, I found that it would be nice to be able to filter away functions from the trace rather than just to filter on functions. I added a new interface to the debugfs tracing directory called set_ftrace_notrace When dynamic ftrace is enabled, this lets you filter away functions that will not be recorded in the trace. It is similar to adding 'notrace' to those functions but by doing it without recompiling the kernel. Here's how set_ftrace_filter and set_ftrace_notrace interact. Remember, if set_ftrace_filter is set, it removes all functions from the trace except for those listed in the set_ftrace_filter. set_ftrace_notrace will prevent those functions from being traced.
If you were to set one function in both set_ftrace_filter and set_ftrace_notrace and that function was the same, then you would end up with an empty trace. The set of functions to trace is: set_ftrace_filter == empty then all functions not in set_ftrace_notrace else set of the set_ftrace_filter and not in set of set_ftrace_notrace. Signed-off-by: Steven Rostedt Signed-off-by: Thomas Gleixner --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 922e23d0196..ffbbd54a720 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -48,6 +48,7 @@ enum { FTRACE_FL_FAILED = (1 << 1), FTRACE_FL_FILTER = (1 << 2), FTRACE_FL_ENABLED = (1 << 3), + FTRACE_FL_NOTRACE = (1 << 4), }; struct dyn_ftrace { -- cgit v1.2.3 From 9e124fe16ff24746d6de5a2ad685266d7bce0e08 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Mon, 26 May 2008 23:31:07 +0100 Subject: xen: Enable console tty by default in domU if it's not a dummy Without console= arguments on the kernel command line, the first console to register becomes enabled and the preferred console (the one behind /dev/console). This is normally tty (assuming CONFIG_VT_CONSOLE is enabled, which it commonly is). This is okay as long as tty is a useful console. But unless we have the PV framebuffer, and it is enabled for this domain, tty0 in domU is merely a dummy. In that case, we want the preferred console to be the Xen console hvc0, and we want it without having to fiddle with the kernel command line. Commit b8c2d3dfbc117dff26058fbac316b8acfc2cb5f7 did that for us. Since we now have the PV framebuffer, we want to enable and prefer tty again, but only when PVFB is enabled. But even then we still want to enable the Xen console as well. Problem: when tty registers, we can't yet know whether the PVFB is enabled. By the time we can know (xenstore is up), the console setup game is over.
Solution: enable console tty by default, but keep hvc as the preferred console. Change the preferred console to tty when PVFB probes successfully, unless we've been given console kernel parameters. Signed-off-by: Markus Armbruster Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/linux/console.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index a4f27fbdf54..248e6e3b9b7 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -108,6 +108,8 @@ struct console { struct console *next; }; +extern int console_set_on_cmdline; + extern int add_preferred_console(char *name, int idx, char *options); extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options); extern void register_console(struct console *); -- cgit v1.2.3 From 0e91398f2a5d4eb6b07df8115917d0d1cf3e9b58 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 26 May 2008 23:31:27 +0100 Subject: xen: implement save/restore This patch implements Xen save/restore and migration. Saving is triggered via xenbus, which is polled in drivers/xen/manage.c. When a suspend request comes in, the kernel prepares itself for saving by: 1 - Freeze all processes. This is primarily to prevent any partially-completed pagetable updates from confusing the suspend process. If CONFIG_PREEMPT isn't defined, then this isn't necessary. 2 - Suspend xenbus and other devices 3 - Stop_machine, to make sure all the other vcpus are quiescent. The Xen tools require the domain to run its save off vcpu0. 4 - Within the stop_machine state, it pins any unpinned pgds (under construction or destruction), canonicalizes various other pieces of state (mostly converting mfns to pfns), and finally 5 - Suspend the domain Restore reverses the steps used to save the domain, ending when all the frozen processes are thawed.
Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Thomas Gleixner --- include/linux/page-flags.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 590cff32415..02955a1c3d7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -157,6 +157,7 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) __PAGEFLAG(Slab, slab) PAGEFLAG(Checked, owner_priv_1) /* Used by some filesystems */ PAGEFLAG(Pinned, owner_priv_1) TESTSCFLAG(Pinned, owner_priv_1) /* Xen */ +PAGEFLAG(SavePinned, dirty); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private) __SETPAGEFLAG(Private, private) -- cgit v1.2.3 From b1829d2705daa7cb72eb1e08bdc8b7e9fad34266 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 28 May 2008 01:22:08 +0200 Subject: ftrace: fix merge Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ffbbd54a720..b482fe88bc0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -122,7 +122,7 @@ static inline void tracer_disable(void) # define trace_preempt_off(a0, a1) do { } while (0) #endif -#ifdef CONFIG_CONTEXT_SWITCH_TRACER +#ifdef CONFIG_TRACING extern void ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); #else -- cgit v1.2.3 From ad90c0e3ce8d20d6873b57e36181ef6d7a0097fe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 27 May 2008 20:48:37 -0400 Subject: ftrace: user update and disable dynamic ftrace daemon In dynamic ftrace, the mcount function starts off pointing to a stub function that just returns. On start up, the call to the stub is modified to point to a "record_ip" function. The job of the record_ip function is to add the function to a pre-allocated hash list. 
If the function is already there, it simply is ignored, otherwise it is added to the list. Later, a ftraced daemon wakes up and calls kstop_machine if any functions have been recorded, and changes the calls to the recorded functions to a simple nop. If no functions were recorded, the daemon goes back to sleep. The daemon wakes up once a second to see if it needs to update any newly recorded functions into nops. Usually it does not, but if a lot of code has been executed for the first time in the kernel, the ftraced daemon will call kstop_machine to update those into nops. The problem currently is that there's no way to stop the daemon from doing this, and it can cause unneeded latencies (800us which for some is bothersome). This patch adds a new file /debugfs/tracing/ftraced_enabled. If the daemon is active, reading this will return "enabled\n" and "disabled\n" when the daemon is not running. To disable the daemon, the user can echo "0" or "disable" into this file, and "1" or "enable" to re-enable the daemon. Since the daemon is used to convert the functions into nops to increase the performance of the system, I also added that anytime something is written into the ftraced_enabled file, kstop_machine will run if there are new functions that have been detected that need to be converted. This way the user can disable the daemon but still be able to control the conversion of the mcount calls to nops by simply, "echo 0 > /debugfs/tracing/ftraced_enabled" when they need to do more conversions. 
To see the number of converted functions: "cat /debugfs/tracing/dyn_ftrace_total_info" Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b482fe88bc0..623819433ed 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -72,9 +72,15 @@ extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); + +void ftrace_disable_daemon(void); +void ftrace_enable_daemon(void); + #else # define ftrace_force_update() ({ 0; }) # define ftrace_set_filter(buf, len, reset) do { } while (0) +# define ftrace_disable_daemon() do { } while (0) +# define ftrace_enable_daemon() do { } while (0) #endif /* totally disable ftrace - can not re-enable after this */ -- cgit v1.2.3 From 18404756765c713a0be4eb1082920c04822ce588 Mon Sep 17 00:00:00 2001 From: Max Krasnyansky Date: Thu, 29 May 2008 11:02:52 -0700 Subject: genirq: Expose default irq affinity mask (take 3) Current IRQ affinity interface does not provide a way to set affinity for the IRQs that will be allocated/activated in the future. This patch creates /proc/irq/default_smp_affinity that lets users set default affinity mask for the newly allocated IRQs. Changing the default does not affect affinity masks for the currently active IRQs, they have to be changed explicitly. Updated based on Paul J's comments and added some more documentation. 
Signed-off-by: Max Krasnyansky Cc: pj@sgi.com Cc: a.p.zijlstra@chello.nl Cc: tglx@linutronix.de Cc: rdunlap@xenotime.net Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 5 +++++ include/linux/irq.h | 9 --------- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index f1fc7470d26..043400f3d45 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -104,8 +104,11 @@ extern void enable_irq(unsigned int irq); #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) +extern cpumask_t irq_default_affinity; + extern int irq_set_affinity(unsigned int irq, cpumask_t cpumask); extern int irq_can_set_affinity(unsigned int irq); +extern int irq_select_affinity(unsigned int irq); #else /* CONFIG_SMP */ @@ -119,6 +122,8 @@ static inline int irq_can_set_affinity(unsigned int irq) return 0; } +static inline int irq_select_affinity(unsigned int irq) { return 0; } + #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */ #ifdef CONFIG_GENERIC_HARDIRQS diff --git a/include/linux/irq.h b/include/linux/irq.h index 552e0ec269c..8ccb462ea42 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -244,15 +244,6 @@ static inline void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) } #endif -#ifdef CONFIG_AUTO_IRQ_AFFINITY -extern int select_smp_affinity(unsigned int irq); -#else -static inline int select_smp_affinity(unsigned int irq) -{ - return 1; -} -#endif - extern int no_irq_affinity; static inline int irq_balancing_disabled(unsigned int irq) -- cgit v1.2.3 From 554ec22f075d46e4363520a407d2b7eeb5dfdd43 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 21:21:03 +0200 Subject: namespacecheck: more sched.c fixes [ Stephen Rothwell : build fix ] Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff 
--git a/include/linux/sched.h b/include/linux/sched.h index ae0be3c6237..dc36c3aea01 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -134,7 +134,6 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_active(void); extern unsigned long nr_iowait(void); -extern unsigned long weighted_cpuload(const int cpu); struct seq_file; struct cfs_rq; @@ -823,23 +822,6 @@ extern int arch_reinit_sched_domains(void); #endif /* CONFIG_SMP */ -/* - * A runqueue laden with a single nice 0 task scores a weighted_cpuload of - * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a - * task of nice 0 or enough lower priority tasks to bring up the - * weighted_cpuload - */ -static inline int above_background_load(void) -{ - unsigned long cpu; - - for_each_online_cpu(cpu) { - if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE) - return 1; - } - return 0; -} - struct io_context; /* See blkdev.h */ #define NGROUPS_SMALL 32 #define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -- cgit v1.2.3 From c7aceaba042702538b23cf4e0de1b2891ad8e671 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 15 May 2008 12:09:15 +0100 Subject: sched: reorder task_struct to reduce padding on 64bit builds This patch removes 24 bytes of padding and allows 1 extra object per slab on my fedora based config. 
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index dc36c3aea01..ea2857b9959 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1021,6 +1021,7 @@ struct task_struct { #endif int prio, static_prio, normal_prio; + unsigned int rt_priority; const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; @@ -1104,7 +1105,6 @@ struct task_struct { int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - unsigned int rt_priority; cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; cputime_t prev_utime, prev_stime; @@ -1123,12 +1123,12 @@ struct task_struct { gid_t gid,egid,sgid,fsgid; struct group_info *group_info; kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - unsigned securebits; struct user_struct *user; + unsigned securebits; #ifdef CONFIG_KEYS + unsigned char jit_keyring; /* default keyring to attach requested keys to */ struct key *request_key_auth; /* assumed request_key authority */ struct key *thread_keyring; /* keyring private to this thread */ - unsigned char jit_keyring; /* default keyring to attach requested keys to */ #endif char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1215,8 +1215,8 @@ struct task_struct { # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; - struct held_lock held_locks[MAX_LOCK_DEPTH]; unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif /* journalling filesystem info */ @@ -1244,10 +1244,6 @@ struct task_struct { u64 acct_vm_mem1; /* accumulated virtual memory usage */ cputime_t acct_stimexpd;/* stime since last update */ #endif -#ifdef CONFIG_NUMA - struct mempolicy *mempolicy; - short il_next; -#endif 
#ifdef CONFIG_CPUSETS nodemask_t mems_allowed; int cpuset_mems_generation; @@ -1266,6 +1262,10 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; +#endif +#ifdef CONFIG_NUMA + struct mempolicy *mempolicy; + short il_next; #endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; -- cgit v1.2.3 From 1f11eb6a8bc92536d9e93ead48fa3ffbd1478571 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Wed, 4 Jun 2008 15:04:05 -0400 Subject: sched: fix cpupri hotplug support The RT folks over at RedHat found an issue w.r.t. hotplug support which was traced to problems with the cpupri infrastructure in the scheduler: https://bugzilla.redhat.com/show_bug.cgi?id=449676 This bug affects 23-rt12+, 24-rtX, 25-rtX, and sched-devel. This patch applies to 25.4-rt4, though it should trivially apply to most cpupri enabled kernels mentioned above. It turned out that the issue was that offline cpus could get inadvertently registered with cpupri so that they were erroneously selected during migration decisions. The end result would be an OOPS as the offline cpu had tasks routed to it. This patch generalizes the old join/leave domain interface into an online/offline interface, and adjusts the root-domain/hotplug code to utilize it. I was able to easily reproduce the issue prior to this patch, and am no longer able to reproduce it after this patch. I can offline cpus indefinitely and everything seems to be in working order. Thanks to Arnaldo (acme), Thomas, and Peter for doing the legwork to point me in the right direction. Also thank you to Peter for reviewing the early iterations of this patch.
Signed-off-by: Gregory Haskins Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index ea2857b9959..d25acf600a3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -903,8 +903,8 @@ struct sched_class { void (*set_cpus_allowed)(struct task_struct *p, const cpumask_t *newmask); - void (*join_domain)(struct rq *rq); - void (*leave_domain)(struct rq *rq); + void (*rq_online)(struct rq *rq); + void (*rq_offline)(struct rq *rq); void (*switched_from) (struct rq *this_rq, struct task_struct *task, int running); -- cgit v1.2.3 From cc1a9d86ce989083703c4bdc11b75a87e1cc404a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Jun 2008 19:39:16 -0700 Subject: mm, x86: shrink_active_range() should check all Now we are using register_e820_active_regions() instead of add_active_range() directly. So end_pfn could be different between the value in early_node_map to node_end_pfn. So we need to make shrink_active_range() smarter. shrink_active_range() is a generic MM function in mm/page_alloc.c but it is only used on 32-bit x86. Should we move it back to some file in arch/x86? 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c31a9cd2a30..7cbd949f251 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -997,8 +997,7 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn, - unsigned long new_end_pfn); +extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); -- cgit v1.2.3 From 0eb967012ea15e6e8cfab483d9fa37bc602d400c Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sun, 1 Jun 2008 21:47:30 +0530 Subject: ftrace: prevent freeing of all failed updates Prevent freeing of records which cause problems and correspond to function from core kernel text. A new flag, FTRACE_FL_CONVERTED is used to mark a record as "converted". All other records are patched lazily to NOPs. Failed records now also remain on ftrace_hash table. Each invocation of ftrace_record_ip now checks whether the traced function has ever been recorded (including past failures) and doesn't re-record it again.
Signed-off-by: Abhishek Sagar Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 623819433ed..20e14d0093c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -49,6 +49,7 @@ enum { FTRACE_FL_FILTER = (1 << 2), FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), + FTRACE_FL_CONVERTED = (1 << 5), }; struct dyn_ftrace { -- cgit v1.2.3 From 9985b0bab332289f14837eff3c6e0bcc658b58f7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Jun 2008 12:57:11 -0700 Subject: sched: prevent bound kthreads from changing cpus_allowed Kthreads that have called kthread_bind() are bound to specific cpus, so other tasks should not be able to change their cpus_allowed from under them. Otherwise, it is possible to move kthreads, such as the migration or software watchdog threads, so they are not allowed access to the cpu they work on. Cc: Peter Zijlstra Cc: Paul Menage Cc: Paul Jackson Signed-off-by: David Rientjes Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d25acf600a3..2db1485f865 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1486,6 +1486,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ +#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ -- cgit v1.2.3 From 19792a0859f96e9fc8ce87d97b269bcb895389e5 Mon Sep 
17 00:00:00 2001 From: Adrian Bunk Date: Mon, 5 May 2008 21:25:47 +0300 Subject: PCI: drivers/pci/pci.c: add prototypes This patch adds prototypes for pcibios_disable_device() and pcibios_set_pcie_reset_state() in include/linux/pci.h While I was at it, I also removed the unneeded "extern" from the prototype of pcibios_add_platform_entries(). Signed-off-by: Adrian Bunk Signed-off-by: Jesse Barnes --- include/linux/pci.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 509159bcd4e..aaa9f333fb4 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1059,7 +1059,10 @@ extern int pci_pci_problems; extern unsigned long pci_cardbus_io_size; extern unsigned long pci_cardbus_mem_size; -extern int pcibios_add_platform_entries(struct pci_dev *dev); +int pcibios_add_platform_entries(struct pci_dev *dev); +void pcibios_disable_device(struct pci_dev *dev); +int pcibios_set_pcie_reset_state(struct pci_dev *dev, + enum pcie_reset_state state); #ifdef CONFIG_PCI_MMCONFIG extern void __init pci_mmcfg_early_init(void); -- cgit v1.2.3 From e1a2a51e684bfe9d6165992d4a065439617a3107 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 15 May 2008 21:51:31 +0200 Subject: Suspend/Resume bug in PCI layer wrt quirks Some quirks should be called with interrupt disabled, we can't directly call them in .resume_early. Also the patch introduces pci_fixup_resume_early and pci_fixup_suspend, which matches current device core callbacks (.suspend/.resume_early). TBD: Somebody knows why we need quirk resume should double check if a quirk should be called in resume or resume_early. I changed some per my understanding, but can't make sure I fixed all. 
Signed-off-by: Shaohua Li Signed-off-by: Jesse Barnes --- include/linux/pci.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index aaa9f333fb4..700704ef70f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1013,7 +1013,9 @@ enum pci_fixup_pass { pci_fixup_header, /* After reading configuration header */ pci_fixup_final, /* Final phase of device fixups */ pci_fixup_enable, /* pci_enable_device() time */ - pci_fixup_resume, /* pci_enable_device() time */ + pci_fixup_resume, /* pci_device_resume() */ + pci_fixup_suspend, /* pci_device_suspend */ + pci_fixup_resume_early, /* pci_device_resume_early() */ }; /* Anonymous variables would be nice... */ @@ -1035,6 +1037,12 @@ enum pci_fixup_pass { #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume, \ resume##vendor##device##hook, vendor, device, hook) +#define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook) \ + DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early, \ + resume_early##vendor##device##hook, vendor, device, hook) +#define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook) \ + DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ + suspend##vendor##device##hook, vendor, device, hook) void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); -- cgit v1.2.3 From 1eede070a59e1cc73da51e1aaa00d9ab86572cfc Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 20 May 2008 23:00:01 +0200 Subject: Introduce new top level suspend and hibernation callbacks Introduce 'struct pm_ops' and 'struct pm_ext_ops' ('ext' meaning 'extended') representing suspend and hibernation operations for bus types, device classes, device types and device drivers. 
Modify the PM core to use 'struct pm_ops' and 'struct pm_ext_ops' objects, if defined, instead of the ->suspend(), ->resume(), ->suspend_late(), and ->resume_early() callbacks (the old callbacks will be considered as legacy and gradually phased out). The main purpose of doing this is to separate suspend (aka S2RAM and standby) callbacks from hibernation callbacks in such a way that the new callbacks won't take arguments and the semantics of each of them will be clearly specified. This has been requested multiple times by many people, including Linus himself, and the reason is that within the current scheme if ->resume() is called, for example, it's difficult to say why it's been called (i.e. is it a resume from RAM or from hibernation or a suspend/hibernation failure etc.?). The second purpose is to make the suspend/hibernation callbacks more flexible so that device drivers can handle more than they can within the current scheme. For example, some drivers may need to prevent new children of the device from being registered before their ->suspend() callbacks are executed or they may want to carry out some operations requiring the availability of some other devices, not directly bound via the parent-child relationship, in order to prepare for the execution of ->suspend(), etc. Ultimately, we'd like to stop using the freezing of tasks for suspend and therefore the drivers' suspend/hibernation code will have to take care of the handling of the user space during suspend/hibernation. That, in turn, would be difficult within the current scheme, without the new ->prepare() and ->complete() callbacks. Signed-off-by: Rafael J.
Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- include/linux/device.h | 9 ++ include/linux/pm.h | 314 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 295 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 6a2d04c011b..f71a78d123a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -68,6 +68,8 @@ struct bus_type { int (*resume_early)(struct device *dev); int (*resume)(struct device *dev); + struct pm_ext_ops *pm; + struct bus_type_private *p; }; @@ -131,6 +133,8 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; + struct pm_ops *pm; + struct driver_private *p; }; @@ -197,6 +201,8 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + + struct pm_ops *pm; }; extern int __must_check class_register(struct class *class); @@ -248,8 +254,11 @@ struct device_type { struct attribute_group **groups; int (*uevent)(struct device *dev, struct kobj_uevent_env *env); void (*release)(struct device *dev); + int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); + + struct pm_ops *pm; }; /* interface for exporting device attributes */ diff --git a/include/linux/pm.h b/include/linux/pm.h index 39a7ee859b6..4ad9de94449 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -112,7 +112,9 @@ typedef struct pm_message { int event; } pm_message_t; -/* +/** + * struct pm_ops - device PM callbacks + * * Several driver power state transitions are externally visible, affecting * the state of pending I/O queues and (for drivers that touch hardware) * interrupts, wakeups, DMA, and other hardware state. There may also be @@ -120,6 +122,284 @@ typedef struct pm_message { * to the rest of the driver stack (such as a driver that's ON gating off * clocks which are not in active use). 
* + * The externally visible transitions are handled with the help of the following + * callbacks included in this structure: + * + * @prepare: Prepare the device for the upcoming transition, but do NOT change + * its hardware state. Prevent new children of the device from being + * registered after @prepare() returns (the driver's subsystem and + * generally the rest of the kernel is supposed to prevent new calls to the + * probe method from being made too once @prepare() has succeeded). If + * @prepare() detects a situation it cannot handle (e.g. registration of a + * child already in progress), it may return -EAGAIN, so that the PM core + * can execute it once again (e.g. after the new child has been registered) + * to recover from the race condition. This method is executed for all + * kinds of suspend transitions and is followed by one of the suspend + * callbacks: @suspend(), @freeze(), or @poweroff(). + * The PM core executes @prepare() for all devices before starting to + * execute suspend callbacks for any of them, so drivers may assume all of + * the other devices to be present and functional while @prepare() is being + * executed. In particular, it is safe to make GFP_KERNEL memory + * allocations from within @prepare(). However, drivers may NOT assume + * anything about the availability of the user space at that time and it + * is not correct to request firmware from within @prepare() (it's too + * late to do that). [To work around this limitation, drivers may + * register suspend and hibernation notifiers that are executed before the + * freezing of tasks.] + * + * @complete: Undo the changes made by @prepare(). This method is executed for + * all kinds of resume transitions, following one of the resume callbacks: + * @resume(), @thaw(), @restore(). Also called if the state transition + * fails before the driver's suspend callback (@suspend(), @freeze(), + * @poweroff()) can be executed (e.g. 
if the suspend callback fails for one + * of the other devices that the PM core has unsuccessfully attempted to + * suspend earlier). + * The PM core executes @complete() after it has executed the appropriate + * resume callback for all devices. + * + * @suspend: Executed before putting the system into a sleep state in which the + * contents of main memory are preserved. Quiesce the device, put it into + * a low power state appropriate for the upcoming system state (such as + * PCI_D3hot), and enable wakeup events as appropriate. + * + * @resume: Executed after waking the system up from a sleep state in which the + * contents of main memory were preserved. Put the device into the + * appropriate state, according to the information saved in memory by the + * preceding @suspend(). The driver starts working again, responding to + * hardware events and software requests. The hardware may have gone + * through a power-off reset, or it may have maintained state from the + * previous suspend() which the driver may rely on while resuming. On most + * platforms, there are no restrictions on availability of resources like + * clocks during @resume(). + * + * @freeze: Hibernation-specific, executed before creating a hibernation image. + * Quiesce operations so that a consistent image can be created, but do NOT + * otherwise put the device into a low power device state and do NOT emit + * system wakeup events. Save in main memory the device settings to be + * used by @restore() during the subsequent resume from hibernation or by + * the subsequent @thaw(), if the creation of the image or the restoration + * of main memory contents from it fails. + * + * @thaw: Hibernation-specific, executed after creating a hibernation image OR + * if the creation of the image fails. Also executed after a failing + * attempt to restore the contents of main memory from such an image. 
+ * Undo the changes made by the preceding @freeze(), so the device can be + * operated in the same way as immediately before the call to @freeze(). + * + * @poweroff: Hibernation-specific, executed after saving a hibernation image. + * Quiesce the device, put it into a low power state appropriate for the + * upcoming system state (such as PCI_D3hot), and enable wakeup events as + * appropriate. + * + * @restore: Hibernation-specific, executed after restoring the contents of main + * memory from a hibernation image. Driver starts working again, + * responding to hardware events and software requests. Drivers may NOT + * make ANY assumptions about the hardware state right prior to @restore(). + * On most platforms, there are no restrictions on availability of + * resources like clocks during @restore(). + * + * All of the above callbacks, except for @complete(), return error codes. + * However, the error codes returned by the resume operations, @resume(), + * @thaw(), and @restore(), do not cause the PM core to abort the resume + * transition during which they are returned. The error codes returned in + * those cases are only printed by the PM core to the system logs for debugging + * purposes. Still, it is recommended that drivers only return error codes + * from their resume methods in case of an unrecoverable failure (i.e. when the + * device being handled refuses to resume and becomes unusable) to allow us to + * modify the PM core in the future, so that it can avoid attempting to handle + * devices that failed to resume and their children. + * + * It is allowed to unregister devices while the above callbacks are being + * executed. However, it is not allowed to unregister a device from within any + * of its own callbacks.
+ */ + +struct pm_ops { + int (*prepare)(struct device *dev); + void (*complete)(struct device *dev); + int (*suspend)(struct device *dev); + int (*resume)(struct device *dev); + int (*freeze)(struct device *dev); + int (*thaw)(struct device *dev); + int (*poweroff)(struct device *dev); + int (*restore)(struct device *dev); +}; + +/** + * struct pm_ext_ops - extended device PM callbacks + * + * Some devices require certain operations related to suspend and hibernation + * to be carried out with interrupts disabled. Thus, 'struct pm_ext_ops' below + * is defined, adding callbacks to be executed with interrupts disabled to + * 'struct pm_ops'. + * + * The following callbacks included in 'struct pm_ext_ops' are executed with + * the nonboot CPUs switched off and with interrupts disabled on the only + * functional CPU. They also are executed with the PM core list of devices + * locked, so they must NOT unregister any devices. + * + * @suspend_noirq: Complete the operations of ->suspend() by carrying out any + * actions required for suspending the device that need interrupts to be + * disabled + * + * @resume_noirq: Prepare for the execution of ->resume() by carrying out any + * actions required for resuming the device that need interrupts to be + * disabled + * + * @freeze_noirq: Complete the operations of ->freeze() by carrying out any + * actions required for freezing the device that need interrupts to be + * disabled + * + * @thaw_noirq: Prepare for the execution of ->thaw() by carrying out any + * actions required for thawing the device that need interrupts to be + * disabled + * + * @poweroff_noirq: Complete the operations of ->poweroff() by carrying out any + * actions required for handling the device that need interrupts to be + * disabled + * + * @restore_noirq: Prepare for the execution of ->restore() by carrying out any + * actions required for restoring the operations of the device that need + * interrupts to be disabled + * + * All of the above callbacks 
return error codes, but the error codes returned + * by the resume operations, @resume_noirq(), @thaw_noirq(), and + * @restore_noirq(), do not cause the PM core to abort the resume transition + * during which they are returned. The error codes returned in those cases are + * only printed by the PM core to the system logs for debugging purposes. + * Still, as stated above, it is recommended that drivers only return error + * codes from their resume methods if the device being handled fails to resume + * and is not usable any more. + */ + +struct pm_ext_ops { + struct pm_ops base; + int (*suspend_noirq)(struct device *dev); + int (*resume_noirq)(struct device *dev); + int (*freeze_noirq)(struct device *dev); + int (*thaw_noirq)(struct device *dev); + int (*poweroff_noirq)(struct device *dev); + int (*restore_noirq)(struct device *dev); +}; + +/** + * PM_EVENT_ messages + * + * The following PM_EVENT_ messages are defined for the internal use of the PM + * core, in order to provide a mechanism allowing the high level suspend and + * hibernation code to convey the necessary information to the device PM core + * code: + * + * ON No transition. + * + * FREEZE System is going to hibernate, call ->prepare() and ->freeze() + * for all devices. + * + * SUSPEND System is going to suspend, call ->prepare() and ->suspend() + * for all devices. + * + * HIBERNATE Hibernation image has been saved, call ->prepare() and + * ->poweroff() for all devices. + * + * QUIESCE Contents of main memory are going to be restored from a (loaded) + * hibernation image, call ->prepare() and ->freeze() for all + * devices. + * + * RESUME System is resuming, call ->resume() and ->complete() for all + * devices. + * + * THAW Hibernation image has been created, call ->thaw() and + * ->complete() for all devices. + * + * RESTORE Contents of main memory have been restored from a hibernation + * image, call ->restore() and ->complete() for all devices.
+ * + * RECOVER Creation of a hibernation image or restoration of the main + * memory contents from a hibernation image has failed, call + * ->thaw() and ->complete() for all devices. + */ + +#define PM_EVENT_ON 0x0000 +#define PM_EVENT_FREEZE 0x0001 +#define PM_EVENT_SUSPEND 0x0002 +#define PM_EVENT_HIBERNATE 0x0004 +#define PM_EVENT_QUIESCE 0x0008 +#define PM_EVENT_RESUME 0x0010 +#define PM_EVENT_THAW 0x0020 +#define PM_EVENT_RESTORE 0x0040 +#define PM_EVENT_RECOVER 0x0080 + +#define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE) + +#define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) +#define PMSG_QUIESCE ((struct pm_message){ .event = PM_EVENT_QUIESCE, }) +#define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) +#define PMSG_HIBERNATE ((struct pm_message){ .event = PM_EVENT_HIBERNATE, }) +#define PMSG_RESUME ((struct pm_message){ .event = PM_EVENT_RESUME, }) +#define PMSG_THAW ((struct pm_message){ .event = PM_EVENT_THAW, }) +#define PMSG_RESTORE ((struct pm_message){ .event = PM_EVENT_RESTORE, }) +#define PMSG_RECOVER ((struct pm_message){ .event = PM_EVENT_RECOVER, }) +#define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) + +/** + * Device power management states + * + * These state labels are used internally by the PM core to indicate the current + * status of a device with respect to the PM core operations. + * + * DPM_ON Device is regarded as operational. Set this way + * initially and when ->complete() is about to be called. + * Also set when ->prepare() fails. + * + * DPM_PREPARING Device is going to be prepared for a PM transition. Set + * when ->prepare() is about to be called. + * + * DPM_RESUMING Device is going to be resumed. Set when ->resume(), + * ->thaw(), or ->restore() is about to be called. + * + * DPM_SUSPENDING Device has been prepared for a power transition. Set + * when ->prepare() has just succeeded. + * + * DPM_OFF Device is regarded as inactive. 
Set immediately after + * ->suspend(), ->freeze(), or ->poweroff() has succeeded. + * Also set when ->resume_noirq(), ->thaw_noirq(), or + * ->restore_noirq() is about to be called. + * + * DPM_OFF_IRQ Device is in a "deep sleep". Set immediately after + * ->suspend_noirq(), ->freeze_noirq(), or + * ->poweroff_noirq() has just succeeded. + */ + +enum dpm_state { + DPM_INVALID, + DPM_ON, + DPM_PREPARING, + DPM_RESUMING, + DPM_SUSPENDING, + DPM_OFF, + DPM_OFF_IRQ, +}; + +struct dev_pm_info { + pm_message_t power_state; + unsigned can_wakeup:1; + unsigned should_wakeup:1; + enum dpm_state status; /* Owned by the PM core */ +#ifdef CONFIG_PM_SLEEP + struct list_head entry; +#endif +}; + +/* + * The PM_EVENT_ messages are also used by drivers implementing the legacy + * suspend framework, based on the ->suspend() and ->resume() callbacks common + * for suspend and hibernation transitions, according to the rules below. + */ + +/* Necessary, because several drivers use PM_EVENT_PRETHAW */ +#define PM_EVENT_PRETHAW PM_EVENT_QUIESCE + +/* * One transition is triggered by resume(), after a suspend() call; the * message is implicit: * @@ -164,35 +444,13 @@ typedef struct pm_message { * or from system low-power states such as standby or suspend-to-RAM.
*/ -#define PM_EVENT_ON 0 -#define PM_EVENT_FREEZE 1 -#define PM_EVENT_SUSPEND 2 -#define PM_EVENT_HIBERNATE 4 -#define PM_EVENT_PRETHAW 8 - -#define PM_EVENT_SLEEP (PM_EVENT_SUSPEND | PM_EVENT_HIBERNATE) - -#define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) -#define PMSG_PRETHAW ((struct pm_message){ .event = PM_EVENT_PRETHAW, }) -#define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) -#define PMSG_HIBERNATE ((struct pm_message){ .event = PM_EVENT_HIBERNATE, }) -#define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) - -struct dev_pm_info { - pm_message_t power_state; - unsigned can_wakeup:1; - unsigned should_wakeup:1; - bool sleeping:1; /* Owned by the PM core */ -#ifdef CONFIG_PM_SLEEP - struct list_head entry; -#endif -}; +#ifdef CONFIG_PM_SLEEP +extern void device_pm_lock(void); +extern void device_power_up(pm_message_t state); +extern void device_resume(pm_message_t state); +extern void device_pm_unlock(void); extern int device_power_down(pm_message_t state); -extern void device_power_up(void); -extern void device_resume(void); - -#ifdef CONFIG_PM_SLEEP extern int device_suspend(pm_message_t state); extern int device_prepare_suspend(pm_message_t state); -- cgit v1.2.3 From bbb44d9f23d868a2837c6b22b8dfb123d8e7800c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 20 May 2008 00:49:04 +0200 Subject: PCI: implement new suspend/resume callbacks Implement new suspend and hibernation callbacks for the PCI bus type. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 700704ef70f..507ee52323c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -389,7 +389,7 @@ struct pci_driver { int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); - + struct pm_ext_ops *pm; struct pci_error_handlers *err_handler; struct device_driver driver; struct pci_dynids dynids; -- cgit v1.2.3 From 25e18499e08cb097cbbfeab5de25d094d5312ee5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 May 2008 01:40:43 +0200 Subject: Implement new suspend and hibernation callbacks for platform busses Implement new suspend and hibernation callbacks for the platform bus type. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Greg KH Signed-off-by: Jesse Barnes --- include/linux/platform_device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 3261681c82a..95ac21ab3a0 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -53,6 +53,7 @@ struct platform_driver { int (*suspend_late)(struct platform_device *, pm_message_t state); int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); + struct pm_ext_ops *pm; struct device_driver driver; }; -- cgit v1.2.3 From ac9c052d10d8d6f46a30cb46c0d6d753997c299f Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 28 May 2008 15:01:03 +0900 Subject: shpchp: check firmware before taking control Fix the following problems of shpchp driver about getting hotplug control from firmware. - The shpchp driver must not control the hotplug controller if it fails to get control from the firmware. 
But current shpchp controls the hotplug controller regardless of the result, because it doesn't check the return value of get_hp_hw_control_from_firmware(). - Current shpchp driver doesn't support _OSC. The pciehp driver already has the code for evaluating _OSC and OSHP and shpchp and pciehp can share it. So this patch moves that code from pciehp to acpi_pcihp.c. Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- include/linux/pci_hotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 8f67e8f2a3c..dbdcd1ad3c6 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -227,9 +227,9 @@ struct hotplug_params { #include #include #include -extern acpi_status acpi_run_oshp(acpi_handle handle); extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, struct hotplug_params *hpp); +int acpi_get_hp_hw_control_from_firmware(struct pci_dev *dev, u32 flags); int acpi_root_bridge(acpi_handle handle); #endif #endif -- cgit v1.2.3 From f46753c5e354b857b20ab8e0fe7b2579831dc369 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Tue, 10 Jun 2008 15:28:50 -0600 Subject: PCI: introduce pci_slot Currently, /sys/bus/pci/slots/ only exposes hotplug attributes when a hotplug driver is loaded, but PCI slots have attributes such as address, speed, width, etc. that are not related to hotplug at all. Introduce pci_slot as the primary data structure and kobject model. Hotplug attributes described in hotplug_slot become a secondary structure associated with the pci_slot. This patch only creates the infrastructure that allows the separation of PCI slot attributes and hotplug attributes. In this patch, the PCI hotplug core remains the only user of this infrastructure, and thus, /sys/bus/pci/slots/ will still only become populated when a hotplug driver is loaded.
A later patch in this series will add a second user of this new infrastructure and demonstrate splitting the task of exposing pci_slot attributes from hotplug_slot attributes. - Make pci_slot the primary sysfs entity. hotplug_slot becomes a subsidiary structure. o pci_create_slot() creates and registers a slot with the PCI core o pci_slot_add_hotplug() gives it hotplug capability - Change the prototype of pci_hp_register() to take the bus and slot number (on parent bus) as parameters. - Remove all the ->get_address methods since this functionality is now handled by pci_slot directly. [achiang@hp.com: rpaphp-correctly-pci_hp_register-for-empty-pci-slots] Tested-by: Badari Pulavarty Acked-by: Benjamin Herrenschmidt [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: make headers_check happy] [akpm@linux-foundation.org: nuther build fix] [akpm@linux-foundation.org: fix typo in #include] Signed-off-by: Alex Chiang Signed-off-by: Matthew Wilcox Cc: Greg KH Cc: Kristen Carlson Accardi Cc: Len Brown Acked-by: Kenji Kaneshige Signed-off-by: Andrew Morton Signed-off-by: Jesse Barnes --- include/linux/pci.h | 19 +++++++++++++++++-- include/linux/pci_hotplug.h | 12 +++--------- 2 files changed, 20 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 507ee52323c..f1f73f79a18 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -17,8 +17,7 @@ #ifndef LINUX_PCI_H #define LINUX_PCI_H -/* Include the pci register defines */ -#include +#include /* The pci register defines */ /* * The PCI interface treats multi-function devices as independent @@ -49,12 +48,22 @@ #include #include #include +#include #include #include /* Include the ID list */ #include +/* pci_slot represents a physical slot */ +struct pci_slot { + struct pci_bus *bus; /* The bus this slot is on */ + struct list_head list; /* node in list of slots on this bus */ + struct hotplug_slot *hotplug; /* Hotplug info (migrate over 
time) */ + unsigned char number; /* PCI_SLOT(pci_dev->devfn) */ + struct kobject kobj; +}; + /* File state for mmap()s on /proc/bus/pci/X/Y */ enum pci_mmap_state { pci_mmap_io, @@ -142,6 +151,7 @@ struct pci_dev { void *sysdata; /* hook for sys-specific extension */ struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */ + struct pci_slot *slot; /* Physical slot this device is in */ unsigned int devfn; /* encoded device & function index */ unsigned short vendor; @@ -266,6 +276,7 @@ struct pci_bus { struct list_head children; /* list of child buses */ struct list_head devices; /* list of devices on this bus */ struct pci_dev *self; /* bridge device as seen by parent */ + struct list_head slots; /* list of slots on this bus */ struct resource *resource[PCI_BUS_NUM_RESOURCES]; /* address space routed to this bus */ @@ -488,6 +499,10 @@ struct pci_bus *pci_create_bus(struct device *parent, int bus, struct pci_ops *ops, void *sysdata); struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr); +struct pci_slot *pci_create_slot(struct pci_bus *parent, int slot_nr, + const char *name); +void pci_destroy_slot(struct pci_slot *slot); +void pci_update_slot_number(struct pci_slot *slot, int slot_nr); int pci_scan_slot(struct pci_bus *bus, int devfn); struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn); void pci_device_add(struct pci_dev *dev, struct pci_bus *bus); diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index dbdcd1ad3c6..a08cd06b541 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -95,9 +95,6 @@ struct hotplug_slot_attribute { * @get_adapter_status: Called to get see if an adapter is present in the slot or not. * If this field is NULL, the value passed in the struct hotplug_slot_info * will be used when this value is requested by a user. - * @get_address: Called to get pci address of a slot. 
- * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. * @get_max_bus_speed: Called to get the max bus speed for a slot. * If this field is NULL, the value passed in the struct hotplug_slot_info * will be used when this value is requested by a user. @@ -120,7 +117,6 @@ struct hotplug_slot_ops { int (*get_attention_status) (struct hotplug_slot *slot, u8 *value); int (*get_latch_status) (struct hotplug_slot *slot, u8 *value); int (*get_adapter_status) (struct hotplug_slot *slot, u8 *value); - int (*get_address) (struct hotplug_slot *slot, u32 *value); int (*get_max_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); int (*get_cur_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); }; @@ -140,7 +136,6 @@ struct hotplug_slot_info { u8 attention_status; u8 latch_status; u8 adapter_status; - u32 address; enum pci_bus_speed max_bus_speed; enum pci_bus_speed cur_bus_speed; }; @@ -166,15 +161,14 @@ struct hotplug_slot { /* Variables below this are for use only by the hotplug pci core. */ struct list_head slot_list; - struct kobject kobj; + struct pci_slot *pci_slot; }; #define to_hotplug_slot(n) container_of(n, struct hotplug_slot, kobj) -extern int pci_hp_register (struct hotplug_slot *slot); -extern int pci_hp_deregister (struct hotplug_slot *slot); +extern int pci_hp_register(struct hotplug_slot *, struct pci_bus *, int nr); +extern int pci_hp_deregister(struct hotplug_slot *slot); extern int __must_check pci_hp_change_slot_info (struct hotplug_slot *slot, struct hotplug_slot_info *info); -extern struct kset *pci_hotplug_slots_kset; /* PCI Setting Record (Type 0) */ struct hpp_type0 { -- cgit v1.2.3 From d8f3de0d2412bb91639cfefc5b3c79dbf3812212 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 12 Jun 2008 23:24:06 +0200 Subject: Suspend-related patches for 2.6.27 ACPI PM: Add possibility to change suspend sequence There are some systems out there that don't work correctly with our current suspend/hibernation code ordering. Provide a workaround for these systems allowing them to pass 'acpi_sleep=old_ordering' in the kernel command line so that it will use the pre-ACPI 2.0 ("old") suspend code ordering. Unfortunately, this requires us to add a platform hook to the resuming of devices for recovering the platform in case one of the device drivers' .suspend() routines returns error code. Namely, ACPI 1.0 specifies that _PTS should be called before suspending devices, but _WAK still should be called before resuming them in order to undo the changes made by _PTS. However, if there is an error during suspending devices, they are automatically resumed without returning control to the PM core, so the _WAK has to be called from within device_resume() in those cases. The patch also reorders and refactors the ACPI suspend/hibernation code to avoid duplication as far as reasonably possible. Signed-off-by: Rafael J.
Wysocki Acked-by: Pavel Machek Signed-off-by: Jesse Barnes --- include/linux/acpi.h | 3 +++ include/linux/suspend.h | 14 +++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 41f7ce7edd7..33adcf91ef4 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -234,6 +234,9 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_check_mem_region(resource_size_t start, resource_size_t n, const char *name); +#ifdef CONFIG_PM_SLEEP +void __init acpi_old_suspend_ordering(void); +#endif /* CONFIG_PM_SLEEP */ #else /* CONFIG_ACPI */ static inline int early_acpi_boot_init(void) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index a6977423baf..e8e69159af7 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -86,6 +86,11 @@ typedef int __bitwise suspend_state_t; * that implement @begin(), but platforms implementing @begin() should * also provide a @end() which cleans up transitions aborted before * @enter(). + * + * @recover: Recover the platform from a suspend failure. + * Called by the PM core if the suspending of devices fails. + * This callback is optional and should only be implemented by platforms + * which require special recovery actions in that situation. */ struct platform_suspend_ops { int (*valid)(suspend_state_t state); @@ -94,6 +99,7 @@ struct platform_suspend_ops { int (*enter)(suspend_state_t state); void (*finish)(void); void (*end)(void); + void (*recover)(void); }; #ifdef CONFIG_SUSPEND @@ -149,7 +155,7 @@ extern void mark_free_pages(struct zone *zone); * The methods in this structure allow a platform to carry out special * operations required by it during a hibernation transition. * - * All the methods below must be implemented. + * All the methods below, except for @recover(), must be implemented. * * @begin: Tell the platform driver that we're starting hibernation. 
* Called right after shrinking memory and before freezing devices. @@ -189,6 +195,11 @@ extern void mark_free_pages(struct zone *zone); * @restore_cleanup: Clean up after a failing image restoration. * Called right after the nonboot CPUs have been enabled and before * thawing devices (runs with IRQs on). + * + * @recover: Recover the platform from a failure to suspend devices. + * Called by the PM core if the suspending of devices during hibernation + * fails. This callback is optional and should only be implemented by + * platforms which require special recovery actions in that situation. */ struct platform_hibernation_ops { int (*begin)(void); @@ -200,6 +211,7 @@ struct platform_hibernation_ops { void (*leave)(void); int (*pre_restore)(void); void (*restore_cleanup)(void); + void (*recover)(void); }; #ifdef CONFIG_HIBERNATION -- cgit v1.2.3 From c50cbb05a05cf1f9ca3592272eff053c847727d8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 4 Jun 2008 21:47:29 -0700 Subject: cpu topology: always define CPU topology information This can result in an empty topology directory in sysfs, and requires in-kernel users to protect all uses with #ifdef - see . The documentation of CPU topology specifies what the defaults should be if only partial information is available from the hardware. So we can provide these defaults as a fallback. 
This patch: - Adds default definitions of the 4 topology macros to <linux/topology.h> - Changes drivers/base/topology.c to use the topology macros unconditionally and to cope with definitions that aren't lvalues - Updates documentation accordingly [ From: Andrew Morton - fold now-duplicated code - fix layout ] Signed-off-by: Ben Hutchings Cc: Vegard Nossum Cc: Nick Piggin Cc: Chandra Seetharaman Cc: Suresh Siddha Cc: Mike Travis Cc: Christoph Lameter Cc: John Hawkes Cc: Zhang, Yanmin Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/topology.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 24f3d2282e1..2158fc0d5a5 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -179,4 +179,17 @@ void arch_update_cpu_topology(void); #endif #endif /* CONFIG_NUMA */ +#ifndef topology_physical_package_id +#define topology_physical_package_id(cpu) ((void)(cpu), -1) +#endif +#ifndef topology_core_id +#define topology_core_id(cpu) ((void)(cpu), 0) +#endif +#ifndef topology_thread_siblings +#define topology_thread_siblings(cpu) cpumask_of_cpu(cpu) +#endif +#ifndef topology_core_siblings +#define topology_core_siblings(cpu) cpumask_of_cpu(cpu) +#endif + #endif /* _LINUX_TOPOLOGY_H */ -- cgit v1.2.3 From e17ba73b0ee6c0f24393c48b455e0d8db761782c Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 12 May 2008 15:44:40 +0200 Subject: x86, generic: mark early_printk as asmlinkage It's not explicitly marked as asmlinkage, but invoked from x86_32 startup code with parameters on stack. No other architectures define early_printk and none of them are affected by this change, since defines asmlinkage as empty token. Signed-off-by: Jiri Slaby Cc: H.
Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index f2a668c195b..4cb8d3df414 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -207,7 +207,7 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ { return false; } #endif -extern void __attribute__((format(printf, 1, 2))) +extern void asmlinkage __attribute__((format(printf, 1, 2))) early_printk(const char *fmt, ...); unsigned long int_sqrt(unsigned long); -- cgit v1.2.3 From 443cd507ce7f78c6f8742b72736585c031d5a921 Mon Sep 17 00:00:00 2001 From: "Huang, Ying" Date: Fri, 20 Jun 2008 16:39:21 +0800 Subject: lockdep: add lock_class information to lock_chain and output it This patch records array of lock_class into lock_chain, and export lock_chain information via /proc/lockdep_chains. It is based on x86/master branch of git-x86 tree, and has been tested on x86_64 platform. Signed-off-by: Huang Ying Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 4c4d236ded1..b26fbc715a5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -182,6 +182,9 @@ struct lock_list { * We record lock dependency chains, so that we can cache them: */ struct lock_chain { + u8 irq_context; + u8 depth; + u16 base; struct list_head entry; u64 chain_key; }; -- cgit v1.2.3 From 0b2806768899dba5967bcd4a3b93eaed9a1dc4f3 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 18 May 2008 14:27:41 -0600 Subject: Add cycle_kernel_lock() A number of driver functions are so obviously trivial that they do not need the big kernel lock - at least not overtly. 
It turns out that the acquisition of the BKL in driver open() functions can perform a sort of poor-hacker's serialization function, delaying the open operation until the driver is certain to have completed its initialization. Add a simple cycle_kernel_lock() function for these cases to make it clear that there is no need to *hold* the BKL, just to be sure that we can acquire it. Signed-off-by: Jonathan Corbet --- include/linux/smp_lock.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h index aab3a4cff4e..813be59bf34 100644 --- a/include/linux/smp_lock.h +++ b/include/linux/smp_lock.h @@ -27,11 +27,24 @@ static inline int reacquire_kernel_lock(struct task_struct *task) extern void __lockfunc lock_kernel(void) __acquires(kernel_lock); extern void __lockfunc unlock_kernel(void) __releases(kernel_lock); +/* + * Various legacy drivers don't really need the BKL in a specific + * function, but they *do* need to know that the BKL became available. + * This function just avoids wrapping a bunch of lock/unlock pairs + * around code which doesn't really need it. + */ +static inline void cycle_kernel_lock(void) +{ + lock_kernel(); + unlock_kernel(); +} + #else #define lock_kernel() do { } while(0) #define unlock_kernel() do { } while(0) #define release_kernel_lock(task) do { } while(0) +#define cycle_kernel_lock() do { } while(0) #define reacquire_kernel_lock(task) 0 #define kernel_locked() 1 -- cgit v1.2.3 From 395a59d0f8e86bb39cd700c3d185d30c670bb958 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:27 +0530 Subject: ftrace: store mcount address in rec->ip Record the address of the mcount call-site. Currently all archs except sparc64 record the address of the instruction following the mcount call-site. Some general cleanups are entailed. 
Storing mcount addresses in rec->ip enables looking them up in the kprobe hash table later on to check if they're kprobe'd. Signed-off-by: Abhishek Sagar Cc: davem@davemloft.net Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 20e14d0093c..366098d591d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -31,7 +31,6 @@ int unregister_ftrace_function(struct ftrace_ops *ops); void clear_ftrace_function(void); extern void ftrace_stub(unsigned long a0, unsigned long a1); -extern void mcount(void); #else /* !CONFIG_FTRACE */ # define register_ftrace_function(ops) do { } while (0) @@ -54,7 +53,7 @@ enum { struct dyn_ftrace { struct hlist_node node; - unsigned long ip; + unsigned long ip; /* address of mcount call-site */ unsigned long flags; }; -- cgit v1.2.3 From 785656a41f9a9c0e843a23d1ae05d900b5158f8f Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:39 +0530 Subject: kprobes: enable clean usage of get_kprobe Allow clean use of get_kprobe() outside of core kprobe code. Ftrace makes use of get_kprobe to identify probes installed on mcount call-sites. 
Signed-off-by: Abhishek Sagar Acked-by: Ananth N Mavinakayanahalli Cc: Masami Hiramatsu Cc: jkenisto@us.ibm.com Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 1036631ff4f..04a3556bdea 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -259,6 +259,10 @@ void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); struct jprobe; struct kretprobe; +static inline struct kprobe *get_kprobe(void *addr) +{ + return NULL; +} static inline struct kprobe *kprobe_running(void) { return NULL; -- cgit v1.2.3 From ecea656d1d5e912d2f3d332657ea4a6d8380f891 Mon Sep 17 00:00:00 2001 From: Abhishek Sagar Date: Sat, 21 Jun 2008 23:47:53 +0530 Subject: ftrace: freeze kprobe'd records Let records identified as being kprobe'd be marked as "frozen". The trouble with records which have a kprobe installed on their mcount call-site is that they don't get updated. So if such a function which is currently being traced gets its tracing disabled due to a new filter rule (or because it was added to the notrace list) then it won't be updated and continue being traced. This patch allows scanning of all frozen records during tracing to check if they should be traced. 
Signed-off-by: Abhishek Sagar Cc: Steven Rostedt Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 366098d591d..3121b95443d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -49,6 +49,7 @@ enum { FTRACE_FL_ENABLED = (1 << 3), FTRACE_FL_NOTRACE = (1 << 4), FTRACE_FL_CONVERTED = (1 << 5), + FTRACE_FL_FROZEN = (1 << 6), }; struct dyn_ftrace { @@ -73,15 +74,18 @@ extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); +extern int skip_trace(unsigned long ip); + void ftrace_disable_daemon(void); void ftrace_enable_daemon(void); #else +# define skip_trace(ip) ({ 0; }) # define ftrace_force_update() ({ 0; }) # define ftrace_set_filter(buf, len, reset) do { } while (0) # define ftrace_disable_daemon() do { } while (0) # define ftrace_enable_daemon() do { } while (0) -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); -- cgit v1.2.3 From 3da757daf86e498872855f0b5e101f763ba79499 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 20 Jun 2008 15:06:33 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation On the x86 platform we can use the value of tsc_khz computed during tsc calibration to calculate the loops_per_jiffy value. It's very important to keep the error in lpj values to a minimum as any error in that may result in kernel panic in check_timer. In a virtualization environment, on a highly overloaded host the guest delay calibration may sometimes result in errors beyond the ~50% that timer_irq_works can handle, resulting in the guest panicking. Does some formatting changes to lpj_setup code to now have a single printk to print the bogomips value.
We do this only for the boot processor because the AP's can have different base frequencies or the BIOS might boot a AP at a different frequency. Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- include/linux/delay.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/delay.h b/include/linux/delay.h index 54552d21296..01aec60590a 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -41,6 +41,7 @@ static inline void ndelay(unsigned long x) #define ndelay(x) ndelay(x) #endif +extern unsigned long lpj_tsc; void calibrate_delay(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -- cgit v1.2.3 From 961ccddd59d627b89bd3dc284b6517833bbdf25d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 23 Jun 2008 13:55:38 +1000 Subject: sched: add new API sched_setscheduler_nocheck: add a flag to control access checks Hidehiro Kawai noticed that sched_setscheduler() can fail in stop_machine: it calls sched_setscheduler() from insmod, which can have CAP_SYS_MODULE without CAP_SYS_NICE. 
Two cases could have failed, so are changed to sched_setscheduler_nocheck: kernel/softirq.c:cpu_callback() - CPU hotplug callback kernel/stop_machine.c:__stop_machine_run() - Called from various places, including modprobe() Signed-off-by: Rusty Russell Cc: Jeremy Fitzhardinge Cc: Hidehiro Kawai Cc: Andrew Morton Cc: linux-mm@kvack.org Cc: sugita Cc: Satoshi OSHIMA Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f847ca8..fe3b9b5d739 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1655,6 +1655,8 @@ extern int can_nice(const struct task_struct *p, const int nice); extern int task_curr(const struct task_struct *p); extern int idle_cpu(int cpu); extern int sched_setscheduler(struct task_struct *, int, struct sched_param *); +extern int sched_setscheduler_nocheck(struct task_struct *, int, + struct sched_param *); extern struct task_struct *idle_task(int cpu); extern struct task_struct *curr_task(int cpu); extern void set_curr_task(int cpu, struct task_struct *p); -- cgit v1.2.3 From a033c332e047397904ed74816946b2edd9b0d5cd Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 23 Jun 2008 10:52:42 +0800 Subject: lockdep: remove duplicate definition of STATIC_LOCKDEP_MAP_INIT STATIC_LOCKDEP_MAP_INIT is defined twice in lockdep.h. I guess it's a copy & paste. Signed-off-by: Li Zefan Cc: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b26fbc715a5..2486eb4edbf 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -278,14 +278,6 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, lockdep_init_map(&(lock)->dep_map, #lock, \ (lock)->dep_map.key, sub) -/* - * To initialize a lockdep_map statically use this macro. 
- * Note that _name must not be NULL. - */ -#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ - { .name = (_name), .key = (void *)(_key), } - - /* * Acquire a lock. * -- cgit v1.2.3 From f3f3149f35b9195ef4b761b1353fc0766b5f53be Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Mon, 23 Jun 2008 18:21:56 -0700 Subject: x86: use cpu_khz for loops_per_jiffy calculation, cleanup As suggested by Ingo, remove all references to tsc from init/calibrate.c TSC is x86 specific, and using tsc in variable names in a generic file should be avoided. lpj_tsc is now called lpj_fine, since it is related to fine tuning of lpj value. Also tsc_rate_* is called timer_rate_* Signed-off-by: Alok N Kataria Cc: Arjan van de Ven Cc: Daniel Hecht Cc: Tim Mann Cc: Zach Amsden Cc: Sahil Rihan Signed-off-by: Ingo Molnar --- include/linux/delay.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/delay.h b/include/linux/delay.h index 01aec60590a..fd832c6d419 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -41,7 +41,7 @@ static inline void ndelay(unsigned long x) #define ndelay(x) ndelay(x) #endif -extern unsigned long lpj_tsc; +extern unsigned long lpj_fine; void calibrate_delay(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); -- cgit v1.2.3 From 3d4422332711ef48ef0f132f1fcbfcbd56c7f3d1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 26 Jun 2008 11:21:34 +0200 Subject: Add generic helpers for arch IPI function calls This adds kernel/smp.c which contains helpers for IPI function calls. In addition to supporting the existing smp_call_function() in a more efficient manner, it also adds a more scalable variant called smp_call_function_single() for calling a given function on a single CPU only. The core of this is based on the x86-64 patch from Nick Piggin, lots of changes since then. "Alan D. Brunelle" has contributed lots of fixes and suggestions as well. Also thanks to Paul E. 
McKenney for reviewing RCU usage and getting rid of the data allocation fallback deadlock. Acked-by: Ingo Molnar Reviewed-by: Paul E. McKenney Signed-off-by: Jens Axboe --- include/linux/smp.h | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 55232ccf9cf..eac3e062250 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -7,9 +7,19 @@ */ #include +#include +#include +#include extern void cpu_idle(void); +struct call_single_data { + struct list_head list; + void (*func) (void *info); + void *info; + unsigned int flags; +}; + #ifdef CONFIG_SMP #include @@ -53,9 +63,28 @@ extern void smp_cpus_done(unsigned int max_cpus); * Call a function on all other processors */ int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); - +int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, + int wait); int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, int retry, int wait); +void __smp_call_function_single(int cpuid, struct call_single_data *data); + +/* + * Generic and arch helpers + */ +#ifdef CONFIG_USE_GENERIC_SMP_HELPERS +void generic_smp_call_function_single_interrupt(void); +void generic_smp_call_function_interrupt(void); +void init_call_single_data(void); +void ipi_call_lock(void); +void ipi_call_unlock(void); +void ipi_call_lock_irq(void); +void ipi_call_unlock_irq(void); +#else +static inline void init_call_single_data(void) +{ +} +#endif /* * Call a function on all processors @@ -112,7 +141,9 @@ static inline void smp_send_reschedule(int cpu) { } }) #define smp_call_function_mask(mask, func, info, wait) \ (up_smp_call_function(func, info)) - +static inline void init_call_single_data(void) +{ +} #endif /* !SMP */ /* -- cgit v1.2.3 From 8691e5a8f691cc2a4fda0651e8d307aaba0e7d68 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Jun 2008 11:18:06 +0200 
Subject: smp_call_function: get rid of the unused nonatomic/retry argument It's never used and the comments refer to nonatomic and retry interchangeably. So get rid of it. Acked-by: Jeremy Fitzhardinge Signed-off-by: Jens Axboe --- include/linux/smp.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index eac3e062250..338cad1b954 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -62,11 +62,11 @@ extern void smp_cpus_done(unsigned int max_cpus); /* * Call a function on all other processors */ -int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); +int smp_call_function(void(*func)(void *info), void *info, int wait); int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, int wait); int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, - int retry, int wait); + int wait); void __smp_call_function_single(int cpuid, struct call_single_data *data); /* @@ -119,7 +119,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) { return 0; } -#define smp_call_function(func, info, retry, wait) \ +#define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) #define on_each_cpu(func,info,retry,wait) \ ({ \ @@ -131,7 +131,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) static inline void smp_send_reschedule(int cpu) { } #define num_booting_cpus() 1 #define smp_prepare_boot_cpu() do {} while (0) -#define smp_call_function_single(cpuid, func, info, retry, wait) \ +#define smp_call_function_single(cpuid, func, info, wait) \ ({ \ WARN_ON(cpuid != 0); \ local_irq_disable(); \ -- cgit v1.2.3 From 15c8b6c1aaaf1c4edd67e2f02e4d8e1bd1a51c0d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 May 2008 09:39:44 +0200 Subject: on_each_cpu(): kill unused 'retry' parameter It's not even passed on to smp_call_function() anymore, since that was
removed. So kill it. Acked-by: Jeremy Fitzhardinge Reviewed-by: Paul E. McKenney Signed-off-by: Jens Axboe --- include/linux/smp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 338cad1b954..55261101d09 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -89,7 +89,7 @@ static inline void init_call_single_data(void) /* * Call a function on all processors */ -int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait); +int on_each_cpu(void (*func) (void *info), void *info, int wait); #define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */ #define MSG_ALL 0x8001 @@ -121,7 +121,7 @@ static inline int up_smp_call_function(void (*func)(void *), void *info) } #define smp_call_function(func, info, wait) \ (up_smp_call_function(func, info)) -#define on_each_cpu(func,info,retry,wait) \ +#define on_each_cpu(func,info,wait) \ ({ \ local_irq_disable(); \ func(info); \ -- cgit v1.2.3 From 1bdad606338debc6384b2844f1b53cc436b3ac90 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 3 Jun 2008 14:09:53 +0100 Subject: [GFS2] Remove remote lock dropping code There are several reasons why this is undesirable: 1. It never happens during normal operation anyway 2. If it does happen it causes performance to be very, very poor 3. It isn't likely to solve the original problem (memory shortage on remote DLM node) it was supposed to solve 4. It uses a bunch of arbitrary constants which are unlikely to be correct for any particular situation and for which the tuning seems to be a black art. 5. In an N node cluster, only 1/N of the dropped locked will actually contribute to solving the problem on average. So all in all we are better off without it. This also makes merging the lock_dlm module into GFS2 a bit easier. 
Signed-off-by: Steven Whitehouse --- include/linux/lm_interface.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lm_interface.h b/include/linux/lm_interface.h index f274997bc28..d0a7112b971 100644 --- a/include/linux/lm_interface.h +++ b/include/linux/lm_interface.h @@ -138,9 +138,6 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data); * LM_CB_NEED_RECOVERY * The given journal needs to be recovered. * - * LM_CB_DROPLOCKS - * Reduce the number of cached locks. - * * LM_CB_ASYNC * The given lock has been granted. */ @@ -149,7 +146,6 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data); #define LM_CB_NEED_D 258 #define LM_CB_NEED_S 259 #define LM_CB_NEED_RECOVERY 260 -#define LM_CB_DROPLOCKS 261 #define LM_CB_ASYNC 262 /* -- cgit v1.2.3 From b2cad26cfc2091050574a460b304ed103a35dbda Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 3 Jun 2008 14:34:14 +0100 Subject: [GFS2] Remove obsolete conversion deadlock avoidance code This is only used by GFS1 so can be removed. Signed-off-by: Steven Whitehouse --- include/linux/lm_interface.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lm_interface.h b/include/linux/lm_interface.h index d0a7112b971..2ed8fa1b762 100644 --- a/include/linux/lm_interface.h +++ b/include/linux/lm_interface.h @@ -122,11 +122,9 @@ typedef void (*lm_callback_t) (void *ptr, unsigned int type, void *data); */ #define LM_OUT_ST_MASK 0x00000003 -#define LM_OUT_CACHEABLE 0x00000004 #define LM_OUT_CANCELED 0x00000008 #define LM_OUT_ASYNC 0x00000080 #define LM_OUT_ERROR 0x00000100 -#define LM_OUT_CONV_DEADLK 0x00000200 /* * lm_callback_t types -- cgit v1.2.3 From c09595f63bb1909c5dc4dca288f4fe818561b5f3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:14 +0200 Subject: sched: revert revert of: fair-group: SMP-nice for group scheduling Try again.. 
Initial commit: 18d95a2832c1392a2d63227a7a6d433cb9f2037e Revert: 6363ca57c76b7b83639ca8c83fc285fa26a7880e Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eaf821072db..97a58b622ee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -765,6 +765,7 @@ struct sched_domain { struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ + int first_cpu; /* cache of the first cpu in this domain */ unsigned long min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ -- cgit v1.2.3 From b6a86c746f5b708012809958462234d19e9c8177 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:18 +0200 Subject: sched: fix sched_domain aggregation Keeping the aggregate on the first cpu of the sched domain has two problems: - it could collide between different sched domains on different cpus - it could slow things down because of the remote accesses Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 97a58b622ee..eaf821072db 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -765,7 +765,6 @@ struct sched_domain { struct sched_domain *child; /* bottom domain must be null terminated */ struct sched_group *groups; /* the balancing groups of the domain */ cpumask_t span; /* span of all CPUs in this domain */ - int first_cpu; /* cache of the first cpu in this domain */ unsigned long 
min_interval; /* Minimum balance interval ms */ unsigned long max_interval; /* Maximum balance interval ms */ unsigned int busy_factor; /* less balancing by factor if busy */ -- cgit v1.2.3 From 2398f2c6d34b43025f274fc42eaca34d23ec2320 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 27 Jun 2008 13:41:35 +0200 Subject: sched: update shares on wakeup We found that the affine wakeup code needs rather accurate load figures to be effective. The trouble is that updating the load figures is fairly expensive with group scheduling. Therefore ratelimit the updating. Signed-off-by: Peter Zijlstra Cc: Srivatsa Vaddagiri Cc: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eaf821072db..835b6c6fcc5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -783,6 +783,8 @@ struct sched_domain { unsigned int balance_interval; /* initialise to 1. units in ms. */ unsigned int nr_balance_failed; /* initialise to 0 */ + u64 last_update; + #ifdef CONFIG_SCHEDSTATS /* load_balance() stats */ unsigned int lb_count[CPU_MAX_IDLE_TYPES]; @@ -1605,6 +1607,7 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; +extern unsigned int sysctl_sched_shares_ratelimit; int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, -- cgit v1.2.3 From 9433f6dd3a4677e9b92c6e1cd7f98b11598b7c2c Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Thu, 26 Jun 2008 10:50:04 +0800 Subject: PCI: Fix comment of pci_dynids struct pci_driver has no field of driver_data. It's in pci_device_id. 
Signed-off-by: Wang Chen Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f1f73f79a18..76c9a4a9615 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -338,7 +338,7 @@ struct pci_bus_region { struct pci_dynids { spinlock_t lock; /* protects list, index */ struct list_head list; /* for IDs added at runtime */ - unsigned int use_driver_data:1; /* pci_driver->driver_data is used */ + unsigned int use_driver_data:1; /* pci_device_id->driver_data is used */ }; /* ---------------------------------------------------------------- */ -- cgit v1.2.3 From 80be038593dba7aa46fb24a14f0ba83e5ade0edb Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 30 Jun 2008 11:35:53 -0700 Subject: PCI: add stub for pci_set_consistent_dma_mask() When CONFIG_PCI=n, there is no stub for pci_set_consistent_dma_mask(), so add one like other similar stubs. Otherwise there can be build errors, as here: linux-next-20080630/drivers/ssb/main.c:1175: error: implicit declaration of function 'pci_set_consistent_dma_mask' Signed-off-by: Randy Dunlap Signed-off-by: Jesse Barnes --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 76c9a4a9615..96ebaa8d80e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -853,6 +853,11 @@ static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask) return -EIO; } +static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) +{ + return -EIO; +} + static inline int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size) { -- cgit v1.2.3 From 9b4a8dd2e9f8af206eb39e3d99c442b0d6158953 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 24 Jun 2008 03:46:57 +1000 Subject: drivers/macintosh: Various cleanups This contains the following cleanups: - make the following needlessly 
global code static: - adb.c: adb_controller - adb.c: adb_init() - adbhid.c: adb_to_linux_keycodes[] (also make it const) - via-pmu68k.c: backlight_level - via-pmu68k.c: backlight_enabled - remove the following unused code: - via-pmu68k.c: sleep_notifier_list Signed-off-by: Adrian Bunk Acked-by: Geert Uytterhoeven Acked-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/linux/adb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/adb.h b/include/linux/adb.h index 64d8878e144..63bca502fa5 100644 --- a/include/linux/adb.h +++ b/include/linux/adb.h @@ -84,7 +84,6 @@ enum adb_message { ADB_MSG_PRE_RESET, /* Called before resetting the bus */ ADB_MSG_POST_RESET /* Called after resetting the bus (re-do init & register) */ }; -extern struct adb_driver *adb_controller; extern struct blocking_notifier_head adb_client_list; int adb_request(struct adb_request *req, void (*done)(struct adb_request *), -- cgit v1.2.3 From f3e909c2750eb20536bacacc867dc9047b70546a Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 1 Jul 2008 14:01:39 +1000 Subject: powerpc: Update for VSX core file and ptrace This correctly hooks the VSX dump into Roland McGrath core file infrastructure. It adds the VSX dump information as an additional elf note in the core file (after talking more to the tool chain/gdb guys). This also ensures the formats are consistent between signals, ptrace and core files. 
Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras --- include/linux/elf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/elf.h b/include/linux/elf.h index ff9fbed9012..edc3dac3f02 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -358,6 +358,7 @@ typedef struct elf64_shdr { #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ #define NT_PPC_VMX 0x100 /* PowerPC Altivec/VMX registers */ #define NT_PPC_SPE 0x101 /* PowerPC SPE/EVR registers */ +#define NT_PPC_VSX 0x102 /* PowerPC VSX registers */ #define NT_386_TLS 0x200 /* i386 TLS slots (struct user_desc) */ -- cgit v1.2.3 From 9465efc9e96135a2cec8154c0c766fa59984a298 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 27 Jun 2008 11:05:24 +0200 Subject: Remove BKL from remote_llseek v2 - Replace remote_llseek with generic_file_llseek_unlocked (to force compilation failures in all users) - Change all users to either use generic_file_llseek_unlocked directly or take the BKL around it. I changed the file systems that don't use the BKL for anything (CIFS, GFS) to call it directly. NCPFS and SMBFS and NFS take the BKL, but explicitly in their own source now. I moved them all over in a single patch to avoid unbisectable sections. Open problem: 32bit kernels can corrupt fpos because its modification is not atomic, but they can do that anyway because there are other paths that modify it without BKL. Do we need a special lock for the pos/f_version = 0 checks? Trond says the NFS BKL is likely not needed, but keep it for now until his full audit.
v2: Use generic_file_llseek_unlocked instead of remote_llseek_unlocked and factor duplicated code (suggested by hch) Cc: Trond.Myklebust@netapp.com Cc: swhiteho@redhat.com Cc: sfrench@samba.org Cc: vandrove@vc.cvut.cz Signed-off-by: Andi Kleen Signed-off-by: Andi Kleen Signed-off-by: Jonathan Corbet --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index f413085f748..b158e5161bc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1871,7 +1871,8 @@ extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); -extern loff_t remote_llseek(struct file *file, loff_t offset, int origin); +extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, + int origin); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -- cgit v1.2.3 From 02c62304e6af60f1963695c6bc1bbffe619aa585 Mon Sep 17 00:00:00 2001 From: "Alan D. Brunelle" Date: Wed, 11 Jun 2008 09:12:52 +0200 Subject: Added in user-injected messages into blk traces This allows a user to annotate the blk trace stream: writing a suitable message to {/sys/kernel/debug}/block//msg will have it propagated into the trace stream. Signed-off-by: Alan D. 
Brunelle Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index e3ef903aae8..d084b8d227a 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -129,6 +129,7 @@ struct blk_trace { u32 dev; struct dentry *dir; struct dentry *dropped_file; + struct dentry *msg_file; atomic_t dropped; }; -- cgit v1.2.3 From 244b4d56f85bcd11b21ab0b94845a3dabeed5c10 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 12 Jun 2008 20:12:36 +0200 Subject: block: kill request_queue_t Everything was moved to struct request_queue a few kernel revisions ago, maintaining the deprecated typedef to avoid breaking things. Now the time has come to get rid of that typedef. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d2a1b71e93c..6a3da671713 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -23,7 +23,6 @@ struct scsi_ioctl_command; struct request_queue; -typedef struct request_queue request_queue_t __deprecated; struct elevator_queue; typedef struct elevator_queue elevator_t; struct request_pm_state; -- cgit v1.2.3 From 51d654e1d885607a6edd02b337105fa5c28b6d33 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 17 Jun 2008 18:59:56 +0200 Subject: block: Globalize bio_set and bio_vec_slab Move struct bio_set and biovec_slab definitions to bio.h so they can be used outside of bio.c. Signed-off-by: Martin K. 
Petersen Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/bio.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 61c15eaf3fb..49dfb3cb746 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -333,6 +333,35 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, int, int); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); +extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); + +/* + * bio_set is used to allow other portions of the IO system to + * allocate their own private memory pools for bio and iovec structures. + * These memory pools in turn all allocate from the bio_slab + * and the bvec_slabs[]. + */ +#define BIO_POOL_SIZE 2 +#define BIOVEC_NR_POOLS 6 + +struct bio_set { + mempool_t *bio_pool; + mempool_t *bvec_pools[BIOVEC_NR_POOLS]; +}; + +struct biovec_slab { + int nr_vecs; + char *name; + struct kmem_cache *slab; +}; + +extern struct bio_set *fs_bio_set; + +/* + * a small number of entries is fine, not going to be performance critical. + * basically we just need to survive + */ +#define BIO_SPLIT_ENTRIES 2 #ifdef CONFIG_HIGHMEM /* -- cgit v1.2.3 From 7ba1ba12eeef0aa7113beb16410ef8b7c748e18b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 30 Jun 2008 20:04:41 +0200 Subject: block: Block layer data integrity support Some block devices support verifying the integrity of requests by way of checksums or other protection information that is submitted along with the I/O. This patch implements support for generating and verifying integrity metadata, as well as correctly merging, splitting and cloning bios and requests that have this extra information attached. See Documentation/block/data-integrity.txt for more information. Signed-off-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- include/linux/bio.h | 94 +++++++++++++++++++++++++++++++++++++++++-- include/linux/blkdev.h | 105 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/genhd.h | 3 ++ 3 files changed, 198 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 49dfb3cb746..6bfc3e8d9d8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -64,6 +64,7 @@ struct bio_vec { struct bio_set; struct bio; +struct bio_integrity_payload; typedef void (bio_end_io_t) (struct bio *, int); typedef void (bio_destructor_t) (struct bio *); @@ -112,6 +113,9 @@ struct bio { atomic_t bi_cnt; /* pin count */ void *bi_private; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload *bi_integrity; /* data integrity */ +#endif bio_destructor_t *bi_destructor; /* destructor */ }; @@ -271,6 +275,29 @@ static inline void *bio_data(struct bio *bio) */ #define bio_get(bio) atomic_inc(&(bio)->bi_cnt) +#if defined(CONFIG_BLK_DEV_INTEGRITY) +/* + * bio integrity payload + */ +struct bio_integrity_payload { + struct bio *bip_bio; /* parent bio */ + struct bio_vec *bip_vec; /* integrity data vector */ + + sector_t bip_sector; /* virtual start sector */ + + void *bip_buf; /* generated integrity data */ + bio_end_io_t *bip_end_io; /* saved I/O completion fn */ + + int bip_error; /* saved I/O error */ + unsigned int bip_size; + + unsigned short bip_pool; /* pool the ivec came from */ + unsigned short bip_vcnt; /* # of integrity bio_vecs */ + unsigned short bip_idx; /* current bip_vec index */ + + struct work_struct bip_work; /* I/O completion */ +}; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ /* * A bio_pair is used when we need to split a bio. 
@@ -283,10 +310,14 @@ static inline void *bio_data(struct bio *bio) * in bio2.bi_private */ struct bio_pair { - struct bio bio1, bio2; - struct bio_vec bv1, bv2; - atomic_t cnt; - int error; + struct bio bio1, bio2; + struct bio_vec bv1, bv2; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload bip1, bip2; + struct bio_vec iv1, iv2; +#endif + atomic_t cnt; + int error; }; extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors); @@ -334,6 +365,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); +extern unsigned int bvec_nr_vecs(unsigned short idx); /* * bio_set is used to allow other portions of the IO system to @@ -346,6 +378,9 @@ extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set struct bio_set { mempool_t *bio_pool; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + mempool_t *bio_integrity_pool; +#endif mempool_t *bvec_pools[BIOVEC_NR_POOLS]; }; @@ -410,5 +445,56 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) +#define bip_vec(bip) bip_vec_idx(bip, 0) + +#define __bip_for_each_vec(bvl, bip, i, start_idx) \ + for (bvl = bip_vec_idx((bip), (start_idx)), i = (start_idx); \ + i < (bip)->bip_vcnt; \ + bvl++, i++) + +#define bip_for_each_vec(bvl, bip, i) \ + __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) + +#define bio_integrity(bio) ((bio)->bi_integrity ? 
1 : 0) + +extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); +extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); +extern void bio_integrity_free(struct bio *, struct bio_set *); +extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); +extern int bio_integrity_enabled(struct bio *bio); +extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); +extern int bio_integrity_get_tag(struct bio *, void *, unsigned int); +extern int bio_integrity_prep(struct bio *); +extern void bio_integrity_endio(struct bio *, int); +extern void bio_integrity_advance(struct bio *, unsigned int); +extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); +extern void bio_integrity_split(struct bio *, struct bio_pair *, int); +extern int bio_integrity_clone(struct bio *, struct bio *, struct bio_set *); +extern int bioset_integrity_create(struct bio_set *, int); +extern void bioset_integrity_free(struct bio_set *); +extern void bio_integrity_init_slab(void); + +#else /* CONFIG_BLK_DEV_INTEGRITY */ + +#define bio_integrity(a) (0) +#define bioset_integrity_create(a, b) (0) +#define bio_integrity_prep(a) (0) +#define bio_integrity_enabled(a) (0) +#define bio_integrity_clone(a, b, c) (0) +#define bioset_integrity_free(a) do { } while (0) +#define bio_integrity_free(a, b) do { } while (0) +#define bio_integrity_endio(a, b) do { } while (0) +#define bio_integrity_advance(a, b) do { } while (0) +#define bio_integrity_trim(a, b, c) do { } while (0) +#define bio_integrity_split(a, b, c) do { } while (0) +#define bio_integrity_set_tag(a, b, c) do { } while (0) +#define bio_integrity_get_tag(a, b, c) do { } while (0) +#define bio_integrity_init_slab(a) do { } while (0) + +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + #endif /* CONFIG_BLOCK */ #endif /* __LINUX_BIO_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 
6a3da671713..4a9ed45270f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -112,6 +112,7 @@ enum rq_flag_bits { __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_RW_META, /* metadata io request */ __REQ_COPY_USER, /* contains copies of user pages */ + __REQ_INTEGRITY, /* integrity metadata has been remapped */ __REQ_NR_BITS, /* stops here */ }; @@ -134,6 +135,7 @@ enum rq_flag_bits { #define REQ_ALLOCED (1 << __REQ_ALLOCED) #define REQ_RW_META (1 << __REQ_RW_META) #define REQ_COPY_USER (1 << __REQ_COPY_USER) +#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) #define BLK_MAX_CDB 16 @@ -865,6 +867,109 @@ void kblockd_flush_work(struct work_struct *work); MODULE_ALIAS("block-major-" __stringify(major) "-*") +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +#define INTEGRITY_FLAG_READ 1 /* verify data integrity on read */ +#define INTEGRITY_FLAG_WRITE 2 /* generate data integrity on write */ + +struct blk_integrity_exchg { + void *prot_buf; + void *data_buf; + sector_t sector; + unsigned int data_size; + unsigned short sector_size; + const char *disk_name; +}; + +typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); +typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); +typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); +typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); + +struct blk_integrity { + integrity_gen_fn *generate_fn; + integrity_vrfy_fn *verify_fn; + integrity_set_tag_fn *set_tag_fn; + integrity_get_tag_fn *get_tag_fn; + + unsigned short flags; + unsigned short tuple_size; + unsigned short sector_size; + unsigned short tag_size; + + const char *name; + + struct kobject kobj; +}; + +extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); +extern void blk_integrity_unregister(struct gendisk *); +extern int blk_integrity_compare(struct block_device *, struct block_device *); +extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); +extern int 
blk_rq_count_integrity_sg(struct request *); + +static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) +{ + if (bi) + return bi->tuple_size; + + return 0; +} + +static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) +{ + return bdev->bd_disk->integrity; +} + +static inline unsigned int bdev_get_tag_size(struct block_device *bdev) +{ + struct blk_integrity *bi = bdev_get_integrity(bdev); + + if (bi) + return bi->tag_size; + + return 0; +} + +static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) +{ + struct blk_integrity *bi = bdev_get_integrity(bdev); + + if (bi == NULL) + return 0; + + if (rw == READ && bi->verify_fn != NULL && + test_bit(INTEGRITY_FLAG_READ, &bi->flags)) + return 1; + + if (rw == WRITE && bi->generate_fn != NULL && + test_bit(INTEGRITY_FLAG_WRITE, &bi->flags)) + return 1; + + return 0; +} + +static inline int blk_integrity_rq(struct request *rq) +{ + BUG_ON(rq->bio == NULL); + + return bio_integrity(rq->bio); +} + +#else /* CONFIG_BLK_DEV_INTEGRITY */ + +#define blk_integrity_rq(rq) (0) +#define blk_rq_count_integrity_sg(a) (0) +#define blk_rq_map_integrity_sg(a, b) (0) +#define bdev_get_integrity(a) (0) +#define bdev_get_tag_size(a) (0) +#define blk_integrity_compare(a, b) (0) +#define blk_integrity_register(a, b) (0) +#define blk_integrity_unregister(a) do { } while (0); + +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + + #else /* CONFIG_BLOCK */ /* * stubs for when the block layer is configured out diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ae7aec3cabe..524ec96f5a2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -141,6 +141,9 @@ struct gendisk { struct disk_stats dkstats; #endif struct work_struct async_notify; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct blk_integrity *integrity; +#endif }; /* -- cgit v1.2.3 From da9cbc87395308a21465bd25441297bbba0477e1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 30 Jun 2008 20:42:08 +0200 Subject: 
block: blkdev.h cleanup, move iocontext stuff to iocontext.h Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 17 ----------------- include/linux/iocontext.h | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a9ed45270f..443df75d2cd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -33,12 +33,6 @@ struct sg_io_hdr; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ -int put_io_context(struct io_context *ioc); -void exit_io_context(void); -struct io_context *get_io_context(gfp_t gfp_flags, int node); -struct io_context *alloc_io_context(gfp_t gfp_flags, int node); -void copy_io_context(struct io_context **pdst, struct io_context **psrc); - struct request; typedef void (rq_end_io_fn)(struct request *, int); @@ -981,17 +975,6 @@ static inline long nr_blockdev_pages(void) return 0; } -static inline void exit_io_context(void) -{ -} - -struct io_context; -static inline int put_io_context(struct io_context *ioc) -{ - return 1; -} - - #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index 2b7a1187cb2..08b987bccf8 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -99,4 +99,22 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) return NULL; } +#ifdef CONFIG_BLOCK +int put_io_context(struct io_context *ioc); +void exit_io_context(void); +struct io_context *get_io_context(gfp_t gfp_flags, int node); +struct io_context *alloc_io_context(gfp_t gfp_flags, int node); +void copy_io_context(struct io_context **pdst, struct io_context **psrc); +#else +static inline void exit_io_context(void) +{ +} + +struct io_context; +static inline int put_io_context(struct io_context *ioc) +{ + return 1; +} +#endif + #endif -- cgit v1.2.3 From 6e2401ad6f33de15ff00f78b88159f00a14f3b35 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 18 
Jun 2008 10:15:02 +0200 Subject: block: integrity cleanups - No need to check for NULL bio, we'll get an immediate oops anyway. - Make bio_integrity() a proper function. Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 ++++++++- include/linux/blkdev.h | 4 ---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 6bfc3e8d9d8..0933a14e641 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -458,7 +458,14 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) -#define bio_integrity(bio) ((bio)->bi_integrity ? 1 : 0) +static inline int bio_integrity(struct bio *bio) +{ +#if defined(CONFIG_BLK_DEV_INTEGRITY) + return bio->bi_integrity != NULL; +#else + return 0; +#endif +} extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 443df75d2cd..d3ae9ad9721 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,7 +860,6 @@ void kblockd_flush_work(struct work_struct *work); #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ MODULE_ALIAS("block-major-" __stringify(major) "-*") - #if defined(CONFIG_BLK_DEV_INTEGRITY) #define INTEGRITY_FLAG_READ 1 /* verify data integrity on read */ @@ -945,8 +944,6 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) static inline int blk_integrity_rq(struct request *rq) { - BUG_ON(rq->bio == NULL); - return bio_integrity(rq->bio); } @@ -963,7 +960,6 @@ static inline int blk_integrity_rq(struct request *rq) #endif /* CONFIG_BLK_DEV_INTEGRITY */ - #else /* CONFIG_BLOCK */ /* * stubs for when the block layer is configured out -- cgit v1.2.3 From 0b07de85a76e1346e675f0e98437378932473df7 
Mon Sep 17 00:00:00 2001 From: Adel Gadllah Date: Thu, 26 Jun 2008 13:48:27 +0200 Subject: allow userspace to modify scsi command filter on per device basis This patch exports the per-gendisk command filter to user space through sysfs, so it can be changed by the system administrator. All users of the old cmd filter have been converted to use the new one. Original patch from Peter Jones. Signed-off-by: Adel Gadllah Signed-off-by: Peter Jones Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 +++++++++- include/linux/genhd.h | 9 +++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d3ae9ad9721..a842b776d09 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -671,7 +671,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); -extern int blk_verify_command(unsigned char *, int); extern void blk_unplug(struct request_queue *q); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) @@ -797,6 +796,15 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, extern int blkdev_issue_flush(struct block_device *, sector_t *); +/* +* command filter functions +*/ +extern int blk_verify_command(struct file *file, unsigned char *cmd); +extern int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter, + unsigned char *cmd, mode_t *f_mode); +extern int blk_register_filter(struct gendisk *disk); +extern void blk_unregister_filter(struct gendisk *disk); + #define MAX_PHYS_SEGMENTS 128 #define MAX_HW_SEGMENTS 128 #define SAFE_MAX_SECTORS 255 diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 524ec96f5a2..e8787417f65 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -110,6 +110,14 @@ struct hd_struct { #define 
GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_FAIL 64 +#define BLK_SCSI_MAX_CMDS (256) +#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) + +struct blk_scsi_cmd_filter { + unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; + unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; + struct kobject kobj; +}; struct gendisk { int major; /* major number of driver */ @@ -120,6 +128,7 @@ struct gendisk { struct hd_struct **part; /* [indexed by minor] */ struct block_device_operations *fops; struct request_queue *queue; + struct blk_scsi_cmd_filter cmd_filter; void *private_data; sector_t capacity; -- cgit v1.2.3 From b24498d477a14680fc3bb3ad884fa9fa76a2d237 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 27 Jun 2008 09:12:09 +0200 Subject: block: integrity flags can't use bit ops on unsigned short Just use normal open coded bit operations instead, they need not be atomic. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a842b776d09..7ab8acad5b6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -870,8 +870,8 @@ void kblockd_flush_work(struct work_struct *work); #if defined(CONFIG_BLK_DEV_INTEGRITY) -#define INTEGRITY_FLAG_READ 1 /* verify data integrity on read */ -#define INTEGRITY_FLAG_WRITE 2 /* generate data integrity on write */ +#define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ +#define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ struct blk_integrity_exchg { void *prot_buf; @@ -940,11 +940,11 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) return 0; if (rw == READ && bi->verify_fn != NULL && - test_bit(INTEGRITY_FLAG_READ, &bi->flags)) + (bi->flags & INTEGRITY_FLAG_READ)) return 1; if (rw == WRITE && bi->generate_fn != NULL && - test_bit(INTEGRITY_FLAG_WRITE, &bi->flags)) + (bi->flags & 
INTEGRITY_FLAG_WRITE)) return 1; return 0; -- cgit v1.2.3 From cc371e66e340f35eed8dc4651c7c18e754c7fb26 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 3 Jul 2008 09:53:43 +0200 Subject: Add bvec_merge_data to handle stacked devices and ->merge_bvec() When devices are stacked, one device's merge_bvec_fn may need to perform the mapping and then call one or more functions for its underlying devices. The following bio fields are used: bio->bi_sector bio->bi_bdev bio->bi_size bio->bi_rw using bio_data_dir() This patch creates a new struct bvec_merge_data holding a copy of those fields to avoid having to change them directly in the struct bio when going down the stack only to have to change them back again on the way back up. (And then when the bio gets mapped for real, the whole exercise gets repeated, but that's a problem for another day...) Signed-off-by: Alasdair G Kergon Cc: Neil Brown Cc: Milan Broz Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7ab8acad5b6..ff9d0bdf2a1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -254,7 +254,14 @@ typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); struct bio_vec; -typedef int (merge_bvec_fn) (struct request_queue *, struct bio *, struct bio_vec *); +struct bvec_merge_data { + struct block_device *bi_bdev; + sector_t bi_sector; + unsigned bi_size; + unsigned long bi_rw; +}; +typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, + struct bio_vec *); typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -- cgit v1.2.3 From e48ec69005f02b70b7ecfde1bc39a599086d16ef Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Jul 
2008 13:18:54 +0200 Subject: block: extend queue_flag bitops Add test_and_clear and test_and_set. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ff9d0bdf2a1..e04c4ac8a7c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -428,6 +428,32 @@ static inline void queue_flag_set_unlocked(unsigned int flag, __set_bit(flag, &q->queue_flags); } +static inline int queue_flag_test_and_clear(unsigned int flag, + struct request_queue *q) +{ + WARN_ON_ONCE(!queue_is_locked(q)); + + if (test_bit(flag, &q->queue_flags)) { + __clear_bit(flag, &q->queue_flags); + return 1; + } + + return 0; +} + +static inline int queue_flag_test_and_set(unsigned int flag, + struct request_queue *q) +{ + WARN_ON_ONCE(!queue_is_locked(q)); + + if (!test_bit(flag, &q->queue_flags)) { + __set_bit(flag, &q->queue_flags); + return 0; + } + + return 1; +} + static inline void queue_flag_set(unsigned int flag, struct request_queue *q) { WARN_ON_ONCE(!queue_is_locked(q)); -- cgit v1.2.3 From 42796d37da6ef4fd851dc6d5d0387baf7e2b0c3c Mon Sep 17 00:00:00 2001 From: eric miao Date: Mon, 14 Apr 2008 09:35:08 +0100 Subject: [ARM] pxa: add generic PWM backlight driver Patch mostly from Eric Miao, with minor edits by rmk to convert Eric's driver to a generic PWM-based backlight driver. 
Signed-off-by: eric miao Signed-off-by: Russell King --- include/linux/pwm_backlight.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 include/linux/pwm_backlight.h (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h new file mode 100644 index 00000000000..aeeffedbe82 --- /dev/null +++ b/include/linux/pwm_backlight.h @@ -0,0 +1,14 @@ +/* + * Generic PWM backlight driver data - see drivers/video/backlight/pwm_bl.c + */ +#ifndef __LINUX_PWM_BACKLIGHT_H +#define __LINUX_PWM_BACKLIGHT_H + +struct platform_pwm_backlight_data { + int pwm_id; + unsigned int max_brightness; + unsigned int dft_brightness; + unsigned int pwm_period_ns; +}; + +#endif -- cgit v1.2.3 From 3b73125af69f93972625f4b655675f42ca4274eb Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Thu, 22 May 2008 14:18:40 +0100 Subject: [ARM] 5044/1: pwm_bl: add init/notify/exit callbacks This allows platform code to manipulate GPIOs and brightness level as needed. Signed-off-by: Philipp Zabel Signed-off-by: Russell King --- include/linux/pwm_backlight.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h index aeeffedbe82..7a9754c9677 100644 --- a/include/linux/pwm_backlight.h +++ b/include/linux/pwm_backlight.h @@ -9,6 +9,9 @@ struct platform_pwm_backlight_data { unsigned int max_brightness; unsigned int dft_brightness; unsigned int pwm_period_ns; + int (*init)(struct device *dev); + int (*notify)(int brightness); + void (*exit)(struct device *dev); }; #endif -- cgit v1.2.3 From 41d54d3bf83f62d3ff5948cb788fe6007e66a0d0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 3 Jul 2008 09:14:26 -0500 Subject: slub: Do not use 192 byte sized cache if minimum alignment is 128 byte The 192 byte cache is not necessary if we have a basic alignment of 128 byte. 
If it would be used then the 192 would be aligned to the next 128 byte boundary which would result in another 256 byte cache. Two 256 kmalloc caches cause sysfs to complain about a duplicate entry. MIPS needs 128 byte aligned kmalloc caches and spits out warnings on boot without this patch. Signed-off-by: Christoph Lameter Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 71e43a12ebb..cef6f8fddd7 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -137,10 +137,12 @@ static __always_inline int kmalloc_index(size_t size) if (size <= KMALLOC_MIN_SIZE) return KMALLOC_SHIFT_LOW; +#if KMALLOC_MIN_SIZE <= 64 if (size > 64 && size <= 96) return 1; if (size > 128 && size <= 192) return 2; +#endif if (size <= 8) return 3; if (size <= 16) return 4; if (size <= 32) return 5; -- cgit v1.2.3 From 27f8221af406e43b529a5425bc99c9b1e9bdf521 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 4 Jul 2008 09:30:03 +0200 Subject: block: add blk_queue_update_dma_pad This adds blk_queue_update_dma_pad to prevent LLDs from overwriting the dma pad mask wrongly (we added blk_queue_update_dma_alignment due to the same reason). This also converts libata to use blk_queue_update_dma_pad instead of blk_queue_dma_pad. 
Signed-off-by: FUJITA Tomonori Cc: Tejun Heo Cc: Bartlomiej Zolnierkiewicz Cc: Thomas Bogendoerfer Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e04c4ac8a7c..1ffd8bfdc4c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -776,6 +776,7 @@ extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); extern void blk_queue_dma_pad(struct request_queue *, unsigned int); +extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); -- cgit v1.2.3 From ba8dd03ac09f51a69c154b8cb508b701d713a2cd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 4 Jul 2008 11:26:40 +0200 Subject: generic-ipi: fix s390 build bug forgot to remove #include <linux/spinlock.h> from linux/smp.h while fixing the original s390 build bug. 
Patch below fixes this build bug caused by header inclusion dependencies: CC kernel/timer.o In file included from include/linux/spinlock.h:87, from include/linux/smp.h:11, from include/linux/kernel_stat.h:4, from kernel/timer.c:22: include/asm/spinlock.h: In function '__raw_spin_lock': include/asm/spinlock.h:69: error: implicit declaration of function 'smp_processor_id' Signed-off-by: Heiko Carstens Signed-off-by: Ingo Molnar --- include/linux/smp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 55261101d09..48262f86c96 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -8,7 +8,6 @@ #include #include -#include #include extern void cpu_idle(void); -- cgit v1.2.3 From cde53535991fbb5c34a1566f25955297c1487b8d Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 4 Jul 2008 09:59:22 -0700 Subject: Christoph has moved Remove all clameter@sgi.com addresses from the kernel tree since they will become invalid on June 27th. Change my maintainer email address for the slab allocators to cl@linux-foundation.org (which will be the new email address for the future). Signed-off-by: Christoph Lameter Signed-off-by: Christoph Lameter Cc: Pekka Enberg Cc: Stephen Rothwell Cc: Matt Mackall Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- include/linux/slub_def.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index c2ad3501659..9aa90a6f20e 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -1,7 +1,7 @@ /* * Written by Mark Hemment, 1996 (markhe@nextd.demon.co.uk). * - * (C) SGI 2006, Christoph Lameter + * (C) SGI 2006, Christoph Lameter * Cleaned up and restructured to ease the addition of alternative * implementations of SLAB allocators. 
*/ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index cef6f8fddd7..d117ea2825a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -4,7 +4,7 @@ /* * SLUB : A Slab allocator without object queues. * - * (C) 2007 SGI, Christoph Lameter + * (C) 2007 SGI, Christoph Lameter */ #include #include -- cgit v1.2.3 From 69d44a1835ec8163a82c4ee57367f87ae0f85c2e Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 4 Jul 2008 09:59:27 -0700 Subject: firmware: fix the request_firmware() dummy > the build (.config attached) failed, make ends with : > ... > UPD include/linux/compile.h > CC init/version.o > LD init/built-in.o > LD vmlinux > drivers/built-in.o: In function `sas_request_addr': > (.text+0x33bab): undefined reference to `request_firmware' > drivers/built-in.o: In function `sas_request_addr': > (.text+0x33c3f): undefined reference to `release_firmware' > make: *** [vmlinux] Error 1 There's a slight fault in the stub logic. It fails for FW_LOADER=m and the user =y. This should fix it. This patch fixes the following 2.6.26-rc regression: http://bugzilla.kernel.org/show_bug.cgi?id=10730 Reviewed-by: Toralf Foerster Signed-off-by: Adrian Bunk Cc: "Rafael J. 
Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/firmware.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 4d10c7328d2..6c7eff2ebad 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -13,7 +13,7 @@ struct firmware { struct device; -#if defined(CONFIG_FW_LOADER) || defined(CONFIG_FW_LOADER_MODULE) +#if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE)) int request_firmware(const struct firmware **fw, const char *name, struct device *device); int request_firmware_nowait( -- cgit v1.2.3 From 450c622e9ff19888818d4e2c4d31adb97a5242b2 Mon Sep 17 00:00:00 2001 From: Miguel Ojeda Date: Fri, 4 Jul 2008 09:59:33 -0700 Subject: Miguel Ojeda has moved Signed-off-by: Miguel Ojeda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cfag12864b.h | 2 +- include/linux/ks0108.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cfag12864b.h b/include/linux/cfag12864b.h index 1605dd8aa64..6f9f19d6659 100644 --- a/include/linux/cfag12864b.h +++ b/include/linux/cfag12864b.h @@ -4,7 +4,7 @@ * Description: cfag12864b LCD driver header * License: GPLv2 * - * Author: Copyright (C) Miguel Ojeda Sandonis + * Author: Copyright (C) Miguel Ojeda Sandonis * Date: 2006-10-12 * * This program is free software; you can redistribute it and/or modify diff --git a/include/linux/ks0108.h b/include/linux/ks0108.h index a2c54acceb4..cb311798e0b 100644 --- a/include/linux/ks0108.h +++ b/include/linux/ks0108.h @@ -4,7 +4,7 @@ * Description: ks0108 LCD Controller driver header * License: GPLv2 * - * Author: Copyright (C) Miguel Ojeda Sandonis + * Author: Copyright (C) Miguel Ojeda Sandonis * Date: 2006-10-31 * * This program is free software; you can redistribute it and/or modify -- cgit v1.2.3 From 
93921f5c2ce7427cc30341c86882527d1d1d8770 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 4 Jul 2008 09:59:48 -0700 Subject: Introduce rculist.h In linux-next there is a commit ("rcu: split list.h and move rcu-protected lists into rculist.h") that moved the rcu related list iterators from list.h to rculist.h. Add a trivial version of the file now so that various subsystem trees can start using it now for -next changes and so reduce the build errors caused by adding uses of the moved functions. Cc: Franck Bui-Huu Acked-by: Paul E. McKenney Cc: Josh Triplett Acked-by: Ingo Molnar Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rculist.h | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 include/linux/rculist.h (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h new file mode 100644 index 00000000000..bde4586f438 --- /dev/null +++ b/include/linux/rculist.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_RCULIST_H +#define _LINUX_RCULIST_H + +#include + +#endif /* _LINUX_RCULIST_H */ -- cgit v1.2.3 From 086f7316f0d400806d76323beefae996bb3849b1 Mon Sep 17 00:00:00 2001 From: "Andrew G. Morgan" Date: Fri, 4 Jul 2008 09:59:58 -0700 Subject: security: filesystem capabilities: fix fragile setuid fixup code This commit includes a bugfix for the fragile setuid fixup code in the case that filesystem capabilities are supported (in access()). The effect of this fix is gated on filesystem capability support because changing securebits is only supported when filesystem capabilities support is configured.) [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Andrew G. 
Morgan Acked-by: Serge Hallyn Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/capability.h | 2 ++ include/linux/securebits.h | 15 ++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index fa830f8de03..02673846d20 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -501,6 +501,8 @@ extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_full_set; extern const kernel_cap_t __cap_init_eff_set; +kernel_cap_t cap_set_effective(const kernel_cap_t pE_new); + int capable(int cap); int __capable(struct task_struct *t, int cap); diff --git a/include/linux/securebits.h b/include/linux/securebits.h index c1f19dbceb0..92f09bdf117 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -7,14 +7,15 @@ inheritance of root-permissions and suid-root executable under compatibility mode. We raise the effective and inheritable bitmasks *of the executable file* if the effective uid of the new process is - 0. If the real uid is 0, we raise the inheritable bitmask of the + 0. If the real uid is 0, we raise the effective (legacy) bit of the executable file. */ #define SECURE_NOROOT 0 #define SECURE_NOROOT_LOCKED 1 /* make bit-0 immutable */ -/* When set, setuid to/from uid 0 does not trigger capability-"fixes" - to be compatible with old programs relying on set*uid to loose - privileges. When unset, setuid doesn't change privileges. */ +/* When set, setuid to/from uid 0 does not trigger capability-"fixup". + When unset, to provide compatiblility with old programs relying on + set*uid to gain/lose privilege, transitions to/from uid 0 cause + capabilities to be gained/lost. 
*/ #define SECURE_NO_SETUID_FIXUP 2 #define SECURE_NO_SETUID_FIXUP_LOCKED 3 /* make bit-2 immutable */ @@ -26,10 +27,10 @@ #define SECURE_KEEP_CAPS 4 #define SECURE_KEEP_CAPS_LOCKED 5 /* make bit-4 immutable */ -/* Each securesetting is implemented using two bits. One bit specify +/* Each securesetting is implemented using two bits. One bit specifies whether the setting is on or off. The other bit specify whether the - setting is fixed or not. A setting which is fixed cannot be changed - from user-level. */ + setting is locked or not. A setting which is locked cannot be + changed from user-level. */ #define issecure_mask(X) (1 << (X)) #define issecure(X) (issecure_mask(X) & current->securebits) -- cgit v1.2.3 From e08c1694d9e2138204f2b79b73f0f159074ce2f5 Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Fri, 4 Jul 2008 10:00:03 -0700 Subject: olpc: sdhci: add quirk for the Marvell CaFe's vdd/powerup issue This has been sitting around unloved for way too long.. The Marvell CaFe chip's SD implementation chokes during card insertion if one attempts to set the voltage and power up in the same SDHCI_POWER_CONTROL register write. This adds a quirk that does that particular dance in two steps. It also adds an entry to pci_ids.h for the CaFe chip's SD device. 
Signed-off-by: Andres Salomon Cc: Pierre Ossman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index eafc9d6d2b3..65953822c9c 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1520,6 +1520,7 @@ #define PCI_DEVICE_ID_MARVELL_GT64260 0x6430 #define PCI_DEVICE_ID_MARVELL_MV64360 0x6460 #define PCI_DEVICE_ID_MARVELL_MV64460 0x6480 +#define PCI_DEVICE_ID_MARVELL_CAFE_SD 0x4101 #define PCI_VENDOR_ID_V3 0x11b0 #define PCI_DEVICE_ID_V3_V960 0x0001 -- cgit v1.2.3 From acb7669c125676e63cf96582455509216c39745e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 4 Jul 2008 10:00:05 -0700 Subject: cpumask: introduce new APIs In linux-next there is a commit ("x86: Add performance variants of cpumask operators") which, as part of the 4096 cpu support work adds some new APIs for dealing with cpu masks. Add trivial versions of these now so that subsystems can update in a timely manner and avoid conflicts in linux-next and the next merge window. Cc: Mike Travis Cc: Thomas Gleixner Cc: Ingo Molnar Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 5df3db58fcc..c24875bd9c5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -353,6 +353,10 @@ static inline void __cpus_fold(cpumask_t *dstp, const cpumask_t *origp, for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) #endif /* NR_CPUS */ +#define next_cpu_nr(n, src) next_cpu(n, src) +#define cpus_weight_nr(cpumask) cpus_weight(cpumask) +#define for_each_cpu_mask_nr(cpu, mask) for_each_cpu_mask(cpu, mask) + /* * The following particular system cpumasks and operations manage * possible, present and online cpus. 
Each of them is a fixed size -- cgit v1.2.3 From ca31e146d5c2fe51498e619eb3a64782d02e310a Mon Sep 17 00:00:00 2001 From: Eduard - Gabriel Munteanu Date: Sat, 5 Jul 2008 12:14:23 +0300 Subject: Move _RET_IP_ and _THIS_IP_ to include/linux/kernel.h These two macros are useful beyond lock debugging. Moved definitions from include/linux/debug_locks.h to include/linux/kernel.h, so code that needs them does not have to include the former, which would have been a less intuitive choice of a header. Signed-off-by: Eduard - Gabriel Munteanu Acked-by: Pekka Enberg Signed-off-by: Linus Torvalds --- include/linux/debug_locks.h | 10 ++-------- include/linux/kernel.h | 3 +++ 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index f4a5871767f..4aaa4afb1cb 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -1,6 +1,8 @@ #ifndef __LINUX_DEBUG_LOCKING_H #define __LINUX_DEBUG_LOCKING_H +#include + struct task_struct; extern int debug_locks; @@ -11,14 +13,6 @@ extern int debug_locks_silent; */ extern int debug_locks_off(void); -/* - * In the debug case we carry the caller's instruction pointer into - * other functions, but we dont want the function argument overhead - * in the nondebug case - hence these macros: - */ -#define _RET_IP_ (unsigned long)__builtin_return_address(0) -#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) - #define DEBUG_LOCKS_WARN_ON(c) \ ({ \ int __ret = 0; \ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 792bf0aa779..2e70006c7fa 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -46,6 +46,9 @@ extern const char linux_proc_banner[]; #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#define _RET_IP_ (unsigned long)__builtin_return_address(0) +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) + 
#ifdef CONFIG_LBD # include # define sector_div(a, b) do_div(a, b) -- cgit v1.2.3 From c6c4f070a61b2b6e5cd317a5fbf25255878688a2 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Thu, 3 Jul 2008 09:49:39 -0700 Subject: PCI: make pci_name use dev_name Also fixes up the sparc code that was assuming this is not a constant. Acked-by: David S. Miller Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman Signed-off-by: Jesse Barnes --- include/linux/pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 96ebaa8d80e..4c80dc3f299 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -996,9 +996,9 @@ static inline void pci_set_drvdata(struct pci_dev *pdev, void *data) /* If you want to know what to call your pci_dev, ask this function. * Again, it's a wrapper around the generic device. */ -static inline char *pci_name(struct pci_dev *pdev) +static inline const char *pci_name(struct pci_dev *pdev) { - return pdev->dev.bus_id; + return dev_name(&pdev->dev); } -- cgit v1.2.3 From eb9d0fe40e313c0a74115ef456a2e43a6c8da72f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:34:48 +0200 Subject: PCI ACPI: Rework PCI handling of wake-up * Introduce function acpi_pm_device_sleep_wake() for enabling and disabling the system wake-up capability of devices that are power manageable by ACPI. * Introduce function acpi_bus_can_wakeup() allowing other (dependent) subsystems to check if ACPI is able to enable the system wake-up capability of given device. * Introduce callback .sleep_wake() in struct pci_platform_pm_ops and for the ACPI PCI 'driver' make it use acpi_pm_device_sleep_wake(). * Introduce callback .can_wakeup() in struct pci_platform_pm_ops and for the ACPI 'driver' make it use acpi_bus_can_wakeup(). 
* Move the PME# handling code out of pci_enable_wake() and split it into two functions, pci_pme_capable() and pci_pme_active(), allowing the caller to check if given device is capable of generating PME# from given power state and to enable/disable the device's PME# functionality, respectively. * Modify pci_enable_wake() to use the new ACPI callbacks and the new PME#-related functions. * Drop the generic .platform_enable_wakeup() callback that is not used any more. * Introduce device_set_wakeup_capable() that will set the power.can_wakeup flag of given device. * Rework PCI device PM initialization so that, if given device is capable of generating wake-up events, either natively through the PME# mechanism, or with the help of the platform, its power.can_wakeup flag is set and its power.should_wakeup flag is unset as appropriate. * Make ACPI set the power.can_wakeup flag for devices found to be wake-up capable by it. * Make the ACPI wake-up code enable/disable GPEs for devices that have the wakeup.flags.prepared flag set (which means that their wake-up power has been enabled). Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Jesse Barnes --- include/linux/pm_wakeup.h | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index f0d0b2cb8d2..3af0c8d05cd 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -35,6 +35,11 @@ static inline void device_init_wakeup(struct device *dev, int val) dev->power.can_wakeup = dev->power.should_wakeup = !!val; } +static inline void device_set_wakeup_capable(struct device *dev, int val) +{ + dev->power.can_wakeup = !!val; +} + static inline int device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; @@ -47,21 +52,7 @@ static inline void device_set_wakeup_enable(struct device *dev, int val) static inline int device_may_wakeup(struct device *dev) { - return dev->power.can_wakeup & dev->power.should_wakeup; -} - -/* - * Platform hook to activate device wakeup capability, if that's not already - * handled by enable_irq_wake() etc. - * Returns zero on success, else negative errno - */ -extern int (*platform_enable_wakeup)(struct device *dev, int is_on); - -static inline int call_platform_enable_wakeup(struct device *dev, int is_on) -{ - if (platform_enable_wakeup) - return (*platform_enable_wakeup)(dev, is_on); - return 0; + return dev->power.can_wakeup && dev->power.should_wakeup; } #else /* !CONFIG_PM */ @@ -80,11 +71,6 @@ static inline int device_can_wakeup(struct device *dev) #define device_set_wakeup_enable(dev, val) do {} while (0) #define device_may_wakeup(dev) 0 -static inline int call_platform_enable_wakeup(struct device *dev, int is_on) -{ - return 0; -} - #endif /* !CONFIG_PM */ #endif /* _LINUX_PM_WAKEUP_H */ -- cgit v1.2.3 From 404cc2d8ce41ed4031958fba8e633767e8a2e028 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Mon, 7 Jul 2008 03:35:26 +0200 Subject: PCI PM: Introduce pci_prepare_to_sleep and pci_back_from_sleep Introduce functions pci_prepare_to_sleep() and pci_back_from_sleep(), to be used by the PCI drivers that want to place their devices into the lowest power state appropriate for them (PCI_D3hot, if the device is not supposed to wake up the system, or the deepest state from which the wake-up is possible, otherwise) while the system is being prepared to go into a sleeping state and to put them back into D0 during the subsequent transition to the working state. Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4c80dc3f299..52ac06dcce9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -632,6 +632,8 @@ int pci_restore_state(struct pci_dev *dev); int pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable); +int pci_prepare_to_sleep(struct pci_dev *dev); +int pci_back_from_sleep(struct pci_dev *dev); /* Functions for PCI Hotplug drivers to use */ int pci_bus_find_capability(struct pci_bus *bus, unsigned int devfn, int cap); -- cgit v1.2.3 From 337001b6c42938f49a880b1b8306c3ed771a7e61 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 7 Jul 2008 03:36:24 +0200 Subject: PCI: Simplify PCI device PM code If the offset of PCI device's PM capability in its configuration space, the mask of states that the device supports PME# from and the D1 and D2 support bits are cached in the corresponding struct pci_dev, the PCI device PM code can be simplified quite a bit. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Jesse Barnes --- include/linux/pci.h | 8 +++++++- include/linux/pci_regs.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 52ac06dcce9..68a29f0f274 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -177,6 +177,13 @@ struct pci_dev { pci_power_t current_state; /* Current operating state. In ACPI-speak, this is D0-D3, D0 being fully functional, and D3 being off. */ + int pm_cap; /* PM capability offset in the + configuration space */ + unsigned int pme_support:5; /* Bitmask of states from which PME# + can be generated */ + unsigned int d1_support:1; /* Low power state D1 is supported */ + unsigned int d2_support:1; /* Low power state D2 is supported */ + unsigned int no_d1d2:1; /* Only allow D0 and D3 */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state. */ @@ -201,7 +208,6 @@ struct pci_dev { unsigned int is_added:1; unsigned int is_busmaster:1; /* device is busmaster */ unsigned int no_msi:1; /* device may not use msi */ - unsigned int no_d1d2:1; /* only allow d0 or d3 */ unsigned int block_ucfg_access:1; /* userspace config space access is blocked */ unsigned int broken_parity_status:1; /* Device generates false positive parity */ unsigned int msi_enabled:1; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index c0c1223c919..19958b92990 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -231,6 +231,7 @@ #define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ #define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ #define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ +#define PCI_PM_CAP_PME_SHIFT 11 /* Start of the PME Mask in PMC */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ #define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ -- cgit v1.2.3 From 
d2dbf343329dc777d77488743465f7be4245971d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Jun 2008 02:00:56 -0700 Subject: x86: clean up reserve_bootmem_generic() and port it to 32-bit 1. add reserve_bootmem_generic for 32bit 2. change len to unsigned long 3. make early_res_to_bootmem to use it Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/bootmem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 686895bacd9..a1d9b79078e 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -84,6 +84,8 @@ extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ +extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, + int flags); extern unsigned long free_all_bootmem(void); extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); extern void *__alloc_bootmem_node(pg_data_t *pgdat, -- cgit v1.2.3 From cc1050bafebfb1d7935331282e948b5016318192 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 13 Jun 2008 19:08:52 -0700 Subject: x86: replace shrink_active_range() with remove_active_range() in case we have kva before ramdisk on a node, we still need to use those ranges. v2: reserve_early kva ram area, in case there are holes in highmem, to avoid those area could be treat as free high pages. 
Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ce8e397a61f..034a3156d2f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -998,7 +998,8 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat, extern void free_area_init_nodes(unsigned long *max_zone_pfn); extern void add_active_range(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); -extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn); +extern void remove_active_range(unsigned int nid, unsigned long start_pfn, + unsigned long end_pfn); extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn, unsigned long end_pfn); extern void remove_all_active_ranges(void); -- cgit v1.2.3 From b5bc6c0e55000dab86b73f838f5ad02908b23755 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 14 Jun 2008 18:32:52 -0700 Subject: x86, mm: use add_highpages_with_active_regions() for high pages init v2 use early_node_map to init high pages, so we can remove page_is_ram() and page_is_reserved_early() in the big loop with add_one_highpage also remove page_is_reserved_early(), it is not needed anymore. 
v2: fix the build of other platforms Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 034a3156d2f..e4de460907c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,6 +1011,8 @@ extern unsigned long find_min_pfn_with_active_regions(void); extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); +typedef void (*work_fn_t)(unsigned long, unsigned long, void *); +extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID extern int early_pfn_to_nid(unsigned long pfn); -- cgit v1.2.3 From 3461b0af025251bbc6b3d56c821c6ac2de6f7209 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 12 May 2008 21:21:13 +0200 Subject: x86: remove static boot_cpu_pda array v2 * Remove the boot_cpu_pda array and pointer table from the data section. Allocate the pointer table and array during init. do_boot_cpu() will reallocate the pda in node local memory and if the cpu is being brought up before the bootmem array is released (after_bootmem = 0), then it will free the initial pda. This will happen for all cpus present at system startup. This removes 512k + 32k bytes from the data section. For inclusion into sched-devel/latest tree. 
Based on: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git + sched-devel/latest .../mingo/linux-2.6-sched-devel.git Signed-off-by: Mike Travis Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 586a943cab0..0ea48a5af82 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1024,6 +1024,7 @@ extern void mem_init(void); extern void show_mem(void); extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); +extern int after_bootmem; #ifdef CONFIG_NUMA extern void setup_per_cpu_pageset(void); -- cgit v1.2.3 From a7bf0bd5e6af7fe69342dabf2a3b721f0163469a Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 28 May 2008 15:02:14 +0100 Subject: build: add __page_aligned_data and __page_aligned_bss Making a variable page-aligned by using __attribute__((section(".data.page_aligned"))) is fragile because if sizeof(variable) is not also a multiple of page size, it leaves variables in the remainder of the section unaligned. This patch introduces two new qualifiers, __page_aligned_data and __page_aligned_bss to set the section *and* the alignment of variables. This makes page-aligned variables more robust because the linker will make sure they're aligned properly. Unfortunately it requires *all* page-aligned data to use these macros... 
Signed-off-by: Ingo Molnar --- include/linux/linkage.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 2119610b24f..9fd1f859021 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -1,6 +1,7 @@ #ifndef _LINUX_LINKAGE_H #define _LINUX_LINKAGE_H +#include #include #ifdef __cplusplus @@ -17,6 +18,9 @@ # define asmregparm #endif +#define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE) +#define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE) + /* * This is used by architectures to keep arguments on the stack * untouched by the compiler by keeping them live until the end. -- cgit v1.2.3 From d52d53b8a5b258bfaab9223a5e7284fcfdd48577 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 16 Jun 2008 20:10:55 -0700 Subject: RFC x86: try to remove arch_get_ram_range want to remove arch_get_ram_range, and use early_node_map instead. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d647b24041..cf1cd3a2ed7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,7 +1011,7 @@ extern unsigned long find_min_pfn_with_active_regions(void); extern unsigned long find_max_pfn_with_active_regions(void); extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); -typedef void (*work_fn_t)(unsigned long, unsigned long, void *); +typedef int (*work_fn_t)(unsigned long, unsigned long, void *); extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); extern void sparse_memory_present_with_active_regions(int nid); #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -- cgit v1.2.3 From 3c999f142665265afd0fe9190204dd051f17e505 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 20 Jun 2008 16:11:20 -0700 Subject: x86: check command line 
when CONFIG_X86_MPPARSE is not set, v2 if acpi=off, acpi=noirq and pci=noacpi, we need to disable apic. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: "Maciej W. Rozycki" Signed-off-by: Ingo Molnar --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 41f7ce7edd7..0601075d09a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -82,6 +82,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); +int acpi_mps_check (void); int acpi_numa_init (void); int acpi_table_init (void); @@ -250,6 +251,11 @@ static inline int acpi_boot_table_init(void) return 0; } +static inline int acpi_mps_check(void) +{ + return 0; +} + static inline int acpi_check_resource_conflict(struct resource *res) { return 0; -- cgit v1.2.3 From 69ac9cd629ca96e59f34eb4ccd12d00b2c8276a7 Mon Sep 17 00:00:00 2001 From: Bernhard Walle Date: Fri, 27 Jun 2008 13:12:54 +0200 Subject: sysfs: add /sys/firmware/memmap This patch adds /sys/firmware/memmap interface that represents the BIOS (or Firmware) provided memory map. The tree looks like: /sys/firmware/memmap/0/start (hex number) end (hex number) type (string) ... /1/start end type With the following shell snippet one can print the memory map in the same form the kernel prints itself when booting on x86 (the E820 map). --------- 8< -------------------------- #!/bin/sh cd /sys/firmware/memmap for dir in * ; do start=$(cat $dir/start) end=$(cat $dir/end) type=$(cat $dir/type) printf "%016x-%016x (%s)\n" $start $[ $end +1] "$type" done --------- >8 -------------------------- That patch only provides the needed interface: 1. The sysfs interface. 2. The structure and enumeration definition. 3. 
The function firmware_map_add() and firmware_map_add_early() that should be called from architecture code (E820/EFI, for example) to add the contents to the interface. If the kernel is compiled without CONFIG_FIRMWARE_MEMMAP, the interface does nothing without cluttering the architecture-specific code with #ifdef's. The purpose of the new interface is kexec: While /proc/iomem represents the *used* memory map (e.g. modified via kernel parameters like 'memmap' and 'mem'), the /sys/firmware/memmap tree represents the unmodified memory map provided via the firmware. So kexec can: - use the original memory map for rebooting, - use the /proc/iomem for setting up the ELF core headers for kdump case that should only represent the memory of the system. The patch has been tested on i386 and x86_64. Signed-off-by: Bernhard Walle Acked-by: Greg KH Acked-by: Vivek Goyal Cc: kexec@lists.infradead.org Cc: yhlu.kernel@gmail.com Signed-off-by: Ingo Molnar --- include/linux/firmware-map.h | 74 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 include/linux/firmware-map.h (limited to 'include/linux') diff --git a/include/linux/firmware-map.h b/include/linux/firmware-map.h new file mode 100644 index 00000000000..acbdbcc1605 --- /dev/null +++ b/include/linux/firmware-map.h @@ -0,0 +1,74 @@ +/* + * include/linux/firmware-map.h: + * Copyright (C) 2008 SUSE LINUX Products GmbH + * by Bernhard Walle + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License v2.0 as published by + * the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ +#ifndef _LINUX_FIRMWARE_MAP_H +#define _LINUX_FIRMWARE_MAP_H + +#include +#include + +/* + * provide a dummy interface if CONFIG_FIRMWARE_MEMMAP is disabled + */ +#ifdef CONFIG_FIRMWARE_MEMMAP + +/** + * Adds a firmware mapping entry. This function uses kmalloc() for memory + * allocation. Use firmware_map_add_early() if you want to use the bootmem + * allocator. + * + * That function must be called before late_initcall. + * + * @start: Start of the memory range. + * @end: End of the memory range (inclusive). + * @type: Type of the memory range. + * + * Returns 0 on success, or -ENOMEM if no memory could be allocated. + */ +int firmware_map_add(resource_size_t start, resource_size_t end, + const char *type); + +/** + * Adds a firmware mapping entry. This function uses the bootmem allocator + * for memory allocation. Use firmware_map_add() if you want to use kmalloc(). + * + * That function must be called before late_initcall. + * + * @start: Start of the memory range. + * @end: End of the memory range (inclusive). + * @type: Type of the memory range. + * + * Returns 0 on success, or -ENOMEM if no memory could be allocated. 
+ */ +int firmware_map_add_early(resource_size_t start, resource_size_t end, + const char *type); + +#else /* CONFIG_FIRMWARE_MEMMAP */ + +static inline int firmware_map_add(resource_size_t start, resource_size_t end, + const char *type) +{ + return 0; +} + +static inline int firmware_map_add_early(resource_size_t start, + resource_size_t end, const char *type) +{ + return 0; +} + +#endif /* CONFIG_FIRMWARE_MEMMAP */ + +#endif /* _LINUX_FIRMWARE_MAP_H */ -- cgit v1.2.3 From a861beb1401d65e3f095fee074c13645ab06490e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 8 Jul 2008 19:27:22 +0200 Subject: ide: add __ide_default_irq() inline helper Add __ide_default_irq() inline helper and use it instead of ide_default_irq() in ide-probe.c and ns87415.c (all host drivers except IDE PCI ones always setup hwif->irq so it is enough to check only for I/O bases 0x1f0 and 0x170). This fixes post-2.6.25 regression since ide_default_irq() define could shadow ide_default_irq() inline. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 9918772bf27..eddb6daadf4 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -189,6 +189,21 @@ static inline void ide_std_init_ports(hw_regs_t *hw, hw->io_ports.ctl_addr = ctl_addr; } +/* for IDE PCI controllers in legacy mode, temporary */ +static inline int __ide_default_irq(unsigned long base) +{ + switch (base) { +#ifdef CONFIG_IA64 + case 0x1f0: return isa_irq_to_vector(14); + case 0x170: return isa_irq_to_vector(15); +#else + case 0x1f0: return 14; + case 0x170: return 15; +#endif + } + return 0; +} + #include #if !defined(MAX_HWIFS) || defined(CONFIG_EMBEDDED) -- cgit v1.2.3 From b845f313d78e4e259ec449909e3bbadf77b53a6d Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Tue, 8 Jul 2008 00:28:51 +1000 Subject: mm: Allow architectures to define additional 
protection bits This patch allows architectures to define functions to deal with additional protection bits for mmap() and mprotect(). arch_calc_vm_prot_bits() maps additional protection bits to vm_flags arch_vm_get_page_prot() maps additional vm_flags to the vma's vm_page_prot arch_validate_prot() checks for valid values of the protection bits Note: vm_get_page_prot() is now pretty ugly, but the generated code should be identical for architectures that don't define additional protection bits. Signed-off-by: Dave Kleikamp Acked-by: Andrew Morton Acked-by: Hugh Dickins Signed-off-by: Benjamin Herrenschmidt --- include/linux/mman.h | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mman.h b/include/linux/mman.h index dab8892e6ff..30d1073bac3 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -33,6 +33,32 @@ static inline void vm_unacct_memory(long pages) vm_acct_memory(-pages); } +/* + * Allow architectures to handle additional protection bits + */ + +#ifndef arch_calc_vm_prot_bits +#define arch_calc_vm_prot_bits(prot) 0 +#endif + +#ifndef arch_vm_get_page_prot +#define arch_vm_get_page_prot(vm_flags) __pgprot(0) +#endif + +#ifndef arch_validate_prot +/* + * This is called from mprotect(). PROT_GROWSDOWN and PROT_GROWSUP have + * already been masked out. + * + * Returns true if the prot flags are valid + */ +static inline int arch_validate_prot(unsigned long prot) +{ + return (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM)) == 0; +} +#define arch_validate_prot arch_validate_prot +#endif + /* * Optimisation macro. It is equivalent to: * (x & bit1) ? 
bit2 : 0 @@ -51,7 +77,8 @@ calc_vm_prot_bits(unsigned long prot) { return _calc_vm_trans(prot, PROT_READ, VM_READ ) | _calc_vm_trans(prot, PROT_WRITE, VM_WRITE) | - _calc_vm_trans(prot, PROT_EXEC, VM_EXEC ); + _calc_vm_trans(prot, PROT_EXEC, VM_EXEC) | + arch_calc_vm_prot_bits(prot); } /* -- cgit v1.2.3 From aba46c5027cb59d98052231b36efcbbde9c77a1d Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Tue, 8 Jul 2008 00:28:52 +1000 Subject: powerpc/mm: Define flags for Strong Access Ordering This patch defines: - PROT_SAO, which is passed into mmap() and mprotect() in the prot field - VM_SAO in vma->vm_flags, and - _PAGE_SAO, the combination of WIMG bits in the pte that enables strong access ordering for the page. Signed-off-by: Dave Kleikamp Signed-off-by: Benjamin Herrenschmidt --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 586a943cab0..689184446fc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -108,6 +108,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ +#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS -- cgit v1.2.3 From 2116271a347d1181b5497602c2bfada1de8fd53b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 May 2008 19:34:39 -0400 Subject: NFS: Add correct bounds checking to NFSv2 locks NFSv2 file locking currently fails the Connectathon tests, because the calls to the VFS locking code do not return an EINVAL error if the struct file_lock overflows the 32-bit boundaries. The problem is due to the fact that we occasionally call helpers from fs/locks.c in order to avoid RPC calls to the server when we know that a local process holds the lock. 
These helpers are, of course, always 64-bit enabled, so EINVAL is not returned in cases when it would if the call had gone to the NLM code. For consistency, we therefore add support for a bounds-checking helper. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 24263bb8e0b..8d780de371f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -832,6 +832,7 @@ struct nfs_rpc_ops { int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct file_lock *); + int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); }; -- cgit v1.2.3 From b6b6152c46861dd914d0e6cea9c27df057d6e235 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 9 Jun 2008 16:51:31 -0400 Subject: rpc: bring back cl_chatty The cl_chatty flag allows us to control whether a given rpc client leaves "server X not responding, timed out" messages in the syslog. Such messages make sense for ordinary nfs clients (where an unresponsive server means applications on the mountpoint are probably hanging), but not for the callback client (which can fail more commonly, with the only result just of disabling some optimizations). Previously cl_chatty was removed, due to lack of users; reinstate it, and use it for the nfsd's callback client. Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 6fff7f82ef1..764fd4c286e 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -42,7 +42,8 @@ struct rpc_clnt { unsigned int cl_softrtry : 1,/* soft timeouts */ cl_discrtry : 1,/* disconnect before retry */ - cl_autobind : 1;/* use getport() */ + cl_autobind : 1,/* use getport() */ + cl_chatty : 1;/* be verbose */ struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ @@ -114,6 +115,7 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3) #define RPC_CLNT_CREATE_NOPING (1UL << 4) #define RPC_CLNT_CREATE_DISCRTRY (1UL << 5) +#define RPC_CLNT_CREATE_QUIET (1UL << 6) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, -- cgit v1.2.3 From a486aeda9b2b0d944aecce7871b3186379b898de Mon Sep 17 00:00:00 2001 From: "\\\\\\\"J. Bruce Fields\\\\\\" Date: Mon, 9 Jun 2008 16:51:35 -0400 Subject: rpc: minor cleanup of scheduler callback code Try to make the comment here a little more clear and concise. Also, this macro definition seems unnecessary. Signed-off-by: J. 
Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d1a5c8c1a0f..64981a2f1ca 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -135,7 +135,6 @@ struct rpc_task_setup { #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) -#define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_TASK_RUNNING 0 -- cgit v1.2.3 From 46cb650c224bb8e64a749090105d74b9e8eda669 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Jun 2008 16:32:46 -0400 Subject: NFS: Remove the redundant file_open entry from struct nfs_rpc_ops All instances are set to nfs_open(), so we should just remove the redundant indirection. 
Ditto for the file_release op Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8d780de371f..8c77c11224d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -829,8 +829,6 @@ struct nfs_rpc_ops { int (*write_done) (struct rpc_task *, struct nfs_write_data *); void (*commit_setup) (struct nfs_write_data *, struct rpc_message *); int (*commit_done) (struct rpc_task *, struct nfs_write_data *); - int (*file_open) (struct inode *, struct file *); - int (*file_release) (struct inode *, struct file *); int (*lock)(struct file *, int, struct file_lock *); int (*lock_check_bounds)(const struct file_lock *); void (*clear_acl_cache)(struct inode *); -- cgit v1.2.3 From 34e8f92831cb5c40b3137e47a3daf4c09016ef02 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 12 Jun 2008 12:32:25 -0400 Subject: NFS: Move fs/nfs/iostat.h to include/linux The fs/nfs/iostat.h header has definitions that were designed to be exposed to user space. Move these definitions under include/linux so user space can use the definitions in applications that read /proc/self/mountstats. Also address a handful of coding style issues called out by checkpatch.pl in fs/nfs/iostat.h. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_iostat.h | 119 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 include/linux/nfs_iostat.h (limited to 'include/linux') diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h new file mode 100644 index 00000000000..1cb9a3fed2b --- /dev/null +++ b/include/linux/nfs_iostat.h @@ -0,0 +1,119 @@ +/* + * User-space visible declarations for NFS client per-mount + * point statistics + * + * Copyright (C) 2005, 2006 Chuck Lever + * + * NFS client per-mount statistics provide information about the + * health of the NFS client and the health of each NFS mount point. + * Generally these are not for detailed problem diagnosis, but + * simply to indicate that there is a problem. + * + * These counters are not meant to be human-readable, but are meant + * to be integrated into system monitoring tools such as "sar" and + * "iostat". As such, the counters are sampled by the tools over + * time, and are never zeroed after a file system is mounted. + * Moving averages can be computed by the tools by taking the + * difference between two instantaneous samples and dividing that + * by the time between the samples. + */ + +#ifndef _LINUX_NFS_IOSTAT +#define _LINUX_NFS_IOSTAT + +#define NFS_IOSTAT_VERS "1.0" + +/* + * NFS byte counters + * + * 1. SERVER - the number of payload bytes read from or written + * to the server by the NFS client via an NFS READ or WRITE + * request. + * + * 2. NORMAL - the number of bytes read or written by applications + * via the read(2) and write(2) system call interfaces. + * + * 3. DIRECT - the number of bytes read or written from files + * opened with the O_DIRECT flag. + * + * These counters give a view of the data throughput into and out + * of the NFS client. 
Comparing the number of bytes requested by + * an application with the number of bytes the client requests from + * the server can provide an indication of client efficiency + * (per-op, cache hits, etc). + * + * These counters can also help characterize which access methods + * are in use. DIRECT by itself shows whether there is any O_DIRECT + * traffic. NORMAL + DIRECT shows how much data is going through + * the system call interface. A large amount of SERVER traffic + * without much NORMAL or DIRECT traffic shows that applications + * are using mapped files. + * + * NFS page counters + * + * These count the number of pages read or written via nfs_readpage(), + * nfs_readpages(), or their write equivalents. + * + * NB: When adding new byte counters, please include the measured + * units in the name of each byte counter to help users of this + * interface determine what exactly is being counted. + */ +enum nfs_stat_bytecounters { + NFSIOS_NORMALREADBYTES = 0, + NFSIOS_NORMALWRITTENBYTES, + NFSIOS_DIRECTREADBYTES, + NFSIOS_DIRECTWRITTENBYTES, + NFSIOS_SERVERREADBYTES, + NFSIOS_SERVERWRITTENBYTES, + NFSIOS_READPAGES, + NFSIOS_WRITEPAGES, + __NFSIOS_BYTESMAX, +}; + +/* + * NFS event counters + * + * These counters provide a low-overhead way of monitoring client + * activity without enabling NFS trace debugging. The counters + * show the rate at which VFS requests are made, and how often the + * client invalidates its data and attribute caches. This allows + * system administrators to monitor such things as how close-to-open + * is working, and answer questions such as "why are there so many + * GETATTR requests on the wire?" + * + * They also count anamolous events such as short reads and writes, + * silly renames due to close-after-delete, and operations that + * change the size of a file (such operations can often be the + * source of data corruption if applications aren't using file + * locking properly). 
+ */ +enum nfs_stat_eventcounters { + NFSIOS_INODEREVALIDATE = 0, + NFSIOS_DENTRYREVALIDATE, + NFSIOS_DATAINVALIDATE, + NFSIOS_ATTRINVALIDATE, + NFSIOS_VFSOPEN, + NFSIOS_VFSLOOKUP, + NFSIOS_VFSACCESS, + NFSIOS_VFSUPDATEPAGE, + NFSIOS_VFSREADPAGE, + NFSIOS_VFSREADPAGES, + NFSIOS_VFSWRITEPAGE, + NFSIOS_VFSWRITEPAGES, + NFSIOS_VFSGETDENTS, + NFSIOS_VFSSETATTR, + NFSIOS_VFSFLUSH, + NFSIOS_VFSFSYNC, + NFSIOS_VFSLOCK, + NFSIOS_VFSRELEASE, + NFSIOS_CONGESTIONWAIT, + NFSIOS_SETATTRTRUNC, + NFSIOS_EXTENDWRITE, + NFSIOS_SILLYRENAME, + NFSIOS_SHORTREAD, + NFSIOS_SHORTWRITE, + NFSIOS_DELAY, + __NFSIOS_COUNTSMAX, +}; + +#endif /* _LINUX_NFS_IOSTAT */ -- cgit v1.2.3 From e468bae97d243fe0e1515abaa1f7d0edf1476ad0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Jun 2008 13:25:22 -0400 Subject: NFS: Allow redirtying of a completed unstable write. Currently, if an unstable write completes, we cannot redirty the page in order to reflect a new change in the page data until after we've sent a COMMIT request. This patch allows a page rewrite to proceed without the unnecessary COMMIT step, putting it immediately back onto the dirty page list, undoing the VM unstable write accounting, and removing the NFS_PAGE_TAG_COMMIT tag from the NFS radix tree. 
Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index a1676e19e49..3c60685d972 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -27,9 +27,12 @@ /* * Valid flags for a dirty buffer */ -#define PG_BUSY 0 -#define PG_NEED_COMMIT 1 -#define PG_NEED_RESCHED 2 +enum { + PG_BUSY = 0, + PG_CLEAN, + PG_NEED_COMMIT, + PG_NEED_RESCHED, +}; struct nfs_inode; struct nfs_page { -- cgit v1.2.3 From ce3b7e1906ebbe96753fe090b36de6ffb8e0e0e7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Jun 2008 12:36:53 -0400 Subject: NFS: Add string length argument to nfs_parse_server_address To make nfs_parse_server_address() more generally useful, allow it to accept input strings that are not terminated with '\0'. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/inet.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inet.h b/include/linux/inet.h index 1354080cf8c..4cca05c9678 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -44,6 +44,13 @@ #include +/* + * These mimic similar macros defined in user-space for inet_ntop(3). + * See /usr/include/netinet/in.h . 
+ */ +#define INET_ADDRSTRLEN (16) +#define INET6_ADDRSTRLEN (48) + extern __be32 in_aton(const char *str); extern int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); -- cgit v1.2.3 From 259875efed06d6936f54c9a264e868937f1bc217 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 2 Jul 2008 14:43:47 -0400 Subject: NFS: set transport defaults after mount option parsing is finished Move the UDP/TCP default timeo/retrans settings for text mounts to nfs_init_timeout_values(), which was where they were always being initialised (and sanity checked) for binary mounts. Document the default timeout values using appropriate #defines. Ensure that we initialise and sanity check the transport protocols that may have been specified by the user. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 27d6a8d98ce..830d9cc8cdc 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -12,6 +12,11 @@ #include /* Default timeout values */ +#define NFS_DEF_UDP_TIMEO (11) +#define NFS_DEF_UDP_RETRANS (3) +#define NFS_DEF_TCP_TIMEO (600) +#define NFS_DEF_TCP_RETRANS (2) + #define NFS_MAX_UDP_TIMEOUT (60*HZ) #define NFS_MAX_TCP_TIMEOUT (600*HZ) -- cgit v1.2.3 From 0e0cab744b17a70ef0f08d818d66935feade7cad Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 26 Jun 2008 17:47:12 -0400 Subject: NFS: use documenting macro constants for initializing ac{reg, dir}{min, max} Clean up. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 830d9cc8cdc..29d26191873 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -20,6 +20,11 @@ #define NFS_MAX_UDP_TIMEOUT (60*HZ) #define NFS_MAX_TCP_TIMEOUT (600*HZ) +#define NFS_DEF_ACREGMIN (3) +#define NFS_DEF_ACREGMAX (60) +#define NFS_DEF_ACDIRMIN (30) +#define NFS_DEF_ACDIRMAX (60) + /* * When flushing a cluster of dirty pages, there can be different * strategies: -- cgit v1.2.3 From 004a403c2e954734090a69aedc7f4f822bdcc142 Mon Sep 17 00:00:00 2001 From: Loc Ho Date: Wed, 14 May 2008 20:41:47 +0800 Subject: [CRYPTO] hash: Add asynchronous hash support This patch adds asynchronous hash and digest support. Signed-off-by: Loc Ho Signed-off-by: Herbert Xu --- include/linux/crypto.h | 187 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 183 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 425824bd49f..b6efe569128 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -30,15 +30,17 @@ */ #define CRYPTO_ALG_TYPE_MASK 0x0000000f #define CRYPTO_ALG_TYPE_CIPHER 0x00000001 -#define CRYPTO_ALG_TYPE_DIGEST 0x00000002 -#define CRYPTO_ALG_TYPE_HASH 0x00000003 +#define CRYPTO_ALG_TYPE_COMPRESS 0x00000002 +#define CRYPTO_ALG_TYPE_AEAD 0x00000003 #define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 #define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 #define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 -#define CRYPTO_ALG_TYPE_COMPRESS 0x00000008 -#define CRYPTO_ALG_TYPE_AEAD 0x00000009 +#define CRYPTO_ALG_TYPE_DIGEST 0x00000008 +#define CRYPTO_ALG_TYPE_HASH 0x00000009 +#define CRYPTO_ALG_TYPE_AHASH 0x0000000a #define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e +#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000c #define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c #define 
CRYPTO_ALG_LARVAL 0x00000010 @@ -102,6 +104,7 @@ struct crypto_async_request; struct crypto_aead; struct crypto_blkcipher; struct crypto_hash; +struct crypto_ahash; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; @@ -131,6 +134,18 @@ struct ablkcipher_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; +struct ahash_request { + struct crypto_async_request base; + + void *info; + + unsigned int nbytes; + struct scatterlist *src; + u8 *result; + + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + /** * struct aead_request - AEAD request * @base: Common attributes for async crypto requests @@ -195,6 +210,17 @@ struct ablkcipher_alg { unsigned int ivsize; }; +struct ahash_alg { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; +}; + struct aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); @@ -272,6 +298,7 @@ struct compress_alg { #define cra_cipher cra_u.cipher #define cra_digest cra_u.digest #define cra_hash cra_u.hash +#define cra_ahash cra_u.ahash #define cra_compress cra_u.compress struct crypto_alg { @@ -298,6 +325,7 @@ struct crypto_alg { struct cipher_alg cipher; struct digest_alg digest; struct hash_alg hash; + struct ahash_alg ahash; struct compress_alg compress; } cra_u; @@ -383,6 +411,19 @@ struct hash_tfm { unsigned int digestsize; }; +struct ahash_tfm { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; + struct crypto_ahash *base; + unsigned int reqsize; +}; + struct compress_tfm { int (*cot_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int 
slen, @@ -397,6 +438,7 @@ struct compress_tfm { #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_hash crt_u.hash +#define crt_ahash crt_u.ahash #define crt_compress crt_u.compress struct crypto_tfm { @@ -409,6 +451,7 @@ struct crypto_tfm { struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct hash_tfm hash; + struct ahash_tfm ahash; struct compress_tfm compress; } crt_u; @@ -441,6 +484,10 @@ struct crypto_hash { struct crypto_tfm base; }; +struct crypto_ahash { + struct crypto_tfm base; +}; + enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -1264,5 +1311,137 @@ static inline int crypto_comp_decompress(struct crypto_comp *tfm, src, slen, dst, dlen); } +static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_ahash *)tfm; +} + +static inline struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + mask &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_AHASH; + mask |= CRYPTO_ALG_TYPE_AHASH_MASK; + + return __crypto_ahash_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) +{ + return &tfm->base; +} + +static inline void crypto_free_ahash(struct crypto_ahash *tfm) +{ + crypto_free_tfm(crypto_ahash_tfm(tfm)); +} + +static inline unsigned int crypto_ahash_alignmask( + struct crypto_ahash *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_ahash_tfm(tfm)); +} + +static inline struct ahash_tfm *crypto_ahash_crt(struct crypto_ahash *tfm) +{ + return &crypto_ahash_tfm(tfm)->crt_ahash; +} + +static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) +{ + return crypto_ahash_crt(tfm)->digestsize; +} + +static inline u32 crypto_ahash_get_flags(struct crypto_ahash *tfm) +{ + return crypto_tfm_get_flags(crypto_ahash_tfm(tfm)); +} + +static inline void crypto_ahash_set_flags(struct crypto_ahash *tfm, u32 flags) +{ + 
crypto_tfm_set_flags(crypto_ahash_tfm(tfm), flags); +} + +static inline void crypto_ahash_clear_flags(struct crypto_ahash *tfm, u32 flags) +{ + crypto_tfm_clear_flags(crypto_ahash_tfm(tfm), flags); +} + +static inline struct crypto_ahash *crypto_ahash_reqtfm( + struct ahash_request *req) +{ + return __crypto_ahash_cast(req->base.tfm); +} + +static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm) +{ + return crypto_ahash_crt(tfm)->reqsize; +} + +static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, + const u8 *key, unsigned int keylen) +{ + struct ahash_tfm *crt = crypto_ahash_crt(tfm); + + return crt->setkey(crt->base, key, keylen); +} + +static inline int crypto_ahash_digest(struct ahash_request *req) +{ + struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); + return crt->digest(req); +} + +static inline void ahash_request_set_tfm(struct ahash_request *req, + struct crypto_ahash *tfm) +{ + req->base.tfm = crypto_ahash_tfm(crypto_ahash_crt(tfm)->base); +} + +static inline struct ahash_request *ahash_request_alloc( + struct crypto_ahash *tfm, gfp_t gfp) +{ + struct ahash_request *req; + + req = kmalloc(sizeof(struct ahash_request) + + crypto_ahash_reqsize(tfm), gfp); + + if (likely(req)) + ahash_request_set_tfm(req, tfm); + + return req; +} + +static inline void ahash_request_free(struct ahash_request *req) +{ + kfree(req); +} + +static inline struct ahash_request *ahash_request_cast( + struct crypto_async_request *req) +{ + return container_of(req, struct ahash_request, base); +} + +static inline void ahash_request_set_callback(struct ahash_request *req, + u32 flags, + crypto_completion_t complete, + void *data) +{ + req->base.complete = complete; + req->base.data = data; + req->base.flags = flags; +} + +static inline void ahash_request_set_crypt(struct ahash_request *req, + struct scatterlist *src, u8 *result, + unsigned int nbytes) +{ + req->src = src; + req->nbytes = nbytes; + req->result = result; +} + #endif /* 
_LINUX_CRYPTO_H */ -- cgit v1.2.3 From 166247f46a9c866e6f7f7d2212be875fb82212a1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 7 Jul 2008 20:54:35 +0800 Subject: crypto: hash - Removed vestigial ahash fields The base field in ahash_tfm appears to have been cut-n-pasted from ablkcipher. It isn't needed here at all. Similarly, the info field in ahash_request also appears to have originated from its cipher counter-part and is vestigial. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b6efe569128..68ef293644d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -137,8 +137,6 @@ struct ablkcipher_request { struct ahash_request { struct crypto_async_request base; - void *info; - unsigned int nbytes; struct scatterlist *src; u8 *result; @@ -420,7 +418,6 @@ struct ahash_tfm { unsigned int keylen); unsigned int digestsize; - struct crypto_ahash *base; unsigned int reqsize; }; @@ -1384,7 +1381,7 @@ static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, { struct ahash_tfm *crt = crypto_ahash_crt(tfm); - return crt->setkey(crt->base, key, keylen); + return crt->setkey(tfm, key, keylen); } static inline int crypto_ahash_digest(struct ahash_request *req) @@ -1396,7 +1393,7 @@ static inline int crypto_ahash_digest(struct ahash_request *req) static inline void ahash_request_set_tfm(struct ahash_request *req, struct crypto_ahash *tfm) { - req->base.tfm = crypto_ahash_tfm(crypto_ahash_crt(tfm)->base); + req->base.tfm = crypto_ahash_tfm(tfm); } static inline struct ahash_request *ahash_request_alloc( -- cgit v1.2.3 From 18e33e6d5cc0495826f5245777cd267732815e01 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 10 Jul 2008 16:01:22 +0800 Subject: crypto: hash - Move ahash functions into crypto/hash.h All new crypto interfaces should go into individual files as much as possible in order to ensure 
that crypto.h does not collapse under its own weight. This patch moves the ahash code into crypto/hash.h and crypto/internal/hash.h respectively. Signed-off-by: Herbert Xu --- include/linux/crypto.h | 136 ------------------------------------------------- 1 file changed, 136 deletions(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 68ef293644d..c43dc47fdf7 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -481,10 +481,6 @@ struct crypto_hash { struct crypto_tfm base; }; -struct crypto_ahash { - struct crypto_tfm base; -}; - enum { CRYPTOA_UNSPEC, CRYPTOA_ALG, @@ -1308,137 +1304,5 @@ static inline int crypto_comp_decompress(struct crypto_comp *tfm, src, slen, dst, dlen); } -static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) -{ - return (struct crypto_ahash *)tfm; -} - -static inline struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, - u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - mask &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_AHASH; - mask |= CRYPTO_ALG_TYPE_AHASH_MASK; - - return __crypto_ahash_cast(crypto_alloc_base(alg_name, type, mask)); -} - -static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) -{ - return &tfm->base; -} - -static inline void crypto_free_ahash(struct crypto_ahash *tfm) -{ - crypto_free_tfm(crypto_ahash_tfm(tfm)); -} - -static inline unsigned int crypto_ahash_alignmask( - struct crypto_ahash *tfm) -{ - return crypto_tfm_alg_alignmask(crypto_ahash_tfm(tfm)); -} - -static inline struct ahash_tfm *crypto_ahash_crt(struct crypto_ahash *tfm) -{ - return &crypto_ahash_tfm(tfm)->crt_ahash; -} - -static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) -{ - return crypto_ahash_crt(tfm)->digestsize; -} - -static inline u32 crypto_ahash_get_flags(struct crypto_ahash *tfm) -{ - return crypto_tfm_get_flags(crypto_ahash_tfm(tfm)); -} - -static inline void crypto_ahash_set_flags(struct 
crypto_ahash *tfm, u32 flags) -{ - crypto_tfm_set_flags(crypto_ahash_tfm(tfm), flags); -} - -static inline void crypto_ahash_clear_flags(struct crypto_ahash *tfm, u32 flags) -{ - crypto_tfm_clear_flags(crypto_ahash_tfm(tfm), flags); -} - -static inline struct crypto_ahash *crypto_ahash_reqtfm( - struct ahash_request *req) -{ - return __crypto_ahash_cast(req->base.tfm); -} - -static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm) -{ - return crypto_ahash_crt(tfm)->reqsize; -} - -static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, - const u8 *key, unsigned int keylen) -{ - struct ahash_tfm *crt = crypto_ahash_crt(tfm); - - return crt->setkey(tfm, key, keylen); -} - -static inline int crypto_ahash_digest(struct ahash_request *req) -{ - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->digest(req); -} - -static inline void ahash_request_set_tfm(struct ahash_request *req, - struct crypto_ahash *tfm) -{ - req->base.tfm = crypto_ahash_tfm(tfm); -} - -static inline struct ahash_request *ahash_request_alloc( - struct crypto_ahash *tfm, gfp_t gfp) -{ - struct ahash_request *req; - - req = kmalloc(sizeof(struct ahash_request) + - crypto_ahash_reqsize(tfm), gfp); - - if (likely(req)) - ahash_request_set_tfm(req, tfm); - - return req; -} - -static inline void ahash_request_free(struct ahash_request *req) -{ - kfree(req); -} - -static inline struct ahash_request *ahash_request_cast( - struct crypto_async_request *req) -{ - return container_of(req, struct ahash_request, base); -} - -static inline void ahash_request_set_callback(struct ahash_request *req, - u32 flags, - crypto_completion_t complete, - void *data) -{ - req->base.complete = complete; - req->base.data = data; - req->base.flags = flags; -} - -static inline void ahash_request_set_crypt(struct ahash_request *req, - struct scatterlist *src, u8 *result, - unsigned int nbytes) -{ - req->src = src; - req->nbytes = nbytes; - req->result = result; -} - #endif /* 
_LINUX_CRYPTO_H */ -- cgit v1.2.3 From b7a39bd0afc4021e8ad2b1189e884551e147427f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 23 May 2008 18:38:49 +0100 Subject: firmware: make fw->data const In preparation for supporting firmware files linked into the static kernel, make fw->data const to ensure that users aren't modifying it (so that we can pass a pointer to the original in-kernel copy, rather than having to copy it). Signed-off-by: David Woodhouse --- include/linux/firmware.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 6c7eff2ebad..88718d60153 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -8,7 +8,7 @@ struct firmware { size_t size; - u8 *data; + const u8 *data; }; struct device; -- cgit v1.2.3 From 5658c769443d543728b6c5c673dffc2df8676317 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 23 May 2008 13:52:42 +0100 Subject: firmware: allow firmware files to be built into kernel image Some drivers have their own hacks to bypass the kernel's firmware loader and build their firmware into the kernel; this renders those unnecessary. Other drivers don't use the firmware loader at all, because they always want the firmware to be available. This allows them to start using the firmware loader. A third set of drivers already use the firmware loader, but can't be used without help from userspace, which sometimes requires an initrd. This allows them to work in a static kernel. 
Signed-off-by: David Woodhouse --- include/linux/firmware.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 88718d60153..c8ecf5b2a20 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -1,7 +1,10 @@ #ifndef _LINUX_FIRMWARE_H #define _LINUX_FIRMWARE_H + #include #include +#include + #define FIRMWARE_NAME_MAX 30 #define FW_ACTION_NOHOTPLUG 0 #define FW_ACTION_HOTPLUG 1 @@ -13,6 +16,24 @@ struct firmware { struct device; +struct builtin_fw { + char *name; + void *data; + unsigned long size; +}; + +/* We have to play tricks here much like stringify() to get the + __COUNTER__ macro to be expanded as we want it */ +#define __fw_concat1(x, y) x##y +#define __fw_concat(x, y) __fw_concat1(x, y) + +#define DECLARE_BUILTIN_FIRMWARE(name, blob) \ + DECLARE_BUILTIN_FIRMWARE_SIZE(name, &(blob), sizeof(blob)) + +#define DECLARE_BUILTIN_FIRMWARE_SIZE(name, blob, size) \ + static const struct builtin_fw __fw_concat(__builtin_fw,__COUNTER__) \ + __used __section(.builtin_fw) = { name, blob, size } + #if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE)) int request_firmware(const struct firmware **fw, const char *name, struct device *device); -- cgit v1.2.3 From bacfe09dd7545467965e8d8f1eab20bc62dce00d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 30 May 2008 13:57:27 +0300 Subject: ihex.h: binary representation of ihex records Some devices need their firmware as a set of {address, len, data...} records in some specific order rather than a simple blob. The normal way of doing this kind of thing is 'ihex', which is a text format and not entirely suitable for use in the kernel. This provides a binary representation which is very similar, but much more compact -- and a helper routine to skip to the next record, because the alignment constraints mean that everybody will screw it up for themselves otherwise. 
Also a helper function which can verify that a 'struct firmware' contains a valid set of ihex records, and that following them won't run off the end of the loaded data. Signed-off-by: David Woodhouse --- include/linux/ihex.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 include/linux/ihex.h (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h new file mode 100644 index 00000000000..df89edd890a --- /dev/null +++ b/include/linux/ihex.h @@ -0,0 +1,50 @@ +/* + * Compact binary representation of ihex records. Some devices need their + * firmware loaded in strange orders rather than a single big blob, but + * actually parsing ihex-as-text within the kernel seems silly. Thus,... + */ + +#ifndef __LINUX_IHEX_H__ +#define __LINUX_IHEX_H__ + +#include +#include + +/* Intel HEX files actually limit the length to 256 bytes, but we have + drivers which would benefit from using separate records which are + longer than that, so we extend to 16 bits of length */ +struct ihex_binrec { + __be32 addr; + __be16 len; + uint8_t data[0]; +} __attribute__((aligned(4))); + +/* Find the next record, taking into account the 4-byte alignment */ +static inline const struct ihex_binrec * +ihex_next_binrec(const struct ihex_binrec *rec) +{ + int next = ((be16_to_cpu(rec->len) + 5) & ~3) - 2; + rec = (void *)&rec->data[next]; + + return be16_to_cpu(rec->len) ? rec : NULL; +} + +/* Check that ihex_next_binrec() won't take us off the end of the image... */ +static inline int ihex_validate_fw(const struct firmware *fw) +{ + const struct ihex_binrec *rec; + size_t ofs = 0; + + while (ofs <= fw->size - sizeof(*rec)) { + rec = (void *)&fw->data[ofs]; + + /* Zero length marks end of records */ + if (!be16_to_cpu(rec->len)) + return 0; + + /* Point to next record... 
*/ + ofs += (sizeof(*rec) + be16_to_cpu(rec->len) + 3) & ~3; + } + return -EINVAL; +} +#endif /* __LINUX_IHEX_H__ */ -- cgit v1.2.3 From f1485f3deb89e6ae10c4d34662ec9e692855ab5d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 31 May 2008 15:20:37 +0300 Subject: ihex: request_ihex_firmware() function to load and validate firmware Provide a helper to load the file and validate it in one call, to simplify error handling in the drivers which are going to use it. Signed-off-by: David Woodhouse --- include/linux/ihex.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ihex.h b/include/linux/ihex.h index df89edd890a..2baace2788a 100644 --- a/include/linux/ihex.h +++ b/include/linux/ihex.h @@ -9,6 +9,7 @@ #include #include +#include /* Intel HEX files actually limit the length to 256 bytes, but we have drivers which would benefit from using separate records which are @@ -47,4 +48,27 @@ static inline int ihex_validate_fw(const struct firmware *fw) } return -EINVAL; } + +/* Request firmware and validate it so that we can trust we won't + * run off the end while reading records... */ +static inline int request_ihex_firmware(const struct firmware **fw, + const char *fw_name, + struct device *dev) +{ + const struct firmware *lfw; + int ret; + + ret = request_firmware(&lfw, fw_name, dev); + if (ret) + return ret; + ret = ihex_validate_fw(lfw); + if (ret) { + dev_err(dev, "Firmware \"%s\" not valid IHEX records\n", + fw_name); + release_firmware(lfw); + return ret; + } + *fw = lfw; + return 0; +} #endif /* __LINUX_IHEX_H__ */ -- cgit v1.2.3 From a2bb6a3d85ef3124cd336403a95abc0540d3fbe2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Jul 2008 20:58:15 -0400 Subject: ftrace: add ftrace_kill_atomic It has been suggested that I add a way to disable the function tracer on an oops. This code adds a ftrace_kill_atomic. It is not meant to be used in normal situations. 
It will disable the ftrace tracer, but will not perform the nice shutdown that requires scheduling. Signed-off-by: Steven Rostedt Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/ftrace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3121b95443d..f368d041e02 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -89,6 +89,7 @@ void ftrace_enable_daemon(void); /* totally disable ftrace - can not re-enable after this */ void ftrace_kill(void); +void ftrace_kill_atomic(void); static inline void tracer_disable(void) { -- cgit v1.2.3 From af52a90a14cdaa54ecbfb6e6982abb13466a4b56 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 7 Jul 2008 14:16:52 -0400 Subject: sched_clock: stop maximum check on NO HZ Working with ftrace I would get large jumps of 11 millisecs or more with the clock tracer. This killed the latency timings of ftrace and also caused the irqoff self tests to fail. What was happening is with NO_HZ the idle would stop the jiffy counter and before the jiffy counter was updated the sched_clock would have a bad delta jiffies to compare with the gtod with the maximum. The jiffies would stop and the last sched_tick would record the last gtod. On wakeup, the sched clock update would compare the gtod + delta jiffies (which would be zero) and compare it to the TSC. The TSC would have correctly (with a stable TSC) moved forward several jiffies. But because the jiffies has not been updated yet the clock would be prevented from moving forward because it would appear that the TSC jumped too far ahead. The clock would then virtually stop, until the jiffies are updated. Then the next sched clock update would see that the clock was very much behind since the delta jiffies is now correct. This would then jump the clock forward by several jiffies.
This caused ftrace to report several milliseconds of interrupts off latency at every resume from NO_HZ idle. This patch adds hooks into the nohz code to disable the checking of the maximum clock update when nohz is in effect. It resumes the max check when nohz has updated the jiffies again. Signed-off-by: Steven Rostedt Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c5d3f847ca8..33a8f42041f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1573,13 +1573,28 @@ static inline void sched_clock_idle_sleep_event(void) static inline void sched_clock_idle_wakeup_event(u64 delta_ns) { } -#else + +#ifdef CONFIG_NO_HZ +static inline void sched_clock_tick_stop(int cpu) +{ +} + +static inline void sched_clock_tick_start(int cpu) +{ +} +#endif + +#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ extern void sched_clock_init(void); extern u64 sched_clock_cpu(int cpu); extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); +#ifdef CONFIG_NO_HZ +extern void sched_clock_tick_stop(int cpu); +extern void sched_clock_tick_start(int cpu); #endif +#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ /* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu -- cgit v1.2.3 From 736603ab297506f4396cb5af592004499950fcfd Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: jbd2: Add commit time into the commit block Carlo Wood has demonstrated that it's possible to recover deleted files from the journal. Something that will make this easier is if we can put the time of the commit into commit block. 
Signed-off-by: "Theodore Ts'o" --- include/linux/jbd2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d147f0f9036..ec9cadf5822 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -168,6 +168,8 @@ struct commit_header { unsigned char h_chksum_size; unsigned char h_padding[2]; __be32 h_chksum[JBD2_CHECKSUM_BYTES]; + __be64 h_commit_sec; + __be32 h_commit_nsec; }; /* -- cgit v1.2.3 From f4c0a0fdfae708f7aa438c27a380ed4071294e11 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: vfs: export filemap_fdatawrite_range() Make filemap_fdatawrite_range() function public, so that it can later be used in ordered mode rewrite by JBD/JBD2. Signed-off-by: Jan Kara --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d8e2762ed14..97f992adc62 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1740,6 +1740,8 @@ extern int wait_on_page_writeback_range(struct address_space *mapping, pgoff_t start, pgoff_t end); extern int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); +extern int filemap_fdatawrite_range(struct address_space *mapping, + loff_t start, loff_t end); extern long do_fsync(struct file *file, int datasync); extern void sync_supers(void); -- cgit v1.2.3 From c851ed540173736e60d48b53b91a16ea5c903896 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: jbd2: Implement data=ordered mode handling via inodes This patch adds necessary framework into JBD2 to be able to track inodes with each transaction and write-out their dirty data during transaction commit time. 
This new ordered mode brings all sorts of advantages such as possibility to get rid of journal heads and buffer heads for data buffers in ordered mode, better ordering of writes on transaction commit, simplification of some JBD code, no more anonymous pages when truncate of data being committed happens. Also with this new ordered mode, delayed allocation on ordered mode is much simpler. Signed-off-by: Jan Kara --- include/linux/jbd2.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index ec9cadf5822..622c3d8ca4e 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -381,6 +381,38 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) bit_spin_unlock(BH_JournalHead, &bh->b_state); } +/* Flags in jbd_inode->i_flags */ +#define __JI_COMMIT_RUNNING 0 +/* Commit of the inode data in progress. We use this flag to protect us from + * concurrent deletion of inode. We cannot use reference to inode for this + * since we cannot afford doing last iput() on behalf of kjournald + */ +#define JI_COMMIT_RUNNING (1 << __JI_COMMIT_RUNNING) + +/** + * struct jbd_inode is the structure linking inodes in ordered mode + * present in a transaction so that we can sync them during commit. + */ +struct jbd2_inode { + /* Which transaction does this inode belong to? Either the running + * transaction or the committing one. [j_list_lock] */ + transaction_t *i_transaction; + + /* Pointer to the running transaction modifying inode's data in case + * there is already a committing transaction touching it. 
[j_list_lock] */ + transaction_t *i_next_transaction; + + /* List of inodes in the i_transaction [j_list_lock] */ + struct list_head i_list; + + /* VFS inode this inode belongs to [constant during the lifetime + * of the structure] */ + struct inode *i_vfs_inode; + + /* Flags of inode [j_list_lock] */ + unsigned int i_flags; +}; + struct jbd2_revoke_table_s; /** @@ -566,6 +598,12 @@ struct transaction_s */ struct journal_head *t_log_list; + /* + * List of inodes whose data we've modified in data=ordered mode. + * [j_list_lock] + */ + struct list_head t_inode_list; + /* * Protects info related to handles */ @@ -1046,6 +1084,10 @@ extern void jbd2_journal_ack_err (journal_t *); extern int jbd2_journal_clear_err (journal_t *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); +extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode); +extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size); +extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode); +extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode); /* * journal_head management -- cgit v1.2.3 From 87c89c232c8f7b3820c33c3b9bc803e9358027da Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: jbd2: Remove data=ordered mode support using jbd buffer heads Signed-off-by: Jan Kara --- include/linux/jbd2.h | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 622c3d8ca4e..3dd20900709 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -542,24 +542,12 @@ struct transaction_s */ struct journal_head *t_reserved_list; - /* - * Doubly-linked circular list of all buffers under writeout during - * commit [j_list_lock] - */ - struct journal_head *t_locked_list; - 
/* * Doubly-linked circular list of all metadata buffers owned by this * transaction [j_list_lock] */ struct journal_head *t_buffers; - /* - * Doubly-linked circular list of all data buffers still to be - * flushed before this transaction can be committed [j_list_lock] - */ - struct journal_head *t_sync_datalist; - /* * Doubly-linked circular list of all forget buffers (superseded * buffers which we can un-checkpoint once this transaction commits) @@ -1044,7 +1032,6 @@ extern int jbd2_journal_extend (handle_t *, int nblocks); extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); -extern int jbd2_journal_dirty_data (handle_t *, struct buffer_head *); extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *); extern int jbd2_journal_forget (handle_t *, struct buffer_head *); @@ -1223,15 +1210,13 @@ static inline int jbd_space_needed(journal_t *journal) /* journaling buffer types */ #define BJ_None 0 /* Not journaled */ -#define BJ_SyncData 1 /* Normal data: flush before commit */ -#define BJ_Metadata 2 /* Normal journaled metadata */ -#define BJ_Forget 3 /* Buffer superseded by this transaction */ -#define BJ_IO 4 /* Buffer is for temporary IO use */ -#define BJ_Shadow 5 /* Buffer contents being shadowed to the log */ -#define BJ_LogCtl 6 /* Buffer contains log descriptors */ -#define BJ_Reserved 7 /* Buffer is reserved for access by journal */ -#define BJ_Locked 8 /* Locked for I/O during commit */ -#define BJ_Types 9 +#define BJ_Metadata 1 /* Normal journaled metadata */ +#define BJ_Forget 2 /* Buffer superseded by this transaction */ +#define BJ_IO 3 /* Buffer is for temporary IO use */ +#define BJ_Shadow 4 /* Buffer contents being shadowed to the log */ +#define BJ_LogCtl 5 /* Buffer contains log 
descriptors */ +#define BJ_Reserved 6 /* Buffer is reserved for access by journal */ +#define BJ_Types 7 extern int jbd_blocks_per_page(struct inode *inode); -- cgit v1.2.3 From 29a814d2ee0e43c2980f33f91c1311ec06c0aa35 Mon Sep 17 00:00:00 2001 From: Alex Tomas Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: vfs: add hooks for ext4's delayed allocation support Export mpage_bio_submit() and __mpage_writepage() for the benefit of ext4's delayed allocation support. Also change __block_write_full_page so that for buffers that have the BH_Delay flag set it will call get_block() to get the physical block allocated, just as in the !BH_Mapped case. Signed-off-by: Alex Tomas Signed-off-by: "Theodore Ts'o" --- include/linux/mpage.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 068a0c9946a..5c42821da2d 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -11,11 +11,21 @@ */ #ifdef CONFIG_BLOCK +struct mpage_data { + struct bio *bio; + sector_t last_block_in_bio; + get_block_t *get_block; + unsigned use_writepage; +}; + struct writeback_control; +struct bio *mpage_bio_submit(int rw, struct bio *bio); int mpage_readpages(struct address_space *mapping, struct list_head *pages, unsigned nr_pages, get_block_t get_block); int mpage_readpage(struct page *page, get_block_t get_block); +int __mpage_writepage(struct page *page, struct writeback_control *wbc, + void *data); int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); int mpage_writepage(struct page *page, get_block_t *get_block, -- cgit v1.2.3 From e8ced39d5e8911c662d4d69a342b9d053eaaac4e Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: percpu_counter: new function percpu_counter_sum_and_set Delayed allocation needs to check free blocks at every write time. percpu_counter_read_positive() is not quite accurate.
Delayed allocation needs more accurate accounting, but using percpu_counter_sum_positive() frequently is quite expensive. This patch adds a new function that updates the central counter when summing the per-cpu counters, to increase the accuracy of the next percpu_counter_read() and to require fewer calls to the expensive percpu_counter_sum(). Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- include/linux/percpu_counter.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc11..20838883535 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc); + s64 ret = __percpu_counter_sum(fbc, 0); return ret < 0 ?
0 : ret; } +static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) +{ + return __percpu_counter_sum(fbc, 1); +} + + static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc); + return __percpu_counter_sum(fbc, 0); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) -- cgit v1.2.3 From 06d6cf6959d22037fcec598f4f954db5db3d7356 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 11 Jul 2008 19:27:31 -0400 Subject: mm: Add range_cont mode for writeback Filesystems like ext4 need to start a new transaction in the writepages for block allocation. This happens with delayed allocation and there is a limit to how many credits we can request from the journal layer. So we call write_cache_pages multiple times with wbc->nr_to_write set to the maximum possible value limited by the max journal credits available. Add a new mode to writeback that enables us to handle this behaviour. In the new mode we update the wbc->range_start to point to the new offset to be written. The next call to write_cache_pages will start writeout from the specified range_start offset. In the new mode we also limit writing to the specified wbc->range_end. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Acked-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f462439cc28..0d8573e6b9e 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -63,6 +63,7 @@ struct writeback_control { unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned more_io:1; /* more io to be dispatched */ + unsigned range_cont:1; }; /* -- cgit v1.2.3 From f11f594edba7f689af9792a5673ed59d660ad371 Mon Sep 17 00:00:00 2001 From: "Martin K.
Petersen" Date: Wed, 25 Jun 2008 11:22:42 -0400 Subject: [SCSI] lib: Add support for the T10 (SCSI) Data Integrity Field CRC The SCSI Block Protocol uses this 16-bit CRC to verify the integrity of each data sector. crc_t10dif() is used by sd_dif.c when performing I/O to or from disks formatted with protection information. Signed-off-by: Martin K. Petersen Signed-off-by: James Bottomley --- include/linux/crc-t10dif.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 include/linux/crc-t10dif.h (limited to 'include/linux') diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h new file mode 100644 index 00000000000..a9c96d865ee --- /dev/null +++ b/include/linux/crc-t10dif.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_CRC_T10DIF_H +#define _LINUX_CRC_T10DIF_H + +#include + +__u16 crc_t10dif(unsigned char const *, size_t); + +#endif -- cgit v1.2.3 From 006ebb40d3d65338bd74abb03b945f8d60e362bd Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Mon, 19 May 2008 08:32:49 -0400 Subject: Security: split proc ptrace checking into read vs. attach Enable security modules to distinguish reading of process state via proc from full ptrace access by renaming ptrace_may_attach to ptrace_may_access and adding a mode argument indicating whether only read access or full attach access is requested. This allows security modules to permit access to reading process state without granting full ptrace access. The base DAC/capability checking remains unchanged. Read access to /proc/pid/mem continues to apply a full ptrace attach check since check_mem_permission() already requires the current task to already be ptracing the target. The other ptrace checks within proc for elements like environ, maps, and fds are changed to pass the read mode instead of attach. In the SELinux case, we model such reading of process state as a reading of a proc file labeled with the target process' label. 
This enables SELinux policy to permit such reading of process state without permitting control or manipulation of the target process, as there are a number of cases where programs probe for such information via proc but do not need to be able to control the target (e.g. procps, lsof, PolicyKit, ConsoleKit). At present we have to choose between allowing full ptrace in policy (more permissive than required/desired) or breaking functionality (or in some cases just silencing the denials via dontaudit rules but this can hide genuine attacks). This version of the patch incorporates comments from Casey Schaufler (change/replace existing ptrace_may_attach interface, pass access mode), and Chris Wright (provide greater consistency in the checking). Note that like their predecessors __ptrace_may_attach and ptrace_may_attach, the __ptrace_may_access and ptrace_may_access interfaces use different return value conventions from each other (0 or -errno vs. 1 or 0). I retained this difference to avoid any changes to the caller logic but made the difference clearer by changing the latter interface to return a bool rather than an int and by adding a comment about it to ptrace.h for any future callers. 
Signed-off-by: Stephen Smalley Acked-by: Chris Wright Signed-off-by: James Morris --- include/linux/ptrace.h | 8 ++++++-- include/linux/security.h | 16 +++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index f98501ba557..c6f5f9dd0ce 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -95,8 +95,12 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void ptrace_untrace(struct task_struct *child); -extern int ptrace_may_attach(struct task_struct *task); -extern int __ptrace_may_attach(struct task_struct *task); +#define PTRACE_MODE_READ 1 +#define PTRACE_MODE_ATTACH 2 +/* Returns 0 on success, -errno on denial. */ +extern int __ptrace_may_access(struct task_struct *task, unsigned int mode); +/* Returns true on success, false on denial. */ +extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); static inline int ptrace_reparented(struct task_struct *child) { diff --git a/include/linux/security.h b/include/linux/security.h index 50737c70e78..62bd80cb7f8 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -46,7 +46,8 @@ struct audit_krule; */ extern int cap_capable(struct task_struct *tsk, int cap); extern int cap_settime(struct timespec *ts, struct timezone *tz); -extern int cap_ptrace(struct task_struct *parent, struct task_struct *child); +extern int cap_ptrace(struct task_struct *parent, struct task_struct *child, + unsigned int mode); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_capset_check(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern void cap_capset_set(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, 
kernel_cap_t *permitted); @@ -1170,6 +1171,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * attributes would be changed by the execve. * @parent contains the task_struct structure for parent process. * @child contains the task_struct structure for child process. + * @mode contains the PTRACE_MODE flags indicating the form of access. * Return 0 if permission is granted. * @capget: * Get the @effective, @inheritable, and @permitted capability sets for @@ -1295,7 +1297,8 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) struct security_operations { char name[SECURITY_NAME_MAX + 1]; - int (*ptrace) (struct task_struct *parent, struct task_struct *child); + int (*ptrace) (struct task_struct *parent, struct task_struct *child, + unsigned int mode); int (*capget) (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); @@ -1573,7 +1576,8 @@ extern struct dentry *securityfs_create_dir(const char *name, struct dentry *par extern void securityfs_remove(struct dentry *dentry); /* Security operations */ -int security_ptrace(struct task_struct *parent, struct task_struct *child); +int security_ptrace(struct task_struct *parent, struct task_struct *child, + unsigned int mode); int security_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, @@ -1755,9 +1759,11 @@ static inline int security_init(void) return 0; } -static inline int security_ptrace(struct task_struct *parent, struct task_struct *child) +static inline int security_ptrace(struct task_struct *parent, + struct task_struct *child, + unsigned int mode) { - return cap_ptrace(parent, child); + return cap_ptrace(parent, child, mode); } static inline int security_capget(struct task_struct *target, -- cgit v1.2.3 From 2069f457848f846cb31149c9aa29b330a6b66d1b Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 4 Jul 2008 09:47:13 +1000 Subject: LSM/SELinux: show LSM mount options in 
/proc/mounts This patch causes SELinux mount options to show up in /proc/mounts. As with other code in the area seq_put errors are ignored. Other LSM's will not have their mount options displayed until they fill in their own security_sb_show_options() function. Signed-off-by: Eric Paris Signed-off-by: Miklos Szeredi Signed-off-by: James Morris --- include/linux/security.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 62bd80cb7f8..c8ad8ec684b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -80,6 +80,7 @@ struct xfrm_selector; struct xfrm_policy; struct xfrm_state; struct xfrm_user_sec_ctx; +struct seq_file; extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); @@ -1331,6 +1332,7 @@ struct security_operations { void (*sb_free_security) (struct super_block *sb); int (*sb_copy_data) (char *orig, char *copy); int (*sb_kern_mount) (struct super_block *sb, void *data); + int (*sb_show_options) (struct seq_file *m, struct super_block *sb); int (*sb_statfs) (struct dentry *dentry); int (*sb_mount) (char *dev_name, struct path *path, char *type, unsigned long flags, void *data); @@ -1610,6 +1612,7 @@ int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); int security_sb_copy_data(char *orig, char *copy); int security_sb_kern_mount(struct super_block *sb, void *data); +int security_sb_show_options(struct seq_file *m, struct super_block *sb); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(char *dev_name, struct path *path, char *type, unsigned long flags, void *data); @@ -1887,6 +1890,12 @@ static inline int security_sb_kern_mount(struct super_block *sb, void *data) return 0; } +static inline int security_sb_show_options(struct seq_file *m, + struct super_block *sb) +{ + return 0; +} + static inline int 
security_sb_statfs(struct dentry *dentry) { return 0; -- cgit v1.2.3 From b478a9f9889c81e88077d1495daadee64c0af541 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 3 Jul 2008 20:56:04 +0200 Subject: security: remove unused sb_get_mnt_opts hook The sb_get_mnt_opts() hook is unused, and is superseded by the sb_show_options() hook. Signed-off-by: Miklos Szeredi Acked-by: James Morris --- include/linux/security.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index c8ad8ec684b..43c6357568a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -291,10 +291,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Update module state after a successful pivot. * @old_path contains the path for the old root. * @new_path contains the path for the new root. - * @sb_get_mnt_opts: - * Get the security relevant mount options used for a superblock - * @sb the superblock to get security mount options from - * @opts binary data structure containing all lsm mount data * @sb_set_mnt_opts: * Set the security relevant mount options used for a superblock * @sb the superblock to set security mount options for @@ -1348,8 +1344,6 @@ struct security_operations { struct path *new_path); void (*sb_post_pivotroot) (struct path *old_path, struct path *new_path); - int (*sb_get_mnt_opts) (const struct super_block *sb, - struct security_mnt_opts *opts); int (*sb_set_mnt_opts) (struct super_block *sb, struct security_mnt_opts *opts); void (*sb_clone_mnt_opts) (const struct super_block *oldsb, @@ -1624,8 +1618,6 @@ void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *d void security_sb_post_addmount(struct vfsmount *mnt, struct path *mountpoint); int security_sb_pivotroot(struct path *old_path, struct path *new_path); void security_sb_post_pivotroot(struct path *old_path, struct path *new_path); -int 
security_sb_get_mnt_opts(const struct super_block *sb, - struct security_mnt_opts *opts); int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts); void security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb); @@ -1942,12 +1934,6 @@ static inline int security_sb_pivotroot(struct path *old_path, static inline void security_sb_post_pivotroot(struct path *old_path, struct path *new_path) { } -static inline int security_sb_get_mnt_opts(const struct super_block *sb, - struct security_mnt_opts *opts) -{ - security_init_mnt_opts(opts); - return 0; -} static inline int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts) -- cgit v1.2.3 From 6f0f0fd496333777d53daff21a4e3b28c4d03a6d Mon Sep 17 00:00:00 2001 From: James Morris Date: Thu, 10 Jul 2008 17:02:07 +0900 Subject: security: remove register_security hook The register security hook is no longer required, as the capability module is always registered. LSMs wishing to stack capability as a secondary module should do so explicitly. Signed-off-by: James Morris Acked-by: Stephen Smalley Acked-by: Greg Kroah-Hartman --- include/linux/security.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 43c6357568a..31c8851ec5d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1239,11 +1239,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @pages contains the number of pages. * Return 0 if permission is granted. * - * @register_security: - * allow module stacking. - * @name contains the name of the security module being stacked. - * @ops contains a pointer to the struct security_operations of the module to stack. - * * @secid_to_secctx: * Convert secid to security context. * @secid contains the security ID. 
@@ -1471,10 +1466,6 @@ struct security_operations { int (*netlink_send) (struct sock *sk, struct sk_buff *skb); int (*netlink_recv) (struct sk_buff *skb, int cap); - /* allow module stacking */ - int (*register_security) (const char *name, - struct security_operations *ops); - void (*d_instantiate) (struct dentry *dentry, struct inode *inode); int (*getprocattr) (struct task_struct *p, char *name, char **value); @@ -1564,7 +1555,6 @@ struct security_operations { extern int security_init(void); extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); -extern int mod_reg_security(const char *name, struct security_operations *ops); extern struct dentry *securityfs_create_file(const char *name, mode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); -- cgit v1.2.3 From 7e9db9eaefdb8798730790214ff1b7746006ec98 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 14 Jul 2008 09:58:44 +0200 Subject: [S390] cio: Introduce modalias for css bus. Add modalias and subchannel type attributes for all subchannels. I/O subchannel specific attributes are now created in io_subchannel_probe(). modalias and subchannel type are also added to the uevent for the css bus. Also make the css modalias known. 
Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- include/linux/mod_devicetable.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 69b2342d5eb..1fd03e732e0 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -159,6 +159,15 @@ struct ap_device_id { #define AP_DEVICE_ID_MATCH_DEVICE_TYPE 0x01 +/* s390 css bus devices (subchannels) */ +struct css_device_id { + __u8 type; /* subchannel type */ + __u8 pad1; + __u16 pad2; + __u32 pad3; + kernel_ulong_t driver_data; +}; + #define ACPI_ID_LEN 16 /* only 9 bytes needed here, 16 bytes are used */ /* to workaround crosscompile issues */ -- cgit v1.2.3 From f08adc008d84f6b03d377ede951e29ed169e76e2 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 14 Jul 2008 09:59:03 +0200 Subject: [S390] css: Use css_device_id for bus matching. css_device_id exists, so use it for determining the right driver (and add a match_flags which is always 1 for valid types). Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- include/linux/mod_devicetable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 1fd03e732e0..c4db5827963 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -161,8 +161,8 @@ struct ap_device_id { /* s390 css bus devices (subchannels) */ struct css_device_id { + __u8 match_flags; __u8 type; /* subchannel type */ - __u8 pad1; __u16 pad2; __u32 pad3; kernel_ulong_t driver_data; -- cgit v1.2.3 From 2f3804edf971d2080243d2b4552bfd61ddfbf969 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Wed, 2 Jul 2008 01:36:15 -0500 Subject: powerpc/85xx: Add support for MPC8536DS Add support for the MPC8536 process and MPC8536DS reference board. 
The MPC8536 is an e500v2 based SoC which has eTSEC, USB, SATA, PCI, and PCIe. The USB and SATA IP blocks are similar to those on the PQ2 Pro SoCs and thus use the same drivers. Signed-off-by: Kumar Gala --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 65953822c9c..1cf4084b51e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2171,6 +2171,8 @@ #define PCI_DEVICE_ID_MPC8544 0x0033 #define PCI_DEVICE_ID_MPC8572E 0x0040 #define PCI_DEVICE_ID_MPC8572 0x0041 +#define PCI_DEVICE_ID_MPC8536E 0x0050 +#define PCI_DEVICE_ID_MPC8536 0x0051 #define PCI_DEVICE_ID_MPC8641 0x7010 #define PCI_DEVICE_ID_MPC8641D 0x7011 #define PCI_DEVICE_ID_MPC8610 0x7018 -- cgit v1.2.3 From 4ee6afd34409d296782a5b667d7991b1050e910a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 7 May 2008 21:01:30 +0300 Subject: VFS: export sync_sb_inodes This patch exports the 'sync_sb_inodes()' which is needed for UBIFS because it has to force write-back from time to time. Namely, the UBIFS budgeting subsystem forces write-back when its pessimistic calculations show that there is no free space on the media.
Signed-off-by: Artem Bityutskiy --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d8e2762ed14..f9d2aab47ed 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1729,6 +1729,8 @@ static inline void invalidate_remote_inode(struct inode *inode) extern int invalidate_inode_pages2(struct address_space *mapping); extern int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); +extern void generic_sync_sb_inodes(struct super_block *sb, + struct writeback_control *wbc); extern int write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); -- cgit v1.2.3 From 341c2c958ec7bdd9f54733a8b0b432fe76842a82 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 May 2008 02:17:51 +0900 Subject: libata: consistently use msecs for time durations libata has been using a mix of jiffies and msecs for time durations. This is getting confusing. As writing sub HZ values in jiffies is PITA and msecs_to_jiffies() can't be used as initializer, unify unit for all time durations to msecs. So, durations are in msecs and deadlines are in jiffies. ata_deadline() is added to compute deadline from a start time and duration in msecs. While at it, drop now superfluous _msec suffix from arguments and rename @timeout to @deadline if it represents a fixed point in time rather than duration.
Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index e57e5d08312..94110b652b3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -27,6 +27,7 @@ #define __LINUX_LIBATA_H__ #include +#include #include #include #include @@ -115,7 +116,7 @@ enum { /* tag ATA_MAX_QUEUE - 1 is reserved for internal commands */ ATA_MAX_QUEUE = 32, ATA_TAG_INTERNAL = ATA_MAX_QUEUE - 1, - ATA_SHORT_PAUSE = (HZ >> 6) + 1, + ATA_SHORT_PAUSE = 16, ATAPI_MAX_DRAIN = 16 << 10, @@ -234,17 +235,17 @@ enum { /* bits 24:31 of host->flags are reserved for LLD specific flags */ /* various lengths of time */ - ATA_TMOUT_BOOT = 30 * HZ, /* heuristic */ - ATA_TMOUT_BOOT_QUICK = 7 * HZ, /* heuristic */ - ATA_TMOUT_INTERNAL = 30 * HZ, - ATA_TMOUT_INTERNAL_QUICK = 5 * HZ, + ATA_TMOUT_BOOT = 30000, /* heuristic */ + ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ + ATA_TMOUT_INTERNAL = 30000, + ATA_TMOUT_INTERNAL_QUICK = 5000, /* FIXME: GoVault needs 2s but we can't afford that without * parallel probing. 800ms is enough for iVDR disk * HHD424020F7SV00. Increase to 2secs when parallel probing * is in place. */ - ATA_TMOUT_FF_WAIT = 4 * HZ / 5, + ATA_TMOUT_FF_WAIT = 800, /* Spec mandates to wait for ">= 2ms" before checking status * after reset. We wait 150ms, because that was the magic @@ -256,14 +257,14 @@ enum { * * Old drivers/ide uses the 2mS rule and then waits for ready. */ - ATA_WAIT_AFTER_RESET_MSECS = 150, + ATA_WAIT_AFTER_RESET = 150, /* If PMP is supported, we have to do follow-up SRST. As some * PMPs don't send D2H Reg FIS after hardreset, LLDs are * advised to wait only for the following duration before * doing SRST. 
*/ - ATA_TMOUT_PMP_SRST_WAIT = 1 * HZ, + ATA_TMOUT_PMP_SRST_WAIT = 1000, /* ATA bus states */ BUS_UNKNOWN = 0, @@ -895,8 +896,7 @@ extern void ata_host_resume(struct ata_host *host); #endif extern int ata_ratelimit(void); extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, - unsigned long interval_msec, - unsigned long timeout_msec); + unsigned long interval, unsigned long timeout); extern int atapi_cmd_type(u8 opcode); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); @@ -1389,6 +1389,12 @@ static inline int ata_check_ready(u8 status) return 0; } +static inline unsigned long ata_deadline(unsigned long from_jiffies, + unsigned long timeout_msecs) +{ + return from_jiffies + msecs_to_jiffies(timeout_msecs); +} + /************************************************************************** * PMP - drivers/ata/libata-pmp.c -- cgit v1.2.3 From 0a2c0f56159999e20015241d3b8fa89b1ab14309 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 May 2008 02:17:52 +0900 Subject: libata: improve EH retry delay handling EH retries were delayed by 5 seconds to ensure that resets don't occur back-to-back. However, this 5 second delay is superfluous or excessive in many cases. For example, after IDENTIFY times out, there's no reason to wait five more seconds before retrying. This patch adds ehc->last_reset timestamp and records the timestamp for the last reset trial or success and uses it to space resets by ATA_EH_RESET_COOL_DOWN which is 5 secs and removes unconditional 5 sec sleeps. As this change makes inter-try waits often shorter and they're redundant in nature, this patch also removes the "retrying..." messages. While at it, convert explicit rounding up division to DIV_ROUND_UP(). This change speeds up EH in many cases w/o sacrificing robustness.
Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 94110b652b3..9058c2a325a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -602,6 +602,8 @@ struct ata_eh_context { unsigned int did_probe_mask; unsigned int saved_ncq_enabled; u8 saved_xfer_mode[ATA_MAX_DEVICES]; + /* timestamp for the last reset attempt or success */ + unsigned long last_reset; }; struct ata_acpi_drive -- cgit v1.2.3 From 87fbc5a060faf2394bee88a93519f9b9d434727c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 20 May 2008 02:17:54 +0900 Subject: libata: improve EH internal command timeout handling ATA_TMOUT_INTERNAL which was 30secs were used for all internal commands which is way too long when something goes wrong. This patch implements command type based stepped timeouts. Different command types can use different timeouts and each command type can use different timeout values after timeouts. ie. the initial timeout is set to a value which should cover most of the cases but not too long so that run away cases don't delay things too much. After the first try times out, the second try can use longer timeout and if that one times out too, it can go for full 30sec timeout. IDENTIFYs use 5s - 10s - 30s timeout and all other commands use 5s - 10s timeouts. This patch significantly cuts down the needed time to handle failure cases while still allowing libata to work with nut job devices through retries. 
Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 9058c2a325a..035f8e1cd0a 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -237,7 +237,6 @@ enum { /* various lengths of time */ ATA_TMOUT_BOOT = 30000, /* heuristic */ ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ - ATA_TMOUT_INTERNAL = 30000, ATA_TMOUT_INTERNAL_QUICK = 5000, /* FIXME: GoVault needs 2s but we can't afford that without @@ -341,6 +340,11 @@ enum { SATA_PMP_RW_TIMEOUT = 3000, /* PMP read/write timeout */ + /* This should match the actual table size of + * ata_eh_cmd_timeout_table in libata-eh.c. + */ + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 5, + /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependant */ @@ -598,6 +602,8 @@ struct ata_eh_info { struct ata_eh_context { struct ata_eh_info i; int tries[ATA_MAX_DEVICES]; + int cmd_timeout_idx[ATA_MAX_DEVICES] + [ATA_EH_CMD_TIMEOUT_TABLE_SIZE]; unsigned int classes[ATA_MAX_DEVICES]; unsigned int did_probe_mask; unsigned int saved_ncq_enabled; -- cgit v1.2.3 From 18f7ba4c2f4be6b37d925931f04d6cc28d88d1ee Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Tue, 3 Jun 2008 10:33:55 -0700 Subject: libata/ahci: enclosure management support Add Enclosure Management support to libata and ahci. 
Signed-off-by: Kristen Carlson Accardi Signed-off-by: Jeff Garzik --- include/linux/libata.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 035f8e1cd0a..5b247b8a6b3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -169,6 +169,7 @@ enum { ATA_LFLAG_ASSUME_CLASS = ATA_LFLAG_ASSUME_ATA | ATA_LFLAG_ASSUME_SEMB, ATA_LFLAG_NO_RETRY = (1 << 5), /* don't retry this link */ ATA_LFLAG_DISABLED = (1 << 6), /* link is disabled */ + ATA_LFLAG_SW_ACTIVITY = (1 << 7), /* keep activity stats */ /* struct ata_port flags */ ATA_FLAG_SLAVE_POSS = (1 << 0), /* host supports slave dev */ @@ -191,6 +192,10 @@ enum { ATA_FLAG_AN = (1 << 18), /* controller supports AN */ ATA_FLAG_PMP = (1 << 19), /* controller supports PMP */ ATA_FLAG_IPM = (1 << 20), /* driver can handle IPM */ + ATA_FLAG_EM = (1 << 21), /* driver supports enclosure + * management */ + ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity + * led */ /* The following flag belongs to ap->pflags but is kept in * ap->flags because it's referenced in many LLDs and will be @@ -446,6 +451,15 @@ enum link_pm { MEDIUM_POWER, }; extern struct device_attribute dev_attr_link_power_management_policy; +extern struct device_attribute dev_attr_em_message_type; +extern struct device_attribute dev_attr_em_message; +extern struct device_attribute dev_attr_sw_activity; + +enum sw_activity { + OFF, + BLINK_ON, + BLINK_OFF, +}; #ifdef CONFIG_ATA_SFF struct ata_ioports { @@ -701,6 +715,7 @@ struct ata_port { struct timer_list fastdrain_timer; unsigned long fastdrain_cnt; + int em_message_type; void *private_data; #ifdef CONFIG_ATA_ACPI @@ -792,6 +807,12 @@ struct ata_port_operations { u8 (*bmdma_status)(struct ata_port *ap); #endif /* CONFIG_ATA_SFF */ + ssize_t (*em_show)(struct ata_port *ap, char *buf); + ssize_t (*em_store)(struct ata_port *ap, const char *message, + size_t size); + ssize_t 
(*sw_activity_show)(struct ata_device *dev, char *buf); + ssize_t (*sw_activity_store)(struct ata_device *dev, + enum sw_activity val); /* * Obsolete */ -- cgit v1.2.3 From 20a9b6e7c303f2a6f9afe17c0997bc9a3c734442 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 14 Jul 2008 22:38:22 +0200 Subject: i2c: Remove 3 deprecated bus drivers This patch contains the scheduled removal of i2c-i810, i2c-prosavage and i2c-savage4. Signed-off-by: Adrian Bunk Signed-off-by: Jean Delvare --- include/linux/i2c-id.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 580acc93903..988e566d3ed 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -111,7 +111,6 @@ #define I2C_HW_B_RIVA 0x010010 /* Riva based graphics cards */ #define I2C_HW_B_IOC 0x010011 /* IOC bit-wiggling */ #define I2C_HW_B_IXP2000 0x010016 /* GPIO on IXP2000 systems */ -#define I2C_HW_B_S3VIA 0x010018 /* S3Via ProSavage adapter */ #define I2C_HW_B_ZR36067 0x010019 /* Zoran-36057/36067 based boards */ #define I2C_HW_B_PCILYNX 0x01001a /* TI PCILynx I2C adapter */ #define I2C_HW_B_CX2388x 0x01001b /* connexant 2388x based tv cards */ -- cgit v1.2.3 From 67c2e66571c383404a5acd08189194da660da942 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:23 +0200 Subject: i2c: Delete unused function i2c_smbus_write_quick Function i2c_smbus_write_quick has no users left, so we can delete it. Also update the list of these helper functions which are gone but could be added back if needed. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 8dc73013219..b3695f353f7 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -71,7 +71,6 @@ extern s32 i2c_smbus_xfer (struct i2c_adapter * adapter, u16 addr, /* Now follow the 'nice' access routines. 
These also document the calling conventions of smbus_access. */ -extern s32 i2c_smbus_write_quick(struct i2c_client * client, u8 value); extern s32 i2c_smbus_read_byte(struct i2c_client * client); extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value); extern s32 i2c_smbus_read_byte_data(struct i2c_client * client, u8 command); -- cgit v1.2.3 From ae7193f7fa3e1735ab70807eb6e35a2a6575623f Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:24 +0200 Subject: i2c: Update stray references to smbus_access That function is actually named i2c_smbus_xfer. Signed-off-by: Jean Delvare --- include/linux/i2c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index b3695f353f7..7c36d5188d3 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -69,7 +69,7 @@ extern s32 i2c_smbus_xfer (struct i2c_adapter * adapter, u16 addr, union i2c_smbus_data * data); /* Now follow the 'nice' access routines. These also document the calling - conventions of smbus_access. */ + conventions of i2c_smbus_xfer. */ extern s32 i2c_smbus_read_byte(struct i2c_client * client); extern s32 i2c_smbus_write_byte(struct i2c_client * client, u8 value); @@ -536,7 +536,7 @@ union i2c_smbus_data { /* and one more for user-space compatibility */ }; -/* smbus_access read or write markers */ +/* i2c_smbus_xfer read or write markers */ #define I2C_SMBUS_READ 1 #define I2C_SMBUS_WRITE 0 -- cgit v1.2.3 From c1b6b4f2342d073698dfc2547240c35045a1d00e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:28 +0200 Subject: i2c: Let framebuffer drivers set their I2C bus class to DDC Let framebuffer drivers set their I2C bus class to DDC. Once this is done, we will be able to tell the eeprom driver to only probe for EDID EEPROMs on these buses. 
Signed-off-by: Jean Delvare --- include/linux/i2c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7c36d5188d3..145797fe6a3 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -349,7 +349,7 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_TV_ANALOG (1<<1) /* bttv + friends */ #define I2C_CLASS_TV_DIGITAL (1<<2) /* dvb cards */ -#define I2C_CLASS_DDC (1<<3) /* i2c-matroxfb ? */ +#define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_CAM_ANALOG (1<<4) /* camera with analog CCD */ #define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ #define I2C_CLASS_SOUND (1<<6) /* sound devices */ -- cgit v1.2.3 From 3401b2fff38fbb8b73ea6bcc69a8370ae5d2a7a0 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:29 +0200 Subject: i2c: Let bus drivers add SPD to their class Let general purpose I2C/SMBus bus drivers add SPD to their class. Once this is done, we will be able to tell the eeprom driver to only probe for SPD EEPROMs and similar on these buses. Note that I took a conservative approach here, adding I2C_CLASS_SPD to many drivers that have no idea whether they can host SPD EEPROMs or not. This is to make sure that the eeprom driver doesn't stop probing buses where SPD EEPROMs or equivalent live. So, bus driver maintainers and users should feel free to remove the SPD class from drivers those buses never have SPD EEPROMs or they don't want the eeprom driver to bind to them. Likewise, feel free to add the SPD class to any bus driver I might have missed. 
Signed-off-by: Jean Delvare --- include/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 145797fe6a3..839d0ea3dca 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -353,6 +353,7 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data) #define I2C_CLASS_CAM_ANALOG (1<<4) /* camera with analog CCD */ #define I2C_CLASS_CAM_DIGITAL (1<<5) /* most webcams */ #define I2C_CLASS_SOUND (1<<6) /* sound devices */ +#define I2C_CLASS_SPD (1<<7) /* SPD EEPROMs and similar */ #define I2C_CLASS_ALL (UINT_MAX) /* all of the above */ /* i2c_client_address_data is the struct for holding default client -- cgit v1.2.3 From 0573d11b2bbd0e4774f33f4c1959c1939c055e96 Mon Sep 17 00:00:00 2001 From: Eric Brower Date: Mon, 14 Jul 2008 22:38:31 +0200 Subject: i2c-algo-pcf: Multi-master lost-arbitration improvement Improve lost-arbitration handling of PCF8584. This is necessary for support of a currently out-of-kernel driver for Sun Microsystems E250 environmental management; perhaps others. 
Signed-off-by: Eric Brower Acked-by: Dan Smolik Signed-off-by: Jean Delvare --- include/linux/i2c-algo-pcf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h index 77afbb60fd1..74fb6f889a7 100644 --- a/include/linux/i2c-algo-pcf.h +++ b/include/linux/i2c-algo-pcf.h @@ -36,6 +36,12 @@ struct i2c_algo_pcf_data { /* local settings */ int udelay; int timeout; + + /* Multi-master lost arbitration back-off delay (msecs) + * This should be set by the bus adapter or knowledgable client + * if bus is multi-mastered, else zero + */ + unsigned long lab_mdelay; }; int i2c_pcf_add_bus(struct i2c_adapter *); -- cgit v1.2.3 From e3e7fc3c401a5d53f0599a357b3cf65d6a4f52e3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:31 +0200 Subject: i2c-algo-pcf: Drop unused struct members Struct members udelay and timeout aren't used anywhere, so drop them. Signed-off-by: Jean Delvare Acked-by: Eric Brower --- include/linux/i2c-algo-pcf.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-algo-pcf.h b/include/linux/i2c-algo-pcf.h index 74fb6f889a7..0177d280f73 100644 --- a/include/linux/i2c-algo-pcf.h +++ b/include/linux/i2c-algo-pcf.h @@ -33,10 +33,6 @@ struct i2c_algo_pcf_data { int (*getclock) (void *data); void (*waitforpin) (void); - /* local settings */ - int udelay; - int timeout; - /* Multi-master lost arbitration back-off delay (msecs) * This should be set by the bus adapter or knowledgable client * if bus is multi-mastered, else zero -- cgit v1.2.3 From f6a7110520037ba786f17b53790c6eb8a3d4ef55 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:34 +0200 Subject: i2c-dev: Delete empty detach_client callback Implementing detach_client is optional, so there is no point in an empty implementation. Likewise, i2c driver IDs are optional, and we don't need one. 
Signed-off-by: Jean Delvare --- include/linux/i2c-id.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 988e566d3ed..ef13b7c66df 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -91,8 +91,6 @@ #define I2C_DRIVERID_M52790 95 /* Mitsubishi M52790SP/FP AV switch */ #define I2C_DRIVERID_CS5345 96 /* cs5345 audio processor */ -#define I2C_DRIVERID_I2CDEV 900 - #define I2C_DRIVERID_OV7670 1048 /* Omnivision 7670 camera */ /* -- cgit v1.2.3 From e9ca9eb9d7fc7bf3dc3cec5ba7edb089c4625f7b Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Mon, 14 Jul 2008 22:38:35 +0200 Subject: i2c: Export the i2c_bus_type symbol Export the root of the i2c bus so that PowerPC device tree code can iterate over devices on the i2c bus. Signed-off-by: Jon Smirl Signed-off-by: Jean Delvare --- include/linux/i2c.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 839d0ea3dca..50cbab4b62b 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -35,6 +35,8 @@ #include /* for completion */ #include +extern struct bus_type i2c_bus_type; + /* --- General options ------------------------------------------------ */ struct i2c_msg; -- cgit v1.2.3 From 2b7a5056a0a7ff17d5d2004c29c852a92a6bd632 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 14 Jul 2008 22:38:35 +0200 Subject: i2c: New-style EEPROM driver using device IDs Add a new-style driver for most I2C EEPROMs, giving sysfs read/write access to their data. Tested with various chips and clock rates. 
Signed-off-by: Wolfram Sang Signed-off-by: Jean Delvare --- include/linux/i2c/at24.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 include/linux/i2c/at24.h (limited to 'include/linux') diff --git a/include/linux/i2c/at24.h b/include/linux/i2c/at24.h new file mode 100644 index 00000000000..f6edd522a92 --- /dev/null +++ b/include/linux/i2c/at24.h @@ -0,0 +1,28 @@ +#ifndef _LINUX_AT24_H +#define _LINUX_AT24_H + +#include + +/* + * As seen through Linux I2C, differences between the most common types of I2C + * memory include: + * - How much memory is available (usually specified in bit)? + * - What write page size does it support? + * - Special flags (16 bit addresses, read_only, world readable...)? + * + * If you set up a custom eeprom type, please double-check the parameters. + * Especially page_size needs extra care, as you risk data loss if your value + * is bigger than what the chip actually supports! + */ + +struct at24_platform_data { + u32 byte_len; /* size (sum of all addr) */ + u16 page_size; /* for writes */ + u8 flags; +#define AT24_FLAG_ADDR16 0x80 /* address pointer is 16 bit */ +#define AT24_FLAG_READONLY 0x40 /* sysfs-entry will be read-only */ +#define AT24_FLAG_IRUGO 0x20 /* sysfs-entry will be world-readable */ +#define AT24_FLAG_TAKE8ADDR 0x10 /* take always 8 addresses (24c00) */ +}; + +#endif /* _LINUX_AT24_H */ -- cgit v1.2.3 From 4735c98f8447acb1c8977e2b8024640f7bf36dd6 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 14 Jul 2008 22:38:36 +0200 Subject: i2c: Add detection capability to new-style drivers Add a mechanism to let new-style i2c drivers optionally autodetect devices they would support on selected buses and ask i2c-core to instantiate them. This is a replacement for legacy i2c drivers, much cleaner. 
Where drivers had to implement both a legacy i2c_driver and a new-style i2c_driver so far, this mechanism makes it possible to get rid of the legacy i2c_driver and implement both enumerated and detected device support with just one (new-style) i2c_driver. Here is a quick conversion guide for these drivers, step by step: * Delete the legacy driver definition, registration and removal. Delete the attach_adapter and detach_client methods of the legacy driver. * Change the prototype of the legacy detect function from static int foo_detect(struct i2c_adapter *adapter, int address, int kind); to static int foo_detect(struct i2c_client *client, int kind, struct i2c_board_info *info); * Set the new-style driver detect callback to this new function, and set its address_data to &addr_data (addr_data is generally provided by I2C_CLIENT_INSMOD.) * Add the appropriate class to the new-style driver. This is typically the class the legacy attach_adapter method was checking for. Class checking is now mandatory (done by i2c-core.) See for the list of available classes. * Remove the i2c_client allocation and freeing from the detect function. A pre-allocated client is now handed to you by i2c-core, and is freed automatically. * Make the detect function fill the type field of the i2c_board_info structure it was passed as a parameter, and return 0, on success. If the detection fails, return -ENODEV. 
Signed-off-by: Jean Delvare --- include/linux/i2c.h | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 50cbab4b62b..08be0d21864 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -45,6 +45,7 @@ struct i2c_adapter; struct i2c_client; struct i2c_driver; union i2c_smbus_data; +struct i2c_board_info; /* * The master routines are the ones normally used to transmit data to devices @@ -94,15 +95,33 @@ extern s32 i2c_smbus_write_i2c_block_data(struct i2c_client * client, u8 command, u8 length, const u8 *values); -/* - * A driver is capable of handling one or more physical devices present on - * I2C adapters. This information is used to inform the driver of adapter - * events. +/** + * struct i2c_driver - represent an I2C device driver + * @class: What kind of i2c device we instantiate (for detect) + * @detect: Callback for device detection + * @address_data: The I2C addresses to probe, ignore or force (for detect) + * @clients: List of detected clients we created (for i2c-core use only) * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver. + * + * For automatic device detection, both @detect and @address_data must + * be defined. @class should also be set, otherwise only devices forced + * with module parameters will be created. The detect function must + * fill at least the name field of the i2c_board_info structure it is + * handed upon successful detection, and possibly also the flags field. + * + * If @detect is missing, the driver will still work fine for enumerated + * devices. Detected devices simply won't be supported. This is expected + * for the many I2C/SMBus devices which can't be detected reliably, and + * the ones which can always be enumerated in practice. 
+ * + * The i2c_client structure which is handed to the @detect callback is + * not a real i2c_client. It is initialized just enough so that you can + * call i2c_smbus_read_byte_data and friends on it. Don't do anything + * else with it. In particular, calling dev_dbg and friends on it is + * not allowed. */ - struct i2c_driver { int id; unsigned int class; @@ -142,6 +161,11 @@ struct i2c_driver { struct device_driver driver; const struct i2c_device_id *id_table; + + /* Device detection callback for automatic device creation */ + int (*detect)(struct i2c_client *, int kind, struct i2c_board_info *); + const struct i2c_client_address_data *address_data; + struct list_head clients; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) @@ -157,6 +181,7 @@ struct i2c_driver { * @dev: Driver model device node for the slave. * @irq: indicates the IRQ generated by this device (if any) * @list: list of active/busy clients (DEPRECATED) + * @detected: member of an i2c_driver.clients list * @released: used to synchronize client releases & detaches and references * * An i2c_client identifies a single device (i.e. chip) connected to an @@ -174,6 +199,7 @@ struct i2c_client { struct device dev; /* the device structure */ int irq; /* irq issued by device */ struct list_head list; /* DEPRECATED */ + struct list_head detected; struct completion released; }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) -- cgit v1.2.3 From 11c3b79218390a139f2d474ee1e983a672d5839a Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 12 Jun 2008 14:00:18 -0700 Subject: configfs: Allow ->make_item() and ->make_group() to return detailed errors. The configfs operations ->make_item() and ->make_group() currently return a new item/group. A return of NULL signifies an error. Because of this, -ENOMEM is the only return code bubbled up the stack. Multiple folks have requested the ability to return specific error codes when these operations fail. 
This patch adds that ability by changing the ->make_item/group() ops to return an int. Also updated are the in-kernel users of configfs. Signed-off-by: Joel Becker --- include/linux/configfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 3ae65b1bf90..0488f937634 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -165,8 +165,8 @@ struct configfs_item_operations { }; struct configfs_group_operations { - struct config_item *(*make_item)(struct config_group *group, const char *name); - struct config_group *(*make_group)(struct config_group *group, const char *name); + int (*make_item)(struct config_group *group, const char *name, struct config_item **new_item); + int (*make_group)(struct config_group *group, const char *name, struct config_group **new_group); int (*commit_item)(struct config_item *item); void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); -- cgit v1.2.3 From c300bd2fb583afb6d68804afd38bc90b31310d95 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 10 Jul 2008 02:16:44 +0200 Subject: PCI: include linux/pm_wakeup.h for device_set_wakeup_capable drivers/pci/pci.c needs pm_wakeup.h since it uses device_set_wakeup_capable(). The latter also needs to be stubbed out for !CONFIG_PM. Signed-off-by: Stephen Rothwell Signed-off-by: Rafael J.
Wysocki Signed-off-by: Jesse Barnes --- include/linux/pm_wakeup.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index 3af0c8d05cd..0aae7776185 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -63,6 +63,8 @@ static inline void device_init_wakeup(struct device *dev, int val) dev->power.can_wakeup = !!val; } +static inline void device_set_wakeup_capable(struct device *dev, int val) { } + static inline int device_can_wakeup(struct device *dev) { return dev->power.can_wakeup; -- cgit v1.2.3 From 521e575b9a7324a0bca762622139f69582a042bf Mon Sep 17 00:00:00 2001 From: Ron Livne Date: Mon, 14 Jul 2008 23:48:48 -0700 Subject: IB/mlx4: Add support for blocking multicast loopback packets Add support for handling the IB_QP_CREATE_MULTICAST_BLOCK_LOOPBACK flag by using the per-multicast group loopback blocking feature of mlx4 hardware. Signed-off-by: Ron Livne Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index a744383d16e..81b3dd5206e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -398,7 +398,8 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm int mlx4_INIT_PORT(struct mlx4_dev *dev, int port); int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port); -int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); +int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], + int block_mcast_loopback); int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16]); int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list, -- cgit v1.2.3 From 4489428ab5a49a6f443d9aa17f1d891417787d7b Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Fri, 4 Apr 2008 19:36:59 +0200 
Subject: sdhci: support JMicron secondary interface JMicron chips sometimes have two interfaces to work around limitations in Microsoft's sdhci driver. This patch allows us to use either interface. Signed-off-by: Pierre Ossman --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 65953822c9c..30153473bc3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2188,6 +2188,7 @@ #define PCI_DEVICE_ID_JMICRON_JMB366 0x2366 #define PCI_DEVICE_ID_JMICRON_JMB368 0x2368 #define PCI_DEVICE_ID_JMICRON_JMB38X_SD 0x2381 +#define PCI_DEVICE_ID_JMICRON_JMB38X_MMC 0x2382 #define PCI_DEVICE_ID_JMICRON_JMB38X_MS 0x2383 #define PCI_VENDOR_ID_KORENIX 0x1982 -- cgit v1.2.3 From 150a55683b6b0ccb66aae75a10a3a514340c7c03 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 20 May 2008 00:57:27 +0300 Subject: include/linux/mmc/mmc.h: remove CVS tags This patch removes a CVS tag that wasn't updated for a long time. Signed-off-by: Adrian Bunk Signed-off-by: Pierre Ossman --- include/linux/mmc/mmc.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 4236fbf0b6f..14b81f3e523 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -16,7 +16,6 @@ * Based strongly on code by: * * Author: Yong-iL Joh - * Date : $Date: 2002/06/18 12:37:30 $ * * Author: Andrew Christian * 15 May 2002 -- cgit v1.2.3 From 28f52482b41edc88cdf575aa6ed414c6e116ce10 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jun 2008 18:17:15 +0400 Subject: mmc: add support for card-detection polling Some hosts (and boards that use mmc_spi) do not use interrupts on the CD line, so they can't trigger mmc_detect_change. We want to poll the card and see if there was a change. 1 second poll interval seems reasonable.
This patch also implements .get_cd() host operation, that could be used by the hosts that are able to report card-detect status without need to talk MMC. Signed-off-by: Anton Vorontsov Signed-off-by: Pierre Ossman --- include/linux/mmc/host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 7ab962fa1d7..6188e19d233 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -51,8 +51,18 @@ struct mmc_ios { struct mmc_host_ops { void (*request)(struct mmc_host *host, struct mmc_request *req); + /* + * Avoid calling these three functions too often or in a "fast path", + * since underlying controller might implement them in an expensive + * and/or slow way. + * + * Also note that these functions might sleep, so don't call them + * in the atomic contexts! + */ void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios); int (*get_ro)(struct mmc_host *host); + int (*get_cd)(struct mmc_host *host); + void (*enable_sdio_irq)(struct mmc_host *host, int enable); }; @@ -94,6 +104,7 @@ struct mmc_host { #define MMC_CAP_SD_HIGHSPEED (1 << 3) /* Can do SD high-speed timing */ #define MMC_CAP_SDIO_IRQ (1 << 4) /* Can signal pending SDIO IRQs */ #define MMC_CAP_SPI (1 << 5) /* Talks only SPI protocols */ +#define MMC_CAP_NEEDS_POLL (1 << 6) /* Needs polling for card-detection */ /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ -- cgit v1.2.3 From 619ef4b42128709de4d89d209b2c874f560deecd Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jun 2008 18:17:21 +0400 Subject: mmc_spi: add support for card-detection polling This patch adds new platform data variable "caps", so platforms could pass their capabilities into MMC core (for example, platforms without interrupt on the CD line will most probably want to pass MMC_CAP_NEEDS_POLL). New platform get_cd() callback provided to optimize polling.
Signed-off-by: Anton Vorontsov Signed-off-by: Pierre Ossman --- include/linux/spi/mmc_spi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/mmc_spi.h b/include/linux/spi/mmc_spi.h index d5ca78b93a3..a3626aedaec 100644 --- a/include/linux/spi/mmc_spi.h +++ b/include/linux/spi/mmc_spi.h @@ -23,6 +23,15 @@ struct mmc_spi_platform_data { /* sense switch on sd cards */ int (*get_ro)(struct device *); + /* + * If board does not use CD interrupts, driver can optimize polling + * using this function. + */ + int (*get_cd)(struct device *); + + /* Capabilities to pass into mmc core (e.g. MMC_CAP_NEEDS_POLL). */ + unsigned long caps; + /* how long to debounce card detect, in msecs */ u16 detect_delay; -- cgit v1.2.3 From 08f80bb5196517a0dfe50dc7c10f234c0ff2f0e8 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Tue, 17 Jun 2008 18:17:39 +0400 Subject: mmc: change .get_ro() callback semantics Now get_ro() callback must return 0/1 values for its logical states, and negative errno values in case of error. If particular host instance doesn't support RO/WP switch, it should return -ENOSYS. This patch changes some hosts in two ways: 1. Now functions should be smart to not return negative values in "RO asserted" case (particularly gpio_ calls could return negative values for the outermost GPIOs). Also, board code usually passes get_ro() callbacks that directly return gpioreg & bit result, so at91_mci, imxmmc, pxamci and mmc_spi's get_ro() handlers need take special care when returning platform's values to the mmc core. 2. In case of host instance didn't implement get_ro() callback, it should really return -ENOSYS and let the mmc core decide what to do about it (mmc core thinks the same way as the hosts, so it isn't functional change). 
Signed-off-by: Anton Vorontsov Signed-off-by: Pierre Ossman --- include/linux/mmc/host.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6188e19d233..753b7231b88 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -58,6 +58,18 @@ struct mmc_host_ops { * * Also note that these functions might sleep, so don't call them * in the atomic contexts! + * + * Return values for the get_ro callback should be: + * 0 for a read/write card + * 1 for a read-only card + * -ENOSYS when not supported (equal to NULL callback) + * or a negative errno value when something bad happened + * + * Return values for the get_cd callback should be: + * 0 for an absent card + * 1 for a present card + * -ENOSYS when not supported (equal to NULL callback) + * or a negative errno value when something bad happened */ void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios); int (*get_ro)(struct mmc_host *host); -- cgit v1.2.3 From ad3868b2ec96ec14a1549c9e33f5f9a2a3c6ab15 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 28 Jun 2008 12:52:45 +0200 Subject: mmc,sdio: helper function for transfer padding There are a lot of crappy controllers out there that cannot handle all the request sizes that the MMC/SD/SDIO specifications require. In case the card driver can pad the data to overcome the problems, this commit adds a helper that calculates how much that padding should be. A corresponding helper is also added for SDIO, but it can also deal with all the complexities of splitting up a large transfer efficiently.
Signed-off-by: Pierre Ossman --- include/linux/mmc/core.h | 1 + include/linux/mmc/sdio_func.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index d0c3abed74c..143cebf0586 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -135,6 +135,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *); +extern unsigned int mmc_align_data_size(struct mmc_card *, unsigned int); extern int __mmc_claim_host(struct mmc_host *host, atomic_t *abort); extern void mmc_release_host(struct mmc_host *host); diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index b050f4d7b41..f57f22b3be8 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -1,7 +1,7 @@ /* * include/linux/mmc/sdio_func.h * - * Copyright 2007 Pierre Ossman + * Copyright 2007-2008 Pierre Ossman * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -120,6 +120,8 @@ extern int sdio_set_block_size(struct sdio_func *func, unsigned blksz); extern int sdio_claim_irq(struct sdio_func *func, sdio_irq_handler_t *handler); extern int sdio_release_irq(struct sdio_func *func); +extern unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz); + extern unsigned char sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret); extern unsigned short sdio_readw(struct sdio_func *func, -- cgit v1.2.3 From 6d37333163025b46afbcad434ec9a5f2e88e7254 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Mon, 30 Jun 2008 10:50:24 +0300 Subject: mmc: fix sdio_io sparse errors This patch fixes sdio_io sparse errors. 
This fix changes signature of API functions, changing unsigned char -> u8 unsigned short -> u16 unsigned long -> u32 - this was probably a bug in 64 bit platforms Signed-off-by: Tomas Winkler Signed-off-by: Pierre Ossman --- include/linux/mmc/sdio_func.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) mode change 100644 => 100755 include/linux/mmc/sdio_func.h (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h old mode 100644 new mode 100755 index f57f22b3be8..28fb0a33acf --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -122,23 +122,20 @@ extern int sdio_release_irq(struct sdio_func *func); extern unsigned int sdio_align_size(struct sdio_func *func, unsigned int sz); -extern unsigned char sdio_readb(struct sdio_func *func, - unsigned int addr, int *err_ret); -extern unsigned short sdio_readw(struct sdio_func *func, - unsigned int addr, int *err_ret); -extern unsigned long sdio_readl(struct sdio_func *func, - unsigned int addr, int *err_ret); +extern u8 sdio_readb(struct sdio_func *func, unsigned int addr, int *err_ret); +extern u16 sdio_readw(struct sdio_func *func, unsigned int addr, int *err_ret); +extern u32 sdio_readl(struct sdio_func *func, unsigned int addr, int *err_ret); extern int sdio_memcpy_fromio(struct sdio_func *func, void *dst, unsigned int addr, int count); extern int sdio_readsb(struct sdio_func *func, void *dst, unsigned int addr, int count); -extern void sdio_writeb(struct sdio_func *func, unsigned char b, +extern void sdio_writeb(struct sdio_func *func, u8 b, unsigned int addr, int *err_ret); -extern void sdio_writew(struct sdio_func *func, unsigned short b, +extern void sdio_writew(struct sdio_func *func, u16 b, unsigned int addr, int *err_ret); -extern void sdio_writel(struct sdio_func *func, unsigned long b, +extern void sdio_writel(struct sdio_func *func, u32 b, unsigned int addr, int *err_ret); extern int sdio_memcpy_toio(struct sdio_func 
*func, unsigned int addr, -- cgit v1.2.3 From 23af60398af2f5033e2f53665538a09f498dbc03 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sun, 6 Jul 2008 01:10:27 +0200 Subject: mmc: remove multiwrite capability Relax requirements on host controllers and only require that they do not report a transfer count that is larger than the actual one (i.e. a lower value is okay). This is how many other parts of the kernel behave so upper layers should already be prepared to handle that scenario. This gives us a performance boost on MMC cards. Signed-off-by: Pierre Ossman --- include/linux/mmc/host.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 753b7231b88..10a2080086c 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -111,12 +111,11 @@ struct mmc_host { unsigned long caps; /* Host capabilities */ #define MMC_CAP_4_BIT_DATA (1 << 0) /* Can the host do 4 bit transfers */ -#define MMC_CAP_MULTIWRITE (1 << 1) /* Can accurately report bytes sent to card on error */ -#define MMC_CAP_MMC_HIGHSPEED (1 << 2) /* Can do MMC high-speed timing */ -#define MMC_CAP_SD_HIGHSPEED (1 << 3) /* Can do SD high-speed timing */ -#define MMC_CAP_SDIO_IRQ (1 << 4) /* Can signal pending SDIO IRQs */ -#define MMC_CAP_SPI (1 << 5) /* Talks only SPI protocols */ -#define MMC_CAP_NEEDS_POLL (1 << 6) /* Needs polling for card-detection */ +#define MMC_CAP_MMC_HIGHSPEED (1 << 1) /* Can do MMC high-speed timing */ +#define MMC_CAP_SD_HIGHSPEED (1 << 2) /* Can do SD high-speed timing */ +#define MMC_CAP_SDIO_IRQ (1 << 3) /* Can signal pending SDIO IRQs */ +#define MMC_CAP_SPI (1 << 4) /* Talks only SPI protocols */ +#define MMC_CAP_NEEDS_POLL (1 << 5) /* Needs polling for card-detection */ /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ -- cgit v1.2.3 From 62a7573ee9f31d4fdb330b3e68ebf6efaba1d57c Mon Sep 17 00:00:00 2001
From: Benzi Zbit Date: Thu, 10 Jul 2008 02:41:43 +0300 Subject: sdio: fix the use of hard coded timeout value. This adds reading and using of enable_timeout from the CIS Signed-off-by: Benzi Zbit Signed-off-by: Tomas Winkler Signed-off-by: Pierre Ossman --- include/linux/mmc/sdio_func.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index 28fb0a33acf..07bee4a0d45 100755 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -46,6 +46,8 @@ struct sdio_func { unsigned max_blksize; /* maximum block size */ unsigned cur_blksize; /* current block size */ + unsigned enable_timeout; /* max enable timeout in msec */ + unsigned int state; /* function state */ #define SDIO_STATE_PRESENT (1<<0) /* present in sysfs */ -- cgit v1.2.3 From 124cafc5eb973e748c4ce3dc1caad29274e64613 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 15 Jul 2008 21:21:44 +0200 Subject: ide: remove ide_init_drive_cmd ide_init_drive_cmd just calls blk_rq_init. This converts the users of ide_init_drive_cmd to use blk_rq_init directly and removes ide_init_drive_cmd. Signed-off-by: FUJITA Tomonori Cc: Borislav Petkov Cc: Jens Axboe Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index eddb6daadf4..3261c669175 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -857,8 +857,6 @@ int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); extern ide_startstop_t ide_do_reset (ide_drive_t *); -extern void ide_init_drive_cmd (struct request *rq); - /* * "action" parameter type for ide_do_drive_cmd() below. 
*/ -- cgit v1.2.3 From 681a561b7ec7fdcd8f35b68e44ac6d6c70aecc04 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 15 Jul 2008 21:21:45 +0200 Subject: block: unexport blk_end_sync_rq All the users of blk_end_sync_rq have gone (they are converted to use blk_execute_rq). This unexports blk_end_sync_rq. Signed-off-by: FUJITA Tomonori Cc: Borislav Petkov Signed-off-by: Jens Axboe Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d2a1b71e93c..1171abd7eb1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -623,7 +623,6 @@ extern void generic_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); -extern void blk_end_sync_rq(struct request *rq, int error); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); -- cgit v1.2.3 From 30e5ee4d1a651a0c66e86c6612c003034bd20ba2 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:46 +0200 Subject: ide: remove obsoleted "idebus=" kernel parameter * Remove obsoleted "idebus=" kernel parameter. * Remove no longer needed ide_system_bus_speed() and system_bus_clock() (together with idebus_parameter and system_bus_speed variables).
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 3261c669175..dad53565924 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -994,8 +994,6 @@ int ide_taskfile_ioctl(ide_drive_t *, unsigned int, unsigned long); int ide_cmd_ioctl(ide_drive_t *, unsigned int, unsigned long); int ide_task_ioctl(ide_drive_t *, unsigned int, unsigned long); -extern int system_bus_clock(void); - extern int ide_driveid_update(ide_drive_t *); extern int ide_config_drive_speed(ide_drive_t *, u8); extern u8 eighty_ninty_three (ide_drive_t *); -- cgit v1.2.3 From 931ee0dc5c69e8113233d21942681ab8fecde7f9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:47 +0200 Subject: ide: remove obsoleted "ide=" kernel parameters * Remove obsoleted "ide=" kernel parameters. * Remove no longer needed: - ide_setup() - parse_options() - __setup("", ...) - module_param(options, ...) * Use module_{init,exit}() for MODULE=y case and remove MODULE ifdef. * Make ide_*acpi* and ide_doubler variables static. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index dad53565924..0fa1812d043 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -813,10 +813,6 @@ int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsig #ifndef _IDE_C extern ide_hwif_t ide_hwifs[]; /* master data repository */ #endif -extern int ide_noacpi; -extern int ide_acpigtf; -extern int ide_acpionboot; -extern int noautodma; extern int ide_vlb_clk; extern int ide_pci_clk; -- cgit v1.2.3 From 9a410e79b552bacb4481f85618aa7333b7776ed7 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:48 +0200 Subject: ide: remove IDE_TFLAG_NO_SELECT_MASK taskfile flag Always call SELECT_MASK(..., 0) in ide_tf_load() (needs to be done to match ide_set_irq(..., 1)) and then remove IDE_TFLAG_NO_SELECT_MASK taskfile flag. This change should only affect hpt366 and icside host drivers since ->maskproc(..., 0) for sgiioc4 is equivalent to ide_set_irq(..., 1). Cc: Sergei Shtylyov Cc: Russell King Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 0fa1812d043..d4a910cdb90 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -869,7 +869,6 @@ extern void ide_end_drive_cmd(ide_drive_t *, u8, u8); enum { IDE_TFLAG_LBA48 = (1 << 0), - IDE_TFLAG_NO_SELECT_MASK = (1 << 1), IDE_TFLAG_FLAGGED = (1 << 2), IDE_TFLAG_OUT_DATA = (1 << 3), IDE_TFLAG_OUT_HOB_FEATURE = (1 << 4), -- cgit v1.2.3 From ed4af48fd660176680da905817f6e40d51436e4c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:48 +0200 Subject: ide: move IRQ unmasking out from ->tf_load method Move IRQ unmasking out from ->tf_load method to its users. 
There should be no functional changes caused by this patch (SELECT_MASK() is NOP except for hpt366, icside and sgiioc4). Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index d4a910cdb90..56d0bc2dffe 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -973,6 +973,7 @@ typedef struct ide_task_s { void ide_tf_dump(const char *, struct ide_taskfile *); extern void SELECT_DRIVE(ide_drive_t *); +void SELECT_MASK(ide_drive_t *, int); extern int drive_is_ready(ide_drive_t *); -- cgit v1.2.3 From 135721446144af005109c25eeacca4fdddcd9a66 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:49 +0200 Subject: ide: remove ->mmio flag from ide_hwif_t Since scc_pata host driver no longer uses IDE PCI layer / ide_dma_setup() and all other ->mmio users set also IDE_HFLAG_MMIO host flag we can safely remove ->mmio flag. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 56d0bc2dffe..b01b102be4d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -532,7 +532,6 @@ typedef struct hwif_s { unsigned serialized : 1; /* serialized all channel operation */ unsigned sharing_irq: 1; /* 1 = sharing irq with another hwif */ unsigned sg_mapped : 1; /* sg_table and sg_nents are ready */ - unsigned mmio : 1; /* host uses MMIO */ struct device gendev; struct device *portdev; -- cgit v1.2.3 From f8c4bd0ab2b8783c0f080957781e9f70bee48eaa Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:49 +0200 Subject: ide: pass 'hwif *' instead of 'drive *' to ->OUTBSYNC method There should be no functional changes caused by this patch. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index b01b102be4d..1c343146964 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -493,7 +493,7 @@ typedef struct hwif_s { void (*ide_dma_clear_irq)(ide_drive_t *drive); void (*OUTB)(u8 addr, unsigned long port); - void (*OUTBSYNC)(ide_drive_t *drive, u8 addr, unsigned long port); + void (*OUTBSYNC)(struct hwif_s *hwif, u8 addr, unsigned long port); u8 (*INB)(unsigned long port); -- cgit v1.2.3 From 0fd04dcc2ebb6ec9088c24b368b0ce1f42a98ef5 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:50 +0200 Subject: ide: use ->OUTBSYNC in ide_set_irq() Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 1c343146964..4d1c9714f1d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1340,7 +1340,8 @@ static inline void ide_set_irq(ide_drive_t *drive, int on) { ide_hwif_t *hwif = drive->hwif; - hwif->OUTB(drive->ctl | (on ? 0 : 2), hwif->io_ports.ctl_addr); + hwif->OUTBSYNC(hwif, drive->ctl | (on ? 0 : 2), + hwif->io_ports.ctl_addr); } static inline u8 ide_read_status(ide_drive_t *drive) -- cgit v1.2.3 From ff07488346702f554aaeb6aae982540aa0302373 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:50 +0200 Subject: ide: remove drive->ctl Remove drive->ctl (it is always equal to 0x08 after init time). While at it: * Use ATA_DEVCTL_OBS define. There should be no functional changes caused by this patch. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 4d1c9714f1d..d8c86f0362c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -364,7 +364,6 @@ typedef struct ide_drive_s { u8 wcache; /* status of write cache */ u8 acoustic; /* acoustic management */ u8 media; /* disk, cdrom, tape, floppy, ... */ - u8 ctl; /* "normal" value for Control register */ u8 ready_stat; /* min status value for drive ready */ u8 mult_count; /* current multiple sector setting */ u8 mult_req; /* requested multiple sector setting */ @@ -1340,7 +1339,7 @@ static inline void ide_set_irq(ide_drive_t *drive, int on) { ide_hwif_t *hwif = drive->hwif; - hwif->OUTBSYNC(hwif, drive->ctl | (on ? 0 : 2), + hwif->OUTBSYNC(hwif, ATA_DEVCTL_OBS | (on ? 0 : 2), hwif->io_ports.ctl_addr); } -- cgit v1.2.3 From 63f5abb0959337db0d5bece9cefba03cdcadec51 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 15 Jul 2008 21:21:51 +0200 Subject: ide: remove action argument in ide_do_drive_cmd ide_do_drive_cmd is called only with ide_preempt action argument. So we can remove the action argument in ide_do_drive_cmd and ide_action_t typedef. This patch also includes two minor cleanups: 1) ide_do_drive_cmd always succeeds so we don't need the return value; 2) the callers use blk_rq_init before ide_do_drive_cmd so there is no need to initialize rq->errors. 
Signed-off-by: FUJITA Tomonori Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index d8c86f0362c..04267dc1edf 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -851,17 +851,7 @@ int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); extern ide_startstop_t ide_do_reset (ide_drive_t *); -/* - * "action" parameter type for ide_do_drive_cmd() below. - */ -typedef enum { - ide_wait, /* insert rq at end of list, and wait for it */ - ide_preempt, /* insert rq in front of current request */ - ide_head_wait, /* insert rq in front of current request and wait for it */ - ide_end /* insert rq at end of list, but don't wait for it */ -} ide_action_t; - -extern int ide_do_drive_cmd(ide_drive_t *, struct request *, ide_action_t); +extern void ide_do_drive_cmd(ide_drive_t *, struct request *); extern void ide_end_drive_cmd(ide_drive_t *, u8, u8); -- cgit v1.2.3 From 92f5daff2b8439fa4c57c57f47823ffc459c3bd9 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:55 +0200 Subject: ide-tape: make pc->idetape_callback void There should be no functional changes caused by this patch. 
Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 04267dc1edf..8936b21a703 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -641,7 +641,7 @@ struct ide_atapi_pc { */ u8 pc_buf[256]; void (*idefloppy_callback) (ide_drive_t *); - ide_startstop_t (*idetape_callback) (ide_drive_t *); + void (*idetape_callback) (ide_drive_t *); /* idetape only */ struct idetape_bh *bh; -- cgit v1.2.3 From 1b06e92aa03018e4b3ba281e03a7711d9b71a998 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:56 +0200 Subject: ide-{floppy,tape}: merge pc->idefloppy_callback and pc->idetape_callback Merge pc->idefloppy_callback and pc->idetape_callback into pc->callback. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 8936b21a703..f079456adfd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -640,8 +640,8 @@ struct ide_atapi_pc { * to change/removal later. */ u8 pc_buf[256]; - void (*idefloppy_callback) (ide_drive_t *); - void (*idetape_callback) (ide_drive_t *); + + void (*callback)(ide_drive_t *); /* idetape only */ struct idetape_bh *bh; -- cgit v1.2.3 From 5e3310958204912f3f00be2592c945fbc37db6ae Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:56 +0200 Subject: ide-{floppy,tape}: PC_FLAG_DMA_RECOMMENDED -> PC_FLAG_DMA_OK * Use PC_FLAG_DMA_OK flag instead of PC_FLAG_DMA_RECOMMENDED one. * Remove no longer used PC_FLAG_DMA_RECOMMENDED flag. There should be no functional changes caused by this patch. 
Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f079456adfd..63cee2947f6 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -602,12 +602,11 @@ enum { PC_FLAG_SUPPRESS_ERROR = (1 << 1), PC_FLAG_WAIT_FOR_DSC = (1 << 2), PC_FLAG_DMA_OK = (1 << 3), - PC_FLAG_DMA_RECOMMENDED = (1 << 4), - PC_FLAG_DMA_IN_PROGRESS = (1 << 5), - PC_FLAG_DMA_ERROR = (1 << 6), - PC_FLAG_WRITING = (1 << 7), + PC_FLAG_DMA_IN_PROGRESS = (1 << 4), + PC_FLAG_DMA_ERROR = (1 << 5), + PC_FLAG_WRITING = (1 << 6), /* command timed out */ - PC_FLAG_TIMEDOUT = (1 << 8), + PC_FLAG_TIMEDOUT = (1 << 7), }; struct ide_atapi_pc { -- cgit v1.2.3 From 5d41893c0f9caf94b449eada0279a08c86f0212e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:57 +0200 Subject: ide: add PC_FLAG_ZIP_DRIVE pc flag Add PC_FLAG_ZIP_DRIVE pc flag, set it in idefloppy_do_request() and check for it (instead of checking for IDEFLOPPY_FLAG_ZIP_DRIVE) in idefloppy_transfer_pc(). This is a preparation for adding generic ide_transfer_pc() helper. There should be no functional changes caused by this patch. 
Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 63cee2947f6..89feaea9e20 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -607,6 +607,7 @@ enum { PC_FLAG_WRITING = (1 << 6), /* command timed out */ PC_FLAG_TIMEDOUT = (1 << 7), + PC_FLAG_ZIP_DRIVE = (1 << 8), }; struct ide_atapi_pc { -- cgit v1.2.3 From 594c16d8dd54cd7b1c5ef1ec3ac0f6bf34301dad Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:58 +0200 Subject: ide: add ide_transfer_pc() helper * Add ide-atapi.c file for generic ATAPI support together with CONFIG_IDE_ATAPI config option. * Add generic ide_transfer_pc() helper to ide-atapi.c and then convert ide-{floppy,tape,scsi} device drivers to use it. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 89feaea9e20..bed3c58798a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -967,6 +967,9 @@ extern int drive_is_ready(ide_drive_t *); void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); +ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, + ide_handler_t *, unsigned int, ide_expiry_t *); + ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *); void task_end_request(ide_drive_t *, struct request *, u8); -- cgit v1.2.3 From 28c7214bd8c2bbd4873b8f1e7f58d86d3731124f Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:21:59 +0200 Subject: ide: add PC_FLAG_DRQ_INTERRUPT pc flag Add PC_FLAG_DRQ_INTERRUPT pc flag, set it in ide*_do_request() and check for it (instead of checking for IDE*_FLAG_DRQ_INTERRUPT) in ide*_issue_pc(). 
This is a preparation for adding generic ide_issue_pc() helper. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index bed3c58798a..c2274ad44b2 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -608,6 +608,7 @@ enum { /* command timed out */ PC_FLAG_TIMEDOUT = (1 << 7), PC_FLAG_ZIP_DRIVE = (1 << 8), + PC_FLAG_DRQ_INTERRUPT = (1 << 9), }; struct ide_atapi_pc { -- cgit v1.2.3 From 6bf1641ca1c7554f0da54aaf89788731b541bacc Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:22:00 +0200 Subject: ide: add ide_issue_pc() helper Add generic ide_issue_pc() helper to ide-atapi.c and then convert ide-{floppy,tape,scsi} device drivers to use it. There should be no functional changes caused by this patch. Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index c2274ad44b2..fee07a7edb1 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -970,6 +970,8 @@ void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, ide_handler_t *, unsigned int, ide_expiry_t *); +ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_atapi_pc *, + ide_handler_t *, unsigned int, ide_expiry_t *); ide_startstop_t do_rw_taskfile(ide_drive_t *, ide_task_t *); -- cgit v1.2.3 From 646c0cb6c430f8d3ad3769dd1518fe664ff0ce27 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 15 Jul 2008 21:22:03 +0200 Subject: ide: add ide_pc_intr() helper * ide-tape.c: add 'drive' argument to idetape_update_buffers(). 
* Add generic ide_pc_intr() helper to ide-atapi.c and then convert ide-{floppy,tape,scsi} device drivers to use it. * ide-tape.c: remove no longer needed DBG_PC_INTR. There should be no functional changes caused by this patch (unless the debugging is explicitly compiled in). Cc: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index fee07a7edb1..ac4eeb2932e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -968,6 +968,12 @@ extern int drive_is_ready(ide_drive_t *); void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); +ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, + ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, + void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), + void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *), + void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int, + int)); ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, ide_handler_t *, unsigned int, ide_expiry_t *); ide_startstop_t ide_issue_pc(ide_drive_t *, struct ide_atapi_pc *, -- cgit v1.2.3 From c2e1b09ff237c0a3687b9a804cc8bf489743cffc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 14 Jul 2008 16:03:30 -0400 Subject: SUNRPC: Support registering IPv6 interfaces with local rpcbind daemon Introduce a new API to register RPC services on IPv6 interfaces to allow the NFS server and lockd to advertise on IPv6 networks. Unlike rpcb_register(), the new rpcb_v4_register() function uses rpcbind protocol version 4 to contact the local rpcbind daemon. The version 4 SET/UNSET procedures allow services to register address families besides AF_INET, register at specific network interfaces, and register transport protocols besides UDP and TCP. All of this functionality is exposed via the new rpcb_v4_register() kernel API.
A user-space rpcbind daemon implementation that supports version 4 of the rpcbind protocol is required in order to make use of this new API. Note that rpcbind version 3 is sufficient to support the new rpcbind facilities listed above, but most extant implementations use version 4. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 764fd4c286e..e5bfe01ee30 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -125,6 +125,9 @@ void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); int rpcb_register(u32, u32, int, unsigned short, int *); +int rpcb_v4_register(const u32 program, const u32 version, + const struct sockaddr *address, + const char *netid, int *result); int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); void rpcb_getport_async(struct rpc_task *); -- cgit v1.2.3 From 3be53f3f213223f50d8e29b5e1869685bf040a1e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:36 +0200 Subject: ide: move some bits from ide-timing.h to <linux/ide.h> Move struct ide_timing and IDE_TIMING_* defines to <linux/ide.h> from drivers/ide/ide-timing.h. While at it: - use u8/u16 instead of short for struct ide_timing fields - use enum for IDE_TIMING_* There should be no functional changes caused by this patch.
Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index ac4eeb2932e..81c6ea436be 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1271,6 +1271,34 @@ static inline int ide_dev_is_sata(struct hd_driveid *id) u64 ide_get_lba_addr(struct ide_taskfile *, int); u8 ide_dump_status(ide_drive_t *, const char *, u8); +struct ide_timing { + u8 mode; + u8 setup; /* t1 */ + u16 act8b; /* t2 for 8-bit io */ + u16 rec8b; /* t2i for 8-bit io */ + u16 cyc8b; /* t0 for 8-bit io */ + u16 active; /* t2 or tD */ + u16 recover; /* t2i or tK */ + u16 cycle; /* t0 */ + u16 udma; /* t2CYCTYP/2 */ +}; + +enum { + IDE_TIMING_SETUP = (1 << 0), + IDE_TIMING_ACT8B = (1 << 1), + IDE_TIMING_REC8B = (1 << 2), + IDE_TIMING_CYC8B = (1 << 3), + IDE_TIMING_8BIT = IDE_TIMING_ACT8B | IDE_TIMING_REC8B | + IDE_TIMING_CYC8B, + IDE_TIMING_ACTIVE = (1 << 4), + IDE_TIMING_RECOVER = (1 << 5), + IDE_TIMING_CYCLE = (1 << 6), + IDE_TIMING_UDMA = (1 << 7), + IDE_TIMING_ALL = IDE_TIMING_SETUP | IDE_TIMING_8BIT | + IDE_TIMING_ACTIVE | IDE_TIMING_RECOVER | + IDE_TIMING_CYCLE | IDE_TIMING_UDMA, +}; + typedef struct ide_pio_timings_s { int setup_time; /* Address setup (ns) minimum */ int active_time; /* Active pulse (ns) minimum */ -- cgit v1.2.3 From f06ab3402aa2d6de060442c1053ea10b24b65076 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:37 +0200 Subject: ide: convert ide-timing.h to ide-timings.c library (take 2) * Don't include ide-timing.h in cs5535 and sis5513 host drivers (they don't need it currently). * Convert ide-timing.h to ide-timings.c library and add CONFIG_IDE_TIMINGS config option to be selected by host drivers using the library. While at it: - fix ide_timing_find_mode() placement v2: * Add missing EXPORT_SYMBOLs. 
(Stephen Rothwell ) There should be no functional changes caused by this patch. Cc: Stephen Rothwell Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 81c6ea436be..057001f6b1d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1299,6 +1299,11 @@ enum { IDE_TIMING_CYCLE | IDE_TIMING_UDMA, }; +struct ide_timing *ide_timing_find_mode(u8); +void ide_timing_merge(struct ide_timing *, struct ide_timing *, + struct ide_timing *, unsigned int); +int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); + typedef struct ide_pio_timings_s { int setup_time; /* Address setup (ns) minimum */ int active_time; /* Active pulse (ns) minimum */ -- cgit v1.2.3 From c9d6c1a2379373219bb3271bdcbdc0ab2edf349d Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:39 +0200 Subject: ide: move ide_pio_cycle_time() to ide-timings.c All ide_pio_cycle_time() users already select CONFIG_IDE_TIMINGS so move the function from ide-lib.c to ide-timings.c. While at it: - convert ide_pio_cycle_time() to use ide_timing_find_mode() - cleanup ide_pio_cycle_time() a bit There should be no functional changes caused by this patch. 
Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 057001f6b1d..3899c761b30 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1300,6 +1300,7 @@ enum { }; struct ide_timing *ide_timing_find_mode(u8); +u16 ide_pio_cycle_time(ide_drive_t *, u8); void ide_timing_merge(struct ide_timing *, struct ide_timing *, struct ide_timing *, unsigned int); int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); @@ -1311,7 +1312,6 @@ typedef struct ide_pio_timings_s { /* active + recovery (+ setup for some chips) */ } ide_pio_timings_t; -unsigned int ide_pio_cycle_time(ide_drive_t *, u8); u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); extern const ide_pio_timings_t ide_pio_timings[6]; -- cgit v1.2.3 From 3e153cfb5e38ae237ff27a10a833946ac95db8a4 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:39 +0200 Subject: ide: remove no longer used ide_pio_timings[] Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 3899c761b30..4e44525fa5c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1305,15 +1305,7 @@ void ide_timing_merge(struct ide_timing *, struct ide_timing *, struct ide_timing *, unsigned int); int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); -typedef struct ide_pio_timings_s { - int setup_time; /* Address setup (ns) minimum */ - int active_time; /* Active pulse (ns) minimum */ - int cycle_time; /* Cycle time (ns) minimum = */ - /* active + recovery (+ setup for some chips) */ -} ide_pio_timings_t; - u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); -extern const ide_pio_timings_t ide_pio_timings[6]; int 
ide_set_pio_mode(ide_drive_t *, u8); int ide_set_dma_mode(ide_drive_t *, u8); -- cgit v1.2.3 From 9ad540937554a3779c5fe7af13aa390b1d2aeb3e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:39 +0200 Subject: ide: move PIO blacklist to ide-pio-blacklist.c Move PIO blacklist to ide-pio-blacklist.c. While at it: - fix comment - fix whitespace damage There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 4e44525fa5c..535c439fd8f 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1305,6 +1305,8 @@ void ide_timing_merge(struct ide_timing *, struct ide_timing *, struct ide_timing *, unsigned int); int ide_timing_compute(ide_drive_t *, u8, struct ide_timing *, int, int); +int ide_scan_pio_blacklist(char *); + u8 ide_get_best_pio_mode(ide_drive_t *, u8, u8); int ide_set_pio_mode(ide_drive_t *, u8); -- cgit v1.2.3 From 63b51c6d1d63276fd320615c042f1ff5d94ebab8 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:40 +0200 Subject: ide: make ide_hwifs[] static Move ide_hwifs[] from ide.c to ide-probe.c and make it static. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 535c439fd8f..15d5668198a 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -801,18 +801,6 @@ struct ide_driver_s { int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long); -/* - * ide_hwifs[] is the master data structure used to keep track - * of just about everything in ide.c. 
Whenever possible, routines - * should be using pointers to a drive (ide_drive_t *) or - * pointers to a hwif (ide_hwif_t *), rather than indexing this - * structure directly (the allocation/layout may change!). - * - */ -#ifndef _IDE_C -extern ide_hwif_t ide_hwifs[]; /* master data repository */ -#endif - extern int ide_vlb_clk; extern int ide_pci_clk; -- cgit v1.2.3 From c56c5648a3bd15ff14c50f284b261140cd5b5472 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:40 +0200 Subject: ide: set hwif->dev in ide_init_port_hw() (take 2) * Add 'parent' field to hw_regs_t for optional parent device pointer (needed by macio PMAC IDE controllers) and set hwif->dev in ide_init_port_hw(). * Update au1xxx-ide.c, sgiioc4.c, pmac.c and setup-pci.c accordingly. v2: * Update scc_pata.c. There should be no functional changes caused by this patch. Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 15d5668198a..a6a2eccb652 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -171,7 +171,7 @@ typedef struct hw_regs_s { int irq; /* our irq number */ ide_ack_intr_t *ack_intr; /* acknowledge interrupt */ hwif_chipset_t chipset; - struct device *dev; + struct device *dev, *parent; } hw_regs_t; void ide_init_port_data(struct hwif_s *, unsigned int); -- cgit v1.2.3 From e6d95bd14928926d6658b5e4ace905e8b83ed27a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 16 Jul 2008 20:33:42 +0200 Subject: ide: ->port_init_devs -> ->init_dev Change ->port_init_devs method to take 'ide_drive_t *' as an argument instead of 'ide_hwif_t *' and rename it to ->init_dev. There should be no functional changes caused by this patch. 
Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index a6a2eccb652..f9cbe9350ca 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -405,8 +405,8 @@ typedef struct ide_drive_s { struct ide_port_info; struct ide_port_ops { - /* host specific initialization of devices on a port */ - void (*port_init_devs)(struct hwif_s *); + /* host specific initialization of a device */ + void (*init_dev)(ide_drive_t *); /* routine to program host for PIO mode */ void (*set_pio_mode)(ide_drive_t *, const u8); /* routine to program host for DMA mode */ -- cgit v1.2.3 From 79e36a9f54aaf4a52eb2d9520953aa3960e99294 Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Wed, 16 Jul 2008 20:33:48 +0200 Subject: IDE: Fix HDIO_DRIVE_RESET handling Currently, the code path executing an HDIO_DRIVE_RESET ioctl is broken in various ways. Most importantly, it is treated as an out of band request in an illegal way which may very likely lead to system lock ups. Use the drive's request queue to avoid this problem (and fix a locking issue for free along the way). Signed-off-by: Elias Oltmanns Cc: "Alan Cox" Cc: "Randy Dunlap" Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index f9cbe9350ca..021710cc1b1 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -138,6 +138,12 @@ struct ide_io_ports { #define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */ #define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ +/* + * Op codes for special requests to be handled by ide_special_rq(). + * Values should be in the range of 0x20 to 0x3f. 
+ */ +#define REQ_DRIVE_RESET 0x20 + /* * Check for an interrupt and acknowledge the interrupt status */ -- cgit v1.2.3 From 3ef5eb424ebf0cd981192a416358fd707a9f959b Mon Sep 17 00:00:00 2001 From: Elias Oltmanns Date: Wed, 16 Jul 2008 20:33:48 +0200 Subject: IDE: Remove unused code Remove some code which has been made obsolete and hasn't worked properly before anyway. Part of the infrastructure may be reintroduced in a follow up patch to implement a working command aborting facility. Signed-off-by: Elias Oltmanns Cc: "Alan Cox" Cc: "Randy Dunlap" Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 021710cc1b1..4726126f5a5 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -571,8 +571,6 @@ typedef struct hwgroup_s { unsigned int sleeping : 1; /* BOOL: polling active & poll_timeout field valid */ unsigned int polling : 1; - /* BOOL: in a polling reset situation. 
Must not trigger another reset yet */ - unsigned int resetting : 1; /* current drive */ ide_drive_t *drive; @@ -792,7 +790,6 @@ struct ide_driver_s { ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); - ide_startstop_t (*abort)(ide_drive_t *, struct request *rq); struct device_driver gen_driver; int (*probe)(ide_drive_t *); void (*remove)(ide_drive_t *); @@ -834,10 +831,6 @@ ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); -ide_startstop_t __ide_abort(ide_drive_t *, struct request *); - -extern ide_startstop_t ide_abort(ide_drive_t *, const char *); - extern void ide_fix_driveid(struct hd_driveid *); extern void ide_fixstring(u8 *, const int, const int); -- cgit v1.2.3 From ebb12db51f6c13b30752fcf506baad4c617b153c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 11 Jun 2008 22:04:29 +0200 Subject: Freezer: Introduce PF_FREEZER_NOSIG The freezer currently attempts to distinguish kernel threads from user space tasks by checking if their mm pointer is unset and it does not send fake signals to kernel threads. However, there are kernel threads, mostly related to networking, that behave like user space tasks and may want to be sent a fake signal to be frozen. Introduce the new process flag PF_FREEZER_NOSIG that will be set by default for all kernel threads and make the freezer only send fake signals to the tasks having PF_FREEZER_NOSIG unset. Provide the set_freezable_with_signal() function to be called by the kernel threads that want to be sent a fake signal for freezing. This patch should not change the freezer's observable behavior. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Len Brown --- include/linux/freezer.h | 10 ++++++++++ include/linux/sched.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 08934995c7a..deddeedf325 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -127,6 +127,15 @@ static inline void set_freezable(void) current->flags &= ~PF_NOFREEZE; } +/* + * Tell the freezer that the current task should be frozen by it and that it + * should send a fake signal to the task to freeze it. + */ +static inline void set_freezable_with_signal(void) +{ + current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG); +} + /* * Freezer-friendly wrappers around wait_event_interruptible() and * wait_event_interruptible_timeout(), originally defined in @@ -174,6 +183,7 @@ static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} +static inline void set_freezable_with_signal(void) {} #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/include/linux/sched.h b/include/linux/sched.h index 21349173d14..ba2f859c6e4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1494,6 +1494,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other -- cgit v1.2.3 From 20bfdbba7212d19613b93dcea93f26cb65af91fe Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:56 -0600 Subject: PNP: make 
pnp_{port,mem,etc}_start(), et al work for invalid resources Some callers use pnp_port_start() and similar functions without making sure the resource is valid. This patch makes us fall back to returning the initial values if the resource is not valid or not even present. This mostly preserves the previous behavior, where we would just return the initial values set by pnp_init_resource_table(). The original 2.6.25 code didn't range-check the "bar", so it would return garbage if the bar exceeded the table size. This code returns sensible values instead. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- include/linux/pnp.h | 72 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 63b128d512f..8b607aecd95 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -40,19 +40,31 @@ static inline resource_size_t pnp_resource_len(struct resource *res) static inline resource_size_t pnp_port_start(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IO, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return res->start; + return 0; } static inline resource_size_t pnp_port_end(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IO, bar)->end; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return res->end; + return 0; } static inline unsigned long pnp_port_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IO, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET; } static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar) @@ -63,25 
+75,41 @@ static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar) static inline resource_size_t pnp_port_len(struct pnp_dev *dev, unsigned int bar) { - return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_IO, bar)); + struct resource *res = pnp_get_resource(dev, IORESOURCE_IO, bar); + + if (pnp_resource_valid(res)) + return pnp_resource_len(res); + return 0; } static inline resource_size_t pnp_mem_start(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_MEM, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return res->start; + return 0; } static inline resource_size_t pnp_mem_end(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_MEM, bar)->end; + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return res->end; + return 0; } static inline unsigned long pnp_mem_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_MEM, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET; } static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar) @@ -92,18 +120,30 @@ static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar) static inline resource_size_t pnp_mem_len(struct pnp_dev *dev, unsigned int bar) { - return pnp_resource_len(pnp_get_resource(dev, IORESOURCE_MEM, bar)); + struct resource *res = pnp_get_resource(dev, IORESOURCE_MEM, bar); + + if (pnp_resource_valid(res)) + return pnp_resource_len(res); + return 0; } static inline resource_size_t pnp_irq(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IRQ, bar); + + if (pnp_resource_valid(res)) + return res->start; + 
return -1; } static inline unsigned long pnp_irq_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_IRQ, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_IRQ, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET; } static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar) @@ -114,12 +154,20 @@ static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar) static inline resource_size_t pnp_dma(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_DMA, bar)->start; + struct resource *res = pnp_get_resource(dev, IORESOURCE_DMA, bar); + + if (pnp_resource_valid(res)) + return res->start; + return -1; } static inline unsigned long pnp_dma_flags(struct pnp_dev *dev, unsigned int bar) { - return pnp_get_resource(dev, IORESOURCE_DMA, bar)->flags; + struct resource *res = pnp_get_resource(dev, IORESOURCE_DMA, bar); + + if (pnp_resource_valid(res)) + return res->flags; + return IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET; } static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) -- cgit v1.2.3 From aee3ad815dd291a7193ab01da0f1a30c84d00061 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:56:57 -0600 Subject: PNP: replace pnp_resource_table with dynamically allocated resources PNP used to have a fixed-size pnp_resource_table for tracking the resources used by a device. This table often overflowed, so we've had to increase the table size, which wastes memory because most devices have very few resources. This patch replaces the table with a linked list of resources where the entries are allocated on demand. 
This removes messages like these: pnpacpi: exceeded the max number of IO resources 00:01: too many I/O port resources References: http://bugzilla.kernel.org/show_bug.cgi?id=9535 http://bugzilla.kernel.org/show_bug.cgi?id=9740 http://lkml.org/lkml/2007/11/30/110 This patch also changes the way PNP uses the IORESOURCE_UNSET, IORESOURCE_AUTO, and IORESOURCE_DISABLED flags. Prior to this patch, the pnp_resource_table entries used the flags like this: IORESOURCE_UNSET This table entry is unused and available for use. When this flag is set, we shouldn't look at anything else in the resource structure. This flag is set when a resource table entry is initialized. IORESOURCE_AUTO This resource was assigned automatically by pnp_assign_{io,mem,etc}(). This flag is set when a resource table entry is initialized and cleared whenever we discover a resource setting by reading an ISAPNP config register, parsing a PNPBIOS resource data stream, parsing an ACPI _CRS list, or interpreting a sysfs "set" command. Resources marked IORESOURCE_AUTO are reinitialized and marked as IORESOURCE_UNSET by pnp_clean_resource_table() in these cases: - before we attempt to assign resources automatically, - if we fail to assign resources automatically, - after disabling a device IORESOURCE_DISABLED Set by pnp_assign_{io,mem,etc}() when automatic assignment fails. Also set by PNPBIOS and PNPACPI for: - invalid IRQs or GSI registration failures - invalid DMA channels - I/O ports above 0x10000 - mem ranges with negative length After this patch, there is no pnp_resource_table, and the resource list entries use the flags like this: IORESOURCE_UNSET This flag is no longer used in PNP. Instead of keeping IORESOURCE_UNSET entries in the resource list, we remove entries from the list and free them. IORESOURCE_AUTO No change in meaning: it still means the resource was assigned automatically by pnp_assign_{port,mem,etc}(), but these functions now set the bit explicitly. 
We still "clean" a device's resource list in the same places, but rather than reinitializing IORESOURCE_AUTO entries, we just remove them from the list. Note that IORESOURCE_AUTO entries are always at the end of the list, so removing them doesn't reorder other list entries. This is because non-IORESOURCE_AUTO entries are added by the ISAPNP, PNPBIOS, or PNPACPI "get resources" methods and by the sysfs "set" command. In each of these cases, we completely free the resource list first. IORESOURCE_DISABLED In addition to the cases where we used to set this flag, ISAPNP now adds an IORESOURCE_DISABLED resource when it reads a configuration register with a "disabled" value. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown Signed-off-by: Andi Kleen --- include/linux/pnp.h | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 8b607aecd95..dfaa567e04a 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -15,7 +15,6 @@ struct pnp_protocol; struct pnp_dev; -struct pnp_resource_table; /* * Resource Management @@ -24,7 +23,14 @@ struct resource *pnp_get_resource(struct pnp_dev *, unsigned int, unsigned int); static inline int pnp_resource_valid(struct resource *res) { - if (res && !(res->flags & IORESOURCE_UNSET)) + if (res) + return 1; + return 0; +} + +static inline int pnp_resource_enabled(struct resource *res) +{ + if (res && !(res->flags & IORESOURCE_DISABLED)) return 1; return 0; } @@ -64,7 +70,7 @@ static inline unsigned long pnp_port_flags(struct pnp_dev *dev, if (pnp_resource_valid(res)) return res->flags; - return IORESOURCE_IO | IORESOURCE_AUTO | IORESOURCE_UNSET; + return IORESOURCE_IO | IORESOURCE_AUTO; } static inline int pnp_port_valid(struct pnp_dev *dev, unsigned int bar) @@ -109,7 +115,7 @@ static inline unsigned long pnp_mem_flags(struct pnp_dev *dev, unsigned int bar) if (pnp_resource_valid(res)) return res->flags; - return 
IORESOURCE_MEM | IORESOURCE_AUTO | IORESOURCE_UNSET; + return IORESOURCE_MEM | IORESOURCE_AUTO; } static inline int pnp_mem_valid(struct pnp_dev *dev, unsigned int bar) @@ -143,7 +149,7 @@ static inline unsigned long pnp_irq_flags(struct pnp_dev *dev, unsigned int bar) if (pnp_resource_valid(res)) return res->flags; - return IORESOURCE_IRQ | IORESOURCE_AUTO | IORESOURCE_UNSET; + return IORESOURCE_IRQ | IORESOURCE_AUTO; } static inline int pnp_irq_valid(struct pnp_dev *dev, unsigned int bar) @@ -167,7 +173,7 @@ static inline unsigned long pnp_dma_flags(struct pnp_dev *dev, unsigned int bar) if (pnp_resource_valid(res)) return res->flags; - return IORESOURCE_DMA | IORESOURCE_AUTO | IORESOURCE_UNSET; + return IORESOURCE_DMA | IORESOURCE_AUTO; } static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) @@ -296,7 +302,7 @@ struct pnp_dev { int capabilities; struct pnp_option *independent; struct pnp_option *dependent; - struct pnp_resource_table *res; + struct list_head resources; char name[PNP_NAME_LEN]; /* contains a human-readable name */ int flags; /* used by protocols */ -- cgit v1.2.3 From 57fd51a8be26921b56747ddd09d1d9e01c11c9e0 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:01 -0600 Subject: PNP: add pnp_possible_config() -- can a device be configured this way? As part of a heuristic to identify modem devices, 8250_pnp.c checks to see whether a device can be configured at any of the legacy COM port addresses. This patch moves the code that traverses the PNP "possible resource options" from 8250_pnp.c to the PNP subsystem. This encapsulation is important because a future patch will change the implementation of those resource options.
Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown --- include/linux/pnp.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index dfaa567e04a..e033e1b14c2 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -479,6 +479,8 @@ void pnp_unregister_card_driver(struct pnp_card_driver *drv); extern struct list_head pnp_cards; /* resource management */ +int pnp_possible_config(struct pnp_dev *dev, int type, resource_size_t base, + resource_size_t size); int pnp_auto_config_dev(struct pnp_dev *dev); int pnp_start_dev(struct pnp_dev *dev); int pnp_stop_dev(struct pnp_dev *dev); @@ -506,6 +508,9 @@ static inline int pnp_register_card_driver(struct pnp_card_driver *drv) { return static inline void pnp_unregister_card_driver(struct pnp_card_driver *drv) { } /* resource management */ +static inline int pnp_possible_config(struct pnp_dev *dev, int type, + resource_size_t base, + resource_size_t size) { return 0; } static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; } static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; } -- cgit v1.2.3 From 08c9f262f268f7948be13bf3a5bda1d635c649b4 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:03 -0600 Subject: PNP: define PNP-specific IORESOURCE_IO_* flags alongside IRQ, DMA, MEM PNP previously defined PNP_PORT_FLAG_16BITADDR and PNP_PORT_FLAG_FIXED in a private header file, but put those flags in struct resource.flags fields. Better to make them IORESOURCE_IO_* flags like the existing IRQ, DMA, and MEM flags. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown --- include/linux/ioport.h | 4 ++++ include/linux/pnp.h | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index c6801bffe76..39db059ffb8 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -88,6 +88,10 @@ struct resource_list { #define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */ #define IORESOURCE_MEM_EXPANSIONROM (1<<6) +/* PnP I/O specific bits (IORESOURCE_BITS) */ +#define IORESOURCE_IO_16BIT_ADDR (1<<0) +#define IORESOURCE_IO_FIXED (1<<1) + /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ #define IORESOURCE_ROM_SHADOW (1<<1) /* ROM is copy at C000:0 */ diff --git a/include/linux/pnp.h b/include/linux/pnp.h index e033e1b14c2..e1454dabde1 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -182,9 +182,6 @@ static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) } -#define PNP_PORT_FLAG_16BITADDR (1<<0) -#define PNP_PORT_FLAG_FIXED (1<<1) - struct pnp_port { unsigned short min; /* min base number */ unsigned short max; /* max base number */ -- cgit v1.2.3 From a1802c42950403657d07e64558eff612d550ce16 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:04 -0600 Subject: PNP: make resource option structures private to PNP subsystem Nothing outside the PNP subsystem should need access to a device's resource options, so this patch moves the option structure declarations to a private header file. 
Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown --- include/linux/pnp.h | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index e1454dabde1..785126ffcc1 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -182,54 +182,6 @@ static inline int pnp_dma_valid(struct pnp_dev *dev, unsigned int bar) } -struct pnp_port { - unsigned short min; /* min base number */ - unsigned short max; /* max base number */ - unsigned char align; /* align boundary */ - unsigned char size; /* size of range */ - unsigned char flags; /* port flags */ - unsigned char pad; /* pad */ - struct pnp_port *next; /* next port */ -}; - -#define PNP_IRQ_NR 256 -struct pnp_irq { - DECLARE_BITMAP(map, PNP_IRQ_NR); /* bitmask for IRQ lines */ - unsigned char flags; /* IRQ flags */ - unsigned char pad; /* pad */ - struct pnp_irq *next; /* next IRQ */ -}; - -struct pnp_dma { - unsigned char map; /* bitmask for DMA channels */ - unsigned char flags; /* DMA flags */ - struct pnp_dma *next; /* next port */ -}; - -struct pnp_mem { - unsigned int min; /* min base number */ - unsigned int max; /* max base number */ - unsigned int align; /* align boundary */ - unsigned int size; /* size of range */ - unsigned char flags; /* memory flags */ - unsigned char pad; /* pad */ - struct pnp_mem *next; /* next memory resource */ -}; - -#define PNP_RES_PRIORITY_PREFERRED 0 -#define PNP_RES_PRIORITY_ACCEPTABLE 1 -#define PNP_RES_PRIORITY_FUNCTIONAL 2 -#define PNP_RES_PRIORITY_INVALID 65535 - -struct pnp_option { - unsigned short priority; /* priority */ - struct pnp_port *port; /* first port */ - struct pnp_irq *irq; /* first IRQ */ - struct pnp_dma *dma; /* first DMA */ - struct pnp_mem *mem; /* first memory resource */ - struct pnp_option *next; /* used to chain dependent resources */ -}; - /* * Device Management */ -- cgit v1.2.3 From 
d5ebde6ef5c2d51828f975a81d7d0e58bccfd833 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:14 -0600 Subject: PNP: support optional IRQ resources This patch adds an IORESOURCE_IRQ_OPTIONAL flag for use when assigning resources to a device. If the flag is set and we are unable to assign an IRQ to the device, we can leave the IRQ disabled but allow the overall resource allocation to succeed. Some devices request an IRQ, but can run without an IRQ (possibly with degraded performance). This flag lets us run the device without the IRQ instead of just leaving the device disabled. This is a reimplementation of this previous change by Rene Herman: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=3b73a223661ed137c5d3d2635f954382e94f5a43 I reimplemented this for two reasons: - to prepare for converting all resource options into a single linked list, as opposed to the per-resource-type lists we have now, and - to preserve the order and number of resource options. In PNPBIOS and ACPI, we configure a device by giving firmware a list of resource assignments. It is important that this list has exactly the same number of resources, in the same order, as the "template" list we got from the firmware in the first place. The problem of a sound card MPU401 being left disabled for want of an IRQ was reported by Uwe Bugla.
Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown --- include/linux/ioport.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 39db059ffb8..2cd07cc2968 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -59,6 +59,7 @@ struct resource_list { #define IORESOURCE_IRQ_HIGHLEVEL (1<<2) #define IORESOURCE_IRQ_LOWLEVEL (1<<3) #define IORESOURCE_IRQ_SHAREABLE (1<<4) +#define IORESOURCE_IRQ_OPTIONAL (1<<5) /* PnP DMA specific bits (IORESOURCE_BITS) */ #define IORESOURCE_DMA_TYPE_MASK (3<<0) -- cgit v1.2.3 From 1f32ca31e7409d37c1b25e5f81840fb184380cdf Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 27 Jun 2008 16:57:17 -0600 Subject: PNP: convert resource options to single linked list ISAPNP, PNPBIOS, and ACPI describe the "possible resource settings" of a device, i.e., the possibilities an OS bus driver has when it assigns I/O port, MMIO, and other resources to the device. PNP used to maintain this "possible resource setting" information in one independent option structure and a list of dependent option structures for each device. Each of these option structures had lists of I/O, memory, IRQ, and DMA resources, for example: dev independent options ind-io0 -> ind-io1 ... ind-mem0 -> ind-mem1 ... ... dependent option set 0 dep0-io0 -> dep0-io1 ... dep0-mem0 -> dep0-mem1 ... ... dependent option set 1 dep1-io0 -> dep1-io1 ... dep1-mem0 -> dep1-mem1 ... ... ... This data structure was designed for ISAPNP, where the OS configures device resource settings by writing directly to configuration registers. The OS can write the registers in arbitrary order much like it writes PCI BARs. However, for PNPBIOS and ACPI devices, the OS uses firmware interfaces that perform device configuration, and it is important to pass the desired settings to those interfaces in the correct order. 
The OS learns the correct order by using firmware interfaces that return the "current resource settings" and "possible resource settings," but the option structures above don't store the ordering information. This patch replaces the independent and dependent lists with a single list of options. For example, a device might have possible resource settings like this: dev options ind-io0 -> dep0-io0 -> dep1-io0 -> ind-io1 ... All the possible settings are in the same list, in the order they come from the firmware "possible resource settings" list. Each entry is tagged with an independent/dependent flag. Dependent entries also have a "set number" and an optional priority value. All dependent entries must be assigned from the same set. For example, the OS can use all the entries from dependent set 0, or all the entries from dependent set 1, but it cannot mix entries from set 0 with entries from set 1. Prior to this patch PNP didn't keep track of the order of this list, and it assigned all independent options first, then all dependent ones. Using the example above, that resulted in a "desired configuration" list like this: ind-io0 -> ind-io1 -> depN-io0 ... instead of the list the firmware expects, which looks like this: ind-io0 -> depN-io0 -> ind-io1 ... Signed-off-by: Bjorn Helgaas Signed-off-by: Andi Kleen Acked-by: Rene Herman Signed-off-by: Len Brown --- include/linux/pnp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 785126ffcc1..1ce54b63085 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -1,6 +1,8 @@ /* * Linux Plug and Play Support * Copyright by Adam Belay + * Copyright (C) 2008 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas */ #ifndef _LINUX_PNP_H @@ -249,9 +251,9 @@ struct pnp_dev { int active; int capabilities; - struct pnp_option *independent; - struct pnp_option *dependent; + unsigned int num_dependent_sets; struct list_head resources; + struct list_head options; char name[PNP_NAME_LEN]; /* contains a human-readable name */ int flags; /* used by protocols */ -- cgit v1.2.3 From d442cc44c0db56e84ef6aa244a88427d2efe06cd Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 16 Jul 2008 16:09:06 -0400 Subject: block: Trivial fix for blk_integrity_rq() Fail integrity check gracefully when request does not have a bio attached (BLOCK_PC). Signed-off-by: Martin K. Petersen Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32a441b05fd..88d68081a0f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -985,6 +985,9 @@ static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) static inline int blk_integrity_rq(struct request *rq) { + if (rq->bio == NULL) + return 0; + return bio_integrity(rq->bio); } -- cgit v1.2.3 From f470021adb9190819c03d6d8c5c860a17480aa6d Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Mon, 24 Mar 2008 18:36:23 -0700 Subject: ptrace children revamp ptrace no longer fiddles with the children/sibling links, and the old ptrace_children list is gone. Now ptrace, whether of one's own children or another's via PTRACE_ATTACH, just uses the new ptraced list instead. There should be no user-visible difference that matters. The only change is the order in which do_wait() sees multiple stopped children and stopped ptrace attachees. Since wait_task_stopped() was changed earlier so it no longer reorders the children list, we already know this won't cause any new problems. 
Signed-off-by: Roland McGrath --- include/linux/init_task.h | 4 ++-- include/linux/sched.h | 26 ++++++++++++-------------- 2 files changed, 14 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 9927a88674a..93c45acf249 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -140,8 +140,8 @@ extern struct group_info init_groups; .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ - .ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \ - .ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \ + .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ + .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &tsk, \ .parent = &tsk, \ .children = LIST_HEAD_INIT(tsk.children), \ diff --git a/include/linux/sched.h b/include/linux/sched.h index ba2f859c6e4..1941d8b5cf1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1062,12 +1062,6 @@ struct task_struct { #endif struct list_head tasks; - /* - * ptrace_list/ptrace_children forms the list of my children - * that were stolen by a ptracer. - */ - struct list_head ptrace_children; - struct list_head ptrace_list; struct mm_struct *mm, *active_mm; @@ -1089,18 +1083,25 @@ struct task_struct { /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with - * p->parent->pid) + * p->real_parent->pid) */ - struct task_struct *real_parent; /* real parent process (when being debugged) */ - struct task_struct *parent; /* parent process */ + struct task_struct *real_parent; /* real parent process */ + struct task_struct *parent; /* recipient of SIGCHLD, wait4() reports */ /* - * children/sibling forms the list of my children plus the - * tasks I'm ptracing. 
+ * children/sibling forms the list of my natural children */ struct list_head children; /* list of my children */ struct list_head sibling; /* linkage in my parent's children list */ struct task_struct *group_leader; /* threadgroup leader */ + /* + * ptraced is the list of tasks this task is using ptrace on. + * This includes both natural children and PTRACE_ATTACH targets. + * p->ptrace_entry is p's link on the p->parent->ptraced list. + */ + struct list_head ptraced; + struct list_head ptrace_entry; + /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX]; struct list_head thread_group; @@ -1876,9 +1877,6 @@ extern void wait_task_inactive(struct task_struct * p); #define wait_task_inactive(p) do { } while (0) #endif -#define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) - #define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) #define for_each_process(p) \ -- cgit v1.2.3