From f6d87f4bd259cf33e092cd1a8fde05f291c47af1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Nov 2008 13:18:30 +0100 Subject: genirq: keep affinities set from userspace across free/request_irq() Impact: preserve user-modified affinities on interrupts Kumar Galak noticed that commit 18404756765c713a0be4eb1082920c04822ce588 (genirq: Expose default irq affinity mask (take 3)) overrides an already set affinity setting across a free / request_irq(). Happens e.g. with ifdown/ifup of a network device. Change the logic to mark the affinities as set and keep them intact. This also fixes the unlocked access to irq_desc in irq_select_affinity() when called from irq_affinity_proc_write() Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/irq.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index d058c57be02..36b186eb318 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -63,7 +63,8 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ -#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ +#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ +#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) @@ -210,7 +211,6 @@ extern int setup_irq(unsigned int irq, struct irqaction *new); #ifdef CONFIG_GENERIC_PENDING_IRQ -void set_pending_irq(unsigned int irq, cpumask_t mask); void move_native_irq(int irq); void move_masked_irq(int irq); @@ -228,10 +228,6 @@ static inline void move_masked_irq(int irq) { } -static inline void set_pending_irq(unsigned int irq, cpumask_t 
mask) -{ -} - #endif /* CONFIG_GENERIC_PENDING_IRQ */ #else /* CONFIG_SMP */ -- cgit v1.2.3 From 21098c68df7115554fe041170899bdff709efd08 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 20 Nov 2008 10:58:01 -0500 Subject: [SCSI] fc_transport: fix old bug on bitflag definitions When the fastfail flag was added, it did not account for the flags being bit fields. Correct the definition so there is no longer a conflict. Signed-off-by: James Smart Signed-off-by: James Bottomley --- include/scsi/scsi_transport_fc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 49d8913c4f8..6e04e6fe79c 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -357,7 +357,7 @@ struct fc_rport { /* aka fc_starget_attrs */ /* bit field values for struct fc_rport "flags" field: */ #define FC_RPORT_DEVLOSS_PENDING 0x01 #define FC_RPORT_SCAN_PENDING 0x02 -#define FC_RPORT_FAST_FAIL_TIMEDOUT 0x03 +#define FC_RPORT_FAST_FAIL_TIMEDOUT 0x04 #define dev_to_rport(d) \ container_of(d, struct fc_rport, dev) -- cgit v1.2.3 From 7e56b5d698707a9934833c47b24d78fb0bcaf764 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 21 Nov 2008 16:45:22 -0800 Subject: net: Fix memory leak in the proto_register function If the slub allocator is used, kmem_cache_create() may merge two or more kmem_cache's into one but the cache name pointer is not updated and kmem_cache_name() is no longer guaranteed to return the pointer passed to the former function. This patch stores the kmalloc'ed pointers in the corresponding request_sock_ops and timewait_sock_ops structures. Signed-off-by: Catalin Marinas Acked-by: Arnaldo Carvalho de Melo Reviewed-by: Christoph Lameter Signed-off-by: David S. 
Miller --- include/net/request_sock.h | 1 + include/net/timewait_sock.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index cac811e51f6..c7190846e12 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -31,6 +31,7 @@ struct request_sock_ops { int family; int obj_size; struct kmem_cache *slab; + char *slab_name; int (*rtx_syn_ack)(struct sock *sk, struct request_sock *req); void (*send_ack)(struct sock *sk, struct sk_buff *skb, diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 1e1ee3253fd..97c3b14da55 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -16,6 +16,7 @@ struct timewait_sock_ops { struct kmem_cache *twsk_slab; + char *twsk_slab_name; unsigned int twsk_obj_size; int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); -- cgit v1.2.3 From 2ed1cdcf9a83205d1343f29b630abff232eaa72c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 21 Nov 2008 16:59:57 -0800 Subject: irq.h: fix missing/extra kernel-doc Impact: fix kernel-doc build Fix missing & excess irq.h kernel-doc: Warning(include/linux/irq.h:182): No description found for parameter 'irq' Warning(include/linux/irq.h:182): Excess struct/union/enum/typedef member 'affinity_entry' description in 'irq_desc' Signed-off-by: Randy Dunlap Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/irq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 36b186eb318..3dddfa703eb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -131,7 +131,7 @@ struct irq_chip { /** * struct irq_desc - interrupt descriptor - * + * @irq: interrupt number for this descriptor * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] * @chip: low level interrupt hardware access * @msi_desc: MSI descriptor @@ -150,7 +150,6 @@ struct irq_chip { * @cpu: 
cpu index useful for balancing * @pending_mask: pending rebalanced interrupts * @dir: /proc/irq/ procfs entry - * @affinity_entry: /proc/irq/smp_affinity procfs entry on SMP * @name: flow handler name for /proc/interrupts output */ struct irq_desc { -- cgit v1.2.3 From 52440211dcdc52c0b757f8b34d122e11b12cdd50 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 18 Nov 2008 09:30:25 -0800 Subject: drm: move drm vblank initialization/cleanup to driver load/unload drm vblank initialization keeps track of the changes in driver-supplied frame counts across vt switch and mode setting, but only if you let it by not tearing down the drm vblank structure. Signed-off-by: Keith Packard Signed-off-by: Dave Airlie --- include/drm/drmP.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 28c7f1679d4..d5e8e5c8954 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1151,6 +1151,7 @@ extern u32 drm_vblank_count(struct drm_device *dev, int crtc); extern void drm_handle_vblank(struct drm_device *dev, int crtc); extern int drm_vblank_get(struct drm_device *dev, int crtc); extern void drm_vblank_put(struct drm_device *dev, int crtc); +extern void drm_vblank_cleanup(struct drm_device *dev); /* Modesetting support */ extern int drm_modeset_ctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -- cgit v1.2.3 From f79fca55f9a6fe54635ad32ddc8a38f92a94ec30 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 24 Nov 2008 16:06:17 -0800 Subject: netfilter: xtables: add missing const qualifier to xt_tgchk_param When entryinfo was a standalone parameter to functions, it used to be "const void *". Put the const back in. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index be41b609c88..e52ce475d19 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -251,7 +251,7 @@ struct xt_target_param { */ struct xt_tgchk_param { const char *table; - void *entryinfo; + const void *entryinfo; const struct xt_target *target; void *targinfo; unsigned int hook_mask; -- cgit v1.2.3 From 5f23b734963ec7eaa3ebcd9050da0c9b7d143dd3 Mon Sep 17 00:00:00 2001 From: dann frazier Date: Wed, 26 Nov 2008 15:32:27 -0800 Subject: net: Fix soft lockups/OOM issues w/ unix garbage collector This is an implementation of David Miller's suggested fix in: https://bugzilla.redhat.com/show_bug.cgi?id=470201 It has been updated to use wait_event() instead of wait_event_interruptible(). Paraphrasing the description from the above report, it makes sendmsg() block while UNIX garbage collection is in progress. This avoids a situation where child processes continue to queue new FDs over a AF_UNIX socket to a parent which is in the exit path and running garbage collection on these FDs. This contention can result in soft lockups and oom-killing of unrelated processes. Signed-off-by: dann frazier Signed-off-by: David S. 
Miller --- include/net/af_unix.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index c29ff1da8a1..1614d78c60e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -9,6 +9,7 @@ extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); +extern void wait_for_unix_gc(void); #define UNIX_HASH_SIZE 256 -- cgit v1.2.3 From 95a28ed08619cc70f31611886ac7b26ab0e462dc Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 13 Nov 2008 11:01:34 +0800 Subject: ACPICA: Allow _WAK method to return an Integer This can happen if the _WAK method returns nothing (as per ACPI 1.0) but does return an integer if the implicit return mechanism is enabled. This is the only method that has this problem, since it is also defined to return a package of two integers (ACPI 1.0b+). In all other cases, if a method returns an object when one was not expected, no warning is issued. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpredef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpredef.h b/include/acpi/acpredef.h index 619fb75f886..e6452dbf39e 100644 --- a/include/acpi/acpredef.h +++ b/include/acpi/acpredef.h @@ -346,7 +346,7 @@ static const union acpi_predefined_info predefined_names[] = { /* Acpi 1.0 defined _WAK with no return value. 
Later, it was changed to return a package */ - {.info = {"_WAK", 1, ACPI_RTYPE_NONE | ACPI_RTYPE_PACKAGE}}, + {.info = {"_WAK", 1, ACPI_RTYPE_NONE | ACPI_RTYPE_INTEGER | ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 2, 0, 0, 0}}, /* fixed (2 Int), but is optional */ {.ret_info = {0, 0, 0, 0, 0, 0}} /* Table terminator */ }; -- cgit v1.2.3 From e899b6485c332aa2d7510739507ab5e5d7b28e59 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 27 Nov 2008 14:42:30 +0800 Subject: ACPICA: disable _BIF warning A generic work-around from ACPICA is in the queue, but since Linux has a work-around in its battery driver, we can disable this warning now. Allow _BIF method to return an Package with Buffer elements http://bugzilla.kernel.org/show_bug.cgi?id=11822 Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpredef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpredef.h b/include/acpi/acpredef.h index e6452dbf39e..16a9ca9a66e 100644 --- a/include/acpi/acpredef.h +++ b/include/acpi/acpredef.h @@ -167,7 +167,7 @@ static const union acpi_predefined_info predefined_names[] = { {.info = {"_BFS", 1, 0}}, {.info = {"_BIF", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 9, - ACPI_RTYPE_STRING, 4, 0}}, /* fixed (9 Int),(4 Str) */ + ACPI_RTYPE_STRING | ACPI_RTYPE_BUFFER, 4, 0}}, /* fixed (9 Int),(4 Str) */ {.info = {"_BLT", 3, 0}}, {.info = {"_BMC", 1, 0}}, {.info = {"_BMD", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 5, 0, 0, 0}}, /* fixed (5 Int) */ -- cgit v1.2.3 From 487ff32082a9bd7489d8185cf7d7a2fdf18a22fa Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 27 Nov 2008 11:13:58 +0000 Subject: Allow architectures to override copy_user_highpage() With aliasing VIPT cache support, the ARM implementation of clear_user_page() and copy_user_page() sets up a temporary kernel space mapping such that we have the same cache 
colour as the userspace page. This avoids having to consider any userspace aliases from this operation. However, when highmem is enabled, kmap_atomic() has to set up mappings. The copy_user_highpage() and clear_user_highpage() call these functions before delegating the copies to copy_user_page() and clear_user_page(). The effect of this is that each of the *_user_highpage() functions set up their own kmap mapping, followed by the *_user_page() functions setting up another mapping. This is rather wasteful. Thankfully, copy_user_highpage() can be overridden by architectures by defining __HAVE_ARCH_COPY_USER_HIGHPAGE. However, replacement of clear_user_highpage() is more difficult because its inline definition is not conditional. It seems that you're expected to define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and provide a replacement __alloc_zeroed_user_highpage() implementation instead. The allocation itself is fine, so we don't want to override that. What we really want to do is to override clear_user_highpage() with our own version which doesn't kmap_atomic() unnecessarily. Other VIPT architectures (PARISC and SH) would also like to override this function as well.
Acked-by: Hugh Dickins Acked-by: James Bottomley Acked-by: Paul Mundt Signed-off-by: Russell King --- include/linux/highmem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7dcbc82f3b7..13875ce9112 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -63,12 +63,14 @@ static inline void *kmap_atomic(struct page *page, enum km_type idx) #endif /* CONFIG_HIGHMEM */ /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ +#ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_atomic(page, KM_USER0); clear_user_page(addr, vaddr, page); kunmap_atomic(addr, KM_USER0); } +#endif #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /** -- cgit v1.2.3 From 9a5aa622dd4cd22b5e0fe83e4a9c0c768d4e2dea Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 28 Nov 2008 21:29:46 -0800 Subject: mlx4_core: Save/restore default port IB capability mask Commit 7ff93f8b ("mlx4_core: Multiple port type support") introduced support for different port types. As part of that support, SET_PORT is invoked to set the port type during driver startup. However, as a side-effect, for IB ports the invocation of this command also sets the port's capability mask to zero (losing the default value set by FW). To fix this, get the default ib port capabilities (via a MAD_IFC Port Info query) during driver startup, and save them for use in the mlx4_SET_PORT command when setting the port-type to Infiniband. This patch fixes problems with subnet manager (SM) failover such as , which occurred because the IsTrapSupported bit in the capability mask was zeroed. 
Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index bd9977b8949..371086fd946 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -179,6 +179,7 @@ struct mlx4_caps { int num_ports; int vl_cap[MLX4_MAX_PORTS + 1]; int ib_mtu_cap[MLX4_MAX_PORTS + 1]; + __be32 ib_port_def_cap[MLX4_MAX_PORTS + 1]; u64 def_mac[MLX4_MAX_PORTS + 1]; int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; -- cgit v1.2.3 From 31168481c32c8a485e1003af9433124dede57f8d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:33:24 +0000 Subject: meminit section warnings Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index f546ad6fc02..1e6d34bfa09 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -17,7 +17,7 @@ struct page_cgroup { struct list_head lru; /* per cgroup LRU list */ }; -void __init pgdat_page_cgroup_init(struct pglist_data *pgdat); +void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); void __init page_cgroup_init(void); struct page_cgroup *lookup_page_cgroup(struct page *page); @@ -91,7 +91,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; -static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat) +static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) { } -- cgit v1.2.3 From 02d0e6753d8ab0173b63338157929e52eac86d12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:38:34 +0000 Subject: hotplug_memory_notifier section annotation Same as for hotplug_cpu - we want static notifier_block in there in 
meminitdata, to avoid false positives whenever it's used. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 2f5f8a5ef2a..36c82c9e6ea 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -91,7 +91,7 @@ extern int memory_notify(unsigned long val, void *v); #ifdef CONFIG_MEMORY_HOTPLUG #define hotplug_memory_notifier(fn, pri) { \ - static struct notifier_block fn##_mem_nb = \ + static __meminitdata struct notifier_block fn##_mem_nb =\ { .notifier_call = fn, .priority = pri }; \ register_memory_notifier(&fn##_mem_nb); \ } -- cgit v1.2.3 From 96b8936a9ed08746e47081458a5eb9e43a751e24 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Nov 2008 08:10:03 +0100 Subject: remove __ARCH_WANT_COMPAT_SYS_PTRACE All architectures now use the generic compat_sys_ptrace, as should every new architecture that needs 32bit compat (if we'll ever get another). Remove the now superflous __ARCH_WANT_COMPAT_SYS_PTRACE define, and also kill a comment about __ARCH_SYS_PTRACE that was added after __ARCH_SYS_PTRACE was already gone. Signed-off-by: Christoph Hellwig Acked-by: David S. 
Miller Signed-off-by: Linus Torvalds --- include/linux/compat.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index f061a1ea1b7..e88f3ecf38b 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -252,12 +252,10 @@ extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); -#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, compat_long_t addr, compat_long_t data); -#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ /* * epoll (fs/eventpoll.c) compat bits follow ... -- cgit v1.2.3 From ac70a964b0e22a95af3628c344815857a01461b7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 27 Nov 2008 13:36:48 +0900 Subject: libata: blacklist Seagate drives which time out FLUSH_CACHE when used with NCQ Some recent Seagate harddrives have firmware bug which causes FLUSH CACHE to timeout under certain circumstances if NCQ is being used. This can be worked around by disabling NCQ and fixed by updating the firmware. Implement ATA_HORKAGE_FIRMWARE_UPDATE and blacklist these devices. The wiki page has been updated to contain information on this issue. 
http://ata.wiki.kernel.org/index.php/Known_issues Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 59b0f1c807b..ed3f26eb5df 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -375,6 +375,7 @@ enum { ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands not multiple of 16 bytes */ + ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firwmare update warning */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 7ef9964e6d1b911b78709f144000aacadd0ebc21 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Mon, 1 Dec 2008 13:13:55 -0800 Subject: epoll: introduce resource usage limits It has been thought that the per-user file descriptors limit would also limit the resources that a normal user can request via the epoll interface. Vegard Nossum reported a very simple program (a modified version attached) that can make a normal user to request a pretty large amount of kernel memory, well within the its maximum number of fds. To solve such problem, default limits are now imposed, and /proc based configuration has been introduced. A new directory has been created, named /proc/sys/fs/epoll/ and inside there, there are two configuration points: max_user_instances = Maximum number of devices - per user max_user_watches = Maximum number of "watched" fds - per user The current default for "max_user_watches" limits the memory used by epoll to store "watches", to 1/32 of the amount of the low RAM. As example, a 256MB 32bit machine, will have "max_user_watches" set to roughly 90000. That should be enough to not break existing heavy epoll users. The default value for "max_user_instances" is set to 128, that should be enough too. 
This also changes the userspace, because a new error code can now come out from EPOLL_CTL_ADD (-ENOSPC). The EMFILE from epoll_create() was already listed, so that should be ok. [akpm@linux-foundation.org: use get_current_user()] Signed-off-by: Davide Libenzi Cc: Michael Kerrisk Cc: Cc: Cyrill Gorcunov Reported-by: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 644ffbda17c..55e30d11447 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -630,6 +630,10 @@ struct user_struct { atomic_t inotify_watches; /* How many inotify watches does this user have? */ atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ #endif +#ifdef CONFIG_EPOLL + atomic_t epoll_devs; /* The number of epoll descriptors currently open */ + atomic_t epoll_watches; /* The number of file descriptors currently watched */ +#endif #ifdef CONFIG_POSIX_MQUEUE /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ -- cgit v1.2.3 From 6ff2d39b91aec3dcae951afa982059e3dd9b49dc Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Mon, 1 Dec 2008 13:14:02 -0800 Subject: lib/idr.c: fix rcu related race with idr_find 2nd part of the fixes needed for http://bugzilla.kernel.org/show_bug.cgi?id=11796. When the idr tree is either grown or shrunk, then the update to the number of layers and the top pointer were not atomic. This race caused crashes. The attached patch fixes that by replicating the layers counter in each layer, thus idr_find doesn't need idp->layers anymore. 
Signed-off-by: Manfred Spraul Cc: Clement Calmels Cc: Nadia Derbey Cc: Pierre Peiffer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index fa035f96f2a..dd846df8cd3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -52,13 +52,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ struct idr_layer *ary[1< Date: Tue, 2 Dec 2008 20:40:03 +0100 Subject: amd74xx: workaround unreliable AltStatus register for nVidia controllers It seems that on some nVidia controllers using AltStatus register can be unreliable so default to Status register if the PCI device is in Compatibility Mode. In order to achieve this: * Add ide_pci_is_in_compatibility_mode() inline helper to . * Add IDE_HFLAG_BROKEN_ALTSTATUS host flag and set it in amd74xx host driver for nVidia controllers in Compatibility Mode. * Teach actual_try_to_identify() and drive_is_ready() about the new flag. This fixes the regression caused by removal of CONFIG_IDEPCI_SHARE_IRQ config option in 2.6.25 and using AltStatus register unconditionally when available (kernel.org bugs #11659 and #10216). [ Moreover for CONFIG_IDEPCI_SHARE_IRQ=y (which is what most people and distributions use) it never worked correctly. ] Thanks to Remy LABENE and Lars Winterfeld for help with debugging the problem. 
More info at: http://bugzilla.kernel.org/show_bug.cgi?id=11659 http://bugzilla.kernel.org/show_bug.cgi?id=10216 Reported-by: Remy LABENE Tested-by: Remy LABENE Tested-by: Lars Winterfeld Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 54525be4b5f..010fb26a157 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1296,6 +1296,13 @@ extern int __ide_pci_register_driver(struct pci_driver *driver, struct module *o #define ide_pci_register_driver(d) pci_register_driver(d) #endif +static inline int ide_pci_is_in_compatibility_mode(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 5) != 5) + return 1; + return 0; +} + void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, int, hw_regs_t *, hw_regs_t **); void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *); @@ -1375,6 +1382,7 @@ enum { IDE_HFLAG_IO_32BIT = (1 << 24), /* unmask IRQs */ IDE_HFLAG_UNMASK_IRQS = (1 << 25), + IDE_HFLAG_BROKEN_ALTSTATUS = (1 << 26), /* serialize ports if DMA is possible (for sl82c105) */ IDE_HFLAG_SERIALIZE_DMA = (1 << 27), /* force host out of "simplex" mode */ -- cgit v1.2.3 From 1b79cd04fab80be61dcd2732e2423aafde9a4c1c Mon Sep 17 00:00:00 2001 From: "Junjiro R. Okajima" Date: Tue, 2 Dec 2008 10:31:46 -0800 Subject: nfsd: fix vm overcommit crash fix #2 The previous patch from Alan Cox ("nfsd: fix vm overcommit crash", commit 731572d39fcd3498702eda4600db4c43d51e0b26) fixed the problem where knfsd crashes on exported shmemfs objects and strict overcommit is set. But the patch forgot supporting the case when CONFIG_SECURITY is disabled. This patch copies a part of his fix which is mainly for detecting a bug earlier. Acked-by: James Morris Signed-off-by: Alan Cox Signed-off-by: Junjiro R. 
Okajima Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/security.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index c13f1cec9ab..e3d4ecda267 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1818,17 +1818,21 @@ static inline int security_settime(struct timespec *ts, struct timezone *tz) static inline int security_vm_enough_memory(long pages) { + WARN_ON(current->mm == NULL); return cap_vm_enough_memory(current->mm, pages); } -static inline int security_vm_enough_memory_kern(long pages) +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return cap_vm_enough_memory(current->mm, pages); + WARN_ON(mm == NULL); + return cap_vm_enough_memory(mm, pages); } -static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +static inline int security_vm_enough_memory_kern(long pages) { - return cap_vm_enough_memory(mm, pages); + /* If current->mm is a kernel thread then we will pass NULL, + for this specific case that is fine */ + return cap_vm_enough_memory(current->mm, pages); } static inline int security_bprm_alloc(struct linux_binprm *bprm) -- cgit v1.2.3 From 53a08807c01989c6847bb135d8d43f61c5dfdda5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Dec 2008 12:41:26 +0100 Subject: block: internal dequeue shouldn't start timer blkdev_dequeue_request() and elv_dequeue_request() are equivalent and both start the timeout timer. Barrier code dequeues the original barrier request but doesn't passes the request itself to lower level driver, only broken down proxy requests; however, as the original barrier code goes through the same dequeue path and timeout timer is started on it. If barrier sequence takes long enough, this timer expires but the low level driver has no idea about this request and oops follows. 
Timeout timer shouldn't have been started on the original barrier request as it never goes through actual IO. This patch unexports elv_dequeue_request(), which has no external user anyway, and makes it operate on elevator proper w/o adding the timer and make blkdev_dequeue_request() call elv_dequeue_request() and add timer. Internal users which don't pass the request to driver - barrier code and end_that_request_last() - are converted to use elv_dequeue_request(). Signed-off-by: Tejun Heo Cc: Mike Anderson Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a135256b272..9cc7cc5fdce 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -786,6 +786,8 @@ static inline void blk_run_address_space(struct address_space *mapping) blk_run_backing_dev(mapping->backing_dev_info, NULL); } +extern void blkdev_dequeue_request(struct request *req); + /* * blk_end_request() and friends. * __blk_end_request() and end_request() must be called with @@ -820,11 +822,6 @@ extern void blk_update_request(struct request *rq, int error, extern unsigned int blk_rq_bytes(struct request *rq); extern unsigned int blk_rq_cur_bytes(struct request *rq); -static inline void blkdev_dequeue_request(struct request *req) -{ - elv_dequeue_request(req->q, req); -} - /* * Access functions for manipulating queue properties */ -- cgit v1.2.3 From 0e435ac26e3f951d83338ed3d4ab7dc0fe0055bc Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 3 Dec 2008 12:55:08 +0100 Subject: block: fix setting of max_segment_size and seg_boundary mask Fix setting of max_segment_size and seg_boundary mask for stacked md/dm devices. When stacking devices (LVM over MD over SCSI) some of the request queue parameters are not set up correctly in some cases by default, namely max_segment_size and and seg_boundary mask. 
If you create MD device over SCSI, these attributes are zeroed. The problem appears when another device-mapper mapping is stacked on top of this mapping - queue attributes are set in DM this way: request_queue max_segment_size seg_boundary_mask SCSI 65536 0xffffffff MD RAID1 0 0 LVM 65536 -1 (64bit) Unfortunately bio_add_page (resp. bio_phys_segments) calculates number of physical segments according to these parameters. During generic_make_request() the segment count is recalculated and can increase bio->bi_phys_segments count over the allowed limit. (After bio_clone() in stack operation.) This is especially a problem in the CCISS driver, where it produces an OOPS here BUG_ON(creq->nr_phys_segments > MAXSGENTRIES); (MAXSGENTRIES is 31 by default.) Sometimes even this command is enough to cause oops: dd iflag=direct if=/dev// of=/dev/null bs=128000 count=10 This command generates bios with 250 sectors, allocated in 32 4k-pages (last page uses only 1024 bytes). For LVM layer, it allocates bio with 31 segments (still OK for CCISS), unfortunately on lower layer it is recalculated to 32 segments and this violates CCISS restriction and triggers BUG_ON(). The patch tries to fix it by: * initializing attributes above in queue request constructor blk_queue_make_request() * make sure that blk_queue_stack_limits() inherits setting (DM uses its own function to set the limits because blk_queue_stack_limits() was introduced later. It should probably switch to use generic stack limit function too.)
* sets the default seg_boundary value in one place (blkdev.h) * use this mask as default in DM (instead of -1, which differs in 64bit) Bugs related to this: https://bugzilla.redhat.com/show_bug.cgi?id=471639 http://bugzilla.kernel.org/show_bug.cgi?id=8672 Signed-off-by: Milan Broz Reviewed-by: Alasdair G Kergon Cc: Neil Brown Cc: FUJITA Tomonori Cc: Tejun Heo Cc: Mike Miller Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9cc7cc5fdce..6dcd30d806c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -918,6 +918,8 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define MAX_SEGMENT_SIZE 65536 +#define BLK_SEG_BOUNDARY_MASK 0xFFFFFFFFUL + #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) static inline int queue_hardsect_size(struct request_queue *q) -- cgit v1.2.3 From d253eee20195b25e298bf162a6e72f14bf4803e5 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 3 Dec 2008 15:52:35 -0800 Subject: can: Fix CAN_(EFF|RTR)_FLAG handling in can_filter Due to a wrong safety check in af_can.c it was not possible to filter for SFF frames with a specific CAN identifier without getting the same selected CAN identifier from a received EFF frame also. This fix has a minimum (but user visible) impact on the CAN filter API and therefore the CAN version is set to a new date. Indeed the 'old' API is still working as-is. But when now setting CAN_(EFF|RTR)_FLAG in can_filter.can_mask you might get less traffic than before - but still the stuff that you expected to get for your defined filter ... Thanks to Kurt Van Dijck for pointing at this issue and for the review. Signed-off-by: Oliver Hartkopp Acked-by: Kurt Van Dijck Signed-off-by: David S. 
Miller --- include/linux/can/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index e9ca210ffa5..f50785ad478 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -19,7 +19,7 @@ #include #include -#define CAN_VERSION "20071116" +#define CAN_VERSION "20081130" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "8" -- cgit v1.2.3 From fd4ce1acd0f8558033b1a6968001552bd7671e6d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Nov 2008 14:58:42 +0100 Subject: [PATCH 1/2] kill FMODE_NDELAY_NOW Update FMODE_NDELAY before each ioctl call so that we can kill the magic FMODE_NDELAY_NOW. It would be even better to do this directly in setfl(), but for that we'd need to have FMODE_NDELAY for all files, not just block special files. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0dcdd9458f4..b3345a90e11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,7 +79,6 @@ extern int dir_notify_enable; #define FMODE_NDELAY ((__force fmode_t)32) #define FMODE_EXCL ((__force fmode_t)64) #define FMODE_WRITE_IOCTL ((__force fmode_t)128) -#define FMODE_NDELAY_NOW ((__force fmode_t)256) #define RW_MASK 1 #define RWA_MASK 2 -- cgit v1.2.3 From fc9161e54d0dbf799beff9692ea1cc6237162b85 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Nov 2008 14:58:46 +0100 Subject: [PATCH 2/2] documnt FMODE_ constants Make sure all FMODE_ constants are documents, and ensure a coherent style for the already existing comments. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index b3345a90e11..4a853ef6fd3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -63,21 +63,23 @@ extern int dir_notify_enable; #define MAY_ACCESS 16 #define MAY_OPEN 32 -#define FMODE_READ ((__force fmode_t)1) -#define FMODE_WRITE ((__force fmode_t)2) - -/* Internal kernel extensions */ -#define FMODE_LSEEK ((__force fmode_t)4) -#define FMODE_PREAD ((__force fmode_t)8) -#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ - -/* File is being opened for execution. Primary users of this flag are - distributed filesystems that can use it to achieve correct ETXTBUSY - behavior for cross-node execution/opening_for_writing of files */ -#define FMODE_EXEC ((__force fmode_t)16) - -#define FMODE_NDELAY ((__force fmode_t)32) -#define FMODE_EXCL ((__force fmode_t)64) +/* file is open for reading */ +#define FMODE_READ ((__force fmode_t)1) +/* file is open for writing */ +#define FMODE_WRITE ((__force fmode_t)2) +/* file is seekable */ +#define FMODE_LSEEK ((__force fmode_t)4) +/* file can be accessed using pread/pwrite */ +#define FMODE_PREAD ((__force fmode_t)8) +#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ +/* File is opened for execution with sys_execve / sys_uselib */ +#define FMODE_EXEC ((__force fmode_t)16) +/* File is opened with O_NDELAY (only set for block devices) */ +#define FMODE_NDELAY ((__force fmode_t)32) +/* File is opened with O_EXCL (only set for block devices) */ +#define FMODE_EXCL ((__force fmode_t)64) +/* File is opened using open(.., 3, ..) 
and is writeable only for ioctls + (specialy hack for floppy.c) */ #define FMODE_WRITE_IOCTL ((__force fmode_t)128) #define RW_MASK 1 -- cgit v1.2.3 From f2f1fa78a155524b849edf359e42a3001ea652c0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 5 Dec 2008 14:49:18 -0800 Subject: Enforce a minimum SG_IO timeout There's no point in having too short SG_IO timeouts, since if the command does end up timing out, we'll end up through the reset sequence that is several seconds long in order to abort the command that timed out. As a result, shorter timeouts than a few seconds simply do not make sense, as the recovery would be longer than the timeout itself. Add a BLK_MIN_SG_TIMEOUT to match the existign BLK_DEFAULT_SG_TIMEOUT. Suggested-by: Alan Cox Acked-by: Tejun Heo Acked-by: Jens Axboe Cc: Jeff Garzik Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6dcd30d806c..031a315c050 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -662,6 +662,7 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; * default timeout for SG_IO if none specified */ #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) +#define BLK_MIN_SG_TIMEOUT (7 * HZ) #ifdef CONFIG_BOUNCE extern int init_emergency_isa_pool(void); -- cgit v1.2.3 From a64e64944f4b8ce3288519555dbaa0232414b8ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Nov 2008 18:37:41 -0500 Subject: [PATCH] return records for fork() both to child and parent Signed-off-by: Al Viro --- include/linux/audit.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 6272a395d43..1b2a6a5c187 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -391,6 +391,7 @@ extern int audit_classify_arch(int arch); #ifdef CONFIG_AUDITSYSCALL /* These are defined in auditsc.c */ /* Public API */ +extern void 
audit_finish_fork(struct task_struct *child); extern int audit_alloc(struct task_struct *task); extern void audit_free(struct task_struct *task); extern void audit_syscall_entry(int arch, @@ -504,6 +505,7 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) extern int audit_n_rules; extern int audit_signals; #else +#define audit_finish_fork(t) #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0) -- cgit v1.2.3 From 0b0c940a91f8e6fd0e1be3e01d5e98997446233b Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Tue, 18 Nov 2008 15:03:49 +0800 Subject: [PATCH] asm/generic: fix bug - kernel fails to build when enable some common audit code on Blackfin If you enable some common audit code, the kernel fails to build. In file included from lib/audit.c:17: include/asm-generic/audit_write.h:3: error: '__NR_swapon' undeclared here (not in a function) make[1]: *** [lib/audit.o] Error 1 make: *** [lib] Error 2 So do not use __NR_swapon if it isnt defined for a port. Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu Signed-off-by: Al Viro --- include/asm-generic/audit_write.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-generic/audit_write.h b/include/asm-generic/audit_write.h index f10d367fb2a..c5f1c2c920e 100644 --- a/include/asm-generic/audit_write.h +++ b/include/asm-generic/audit_write.h @@ -1,6 +1,8 @@ #include __NR_acct, +#ifdef __NR_swapon __NR_swapon, +#endif __NR_quotactl, __NR_truncate, #ifdef __NR_truncate64 -- cgit v1.2.3 From 48887e63d6e057543067327da6b091297f7fe645 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 6 Dec 2008 01:05:50 -0500 Subject: [PATCH] fix broken timestamps in AVC generated by kernel threads Timestamp in audit_context is valid only if ->in_syscall is set. 
Signed-off-by: Al Viro --- include/linux/audit.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/audit.h b/include/linux/audit.h index 1b2a6a5c187..8f0672d13eb 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -435,7 +435,7 @@ static inline void audit_ptrace(struct task_struct *t) /* Private API (for audit.c only) */ extern unsigned int audit_serial(void); -extern void auditsc_get_stamp(struct audit_context *ctx, +extern int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); #define audit_get_loginuid(t) ((t)->loginuid) @@ -518,7 +518,7 @@ extern int audit_signals; #define audit_inode(n,d) do { ; } while (0) #define audit_inode_child(d,i,p) do { ; } while (0) #define audit_core_dumps(i) do { ; } while (0) -#define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) +#define auditsc_get_stamp(c,t,s) (0) #define audit_get_loginuid(t) (-1) #define audit_get_sessionid(t) (-1) #define audit_log_task_context(b) do { ; } while (0) -- cgit v1.2.3 From 7b363e440021a1cf9ed76944b2685f48dacefb3e Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 9 Dec 2008 23:22:26 -0800 Subject: netpoll: fix race on poll_list resulting in garbage entry A few months back a race was discused between the netpoll napi service path, and the fast path through net_rx_action: http://kerneltrap.org/mailarchive/linux-netdev/2007/10/16/345470 A patch was submitted for that bug, but I think we missed a case. 
Consider the following scenario: INITIAL STATE CPU0 has one napi_struct A on its poll_list CPU1 is calling netpoll_send_skb and needs to call poll_napi on the same napi_struct A that CPU0 has on its list CPU0 CPU1 net_rx_action poll_napi !list_empty (returns true) locks poll_lock for A poll_one_napi napi->poll netif_rx_complete __napi_complete (removes A from poll_list) list_entry(list->next) In the above scenario, net_rx_action assumes that the per-cpu poll_list is exclusive to that cpu. netpoll of course violates that, and because the netpoll path can dequeue from the poll list, it's possible for CPU0 to detect a non-empty list at the top of the while loop in net_rx_action, but have it become empty by the time it calls list_entry. Since the poll_list isn't surrounded by any other structure, the returned data from that list_entry call in this situation is garbage, and any number of crashes can result based on what exactly that garbage is. Given that it's not feasible for performance reasons to place exclusive locks around each cpu's poll list to provide that mutual exclusion, I think the best solution is to modify the netpoll path in such a way that we continue to guarantee that the poll_list for a cpu is in fact exclusive to that cpu. To do this I've implemented the patch below. It adds an additional bit to the state field in the napi_struct. When executing napi->poll from the netpoll_path, this bit will be set. When a driver calls netif_rx_complete, if that bit is set, it will not remove the napi_struct from the poll_list. That work will be saved for the next iteration of net_rx_action. I've tested this and it seems to work well. About the biggest drawback I can see to it is the fact that it might result in an extra loop through net_rx_action in the event that the device is actually contended for (i.e.
the netpoll path actually performs all the needed work on the device, and the call to net_rx_action winds up doing nothing, except removing the napi_struct from the poll_list). However I think this is probably a small price to pay, given that the alternative is a crash. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d77b1d7dca..e26f5495289 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -319,6 +319,7 @@ enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ + NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ }; extern void __napi_schedule(struct napi_struct *n); @@ -1497,6 +1498,12 @@ static inline void netif_rx_complete(struct net_device *dev, { unsigned long flags; + /* + * don't let napi dequeue from the cpu poll list + * just in case its running on a different cpu + */ + if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) + return; local_irq_save(flags); __netif_rx_complete(dev, napi); local_irq_restore(flags); -- cgit v1.2.3 From 71c5576fbd809f2015f4eddf72e501e298720cf3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 9 Dec 2008 13:14:13 -0800 Subject: revert "percpu counter: clean up percpu_counter_sum_and_set()" Revert commit 1f7c14c62ce63805f9574664a6c6de3633d4a354 Author: Mingming Cao Date: Thu Oct 9 12:50:59 2008 -0400 percpu counter: clean up percpu_counter_sum_and_set() Before this patch we had the following: percpu_counter_sum(): return the percpu_counter's value percpu_counter_sum_and_set(): return the percpu_counter's value, copying that value into the central value and zeroing the per-cpu counters before returning. After this patch, percpu_counter_sum_and_set() has gone, and percpu_counter_sum() gets the old percpu_counter_sum_and_set() functionality.
Problem is, as Eric points out, the old percpu_counter_sum_and_set() functionality was racy and wrong. It zeroes out counters on "other" cpus, without holding any locks which will prevent races against updates from those other CPUS. This patch reverts 1f7c14c62ce63805f9574664a6c6de3633d4a354. This means that percpu_counter_sum_and_set() still has the race, but percpu_counter_sum() does not. Note that this is not a simple revert - ext4 has since started using percpu_counter_sum() for its dirty_blocks counter as well. Note that this revert patch changes percpu_counter_sum() semantics. Before the patch, a call to percpu_counter_sum() will bring the counter's central counter mostly up-to-date, so a following percpu_counter_read() will return a close value. After this patch, a call to percpu_counter_sum() will leave the counter's central accumulator unaltered, so a subsequent call to percpu_counter_read() can now return a significantly inaccurate result. If there is any code in the tree which was introduced after e8ced39d5e8911c662d4d69a342b9d053eaaac4e was merged, and which depends upon the new percpu_counter_sum() semantics, that code will break. Reported-by: Eric Dumazet Cc: "David S.
Miller" Cc: Peter Zijlstra Cc: Mingming Cao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc11..20838883535 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc); + s64 ret = __percpu_counter_sum(fbc, 0); return ret < 0 ? 
0 : ret; } +static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) +{ + return __percpu_counter_sum(fbc, 1); +} + + static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc); + return __percpu_counter_sum(fbc, 0); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) -- cgit v1.2.3 From 02d211688727ad02bb4555b1aa8ae2de16b21b39 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 9 Dec 2008 13:14:14 -0800 Subject: revert "percpu_counter: new function percpu_counter_sum_and_set" Revert commit e8ced39d5e8911c662d4d69a342b9d053eaaac4e Author: Mingming Cao Date: Fri Jul 11 19:27:31 2008 -0400 percpu_counter: new function percpu_counter_sum_and_set As described in revert "percpu counter: clean up percpu_counter_sum_and_set()" the new percpu_counter_sum_and_set() is racy against updates to the cpu-local accumulators on other CPUs. Revert that change. This means that ext4 will be slow again. But correct. Reported-by: Eric Dumazet Cc: "David S. 
Miller" Cc: Peter Zijlstra Cc: Mingming Cao Cc: Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu_counter.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 20838883535..9007ccdfc11 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); +s64 __percpu_counter_sum(struct percpu_counter *fbc); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc, 0); + s64 ret = __percpu_counter_sum(fbc); return ret < 0 ? 0 : ret; } -static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) -{ - return __percpu_counter_sum(fbc, 1); -} - - static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc, 0); + return __percpu_counter_sum(fbc); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) -- cgit v1.2.3 From aa6f14796630c8b03c11e782484aec2aee05e671 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Dec 2008 13:14:25 -0800 Subject: atomic: fix a typo in atomic_long_xchg() atomic_long_xchg() is not correctly defined for 32bit arches. 
Signed-off-by: Eric Dumazet Cc: Mathieu Desnoyers Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 4ec0a296bde..7abdaa91ccd 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -251,7 +251,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) #define atomic_long_cmpxchg(l, old, new) \ (atomic_cmpxchg((atomic_t *)(l), (old), (new))) #define atomic_long_xchg(v, new) \ - (atomic_xchg((atomic_t *)(l), (new))) + (atomic_xchg((atomic_t *)(v), (new))) #endif /* BITS_PER_LONG == 64 */ -- cgit v1.2.3 From 9c24624727f6d6c460e45762a408ca5f5b9b8ef2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 9 Dec 2008 13:14:27 -0800 Subject: KSYM_SYMBOL_LEN fixes Miles Lane tailing /sys files hit a BUG which Pekka Enberg has tracked to my 966c8c12dc9e77f931e2281ba25d2f0244b06949 sprint_symbol(): use less stack exposing a bug in slub's list_locations() - kallsyms_lookup() writes a 0 to namebuf[KSYM_NAME_LEN-1], but that was beyond the end of page provided. The 100 slop which list_locations() allows at end of page looks roughly enough for all the other stuff it might print after the symbol before it checks again: break out KSYM_SYMBOL_LEN earlier than before. Latencytop and ftrace and are using KSYM_NAME_LEN buffers where they need KSYM_SYMBOL_LEN buffers, and vmallocinfo a 2*KSYM_NAME_LEN buffer where it wants a KSYM_SYMBOL_LEN buffer: fix those before anyone copies them. 
[akpm@linux-foundation.org: ftrace.h needs module.h] Signed-off-by: Hugh Dickins Cc: Christoph Lameter Cc Miles Lane Acked-by: Pekka Enberg Acked-by: Steven Rostedt Acked-by: Frederic Weisbecker Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ftrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2..9c5bc6be2b0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef CONFIG_FUNCTION_TRACER @@ -231,7 +232,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } struct boot_trace { pid_t caller; - char func[KSYM_NAME_LEN]; + char func[KSYM_SYMBOL_LEN]; int result; unsigned long long duration; /* usecs */ ktime_t calltime; -- cgit v1.2.3 From 54b71fba68efbf3ab89721a384df2ce757750979 Mon Sep 17 00:00:00 2001 From: Akira Takeuchi Date: Wed, 10 Dec 2008 12:43:34 +0000 Subject: MN10300: Fix __put_user_asm8() Fix __put_user_asm8() by jumping to the end label (3:) from the exception handler, rather than jumping back to retry the second store instruction (label 2:). 
Signed-off-by: Akira Takeuchi Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-mn10300/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-mn10300/uaccess.h b/include/asm-mn10300/uaccess.h index 46b9b647f3c..8a3a4dd5576 100644 --- a/include/asm-mn10300/uaccess.h +++ b/include/asm-mn10300/uaccess.h @@ -266,7 +266,7 @@ extern int __get_user_unknown(void); " .section .fixup,\"ax\" \n" \ "4: \n" \ " mov %5,%0 \n" \ - " jmp 2b \n" \ + " jmp 3b \n" \ " .previous \n" \ " .section __ex_table,\"a\"\n" \ " .balign 4 \n" \ -- cgit v1.2.3 From 6c34bc2976b30dc8b56392c020e25bae1f363cab Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 10 Dec 2008 09:26:17 -0800 Subject: Revert "radeonfb: accelerate imageblit and other improvements" This reverts commit b1ee26bab14886350ba12a5c10cbc0696ac679bf, along with the "fixes" for it that all just caused problems: - c4c6fa9891f3d1bcaae4f39fb751d5302965b566 "radeonfb: fix problem with color expansion & alignment" - f3179748a157c21d44d929fd3779421ebfbeaa93 "radeonfb: Disable new color expand acceleration unless explicitely enabled" because even when disabled, it breaks for people. See http://bugzilla.kernel.org/show_bug.cgi?id=12191 for the latest example. Acked-by: Benjamin Herrenschmidt Acked-by: David S. Miller Cc: Krzysztof Halasa Cc: James Cloos Cc: "Rafael J. 
Wysocki" Cc: Krzysztof Helt Cc: Jean-Luc Coulon Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/video/radeon.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/video/radeon.h b/include/video/radeon.h index d5dcaf154ba..1cd09cc5b16 100644 --- a/include/video/radeon.h +++ b/include/video/radeon.h @@ -525,9 +525,6 @@ #define CRTC_DISPLAY_DIS (1 << 10) #define CRTC_CRT_ON (1 << 15) -/* DSTCACHE_MODE bits constants */ -#define RB2D_DC_AUTOFLUSH_ENABLE (1 << 8) -#define RB2D_DC_DC_DISABLE_IGNORE_PE (1 << 17) /* DSTCACHE_CTLSTAT bit constants */ #define RB2D_DC_FLUSH_2D (1 << 0) @@ -869,10 +866,15 @@ #define GMC_DST_16BPP_YVYU422 0x00000c00 #define GMC_DST_32BPP_AYUV444 0x00000e00 #define GMC_DST_16BPP_ARGB4444 0x00000f00 +#define GMC_SRC_MONO 0x00000000 +#define GMC_SRC_MONO_LBKGD 0x00001000 +#define GMC_SRC_DSTCOLOR 0x00003000 #define GMC_BYTE_ORDER_MSB_TO_LSB 0x00000000 #define GMC_BYTE_ORDER_LSB_TO_MSB 0x00004000 #define GMC_DP_CONVERSION_TEMP_9300 0x00008000 #define GMC_DP_CONVERSION_TEMP_6500 0x00000000 +#define GMC_DP_SRC_RECT 0x02000000 +#define GMC_DP_SRC_HOST 0x03000000 #define GMC_DP_SRC_HOST_BYTEALIGN 0x04000000 #define GMC_3D_FCN_EN_CLR 0x00000000 #define GMC_3D_FCN_EN_SET 0x08000000 @@ -883,9 +885,6 @@ #define GMC_WRITE_MASK_LEAVE 0x00000000 #define GMC_WRITE_MASK_SET 0x40000000 #define GMC_CLR_CMP_CNTL_DIS (1 << 28) -#define GMC_SRC_DATATYPE_MASK (3 << 12) -#define GMC_SRC_DATATYPE_MONO_FG_BG (0 << 12) -#define GMC_SRC_DATATYPE_MONO_FG_LA (1 << 12) #define GMC_SRC_DATATYPE_COLOR (3 << 12) #define ROP3_S 0x00cc0000 #define ROP3_SRCCOPY 0x00cc0000 @@ -894,7 +893,6 @@ #define DP_SRC_SOURCE_MASK (7 << 24) #define GMC_BRUSH_NONE (15 << 4) #define DP_SRC_SOURCE_MEMORY (2 << 24) -#define DP_SRC_SOURCE_HOST_DATA (3 << 24) #define GMC_BRUSH_SOLIDCOLOR 0x000000d0 /* DP_MIX bit constants */ @@ -980,12 +978,6 @@ #define DISP_PWR_MAN_TV_ENABLE_RST (1 << 25) #define DISP_PWR_MAN_AUTO_PWRUP_EN 
(1 << 26) -/* RBBM_GUICNTL constants */ -#define RBBM_GUICNTL_HOST_DATA_SWAP_NONE (0 << 0) -#define RBBM_GUICNTL_HOST_DATA_SWAP_16BIT (1 << 0) -#define RBBM_GUICNTL_HOST_DATA_SWAP_32BIT (2 << 0) -#define RBBM_GUICNTL_HOST_DATA_SWAP_HDW (3 << 0) - /* masks */ #define CONFIG_MEMSIZE_MASK 0x1f000000 -- cgit v1.2.3 From d2ff911882b6bc693d86ca9566daac70aacbb2b3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 15 Dec 2008 19:04:35 +1030 Subject: Define smp_call_function_many for UP Otherwise those using it in transition patches (eg. kvm) can't compile with CONFIG_SMP=n: arch/x86/kvm/../../../virt/kvm/kvm_main.c: In function 'make_all_cpus_request': arch/x86/kvm/../../../virt/kvm/kvm_main.c:380: error: implicit declaration of function 'smp_call_function_many' Signed-off-by: Rusty Russell Signed-off-by: Linus Torvalds --- include/linux/smp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/smp.h b/include/linux/smp.h index 3f9a60043a9..6e7ba16ff45 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -146,6 +146,8 @@ static inline void smp_send_reschedule(int cpu) { } }) #define smp_call_function_mask(mask, func, info, wait) \ (up_smp_call_function(func, info)) +#define smp_call_function_many(mask, func, info, wait) \ + (up_smp_call_function(func, info)) static inline void init_call_single_data(void) { } -- cgit v1.2.3 From 092cab7e2cd868cb0b30209a0337689c3ffd6133 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 16 Dec 2008 01:19:41 -0800 Subject: netfilter: ctnetlink: fix missing CTA_NAT_SEQ_UNSPEC This patch fixes an inconsistency in nfnetlink_conntrack.h that I introduced myself. The problem is that CTA_NAT_SEQ_UNSPEC is missing from enum ctattr_natseq. This inconsistency may lead to problems in the message parsing in userspace (if the message contains the CTA_NAT_SEQ_* attributes, of course). 
This patch breaks backward compatibility, however, the only known client of this code is libnetfilter_conntrack which indeed crashes because it assumes the existence of CTA_NAT_SEQ_UNSPEC to do the parsing. The CTA_NAT_SEQ_* attributes were introduced in 2.6.25. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index c19595c8930..29fe9ea1d34 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -141,6 +141,7 @@ enum ctattr_protonat { #define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1) enum ctattr_natseq { + CTA_NAT_SEQ_UNSPEC, CTA_NAT_SEQ_CORRECTION_POS, CTA_NAT_SEQ_OFFSET_BEFORE, CTA_NAT_SEQ_OFFSET_AFTER, -- cgit v1.2.3 From 9a9fafb89433c5fd1331bac0c84c4b321e358b42 Mon Sep 17 00:00:00 2001 From: Phil Endecott Date: Mon, 1 Dec 2008 10:22:33 -0500 Subject: USB: fix comment about endianness of descriptors This patch fixes a comment and clarifies the documentation about the endianness of descriptors. The current policy is that descriptors will be little-endian at the API even on big-endian systems; however the /proc/bus/usb API predates this policy and presents descriptors with some multibyte fields byte-swapped. Signed-off-by: Phil Endecott Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/ch9.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 73a2f4eb1f7..9b42baed390 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -158,8 +158,12 @@ struct usb_ctrlrequest { * (rarely) accepted by SET_DESCRIPTOR. * * Note that all multi-byte values here are encoded in little endian - * byte order "on the wire". 
But when exposed through Linux-USB APIs, - * they've been converted to cpu byte order. + * byte order "on the wire". Within the kernel and when exposed + * through the Linux-USB APIs, they are not converted to cpu byte + * order; it is the responsibility of the client code to do this. + * The single exception is when device and configuration descriptors (but + * not other descriptors) are read from usbfs (i.e. /proc/bus/usb/BBB/DDD); + * in this case the fields are converted to host endianness by the kernel. */ /* -- cgit v1.2.3 From 69c30e1e7492192f882a3fc11888b320fde5206a Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 17 Dec 2008 15:44:58 -0800 Subject: irda: Add irda_skb_cb qdisc related padding We need to pad irda_skb_cb in order to keep it safe accross dev_queue_xmit() calls. This is some ugly and temporary hack triggered by recent qisc code changes. Even though it fixes bugzilla.kernel.org bug #11795, it will be replaced by a proper fix before 2.6.29 is released. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/net/irda/irda_device.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/irda/irda_device.h b/include/net/irda/irda_device.h index 3025ae17ddb..94c852d47d0 100644 --- a/include/net/irda/irda_device.h +++ b/include/net/irda/irda_device.h @@ -135,9 +135,11 @@ struct dongle_reg { /* * Per-packet information we need to hide inside sk_buff - * (must not exceed 48 bytes, check with struct sk_buff) + * (must not exceed 48 bytes, check with struct sk_buff) + * The default_qdisc_pad field is a temporary hack. 
*/ struct irda_skb_cb { + unsigned int default_qdisc_pad; magic_t magic; /* Be sure that we can trust the information */ __u32 next_speed; /* The Speed to be set *after* this frame */ __u16 mtt; /* Minimum turn around time */ -- cgit v1.2.3 From e76f42761197dd6e9405e2eeb35932acfede115a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 13 Nov 2008 17:30:13 -0600 Subject: ACPI: fix 2.6.28 acpi.debug_level regression acpi_early_init() was changed to over-write the cmdline param, making it really inconvenient to set debug flags at boot-time. Also, This sets the default level to "info", which is what all the ACPI drivers use. So to enable messages from drivers, you only have to supply the "layer" (a.k.a. "component"). For non-"info" ACPI core and ACPI interpreter messages, you have to supply both level and layer masks, as before. Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- include/acpi/acoutput.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index 09d33c7740f..db8852d8bcf 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -172,7 +172,7 @@ /* Defaults for debug_level, debug and normal */ -#define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT) +#define ACPI_DEBUG_DEFAULT (ACPI_LV_INFO) #define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT) #define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL) -- cgit v1.2.3 From abe1dfab60e1839d115930286cb421f5a5b193f3 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 26 Nov 2008 14:35:22 +0800 Subject: ACPI: don't cond_resched() when irqs_disabled() The ACPI interpreter usually runs with irqs enabled. However, during suspend/resume it runs with irqs disabled to evaluate _GTS/_BFS, as well as by irqrouter_resume() which evaluates _CRS, _PRS, _SRS. 
http://bugzilla.kernel.org/show_bug.cgi?id=12252 Signed-off-by: Wu Fengguang Signed-off-by: Len Brown --- include/acpi/platform/aclinux.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 029c8c06c15..0515e754449 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -141,6 +141,10 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache) /* * We need to show where it is safe to preempt execution of ACPICA */ -#define ACPI_PREEMPTION_POINT() cond_resched() +#define ACPI_PREEMPTION_POINT() \ + do { \ + if (!irqs_disabled()) \ + cond_resched(); \ + } while (0) #endif /* __ACLINUX_H__ */ -- cgit v1.2.3