From 3044646148cdfa83a311bf1c146a70e550280159 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sun, 9 Nov 2008 10:07:58 -0800 Subject: x86: move iomap.h to the new include location a new file was accidentally added to include/asm-x86; move it to the new arch/x86/include/asm location Signed-off-by: Arjan van de Ven --- include/asm-x86/iomap.h | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 include/asm-x86/iomap.h (limited to 'include') diff --git a/include/asm-x86/iomap.h b/include/asm-x86/iomap.h deleted file mode 100644 index c1f06289b14..00000000000 --- a/include/asm-x86/iomap.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright © 2008 Ingo Molnar - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. - */ - -#include -#include -#include -#include -#include -#include - -void * -iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); - -void -iounmap_atomic(void *kvaddr, enum km_type type); -- cgit v1.2.3 From f6d87f4bd259cf33e092cd1a8fde05f291c47af1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 7 Nov 2008 13:18:30 +0100 Subject: genirq: keep affinities set from userspace across free/request_irq() Impact: preserve user-modified affinities on interrupts Kumar Galak noticed that commit 18404756765c713a0be4eb1082920c04822ce588 (genirq: Expose default irq affinity mask (take 3)) overrides an already set affinity setting across a free / request_irq(). Happens e.g. with ifdown/ifup of a network device. Change the logic to mark the affinities as set and keep them intact. This also fixes the unlocked access to irq_desc in irq_select_affinity() when called from irq_affinity_proc_write() Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/irq.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index d058c57be02..36b186eb318 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -63,7 +63,8 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ -#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ +#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ +#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) @@ -210,7 +211,6 @@ extern int setup_irq(unsigned int irq, struct irqaction *new); #ifdef CONFIG_GENERIC_PENDING_IRQ -void set_pending_irq(unsigned int irq, cpumask_t mask); void move_native_irq(int irq); void move_masked_irq(int irq); @@ -228,10 +228,6 @@ static inline void move_masked_irq(int irq) { } -static inline void set_pending_irq(unsigned int irq, cpumask_t mask) -{ -} - #endif /* CONFIG_GENERIC_PENDING_IRQ */ #else /* CONFIG_SMP */ -- cgit v1.2.3 From a358324466b171e145df20bdb74fe81759906de6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 11 Nov 2008 15:01:42 -0500 Subject: ring-buffer: buffer record on/off switch Impact: enable/disable ring buffer recording API added Several kernel developers have requested that there be a way to stop recording into the ring buffers with a simple switch that can also be enabled from userspace. This patch addes a new kernel API to the ring buffers called: tracing_on() tracing_off() When tracing_off() is called, all ring buffers will not be able to record into their buffers. tracing_on() will enable the ring buffers again. These two act like an on/off switch. That is, there is no counting of the number of times tracing_off or tracing_on has been called. A new file is added to the debugfs/tracing directory called tracing_on This allows for userspace applications to also flip the switch. echo 0 > debugfs/tracing/tracing_on disables the tracing. echo 1 > /debugfs/tracing/tracing_on enables it. Note, this does not disable or enable any tracers. It only sets or clears a flag that needs to be set in order for the ring buffers to write to their buffers. It is a global flag, and affects all ring buffers. The buffers start out with tracing_on enabled. There are now three flags that control recording into the buffers: tracing_on: which affects all ring buffer tracers. buffer->record_disabled: which affects an allocated buffer, which may be set if an anomaly is detected, and tracing is disabled. cpu_buffer->record_disabled: which is set by tracing_stop() or if an anomaly is detected. tracing_start can not reenable this if an anomaly occurred. The userspace debugfs/tracing/tracing_enabled is implemented with tracing_stop() but the user space code can not enable it if the kernel called tracing_stop(). Userspace can enable the tracing_on even if the kernel disabled it. It is just a switch used to stop tracing if a condition was hit. tracing_on is not for protecting critical areas in the kernel nor is it for stopping tracing if an anomaly occurred. This is because userspace can reenable it at any time. Side effect: With this patch, I discovered a dead variable in ftrace.c called tracing_on. This patch removes it. Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 536b0ca46a0..e097c2e6b6d 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -120,6 +120,9 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); u64 ring_buffer_time_stamp(int cpu); void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); +void tracing_on(void); +void tracing_off(void); + enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; -- cgit v1.2.3 From ba32929a91fe2c0628f5be62d1597b379c8d3062 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 10 Nov 2008 15:29:58 +0900 Subject: block: make add_partition() return pointer to hd_struct Make add_partition() return pointer to the new hd_struct on success and ERR_PTR() value on failure. This change will be used to fix md autodetection bug. Signed-off-by: Tejun Heo Cc: Neil Brown Signed-off-by: Jens Axboe --- include/linux/genhd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e439e6aed83..3df7742ce24 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -522,7 +522,9 @@ extern char *disk_name (struct gendisk *hd, int partno, char *buf); extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); -extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); +extern struct hd_struct * __must_check add_partition(struct gendisk *disk, + int partno, sector_t start, + sector_t len, int flags); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); -- cgit v1.2.3 From 8e3bad65a59915f2ddc40f62a180ad81695d8440 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Nov 2008 10:59:59 +0100 Subject: mac80211: remove ieee80211_notify_mac Before ieee80211_notify_mac() was added, it was presented with the use case of using it to tell mac80211 that the association may have been lost because the firmware crashed/reset. Since then, it has also been used by iwlwifi to (slightly) speed up re-association after resume, a workaround around the fact that mac80211 has no suspend/resume handling yet. It is also not used by any other drivers, so clearly it cannot be necessary for "good enough" suspend/resume. Unfortunately, the callback suffers from a severe problem: It only works for station mode. If suspend/resume happens while in IBSS or any other mode (but station), then the callback is pointless. Recently, it has created a number of locking issues, first because it required rtnl locking rather than RCU due to calling sleeping functions within the critical section, and now because it's called by iwlwifi from the mac80211 workqueue that may not use the rtnl because it is flushed under rtnl. (cf. http://bugzilla.kernel.org/show_bug.cgi?id=12046) I think, therefore, that we should take a step back, remove it entirely for now and add the small feature it provided properly. For suspend and resume we will need to introduce new hooks, and for the case where the firmware was reset the driver will probably simply just pretend it has done a suspend/resume cycle to get mac80211 to reprogram the hardware completely, not just try to connect to the current AP again in station mode. When doing so, we will need to take into account locking issues and possibly defer to schedule_work from within mac80211 for the resume operation, while the suspend operation must be done directly. Proper suspend/resume should also not necessarily try to reconnect to the current AP, the time spent in suspend may have been short enough to not be disconnected from the AP, mac80211 will detect that the AP went out of range quickly if it did, and if the association is lost then the AP will disassoc as soon as a data frame is sent. We might also take into account WWOL then, and have mac80211 program the hardware into such a mode where it is available and requested. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8856e2d60e9..73d81bc6aa7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -73,14 +73,6 @@ * not do so then mac80211 may add this under certain circumstances. */ -/** - * enum ieee80211_notification_type - Low level driver notification - * @IEEE80211_NOTIFY_RE_ASSOC: start the re-association sequence - */ -enum ieee80211_notification_types { - IEEE80211_NOTIFY_RE_ASSOC, -}; - /** * struct ieee80211_ht_bss_info - describing BSS's HT characteristics * @@ -1797,18 +1789,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid); void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, u16 tid); -/** - * ieee80211_notify_mac - low level driver notification - * @hw: pointer as obtained from ieee80211_alloc_hw(). - * @notif_type: enum ieee80211_notification_types - * - * This function must be called by low level driver to inform mac80211 of - * low level driver status change or force mac80211 to re-assoc for low - * level driver internal error that require re-assoc. - */ -void ieee80211_notify_mac(struct ieee80211_hw *hw, - enum ieee80211_notification_types notif_type); - /** * ieee80211_find_sta - find a station * -- cgit v1.2.3 From de11defebf00007677fb7ee91d9b089b78786fbb Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 19 Nov 2008 15:36:14 -0800 Subject: reintroduce accept4 Introduce a new accept4() system call. The addition of this system call matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added new system calls that differed from analogous traditional system calls in adding a flags argument that can be used to access additional functionality. The accept4() system call is exactly the same as accept(), except that it adds a flags bit-mask argument. Two flags are initially implemented. (Most of the new system calls in 2.6.27 also had both of these flags.) SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for the new file descriptor returned by accept4(). This is a useful security feature to avoid leaking information in a multithreaded program where one thread is doing an accept() at the same time as another thread is doing a fork() plus exec(). More details here: http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling", Ulrich Drepper). The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag to be enabled on the new open file description created by accept4(). (This flag is merely a convenience, saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result. Here's a test program. Works on x86-32. Should work on x86-64, but I (mtk) don't have a system to hand to test with. It tests accept4() with each of the four possible combinations of SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that the appropriate flags are set on the file descriptor/open file description returned by accept4(). I tested Ulrich's patch in this thread by applying against 2.6.28-rc2, and it passes according to my test program. /* test_accept4.c Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk Licensed under the GNU GPLv2 or later. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #define PORT_NUM 33333 #define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0) /**********************************************************************/ /* The following is what we need until glibc gets a wrapper for accept4() */ /* Flags for socket(), socketpair(), accept4() */ #ifndef SOCK_CLOEXEC #define SOCK_CLOEXEC O_CLOEXEC #endif #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif #ifdef __x86_64__ #define SYS_accept4 288 #elif __i386__ #define USE_SOCKETCALL 1 #define SYS_ACCEPT4 18 #else #error "Sorry -- don't know the syscall # on this architecture" #endif static int accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags) { printf("Calling accept4(): flags = %x", flags); if (flags != 0) { printf(" ("); if (flags & SOCK_CLOEXEC) printf("SOCK_CLOEXEC"); if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK)) printf(" "); if (flags & SOCK_NONBLOCK) printf("SOCK_NONBLOCK"); printf(")"); } printf("\n"); #if USE_SOCKETCALL long args[6]; args[0] = fd; args[1] = (long) sockaddr; args[2] = (long) addrlen; args[3] = flags; return syscall(SYS_socketcall, SYS_ACCEPT4, args); #else return syscall(SYS_accept4, fd, sockaddr, addrlen, flags); #endif } /**********************************************************************/ static int do_test(int lfd, struct sockaddr_in *conn_addr, int closeonexec_flag, int nonblock_flag) { int connfd, acceptfd; int fdf, flf, fdf_pass, flf_pass; struct sockaddr_in claddr; socklen_t addrlen; printf("=======================================\n"); connfd = socket(AF_INET, SOCK_STREAM, 0); if (connfd == -1) die("socket"); if (connect(connfd, (struct sockaddr *) conn_addr, sizeof(struct sockaddr_in)) == -1) die("connect"); addrlen = sizeof(struct sockaddr_in); acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen, closeonexec_flag | nonblock_flag); if (acceptfd == -1) { perror("accept4()"); close(connfd); return 0; } fdf = fcntl(acceptfd, F_GETFD); if (fdf == -1) die("fcntl:F_GETFD"); fdf_pass = ((fdf & FD_CLOEXEC) != 0) == ((closeonexec_flag & SOCK_CLOEXEC) != 0); printf("Close-on-exec flag is %sset (%s); ", (fdf & FD_CLOEXEC) ? "" : "not ", fdf_pass ? "OK" : "failed"); flf = fcntl(acceptfd, F_GETFL); if (flf == -1) die("fcntl:F_GETFD"); flf_pass = ((flf & O_NONBLOCK) != 0) == ((nonblock_flag & SOCK_NONBLOCK) !=0); printf("nonblock flag is %sset (%s)\n", (flf & O_NONBLOCK) ? "" : "not ", flf_pass ? "OK" : "failed"); close(acceptfd); close(connfd); printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL"); return fdf_pass && flf_pass; } static int create_listening_socket(int port_num) { struct sockaddr_in svaddr; int lfd; int optval; memset(&svaddr, 0, sizeof(struct sockaddr_in)); svaddr.sin_family = AF_INET; svaddr.sin_addr.s_addr = htonl(INADDR_ANY); svaddr.sin_port = htons(port_num); lfd = socket(AF_INET, SOCK_STREAM, 0); if (lfd == -1) die("socket"); optval = 1; if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) == -1) die("setsockopt"); if (bind(lfd, (struct sockaddr *) &svaddr, sizeof(struct sockaddr_in)) == -1) die("bind"); if (listen(lfd, 5) == -1) die("listen"); return lfd; } int main(int argc, char *argv[]) { struct sockaddr_in conn_addr; int lfd; int port_num; int passed; passed = 1; port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM; memset(&conn_addr, 0, sizeof(struct sockaddr_in)); conn_addr.sin_family = AF_INET; conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); conn_addr.sin_port = htons(port_num); lfd = create_listening_socket(port_num); if (!do_test(lfd, &conn_addr, 0, 0)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0)) passed = 0; if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK)) passed = 0; close(lfd); exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); } [mtk.manpages@gmail.com: rewrote changelog, updated test program] Signed-off-by: Ulrich Drepper Tested-by: Michael Kerrisk Acked-by: Michael Kerrisk Cc: Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/net.h | 6 ++---- include/linux/syscalls.h | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 6dc14a24004..4515efae4c3 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -40,7 +40,7 @@ #define SYS_GETSOCKOPT 15 /* sys_getsockopt(2) */ #define SYS_SENDMSG 16 /* sys_sendmsg(2) */ #define SYS_RECVMSG 17 /* sys_recvmsg(2) */ -#define SYS_PACCEPT 18 /* sys_paccept(2) */ +#define SYS_ACCEPT4 18 /* sys_accept4(2) */ typedef enum { SS_FREE = 0, /* not allocated */ @@ -100,7 +100,7 @@ enum sock_type { * remaining bits are used as flags. */ #define SOCK_TYPE_MASK 0xf -/* Flags for socket, socketpair, paccept */ +/* Flags for socket, socketpair, accept4 */ #define SOCK_CLOEXEC O_CLOEXEC #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK @@ -223,8 +223,6 @@ extern int sock_map_fd(struct socket *sock, int flags); extern struct socket *sockfd_lookup(int fd, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); -extern long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags); #define net_random() random32() #define net_srandom(seed) srandom32((__force u32)seed) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index d6ff145919c..04fb47bfb92 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -410,8 +410,7 @@ asmlinkage long sys_getsockopt(int fd, int level, int optname, asmlinkage long sys_bind(int, struct sockaddr __user *, int); asmlinkage long sys_connect(int, struct sockaddr __user *, int); asmlinkage long sys_accept(int, struct sockaddr __user *, int __user *); -asmlinkage long sys_paccept(int, struct sockaddr __user *, int __user *, - const __user sigset_t *, size_t, int); +asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int); asmlinkage long sys_getsockname(int, struct sockaddr __user *, int __user *); asmlinkage long sys_getpeername(int, struct sockaddr __user *, int __user *); asmlinkage long sys_send(int, void __user *, size_t, unsigned); -- cgit v1.2.3 From f481891fdc49d3d1b8a9674a1825d183069a805f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 19 Nov 2008 15:36:30 -0800 Subject: cpuset: update top cpuset's mems after adding a node After adding a node into the machine, top cpuset's mems isn't updated. By reviewing the code, we found that the update function cpuset_track_online_nodes() was invoked after node_states[N_ONLINE] changes. It is wrong because N_ONLINE just means node has pgdat, and if node has/added memory, we use N_HIGH_MEMORY. So, We should invoke the update function after node_states[N_HIGH_MEMORY] changes, just like its commit says. This patch fixes it. And we use notifier of memory hotplug instead of direct calling of cpuset_track_online_nodes(). Signed-off-by: Miao Xie Acked-by: Yasunori Goto Cc: David Rientjes Cc: Paul Menage Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 2691926fb50..8e540d32c9f 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -74,8 +74,6 @@ static inline int cpuset_do_slab_mem_spread(void) return current->flags & PF_SPREAD_SLAB; } -extern void cpuset_track_online_nodes(void); - extern int current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); @@ -151,8 +149,6 @@ static inline int cpuset_do_slab_mem_spread(void) return 0; } -static inline void cpuset_track_online_nodes(void) {} - static inline int current_cpuset_is_being_rebound(void) { return 0; -- cgit v1.2.3 From 21098c68df7115554fe041170899bdff709efd08 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 20 Nov 2008 10:58:01 -0500 Subject: [SCSI] fc_transport: fix old bug on bitflag definitions When the fastfail flag was added, it did not account for the flags being bit fields. Correct the definition so there is no longer a conflict. Signed-off-by: James Smart Signed-off-by: James Bottomley --- include/scsi/scsi_transport_fc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 49d8913c4f8..6e04e6fe79c 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -357,7 +357,7 @@ struct fc_rport { /* aka fc_starget_attrs */ /* bit field values for struct fc_rport "flags" field: */ #define FC_RPORT_DEVLOSS_PENDING 0x01 #define FC_RPORT_SCAN_PENDING 0x02 -#define FC_RPORT_FAST_FAIL_TIMEDOUT 0x03 +#define FC_RPORT_FAST_FAIL_TIMEDOUT 0x04 #define dev_to_rport(d) \ container_of(d, struct fc_rport, dev) -- cgit v1.2.3 From 7e56b5d698707a9934833c47b24d78fb0bcaf764 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 21 Nov 2008 16:45:22 -0800 Subject: net: Fix memory leak in the proto_register function If the slub allocator is used, kmem_cache_create() may merge two or more kmem_cache's into one but the cache name pointer is not updated and kmem_cache_name() is no longer guaranteed to return the pointer passed to the former function. This patch stores the kmalloc'ed pointers in the corresponding request_sock_ops and timewait_sock_ops structures. Signed-off-by: Catalin Marinas Acked-by: Arnaldo Carvalho de Melo Reviewed-by: Christoph Lameter Signed-off-by: David S. Miller --- include/net/request_sock.h | 1 + include/net/timewait_sock.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index cac811e51f6..c7190846e12 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -31,6 +31,7 @@ struct request_sock_ops { int family; int obj_size; struct kmem_cache *slab; + char *slab_name; int (*rtx_syn_ack)(struct sock *sk, struct request_sock *req); void (*send_ack)(struct sock *sk, struct sk_buff *skb, diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 1e1ee3253fd..97c3b14da55 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -16,6 +16,7 @@ struct timewait_sock_ops { struct kmem_cache *twsk_slab; + char *twsk_slab_name; unsigned int twsk_obj_size; int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); -- cgit v1.2.3 From 2ed1cdcf9a83205d1343f29b630abff232eaa72c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 21 Nov 2008 16:59:57 -0800 Subject: irq.h: fix missing/extra kernel-doc Impact: fix kernel-doc build Fix missing & excess irq.h kernel-doc: Warning(include/linux/irq.h:182): No description found for parameter 'irq' Warning(include/linux/irq.h:182): Excess struct/union/enum/typedef member 'affinity_entry' description in 'irq_desc' Signed-off-by: Randy Dunlap Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/irq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 36b186eb318..3dddfa703eb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -131,7 +131,7 @@ struct irq_chip { /** * struct irq_desc - interrupt descriptor - * + * @irq: interrupt number for this descriptor * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] * @chip: low level interrupt hardware access * @msi_desc: MSI descriptor @@ -150,7 +150,6 @@ struct irq_chip { * @cpu: cpu index useful for balancing * @pending_mask: pending rebalanced interrupts * @dir: /proc/irq/ procfs entry - * @affinity_entry: /proc/irq/smp_affinity procfs entry on SMP * @name: flow handler name for /proc/interrupts output */ struct irq_desc { -- cgit v1.2.3 From 52440211dcdc52c0b757f8b34d122e11b12cdd50 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 18 Nov 2008 09:30:25 -0800 Subject: drm: move drm vblank initialization/cleanup to driver load/unload drm vblank initialization keeps track of the changes in driver-supplied frame counts across vt switch and mode setting, but only if you let it by not tearing down the drm vblank structure. Signed-off-by: Keith Packard Signed-off-by: Dave Airlie --- include/drm/drmP.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 28c7f1679d4..d5e8e5c8954 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1151,6 +1151,7 @@ extern u32 drm_vblank_count(struct drm_device *dev, int crtc); extern void drm_handle_vblank(struct drm_device *dev, int crtc); extern int drm_vblank_get(struct drm_device *dev, int crtc); extern void drm_vblank_put(struct drm_device *dev, int crtc); +extern void drm_vblank_cleanup(struct drm_device *dev); /* Modesetting support */ extern int drm_modeset_ctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -- cgit v1.2.3 From f79fca55f9a6fe54635ad32ddc8a38f92a94ec30 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 24 Nov 2008 16:06:17 -0800 Subject: netfilter: xtables: add missing const qualifier to xt_tgchk_param When entryinfo was a standalone parameter to functions, it used to be "const void *". Put the const back in. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index be41b609c88..e52ce475d19 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -251,7 +251,7 @@ struct xt_target_param { */ struct xt_tgchk_param { const char *table; - void *entryinfo; + const void *entryinfo; const struct xt_target *target; void *targinfo; unsigned int hook_mask; -- cgit v1.2.3 From 5f23b734963ec7eaa3ebcd9050da0c9b7d143dd3 Mon Sep 17 00:00:00 2001 From: dann frazier Date: Wed, 26 Nov 2008 15:32:27 -0800 Subject: net: Fix soft lockups/OOM issues w/ unix garbage collector This is an implementation of David Miller's suggested fix in: https://bugzilla.redhat.com/show_bug.cgi?id=470201 It has been updated to use wait_event() instead of wait_event_interruptible(). Paraphrasing the description from the above report, it makes sendmsg() block while UNIX garbage collection is in progress. This avoids a situation where child processes continue to queue new FDs over a AF_UNIX socket to a parent which is in the exit path and running garbage collection on these FDs. This contention can result in soft lockups and oom-killing of unrelated processes. Signed-off-by: dann frazier Signed-off-by: David S. Miller --- include/net/af_unix.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index c29ff1da8a1..1614d78c60e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -9,6 +9,7 @@ extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); +extern void wait_for_unix_gc(void); #define UNIX_HASH_SIZE 256 -- cgit v1.2.3 From 95a28ed08619cc70f31611886ac7b26ab0e462dc Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 13 Nov 2008 11:01:34 +0800 Subject: ACPICA: Allow _WAK method to return an Integer This can happen if the _WAK method returns nothing (as per ACPI 1.0) but does return an integer if the implicit return mechanism is enabled. This is the only method that has this problem, since it is also defined to return a package of two integers (ACPI 1.0b+). In all other cases, if a method returns an object when one was not expected, no warning is issued. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpredef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpredef.h b/include/acpi/acpredef.h index 619fb75f886..e6452dbf39e 100644 --- a/include/acpi/acpredef.h +++ b/include/acpi/acpredef.h @@ -346,7 +346,7 @@ static const union acpi_predefined_info predefined_names[] = { /* Acpi 1.0 defined _WAK with no return value. Later, it was changed to return a package */ - {.info = {"_WAK", 1, ACPI_RTYPE_NONE | ACPI_RTYPE_PACKAGE}}, + {.info = {"_WAK", 1, ACPI_RTYPE_NONE | ACPI_RTYPE_INTEGER | ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 2, 0, 0, 0}}, /* fixed (2 Int), but is optional */ {.ret_info = {0, 0, 0, 0, 0, 0}} /* Table terminator */ }; -- cgit v1.2.3 From e899b6485c332aa2d7510739507ab5e5d7b28e59 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 27 Nov 2008 14:42:30 +0800 Subject: ACPICA: disable _BIF warning A generic work-around from ACPICA is in the queue, but since Linux has a work-around in its battery driver, we can disable this warning now. Allow _BIF method to return an Package with Buffer elements http://bugzilla.kernel.org/show_bug.cgi?id=11822 Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpredef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpredef.h b/include/acpi/acpredef.h index e6452dbf39e..16a9ca9a66e 100644 --- a/include/acpi/acpredef.h +++ b/include/acpi/acpredef.h @@ -167,7 +167,7 @@ static const union acpi_predefined_info predefined_names[] = { {.info = {"_BFS", 1, 0}}, {.info = {"_BIF", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 9, - ACPI_RTYPE_STRING, 4, 0}}, /* fixed (9 Int),(4 Str) */ + ACPI_RTYPE_STRING | ACPI_RTYPE_BUFFER, 4, 0}}, /* fixed (9 Int),(4 Str) */ {.info = {"_BLT", 3, 0}}, {.info = {"_BMC", 1, 0}}, {.info = {"_BMD", 0, ACPI_RTYPE_PACKAGE}}, {.ret_info = {ACPI_PTYPE1_FIXED, ACPI_RTYPE_INTEGER, 5, 0, 0, 0}}, /* fixed (5 Int) */ -- cgit v1.2.3 From 487ff32082a9bd7489d8185cf7d7a2fdf18a22fa Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 27 Nov 2008 11:13:58 +0000 Subject: Allow architectures to override copy_user_highpage() With aliasing VIPT cache support, the ARM implementation of clear_user_page() and copy_user_page() sets up a temporary kernel space mapping such that we have the same cache colour as the userspace page. This avoids having to consider any userspace aliases from this operation. However, when highmem is enabled, kmap_atomic() have to setup mappings. The copy_user_highpage() and clear_user_highpage() call these functions before delegating the copies to copy_user_page() and clear_user_page(). The effect of this is that each of the *_user_highpage() functions setup their own kmap mapping, followed by the *_user_page() functions setting up another mapping. This is rather wasteful. Thankfully, copy_user_highpage() can be overriden by architectures by defining __HAVE_ARCH_COPY_USER_HIGHPAGE. However, replacement of clear_user_highpage() is more difficult because its inline definition is not conditional. It seems that you're expected to define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and provide a replacement __alloc_zeroed_user_highpage() implementation instead. The allocation itself is fine, so we don't want to override that. What we really want to do is to override clear_user_highpage() with our own version which doesn't kmap_atomic() unnecessarily. Other VIPT architectures (PARISC and SH) would also like to override this function as well. Acked-by: Hugh Dickins Acked-by: James Bottomley Acked-by: Paul Mundt Signed-off-by: Russell King --- include/linux/highmem.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7dcbc82f3b7..13875ce9112 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -63,12 +63,14 @@ static inline void *kmap_atomic(struct page *page, enum km_type idx) #endif /* CONFIG_HIGHMEM */ /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ +#ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_atomic(page, KM_USER0); clear_user_page(addr, vaddr, page); kunmap_atomic(addr, KM_USER0); } +#endif #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /** -- cgit v1.2.3 From 9a5aa622dd4cd22b5e0fe83e4a9c0c768d4e2dea Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 28 Nov 2008 21:29:46 -0800 Subject: mlx4_core: Save/restore default port IB capability mask Commit 7ff93f8b ("mlx4_core: Multiple port type support") introduced support for different port types. As part of that support, SET_PORT is invoked to set the port type during driver startup. However, as a side-effect, for IB ports the invocation of this command also sets the port's capability mask to zero (losing the default value set by FW). To fix this, get the default ib port capabilities (via a MAD_IFC Port Info query) during driver startup, and save them for use in the mlx4_SET_PORT command when setting the port-type to Infiniband. This patch fixes problems with subnet manager (SM) failover such as , which occurred because the IsTrapSupported bit in the capability mask was zeroed. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index bd9977b8949..371086fd946 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -179,6 +179,7 @@ struct mlx4_caps { int num_ports; int vl_cap[MLX4_MAX_PORTS + 1]; int ib_mtu_cap[MLX4_MAX_PORTS + 1]; + __be32 ib_port_def_cap[MLX4_MAX_PORTS + 1]; u64 def_mac[MLX4_MAX_PORTS + 1]; int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; -- cgit v1.2.3 From 31168481c32c8a485e1003af9433124dede57f8d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:33:24 +0000 Subject: meminit section warnings Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/page_cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index f546ad6fc02..1e6d34bfa09 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -17,7 +17,7 @@ struct page_cgroup { struct list_head lru; /* per cgroup LRU list */ }; -void __init pgdat_page_cgroup_init(struct pglist_data *pgdat); +void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); void __init page_cgroup_init(void); struct page_cgroup *lookup_page_cgroup(struct page *page); @@ -91,7 +91,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; -static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat) +static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) { } -- cgit v1.2.3 From 02d0e6753d8ab0173b63338157929e52eac86d12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 22 Nov 2008 17:38:34 +0000 Subject: hotplug_memory_notifier section annotation Same as for hotplug_cpu - we want static notifier_block in there in meminitdata, to avoid false positives whenever it's used. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory.h b/include/linux/memory.h index 2f5f8a5ef2a..36c82c9e6ea 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -91,7 +91,7 @@ extern int memory_notify(unsigned long val, void *v); #ifdef CONFIG_MEMORY_HOTPLUG #define hotplug_memory_notifier(fn, pri) { \ - static struct notifier_block fn##_mem_nb = \ + static __meminitdata struct notifier_block fn##_mem_nb =\ { .notifier_call = fn, .priority = pri }; \ register_memory_notifier(&fn##_mem_nb); \ } -- cgit v1.2.3 From 96b8936a9ed08746e47081458a5eb9e43a751e24 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Nov 2008 08:10:03 +0100 Subject: remove __ARCH_WANT_COMPAT_SYS_PTRACE All architectures now use the generic compat_sys_ptrace, as should every new architecture that needs 32bit compat (if we'll ever get another). Remove the now superflous __ARCH_WANT_COMPAT_SYS_PTRACE define, and also kill a comment about __ARCH_SYS_PTRACE that was added after __ARCH_SYS_PTRACE was already gone. Signed-off-by: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Linus Torvalds --- include/linux/compat.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index f061a1ea1b7..e88f3ecf38b 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -252,12 +252,10 @@ extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); -#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, compat_long_t addr, compat_long_t data); -#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ /* * epoll (fs/eventpoll.c) compat bits follow ... -- cgit v1.2.3 From ac70a964b0e22a95af3628c344815857a01461b7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 27 Nov 2008 13:36:48 +0900 Subject: libata: blacklist Seagate drives which time out FLUSH_CACHE when used with NCQ Some recent Seagate harddrives have firmware bug which causes FLUSH CACHE to timeout under certain circumstances if NCQ is being used. This can be worked around by disabling NCQ and fixed by updating the firmware. Implement ATA_HORKAGE_FIRMWARE_UPDATE and blacklist these devices. The wiki page has been updated to contain information on this issue. http://ata.wiki.kernel.org/index.php/Known_issues Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 59b0f1c807b..ed3f26eb5df 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -375,6 +375,7 @@ enum { ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands not multiple of 16 bytes */ + ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firwmare update warning */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 7ef9964e6d1b911b78709f144000aacadd0ebc21 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Mon, 1 Dec 2008 13:13:55 -0800 Subject: epoll: introduce resource usage limits It has been thought that the per-user file descriptors limit would also limit the resources that a normal user can request via the epoll interface. Vegard Nossum reported a very simple program (a modified version attached) that can make a normal user to request a pretty large amount of kernel memory, well within the its maximum number of fds. To solve such problem, default limits are now imposed, and /proc based configuration has been introduced. A new directory has been created, named /proc/sys/fs/epoll/ and inside there, there are two configuration points: max_user_instances = Maximum number of devices - per user max_user_watches = Maximum number of "watched" fds - per user The current default for "max_user_watches" limits the memory used by epoll to store "watches", to 1/32 of the amount of the low RAM. As example, a 256MB 32bit machine, will have "max_user_watches" set to roughly 90000. That should be enough to not break existing heavy epoll users. The default value for "max_user_instances" is set to 128, that should be enough too. This also changes the userspace, because a new error code can now come out from EPOLL_CTL_ADD (-ENOSPC). The EMFILE from epoll_create() was already listed, so that should be ok. [akpm@linux-foundation.org: use get_current_user()] Signed-off-by: Davide Libenzi Cc: Michael Kerrisk Cc: Cc: Cyrill Gorcunov Reported-by: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 644ffbda17c..55e30d11447 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -630,6 +630,10 @@ struct user_struct { atomic_t inotify_watches; /* How many inotify watches does this user have? */ atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ #endif +#ifdef CONFIG_EPOLL + atomic_t epoll_devs; /* The number of epoll descriptors currently open */ + atomic_t epoll_watches; /* The number of file descriptors currently watched */ +#endif #ifdef CONFIG_POSIX_MQUEUE /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ -- cgit v1.2.3 From 6ff2d39b91aec3dcae951afa982059e3dd9b49dc Mon Sep 17 00:00:00 2001 From: Manfred Spraul Date: Mon, 1 Dec 2008 13:14:02 -0800 Subject: lib/idr.c: fix rcu related race with idr_find 2nd part of the fixes needed for http://bugzilla.kernel.org/show_bug.cgi?id=11796. When the idr tree is either grown or shrunk, then the update to the number of layers and the top pointer were not atomic. This race caused crashes. The attached patch fixes that by replicating the layers counter in each layer, thus idr_find doesn't need idp->layers anymore. Signed-off-by: Manfred Spraul Cc: Clement Calmels Cc: Nadia Derbey Cc: Pierre Peiffer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/idr.h b/include/linux/idr.h index fa035f96f2a..dd846df8cd3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -52,13 +52,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ struct idr_layer *ary[1< Date: Tue, 2 Dec 2008 20:40:03 +0100 Subject: amd74xx: workaround unreliable AltStatus register for nVidia controllers It seems that on some nVidia controllers using AltStatus register can be unreliable so default to Status register if the PCI device is in Compatibility Mode. In order to achieve this: * Add ide_pci_is_in_compatibility_mode() inline helper to . * Add IDE_HFLAG_BROKEN_ALTSTATUS host flag and set it in amd74xx host driver for nVidia controllers in Compatibility Mode. * Teach actual_try_to_identify() and drive_is_ready() about the new flag. This fixes the regression caused by removal of CONFIG_IDEPCI_SHARE_IRQ config option in 2.6.25 and using AltStatus register unconditionally when available (kernel.org bugs #11659 and #10216). [ Moreover for CONFIG_IDEPCI_SHARE_IRQ=y (which is what most people and distributions use) it never worked correctly. ] Thanks to Remy LABENE and Lars Winterfeld for help with debugging the problem. More info at: http://bugzilla.kernel.org/show_bug.cgi?id=11659 http://bugzilla.kernel.org/show_bug.cgi?id=10216 Reported-by: Remy LABENE Tested-by: Remy LABENE Tested-by: Lars Winterfeld Acked-by: Borislav Petkov Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/ide.h b/include/linux/ide.h index 54525be4b5f..010fb26a157 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1296,6 +1296,13 @@ extern int __ide_pci_register_driver(struct pci_driver *driver, struct module *o #define ide_pci_register_driver(d) pci_register_driver(d) #endif +static inline int ide_pci_is_in_compatibility_mode(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 5) != 5) + return 1; + return 0; +} + void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, int, hw_regs_t *, hw_regs_t **); void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *); @@ -1375,6 +1382,7 @@ enum { IDE_HFLAG_IO_32BIT = (1 << 24), /* unmask IRQs */ IDE_HFLAG_UNMASK_IRQS = (1 << 25), + IDE_HFLAG_BROKEN_ALTSTATUS = (1 << 26), /* serialize ports if DMA is possible (for sl82c105) */ IDE_HFLAG_SERIALIZE_DMA = (1 << 27), /* force host out of "simplex" mode */ -- cgit v1.2.3 From 1b79cd04fab80be61dcd2732e2423aafde9a4c1c Mon Sep 17 00:00:00 2001 From: "Junjiro R. Okajima" Date: Tue, 2 Dec 2008 10:31:46 -0800 Subject: nfsd: fix vm overcommit crash fix #2 The previous patch from Alan Cox ("nfsd: fix vm overcommit crash", commit 731572d39fcd3498702eda4600db4c43d51e0b26) fixed the problem where knfsd crashes on exported shmemfs objects and strict overcommit is set. But the patch forgot supporting the case when CONFIG_SECURITY is disabled. This patch copies a part of his fix which is mainly for detecting a bug earlier. Acked-by: James Morris Signed-off-by: Alan Cox Signed-off-by: Junjiro R. Okajima Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/security.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index c13f1cec9ab..e3d4ecda267 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1818,17 +1818,21 @@ static inline int security_settime(struct timespec *ts, struct timezone *tz) static inline int security_vm_enough_memory(long pages) { + WARN_ON(current->mm == NULL); return cap_vm_enough_memory(current->mm, pages); } -static inline int security_vm_enough_memory_kern(long pages) +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return cap_vm_enough_memory(current->mm, pages); + WARN_ON(mm == NULL); + return cap_vm_enough_memory(mm, pages); } -static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +static inline int security_vm_enough_memory_kern(long pages) { - return cap_vm_enough_memory(mm, pages); + /* If current->mm is a kernel thread then we will pass NULL, + for this specific case that is fine */ + return cap_vm_enough_memory(current->mm, pages); } static inline int security_bprm_alloc(struct linux_binprm *bprm) -- cgit v1.2.3 From 53a08807c01989c6847bb135d8d43f61c5dfdda5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 3 Dec 2008 12:41:26 +0100 Subject: block: internal dequeue shouldn't start timer blkdev_dequeue_request() and elv_dequeue_request() are equivalent and both start the timeout timer. Barrier code dequeues the original barrier request but doesn't passes the request itself to lower level driver, only broken down proxy requests; however, as the original barrier code goes through the same dequeue path and timeout timer is started on it. If barrier sequence takes long enough, this timer expires but the low level driver has no idea about this request and oops follows. Timeout timer shouldn't have been started on the original barrier request as it never goes through actual IO. This patch unexports elv_dequeue_request(), which has no external user anyway, and makes it operate on elevator proper w/o adding the timer and make blkdev_dequeue_request() call elv_dequeue_request() and add timer. Internal users which don't pass the request to driver - barrier code and end_that_request_last() - are converted to use elv_dequeue_request(). Signed-off-by: Tejun Heo Cc: Mike Anderson Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a135256b272..9cc7cc5fdce 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -786,6 +786,8 @@ static inline void blk_run_address_space(struct address_space *mapping) blk_run_backing_dev(mapping->backing_dev_info, NULL); } +extern void blkdev_dequeue_request(struct request *req); + /* * blk_end_request() and friends. * __blk_end_request() and end_request() must be called with @@ -820,11 +822,6 @@ extern void blk_update_request(struct request *rq, int error, extern unsigned int blk_rq_bytes(struct request *rq); extern unsigned int blk_rq_cur_bytes(struct request *rq); -static inline void blkdev_dequeue_request(struct request *req) -{ - elv_dequeue_request(req->q, req); -} - /* * Access functions for manipulating queue properties */ -- cgit v1.2.3 From 0e435ac26e3f951d83338ed3d4ab7dc0fe0055bc Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 3 Dec 2008 12:55:08 +0100 Subject: block: fix setting of max_segment_size and seg_boundary mask Fix setting of max_segment_size and seg_boundary mask for stacked md/dm devices. When stacking devices (LVM over MD over SCSI) some of the request queue parameters are not set up correctly in some cases by default, namely max_segment_size and and seg_boundary mask. If you create MD device over SCSI, these attributes are zeroed. Problem become when there is over this mapping next device-mapper mapping - queue attributes are set in DM this way: request_queue max_segment_size seg_boundary_mask SCSI 65536 0xffffffff MD RAID1 0 0 LVM 65536 -1 (64bit) Unfortunately bio_add_page (resp. bio_phys_segments) calculates number of physical segments according to these parameters. During the generic_make_request() is segment cout recalculated and can increase bio->bi_phys_segments count over the allowed limit. (After bio_clone() in stack operation.) Thi is specially problem in CCISS driver, where it produce OOPS here BUG_ON(creq->nr_phys_segments > MAXSGENTRIES); (MAXSEGENTRIES is 31 by default.) Sometimes even this command is enough to cause oops: dd iflag=direct if=/dev// of=/dev/null bs=128000 count=10 This command generates bios with 250 sectors, allocated in 32 4k-pages (last page uses only 1024 bytes). For LVM layer, it allocates bio with 31 segments (still OK for CCISS), unfortunatelly on lower layer it is recalculated to 32 segments and this violates CCISS restriction and triggers BUG_ON(). The patch tries to fix it by: * initializing attributes above in queue request constructor blk_queue_make_request() * make sure that blk_queue_stack_limits() inherits setting (DM uses its own function to set the limits because it blk_queue_stack_limits() was introduced later. It should probably switch to use generic stack limit function too.) * sets the default seg_boundary value in one place (blkdev.h) * use this mask as default in DM (instead of -1, which differs in 64bit) Bugs related to this: https://bugzilla.redhat.com/show_bug.cgi?id=471639 http://bugzilla.kernel.org/show_bug.cgi?id=8672 Signed-off-by: Milan Broz Reviewed-by: Alasdair G Kergon Cc: Neil Brown Cc: FUJITA Tomonori Cc: Tejun Heo Cc: Mike Miller Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9cc7cc5fdce..6dcd30d806c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -918,6 +918,8 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define MAX_SEGMENT_SIZE 65536 +#define BLK_SEG_BOUNDARY_MASK 0xFFFFFFFFUL + #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) static inline int queue_hardsect_size(struct request_queue *q) -- cgit v1.2.3