about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Documentation/kernel-parameters.txt 16
-rw-r--r-- arch/i386/kernel/tsc.c 1
-rw-r--r-- arch/i386/kernel/vmitime.c 10
-rw-r--r-- arch/um/kernel/signal.c 6
-rw-r--r-- arch/um/os-Linux/skas/process.c 5
-rw-r--r-- arch/um/os-Linux/trap.c 1
-rw-r--r-- drivers/block/Kconfig 16
-rw-r--r-- drivers/block/cciss.c 49
-rw-r--r-- drivers/crypto/geode-aes.c 3
-rw-r--r-- drivers/video/Kconfig 16
-rw-r--r-- drivers/video/aty/atyfb.h 3
-rw-r--r-- drivers/video/nvidia/nv_backlight.c 5
-rw-r--r-- fs/buffer.c 9
-rw-r--r-- fs/sysfs/dir.c 2
-rw-r--r-- include/asm-i386/tsc.h 68
-rw-r--r-- include/asm-i386/vmi_time.h 8
-rw-r--r-- include/asm-x86_64/tsc.h 68
-rw-r--r-- include/linux/audit.h 1
-rw-r--r-- include/linux/hrtimer.h 4
-rw-r--r-- include/linux/sunrpc/svc.h 2
-rw-r--r-- include/linux/sunrpc/svcsock.h 2
-rw-r--r-- init/Kconfig 16
-rw-r--r-- ipc/mqueue.c 3
-rw-r--r-- kernel/hrtimer.c 15
-rw-r--r-- kernel/power/Kconfig 37
-rw-r--r-- kernel/rcutorture.c 14
-rw-r--r-- kernel/time/tick-broadcast.c 36
-rw-r--r-- kernel/time/tick-common.c 32
-rw-r--r-- kernel/time/tick-internal.h 4
-rw-r--r-- kernel/timer.c 8
-rw-r--r-- net/sunrpc/svc.c 154
-rw-r--r-- net/sunrpc/svcsock.c 101
32 files changed, 481 insertions, 234 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 03eb5ed503f..6e92ba61f7c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1685,6 +1685,22 @@ and is between 256 and 4096 characters. It is defined in the file
stifb= [HW]
Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
+ sunrpc.pool_mode=
+ [NFS]
+ Control how the NFS server code allocates CPUs to
+ service thread pools. Depending on how many NICs
+ you have and where their interrupts are bound, this
+ option will affect which CPUs will do NFS serving.
+ Note: this parameter cannot be changed while the
+ NFS server is running.
+
+ auto the server chooses an appropriate mode
+ automatically using heuristics
+ global a single global pool contains all CPUs
+ percpu one pool for each CPU
+ pernode one pool for each NUMA node (equivalent
+ to global on non-NUMA machines)
+
swiotlb= [IA-64] Number of I/O TLB slabs
switches= [HW,M68k]
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 875d8a6ecc0..602660df455 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -24,7 +24,6 @@
* an extra value to store the TSC freq
*/
unsigned int tsc_khz;
-unsigned long long (*custom_sched_clock)(void);
int tsc_disable;
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c
index 8dc72d57566..9dfb17739b6 100644
--- a/arch/i386/kernel/vmitime.c
+++ b/arch/i386/kernel/vmitime.c
@@ -123,12 +123,10 @@ static struct clocksource clocksource_vmi = {
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
static struct irqaction vmi_timer_irq = {
- vmi_timer_interrupt,
- SA_INTERRUPT,
- CPU_MASK_NONE,
- "VMI-alarm",
- NULL,
- NULL
+ .handler = vmi_timer_interrupt,
+ .flags = IRQF_DISABLED,
+ .mask = CPU_MASK_NONE,
+ .name = "VMI-alarm",
};
/* Alarm rate */
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 2a32e5e8e9c..3c798cdde55 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -158,12 +158,12 @@ static int kern_do_signal(struct pt_regs *regs)
clear_thread_flag(TIF_RESTORE_SIGMASK);
sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
}
- return(handled_sig);
+ return handled_sig;
}
int do_signal(void)
{
- return(kern_do_signal(&current->thread.regs));
+ return kern_do_signal(&current->thread.regs);
}
/*
@@ -186,5 +186,5 @@ long sys_sigsuspend(int history0, int history1, old_sigset_t mask)
long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
{
- return(do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs)));
+ return do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs));
}
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 9b34fe65949..dda06789bcb 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -419,9 +419,12 @@ void map_stub_pages(int fd, unsigned long code,
.offset = code_offset
} } });
n = os_write_file(fd, &mmop, sizeof(mmop));
- if(n != sizeof(mmop))
+ if(n != sizeof(mmop)){
+ printk("mmap args - addr = 0x%lx, fd = %d, offset = %llx\n",
+ code, code_fd, (unsigned long long) code_offset);
panic("map_stub_pages : /proc/mm map for code failed, "
"err = %d\n", -n);
+ }
if ( stack ) {
__u64 map_offset;
diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c
index 1df231a2624..d221214d2ed 100644
--- a/arch/um/os-Linux/trap.c
+++ b/arch/um/os-Linux/trap.c
@@ -16,6 +16,7 @@ void usr2_handler(int sig, union uml_pt_regs *regs)
CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0);
}
+/* Initialized from linux_main() */
void (*sig_info[NSIG])(int, union uml_pt_regs *);
void os_fill_handlinfo(struct kern_handlers h)
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index cacb1c816e3..17ee97f3a99 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -406,22 +406,6 @@ config BLK_DEV_RAM_BLOCKSIZE
setups function - apparently needed by the rd_load_image routine
that supposes the filesystem in the image uses a 1024 blocksize.
-config BLK_DEV_INITRD
- bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
- depends on BROKEN || !FRV
- help
- The initial RAM filesystem is a ramfs which is loaded by the
- boot loader (loadlin or lilo) and that is mounted as root
- before the normal boot procedure. It is typically used to
- load modules needed to mount the "real" root file system,
- etc. See <file:Documentation/initrd.txt> for details.
-
- If RAM disk support (BLK_DEV_RAM) is also included, this
- also enables initial RAM disk (initrd) support and adds
- 15 Kbytes (more on some other architectures) to the kernel size.
-
- If unsure say Y.
-
config CDROM_PKTCDVD
tristate "Packet writing on CD/DVD media"
depends on !UML
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 05dfe357527..0c716ee905d 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1291,13 +1291,19 @@ static void cciss_update_drive_info(int ctlr, int drv_index)
if (inq_buff == NULL)
goto mem_msg;
+ /* testing to see if 16-byte CDBs are already being used */
+ if (h->cciss_read == CCISS_READ_16) {
+ cciss_read_capacity_16(h->ctlr, drv_index, 1,
+ &total_size, &block_size);
+ goto geo_inq;
+ }
+
cciss_read_capacity(ctlr, drv_index, 1,
&total_size, &block_size);
- /* total size = last LBA + 1 */
- /* FFFFFFFF + 1 = 0, cannot have a logical volume of size 0 */
- /* so we assume this volume this must be >2TB in size */
- if (total_size == (__u32) 0) {
+ /* if read_capacity returns all F's this volume is >2TB in size */
+ /* so we switch to 16-byte CDBs for all read/write ops */
+ if (total_size == 0xFFFFFFFFULL) {
cciss_read_capacity_16(ctlr, drv_index, 1,
&total_size, &block_size);
h->cciss_read = CCISS_READ_16;
@@ -1306,6 +1312,7 @@ static void cciss_update_drive_info(int ctlr, int drv_index)
h->cciss_read = CCISS_READ_10;
h->cciss_write = CCISS_WRITE_10;
}
+geo_inq:
cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
inq_buff, &h->drv[drv_index]);
@@ -1917,13 +1924,14 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
drv->raid_level = inq_buff->data_byte[8];
}
drv->block_size = block_size;
- drv->nr_blocks = total_size;
+ drv->nr_blocks = total_size + 1;
t = drv->heads * drv->sectors;
if (t > 1) {
- unsigned rem = sector_div(total_size, t);
+ sector_t real_size = total_size + 1;
+ unsigned long rem = sector_div(real_size, t);
if (rem)
- total_size++;
- drv->cylinders = total_size;
+ real_size++;
+ drv->cylinders = real_size;
}
} else { /* Get geometry failed */
printk(KERN_WARNING "cciss: reading geometry failed\n");
@@ -1953,16 +1961,16 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
ctlr, buf, sizeof(ReadCapdata_struct),
1, logvol, 0, NULL, TYPE_CMD);
if (return_code == IO_OK) {
- *total_size = be32_to_cpu(*(__u32 *) buf->total_size)+1;
+ *total_size = be32_to_cpu(*(__u32 *) buf->total_size);
*block_size = be32_to_cpu(*(__u32 *) buf->block_size);
} else { /* read capacity command failed */
printk(KERN_WARNING "cciss: read capacity failed\n");
*total_size = 0;
*block_size = BLOCK_SIZE;
}
- if (*total_size != (__u32) 0)
+ if (*total_size != 0)
printk(KERN_INFO " blocks= %llu block_size= %d\n",
- (unsigned long long)*total_size, *block_size);
+ (unsigned long long)*total_size+1, *block_size);
kfree(buf);
return;
}
@@ -1989,7 +1997,7 @@ cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,
1, logvol, 0, NULL, TYPE_CMD);
}
if (return_code == IO_OK) {
- *total_size = be64_to_cpu(*(__u64 *) buf->total_size)+1;
+ *total_size = be64_to_cpu(*(__u64 *) buf->total_size);
*block_size = be32_to_cpu(*(__u32 *) buf->block_size);
} else { /* read capacity command failed */
printk(KERN_WARNING "cciss: read capacity failed\n");
@@ -1997,7 +2005,7 @@ cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,
*block_size = BLOCK_SIZE;
}
printk(KERN_INFO " blocks= %llu block_size= %d\n",
- (unsigned long long)*total_size, *block_size);
+ (unsigned long long)*total_size+1, *block_size);
kfree(buf);
return;
}
@@ -3119,8 +3127,9 @@ static void cciss_getgeometry(int cntl_num)
}
cciss_read_capacity(cntl_num, i, 0, &total_size, &block_size);
- /* total_size = last LBA + 1 */
- if(total_size == (__u32) 0) {
+ /* If read_capacity returns all F's the logical volume is >2TB */
+ /* so we switch to 16-byte CDBs for all read/write ops */
+ if(total_size == 0xFFFFFFFFULL) {
cciss_read_capacity_16(cntl_num, i, 0,
&total_size, &block_size);
hba[cntl_num]->cciss_read = CCISS_READ_16;
@@ -3395,7 +3404,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
return -1;
}
-static void __devexit cciss_remove_one(struct pci_dev *pdev)
+static void cciss_remove_one(struct pci_dev *pdev)
{
ctlr_info_t *tmp_ptr;
int i, j;
@@ -3419,9 +3428,10 @@ static void __devexit cciss_remove_one(struct pci_dev *pdev)
memset(flush_buf, 0, 4);
return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0, 0, 0, NULL,
TYPE_CMD);
- if (return_code != IO_OK) {
- printk(KERN_WARNING "Error Flushing cache on controller %d\n",
- i);
+ if (return_code == IO_OK) {
+ printk(KERN_INFO "Completed flushing cache on controller %d\n", i);
+ } else {
+ printk(KERN_WARNING "Error flushing cache on controller %d\n", i);
}
free_irq(hba[i]->intr[2], hba[i]);
@@ -3472,6 +3482,7 @@ static struct pci_driver cciss_pci_driver = {
.probe = cciss_init_one,
.remove = __devexit_p(cciss_remove_one),
.id_table = cciss_pci_device_id, /* id_table */
+ .shutdown = cciss_remove_one,
};
/*
diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c
index 0eb62841e9b..6d3840e629d 100644
--- a/drivers/crypto/geode-aes.c
+++ b/drivers/crypto/geode-aes.c
@@ -99,9 +99,8 @@ do_crypt(void *src, void *dst, int len, u32 flags)
static unsigned int
geode_aes_crypt(struct geode_aes_op *op)
{
-
u32 flags = 0;
- int iflags;
+ unsigned long iflags;
if (op->len == 0 || op->src == op->dst)
return 0;
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index b8f0a11e8f3..7f5a5983681 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -677,8 +677,6 @@ config FB_S1D13XXX
config FB_NVIDIA
tristate "nVidia Framebuffer Support"
depends on FB && PCI
- select I2C_ALGOBIT if FB_NVIDIA_I2C
- select I2C if FB_NVIDIA_I2C
select FB_BACKLIGHT if FB_NVIDIA_BACKLIGHT
select FB_MODE_HELPERS
select FB_CFB_FILLRECT
@@ -697,6 +695,7 @@ config FB_NVIDIA
config FB_NVIDIA_I2C
bool "Enable DDC Support"
depends on FB_NVIDIA
+ select FB_DDC
help
This enables I2C support for nVidia Chipsets. This is used
only for getting EDID information from the attached display
@@ -716,7 +715,6 @@ config FB_NVIDIA_BACKLIGHT
config FB_RIVA
tristate "nVidia Riva support"
depends on FB && PCI
- select FB_DDC if FB_RIVA_I2C
select FB_BACKLIGHT if FB_RIVA_BACKLIGHT
select FB_MODE_HELPERS
select FB_CFB_FILLRECT
@@ -734,6 +732,7 @@ config FB_RIVA
config FB_RIVA_I2C
bool "Enable DDC Support"
depends on FB_RIVA
+ select FB_DDC
help
This enables I2C support for nVidia Chipsets. This is used
only for getting EDID information from the attached display
@@ -812,8 +811,6 @@ config FB_INTEL
depends on FB && EXPERIMENTAL && PCI && X86
select AGP
select AGP_INTEL
- select I2C_ALGOBIT if FB_INTEL_I2C
- select I2C if FB_INTEL_I2C
select FB_MODE_HELPERS
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
@@ -846,6 +843,7 @@ config FB_INTEL_DEBUG
config FB_INTEL_I2C
bool "DDC/I2C for Intel framebuffer support"
depends on FB_INTEL
+ select FB_DDC
default y
help
Say Y here if you want DDC/I2C support for your on-board Intel graphics.
@@ -924,8 +922,8 @@ config FB_MATROX_G
config FB_MATROX_I2C
tristate "Matrox I2C support"
- depends on FB_MATROX && I2C
- select I2C_ALGOBIT
+ depends on FB_MATROX
+ select FB_DDC
---help---
This drivers creates I2C buses which are needed for accessing the
DDC (I2C) bus present on all Matroxes, an I2C bus which
@@ -993,7 +991,6 @@ config FB_MATROX_MULTIHEAD
config FB_RADEON
tristate "ATI Radeon display support"
depends on FB && PCI
- select FB_DDC if FB_RADEON_I2C
select FB_BACKLIGHT if FB_RADEON_BACKLIGHT
select FB_MODE_HELPERS
select FB_CFB_FILLRECT
@@ -1018,6 +1015,7 @@ config FB_RADEON
config FB_RADEON_I2C
bool "DDC/I2C for ATI Radeon support"
depends on FB_RADEON
+ select FB_DDC
default y
help
Say Y here if you want DDC/I2C support for your Radeon board.
@@ -1125,7 +1123,6 @@ config FB_S3
config FB_SAVAGE
tristate "S3 Savage support"
depends on FB && PCI && EXPERIMENTAL
- select FB_DDC if FB_SAVAGE_I2C
select FB_MODE_HELPERS
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
@@ -1142,6 +1139,7 @@ config FB_SAVAGE
config FB_SAVAGE_I2C
bool "Enable DDC2 Support"
depends on FB_SAVAGE
+ select FB_DDC
help
This enables I2C support for S3 Savage Chipsets. This is used
only for getting EDID information from the attached display
diff --git a/drivers/video/aty/atyfb.h b/drivers/video/aty/atyfb.h
index f72faff33c0..dc62f8e282b 100644
--- a/drivers/video/aty/atyfb.h
+++ b/drivers/video/aty/atyfb.h
@@ -284,7 +284,8 @@ static inline void aty_st_8(int regindex, u8 val, const struct atyfb_par *par)
#endif
}
-#if defined(CONFIG_PM) || defined(CONFIG_PMAC_BACKLIGHT) || defined (CONFIG_FB_ATY_GENERIC_LCD)
+#if defined(CONFIG_PM) || defined(CONFIG_PMAC_BACKLIGHT) || \
+defined (CONFIG_FB_ATY_GENERIC_LCD) || defined (CONFIG_FB_ATY_BACKLIGHT)
extern void aty_st_lcd(int index, u32 val, const struct atyfb_par *par);
extern u32 aty_ld_lcd(int index, const struct atyfb_par *par);
#endif
diff --git a/drivers/video/nvidia/nv_backlight.c b/drivers/video/nvidia/nv_backlight.c
index a50b303093a..43f62d8ee41 100644
--- a/drivers/video/nvidia/nv_backlight.c
+++ b/drivers/video/nvidia/nv_backlight.c
@@ -12,6 +12,11 @@
#include <linux/backlight.h>
#include <linux/fb.h>
#include <linux/pci.h>
+
+#ifdef CONFIG_PMAC_BACKLIGHT
+#include <asm/backlight.h>
+#endif
+
#include "nv_local.h"
#include "nv_type.h"
#include "nv_proto.h"
diff --git a/fs/buffer.c b/fs/buffer.c
index e8504b65176..1d0852fa728 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2365,6 +2365,10 @@ failed:
}
EXPORT_SYMBOL(nobh_prepare_write);
+/*
+ * Make sure any changes to nobh_commit_write() are reflected in
+ * nobh_truncate_page(), since it doesn't call commit_write().
+ */
int nobh_commit_write(struct file *file, struct page *page,
unsigned from, unsigned to)
{
@@ -2466,6 +2470,11 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
+ /*
+ * It would be more correct to call aops->commit_write()
+ * here, but this is more efficient.
+ */
+ SetPageUptodate(page);
set_page_dirty(page);
}
unlock_page(page);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 8813990304f..85a668680f8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -431,6 +431,8 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
new_parent_dentry = new_parent ?
new_parent->dentry : sysfs_mount->mnt_sb->s_root;
+ if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
+ return 0; /* nothing to move */
again:
mutex_lock(&old_parent_dentry->d_inode->i_mutex);
if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
diff --git a/include/asm-i386/tsc.h b/include/asm-i386/tsc.h
index e997891cc7c..84016ff481b 100644
--- a/include/asm-i386/tsc.h
+++ b/include/asm-i386/tsc.h
@@ -1 +1,67 @@
-#include <asm-x86_64/tsc.h>
+/*
+ * linux/include/asm-i386/tsc.h
+ *
+ * i386 TSC related functions
+ */
+#ifndef _ASM_i386_TSC_H
+#define _ASM_i386_TSC_H
+
+#include <asm/processor.h>
+
+/*
+ * Standard way to access the cycle counter.
+ */
+typedef unsigned long long cycles_t;
+
+extern unsigned int cpu_khz;
+extern unsigned int tsc_khz;
+
+static inline cycles_t get_cycles(void)
+{
+ unsigned long long ret = 0;
+
+#ifndef CONFIG_X86_TSC
+ if (!cpu_has_tsc)
+ return 0;
+#endif
+
+#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
+ rdtscll(ret);
+#endif
+ return ret;
+}
+
+/* Like get_cycles, but make sure the CPU is synchronized. */
+static __always_inline cycles_t get_cycles_sync(void)
+{
+ unsigned long long ret;
+#ifdef X86_FEATURE_SYNC_RDTSC
+ unsigned eax;
+
+ /*
+ * Don't do an additional sync on CPUs where we know
+ * RDTSC is already synchronous:
+ */
+ alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
+ "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
+#else
+ sync_core();
+#endif
+ rdtscll(ret);
+
+ return ret;
+}
+
+extern void tsc_init(void);
+extern void mark_tsc_unstable(void);
+extern int unsynchronized_tsc(void);
+extern void init_tsc_clocksource(void);
+
+/*
+ * Boot-time check whether the TSCs are synchronized across
+ * all CPUs/cores:
+ */
+extern void check_tsc_sync_source(int cpu);
+extern void check_tsc_sync_target(void);
+
+#endif
diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h
index 1f971eb7f71..94d0a12a411 100644
--- a/include/asm-i386/vmi_time.h
+++ b/include/asm-i386/vmi_time.h
@@ -61,6 +61,14 @@ extern void apic_vmi_timer_interrupt(void);
#ifdef CONFIG_NO_IDLE_HZ
extern int vmi_stop_hz_timer(void);
extern void vmi_account_time_restart_hz_timer(void);
+#else
+static inline int vmi_stop_hz_timer(void)
+{
+ return 0;
+}
+static inline void vmi_account_time_restart_hz_timer(void)
+{
+}
#endif
/*
diff --git a/include/asm-x86_64/tsc.h b/include/asm-x86_64/tsc.h
index 26c3e982828..d66ba6ef25f 100644
--- a/include/asm-x86_64/tsc.h
+++ b/include/asm-x86_64/tsc.h
@@ -1,67 +1 @@
-/*
- * linux/include/asm-x86_64/tsc.h
- *
- * x86_64 TSC related functions
- */
-#ifndef _ASM_x86_64_TSC_H
-#define _ASM_x86_64_TSC_H
-
-#include <asm/processor.h>
-
-/*
- * Standard way to access the cycle counter.
- */
-typedef unsigned long long cycles_t;
-
-extern unsigned int cpu_khz;
-extern unsigned int tsc_khz;
-
-static inline cycles_t get_cycles(void)
-{
- unsigned long long ret = 0;
-
-#ifndef CONFIG_X86_TSC
- if (!cpu_has_tsc)
- return 0;
-#endif
-
-#if defined(CONFIG_X86_GENERIC) || defined(CONFIG_X86_TSC)
- rdtscll(ret);
-#endif
- return ret;
-}
-
-/* Like get_cycles, but make sure the CPU is synchronized. */
-static __always_inline cycles_t get_cycles_sync(void)
-{
- unsigned long long ret;
-#ifdef X86_FEATURE_SYNC_RDTSC
- unsigned eax;
-
- /*
- * Don't do an additional sync on CPUs where we know
- * RDTSC is already synchronous:
- */
- alternative_io("cpuid", ASM_NOP2, X86_FEATURE_SYNC_RDTSC,
- "=a" (eax), "0" (1) : "ebx","ecx","edx","memory");
-#else
- sync_core();
-#endif
- rdtscll(ret);
-
- return ret;
-}
-
-extern void tsc_init(void);
-extern void mark_tsc_unstable(void);
-extern int unsynchronized_tsc(void);
-extern void init_tsc_clocksource(void);
-
-/*
- * Boot-time check whether the TSCs are synchronized across
- * all CPUs/cores:
- */
-extern void check_tsc_sync_source(int cpu);
-extern void check_tsc_sync_target(void);
-
-#endif
+#include <asm-i386/tsc.h>
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 229fa012c89..773e30df11e 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -24,6 +24,7 @@
#ifndef _LINUX_AUDIT_H_
#define _LINUX_AUDIT_H_
+#include <linux/types.h>
#include <linux/elf-em.h>
/* The netlink messages for the audit system is divided into blocks:
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 3bef961b58b..5bdbc744e77 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -47,7 +47,7 @@ enum hrtimer_restart {
* HRTIMER_CB_IRQSAFE: Callback may run in hardirq context
* HRTIMER_CB_IRQSAFE_NO_RESTART: Callback may run in hardirq context and
* does not restart the timer
- * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in softirq context
+ * HRTIMER_CB_IRQSAFE_NO_SOFTIRQ: Callback must run in hardirq context
* Special mode for tick emultation
*/
enum hrtimer_cb_mode {
@@ -139,7 +139,7 @@ struct hrtimer_sleeper {
};
/**
- * struct hrtimer_base - the timer base for a specific clock
+ * struct hrtimer_clock_base - the timer base for a specific clock
* @cpu_base: per cpu clock base
* @index: clock type index for per_cpu support when moving a
* timer to a base on another cpu.
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 83b3c7b433a..35fa4d5aadd 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -194,9 +194,7 @@ static inline void svc_putu32(struct kvec *iov, __be32 val)
union svc_addr_u {
struct in_addr addr;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_addr addr6;
-#endif
};
/*
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index cccea0a0feb..7909687557b 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -66,7 +66,7 @@ struct svc_sock {
* Function prototypes.
*/
int svc_makesock(struct svc_serv *, int, unsigned short, int flags);
-void svc_close_socket(struct svc_sock *);
+void svc_force_close_socket(struct svc_sock *);
int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *);
diff --git a/init/Kconfig b/init/Kconfig
index f977086e118..b170aa1d43b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -304,6 +304,22 @@ config RELAY
If unsure, say N.
+config BLK_DEV_INITRD
+ bool "Initial RAM filesystem and RAM disk (initramfs/initrd) support"
+ depends on BROKEN || !FRV
+ help
+ The initial RAM filesystem is a ramfs which is loaded by the
+ boot loader (loadlin or lilo) and that is mounted as root
+ before the normal boot procedure. It is typically used to
+ load modules needed to mount the "real" root file system,
+ etc. See <file:Documentation/initrd.txt> for details.
+
+ If RAM disk support (BLK_DEV_RAM) is also included, this
+ also enables initial RAM disk (initrd) support and adds
+ 15 Kbytes (more on some other architectures) to the kernel size.
+
+ If unsure say Y.
+
if BLK_DEV_INITRD
source "usr/Kconfig"
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 0b5ecbe5f04..554ac368be7 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -731,7 +731,8 @@ asmlinkage long sys_mq_unlink(const char __user *u_name)
if (IS_ERR(name))
return PTR_ERR(name);
- mutex_lock(&mqueue_mnt->mnt_root->d_inode->i_mutex);
+ mutex_lock_nested(&mqueue_mnt->mnt_root->d_inode->i_mutex,
+ I_MUTEX_PARENT);
dentry = lookup_one_len(name, mqueue_mnt->mnt_root, strlen(name));
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index de93a8176ca..ec4cb9f3e3b 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -540,19 +540,19 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
/*
* Switch to high resolution mode
*/
-static void hrtimer_switch_to_hres(void)
+static int hrtimer_switch_to_hres(void)
{
struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
unsigned long flags;
if (base->hres_active)
- return;
+ return 1;
local_irq_save(flags);
if (tick_init_highres()) {
local_irq_restore(flags);
- return;
+ return 0;
}
base->hres_active = 1;
base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
@@ -565,13 +565,14 @@ static void hrtimer_switch_to_hres(void)
local_irq_restore(flags);
printk(KERN_INFO "Switched to high resolution mode on CPU %d\n",
smp_processor_id());
+ return 1;
}
#else
static inline int hrtimer_hres_active(void) { return 0; }
static inline int hrtimer_is_hres_enabled(void) { return 0; }
-static inline void hrtimer_switch_to_hres(void) { }
+static inline int hrtimer_switch_to_hres(void) { return 0; }
static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
struct hrtimer_clock_base *base)
@@ -1130,6 +1131,9 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
if (base->softirq_time.tv64 <= timer->expires.tv64)
break;
+#ifdef CONFIG_HIGH_RES_TIMERS
+ WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
+#endif
timer_stats_account_hrtimer(timer);
fn = timer->function;
@@ -1173,7 +1177,8 @@ void hrtimer_run_queues(void)
* deadlock vs. xtime_lock.
*/
if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
- hrtimer_switch_to_hres();
+ if (hrtimer_switch_to_hres())
+ return;
hrtimer_get_softirq_time(cpu_base);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 95f6657fff7..51a4dd0f1b7 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -81,29 +81,34 @@ config SOFTWARE_SUSPEND
bool "Software Suspend"
depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
---help---
- Enable the possibility of suspending the machine.
- It doesn't need ACPI or APM.
- You may suspend your machine by 'swsusp' or 'shutdown -z <time>'
- (patch for sysvinit needed).
+ Enable the suspend to disk (STD) functionality.
- It creates an image which is saved in your active swap. Upon next
+ You can suspend your machine with 'echo disk > /sys/power/state'.
+ Alternatively, you can use the additional userland tools available
+ from <http://suspend.sf.net>.
+
+ In principle it does not require ACPI or APM, although for example
+ ACPI will be used if available.
+
+ It creates an image which is saved in your active swap. Upon the next
boot, pass the 'resume=/dev/swappartition' argument to the kernel to
have it detect the saved image, restore memory state from it, and
continue to run as before. If you do not want the previous state to
- be reloaded, then use the 'noresume' kernel argument. However, note
- that your partitions will be fsck'd and you must re-mkswap your swap
- partitions. It does not work with swap files.
+ be reloaded, then use the 'noresume' kernel command line argument.
+ Note, however, that fsck will be run on your filesystems and you will
+ need to run mkswap against the swap partition used for the suspend.
- Right now you may boot without resuming and then later resume but
- in meantime you cannot use those swap partitions/files which were
- involved in suspending. Also in this case there is a risk that buffers
- on disk won't match with saved ones.
+ It also works with swap files to a limited extent (for details see
+ <file:Documentation/power/swsusp-and-swap-files.txt>).
- For more information take a look at <file:Documentation/power/swsusp.txt>.
+ Right now you may boot without resuming and resume later but in the
+ meantime you cannot use the swap partition(s)/file(s) involved in
+ suspending. Also in this case you must not use the filesystems
+ that were mounted before the suspend. In particular, you MUST NOT
+ MOUNT any journaled filesystems mounted before the suspend or they
+ will get corrupted in a nasty way.
- (For now, swsusp is incompatible with PAE aka HIGHMEM_64G on i386.
- we need identity mapping for resume to work, and that is trivial
- to get with 4MB pages, but less than trivial on PAE).
+ For more information take a look at <file:Documentation/power/swsusp.txt>.
config PM_STD_PARTITION
string "Default resume partition"
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 482b11ff65c..bcd14e83ef3 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -60,19 +60,19 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
static char *torture_type = "rcu"; /* What RCU implementation to torture. */
-module_param(nreaders, int, 0);
+module_param(nreaders, int, 0444);
MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
-module_param(nfakewriters, int, 0);
+module_param(nfakewriters, int, 0444);
MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
-module_param(stat_interval, int, 0);
+module_param(stat_interval, int, 0444);
MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
-module_param(verbose, bool, 0);
+module_param(verbose, bool, 0444);
MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
-module_param(test_no_idle_hz, bool, 0);
+module_param(test_no_idle_hz, bool, 0444);
MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
-module_param(shuffle_interval, int, 0);
+module_param(shuffle_interval, int, 0444);
MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
-module_param(torture_type, charp, 0);
+module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
#define TORTURE_FLAG "-torture:"
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 12b3efeb9f6..5567745470f 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -284,6 +284,42 @@ void tick_shutdown_broadcast(unsigned int *cpup)
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
+void tick_suspend_broadcast(void)
+{
+ struct clock_event_device *bc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+ bc = tick_broadcast_device.evtdev;
+ if (bc && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+ clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
+
+ spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
+int tick_resume_broadcast(void)
+{
+ struct clock_event_device *bc;
+ unsigned long flags;
+ int broadcast = 0;
+
+ spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+ bc = tick_broadcast_device.evtdev;
+ if (bc) {
+ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC &&
+ !cpus_empty(tick_broadcast_mask))
+ tick_broadcast_start_periodic(bc);
+
+ broadcast = cpu_isset(smp_processor_id(), tick_broadcast_mask);
+ }
+ spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+
+ return broadcast;
+}
+
+
#ifdef CONFIG_TICK_ONESHOT
static cpumask_t tick_broadcast_oneshot_mask;
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 0986a2bfab4..43ba1bdec14 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -298,6 +298,28 @@ static void tick_shutdown(unsigned int *cpup)
spin_unlock_irqrestore(&tick_device_lock, flags);
}
+static void tick_suspend_periodic(void)
+{
+ struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&tick_device_lock, flags);
+ if (td->mode == TICKDEV_MODE_PERIODIC)
+ clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+ spin_unlock_irqrestore(&tick_device_lock, flags);
+}
+
+static void tick_resume_periodic(void)
+{
+ struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&tick_device_lock, flags);
+ if (td->mode == TICKDEV_MODE_PERIODIC)
+ tick_setup_periodic(td->evtdev, 0);
+ spin_unlock_irqrestore(&tick_device_lock, flags);
+}
+
/*
* Notification about clock event devices
*/
@@ -325,6 +347,16 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
tick_shutdown(dev);
break;
+ case CLOCK_EVT_NOTIFY_SUSPEND:
+ tick_suspend_periodic();
+ tick_suspend_broadcast();
+ break;
+
+ case CLOCK_EVT_NOTIFY_RESUME:
+ if (!tick_resume_broadcast())
+ tick_resume_periodic();
+ break;
+
default:
break;
}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 54861a0f29f..75890efd24f 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -67,6 +67,8 @@ extern int tick_check_broadcast_device(struct clock_event_device *dev);
extern int tick_is_broadcast_device(struct clock_event_device *dev);
extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
extern void tick_shutdown_broadcast(unsigned int *cpup);
+extern void tick_suspend_broadcast(void);
+extern int tick_resume_broadcast(void);
extern void
tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
@@ -90,6 +92,8 @@ static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
+/*
+ * No-op variants of the broadcast suspend/resume hooks. Returning 0 from
+ * tick_resume_broadcast() means the CLOCK_EVT_NOTIFY_RESUME handler goes
+ * on to resume the per-cpu periodic device.
+ * NOTE(review): presumably used when broadcast support is not built in;
+ * the guarding #ifdef/#else lies outside this hunk - confirm.
+ */
+static inline void tick_suspend_broadcast(void) { }
+static inline int tick_resume_broadcast(void) { return 0; }
/*
* Set the periodic handler in non broadcast mode
diff --git a/kernel/timer.c b/kernel/timer.c
index 8ad384253ef..797cccb8643 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -862,6 +862,8 @@ int do_settimeofday(struct timespec *tv)
clock->error = 0;
ntp_clear();
+ update_vsyscall(&xtime, clock);
+
write_sequnlock_irqrestore(&xtime_lock, flags);
/* signal hrtimers about time change */
@@ -997,6 +999,9 @@ static int timekeeping_resume(struct sys_device *dev)
write_sequnlock_irqrestore(&xtime_lock, flags);
touch_softlockup_watchdog();
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
+
/* Resume hrtimers */
clock_was_set();
@@ -1011,6 +1016,9 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
timekeeping_suspended = 1;
timekeeping_suspend_time = read_persistent_clock();
write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+
return 0;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 8353829bc5c..b4db53ff143 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -27,22 +27,26 @@
#define RPCDBG_FACILITY RPCDBG_SVCDSP
+#define svc_serv_is_pooled(serv) ((serv)->sv_function)
+
/*
* Mode for mapping cpus to pools.
*/
enum {
- SVC_POOL_NONE = -1, /* uninitialised, choose one of the others */
+ SVC_POOL_AUTO = -1, /* choose one of the others */
SVC_POOL_GLOBAL, /* no mapping, just a single global pool
* (legacy & UP mode) */
SVC_POOL_PERCPU, /* one pool per cpu */
SVC_POOL_PERNODE /* one pool per numa node */
};
+#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
/*
* Structure for mapping cpus to pools and vice versa.
* Setup once during sunrpc initialisation.
*/
static struct svc_pool_map {
+ int count; /* How many svc_servs use us */
int mode; /* Note: int not enum to avoid
* warnings about "enumeration value
* not handled in switch" */
@@ -50,9 +54,63 @@ static struct svc_pool_map {
unsigned int *pool_to; /* maps pool id to cpu or node */
unsigned int *to_pool; /* maps cpu or node to pool id */
} svc_pool_map = {
- .mode = SVC_POOL_NONE
+ .count = 0,
+ .mode = SVC_POOL_DEFAULT
};
+static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
+
+/*
+ * Setter for the "pool_mode" module parameter.
+ *
+ * Stores the SVC_POOL_* value named by @val through kp->arg. Only the
+ * leading characters of @val are compared against each keyword, so any
+ * text following a keyword is ignored.
+ *
+ * Returns 0 on success, -EBUSY while the pool map is in use
+ * (svc_pool_map.count != 0), or -EINVAL for an unknown keyword.
+ */
+static int
+param_set_pool_mode(const char *val, struct kernel_param *kp)
+{
+	static const struct {
+		const char *name;
+		int len;
+		int mode;
+	} keywords[] = {
+		{ "auto",    4, SVC_POOL_AUTO },
+		{ "global",  6, SVC_POOL_GLOBAL },
+		{ "percpu",  6, SVC_POOL_PERCPU },
+		{ "pernode", 7, SVC_POOL_PERNODE },
+		{ NULL,      0, 0 }
+	};
+	int *modep = (int *)kp->arg;
+	int ret = -EBUSY;
+	int i;
+
+	mutex_lock(&svc_pool_map_mutex);
+
+	/* The mode may only be changed while nobody references the map */
+	if (svc_pool_map.count == 0) {
+		ret = -EINVAL;
+		for (i = 0; keywords[i].name; i++) {
+			if (strncmp(val, keywords[i].name,
+				    keywords[i].len) == 0) {
+				*modep = keywords[i].mode;
+				ret = 0;
+				break;
+			}
+		}
+	}
+
+	mutex_unlock(&svc_pool_map_mutex);
+	return ret;
+}
+/*
+ * Getter for the "pool_mode" module parameter.
+ *
+ * Writes the keyword for the mode stored behind kp->arg into @buf, or
+ * the raw number when the value is not one of the SVC_POOL_* modes.
+ * Returns whatever the strlcpy()/sprintf() call that filled @buf returned.
+ */
+static int
+param_get_pool_mode(char *buf, struct kernel_param *kp)
+{
+	int *modep = (int *)kp->arg;
+	const char *keyword;
+
+	switch (*modep) {
+	case SVC_POOL_AUTO:
+		keyword = "auto";
+		break;
+	case SVC_POOL_GLOBAL:
+		keyword = "global";
+		break;
+	case SVC_POOL_PERCPU:
+		keyword = "percpu";
+		break;
+	case SVC_POOL_PERNODE:
+		keyword = "pernode";
+		break;
+	default:
+		return sprintf(buf, "%d", *modep);
+	}
+
+	return strlcpy(buf, keyword, 20);
+}
+
+module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
+ &svc_pool_map.mode, 0644);
/*
* Detect best pool mapping mode heuristically,
@@ -166,18 +224,25 @@ svc_pool_map_init_pernode(struct svc_pool_map *m)
/*
- * Build the global map of cpus to pools and vice versa.
+ * Add a reference to the global map of cpus to pools (and
+ * vice versa). Initialise the map if we're the first user.
+ * Returns the number of pools.
*/
static unsigned int
-svc_pool_map_init(void)
+svc_pool_map_get(void)
{
struct svc_pool_map *m = &svc_pool_map;
int npools = -1;
- if (m->mode != SVC_POOL_NONE)
+ mutex_lock(&svc_pool_map_mutex);
+
+ if (m->count++) {
+ mutex_unlock(&svc_pool_map_mutex);
return m->npools;
+ }
- m->mode = svc_pool_map_choose_mode();
+ if (m->mode == SVC_POOL_AUTO)
+ m->mode = svc_pool_map_choose_mode();
switch (m->mode) {
case SVC_POOL_PERCPU:
@@ -195,9 +260,36 @@ svc_pool_map_init(void)
}
m->npools = npools;
+ mutex_unlock(&svc_pool_map_mutex);
return m->npools;
}
+
+/*
+ * Drop a reference to the global map of cpus to pools.
+ * When the last reference is dropped, the map data is
+ * freed; this allows the sysadmin to change the pool
+ * mode using the pool_mode module option without
+ * rebooting or re-loading sunrpc.ko.
+ *
+ * Must be paired with an earlier svc_pool_map_get().
+ */
+static void
+svc_pool_map_put(void)
+{
+	struct svc_pool_map *m = &svc_pool_map;
+
+	mutex_lock(&svc_pool_map_mutex);
+
+	if (!--m->count) {
+		m->mode = SVC_POOL_DEFAULT;
+		/*
+		 * Clear the pointers after freeing them: svc_pool_map is
+		 * static and gets reused by the next svc_pool_map_get().
+		 * If that get does not install fresh arrays, stale pointers
+		 * left here would be passed to kfree() a second time by the
+		 * following final put.
+		 */
+		kfree(m->to_pool);
+		m->to_pool = NULL;
+		kfree(m->pool_to);
+		m->pool_to = NULL;
+		m->npools = 0;
+	}
+
+	mutex_unlock(&svc_pool_map_mutex);
+}
+
+
/*
* Set the current thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
@@ -212,10 +304,9 @@ svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
/*
* The caller checks for sv_nrpools > 1, which
- * implies that we've been initialized and the
- * map mode is not NONE.
+ * implies that we've been initialized.
*/
- BUG_ON(m->mode == SVC_POOL_NONE);
+ BUG_ON(m->count == 0);
switch (m->mode)
{
@@ -246,18 +337,19 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
unsigned int pidx = 0;
/*
- * SVC_POOL_NONE happens in a pure client when
+ * An uninitialised map happens in a pure client when
* lockd is brought up, so silently treat it the
* same as SVC_POOL_GLOBAL.
*/
-
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- pidx = m->to_pool[cpu];
- break;
- case SVC_POOL_PERNODE:
- pidx = m->to_pool[cpu_to_node(cpu)];
- break;
+ if (svc_serv_is_pooled(serv)) {
+ switch (m->mode) {
+ case SVC_POOL_PERCPU:
+ pidx = m->to_pool[cpu];
+ break;
+ case SVC_POOL_PERNODE:
+ pidx = m->to_pool[cpu_to_node(cpu)];
+ break;
+ }
}
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
@@ -347,7 +439,7 @@ svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
svc_thread_fn func, int sig, struct module *mod)
{
struct svc_serv *serv;
- unsigned int npools = svc_pool_map_init();
+ unsigned int npools = svc_pool_map_get();
serv = __svc_create(prog, bufsize, npools, shutdown);
@@ -367,6 +459,7 @@ void
svc_destroy(struct svc_serv *serv)
{
struct svc_sock *svsk;
+ struct svc_sock *tmp;
dprintk("svc: svc_destroy(%s, %d)\n",
serv->sv_program->pg_name,
@@ -382,24 +475,23 @@ svc_destroy(struct svc_serv *serv)
del_timer_sync(&serv->sv_temptimer);
- while (!list_empty(&serv->sv_tempsocks)) {
- svsk = list_entry(serv->sv_tempsocks.next,
- struct svc_sock,
- sk_list);
- svc_close_socket(svsk);
- }
+ list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list)
+ svc_force_close_socket(svsk);
+
if (serv->sv_shutdown)
serv->sv_shutdown(serv);
- while (!list_empty(&serv->sv_permsocks)) {
- svsk = list_entry(serv->sv_permsocks.next,
- struct svc_sock,
- sk_list);
- svc_close_socket(svsk);
- }
+ list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list)
+ svc_force_close_socket(svsk);
+
+ BUG_ON(!list_empty(&serv->sv_permsocks));
+ BUG_ON(!list_empty(&serv->sv_tempsocks));
cache_clean_deferred(serv);
+ if (svc_serv_is_pooled(serv))
+ svc_pool_map_put();
+
/* Unregister service with the portmapper */
svc_register(serv, 0, 0);
kfree(serv->sv_pools);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 63ae94771b8..f6e1eb1ea72 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -82,6 +82,7 @@ static void svc_delete_socket(struct svc_sock *svsk);
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
+static void svc_close_socket(struct svc_sock *svsk);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
@@ -131,13 +132,13 @@ static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
htons(((struct sockaddr_in *) addr)->sin_port));
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
case AF_INET6:
snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
htons(((struct sockaddr_in6 *) addr)->sin6_port));
break;
-#endif
+
default:
snprintf(buf, len, "unknown address type: %d", addr->sa_family);
break;
@@ -449,9 +450,7 @@ svc_wake_up(struct svc_serv *serv)
union svc_pktinfo_u {
struct in_pktinfo pkti;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_pktinfo pkti6;
-#endif
};
static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
@@ -467,7 +466,7 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
case AF_INET6: {
struct in6_pktinfo *pki = CMSG_DATA(cmh);
@@ -479,7 +478,6 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
}
break;
-#endif
}
return;
}
@@ -721,45 +719,21 @@ svc_write_space(struct sock *sk)
}
}
-static void svc_udp_get_sender_address(struct svc_rqst *rqstp,
- struct sk_buff *skb)
+static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
+ struct cmsghdr *cmh)
{
switch (rqstp->rq_sock->sk_sk->sk_family) {
case AF_INET: {
- /* this seems to come from net/ipv4/udp.c:udp_recvmsg */
- struct sockaddr_in *sin = svc_addr_in(rqstp);
-
- sin->sin_family = AF_INET;
- sin->sin_port = skb->h.uh->source;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
- rqstp->rq_addrlen = sizeof(struct sockaddr_in);
- /* Remember which interface received this request */
- rqstp->rq_daddr.addr.s_addr = skb->nh.iph->daddr;
- }
+ struct in_pktinfo *pki = CMSG_DATA(cmh);
+ rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- case AF_INET6: {
- /* this is derived from net/ipv6/udp.c:udpv6_recvmesg */
- struct sockaddr_in6 *sin6 = svc_addr_in6(rqstp);
-
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = skb->h.uh->source;
- sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (ipv6_addr_type(&sin6->sin6_addr) &
- IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = IP6CB(skb)->iif;
- ipv6_addr_copy(&sin6->sin6_addr,
- &skb->nh.ipv6h->saddr);
- rqstp->rq_addrlen = sizeof(struct sockaddr_in);
- /* Remember which interface received this request */
- ipv6_addr_copy(&rqstp->rq_daddr.addr6,
- &skb->nh.ipv6h->saddr);
}
+ case AF_INET6: {
+ struct in6_pktinfo *pki = CMSG_DATA(cmh);
+ ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
break;
-#endif
+ }
}
- return;
}
/*
@@ -771,7 +745,15 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct sk_buff *skb;
+ char buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
+ struct cmsghdr *cmh = (struct cmsghdr *)buffer;
int err, len;
+ struct msghdr msg = {
+ .msg_name = svc_addr(rqstp),
+ .msg_control = cmh,
+ .msg_controllen = sizeof(buffer),
+ .msg_flags = MSG_DONTWAIT,
+ };
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
/* udp sockets need large rcvbuf as all pending
@@ -797,7 +779,9 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
}
clear_bit(SK_DATA, &svsk->sk_flags);
- while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
+	/*
+	 * Peek at the datagram first so that msg receives the sender
+	 * address and control data, then dequeue the skb itself. The
+	 * result of each step is stored in err for the loop body; it
+	 * must be assigned here, not compared.
+	 */
+	while ((err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
+				     0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 ||
+	       (skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
if (err == -EAGAIN) {
svc_sock_received(svsk);
return err;
@@ -805,6 +789,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
/* possibly an icmp error */
dprintk("svc: recvfrom returned error %d\n", -err);
}
+ rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
if (skb->tstamp.off_sec == 0) {
struct timeval tv;
@@ -827,7 +812,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_prot = IPPROTO_UDP;
- svc_udp_get_sender_address(rqstp, skb);
+ if (cmh->cmsg_level != IPPROTO_IP ||
+ cmh->cmsg_type != IP_PKTINFO) {
+ if (net_ratelimit())
+ printk("rpcsvc: received unknown control message:"
+ "%d/%d\n",
+ cmh->cmsg_level, cmh->cmsg_type);
+ skb_free_datagram(svsk->sk_sk, skb);
+ return 0;
+ }
+ svc_udp_get_dest_address(rqstp, cmh);
if (skb_is_nonlinear(skb)) {
/* we have to copy */
@@ -884,6 +878,9 @@ svc_udp_sendto(struct svc_rqst *rqstp)
static void
svc_udp_init(struct svc_sock *svsk)
{
+ int one = 1;
+ mm_segment_t oldfs;
+
svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
svsk->sk_recvfrom = svc_udp_recvfrom;
@@ -899,6 +896,13 @@ svc_udp_init(struct svc_sock *svsk)
set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
set_bit(SK_CHNGBUF, &svsk->sk_flags);
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ /* make sure we get destination address info */
+ svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
+ (char __user *)&one, sizeof(one));
+ set_fs(oldfs);
}
/*
@@ -977,11 +981,9 @@ static inline int svc_port_is_privileged(struct sockaddr *sin)
case AF_INET:
return ntohs(((struct sockaddr_in *)sin)->sin_port)
< PROT_SOCK;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
< PROT_SOCK;
-#endif
default:
return 0;
}
@@ -1786,7 +1788,7 @@ svc_delete_socket(struct svc_sock *svsk)
spin_unlock_bh(&serv->sv_lock);
}
-void svc_close_socket(struct svc_sock *svsk)
+static void svc_close_socket(struct svc_sock *svsk)
{
set_bit(SK_CLOSE, &svsk->sk_flags);
if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
@@ -1799,6 +1801,19 @@ void svc_close_socket(struct svc_sock *svsk)
svc_sock_put(svsk);
}
+/*
+ * Close a socket even when it is still flagged SK_BUSY.
+ *
+ * SK_CLOSE is set first; if the socket is marked busy it is unlinked
+ * from the sk_ready list and SK_BUSY is cleared before handing over to
+ * svc_close_socket().
+ * NOTE(review): clearing SK_BUSY is presumably what lets the
+ * test_and_set_bit(SK_BUSY) in svc_close_socket() succeed - that
+ * function is only partly visible here, confirm.
+ */
+void svc_force_close_socket(struct svc_sock *svsk)
+{
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+ /* Waiting to be processed, but no threads left,
+ * so just remove it from the waiting list
+ */
+ list_del_init(&svsk->sk_ready);
+ clear_bit(SK_BUSY, &svsk->sk_flags);
+ }
+ svc_close_socket(svsk);
+}
+
/**
* svc_makesock - Make a socket for nfsd and lockd
* @serv: RPC server structure