aboutsummaryrefslogtreecommitdiff
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig13
-rw-r--r--arch/x86_64/boot/setup.S2
-rw-r--r--arch/x86_64/boot/tools/build.c4
-rw-r--r--arch/x86_64/ia32/ia32entry.S2
-rw-r--r--arch/x86_64/ia32/sys_ia32.c26
-rw-r--r--arch/x86_64/kernel/Makefile5
-rw-r--r--arch/x86_64/kernel/e820.c2
-rw-r--r--arch/x86_64/kernel/entry.S12
-rw-r--r--arch/x86_64/kernel/genapic.c2
-rw-r--r--arch/x86_64/kernel/genapic_cluster.c6
-rw-r--r--arch/x86_64/kernel/io_apic.c112
-rw-r--r--arch/x86_64/kernel/kprobes.c41
-rw-r--r--arch/x86_64/kernel/nmi.c8
-rw-r--r--arch/x86_64/kernel/process.c1
-rw-r--r--arch/x86_64/kernel/semaphore.c180
-rw-r--r--arch/x86_64/kernel/setup.c2
-rw-r--r--arch/x86_64/kernel/setup64.c2
-rw-r--r--arch/x86_64/kernel/smpboot.c10
-rw-r--r--arch/x86_64/kernel/time.c8
-rw-r--r--arch/x86_64/kernel/traps.c14
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S1
-rw-r--r--arch/x86_64/mm/fault.c4
-rw-r--r--arch/x86_64/mm/numa.c6
23 files changed, 160 insertions, 303 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 40242c61e90..e63323e03ea 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -24,6 +24,10 @@ config X86
bool
default y
+config SEMAPHORE_SLEEPERS
+ bool
+ default y
+
config MMU
bool
default y
@@ -61,6 +65,10 @@ config GENERIC_IOMAP
bool
default y
+config ARCH_MAY_HAVE_PC_FDC
+ bool
+ default y
+
source "init/Kconfig"
@@ -436,6 +444,11 @@ config ISA_DMA_API
bool
default y
+config GENERIC_PENDING_IRQ
+ bool
+ depends on GENERIC_HARDIRQS && SMP
+ default y
+
menu "Power management options"
source kernel/power/Kconfig
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index ff58b2832b7..12ea0b6c52e 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -81,7 +81,7 @@ start:
# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
.ascii "HdrS" # header signature
- .word 0x0203 # header version number (>= 0x0105)
+ .word 0x0204 # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
start_sys_seg: .word SYSSEG
diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c
index 18b5bac1c42..c44f5e2ec10 100644
--- a/arch/x86_64/boot/tools/build.c
+++ b/arch/x86_64/boot/tools/build.c
@@ -178,7 +178,9 @@ int main(int argc, char ** argv)
die("Output: seek failed");
buf[0] = (sys_size & 0xff);
buf[1] = ((sys_size >> 8) & 0xff);
- if (write(1, buf, 2) != 2)
+ buf[2] = ((sys_size >> 16) & 0xff);
+ buf[3] = ((sys_size >> 24) & 0xff);
+ if (write(1, buf, 4) != 4)
die("Write of image length failed");
return 0; /* Everything is OK */
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index c45d6a05b98..f174083d556 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -307,7 +307,7 @@ ia32_sys_call_table:
.quad stub32_fork
.quad sys_read
.quad sys_write
- .quad sys32_open /* 5 */
+ .quad compat_sys_open /* 5 */
.quad sys_close
.quad sys32_waitpid
.quad sys_creat
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c
index be996d1b691..04d80406ce4 100644
--- a/arch/x86_64/ia32/sys_ia32.c
+++ b/arch/x86_64/ia32/sys_ia32.c
@@ -969,32 +969,6 @@ long sys32_kill(int pid, int sig)
return sys_kill(pid, sig);
}
-asmlinkage long sys32_open(const char __user * filename, int flags, int mode)
-{
- char * tmp;
- int fd, error;
-
- /* don't force O_LARGEFILE */
- tmp = getname(filename);
- fd = PTR_ERR(tmp);
- if (!IS_ERR(tmp)) {
- fd = get_unused_fd();
- if (fd >= 0) {
- struct file *f = filp_open(tmp, flags, mode);
- error = PTR_ERR(f);
- if (IS_ERR(f)) {
- put_unused_fd(fd);
- fd = error;
- } else {
- fsnotify_open(f->f_dentry);
- fd_install(fd, f);
- }
- }
- putname(tmp);
- }
- return fd;
-}
-
extern asmlinkage long
sys_timer_create(clockid_t which_clock,
struct sigevent __user *timer_event_spec,
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index 0296ca6cbfa..1579bdd0adc 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -4,10 +4,10 @@
extra-y := head.o head64.o init_task.o vmlinux.lds
EXTRA_AFLAGS := -traditional
-obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \
+obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \
x8664_ksyms.o i387.o syscall.o vsyscall.o \
- setup64.o bootflag.o e820.o reboot.o quirks.o
+ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o
obj-$(CONFIG_X86_MCE) += mce.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
@@ -45,3 +45,4 @@ swiotlb-$(CONFIG_SWIOTLB) += ../../ia64/lib/swiotlb.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
quirks-y += ../../i386/kernel/quirks.o
+i8237-y += ../../i386/kernel/i8237.o
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index b548dea4e5b..116ac5f53dc 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -85,7 +85,7 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type)
struct e820entry *ei = &e820.map[i];
if (type && ei->type != type)
continue;
- if (ei->addr >= end || ei->addr + ei->size < start)
+ if (ei->addr >= end || ei->addr + ei->size <= start)
continue;
return 1;
}
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 096d470e280..be51dbe1f75 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -784,8 +784,9 @@ ENTRY(execve)
ret
CFI_ENDPROC
-ENTRY(page_fault)
+KPROBE_ENTRY(page_fault)
errorentry do_page_fault
+ .previous .text
ENTRY(coprocessor_error)
zeroentry do_coprocessor_error
@@ -797,13 +798,14 @@ ENTRY(device_not_available)
zeroentry math_state_restore
/* runs on exception stack */
-ENTRY(debug)
+KPROBE_ENTRY(debug)
CFI_STARTPROC
pushq $0
CFI_ADJUST_CFA_OFFSET 8
paranoidentry do_debug
jmp paranoid_exit
CFI_ENDPROC
+ .previous .text
/* runs on exception stack */
ENTRY(nmi)
@@ -854,8 +856,9 @@ paranoid_schedule:
jmp paranoid_userspace
CFI_ENDPROC
-ENTRY(int3)
+KPROBE_ENTRY(int3)
zeroentry do_int3
+ .previous .text
ENTRY(overflow)
zeroentry do_overflow
@@ -892,8 +895,9 @@ ENTRY(stack_segment)
jmp paranoid_exit
CFI_ENDPROC
-ENTRY(general_protection)
+KPROBE_ENTRY(general_protection)
errorentry do_general_protection
+ .previous .text
ENTRY(alignment_check)
errorentry do_alignment_check
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index f031358906f..b1c144f7314 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -25,7 +25,7 @@
#endif
/* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(x86_cpu_to_apicid);
u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c
index 9703da7202e..f6523dd1bc0 100644
--- a/arch/x86_64/kernel/genapic_cluster.c
+++ b/arch/x86_64/kernel/genapic_cluster.c
@@ -72,10 +72,14 @@ static void cluster_send_IPI_mask(cpumask_t mask, int vector)
static void cluster_send_IPI_allbutself(int vector)
{
cpumask_t mask = cpu_online_map;
- cpu_clear(smp_processor_id(), mask);
+ int me = get_cpu(); /* Ensure we are not preempted when we clear */
+
+ cpu_clear(me, mask);
if (!cpus_empty(mask))
cluster_send_IPI_mask(mask, vector);
+
+ put_cpu();
}
static void cluster_send_IPI_all(int vector)
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 1013a8fe44d..40e0aca088f 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -70,7 +70,7 @@ static struct irq_pin_list {
short apic, pin, next;
} irq_2_pin[PIN_MAP_SIZE];
-int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
+int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
#ifdef CONFIG_PCI_MSI
#define vector_to_irq(vector) \
(platform_legacy_irq(vector) ? vector : vector_irq[vector])
@@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1};
#define vector_to_irq(vector) (vector)
#endif
+#define __DO_ACTION(R, ACTION, FINAL) \
+ \
+{ \
+ int pin; \
+ struct irq_pin_list *entry = irq_2_pin + irq; \
+ \
+ for (;;) { \
+ unsigned int reg; \
+ pin = entry->pin; \
+ if (pin == -1) \
+ break; \
+ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
+ reg ACTION; \
+ io_apic_modify(entry->apic, reg); \
+ if (!entry->next) \
+ break; \
+ entry = irq_2_pin + entry->next; \
+ } \
+ FINAL; \
+}
+
+#ifdef CONFIG_SMP
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
+{
+ unsigned long flags;
+ unsigned int dest;
+ cpumask_t tmp;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if (cpus_empty(tmp))
+ tmp = TARGET_CPUS;
+
+ cpus_and(mask, tmp, CPU_MASK_ALL);
+
+ dest = cpu_mask_to_apicid(mask);
+
+ /*
+ * Only the high 8 bits are valid.
+ */
+ dest = SET_APIC_LOGICAL_ID(dest);
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __DO_ACTION(1, = dest, )
+ set_irq_info(irq, mask);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+#endif
+
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
@@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
entry->pin = pin;
}
-#define __DO_ACTION(R, ACTION, FINAL) \
- \
-{ \
- int pin; \
- struct irq_pin_list *entry = irq_2_pin + irq; \
- \
- for (;;) { \
- unsigned int reg; \
- pin = entry->pin; \
- if (pin == -1) \
- break; \
- reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
- reg ACTION; \
- io_apic_modify(entry->apic, reg); \
- if (!entry->next) \
- break; \
- entry = irq_2_pin + entry->next; \
- } \
- FINAL; \
-}
#define DO_ACTION(name,R,ACTION, FINAL) \
\
@@ -655,7 +683,7 @@ static inline int IO_APIC_irq_trigger(int irq)
}
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 };
+u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 };
int assign_irq_vector(int irq)
{
@@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(void)
spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
+ set_native_irq_info(irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
}
}
@@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq)
*/
static void ack_edge_ioapic_irq(unsigned int irq)
{
+ move_irq(irq);
if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
== (IRQ_PENDING | IRQ_DISABLED))
mask_IO_APIC_irq(irq);
@@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq)
static void end_level_ioapic_irq (unsigned int irq)
{
+ move_irq(irq);
ack_APIC_irq();
}
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
-{
- unsigned long flags;
- unsigned int dest;
-
- dest = cpu_mask_to_apicid(mask);
-
- /*
- * Only the high 8 bits are valid.
- */
- dest = SET_APIC_LOGICAL_ID(dest);
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __DO_ACTION(1, = dest, )
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
#ifdef CONFIG_PCI_MSI
static unsigned int startup_edge_ioapic_vector(unsigned int vector)
{
@@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsigned int vector)
{
int irq = vector_to_irq(vector);
+ move_native_irq(vector);
ack_edge_ioapic_irq(irq);
}
@@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (unsigned int vector)
{
int irq = vector_to_irq(vector);
+ move_native_irq(vector);
end_level_ioapic_irq(irq);
}
@@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsigned int vector)
unmask_IO_APIC_irq(irq);
}
+#ifdef CONFIG_SMP
static void set_ioapic_affinity_vector (unsigned int vector,
cpumask_t cpu_mask)
{
int irq = vector_to_irq(vector);
+ set_native_irq_info(vector, cpu_mask);
set_ioapic_affinity_irq(irq, cpu_mask);
}
-#endif
+#endif // CONFIG_SMP
+#endif // CONFIG_PCI_MSI
/*
* Level and edge triggered IO-APIC interrupts need different handling,
@@ -1424,7 +1443,7 @@ static void set_ioapic_affinity_vector (unsigned int vector,
* races.
*/
-static struct hw_interrupt_type ioapic_edge_type = {
+static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
.typename = "IO-APIC-edge",
.startup = startup_edge_ioapic,
.shutdown = shutdown_edge_ioapic,
@@ -1432,10 +1451,12 @@ static struct hw_interrupt_type ioapic_edge_type = {
.disable = disable_edge_ioapic,
.ack = ack_edge_ioapic,
.end = end_edge_ioapic,
+#ifdef CONFIG_SMP
.set_affinity = set_ioapic_affinity,
+#endif
};
-static struct hw_interrupt_type ioapic_level_type = {
+static struct hw_interrupt_type ioapic_level_type __read_mostly = {
.typename = "IO-APIC-level",
.startup = startup_level_ioapic,
.shutdown = shutdown_level_ioapic,
@@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_level_type = {
.disable = disable_level_ioapic,
.ack = mask_and_ack_level_ioapic,
.end = end_level_ioapic,
+#ifdef CONFIG_SMP
.set_affinity = set_ioapic_affinity,
+#endif
};
static inline void init_IO_APIC_traps(void)
@@ -1506,7 +1529,7 @@ static void ack_lapic_irq (unsigned int irq)
static void end_lapic_irq (unsigned int i) { /* nothing */ }
-static struct hw_interrupt_type lapic_irq_type = {
+static struct hw_interrupt_type lapic_irq_type __read_mostly = {
.typename = "local-APIC-edge",
.startup = NULL, /* startup_irq() not used for IRQ0 */
.shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
@@ -1918,6 +1941,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
+ set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
spin_unlock_irqrestore(&ioapic_lock, flags);
return 0;
@@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
* we need to reprogram the ioredtbls to cater for the cpus which have come online
* so mask in all cases should simply be TARGET_CPUS
*/
+#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
{
int pin, ioapic, irq, irq_entry;
@@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void)
}
}
+#endif
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index 5c6dc705148..df08c43276a 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -74,7 +74,7 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn)
return 0;
}
-int arch_prepare_kprobe(struct kprobe *p)
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
/* insn: must be on special executable page on x86_64. */
up(&kprobe_mutex);
@@ -189,7 +189,7 @@ static inline s32 *is_riprel(u8 *insn)
return NULL;
}
-void arch_copy_kprobe(struct kprobe *p)
+void __kprobes arch_copy_kprobe(struct kprobe *p)
{
s32 *ripdisp;
memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
@@ -215,21 +215,21 @@ void arch_copy_kprobe(struct kprobe *p)
p->opcode = *p->addr;
}
-void arch_arm_kprobe(struct kprobe *p)
+void __kprobes arch_arm_kprobe(struct kprobe *p)
{
*p->addr = BREAKPOINT_INSTRUCTION;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
}
-void arch_disarm_kprobe(struct kprobe *p)
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
*p->addr = p->opcode;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
}
-void arch_remove_kprobe(struct kprobe *p)
+void __kprobes arch_remove_kprobe(struct kprobe *p)
{
up(&kprobe_mutex);
free_insn_slot(p->ainsn.insn);
@@ -261,7 +261,7 @@ static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
kprobe_saved_rflags &= ~IF_MASK;
}
-static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
regs->eflags |= TF_MASK;
regs->eflags &= ~IF_MASK;
@@ -272,7 +272,8 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->rip = (unsigned long)p->ainsn.insn;
}
-void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
+void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
+ struct pt_regs *regs)
{
unsigned long *sara = (unsigned long *)regs->rsp;
struct kretprobe_instance *ri;
@@ -295,7 +296,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs)
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled thorough out this function.
*/
-int kprobe_handler(struct pt_regs *regs)
+int __kprobes kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p;
int ret = 0;
@@ -310,7 +311,8 @@ int kprobe_handler(struct pt_regs *regs)
Disarm the probe we just hit, and ignore it. */
p = get_kprobe(addr);
if (p) {
- if (kprobe_status == KPROBE_HIT_SS) {
+ if (kprobe_status == KPROBE_HIT_SS &&
+ *p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
regs->eflags &= ~TF_MASK;
regs->eflags |= kprobe_saved_rflags;
unlock_kprobes();
@@ -360,7 +362,10 @@ int kprobe_handler(struct pt_regs *regs)
* either a probepoint or a debugger breakpoint
* at this address. In either case, no further
* handling of this interrupt is appropriate.
+ * Back up over the (now missing) int3 and run
+ * the original instruction.
*/
+ regs->rip = (unsigned long)addr;
ret = 1;
}
/* Not one of ours: let kernel handle it */
@@ -399,7 +404,7 @@ no_kprobe:
/*
* Called when we hit the probe point at kretprobe_trampoline
*/
-int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe_instance *ri = NULL;
struct hlist_head *head;
@@ -478,7 +483,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction.
*/
-static void resume_execution(struct kprobe *p, struct pt_regs *regs)
+static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
{
unsigned long *tos = (unsigned long *)regs->rsp;
unsigned long next_rip = 0;
@@ -536,7 +541,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs)
* Interrupts are disabled on entry as trap1 is an interrupt gate and they
* remain disabled thoroughout this function. And we hold kprobe lock.
*/
-int post_kprobe_handler(struct pt_regs *regs)
+int __kprobes post_kprobe_handler(struct pt_regs *regs)
{
if (!kprobe_running())
return 0;
@@ -571,7 +576,7 @@ out:
}
/* Interrupts disabled, kprobe_lock held. */
-int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
if (current_kprobe->fault_handler
&& current_kprobe->fault_handler(current_kprobe, regs, trapnr))
@@ -590,8 +595,8 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
/*
* Wrapper routine for handling exceptions.
*/
-int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
- void *data)
+int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data)
{
struct die_args *args = (struct die_args *)data;
switch (val) {
@@ -619,7 +624,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
return NOTIFY_DONE;
}
-int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
unsigned long addr;
@@ -640,7 +645,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
return 1;
}
-void jprobe_return(void)
+void __kprobes jprobe_return(void)
{
preempt_enable_no_resched();
asm volatile (" xchg %%rbx,%%rsp \n"
@@ -651,7 +656,7 @@ void jprobe_return(void)
(jprobe_saved_rsp):"memory");
}
-int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
+int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
u8 *addr = (u8 *) (regs->rip - 1);
unsigned long stack_addr = (unsigned long)jprobe_saved_rsp;
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 4e44d6e6b7e..caf164959e1 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -290,7 +290,7 @@ void enable_timer_nmi_watchdog(void)
static int nmi_pm_active; /* nmi_active before suspend */
-static int lapic_nmi_suspend(struct sys_device *dev, u32 state)
+static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
{
nmi_pm_active = nmi_active;
disable_lapic_nmi_watchdog();
@@ -463,6 +463,8 @@ void touch_nmi_watchdog (void)
*/
for (i = 0; i < NR_CPUS; i++)
per_cpu(nmi_touch, i) = 1;
+
+ touch_softlockup_watchdog();
}
void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
@@ -522,14 +524,14 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code)
nmi_enter();
add_pda(__nmi_count,1);
- if (!nmi_callback(regs, cpu))
+ if (!rcu_dereference(nmi_callback)(regs, cpu))
default_do_nmi(regs);
nmi_exit();
}
void set_nmi_callback(nmi_callback_t callback)
{
- nmi_callback = callback;
+ rcu_assign_pointer(nmi_callback, callback);
}
void unset_nmi_callback(void)
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 7577f9d7a75..8661f82ac70 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -310,6 +310,7 @@ void __show_regs(struct pt_regs * regs)
void show_regs(struct pt_regs *regs)
{
+ printk("CPU %d:", smp_processor_id());
__show_regs(regs);
show_trace(&regs->rsp);
}
diff --git a/arch/x86_64/kernel/semaphore.c b/arch/x86_64/kernel/semaphore.c
deleted file mode 100644
index 48f7c18172b..00000000000
--- a/arch/x86_64/kernel/semaphore.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * x86_64 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-#include <linux/config.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/errno.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- * - only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - when we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleeper" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-void __sched __down(struct semaphore * sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * the wait_queue_head.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- tsk->state = TASK_RUNNING;
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * wait_queue_head. The "-1" is because we're
- * still hoping to get the semaphore.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_INTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- tsk->state = TASK_RUNNING;
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-int __down_trylock(struct semaphore * sem)
-{
- int sleepers;
- unsigned long flags;
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock in the
- * wait_queue_head.
- */
- if (!atomic_add_negative(sleepers, &sem->count)) {
- wake_up_locked(&sem->wait);
- }
-
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- return 1;
-}
-
-
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index cfcebc8ab7b..0511d808791 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -65,7 +65,7 @@
* Machine setup..
*/
-struct cpuinfo_x86 boot_cpu_data;
+struct cpuinfo_x86 boot_cpu_data __read_mostly;
unsigned long mmu_cr4_features;
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 34082c1cc41..e3ffcacc8c9 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -36,7 +36,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
-unsigned long __supported_pte_mask = ~0UL;
+unsigned long __supported_pte_mask __read_mostly = ~0UL;
static int do_not_nx __initdata = 0;
/* noexec=on|off
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index fa25e39fe54..90aeccd1519 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -62,13 +62,13 @@
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
/* Package ID of each logical CPU */
-u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
-u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
+u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
EXPORT_SYMBOL(phys_proc_id);
EXPORT_SYMBOL(cpu_core_id);
/* Bitmask of currently online CPUs */
-cpumask_t cpu_online_map;
+cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);
@@ -88,8 +88,8 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
/* Set when the idlers are all forked */
int smp_threads_ready;
-cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
-cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
+cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);
/*
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 66bf6ddeb0c..7b6abe05825 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -176,10 +176,7 @@ int do_settimeofday(struct timespec *tv)
set_normalized_timespec(&xtime, sec, nsec);
set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec);
- time_adjust = 0; /* stop active adjtime() */
- time_status |= STA_UNSYNC;
- time_maxerror = NTP_PHASE_LIMIT;
- time_esterror = NTP_PHASE_LIMIT;
+ ntp_clear();
write_sequnlock_irq(&xtime_lock);
clock_was_set();
@@ -471,7 +468,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
* off) isn't likely to go away much sooner anyway.
*/
- if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update &&
+ if (ntp_synced() && xtime.tv_sec > rtc_update &&
abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
set_rtc_mmss(xtime.tv_sec);
rtc_update = xtime.tv_sec + 660;
@@ -1041,6 +1038,7 @@ static int timer_resume(struct sys_device *dev)
write_sequnlock_irqrestore(&xtime_lock,flags);
jiffies += sleep_length;
wall_jiffies += sleep_length;
+ touch_softlockup_watchdog();
return 0;
}
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 6ead433a388..f238d6078a5 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/nmi.h>
+#include <linux/kprobes.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -419,8 +420,9 @@ void die_nmi(char *str, struct pt_regs *regs)
do_exit(SIGSEGV);
}
-static void do_trap(int trapnr, int signr, char *str,
- struct pt_regs * regs, long error_code, siginfo_t *info)
+static void __kprobes do_trap(int trapnr, int signr, char *str,
+ struct pt_regs * regs, long error_code,
+ siginfo_t *info)
{
conditional_sti(regs);
@@ -504,7 +506,8 @@ DO_ERROR(18, SIGSEGV, "reserved", reserved)
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
-asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
+asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
+ long error_code)
{
conditional_sti(regs);
@@ -622,7 +625,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs)
io_check_error(reason, regs);
}
-asmlinkage void do_int3(struct pt_regs * regs, long error_code)
+asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
{
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
return;
@@ -653,7 +656,8 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs)
}
/* runs on IST stack. */
-asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code)
+asmlinkage void __kprobes do_debug(struct pt_regs * regs,
+ unsigned long error_code)
{
unsigned long condition;
struct task_struct *tsk = current;
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 2a94f9b60b2..d4abb07af52 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -21,6 +21,7 @@ SECTIONS
*(.text)
SCHED_TEXT
LOCK_TEXT
+ KPROBES_TEXT
*(.fixup)
*(.gnu.warning)
} = 0x9090
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index ca914c3bd49..816732d8858 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -23,6 +23,7 @@
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/module.h>
+#include <linux/kprobes.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -294,7 +295,8 @@ int exception_trace = 1;
* bit 2 == 0 means kernel, 1 means user-mode
* bit 3 == 1 means fault was an instruction fetch
*/
-asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
+ unsigned long error_code)
{
struct task_struct *tsk;
struct mm_struct *mm;
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 6a156f5692a..04f7a33e144 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -22,14 +22,14 @@
#define Dprintk(x...)
#endif
-struct pglist_data *node_data[MAX_NUMNODES];
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
bootmem_data_t plat_node_bdata[MAX_NUMNODES];
int memnode_shift;
u8 memnodemap[NODEMAPSIZE];
-unsigned char cpu_to_node[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
-cpumask_t node_to_cpumask[MAX_NUMNODES];
+unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
+cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly;
int numa_off __initdata;