about | summary | refs | log | tree | commit | diff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- arch/x86/kernel/acpi/wakeup_32.S | 12
-rw-r--r-- arch/x86/kernel/acpi/wakeup_64.S | 32
-rw-r--r-- arch/x86/kernel/cpu/intel_cacheinfo.c | 66
-rw-r--r-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 10
-rw-r--r-- arch/x86/kernel/mce_64.c | 39
-rw-r--r-- arch/x86/kernel/msr.c | 35
-rw-r--r-- arch/x86/kernel/suspend_64.c | 101
-rw-r--r-- arch/x86/kernel/suspend_asm_64.S | 49
-rw-r--r-- arch/x86/kernel/vsyscall_64.c | 23
9 files changed, 246 insertions, 121 deletions
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index f22ba8534d2..a97313b1270 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -11,7 +11,7 @@
#
# If physical address of wakeup_code is 0x12345, BIOS should call us with
# cs = 0x1234, eip = 0x05
-#
+#
#define BEEP \
inb $97, %al; \
@@ -52,7 +52,6 @@ wakeup_code:
BEEP
1:
mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
- movw $0x0e00 + 'S', %fs:(0x12)
pushl $0 # Kill any dangerous flags
popfl
@@ -90,9 +89,6 @@ wakeup_code:
# make sure %cr4 is set correctly (features, etc)
movl real_save_cr4 - wakeup_code, %eax
movl %eax, %cr4
- movw $0xb800, %ax
- movw %ax,%fs
- movw $0x0e00 + 'i', %fs:(0x12)
# need a gdt -- use lgdtl to force 32-bit operands, in case
# the GDT is located past 16 megabytes.
@@ -102,8 +98,6 @@ wakeup_code:
movl %eax, %cr0
jmp 1f
1:
- movw $0x0e00 + 'n', %fs:(0x14)
-
movl real_magic - wakeup_code, %eax
cmpl $0x12345678, %eax
jne bogus_real_magic
@@ -122,13 +116,11 @@ real_save_cr4: .long 0
real_magic: .long 0
video_mode: .long 0
realmode_flags: .long 0
-beep_flags: .long 0
real_efer_save_restore: .long 0
real_save_efer_edx: .long 0
real_save_efer_eax: .long 0
bogus_real_magic:
- movw $0x0e00 + 'B', %fs:(0x12)
jmp bogus_real_magic
/* This code uses an extended set of video mode numbers. These include:
@@ -194,7 +186,6 @@ wakeup_pmode_return:
movw %ax, %es
movw %ax, %fs
movw %ax, %gs
- movw $0x0e00 + 'u', 0xb8016
# reload the gdt, as we need the full 32 bit address
lgdt saved_gdt
@@ -218,7 +209,6 @@ wakeup_pmode_return:
jmp *%eax
bogus_magic:
- movw $0x0e00 + 'B', 0xb8018
jmp bogus_magic
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 8b4357e1efe..55608ec2ed7 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -41,7 +41,6 @@ wakeup_code:
# Running in *copy* of this code, somewhere in low 1MB.
- movb $0xa1, %al ; outb %al, $0x80
cli
cld
# setup data segment
@@ -65,11 +64,6 @@ wakeup_code:
cmpl $0x12345678, %eax
jne bogus_real_magic
- call verify_cpu # Verify the cpu supports long
- # mode
- testl %eax, %eax
- jnz no_longmode
-
testl $1, realmode_flags - wakeup_code
jz 1f
lcall $0xc000,$3
@@ -84,12 +78,6 @@ wakeup_code:
call mode_set
1:
- movw $0xb800, %ax
- movw %ax,%fs
- movw $0x0e00 + 'L', %fs:(0x10)
-
- movb $0xa2, %al ; outb %al, $0x80
-
mov %ds, %ax # Find 32bit wakeup_code addr
movzx %ax, %esi # (Convert %ds:gdt to a liner ptr)
shll $4, %esi
@@ -117,14 +105,10 @@ wakeup_32_vector:
.code32
wakeup_32:
# Running in this code, but at low address; paging is not yet turned on.
- movb $0xa5, %al ; outb %al, $0x80
movl $__KERNEL_DS, %eax
movl %eax, %ds
- movw $0x0e00 + 'i', %ds:(0xb8012)
- movb $0xa8, %al ; outb %al, $0x80;
-
/*
* Prepare for entering 64bits mode
*/
@@ -200,16 +184,11 @@ wakeup_long64:
*/
lgdt cpu_gdt_descr
- movw $0x0e00 + 'n', %ds:(0xb8014)
- movb $0xa9, %al ; outb %al, $0x80
-
movq saved_magic, %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
jne bogus_64_magic
- movw $0x0e00 + 'u', %ds:(0xb8016)
-
nop
nop
movw $__KERNEL_DS, %ax
@@ -220,13 +199,11 @@ wakeup_long64:
movw %ax, %gs
movq saved_rsp, %rsp
- movw $0x0e00 + 'x', %ds:(0xb8018)
movq saved_rbx, %rbx
movq saved_rdi, %rdi
movq saved_rsi, %rsi
movq saved_rbp, %rbp
- movw $0x0e00 + '!', %ds:(0xb801a)
movq saved_rip, %rax
jmp *%rax
@@ -256,21 +233,12 @@ realmode_flags: .quad 0
.code16
bogus_real_magic:
- movb $0xba,%al ; outb %al,$0x80
jmp bogus_real_magic
.code64
bogus_64_magic:
- movb $0xb3,%al ; outb %al,$0x80
jmp bogus_64_magic
-.code16
-no_longmode:
- movb $0xbc,%al ; outb %al,$0x80
- jmp no_longmode
-
-#include "../verify_cpu_64.S"
-
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
* NORMAL_VGA (-1)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 1826395ebee..297a2411694 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -499,6 +499,11 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {
static void free_cache_attributes(unsigned int cpu)
{
+ int i;
+
+ for (i = 0; i < num_cache_leaves; i++)
+ cache_remove_shared_cpu_map(cpu, i);
+
kfree(cpuid4_info[cpu]);
cpuid4_info[cpu] = NULL;
}
@@ -506,8 +511,8 @@ static void free_cache_attributes(unsigned int cpu)
static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
struct _cpuid4_info *this_leaf;
- unsigned long j;
- int retval;
+ unsigned long j;
+ int retval;
cpumask_t oldmask;
if (num_cache_leaves == 0)
@@ -524,19 +529,26 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu)
goto out;
/* Do cpuid and store the results */
- retval = 0;
for (j = 0; j < num_cache_leaves; j++) {
this_leaf = CPUID4_INFO_IDX(cpu, j);
retval = cpuid4_cache_lookup(j, this_leaf);
- if (unlikely(retval < 0))
+ if (unlikely(retval < 0)) {
+ int i;
+
+ for (i = 0; i < j; i++)
+ cache_remove_shared_cpu_map(cpu, i);
break;
+ }
cache_shared_cpu_map_setup(cpu, j);
}
set_cpus_allowed(current, oldmask);
out:
- if (retval)
- free_cache_attributes(cpu);
+ if (retval) {
+ kfree(cpuid4_info[cpu]);
+ cpuid4_info[cpu] = NULL;
+ }
+
return retval;
}
@@ -669,7 +681,7 @@ static struct kobj_type ktype_percpu_entry = {
.sysfs_ops = &sysfs_ops,
};
-static void cpuid4_cache_sysfs_exit(unsigned int cpu)
+static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
kfree(cache_kobject[cpu]);
kfree(index_kobject[cpu]);
@@ -680,13 +692,14 @@ static void cpuid4_cache_sysfs_exit(unsigned int cpu)
static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
+ int err;
if (num_cache_leaves == 0)
return -ENOENT;
- detect_cache_attributes(cpu);
- if (cpuid4_info[cpu] == NULL)
- return -ENOENT;
+ err = detect_cache_attributes(cpu);
+ if (err)
+ return err;
/* Allocate all required memory */
cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL);
@@ -705,13 +718,15 @@ err_out:
return -ENOMEM;
}
+static cpumask_t cache_dev_map = CPU_MASK_NONE;
+
/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long i, j;
struct _index_kobject *this_object;
- int retval = 0;
+ int retval;
retval = cpuid4_cache_sysfs_init(cpu);
if (unlikely(retval < 0))
@@ -721,6 +736,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
kobject_set_name(cache_kobject[cpu], "%s", "cache");
cache_kobject[cpu]->ktype = &ktype_percpu_entry;
retval = kobject_register(cache_kobject[cpu]);
+ if (retval < 0) {
+ cpuid4_cache_sysfs_exit(cpu);
+ return retval;
+ }
for (i = 0; i < num_cache_leaves; i++) {
this_object = INDEX_KOBJECT_PTR(cpu,i);
@@ -740,6 +759,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
break;
}
}
+ if (!retval)
+ cpu_set(cpu, cache_dev_map);
+
return retval;
}
@@ -750,13 +772,14 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
if (cpuid4_info[cpu] == NULL)
return;
- for (i = 0; i < num_cache_leaves; i++) {
- cache_remove_shared_cpu_map(cpu, i);
+ if (!cpu_isset(cpu, cache_dev_map))
+ return;
+ cpu_clear(cpu, cache_dev_map);
+
+ for (i = 0; i < num_cache_leaves; i++)
kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
- }
kobject_unregister(cache_kobject[cpu]);
cpuid4_cache_sysfs_exit(cpu);
- return;
}
static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
@@ -781,7 +804,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
{
- .notifier_call = cacheinfo_cpu_callback,
+ .notifier_call = cacheinfo_cpu_callback,
};
static int __cpuinit cache_sysfs_init(void)
@@ -791,14 +814,15 @@ static int __cpuinit cache_sysfs_init(void)
if (num_cache_leaves == 0)
return 0;
- register_hotcpu_notifier(&cacheinfo_cpu_notifier);
-
for_each_online_cpu(i) {
- struct sys_device *sys_dev = get_cpu_sysdev((unsigned int)i);
+ int err;
+ struct sys_device *sys_dev = get_cpu_sysdev(i);
- cache_add_dev(sys_dev);
+ err = cache_add_dev(sys_dev);
+ if (err)
+ return err;
}
-
+ register_hotcpu_notifier(&cacheinfo_cpu_notifier);
return 0;
}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 494d320d909..24885be5c48 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -131,17 +131,19 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;
- int err;
+ int err = 0;
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
mutex_lock(&therm_cpu_lock);
err = thermal_throttle_add_dev(sys_dev);
mutex_unlock(&therm_cpu_lock);
WARN_ON(err);
break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
mutex_lock(&therm_cpu_lock);
@@ -149,7 +151,7 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
mutex_unlock(&therm_cpu_lock);
break;
}
- return NOTIFY_OK;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/mce_64.c
index 8ca8f864896..66e6b797b2c 100644
--- a/arch/x86/kernel/mce_64.c
+++ b/arch/x86/kernel/mce_64.c
@@ -802,16 +802,29 @@ static __cpuinit int mce_create_device(unsigned int cpu)
if (!mce_available(&cpu_data[cpu]))
return -EIO;
+ memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
per_cpu(device_mce,cpu).id = cpu;
per_cpu(device_mce,cpu).cls = &mce_sysclass;
err = sysdev_register(&per_cpu(device_mce,cpu));
+ if (err)
+ return err;
+
+ for (i = 0; mce_attributes[i]; i++) {
+ err = sysdev_create_file(&per_cpu(device_mce,cpu),
+ mce_attributes[i]);
+ if (err)
+ goto error;
+ }
- if (!err) {
- for (i = 0; mce_attributes[i]; i++)
- sysdev_create_file(&per_cpu(device_mce,cpu),
- mce_attributes[i]);
+ return 0;
+error:
+ while (i--) {
+ sysdev_remove_file(&per_cpu(device_mce,cpu),
+ mce_attributes[i]);
}
+ sysdev_unregister(&per_cpu(device_mce,cpu));
+
return err;
}
@@ -823,7 +836,6 @@ static void mce_remove_device(unsigned int cpu)
sysdev_remove_file(&per_cpu(device_mce,cpu),
mce_attributes[i]);
sysdev_unregister(&per_cpu(device_mce,cpu));
- memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject));
}
/* Get notified when a cpu comes on/off. Be hotplug friendly. */
@@ -831,18 +843,21 @@ static int
mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
+ int err = 0;
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- mce_create_device(cpu);
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ err = mce_create_device(cpu);
break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
mce_remove_device(cpu);
break;
}
- return NOTIFY_OK;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
static struct notifier_block mce_cpu_notifier = {
@@ -857,9 +872,13 @@ static __init int mce_init_device(void)
if (!mce_available(&boot_cpu_data))
return -EIO;
err = sysdev_class_register(&mce_sysclass);
+ if (err)
+ return err;
for_each_online_cpu(i) {
- mce_create_device(i);
+ err = mce_create_device(i);
+ if (err)
+ return err;
}
register_hotcpu_notifier(&mce_cpu_notifier);
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index df85c9c1360..e18e516cf54 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -133,37 +133,42 @@ static const struct file_operations msr_fops = {
.open = msr_open,
};
-static int __cpuinit msr_device_create(int i)
+static int __cpuinit msr_device_create(int cpu)
{
- int err = 0;
struct device *dev;
- dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), "msr%d",i);
- if (IS_ERR(dev))
- err = PTR_ERR(dev);
- return err;
+ dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu),
+ "msr%d", cpu);
+ return IS_ERR(dev) ? PTR_ERR(dev) : 0;
+}
+
+static void msr_device_destroy(int cpu)
+{
+ device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
}
static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
+ int err = 0;
switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- msr_device_create(cpu);
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ err = msr_device_create(cpu);
break;
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
- device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
+ msr_device_destroy(cpu);
break;
}
- return NOTIFY_OK;
+ return err ? NOTIFY_BAD : NOTIFY_OK;
}
-static struct notifier_block __cpuinitdata msr_class_cpu_notifier =
-{
+static struct notifier_block __cpuinitdata msr_class_cpu_notifier = {
.notifier_call = msr_class_cpu_callback,
};
@@ -196,7 +201,7 @@ static int __init msr_init(void)
out_class:
i = 0;
for_each_online_cpu(i)
- device_destroy(msr_class, MKDEV(MSR_MAJOR, i));
+ msr_device_destroy(i);
class_destroy(msr_class);
out_chrdev:
unregister_chrdev(MSR_MAJOR, "cpu/msr");
@@ -208,7 +213,7 @@ static void __exit msr_exit(void)
{
int cpu = 0;
for_each_online_cpu(cpu)
- device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
+ msr_device_destroy(cpu);
class_destroy(msr_class);
unregister_chrdev(MSR_MAJOR, "cpu/msr");
unregister_hotcpu_notifier(&msr_class_cpu_notifier);
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c
index 573c0a6e0ac..f8fafe527ff 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/kernel/suspend_64.c
@@ -150,8 +150,22 @@ void fix_processor_context(void)
/* Defined in arch/x86_64/kernel/suspend_asm.S */
extern int restore_image(void);
+/*
+ * Address to jump to in the last phase of restore in order to get to the image
+ * kernel's text (this value is passed in the image header).
+ */
+unsigned long restore_jump_address;
+
+/*
+ * Value of the cr3 register from before the hibernation (this value is passed
+ * in the image header).
+ */
+unsigned long restore_cr3;
+
pgd_t *temp_level4_pgt;
+void *relocated_restore_code;
+
static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
long i, j;
@@ -175,7 +189,7 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en
if (paddr >= end)
break;
- pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr;
+ pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
pe &= __supported_pte_mask;
set_pmd(pmd, __pmd(pe));
}
@@ -183,25 +197,42 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en
return 0;
}
+static int res_kernel_text_pud_init(pud_t *pud, unsigned long start)
+{
+ pmd_t *pmd;
+ unsigned long paddr;
+
+ pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pmd)
+ return -ENOMEM;
+ set_pud(pud + pud_index(start), __pud(__pa(pmd) | _KERNPG_TABLE));
+ for (paddr = 0; paddr < KERNEL_TEXT_SIZE; pmd++, paddr += PMD_SIZE) {
+ unsigned long pe;
+
+ pe = __PAGE_KERNEL_LARGE_EXEC | _PAGE_GLOBAL | paddr;
+ pe &= __supported_pte_mask;
+ set_pmd(pmd, __pmd(pe));
+ }
+
+ return 0;
+}
+
static int set_up_temporary_mappings(void)
{
unsigned long start, end, next;
+ pud_t *pud;
int error;
temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
if (!temp_level4_pgt)
return -ENOMEM;
- /* It is safe to reuse the original kernel mapping */
- set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
- init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
/* Set up the direct mapping from scratch */
start = (unsigned long)pfn_to_kaddr(0);
end = (unsigned long)pfn_to_kaddr(end_pfn);
for (; start < end; start = next) {
- pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!pud)
return -ENOMEM;
next = start + PGDIR_SIZE;
@@ -212,7 +243,17 @@ static int set_up_temporary_mappings(void)
set_pgd(temp_level4_pgt + pgd_index(start),
mk_kernel_pgd(__pa(pud)));
}
- return 0;
+
+ /* Set up the kernel text mapping from scratch */
+ pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!pud)
+ return -ENOMEM;
+ error = res_kernel_text_pud_init(pud, __START_KERNEL_map);
+ if (!error)
+ set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+ __pgd(__pa(pud) | _PAGE_TABLE));
+
+ return error;
}
int swsusp_arch_resume(void)
@@ -222,6 +263,13 @@ int swsusp_arch_resume(void)
/* We have got enough memory and from now on we cannot recover */
if ((error = set_up_temporary_mappings()))
return error;
+
+ relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
+ if (!relocated_restore_code)
+ return -ENOMEM;
+ memcpy(relocated_restore_code, &core_restore_code,
+ &restore_registers - &core_restore_code);
+
restore_image();
return 0;
}
@@ -236,4 +284,43 @@ int pfn_is_nosave(unsigned long pfn)
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}
+
+struct restore_data_record {
+ unsigned long jump_address;
+ unsigned long cr3;
+ unsigned long magic;
+};
+
+#define RESTORE_MAGIC 0x0123456789ABCDEFUL
+
+/**
+ * arch_hibernation_header_save - populate the architecture specific part
+ * of a hibernation image header
+ * @addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+ struct restore_data_record *rdr = addr;
+
+ if (max_size < sizeof(struct restore_data_record))
+ return -EOVERFLOW;
+ rdr->jump_address = restore_jump_address;
+ rdr->cr3 = restore_cr3;
+ rdr->magic = RESTORE_MAGIC;
+ return 0;
+}
+
+/**
+ * arch_hibernation_header_restore - read the architecture specific data
+ * from the hibernation image header
+ * @addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+ struct restore_data_record *rdr = addr;
+
+ restore_jump_address = rdr->jump_address;
+ restore_cr3 = rdr->cr3;
+ return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+}
#endif /* CONFIG_HIBERNATION */
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/kernel/suspend_asm_64.S
index 16d183f67bc..48344b666d2 100644
--- a/arch/x86/kernel/suspend_asm_64.S
+++ b/arch/x86/kernel/suspend_asm_64.S
@@ -2,8 +2,8 @@
*
* Distribute under GPLv2.
*
- * swsusp_arch_resume may not use any stack, nor any variable that is
- * not "NoSave" during copying pages:
+ * swsusp_arch_resume must not use any stack or any nonlocal variables while
+ * copying pages:
*
* Its rewriting one kernel image with another. What is stack in "old"
* image could very well be data page in "new" image, and overwriting
@@ -36,6 +36,13 @@ ENTRY(swsusp_arch_suspend)
movq %r15, saved_context_r15(%rip)
pushfq ; popq saved_context_eflags(%rip)
+ /* save the address of restore_registers */
+ movq $restore_registers, %rax
+ movq %rax, restore_jump_address(%rip)
+ /* save cr3 */
+ movq %cr3, %rax
+ movq %rax, restore_cr3(%rip)
+
call swsusp_save
ret
@@ -54,7 +61,17 @@ ENTRY(restore_image)
movq %rcx, %cr3;
movq %rax, %cr4; # turn PGE back on
+ /* prepare to jump to the image kernel */
+ movq restore_jump_address(%rip), %rax
+ movq restore_cr3(%rip), %rbx
+
+ /* prepare to copy image data to their original locations */
movq restore_pblist(%rip), %rdx
+ movq relocated_restore_code(%rip), %rcx
+ jmpq *%rcx
+
+ /* code below has been relocated to a safe page */
+ENTRY(core_restore_code)
loop:
testq %rdx, %rdx
jz done
@@ -62,7 +79,7 @@ loop:
/* get addresses from the pbe and copy the page */
movq pbe_address(%rdx), %rsi
movq pbe_orig_address(%rdx), %rdi
- movq $512, %rcx
+ movq $(PAGE_SIZE >> 3), %rcx
rep
movsq
@@ -70,10 +87,22 @@ loop:
movq pbe_next(%rdx), %rdx
jmp loop
done:
+ /* jump to the restore_registers address from the image header */
+ jmpq *%rax
+ /*
+ * NOTE: This assumes that the boot kernel's text mapping covers the
+ * image kernel's page containing restore_registers and the address of
+ * this page is the same as in the image kernel's text mapping (it
+ * should always be true, because the text mapping is linear, starting
+ * from 0, and is supposed to cover the entire kernel text for every
+ * kernel).
+ *
+ * code below belongs to the image kernel
+ */
+
+ENTRY(restore_registers)
/* go back to the original page tables */
- movq $(init_level4_pgt - __START_KERNEL_map), %rax
- addq phys_base(%rip), %rax
- movq %rax, %cr3
+ movq %rbx, %cr3
/* Flush TLB, including "global" things (vmalloc) */
movq mmu_cr4_features(%rip), %rax
@@ -84,12 +113,9 @@ done:
movq %rcx, %cr3
movq %rax, %cr4; # turn PGE back on
- movl $24, %eax
- movl %eax, %ds
-
movq saved_context_esp(%rip), %rsp
movq saved_context_ebp(%rip), %rbp
- /* Don't restore %rax, it must be 0 anyway */
+ /* restore GPRs (we don't restore %rax, it must be 0 anyway) */
movq saved_context_ebx(%rip), %rbx
movq saved_context_ecx(%rip), %rcx
movq saved_context_edx(%rip), %rdx
@@ -107,4 +133,7 @@ done:
xorq %rax, %rax
+ /* tell the hibernation core that we've just restored the memory */
+ movq %rax, in_suspend(%rip)
+
ret
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8a67e282cb5..585541ca1a7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -64,6 +64,16 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
.sysctl_enabled = 1,
};
+void update_vsyscall_tz(void)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+ /* sys_tz has changed */
+ vsyscall_gtod_data.sys_tz = sys_tz;
+ write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+}
+
void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
{
unsigned long flags;
@@ -77,7 +87,6 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
vsyscall_gtod_data.clock.shift = clock->shift;
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
- vsyscall_gtod_data.sys_tz = sys_tz;
vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
@@ -163,7 +172,7 @@ time_t __vsyscall(1) vtime(time_t *t)
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
return time_syscall(t);
- vgettimeofday(&tv, 0);
+ vgettimeofday(&tv, NULL);
result = tv.tv_sec;
if (t)
*t = result;
@@ -257,18 +266,10 @@ out:
return ret;
}
-static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
- void __user *oldval, size_t __user *oldlenp,
- void __user *newval, size_t newlen)
-{
- return -ENOSYS;
-}
-
static ctl_table kernel_table2[] = {
- { .ctl_name = 99, .procname = "vsyscall64",
+ { .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
.mode = 0644,
- .strategy = vsyscall_sysctl_nostrat,
.proc_handler = vsyscall_sysctl_change },
{}
};