diff options
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/acpi/wakeup_32.S | 12 | ||||
-rw-r--r-- | arch/x86/kernel/acpi/wakeup_64.S | 32 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/intel_cacheinfo.c | 66 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/mce_64.c | 39 | ||||
-rw-r--r-- | arch/x86/kernel/msr.c | 35 | ||||
-rw-r--r-- | arch/x86/kernel/suspend_64.c | 101 | ||||
-rw-r--r-- | arch/x86/kernel/suspend_asm_64.S | 49 | ||||
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 23 |
9 files changed, 246 insertions, 121 deletions
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index f22ba8534d2..a97313b1270 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -11,7 +11,7 @@ # # If physical address of wakeup_code is 0x12345, BIOS should call us with # cs = 0x1234, eip = 0x05 -# +# #define BEEP \ inb $97, %al; \ @@ -52,7 +52,6 @@ wakeup_code: BEEP 1: mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board - movw $0x0e00 + 'S', %fs:(0x12) pushl $0 # Kill any dangerous flags popfl @@ -90,9 +89,6 @@ wakeup_code: # make sure %cr4 is set correctly (features, etc) movl real_save_cr4 - wakeup_code, %eax movl %eax, %cr4 - movw $0xb800, %ax - movw %ax,%fs - movw $0x0e00 + 'i', %fs:(0x12) # need a gdt -- use lgdtl to force 32-bit operands, in case # the GDT is located past 16 megabytes. @@ -102,8 +98,6 @@ wakeup_code: movl %eax, %cr0 jmp 1f 1: - movw $0x0e00 + 'n', %fs:(0x14) - movl real_magic - wakeup_code, %eax cmpl $0x12345678, %eax jne bogus_real_magic @@ -122,13 +116,11 @@ real_save_cr4: .long 0 real_magic: .long 0 video_mode: .long 0 realmode_flags: .long 0 -beep_flags: .long 0 real_efer_save_restore: .long 0 real_save_efer_edx: .long 0 real_save_efer_eax: .long 0 bogus_real_magic: - movw $0x0e00 + 'B', %fs:(0x12) jmp bogus_real_magic /* This code uses an extended set of video mode numbers. These include: @@ -194,7 +186,6 @@ wakeup_pmode_return: movw %ax, %es movw %ax, %fs movw %ax, %gs - movw $0x0e00 + 'u', 0xb8016 # reload the gdt, as we need the full 32 bit address lgdt saved_gdt @@ -218,7 +209,6 @@ wakeup_pmode_return: jmp *%eax bogus_magic: - movw $0x0e00 + 'B', 0xb8018 jmp bogus_magic diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 8b4357e1efe..55608ec2ed7 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -41,7 +41,6 @@ wakeup_code: # Running in *copy* of this code, somewhere in low 1MB. - movb $0xa1, %al ; outb %al, $0x80 cli cld # setup data segment @@ -65,11 +64,6 @@ wakeup_code: cmpl $0x12345678, %eax jne bogus_real_magic - call verify_cpu # Verify the cpu supports long - # mode - testl %eax, %eax - jnz no_longmode - testl $1, realmode_flags - wakeup_code jz 1f lcall $0xc000,$3 @@ -84,12 +78,6 @@ wakeup_code: call mode_set 1: - movw $0xb800, %ax - movw %ax,%fs - movw $0x0e00 + 'L', %fs:(0x10) - - movb $0xa2, %al ; outb %al, $0x80 - mov %ds, %ax # Find 32bit wakeup_code addr movzx %ax, %esi # (Convert %ds:gdt to a liner ptr) shll $4, %esi @@ -117,14 +105,10 @@ wakeup_32_vector: .code32 wakeup_32: # Running in this code, but at low address; paging is not yet turned on. - movb $0xa5, %al ; outb %al, $0x80 movl $__KERNEL_DS, %eax movl %eax, %ds - movw $0x0e00 + 'i', %ds:(0xb8012) - movb $0xa8, %al ; outb %al, $0x80; - /* * Prepare for entering 64bits mode */ @@ -200,16 +184,11 @@ wakeup_long64: */ lgdt cpu_gdt_descr - movw $0x0e00 + 'n', %ds:(0xb8014) - movb $0xa9, %al ; outb %al, $0x80 - movq saved_magic, %rax movq $0x123456789abcdef0, %rdx cmpq %rdx, %rax jne bogus_64_magic - movw $0x0e00 + 'u', %ds:(0xb8016) - nop nop movw $__KERNEL_DS, %ax @@ -220,13 +199,11 @@ wakeup_long64: movw %ax, %gs movq saved_rsp, %rsp - movw $0x0e00 + 'x', %ds:(0xb8018) movq saved_rbx, %rbx movq saved_rdi, %rdi movq saved_rsi, %rsi movq saved_rbp, %rbp - movw $0x0e00 + '!', %ds:(0xb801a) movq saved_rip, %rax jmp *%rax @@ -256,21 +233,12 @@ realmode_flags: .quad 0 .code16 bogus_real_magic: - movb $0xba,%al ; outb %al,$0x80 jmp bogus_real_magic .code64 bogus_64_magic: - movb $0xb3,%al ; outb %al,$0x80 jmp bogus_64_magic -.code16 -no_longmode: - movb $0xbc,%al ; outb %al,$0x80 - jmp no_longmode - -#include "../verify_cpu_64.S" - /* This code uses an extended set of video mode numbers. These include: * Aliases for standard modes * NORMAL_VGA (-1) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 1826395ebee..297a2411694 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -499,6 +499,11 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) { static void free_cache_attributes(unsigned int cpu) { + int i; + + for (i = 0; i < num_cache_leaves; i++) + cache_remove_shared_cpu_map(cpu, i); + kfree(cpuid4_info[cpu]); cpuid4_info[cpu] = NULL; } @@ -506,8 +511,8 @@ static void free_cache_attributes(unsigned int cpu) static int __cpuinit detect_cache_attributes(unsigned int cpu) { struct _cpuid4_info *this_leaf; - unsigned long j; - int retval; + unsigned long j; + int retval; cpumask_t oldmask; if (num_cache_leaves == 0) @@ -524,19 +529,26 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) goto out; /* Do cpuid and store the results */ - retval = 0; for (j = 0; j < num_cache_leaves; j++) { this_leaf = CPUID4_INFO_IDX(cpu, j); retval = cpuid4_cache_lookup(j, this_leaf); - if (unlikely(retval < 0)) + if (unlikely(retval < 0)) { + int i; + + for (i = 0; i < j; i++) + cache_remove_shared_cpu_map(cpu, i); break; + } cache_shared_cpu_map_setup(cpu, j); } set_cpus_allowed(current, oldmask); out: - if (retval) - free_cache_attributes(cpu); + if (retval) { + kfree(cpuid4_info[cpu]); + cpuid4_info[cpu] = NULL; + } + return retval; } @@ -669,7 +681,7 @@ static struct kobj_type ktype_percpu_entry = { .sysfs_ops = &sysfs_ops, }; -static void cpuid4_cache_sysfs_exit(unsigned int cpu) +static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) { kfree(cache_kobject[cpu]); kfree(index_kobject[cpu]); @@ -680,13 +692,14 @@ static void cpuid4_cache_sysfs_exit(unsigned int cpu) static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) { + int err; if (num_cache_leaves == 0) return -ENOENT; - detect_cache_attributes(cpu); - if (cpuid4_info[cpu] == NULL) - return -ENOENT; + err = detect_cache_attributes(cpu); + if (err) + return err; /* Allocate all required memory */ cache_kobject[cpu] = kzalloc(sizeof(struct kobject), GFP_KERNEL); @@ -705,13 +718,15 @@ err_out: return -ENOMEM; } +static cpumask_t cache_dev_map = CPU_MASK_NONE; + /* Add/Remove cache interface for CPU device */ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i, j; struct _index_kobject *this_object; - int retval = 0; + int retval; retval = cpuid4_cache_sysfs_init(cpu); if (unlikely(retval < 0)) @@ -721,6 +736,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) kobject_set_name(cache_kobject[cpu], "%s", "cache"); cache_kobject[cpu]->ktype = &ktype_percpu_entry; retval = kobject_register(cache_kobject[cpu]); + if (retval < 0) { + cpuid4_cache_sysfs_exit(cpu); + return retval; + } for (i = 0; i < num_cache_leaves; i++) { this_object = INDEX_KOBJECT_PTR(cpu,i); @@ -740,6 +759,9 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) break; } } + if (!retval) + cpu_set(cpu, cache_dev_map); + return retval; } @@ -750,13 +772,14 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) if (cpuid4_info[cpu] == NULL) return; - for (i = 0; i < num_cache_leaves; i++) { - cache_remove_shared_cpu_map(cpu, i); + if (!cpu_isset(cpu, cache_dev_map)) + return; + cpu_clear(cpu, cache_dev_map); + + for (i = 0; i < num_cache_leaves; i++) kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); - } kobject_unregister(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); - return; } static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, @@ -781,7 +804,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { - .notifier_call = cacheinfo_cpu_callback, + .notifier_call = cacheinfo_cpu_callback, }; static int __cpuinit cache_sysfs_init(void) @@ -791,14 +814,15 @@ static int __cpuinit cache_sysfs_init(void) if (num_cache_leaves == 0) return 0; - register_hotcpu_notifier(&cacheinfo_cpu_notifier); - for_each_online_cpu(i) { - struct sys_device *sys_dev = get_cpu_sysdev((unsigned int)i); + int err; + struct sys_device *sys_dev = get_cpu_sysdev(i); - cache_add_dev(sys_dev); + err = cache_add_dev(sys_dev); + if (err) + return err; } - + register_hotcpu_notifier(&cacheinfo_cpu_notifier); return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 494d320d909..24885be5c48 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -131,17 +131,19 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, { unsigned int cpu = (unsigned long)hcpu; struct sys_device *sys_dev; - int err; + int err = 0; sys_dev = get_cpu_sysdev(cpu); switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: mutex_lock(&therm_cpu_lock); err = thermal_throttle_add_dev(sys_dev); mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: mutex_lock(&therm_cpu_lock); @@ -149,7 +151,7 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, mutex_unlock(&therm_cpu_lock); break; } - return NOTIFY_OK; + return err ? NOTIFY_BAD : NOTIFY_OK; } static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = diff --git a/arch/x86/kernel/mce_64.c b/arch/x86/kernel/mce_64.c index 8ca8f864896..66e6b797b2c 100644 --- a/arch/x86/kernel/mce_64.c +++ b/arch/x86/kernel/mce_64.c @@ -802,16 +802,29 @@ static __cpuinit int mce_create_device(unsigned int cpu) if (!mce_available(&cpu_data[cpu])) return -EIO; + memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); per_cpu(device_mce,cpu).id = cpu; per_cpu(device_mce,cpu).cls = &mce_sysclass; err = sysdev_register(&per_cpu(device_mce,cpu)); + if (err) + return err; + + for (i = 0; mce_attributes[i]; i++) { + err = sysdev_create_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); + if (err) + goto error; + } - if (!err) { - for (i = 0; mce_attributes[i]; i++) - sysdev_create_file(&per_cpu(device_mce,cpu), - mce_attributes[i]); + return 0; +error: + while (i--) { + sysdev_remove_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); } + sysdev_unregister(&per_cpu(device_mce,cpu)); + return err; } @@ -823,7 +836,6 @@ static void mce_remove_device(unsigned int cpu) sysdev_remove_file(&per_cpu(device_mce,cpu), mce_attributes[i]); sysdev_unregister(&per_cpu(device_mce,cpu)); - memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); } /* Get notified when a cpu comes on/off. Be hotplug friendly. */ @@ -831,18 +843,21 @@ static int mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; + int err = 0; switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - mce_create_device(cpu); + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = mce_create_device(cpu); break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: mce_remove_device(cpu); break; } - return NOTIFY_OK; + return err ? NOTIFY_BAD : NOTIFY_OK; } static struct notifier_block mce_cpu_notifier = { @@ -857,9 +872,13 @@ static __init int mce_init_device(void) if (!mce_available(&boot_cpu_data)) return -EIO; err = sysdev_class_register(&mce_sysclass); + if (err) + return err; for_each_online_cpu(i) { - mce_create_device(i); + err = mce_create_device(i); + if (err) + return err; } register_hotcpu_notifier(&mce_cpu_notifier); diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index df85c9c1360..e18e516cf54 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -133,37 +133,42 @@ static const struct file_operations msr_fops = { .open = msr_open, }; -static int __cpuinit msr_device_create(int i) +static int __cpuinit msr_device_create(int cpu) { - int err = 0; struct device *dev; - dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), "msr%d",i); - if (IS_ERR(dev)) - err = PTR_ERR(dev); - return err; + dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), + "msr%d", cpu); + return IS_ERR(dev) ? PTR_ERR(dev) : 0; +} + +static void msr_device_destroy(int cpu) +{ + device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); } static int __cpuinit msr_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; + int err = 0; switch (action) { - case CPU_ONLINE: - case CPU_ONLINE_FROZEN: - msr_device_create(cpu); + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + err = msr_device_create(cpu); break; + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: case CPU_DEAD_FROZEN: - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); + msr_device_destroy(cpu); break; } - return NOTIFY_OK; + return err ? NOTIFY_BAD : NOTIFY_OK; } -static struct notifier_block __cpuinitdata msr_class_cpu_notifier = -{ +static struct notifier_block __cpuinitdata msr_class_cpu_notifier = { .notifier_call = msr_class_cpu_callback, }; @@ -196,7 +201,7 @@ static int __init msr_init(void) out_class: i = 0; for_each_online_cpu(i) - device_destroy(msr_class, MKDEV(MSR_MAJOR, i)); + msr_device_destroy(i); class_destroy(msr_class); out_chrdev: unregister_chrdev(MSR_MAJOR, "cpu/msr"); @@ -208,7 +213,7 @@ static void __exit msr_exit(void) { int cpu = 0; for_each_online_cpu(cpu) - device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); + msr_device_destroy(cpu); class_destroy(msr_class); unregister_chrdev(MSR_MAJOR, "cpu/msr"); unregister_hotcpu_notifier(&msr_class_cpu_notifier); diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/kernel/suspend_64.c index 573c0a6e0ac..f8fafe527ff 100644 --- a/arch/x86/kernel/suspend_64.c +++ b/arch/x86/kernel/suspend_64.c @@ -150,8 +150,22 @@ void fix_processor_context(void) /* Defined in arch/x86_64/kernel/suspend_asm.S */ extern int restore_image(void); +/* + * Address to jump to in the last phase of restore in order to get to the image + * kernel's text (this value is passed in the image header). + */ +unsigned long restore_jump_address; + +/* + * Value of the cr3 register from before the hibernation (this value is passed + * in the image header). + */ +unsigned long restore_cr3; + pgd_t *temp_level4_pgt; +void *relocated_restore_code; + static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) { long i, j; @@ -175,7 +189,7 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en if (paddr >= end) break; - pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr; + pe = __PAGE_KERNEL_LARGE_EXEC | paddr; pe &= __supported_pte_mask; set_pmd(pmd, __pmd(pe)); } @@ -183,25 +197,42 @@ static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long en return 0; } +static int res_kernel_text_pud_init(pud_t *pud, unsigned long start) +{ + pmd_t *pmd; + unsigned long paddr; + + pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); + if (!pmd) + return -ENOMEM; + set_pud(pud + pud_index(start), __pud(__pa(pmd) | _KERNPG_TABLE)); + for (paddr = 0; paddr < KERNEL_TEXT_SIZE; pmd++, paddr += PMD_SIZE) { + unsigned long pe; + + pe = __PAGE_KERNEL_LARGE_EXEC | _PAGE_GLOBAL | paddr; + pe &= __supported_pte_mask; + set_pmd(pmd, __pmd(pe)); + } + + return 0; +} + static int set_up_temporary_mappings(void) { unsigned long start, end, next; + pud_t *pud; int error; temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); if (!temp_level4_pgt) return -ENOMEM; - /* It is safe to reuse the original kernel mapping */ - set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), - init_level4_pgt[pgd_index(__START_KERNEL_map)]); - /* Set up the direct mapping from scratch */ start = (unsigned long)pfn_to_kaddr(0); end = (unsigned long)pfn_to_kaddr(end_pfn); for (; start < end; start = next) { - pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); + pud = (pud_t *)get_safe_page(GFP_ATOMIC); if (!pud) return -ENOMEM; next = start + PGDIR_SIZE; @@ -212,7 +243,17 @@ static int set_up_temporary_mappings(void) set_pgd(temp_level4_pgt + pgd_index(start), mk_kernel_pgd(__pa(pud))); } - return 0; + + /* Set up the kernel text mapping from scratch */ + pud = (pud_t *)get_safe_page(GFP_ATOMIC); + if (!pud) + return -ENOMEM; + error = res_kernel_text_pud_init(pud, __START_KERNEL_map); + if (!error) + set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), + __pgd(__pa(pud) | _PAGE_TABLE)); + + return error; } int swsusp_arch_resume(void) @@ -222,6 +263,13 @@ int swsusp_arch_resume(void) /* We have got enough memory and from now on we cannot recover */ if ((error = set_up_temporary_mappings())) return error; + + relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC); + if (!relocated_restore_code) + return -ENOMEM; + memcpy(relocated_restore_code, &core_restore_code, + &restore_registers - &core_restore_code); + restore_image(); return 0; } @@ -236,4 +284,43 @@ int pfn_is_nosave(unsigned long pfn) unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); } + +struct restore_data_record { + unsigned long jump_address; + unsigned long cr3; + unsigned long magic; +}; + +#define RESTORE_MAGIC 0x0123456789ABCDEFUL + +/** + * arch_hibernation_header_save - populate the architecture specific part + * of a hibernation image header + * @addr: address to save the data at + */ +int arch_hibernation_header_save(void *addr, unsigned int max_size) +{ + struct restore_data_record *rdr = addr; + + if (max_size < sizeof(struct restore_data_record)) + return -EOVERFLOW; + rdr->jump_address = restore_jump_address; + rdr->cr3 = restore_cr3; + rdr->magic = RESTORE_MAGIC; + return 0; +} + +/** + * arch_hibernation_header_restore - read the architecture specific data + * from the hibernation image header + * @addr: address to read the data from + */ +int arch_hibernation_header_restore(void *addr) +{ + struct restore_data_record *rdr = addr; + + restore_jump_address = rdr->jump_address; + restore_cr3 = rdr->cr3; + return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL; +} #endif /* CONFIG_HIBERNATION */ diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/kernel/suspend_asm_64.S index 16d183f67bc..48344b666d2 100644 --- a/arch/x86/kernel/suspend_asm_64.S +++ b/arch/x86/kernel/suspend_asm_64.S @@ -2,8 +2,8 @@ * * Distribute under GPLv2. * - * swsusp_arch_resume may not use any stack, nor any variable that is - * not "NoSave" during copying pages: + * swsusp_arch_resume must not use any stack or any nonlocal variables while + * copying pages: * * Its rewriting one kernel image with another. What is stack in "old" * image could very well be data page in "new" image, and overwriting @@ -36,6 +36,13 @@ ENTRY(swsusp_arch_suspend) movq %r15, saved_context_r15(%rip) pushfq ; popq saved_context_eflags(%rip) + /* save the address of restore_registers */ + movq $restore_registers, %rax + movq %rax, restore_jump_address(%rip) + /* save cr3 */ + movq %cr3, %rax + movq %rax, restore_cr3(%rip) + call swsusp_save ret @@ -54,7 +61,17 @@ ENTRY(restore_image) movq %rcx, %cr3; movq %rax, %cr4; # turn PGE back on + /* prepare to jump to the image kernel */ + movq restore_jump_address(%rip), %rax + movq restore_cr3(%rip), %rbx + + /* prepare to copy image data to their original locations */ movq restore_pblist(%rip), %rdx + movq relocated_restore_code(%rip), %rcx + jmpq *%rcx + + /* code below has been relocated to a safe page */ +ENTRY(core_restore_code) loop: testq %rdx, %rdx jz done @@ -62,7 +79,7 @@ loop: /* get addresses from the pbe and copy the page */ movq pbe_address(%rdx), %rsi movq pbe_orig_address(%rdx), %rdi - movq $512, %rcx + movq $(PAGE_SIZE >> 3), %rcx rep movsq @@ -70,10 +87,22 @@ loop: movq pbe_next(%rdx), %rdx jmp loop done: + /* jump to the restore_registers address from the image header */ + jmpq *%rax + /* + * NOTE: This assumes that the boot kernel's text mapping covers the + * image kernel's page containing restore_registers and the address of + * this page is the same as in the image kernel's text mapping (it + * should always be true, because the text mapping is linear, starting + * from 0, and is supposed to cover the entire kernel text for every + * kernel). + * + * code below belongs to the image kernel + */ + +ENTRY(restore_registers) /* go back to the original page tables */ - movq $(init_level4_pgt - __START_KERNEL_map), %rax - addq phys_base(%rip), %rax - movq %rax, %cr3 + movq %rbx, %cr3 /* Flush TLB, including "global" things (vmalloc) */ movq mmu_cr4_features(%rip), %rax @@ -84,12 +113,9 @@ done: movq %rcx, %cr3 movq %rax, %cr4; # turn PGE back on - movl $24, %eax - movl %eax, %ds - movq saved_context_esp(%rip), %rsp movq saved_context_ebp(%rip), %rbp - /* Don't restore %rax, it must be 0 anyway */ + /* restore GPRs (we don't restore %rax, it must be 0 anyway) */ movq saved_context_ebx(%rip), %rbx movq saved_context_ecx(%rip), %rcx movq saved_context_edx(%rip), %rdx @@ -107,4 +133,7 @@ done: xorq %rax, %rax + /* tell the hibernation core that we've just restored the memory */ + movq %rax, in_suspend(%rip) + ret diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8a67e282cb5..585541ca1a7 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -64,6 +64,16 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = .sysctl_enabled = 1, }; +void update_vsyscall_tz(void) +{ + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + /* sys_tz has changed */ + vsyscall_gtod_data.sys_tz = sys_tz; + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); +} + void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { unsigned long flags; @@ -77,7 +87,6 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.clock.shift = clock->shift; vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.sys_tz = sys_tz; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } @@ -163,7 +172,7 @@ time_t __vsyscall(1) vtime(time_t *t) if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) return time_syscall(t); - vgettimeofday(&tv, 0); + vgettimeofday(&tv, NULL); result = tv.tv_sec; if (t) *t = result; @@ -257,18 +266,10 @@ out: return ret; } -static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - static ctl_table kernel_table2[] = { - { .ctl_name = 99, .procname = "vsyscall64", + { .procname = "vsyscall64", .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644, - .strategy = vsyscall_sysctl_nostrat, .proc_handler = vsyscall_sysctl_change }, {} }; |