diff options
author | Jeff Garzik <jeff@garzik.org> | 2006-09-26 13:13:19 -0400 |
---|---|---|
committer | Jeff Garzik <jeff@garzik.org> | 2006-09-26 13:13:19 -0400 |
commit | c226951b93f7cd7c3a10b17384535b617bd43fd0 (patch) | |
tree | 07b8796a5c99fbbf587b8d0dbcbc173cfe5e381e /arch/i386 | |
parent | b0df3bd1e553e901ec7297267611a5db88240b38 (diff) | |
parent | e8216dee838c09776680a6f1a2e54d81f3cdfa14 (diff) |
Merge branch 'master' into upstream
Diffstat (limited to 'arch/i386')
-rw-r--r-- | arch/i386/Kconfig | 4 | ||||
-rw-r--r-- | arch/i386/kernel/apm.c | 26 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/mtrr/generic.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/efi_stub.S | 1 | ||||
-rw-r--r-- | arch/i386/kernel/reboot.c | 12 | ||||
-rw-r--r-- | arch/i386/kernel/setup.c | 23 | ||||
-rw-r--r-- | arch/i386/kernel/smp.c | 66 | ||||
-rw-r--r-- | arch/i386/kernel/smpboot.c | 6 | ||||
-rw-r--r-- | arch/i386/kernel/srat.c | 5 | ||||
-rw-r--r-- | arch/i386/kernel/time.c | 50 | ||||
-rw-r--r-- | arch/i386/kernel/time_hpet.c | 37 | ||||
-rw-r--r-- | arch/i386/kernel/traps.c | 11 | ||||
-rw-r--r-- | arch/i386/kernel/vmlinux.lds.S | 12 | ||||
-rw-r--r-- | arch/i386/mach-voyager/voyager_thread.c | 1 | ||||
-rw-r--r-- | arch/i386/mm/boot_ioremap.c | 7 | ||||
-rw-r--r-- | arch/i386/mm/discontig.c | 33 | ||||
-rw-r--r-- | arch/i386/mm/init.c | 44 | ||||
-rw-r--r-- | arch/i386/mm/pgtable.c | 30 | ||||
-rw-r--r-- | arch/i386/power/swsusp.S | 2 |
19 files changed, 298 insertions, 76 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index b2751eadbc5..6189b0c28d6 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -494,7 +494,7 @@ config HIGHMEM64G endchoice choice - depends on EXPERIMENTAL && !X86_PAE + depends on EXPERIMENTAL prompt "Memory split" if EMBEDDED default VMSPLIT_3G help @@ -516,6 +516,7 @@ choice config VMSPLIT_3G bool "3G/1G user/kernel split" config VMSPLIT_3G_OPT + depends on !HIGHMEM bool "3G/1G user/kernel split (for full 1G low memory)" config VMSPLIT_2G bool "2G/2G user/kernel split" @@ -794,6 +795,7 @@ config HOTPLUG_CPU config COMPAT_VDSO bool "Compat VDSO support" default y + depends on !PARAVIRT help Map the VDSO to the predictable old-style address too. ---help--- diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 8591f2fa920..ff9ce4b5eaa 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -1154,9 +1154,11 @@ out: static void set_time(void) { + struct timespec ts; if (got_clock_diff) { /* Must know time zone in order to set clock */ - xtime.tv_sec = get_cmos_time() + clock_cmos_diff; - xtime.tv_nsec = 0; + ts.tv_sec = get_cmos_time() + clock_cmos_diff; + ts.tv_nsec = 0; + do_settimeofday(&ts); } } @@ -1232,13 +1234,8 @@ static int suspend(int vetoable) restore_processor_state(); local_irq_disable(); - write_seqlock(&xtime_lock); - spin_lock(&i8253_lock); - reinit_timer(); set_time(); - - spin_unlock(&i8253_lock); - write_sequnlock(&xtime_lock); + reinit_timer(); if (err == APM_NO_ERROR) err = APM_SUCCESS; @@ -1365,9 +1362,7 @@ static void check_events(void) ignore_bounce = 1; if ((event != APM_NORMAL_RESUME) || (ignore_normal_resume == 0)) { - write_seqlock_irq(&xtime_lock); set_time(); - write_sequnlock_irq(&xtime_lock); device_resume(); pm_send_all(PM_RESUME, (void *)0); queue_event(event, NULL); @@ -1383,9 +1378,7 @@ static void check_events(void) break; case APM_UPDATE_TIME: - write_seqlock_irq(&xtime_lock); set_time(); - write_sequnlock_irq(&xtime_lock); break; case APM_CRITICAL_SUSPEND: @@ -2339,6 +2332,7 @@ static int __init apm_init(void) ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD); if (ret < 0) { printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n"); + remove_proc_entry("apm", NULL); return -ENOMEM; } @@ -2348,7 +2342,13 @@ static int __init apm_init(void) return 0; } - misc_register(&apm_device); + /* + * Note we don't actually care if the misc_device cannot be registered. + * this driver can do its job without it, even if userspace can't + * control it. just log the error + */ + if (misc_register(&apm_device)) + printk(KERN_WARNING "apm: Could not register misc device.\n"); if (HZ != 100) idle_period = (idle_period * HZ) / 100; diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index 169ac8e0db6..0b61eed8bbd 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c @@ -243,7 +243,7 @@ static DEFINE_SPINLOCK(set_atomicity_lock); * has been called. */ -static void prepare_set(void) +static void prepare_set(void) __acquires(set_atomicity_lock) { unsigned long cr0; @@ -274,7 +274,7 @@ static void prepare_set(void) mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi); } -static void post_set(void) +static void post_set(void) __releases(set_atomicity_lock) { /* Flush TLBs (no need to flush caches - they are disabled) */ __flush_tlb(); diff --git a/arch/i386/kernel/efi_stub.S b/arch/i386/kernel/efi_stub.S index d3ee73a3eee..ef00bb77d7e 100644 --- a/arch/i386/kernel/efi_stub.S +++ b/arch/i386/kernel/efi_stub.S @@ -7,7 +7,6 @@ #include <linux/linkage.h> #include <asm/page.h> -#include <asm/pgtable.h> /* * efi_call_phys(void *, ...) is a function with variable parameters. diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 54cfeabbc5e..84278e0093a 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -145,14 +145,10 @@ real_mode_gdt_entries [3] = 0x000092000100ffffULL /* 16-bit real-mode 64k data at 0x00000100 */ }; -static struct -{ - unsigned short size __attribute__ ((packed)); - unsigned long long * base __attribute__ ((packed)); -} -real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries }, -real_mode_idt = { 0x3ff, NULL }, -no_idt = { 0, NULL }; +static struct Xgt_desc_struct +real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, (long)real_mode_gdt_entries }, +real_mode_idt = { 0x3ff, 0 }, +no_idt = { 0, 0 }; /* This is 16-bit protected mode code to disable paging and the cache, diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index f1682206d30..16d99444cf6 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -53,6 +53,7 @@ #include <asm/apic.h> #include <asm/e820.h> #include <asm/mpspec.h> +#include <asm/mmzone.h> #include <asm/setup.h> #include <asm/arch_hooks.h> #include <asm/sections.h> @@ -934,6 +935,24 @@ static void __init parse_cmdline_early (char ** cmdline_p) } /* + * reservetop=size reserves a hole at the top of the kernel address space which + * a hypervisor can load into later. Needed for dynamically loaded hypervisors, + * so relocating the fixmap can be done before paging initialization. + */ +static int __init parse_reservetop(char *arg) +{ + unsigned long address; + + if (!arg) + return -EINVAL; + + address = memparse(arg, &arg); + reserve_top_address(address); + return 0; +} +early_param("reservetop", parse_reservetop); + +/* * Callback for efi_memory_walk. */ static int __init @@ -1181,7 +1200,7 @@ static unsigned long __init setup_memory(void) void __init zone_sizes_init(void) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; unsigned int max_dma, low; max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; @@ -1258,7 +1277,7 @@ void __init setup_bootmem_allocator(void) */ find_smp_config(); #endif - + numa_kva_reserve(); #ifdef CONFIG_BLK_DEV_INITRD if (LOADER_TYPE && INITRD_START) { if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index c10789d7a9d..465188e2d70 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -634,3 +634,69 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) } } +/* + * this function sends a 'generic call function' IPI to one other CPU + * in the system. + * + * cpu is a standard Linux logical CPU number. + */ +static void +__smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + struct call_data_struct data; + int cpus = 1; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + call_data = &data; + wmb(); + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Wait for response */ + while (atomic_read(&data.started) != cpus) + cpu_relax(); + + if (!wait) + return; + + while (atomic_read(&data.finished) != cpus) + cpu_relax(); +} + +/* + * smp_call_function_single - Run a function on another CPU + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: Currently unused. + * @wait: If true, wait until function has completed on other CPUs. + * + * Retrurns 0 on success, else a negative status code. + * + * Does not return until the remote CPU is nearly ready to execute <func> + * or is or has executed. + */ + +int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int nonatomic, int wait) +{ + /* prevent preemption and reschedule on another processor */ + int me = get_cpu(); + if (cpu == me) { + WARN_ON(1); + put_cpu(); + return -EBUSY; + } + spin_lock_bh(&call_lock); + __smp_call_function_single(cpu, func, info, nonatomic, wait); + spin_unlock_bh(&call_lock); + put_cpu(); + return 0; +} +EXPORT_SYMBOL(smp_call_function_single); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index f948419c888..efe07990e7f 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -642,9 +642,13 @@ static void map_cpu_to_logical_apicid(void) { int cpu = smp_processor_id(); int apicid = logical_smp_processor_id(); + int node = apicid_to_node(apicid); + + if (!node_online(node)) + node = first_online_node; cpu_2_logical_apicid[cpu] = apicid; - map_cpu_to_node(cpu, apicid_to_node(apicid)); + map_cpu_to_node(cpu, node); } static void unmap_cpu_to_logical_apicid(int cpu) diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index b1809c9a089..83db411b3aa 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -42,7 +42,7 @@ #define PXM_BITMAP_LEN (MAX_PXM_DOMAINS / 8) static u8 pxm_bitmap[PXM_BITMAP_LEN]; /* bitmap of proximity domains */ -#define MAX_CHUNKS_PER_NODE 4 +#define MAX_CHUNKS_PER_NODE 3 #define MAXCHUNKS (MAX_CHUNKS_PER_NODE * MAX_NUMNODES) struct node_memory_chunk_s { unsigned long start_pfn; @@ -135,9 +135,6 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } -#if MAX_NR_ZONES != 4 -#error "MAX_NR_ZONES != 4, chunk_to_zone requires review" -#endif /* Take a chunk of pages from page frame cstart to cend and count the number * of pages in each zone, returned via zones[]. */ diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index edd00f6cee3..1302e4ab3c4 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -270,16 +270,19 @@ void notify_arch_cmos_timer(void) mod_timer(&sync_cmos_timer, jiffies + 1); } -static long clock_cmos_diff, sleep_start; +static long clock_cmos_diff; +static unsigned long sleep_start; static int timer_suspend(struct sys_device *dev, pm_message_t state) { /* * Estimate time zone so that set_time can update the clock */ - clock_cmos_diff = -get_cmos_time(); + unsigned long ctime = get_cmos_time(); + + clock_cmos_diff = -ctime; clock_cmos_diff += get_seconds(); - sleep_start = get_cmos_time(); + sleep_start = ctime; return 0; } @@ -287,18 +290,29 @@ static int timer_resume(struct sys_device *dev) { unsigned long flags; unsigned long sec; - unsigned long sleep_length; - + unsigned long ctime = get_cmos_time(); + long sleep_length = (ctime - sleep_start) * HZ; + struct timespec ts; + + if (sleep_length < 0) { + printk(KERN_WARNING "CMOS clock skew detected in timer resume!\n"); + /* The time after the resume must not be earlier than the time + * before the suspend or some nasty things will happen + */ + sleep_length = 0; + ctime = sleep_start; + } #ifdef CONFIG_HPET_TIMER if (is_hpet_enabled()) hpet_reenable(); #endif setup_pit_timer(); - sec = get_cmos_time() + clock_cmos_diff; - sleep_length = (get_cmos_time() - sleep_start) * HZ; + + sec = ctime + clock_cmos_diff; + ts.tv_sec = sec; + ts.tv_nsec = 0; + do_settimeofday(&ts); write_seqlock_irqsave(&xtime_lock, flags); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; jiffies_64 += sleep_length; wall_jiffies += sleep_length; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -334,10 +348,11 @@ extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + struct timespec ts; + ts.tv_sec = get_cmos_time(); + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + + do_settimeofday(&ts); if ((hpet_enable() >= 0) && hpet_use_timer) { printk("Using HPET for base-timer\n"); @@ -349,6 +364,7 @@ static void __init hpet_time_init(void) void __init time_init(void) { + struct timespec ts; #ifdef CONFIG_HPET_TIMER if (is_hpet_capable()) { /* @@ -359,10 +375,10 @@ void __init time_init(void) return; } #endif - xtime.tv_sec = get_cmos_time(); - xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + ts.tv_sec = get_cmos_time(); + ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ); + + do_settimeofday(&ts); time_init_hook(); } diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 14a1376fedd..6bf14a4e995 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -301,23 +301,25 @@ int hpet_rtc_timer_init(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; local_irq_save(flags); + cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); hpet_t1_cmp = cnt; - local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); + local_irq_restore(flags); + return 1; } static void hpet_rtc_timer_reinit(void) { - unsigned int cfg, cnt; + unsigned int cfg, cnt, ticks_per_int, lost_ints; if (unlikely(!(PIE_on | AIE_on | UIE_on))) { cfg = hpet_readl(HPET_T1_CFG); @@ -332,10 +334,33 @@ static void hpet_rtc_timer_reinit(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_t1_cmp; - cnt += hpet_tick*HZ/hpet_rtc_int_freq; - hpet_writel(cnt, HPET_T1_CMP); - hpet_t1_cmp = cnt; + ticks_per_int = hpet_tick * HZ / hpet_rtc_int_freq; + hpet_t1_cmp += ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + /* + * If the interrupt handler was delayed too long, the write above tries + * to schedule the next interrupt in the past and the hardware would + * not interrupt until the counter had wrapped around. + * So we have to check that the comparator wasn't set to a past time. + */ + cnt = hpet_readl(HPET_COUNTER); + if (unlikely((int)(cnt - hpet_t1_cmp) > 0)) { + lost_ints = (cnt - hpet_t1_cmp) / ticks_per_int + 1; + /* Make sure that, even with the time needed to execute + * this code, the next scheduled interrupt has been moved + * back to the future: */ + lost_ints++; + + hpet_t1_cmp += lost_ints * ticks_per_int; + hpet_writel(hpet_t1_cmp, HPET_T1_CMP); + + if (PIE_on) + PIE_count += lost_ints; + + printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", + hpet_rtc_int_freq); + } } /* diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 7e9edafffd8..4fcc6690be9 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -313,6 +313,8 @@ void show_registers(struct pt_regs *regs) */ if (in_kernel) { u8 __user *eip; + int code_bytes = 64; + unsigned char c; printk("\n" KERN_EMERG "Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); @@ -320,9 +322,12 @@ void show_registers(struct pt_regs *regs) printk(KERN_EMERG "Code: "); eip = (u8 __user *)regs->eip - 43; - for (i = 0; i < 64; i++, eip++) { - unsigned char c; - + if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { + /* try starting at EIP */ + eip = (u8 __user *)regs->eip; + code_bytes = 32; + } + for (i = 0; i < code_bytes; i++, eip++) { if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) { printk(" Bad EIP value."); break; diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 2d4f1386e2b..1e7ac1c44dd 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -13,6 +13,12 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) ENTRY(phys_startup_32) jiffies = jiffies_64; + +PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(4); /* R__ */ +} SECTIONS { . = __KERNEL_START; @@ -26,7 +32,7 @@ SECTIONS KPROBES_TEXT *(.fixup) *(.gnu.warning) - } = 0x9090 + } :text = 0x9090 _etext = .; /* End of text section */ @@ -48,7 +54,7 @@ SECTIONS .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ *(.data) CONSTRUCTORS - } + } :data . = ALIGN(4096); __nosave_begin = .; @@ -184,4 +190,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + NOTES } diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c index 50f6de6ff64..f39887359e8 100644 --- a/arch/i386/mach-voyager/voyager_thread.c +++ b/arch/i386/mach-voyager/voyager_thread.c @@ -130,7 +130,6 @@ thread(void *unused) init_timer(&wakeup_timer); sigfillset(¤t->blocked); - current->signal->tty = NULL; printk(KERN_NOTICE "Voyager starting monitor thread\n"); diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c index 5d44f4f5ff5..4de11f508c3 100644 --- a/arch/i386/mm/boot_ioremap.c +++ b/arch/i386/mm/boot_ioremap.c @@ -29,8 +29,11 @@ */ #define BOOT_PTE_PTRS (PTRS_PER_PTE*2) -#define boot_pte_index(address) \ - (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1)) + +static unsigned long boot_pte_index(unsigned long vaddr) +{ + return __pa(vaddr) >> PAGE_SHIFT; +} static inline boot_pte_t* boot_vaddr_to_pte(void *address) { diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 7c392dc553b..fb5d8b747de 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); void *node_remap_end_vaddr[MAX_NUMNODES]; void *node_remap_alloc_vaddr[MAX_NUMNODES]; - +static unsigned long kva_start_pfn; +static unsigned long kva_pages; /* * FLAT - support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat @@ -286,7 +287,6 @@ unsigned long __init setup_memory(void) { int nid; unsigned long system_start_pfn, system_max_low_pfn; - unsigned long reserve_pages; /* * When mapping a NUMA machine we allocate the node_mem_map arrays @@ -298,14 +298,23 @@ unsigned long __init setup_memory(void) find_max_pfn(); get_memcfg_numa(); - reserve_pages = calculate_numa_remap_pages(); + kva_pages = calculate_numa_remap_pages(); /* partially used pages are not usable - thus round upwards */ system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); - system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; - printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", - reserve_pages, max_low_pfn + reserve_pages); + kva_start_pfn = find_max_low_pfn() - kva_pages; + +#ifdef CONFIG_BLK_DEV_INITRD + /* Numa kva area is below the initrd */ + if (LOADER_TYPE && INITRD_START) + kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages; +#endif + kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1); + + system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", + kva_start_pfn, max_low_pfn); printk("max_pfn = %ld\n", max_pfn); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; @@ -323,7 +332,7 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - highstart_pfn + node_remap_offset[nid]); + kva_start_pfn + node_remap_offset[nid]); /* Init the node remap allocator */ node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + (node_remap_size[nid] * PAGE_SIZE); @@ -338,7 +347,6 @@ unsigned long __init setup_memory(void) } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); - vmalloc_earlyreserve = reserve_pages * PAGE_SIZE; for_each_online_node(nid) find_max_pfn_node(nid); @@ -348,13 +356,18 @@ unsigned long __init setup_memory(void) return max_low_pfn; } +void __init numa_kva_reserve(void) +{ + reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages)); +} + void __init zone_sizes_init(void) { int nid; for_each_online_node(nid) { - unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long zones_size[MAX_NR_ZONES] = {0, }; unsigned long *zholes_size; unsigned int max_dma; @@ -409,7 +422,7 @@ void __init set_highmem_pages_init(int bad_ppro) zone_end_pfn = zone_start_pfn + zone->spanned_pages; printk("Initializing %s for node %d (%08lx:%08lx)\n", - zone->name, zone->zone_pgdat->node_id, + zone->name, zone_to_nid(zone), zone_start_pfn, zone_end_pfn); for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 89e8486aac3..efd0bcdac65 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -629,6 +629,48 @@ void __init mem_init(void) (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); +#if 1 /* double-sanity-check paranoia */ + printk("virtual kernel memory layout:\n" + " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#ifdef CONFIG_HIGHMEM + " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n" +#endif + " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n" + " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n" + " .init : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .data : 0x%08lx - 0x%08lx (%4ld kB)\n" + " .text : 0x%08lx - 0x%08lx (%4ld kB)\n", + FIXADDR_START, FIXADDR_TOP, + (FIXADDR_TOP - FIXADDR_START) >> 10, + +#ifdef CONFIG_HIGHMEM + PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE, + (LAST_PKMAP*PAGE_SIZE) >> 10, +#endif + + VMALLOC_START, VMALLOC_END, + (VMALLOC_END - VMALLOC_START) >> 20, + + (unsigned long)__va(0), (unsigned long)high_memory, + ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20, + + (unsigned long)&__init_begin, (unsigned long)&__init_end, + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10, + + (unsigned long)&_etext, (unsigned long)&_edata, + ((unsigned long)&_edata - (unsigned long)&_etext) >> 10, + + (unsigned long)&_text, (unsigned long)&_etext, + ((unsigned long)&_etext - (unsigned long)&_text) >> 10); + +#ifdef CONFIG_HIGHMEM + BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START); + BUG_ON(VMALLOC_END > PKMAP_BASE); +#endif + BUG_ON(VMALLOC_START > VMALLOC_END); + BUG_ON((unsigned long)high_memory > VMALLOC_START); +#endif /* double-sanity-check paranoia */ + #ifdef CONFIG_X86_PAE if (!cpu_has_pae) panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!"); @@ -657,7 +699,7 @@ void __init mem_init(void) int arch_add_memory(int nid, u64 start, u64 size) { struct pglist_data *pgdata = &contig_page_data; - struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; + struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM; unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index bd98768d876..10126e3f817 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <linux/pagemap.h> #include <linux/spinlock.h> +#include <linux/module.h> #include <asm/system.h> #include <asm/pgtable.h> @@ -60,7 +61,9 @@ void show_mem(void) printk(KERN_INFO "%lu pages writeback\n", global_page_state(NR_WRITEBACK)); printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); - printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB)); + printk(KERN_INFO "%lu pages slab\n", + global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)); printk(KERN_INFO "%lu pages pagetables\n", global_page_state(NR_PAGETABLE)); } @@ -137,6 +140,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) __flush_tlb_one(vaddr); } +static int fixmaps; +#ifndef CONFIG_COMPAT_VDSO +unsigned long __FIXADDR_TOP = 0xfffff000; +EXPORT_SYMBOL(__FIXADDR_TOP); +#endif + void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) { unsigned long address = __fix_to_virt(idx); @@ -146,6 +155,25 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) return; } set_pte_pfn(address, phys >> PAGE_SHIFT, flags); + fixmaps++; +} + +/** + * reserve_top_address - reserves a hole in the top of kernel address space + * @reserve - size of hole to reserve + * + * Can be used to relocate the fixmap area and poke a hole in the top + * of kernel address space to make room for a hypervisor. + */ +void reserve_top_address(unsigned long reserve) +{ + BUG_ON(fixmaps > 0); +#ifdef CONFIG_COMPAT_VDSO + BUG_ON(reserve != 0); +#else + __FIXADDR_TOP = -reserve - PAGE_SIZE; + __VMALLOC_RESERVE += reserve; +#endif } pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) diff --git a/arch/i386/power/swsusp.S b/arch/i386/power/swsusp.S index c893b897217..8a2b50a0aaa 100644 --- a/arch/i386/power/swsusp.S +++ b/arch/i386/power/swsusp.S @@ -32,7 +32,7 @@ ENTRY(swsusp_arch_resume) movl $swsusp_pg_dir-__PAGE_OFFSET, %ecx movl %ecx, %cr3 - movl pagedir_nosave, %edx + movl restore_pblist, %edx .p2align 4,,7 copy_loop: |