aboutsummaryrefslogtreecommitdiff
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig4
-rw-r--r--arch/x86_64/defconfig8
-rw-r--r--arch/x86_64/kernel/mpparse.c37
-rw-r--r--arch/x86_64/kernel/nmi.c4
-rw-r--r--arch/x86_64/kernel/pci-dma.c93
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c3
-rw-r--r--arch/x86_64/kernel/setup.c11
-rw-r--r--arch/x86_64/kernel/time.c20
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S17
-rw-r--r--arch/x86_64/kernel/vsyscall.c11
-rw-r--r--arch/x86_64/mm/fault.c6
-rw-r--r--arch/x86_64/mm/init.c38
-rw-r--r--arch/x86_64/mm/ioremap.c111
-rw-r--r--arch/x86_64/mm/srat.c66
14 files changed, 187 insertions, 242 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 32ae1378f35..b87a19f0d58 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -367,6 +367,10 @@ config ARCH_FLATMEM_ENABLE
source "mm/Kconfig"
+config MEMORY_HOTPLUG_RESERVE
+ def_bool y
+ depends on (MEMORY_HOTPLUG && DISCONTIGMEM)
+
config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool y
depends on NUMA
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 647610ecb58..4844b543bed 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,11 +1,12 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.18-git5
-# Tue Sep 26 09:30:47 2006
+# Linux kernel version: 2.6.18-git7
+# Wed Sep 27 21:53:10 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
CONFIG_X86=y
+CONFIG_ZONE_DMA32=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
@@ -179,6 +180,7 @@ CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_PM=y
# CONFIG_PM_LEGACY is not set
# CONFIG_PM_DEBUG is not set
+# CONFIG_PM_SYSFS_DEPRECATED is not set
CONFIG_SOFTWARE_SUSPEND=y
CONFIG_PM_STD_PARTITION=""
CONFIG_SUSPEND_SMP=y
@@ -251,6 +253,7 @@ CONFIG_PCI_DIRECT=y
CONFIG_PCI_MMCONFIG=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_MSI=y
+# CONFIG_PCI_MULTITHREAD_PROBE is not set
# CONFIG_PCI_DEBUG is not set
#
@@ -1458,6 +1461,7 @@ CONFIG_KPROBES=y
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
# CONFIG_PRINTK_TIME is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_UNUSED_SYMBOLS=y
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 20e88f4b564..b8d53dfa993 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -152,6 +152,21 @@ static void __init MP_bus_info (struct mpc_config_bus *m)
}
}
+static int bad_ioapic(unsigned long address)
+{
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+ }
+ if (!address) {
+ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+ " found in table, skipping!\n");
+ return 1;
+ }
+ return 0;
+}
+
static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
{
if (!(m->mpc_flags & MPC_APIC_USABLE))
@@ -159,16 +174,10 @@ static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
printk("I/O APIC #%d at 0x%X.\n",
m->mpc_apicid, m->mpc_apicaddr);
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
- MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
- }
- if (!m->mpc_apicaddr) {
- printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
- " found in MP table, skipping!\n");
+
+ if (bad_ioapic(m->mpc_apicaddr))
return;
- }
+
mp_ioapics[nr_ioapics] = *m;
nr_ioapics++;
}
@@ -647,16 +656,8 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
{
int idx = 0;
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
- }
- if (!address) {
- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
- " found in MADT table, skipping!\n");
+ if (bad_ioapic(address))
return;
- }
idx = nr_ioapics++;
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 4d6fb047952..7af9cb3e2d9 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -28,6 +28,10 @@
#include <asm/mce.h>
#include <asm/intel_arch_perfmon.h>
+int unknown_nmi_panic;
+int nmi_watchdog_enabled;
+int panic_on_unrecovered_nmi;
+
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
* evtsel_nmi_owner tracks the ownership of the event selection
* - different performance counters/ event selection may be reserved for
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index 4dcb671bd19..f8d857453f8 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -170,8 +170,20 @@ void dma_free_coherent(struct device *dev, size_t size,
}
EXPORT_SYMBOL(dma_free_coherent);
+static int forbid_dac __read_mostly;
+
int dma_supported(struct device *dev, u64 mask)
{
+#ifdef CONFIG_PCI
+ if (mask > 0xffffffff && forbid_dac > 0) {
+
+
+
+ printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id);
+ return 0;
+ }
+#endif
+
if (dma_ops->dma_supported)
return dma_ops->dma_supported(dev, mask);
@@ -231,57 +243,64 @@ EXPORT_SYMBOL(dma_set_mask);
allowed overwrite iommu off workarounds for specific chipsets.
soft Use software bounce buffering (default for Intel machines)
noaperture Don't touch the aperture for AGP.
+ allowdac Allow DMA >4GB
+ nodac Forbid DMA >4GB
+ panic Force panic when IOMMU overflows
*/
__init int iommu_setup(char *p)
{
- iommu_merge = 1;
+ iommu_merge = 1;
if (!p)
return -EINVAL;
- while (*p) {
- if (!strncmp(p,"off",3))
- no_iommu = 1;
- /* gart_parse_options has more force support */
- if (!strncmp(p,"force",5))
- force_iommu = 1;
- if (!strncmp(p,"noforce",7)) {
- iommu_merge = 0;
- force_iommu = 0;
- }
-
- if (!strncmp(p, "biomerge",8)) {
- iommu_bio_merge = 4096;
- iommu_merge = 1;
- force_iommu = 1;
- }
- if (!strncmp(p, "panic",5))
- panic_on_overflow = 1;
- if (!strncmp(p, "nopanic",7))
- panic_on_overflow = 0;
- if (!strncmp(p, "merge",5)) {
- iommu_merge = 1;
- force_iommu = 1;
- }
- if (!strncmp(p, "nomerge",7))
- iommu_merge = 0;
- if (!strncmp(p, "forcesac",8))
- iommu_sac_force = 1;
+ while (*p) {
+ if (!strncmp(p,"off",3))
+ no_iommu = 1;
+ /* gart_parse_options has more force support */
+ if (!strncmp(p,"force",5))
+ force_iommu = 1;
+ if (!strncmp(p,"noforce",7)) {
+ iommu_merge = 0;
+ force_iommu = 0;
+ }
+
+ if (!strncmp(p, "biomerge",8)) {
+ iommu_bio_merge = 4096;
+ iommu_merge = 1;
+ force_iommu = 1;
+ }
+ if (!strncmp(p, "panic",5))
+ panic_on_overflow = 1;
+ if (!strncmp(p, "nopanic",7))
+ panic_on_overflow = 0;
+ if (!strncmp(p, "merge",5)) {
+ iommu_merge = 1;
+ force_iommu = 1;
+ }
+ if (!strncmp(p, "nomerge",7))
+ iommu_merge = 0;
+ if (!strncmp(p, "forcesac",8))
+ iommu_sac_force = 1;
+ if (!strncmp(p, "allowdac", 8))
+ forbid_dac = 0;
+ if (!strncmp(p, "nodac", 5))
+ forbid_dac = -1;
#ifdef CONFIG_SWIOTLB
- if (!strncmp(p, "soft",4))
- swiotlb = 1;
+ if (!strncmp(p, "soft",4))
+ swiotlb = 1;
#endif
#ifdef CONFIG_IOMMU
- gart_parse_options(p);
+ gart_parse_options(p);
#endif
- p += strcspn(p, ",");
- if (*p == ',')
- ++p;
- }
- return 0;
+ p += strcspn(p, ",");
+ if (*p == ',')
+ ++p;
+ }
+ return 0;
}
early_param("iommu", iommu_setup);
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index 6a55f87ba97..697f0aa794b 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -3,7 +3,8 @@
#include <linux/pci.h>
#include <linux/cache.h>
#include <linux/module.h>
-#include <asm/dma-mapping.h>
+#include <linux/dma-mapping.h>
+
#include <asm/proto.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 0b00bb2ea57..fc944b5e8f4 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -123,9 +123,6 @@ struct resource standard_io_resources[] = {
.flags = IORESOURCE_BUSY | IORESOURCE_IO }
};
-#define STANDARD_IO_RESOURCES \
- (sizeof standard_io_resources / sizeof standard_io_resources[0])
-
#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
struct resource data_resource = {
@@ -172,9 +169,6 @@ static struct resource adapter_rom_resources[] = {
.flags = IORESOURCE_ROM }
};
-#define ADAPTER_ROM_RESOURCES \
- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
-
static struct resource video_rom_resource = {
.name = "Video ROM",
.start = 0xc0000,
@@ -245,7 +239,8 @@ static void __init probe_roms(void)
}
/* check for adapter roms on 2k boundaries */
- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
+ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
+ start += 2048) {
rom = isa_bus_to_virt(start);
if (!romsignature(rom))
continue;
@@ -537,7 +532,7 @@ void __init setup_arch(char **cmdline_p)
{
unsigned i;
/* request I/O space for devices used on all i[345]86 PCs */
- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
request_resource(&ioport_resource, &standard_io_resources[i]);
}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 1c255ee76e7..557e92af7be 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -77,7 +77,6 @@ unsigned long long monotonic_base;
struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
-unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
struct timespec __xtime __section_xtime;
struct timezone __sys_tz __section_sys_tz;
@@ -119,7 +118,7 @@ unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc;
void do_gettimeofday(struct timeval *tv)
{
- unsigned long seq, t;
+ unsigned long seq;
unsigned int sec, usec;
do {
@@ -136,10 +135,7 @@ void do_gettimeofday(struct timeval *tv)
be found. Note when you fix it here you need to do the same
in arch/x86_64/kernel/vsyscall.c and export all needed
variables in vmlinux.lds. -AK */
-
- t = (jiffies - wall_jiffies) * USEC_PER_TICK +
- do_gettimeoffset();
- usec += t;
+ usec += do_gettimeoffset();
} while (read_seqretry(&xtime_lock, seq));
@@ -165,8 +161,7 @@ int do_settimeofday(struct timespec *tv)
write_seqlock_irq(&xtime_lock);
- nsec -= do_gettimeoffset() * NSEC_PER_USEC +
- (jiffies - wall_jiffies) * NSEC_PER_TICK;
+ nsec -= do_gettimeoffset() * NSEC_PER_USEC;
wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - sec);
wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - nsec);
@@ -415,16 +410,16 @@ void main_timer_handler(struct pt_regs *regs)
(((long) offset << US_SCALE) / vxtime.tsc_quot) - 1;
}
- if (lost > 0) {
+ if (lost > 0)
handle_lost_ticks(lost, regs);
- jiffies += lost;
- }
+ else
+ lost = 0;
/*
* Do the timer stuff.
*/
- do_timer(regs);
+ do_timer(lost + 1);
#ifndef CONFIG_SMP
update_process_times(user_mode(regs));
#endif
@@ -1071,7 +1066,6 @@ static int timer_resume(struct sys_device *dev)
vxtime.last_tsc = get_cycles_sync();
write_sequnlock_irqrestore(&xtime_lock,flags);
jiffies += sleep_length;
- wall_jiffies += sleep_length;
monotonic_base += sleep_length * (NSEC_PER_SEC/HZ);
touch_softlockup_watchdog();
return 0;
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index d0564f1bcb0..b9df2ab6529 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -67,13 +67,6 @@ SECTIONS
_edata = .; /* End of data section */
- __bss_start = .; /* BSS */
- .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
- *(.bss.page_aligned)
- *(.bss)
- }
- __bss_stop = .;
-
. = ALIGN(PAGE_SIZE);
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
.data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
@@ -108,9 +101,6 @@ SECTIONS
.vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
vgetcpu_mode = VVIRT(.vgetcpu_mode);
- .wall_jiffies : AT(VLOAD(.wall_jiffies)) { *(.wall_jiffies) }
- wall_jiffies = VVIRT(.wall_jiffies);
-
.sys_tz : AT(VLOAD(.sys_tz)) { *(.sys_tz) }
sys_tz = VVIRT(.sys_tz);
@@ -229,6 +219,13 @@ SECTIONS
. = ALIGN(4096);
__nosave_end = .;
+ __bss_start = .; /* BSS */
+ .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+ *(.bss.page_aligned)
+ *(.bss)
+ }
+ __bss_stop = .;
+
_end = . ;
/* Sections to be discarded */
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index ac48c3857dd..a98b460af6a 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -66,8 +66,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
sequence = read_seqbegin(&__xtime_lock);
sec = __xtime.tv_sec;
- usec = (__xtime.tv_nsec / 1000) +
- (__jiffies - __wall_jiffies) * (1000000 / HZ);
+ usec = __xtime.tv_nsec / 1000;
if (__vxtime.mode != VXTIME_HPET) {
t = get_cycles_sync();
@@ -155,8 +154,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
We do this here because otherwise user space would do it on
its own in a likely inferior way (no access to jiffies).
If you don't like it pass NULL. */
- if (tcache && tcache->t0 == (j = __jiffies)) {
- p = tcache->t1;
+ if (tcache && tcache->blob[0] == (j = __jiffies)) {
+ p = tcache->blob[1];
} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
rdtscp(dummy, dummy, p);
@@ -165,8 +164,8 @@ vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
}
if (tcache) {
- tcache->t0 = j;
- tcache->t1 = p;
+ tcache->blob[0] = j;
+ tcache->blob[1] = p;
}
if (cpu)
*cpu = p & 0xfff;
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 1a17b0733ab..3751b4788e2 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -244,7 +244,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
int unhandled_signal(struct task_struct *tsk, int sig)
{
- if (tsk->pid == 1)
+ if (is_init(tsk))
return 1;
if (tsk->ptrace & PT_PTRACED)
return 0;
@@ -464,7 +464,7 @@ good_area:
case PF_PROT: /* read, present */
goto bad_area;
case 0: /* read, not present */
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
goto bad_area;
}
@@ -580,7 +580,7 @@ no_context:
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
goto again;
}
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 3e16fe08150..19c72520a86 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -463,19 +463,6 @@ void online_page(struct page *page)
#ifdef CONFIG_MEMORY_HOTPLUG
/*
- * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
- * via probe interface of sysfs. If acpi notifies hot-add event, then it
- * can tell node id by searching dsdt. But, probe interface doesn't have
- * node id. So, return 0 as node id at this time.
- */
-#ifdef CONFIG_NUMA
-int memory_add_physaddr_to_nid(u64 start)
-{
- return 0;
-}
-#endif
-
-/*
* Memory is added always to NORMAL zone. This means you will never get
* additional DMA/DMA32 memory.
*/
@@ -487,12 +474,12 @@ int arch_add_memory(int nid, u64 start, u64 size)
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
+ init_memory_mapping(start, (start + size -1));
+
ret = __add_pages(zone, start_pfn, nr_pages);
if (ret)
goto error;
- init_memory_mapping(start, (start + size -1));
-
return ret;
error:
printk("%s: Problem encountered in __add_pages!\n", __func__);
@@ -506,7 +493,24 @@ int remove_memory(u64 start, u64 size)
}
EXPORT_SYMBOL_GPL(remove_memory);
-#else /* CONFIG_MEMORY_HOTPLUG */
+#ifndef CONFIG_ACPI_NUMA
+int memory_add_physaddr_to_nid(u64 start)
+{
+ return 0;
+}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+#endif
+
+#ifndef CONFIG_ACPI_NUMA
+int memory_add_physaddr_to_nid(u64 start)
+{
+ return 0;
+}
+#endif
+
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
* Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
* just online the pages.
@@ -532,7 +536,7 @@ int __add_pages(struct zone *z, unsigned long start_pfn, unsigned long nr_pages)
}
return err;
}
-#endif /* CONFIG_MEMORY_HOTPLUG */
+#endif
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
kcore_vsyscall;
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index 45d7d823c3b..c6e5e8d401a 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -12,117 +12,16 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <asm/io.h>
+#include <linux/io.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
-#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
#include <asm/proto.h>
#define ISA_START_ADDRESS 0xa0000
#define ISA_END_ADDRESS 0x100000
-static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
-{
- unsigned long end;
- unsigned long pfn;
-
- address &= ~PMD_MASK;
- end = address + size;
- if (end > PMD_SIZE)
- end = PMD_SIZE;
- if (address >= end)
- BUG();
- pfn = phys_addr >> PAGE_SHIFT;
- do {
- if (!pte_none(*pte)) {
- printk("remap_area_pte: page already exists\n");
- BUG();
- }
- set_pte(pte, pfn_pte(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW |
- _PAGE_GLOBAL | _PAGE_DIRTY | _PAGE_ACCESSED | flags)));
- address += PAGE_SIZE;
- pfn++;
- pte++;
- } while (address && (address < end));
-}
-
-static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
-{
- unsigned long end;
-
- address &= ~PUD_MASK;
- end = address + size;
- if (end > PUD_SIZE)
- end = PUD_SIZE;
- phys_addr -= address;
- if (address >= end)
- BUG();
- do {
- pte_t * pte = pte_alloc_kernel(pmd, address);
- if (!pte)
- return -ENOMEM;
- remap_area_pte(pte, address, end - address, address + phys_addr, flags);
- address = (address + PMD_SIZE) & PMD_MASK;
- pmd++;
- } while (address && (address < end));
- return 0;
-}
-
-static inline int remap_area_pud(pud_t * pud, unsigned long address, unsigned long size,
- unsigned long phys_addr, unsigned long flags)
-{
- unsigned long end;
-
- address &= ~PGDIR_MASK;
- end = address + size;
- if (end > PGDIR_SIZE)
- end = PGDIR_SIZE;
- phys_addr -= address;
- if (address >= end)
- BUG();
- do {
- pmd_t * pmd = pmd_alloc(&init_mm, pud, address);
- if (!pmd)
- return -ENOMEM;
- remap_area_pmd(pmd, address, end - address, address + phys_addr, flags);
- address = (address + PUD_SIZE) & PUD_MASK;
- pud++;
- } while (address && (address < end));
- return 0;
-}
-
-static int remap_area_pages(unsigned long address, unsigned long phys_addr,
- unsigned long size, unsigned long flags)
-{
- int error;
- pgd_t *pgd;
- unsigned long end = address + size;
-
- phys_addr -= address;
- pgd = pgd_offset_k(address);
- flush_cache_all();
- if (address >= end)
- BUG();
- do {
- pud_t *pud;
- pud = pud_alloc(&init_mm, pgd, address);
- error = -ENOMEM;
- if (!pud)
- break;
- if (remap_area_pud(pud, address, end - address,
- phys_addr + address, flags))
- break;
- error = 0;
- address = (address + PGDIR_SIZE) & PGDIR_MASK;
- pgd++;
- } while (address && (address < end));
- flush_tlb_all();
- return error;
-}
-
/*
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
@@ -165,6 +64,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
void * addr;
struct vm_struct * area;
unsigned long offset, last_addr;
+ pgprot_t pgprot;
/* Don't allow wraparound or zero size */
last_addr = phys_addr + size - 1;
@@ -194,6 +94,8 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
}
#endif
+ pgprot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_GLOBAL
+ | _PAGE_DIRTY | _PAGE_ACCESSED | flags);
/*
* Mappings have to be page-aligned
*/
@@ -209,7 +111,8 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l
return NULL;
area->phys_addr = phys_addr;
addr = area->addr;
- if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
+ if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
+ phys_addr, pgprot)) {
remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
return NULL;
}
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index f8c04d6935c..3cc0544e25f 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -23,22 +23,13 @@
int acpi_numa __initdata;
-#if (defined(CONFIG_ACPI_HOTPLUG_MEMORY) || \
- defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)) \
- && !defined(CONFIG_MEMORY_HOTPLUG)
-#define RESERVE_HOTADD 1
-#endif
-
static struct acpi_table_slit *acpi_slit;
static nodemask_t nodes_parsed __initdata;
static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
+static struct bootnode nodes_add[MAX_NUMNODES];
static int found_add_area __initdata;
int hotadd_percent __initdata = 0;
-#ifndef RESERVE_HOTADD
-#define hotadd_percent 0 /* Ignore all settings */
-#endif
/* Too small nodes confuse the VM badly. Usually they result
from BIOS bugs. */
@@ -160,7 +151,7 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
pxm, pa->apic_id, node);
}
-#ifdef RESERVE_HOTADD
+#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
/*
* Protect against too large hotadd areas that would fill up memory.
*/
@@ -203,15 +194,37 @@ static int hotadd_enough_memory(struct bootnode *nd)
return 1;
}
+static int update_end_of_memory(unsigned long end)
+{
+ found_add_area = 1;
+ if ((end >> PAGE_SHIFT) > end_pfn)
+ end_pfn = end >> PAGE_SHIFT;
+ return 1;
+}
+
+static inline int save_add_info(void)
+{
+ return hotadd_percent > 0;
+}
+#else
+int update_end_of_memory(unsigned long end) {return 0;}
+static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
+#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+static inline int save_add_info(void) {return 1;}
+#else
+static inline int save_add_info(void) {return 0;}
+#endif
+#endif
/*
- * It is fine to add this area to the nodes data it will be used later
+ * Update nodes_add and decide if to include add are in the zone.
+ * Both SPARSE and RESERVE need nodes_add infomation.
* This code supports one contigious hot add area per node.
*/
static int reserve_hotadd(int node, unsigned long start, unsigned long end)
{
unsigned long s_pfn = start >> PAGE_SHIFT;
unsigned long e_pfn = end >> PAGE_SHIFT;
- int changed = 0;
+ int ret = 0, changed = 0;
struct bootnode *nd = &nodes_add[node];
/* I had some trouble with strange memory hotadd regions breaking
@@ -240,7 +253,6 @@ static int reserve_hotadd(int node, unsigned long start, unsigned long end)
/* Looks good */
- found_add_area = 1;
if (nd->start == nd->end) {
nd->start = start;
nd->end = end;
@@ -258,14 +270,12 @@ static int reserve_hotadd(int node, unsigned long start, unsigned long end)
printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
}
- if ((nd->end >> PAGE_SHIFT) > end_pfn)
- end_pfn = nd->end >> PAGE_SHIFT;
+ ret = update_end_of_memory(nd->end);
if (changed)
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
- return 0;
+ return ret;
}
-#endif
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
void __init
@@ -284,7 +294,7 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
}
if (ma->flags.enabled == 0)
return;
- if (ma->flags.hot_pluggable && hotadd_percent == 0)
+ if (ma->flags.hot_pluggable && !save_add_info())
return;
start = ma->base_addr_lo | ((u64)ma->base_addr_hi << 32);
end = start + (ma->length_lo | ((u64)ma->length_hi << 32));
@@ -327,15 +337,13 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
push_node_boundaries(node, nd->start >> PAGE_SHIFT,
nd->end >> PAGE_SHIFT);
-#ifdef RESERVE_HOTADD
- if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
+ if (ma->flags.hot_pluggable && !reserve_hotadd(node, start, end) < 0) {
/* Ignore hotadd region. Undo damage */
printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
*nd = oldnode;
if ((nd->start | nd->end) == 0)
node_clear(node, nodes_parsed);
}
-#endif
}
/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -351,7 +359,6 @@ static int nodes_cover_memory(void)
unsigned long e = nodes[i].end >> PAGE_SHIFT;
pxmram += e - s;
pxmram -= absent_pages_in_range(s, e);
- pxmram -= nodes_add[i].end - nodes_add[i].start;
if ((long)pxmram < 0)
pxmram = 0;
}
@@ -459,3 +466,16 @@ int __node_distance(int a, int b)
}
EXPORT_SYMBOL(__node_distance);
+
+int memory_add_physaddr_to_nid(u64 start)
+{
+ int i, ret = 0;
+
+ for_each_node(i)
+ if (nodes_add[i].start <= start && nodes_add[i].end > start)
+ ret = i;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+