aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ia64/aliasing.txt208
-rw-r--r--arch/ia64/Kconfig2
-rw-r--r--arch/ia64/Makefile2
-rw-r--r--arch/ia64/kernel/asm-offsets.c16
-rw-r--r--arch/ia64/kernel/efi.c156
-rw-r--r--arch/ia64/kernel/efi_stub.S2
-rw-r--r--arch/ia64/kernel/entry.h1
-rw-r--r--arch/ia64/kernel/mca_asm.S28
-rw-r--r--arch/ia64/kernel/sal.c6
-rw-r--r--arch/ia64/kernel/setup.c1
-rw-r--r--arch/ia64/mm/ioremap.c27
-rw-r--r--arch/ia64/pci/pci.c17
-rw-r--r--arch/ia64/sn/kernel/sn2/sn_hwperf.c50
-rw-r--r--arch/ia64/sn/pci/tioce_provider.c4
-rw-r--r--include/asm-ia64/io.h1
-rw-r--r--include/asm-ia64/mca.h9
-rw-r--r--include/asm-ia64/pgtable.h22
-rw-r--r--include/asm-ia64/sn/sn_sal.h2
-rw-r--r--include/linux/efi.h1
19 files changed, 432 insertions, 123 deletions
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt
new file mode 100644
index 00000000000..38f9a52d182
--- /dev/null
+++ b/Documentation/ia64/aliasing.txt
@@ -0,0 +1,208 @@
+ MEMORY ATTRIBUTE ALIASING ON IA-64
+
+ Bjorn Helgaas
+ <bjorn.helgaas@hp.com>
+ May 4, 2006
+
+
+MEMORY ATTRIBUTES
+
+ Itanium supports several attributes for virtual memory references.
+ The attribute is part of the virtual translation, i.e., it is
+ contained in the TLB entry. The ones of most interest to the Linux
+ kernel are:
+
+ WB Write-back (cacheable)
+ UC Uncacheable
+ WC Write-coalescing
+
+ System memory typically uses the WB attribute. The UC attribute is
+ used for memory-mapped I/O devices. The WC attribute is uncacheable
+ like UC is, but writes may be delayed and combined to increase
+ performance for things like frame buffers.
+
+ The Itanium architecture requires that we avoid accessing the same
+ page with both a cacheable mapping and an uncacheable mapping[1].
+
+ The design of the chipset determines which attributes are supported
+ on which regions of the address space. For example, some chipsets
+ support either WB or UC access to main memory, while others support
+ only WB access.
+
+MEMORY MAP
+
+ Platform firmware describes the physical memory map and the
+ supported attributes for each region. At boot-time, the kernel uses
+ the EFI GetMemoryMap() interface. ACPI can also describe memory
+ devices and the attributes they support, but Linux/ia64 currently
+ doesn't use this information.
+
+ The kernel uses the efi_memmap table returned from GetMemoryMap() to
+ learn the attributes supported by each region of physical address
+ space. Unfortunately, this table does not completely describe the
+ address space because some machines omit some or all of the MMIO
+ regions from the map.
+
+ The kernel maintains another table, kern_memmap, which describes the
+ memory Linux is actually using and the attribute for each region.
+ This contains only system memory; it does not contain MMIO space.
+
+ The kern_memmap table typically contains only a subset of the system
+ memory described by the efi_memmap. Linux/ia64 can't use all memory
+ in the system because of constraints imposed by the identity mapping
+ scheme.
+
+ The efi_memmap table is preserved unmodified because the original
+ boot-time information is required for kexec.
+
+KERNEL IDENTITY MAPPINGS
+
+ Linux/ia64 identity mappings are done with large pages, currently
+ either 16MB or 64MB, referred to as "granules." Cacheable mappings
+ are speculative[2], so the processor can read any location in the
+ page at any time, independent of the programmer's intentions. This
+ means that to avoid attribute aliasing, Linux can create a cacheable
+ identity mapping only when the entire granule supports cacheable
+ access.
+
+ Therefore, kern_memmap contains only full granule-sized regions that
+ can referenced safely by an identity mapping.
+
+ Uncacheable mappings are not speculative, so the processor will
+ generate UC accesses only to locations explicitly referenced by
+ software. This allows UC identity mappings to cover granules that
+ are only partially populated, or populated with a combination of UC
+ and WB regions.
+
+USER MAPPINGS
+
+ User mappings are typically done with 16K or 64K pages. The smaller
+ page size allows more flexibility because only 16K or 64K has to be
+ homogeneous with respect to memory attributes.
+
+POTENTIAL ATTRIBUTE ALIASING CASES
+
+ There are several ways the kernel creates new mappings:
+
+ mmap of /dev/mem
+
+ This uses remap_pfn_range(), which creates user mappings. These
+ mappings may be either WB or UC. If the region being mapped
+ happens to be in kern_memmap, meaning that it may also be mapped
+ by a kernel identity mapping, the user mapping must use the same
+ attribute as the kernel mapping.
+
+ If the region is not in kern_memmap, the user mapping should use
+ an attribute reported as being supported in the EFI memory map.
+
+ Since the EFI memory map does not describe MMIO on some
+ machines, this should use an uncacheable mapping as a fallback.
+
+ mmap of /sys/class/pci_bus/.../legacy_mem
+
+ This is very similar to mmap of /dev/mem, except that legacy_mem
+ only allows mmap of the one megabyte "legacy MMIO" area for a
+ specific PCI bus. Typically this is the first megabyte of
+ physical address space, but it may be different on machines with
+ several VGA devices.
+
+ "X" uses this to access VGA frame buffers. Using legacy_mem
+ rather than /dev/mem allows multiple instances of X to talk to
+ different VGA cards.
+
+ The /dev/mem mmap constraints apply.
+
+ However, since this is for mapping legacy MMIO space, WB access
+ does not make sense. This matters on machines without legacy
+ VGA support: these machines may have WB memory for the entire
+ first megabyte (or even the entire first granule).
+
+ On these machines, we could mmap legacy_mem as WB, which would
+ be safe in terms of attribute aliasing, but X has no way of
+ knowing that it is accessing regular memory, not a frame buffer,
+ so the kernel should fail the mmap rather than doing it with WB.
+
+ read/write of /dev/mem
+
+ This uses copy_from_user(), which implicitly uses a kernel
+ identity mapping. This is obviously safe for things in
+ kern_memmap.
+
+ There may be corner cases of things that are not in kern_memmap,
+ but could be accessed this way. For example, registers in MMIO
+ space are not in kern_memmap, but could be accessed with a UC
+ mapping. This would not cause attribute aliasing. But
+ registers typically can be accessed only with four-byte or
+ eight-byte accesses, and the copy_from_user() path doesn't allow
+ any control over the access size, so this would be dangerous.
+
+ ioremap()
+
+ This returns a kernel identity mapping for use inside the
+ kernel.
+
+ If the region is in kern_memmap, we should use the attribute
+ specified there. Otherwise, if the EFI memory map reports that
+ the entire granule supports WB, we should use that (granules
+ that are partially reserved or occupied by firmware do not appear
+ in kern_memmap). Otherwise, we should use a UC mapping.
+
+PAST PROBLEM CASES
+
+ mmap of various MMIO regions from /dev/mem by "X" on Intel platforms
+
+ The EFI memory map may not report these MMIO regions.
+
+ These must be allowed so that X will work. This means that
+ when the EFI memory map is incomplete, every /dev/mem mmap must
+ succeed. It may create either WB or UC user mappings, depending
+ on whether the region is in kern_memmap or the EFI memory map.
+
+ mmap of 0x0-0xA0000 /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
+
+ See https://bugzilla.novell.com/show_bug.cgi?id=140858.
+
+ The EFI memory map reports the following attributes:
+ 0x00000-0x9FFFF WB only
+ 0xA0000-0xBFFFF UC only (VGA frame buffer)
+ 0xC0000-0xFFFFF WB only
+
+ This mmap is done with user pages, not kernel identity mappings,
+ so it is safe to use WB mappings.
+
+ The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000,
+ which will use a granule-sized UC mapping covering 0-0xFFFFF. This
+ granule covers some WB-only memory, but since UC is non-speculative,
+ the processor will never generate an uncacheable reference to the
+ WB-only areas unless the driver explicitly touches them.
+
+ mmap of 0x0-0xFFFFF legacy_mem by "X"
+
+ If the EFI memory map reports this entire range as WB, there
+ is no VGA MMIO hole, and the mmap should fail or be done with
+ a WB mapping.
+
+ There's no easy way for X to determine whether the 0xA0000-0xBFFFF
+ region is a frame buffer or just memory, so I think it's best to
+ just fail this mmap request rather than using a WB mapping. As
+ far as I know, there's no need to map legacy_mem with WB
+ mappings.
+
+ Otherwise, a UC mapping of the entire region is probably safe.
+ The VGA hole means the region will not be in kern_memmap. The
+ HP sx1000 chipset doesn't support UC access to the memory surrounding
+ the VGA hole, but X doesn't need that area anyway and should not
+ reference it.
+
+ mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
+
+ The EFI memory map reports the following attributes:
+ 0x00000-0xFFFFF WB only (no VGA MMIO hole)
+
+ This is a special case of the previous case, and the mmap should
+ fail for the same reason as above.
+
+NOTES
+
+ [1] SDM rev 2.2, vol 2, sec 4.4.1.
+ [2] SDM rev 2.2, vol 2, sec 4.4.6.
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index fbb25b00629..18318749884 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -449,6 +449,8 @@ config PCI_DOMAINS
bool
default PCI
+source "drivers/pci/pcie/Kconfig"
+
source "drivers/pci/Kconfig"
source "drivers/pci/hotplug/Kconfig"
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 80ea7506fa1..21033ed8330 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -71,6 +71,8 @@ all: compressed unwcheck
compressed: vmlinux.gz
+vmlinuz: vmlinux.gz
+
vmlinux.gz: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 77225659e96..16e7b6600ae 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -217,16 +217,24 @@ void foo(void)
DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
offsetof (struct ia64_mca_cpu, init_stack));
BLANK();
- DEFINE(IA64_SAL_OS_STATE_COMMON_OFFSET,
- offsetof (struct ia64_sal_os_state, sal_ra));
DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET,
offsetof (struct ia64_sal_os_state, os_gp));
- DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
- offsetof (struct ia64_sal_os_state, pal_min_state));
DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET,
offsetof (struct ia64_sal_os_state, proc_state_param));
+ DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET,
+ offsetof (struct ia64_sal_os_state, sal_ra));
+ DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET,
+ offsetof (struct ia64_sal_os_state, sal_gp));
+ DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET,
+ offsetof (struct ia64_sal_os_state, pal_min_state));
+ DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET,
+ offsetof (struct ia64_sal_os_state, os_status));
+ DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET,
+ offsetof (struct ia64_sal_os_state, context));
DEFINE(IA64_SAL_OS_STATE_SIZE,
sizeof (struct ia64_sal_os_state));
+ BLANK();
+
DEFINE(IA64_PMSA_GR_OFFSET,
offsetof (struct pal_min_state_area_s, pmsa_gr));
DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 12cfedce73b..c33d0ba7e30 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -8,6 +8,8 @@
* Copyright (C) 1999-2003 Hewlett-Packard Co.
* David Mosberger-Tang <davidm@hpl.hp.com>
* Stephane Eranian <eranian@hpl.hp.com>
+ * (c) Copyright 2006 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
*
* All EFI Runtime Services are not implemented yet as EFI only
* supports physical mode addressing on SoftSDV. This is to be fixed
@@ -622,28 +624,20 @@ efi_get_iobase (void)
return 0;
}
-static efi_memory_desc_t *
-efi_memory_descriptor (unsigned long phys_addr)
+static struct kern_memdesc *
+kern_memory_descriptor (unsigned long phys_addr)
{
- void *efi_map_start, *efi_map_end, *p;
- efi_memory_desc_t *md;
- u64 efi_desc_size;
-
- efi_map_start = __va(ia64_boot_param->efi_memmap);
- efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
- efi_desc_size = ia64_boot_param->efi_memdesc_size;
+ struct kern_memdesc *md;
- for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
- md = p;
-
- if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
+ for (md = kern_memmap; md->start != ~0UL; md++) {
+ if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT))
return md;
}
return 0;
}
-static int
-efi_memmap_has_mmio (void)
+static efi_memory_desc_t *
+efi_memory_descriptor (unsigned long phys_addr)
{
void *efi_map_start, *efi_map_end, *p;
efi_memory_desc_t *md;
@@ -656,8 +650,8 @@ efi_memmap_has_mmio (void)
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
md = p;
- if (md->type == EFI_MEMORY_MAPPED_IO)
- return 1;
+ if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
+ return md;
}
return 0;
}
@@ -683,71 +677,125 @@ efi_mem_attributes (unsigned long phys_addr)
}
EXPORT_SYMBOL(efi_mem_attributes);
-/*
- * Determines whether the memory at phys_addr supports the desired
- * attribute (WB, UC, etc). If this returns 1, the caller can safely
- * access size bytes at phys_addr with the specified attribute.
- */
-int
-efi_mem_attribute_range (unsigned long phys_addr, unsigned long size, u64 attr)
+u64
+efi_mem_attribute (unsigned long phys_addr, unsigned long size)
{
unsigned long end = phys_addr + size;
efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+ u64 attr;
+
+ if (!md)
+ return 0;
+
+ /*
+ * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells
+ * the kernel that firmware needs this region mapped.
+ */
+ attr = md->attribute & ~EFI_MEMORY_RUNTIME;
+ do {
+ unsigned long md_end = efi_md_end(md);
+
+ if (end <= md_end)
+ return attr;
+
+ md = efi_memory_descriptor(md_end);
+ if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr)
+ return 0;
+ } while (md);
+ return 0;
+}
+
+u64
+kern_mem_attribute (unsigned long phys_addr, unsigned long size)
+{
+ unsigned long end = phys_addr + size;
+ struct kern_memdesc *md;
+ u64 attr;
/*
- * Some firmware doesn't report MMIO regions in the EFI memory
- * map. The Intel BigSur (a.k.a. HP i2000) has this problem.
- * On those platforms, we have to assume UC is valid everywhere.
+ * This is a hack for ioremap calls before we set up kern_memmap.
+ * Maybe we should do efi_memmap_init() earlier instead.
*/
- if (!md || (md->attribute & attr) != attr) {
- if (attr == EFI_MEMORY_UC && !efi_memmap_has_mmio())
- return 1;
+ if (!kern_memmap) {
+ attr = efi_mem_attribute(phys_addr, size);
+ if (attr & EFI_MEMORY_WB)
+ return EFI_MEMORY_WB;
return 0;
}
+ md = kern_memory_descriptor(phys_addr);
+ if (!md)
+ return 0;
+
+ attr = md->attribute;
do {
- unsigned long md_end = efi_md_end(md);
+ unsigned long md_end = kmd_end(md);
if (end <= md_end)
- return 1;
+ return attr;
- md = efi_memory_descriptor(md_end);
- if (!md || (md->attribute & attr) != attr)
+ md = kern_memory_descriptor(md_end);
+ if (!md || md->attribute != attr)
return 0;
} while (md);
return 0;
}
+EXPORT_SYMBOL(kern_mem_attribute);
-/*
- * For /dev/mem, we only allow read & write system calls to access
- * write-back memory, because read & write don't allow the user to
- * control access size.
- */
int
valid_phys_addr_range (unsigned long phys_addr, unsigned long size)
{
- return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB);
+ u64 attr;
+
+ /*
+ * /dev/mem reads and writes use copy_to_user(), which implicitly
+ * uses a granule-sized kernel identity mapping. It's really
+ * only safe to do this for regions in kern_memmap. For more
+ * details, see Documentation/ia64/aliasing.txt.
+ */
+ attr = kern_mem_attribute(phys_addr, size);
+ if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC)
+ return 1;
+ return 0;
}
-/*
- * We allow mmap of anything in the EFI memory map that supports
- * either write-back or uncacheable access. For uncacheable regions,
- * the supported access sizes are system-dependent, and the user is
- * responsible for using the correct size.
- *
- * Note that this doesn't currently allow access to hot-added memory,
- * because that doesn't appear in the boot-time EFI memory map.
- */
int
valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long size)
{
- if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB))
- return 1;
+ /*
+ * MMIO regions are often missing from the EFI memory map.
+ * We must allow mmap of them for programs like X, so we
+ * currently can't do any useful validation.
+ */
+ return 1;
+}
- if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC))
- return 1;
+pgprot_t
+phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size,
+ pgprot_t vma_prot)
+{
+ unsigned long phys_addr = pfn << PAGE_SHIFT;
+ u64 attr;
- return 0;
+ /*
+ * For /dev/mem mmap, we use user mappings, but if the region is
+ * in kern_memmap (and hence may be covered by a kernel mapping),
+ * we must use the same attribute as the kernel mapping.
+ */
+ attr = kern_mem_attribute(phys_addr, size);
+ if (attr & EFI_MEMORY_WB)
+ return pgprot_cacheable(vma_prot);
+ else if (attr & EFI_MEMORY_UC)
+ return pgprot_noncached(vma_prot);
+
+ /*
+ * Some chipsets don't support UC access to memory. If
+ * WB is supported, we prefer that.
+ */
+ if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB)
+ return pgprot_cacheable(vma_prot);
+
+ return pgprot_noncached(vma_prot);
}
int __init
diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S
index 5a7fe70212a..a56e161d751 100644
--- a/arch/ia64/kernel/efi_stub.S
+++ b/arch/ia64/kernel/efi_stub.S
@@ -61,7 +61,7 @@ GLOBAL_ENTRY(efi_call_phys)
or loc3=loc3,r17
mov b6=r2
;;
- andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
+ andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
br.call.sptk.many rp=ia64_switch_mode_phys
.ret0: mov out4=in5
mov out0=in1
diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h
index 78eeb079341..ebc3dfb8882 100644
--- a/arch/ia64/kernel/entry.h
+++ b/arch/ia64/kernel/entry.h
@@ -23,6 +23,7 @@
#define PT(f) (IA64_PT_REGS_##f##_OFFSET)
#define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET)
+#define SOS(f) (IA64_SAL_OS_STATE_##f##_OFFSET)
#define PT_REGS_SAVES(off) \
.unwabi 3, 'i'; \
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 6dff024cd62..c1bd1feffab 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -159,7 +159,7 @@ ia64_os_mca_spin:
GET_IA64_MCA_DATA(r2)
// Using MCA stack, struct ia64_sal_os_state, variable proc_state_param
;;
- add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, r2
+ add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2
;;
ld8 r18=[r3] // Get processor state parameter on existing PALE_CHECK.
;;
@@ -479,9 +479,11 @@ ia64_state_save:
st8 [temp2]=r11,16 // rv_rc
mov r11=cr.iipa
;;
- st8 [temp1]=r18,16 // proc_state_param
- st8 [temp2]=r19,16 // monarch
+ st8 [temp1]=r18 // proc_state_param
+ st8 [temp2]=r19 // monarch
mov r6=IA64_KR(CURRENT)
+ add temp1=SOS(SAL_RA), regs
+ add temp2=SOS(SAL_GP), regs
;;
st8 [temp1]=r12,16 // sal_ra
st8 [temp2]=r10,16 // sal_gp
@@ -503,12 +505,14 @@ ia64_state_save:
st8 [temp2]=r11,16 // cr.iipa
mov r12=cr.iim
;;
- st8 [temp1]=r12,16 // cr.iim
+ st8 [temp1]=r12 // cr.iim
(p1) mov r12=IA64_MCA_COLD_BOOT
(p2) mov r12=IA64_INIT_WARM_BOOT
mov r6=cr.iha
+ add temp1=SOS(OS_STATUS), regs
;;
- st8 [temp2]=r6,16 // cr.iha
+ st8 [temp2]=r6 // cr.iha
+ add temp2=SOS(CONTEXT), regs
st8 [temp1]=r12 // os_status, default is cold boot
mov r6=IA64_MCA_SAME_CONTEXT
;;
@@ -820,8 +824,8 @@ ia64_state_restore:
// Restore the SAL to OS state. The previous code left regs at pt_regs.
add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs
;;
- add temp1=IA64_SAL_OS_STATE_COMMON_OFFSET, regs
- add temp2=IA64_SAL_OS_STATE_COMMON_OFFSET+8, regs
+ add temp1=SOS(SAL_RA), regs
+ add temp2=SOS(SAL_GP), regs
;;
ld8 r12=[temp1],16 // sal_ra
ld8 r9=[temp2],16 // sal_gp
@@ -842,8 +846,10 @@ ia64_state_restore:
;;
mov cr.itir=temp3
mov cr.iipa=temp4
- ld8 temp3=[temp1],16 // cr.iim
- ld8 temp4=[temp2],16 // cr.iha
+ ld8 temp3=[temp1] // cr.iim
+ ld8 temp4=[temp2] // cr.iha
+ add temp1=SOS(OS_STATUS), regs
+ add temp2=SOS(CONTEXT), regs
;;
mov cr.iim=temp3
mov cr.iha=temp4
@@ -916,7 +922,7 @@ ia64_state_restore:
ia64_new_stack:
add regs=MCA_PT_REGS_OFFSET, r3
- add temp2=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, r3
+ add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3
mov b0=r2 // save return address
GET_IA64_MCA_DATA(temp1)
invala
@@ -1020,7 +1026,7 @@ ia64_old_stack:
ia64_set_kernel_registers:
add temp3=MCA_SP_OFFSET, r3
- add temp4=MCA_SOS_OFFSET+IA64_SAL_OS_STATE_OS_GP_OFFSET, r3
+ add temp4=MCA_SOS_OFFSET+SOS(OS_GP), r3
mov b0=r2 // save return address
GET_IA64_MCA_DATA(temp1)
;;
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
index 056f7a6eedc..77fa65903d9 100644
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@@ -227,7 +227,7 @@ static int sal_cache_flush_drops_interrupts;
static void __init
check_sal_cache_flush (void)
{
- unsigned long flags, itv;
+ unsigned long flags;
int cpu;
u64 vector;
@@ -238,9 +238,6 @@ check_sal_cache_flush (void)
* Schedule a timer interrupt, wait until it's reported, and see if
* SAL_CACHE_FLUSH drops it.
*/
- itv = ia64_get_itv();
- BUG_ON((itv & (1 << 16)) == 0);
-
ia64_set_itv(IA64_TIMER_VECTOR);
ia64_set_itm(ia64_get_itc() + 1000);
@@ -260,7 +257,6 @@ check_sal_cache_flush (void)
ia64_eoi();
}
- ia64_set_itv(itv);
local_irq_restore(flags);
put_cpu();
}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index e4dfda1eb7d..6dba2d63f24 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -260,6 +260,7 @@ reserve_memory (void)
n++;
num_rsvd_regions = n;
+ BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n);
sort_regions(rsvd_region, num_rsvd_regions);
}
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 643ccc6960c..07bd02b6c37 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/efi.h>
#include <asm/io.h>
+#include <asm/meminit.h>
static inline void __iomem *
__ioremap (unsigned long offset, unsigned long size)
@@ -21,16 +22,29 @@ __ioremap (unsigned long offset, unsigned long size)
void __iomem *
ioremap (unsigned long offset, unsigned long size)
{
- if (efi_mem_attribute_range(offset, size, EFI_MEMORY_WB))
- return phys_to_virt(offset);
+ u64 attr;
+ unsigned long gran_base, gran_size;
- if (efi_mem_attribute_range(offset, size, EFI_MEMORY_UC))
+ /*
+ * For things in kern_memmap, we must use the same attribute
+ * as the rest of the kernel. For more details, see
+ * Documentation/ia64/aliasing.txt.
+ */
+ attr = kern_mem_attribute(offset, size);
+ if (attr & EFI_MEMORY_WB)
+ return phys_to_virt(offset);
+ else if (attr & EFI_MEMORY_UC)
return __ioremap(offset, size);
/*
- * Someday this should check ACPI resources so we
- * can do the right thing for hot-plugged regions.
+ * Some chipsets don't support UC access to memory. If
+ * WB is supported for the whole granule, we prefer that.
*/
+ gran_base = GRANULEROUNDDOWN(offset);
+ gran_size = GRANULEROUNDUP(offset + size) - gran_base;
+ if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB)
+ return phys_to_virt(offset);
+
return __ioremap(offset, size);
}
EXPORT_SYMBOL(ioremap);
@@ -38,6 +52,9 @@ EXPORT_SYMBOL(ioremap);
void __iomem *
ioremap_nocache (unsigned long offset, unsigned long size)
{
+ if (kern_mem_attribute(offset, size) & EFI_MEMORY_WB)
+ return 0;
+
return __ioremap(offset, size);
}
EXPORT_SYMBOL(ioremap_nocache);
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index cf7751b99d1..61dd8608da4 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -645,18 +645,31 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
int
pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
{
+ unsigned long size = vma->vm_end - vma->vm_start;
+ pgprot_t prot;
char *addr;
+ /*
+ * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt
+ * for more details.
+ */
+ if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, size))
+ return -EINVAL;
+ prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size,
+ vma->vm_page_prot);
+ if (pgprot_val(prot) != pgprot_val(pgprot_noncached(vma->vm_page_prot)))
+ return -EINVAL;
+
addr = pci_get_legacy_mem(bus);
if (IS_ERR(addr))
return PTR_ERR(addr);
vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_page_prot = prot;
vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO);
if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start, vma->vm_page_prot))
+ size, vma->vm_page_prot))
return -EAGAIN;
return 0;
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 739c948dc50..9a8a29339d2 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -51,6 +51,8 @@ static nasid_t sn_hwperf_master_nasid = INVALID_NASID;
static int sn_hwperf_init(void);
static DECLARE_MUTEX(sn_hwperf_init_mutex);
+#define cnode_possible(n) ((n) < num_cnodes)
+
static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret)
{
int e;
@@ -127,14 +129,14 @@ static int sn_hwperf_geoid_to_cnode(char *location)
}
}
- return node_possible(cnode) ? cnode : -1;
+ return cnode_possible(cnode) ? cnode : -1;
}
static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj)
{
if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))
BUG();
- if (!obj->sn_hwp_this_part)
+ if (SN_HWPERF_FOREIGN(obj))
return -1;
return sn_hwperf_geoid_to_cnode(obj->location);
}
@@ -199,12 +201,12 @@ static void print_pci_topology(struct seq_file *s)
static inline int sn_hwperf_has_cpus(cnodeid_t node)
{
- return node_online(node) && nr_cpus_node(node);
+ return node < MAX_NUMNODES && node_online(node) && nr_cpus_node(node);
}
static inline int sn_hwperf_has_mem(cnodeid_t node)
{
- return node_online(node) && NODE_DATA(node)->node_present_pages;
+ return node < MAX_NUMNODES && node_online(node) && NODE_DATA(node)->node_present_pages;
}
static struct sn_hwperf_object_info *
@@ -237,7 +239,7 @@ static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objb
int found_mem = 0;
int found_cpu = 0;
- if (!node_possible(node))
+ if (!cnode_possible(node))
return -EINVAL;
if (sn_hwperf_has_cpus(node)) {
@@ -442,7 +444,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location,
obj->sn_hwp_this_part ? "local" : "shared", obj->name);
- if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))
+ if (ordinal < 0 || (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)))
seq_putc(s, '\n');
else {
cnodeid_t near_mem = -1;
@@ -468,22 +470,24 @@ static int sn_topology_show(struct seq_file *s, void *d)
/*
* CPUs on this node, if any
*/
- cpumask = node_to_cpumask(ordinal);
- for_each_online_cpu(i) {
- if (cpu_isset(i, cpumask)) {
- slice = 'a' + cpuid_to_slice(i);
- c = cpu_data(i);
- seq_printf(s, "cpu %d %s%c local"
- " freq %luMHz, arch ia64",
- i, obj->location, slice,
- c->proc_freq / 1000000);
- for_each_online_cpu(j) {
- seq_printf(s, j ? ":%d" : ", dist %d",
- node_distance(
- cpu_to_node(i),
- cpu_to_node(j)));
+ if (!SN_HWPERF_IS_IONODE(obj)) {
+ cpumask = node_to_cpumask(ordinal);
+ for_each_online_cpu(i) {
+ if (cpu_isset(i, cpumask)) {
+ slice = 'a' + cpuid_to_slice(i);
+ c = cpu_data(i);
+ seq_printf(s, "cpu %d %s%c local"
+ " freq %luMHz, arch ia64",
+ i, obj->location, slice,
+ c->proc_freq / 1000000);
+ for_each_online_cpu(j) {
+ seq_printf(s, j ? ":%d" : ", dist %d",
+ node_distance(
+ cpu_to_node(i),
+ cpu_to_node(j)));
+ }
+ seq_putc(s, '\n');
}
- seq_putc(s, '\n');
}
}
}
@@ -523,7 +527,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
if (obj->sn_hwp_this_part && p->sn_hwp_this_part)
/* both ends local to this partition */
seq_puts(s, " local");
- else if (!obj->sn_hwp_this_part && !p->sn_hwp_this_part)
+ else if (SN_HWPERF_FOREIGN(p))
/* both ends of the link in foreign partiton */
seq_puts(s, " foreign");
else
@@ -776,7 +780,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
case SN_HWPERF_GET_NODE_NASID:
if (a.sz != sizeof(u64) ||
- (node = a.arg) < 0 || !node_possible(node)) {
+ (node = a.arg) < 0 || !cnode_possible(node)) {
r = -EINVAL;
goto error;
}
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index 4cac7bdc7c7..2d7948567eb 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (C) 2003-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2003-2006 Silicon Graphics, Inc. All Rights Reserved.
*/
#include <linux/types.h>
@@ -1023,7 +1023,7 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL);
tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias,
~0ULL);
- tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, 0ULL);
if (request_irq(SGI_PCIASIC_ERROR,
tioce_error_intr_handler,
diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h
index c2e3742108b..781ee2c7e8c 100644
--- a/include/asm-ia64/io.h
+++ b/include/asm-ia64/io.h
@@ -88,6 +88,7 @@ phys_to_virt (unsigned long address)
}
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
+extern u64 kern_mem_attribute (unsigned long phys_addr, unsigned long size);
extern int valid_phys_addr_range (unsigned long addr, size_t count); /* efi.c */
extern int valid_mmap_phys_addr_range (unsigned long addr, size_t count);
diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h
index 9c5389b7e62..ee97f7c2d46 100644
--- a/include/asm-ia64/mca.h
+++ b/include/asm-ia64/mca.h
@@ -69,14 +69,16 @@ typedef struct ia64_mc_info_s {
*/
struct ia64_sal_os_state {
- /* SAL to OS, must be at offset 0 */
+
+ /* SAL to OS */
u64 os_gp; /* GP of the os registered with the SAL, physical */
u64 pal_proc; /* PAL_PROC entry point, physical */
u64 sal_proc; /* SAL_PROC entry point, physical */
u64 rv_rc; /* MCA - Rendezvous state, INIT - reason code */
u64 proc_state_param; /* from R18 */
u64 monarch; /* 1 for a monarch event, 0 for a slave */
- /* common, must follow SAL to OS */
+
+ /* common */
u64 sal_ra; /* Return address in SAL, physical */
u64 sal_gp; /* GP of the SAL - physical */
pal_min_state_area_t *pal_min_state; /* from R17. physical in asm, virtual in C */
@@ -98,7 +100,8 @@ struct ia64_sal_os_state {
u64 iipa;
u64 iim;
u64 iha;
- /* OS to SAL, must follow common */
+
+ /* OS to SAL */
u64 os_status; /* OS status to SAL, enum below */
u64 context; /* 0 if return to same context
1 if return to new context */
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index eaac08d5e0b..228981cadf8 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -316,22 +316,20 @@ ia64_phys_addr_valid (unsigned long addr)
#define pte_mkhuge(pte) (__pte(pte_val(pte)))
/*
- * Macro to a page protection value as "uncacheable". Note that "protection" is really a
- * misnomer here as the protection value contains the memory attribute bits, dirty bits,
- * and various other bits as well.
+ * Make page protection values cacheable, uncacheable, or write-
+ * combining. Note that "protection" is really a misnomer here as the
+ * protection value contains the memory attribute bits, dirty bits, and
+ * various other bits as well.
*/
+#define pgprot_cacheable(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WB)
#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC)
-
-/*
- * Macro to make mark a page protection value as "write-combining".
- * Note that "protection" is really a misnomer here as the protection
- * value contains the memory attribute bits, dirty bits, and various
- * other bits as well. Accesses through a write-combining translation
- * works bypasses the caches, but does allow for consecutive writes to
- * be combined into single (but larger) write transactions.
- */
#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC)
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot);
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+
static inline unsigned long
pgd_index (unsigned long address)
{
diff --git a/include/asm-ia64/sn/sn_sal.h b/include/asm-ia64/sn/sn_sal.h
index 8c865e43f60..cd490b20d59 100644
--- a/include/asm-ia64/sn/sn_sal.h
+++ b/include/asm-ia64/sn/sn_sal.h
@@ -345,7 +345,7 @@ ia64_sn_plat_set_error_handling_features(void)
ret_stuff.v1 = 0;
ret_stuff.v2 = 0;
SAL_CALL_REENTRANT(ret_stuff, SN_SAL_SET_ERROR_HANDLING_FEATURES,
- (SAL_ERR_FEAT_MCA_SLV_TO_OS_INIT_SLV | SAL_ERR_FEAT_LOG_SBES),
+ SAL_ERR_FEAT_LOG_SBES,
0, 0, 0, 0, 0, 0);
return ret_stuff.status;
diff --git a/include/linux/efi.h b/include/linux/efi.h
index e203613d3ae..66d621dbcb6 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -294,6 +294,7 @@ extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if pos
extern u64 efi_get_iobase (void);
extern u32 efi_mem_type (unsigned long phys_addr);
extern u64 efi_mem_attributes (unsigned long phys_addr);
+extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size,
u64 attr);
extern int __init efi_uart_console_only (void);