author    Paul Mackerras <paulus@samba.org>  2007-10-11 20:37:10 +1000
committer Paul Mackerras <paulus@samba.org>  2007-10-12 14:05:17 +1000
commit    1189be6508d45183013ddb82b18f4934193de274
tree      58924481b4de56699e4a884dce8dc601e71cf7d1 /include/asm-powerpc
parent    287e5d6fcccfa38b953cebe307e1ddfd32363355
[POWERPC] Use 1TB segments
This makes the kernel use 1TB segments for all kernel mappings and for
user addresses of 1TB and above, on machines which support them
(currently POWER5+, POWER6 and PA6T).
We detect that the machine supports 1TB segments by looking at the
ibm,processor-segment-sizes property in the device tree.
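For context, that property lives in each "cpu" node of the device tree and lists the supported segment-size shifts; a cell value of 40 (2^40 bytes = 1TB) indicates 1TB segment support. The sketch below shows roughly how a flattened-device-tree scan could set the new CPU_FTR_1T_SEGMENT bit. The function name and exact parsing are illustrative assumptions; the real detection code is outside the include/asm-powerpc portion of the diff shown here.

```c
/* Hedged sketch (not part of this diff): scan the flattened device tree
 * for 1TB segment support.  Helper name and details are illustrative;
 * of_get_flat_dt_prop()/of_scan_flat_dt() are the usual powerpc FDT
 * scanning interfaces of this era. */
#include <linux/init.h>
#include <linux/string.h>
#include <asm/prom.h>
#include <asm/cputable.h>

static int __init scan_seg_sizes(unsigned long node, const char *uname,
				 int depth, void *data)
{
	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
	u32 *prop;
	unsigned long size = 0;

	/* Only "cpu" nodes carry ibm,processor-segment-sizes */
	if (type == NULL || strcmp(type, "cpu") != 0)
		return 0;

	prop = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", &size);
	if (prop == NULL)
		return 0;

	/* Each cell is a segment-size shift: 28 = 256MB, 40 = 1TB */
	for (; size >= 4; size -= 4, ++prop) {
		if (*prop == 40) {
			cur_cpu_spec->cpu_features |= CPU_FTR_1T_SEGMENT;
			return 1;	/* found it, stop scanning */
		}
	}
	return 0;
}

/* Would be invoked early in MMU setup, e.g.:
 *	of_scan_flat_dt(scan_seg_sizes, NULL);
 */
```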
We don't currently use 1TB segments for user addresses < 1T, since
that would effectively prevent 32-bit processes from using huge pages
unless we also had a way to revert to using 256MB segments. That
would be possible but would involve extra complications (such as
keeping track of which segment size was used when HPTEs were inserted)
and is not addressed here.
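To make the boundary concrete: the new user_segment_size() helper (added to mmu-hash64.h below) only switches to 1TB segments for effective addresses at or above 1TB, so a 32-bit process, whose addresses all sit below 4GB, never leaves 256MB segments. A minimal standalone sketch of that rule, with the relevant constants copied from the diff and mmu_highuser_ssize passed in as a parameter (assuming a 64-bit build and a CPU that advertises 1T support):

```c
#include <stdio.h>

/* Constants copied from the page_64.h / mmu-hash64.h hunks below */
#define SID_SHIFT		28	/* 256MB segments */
#define SID_SHIFT_1T		40	/* 1TB segments */
#define MMU_SEGSIZE_256M	0
#define MMU_SEGSIZE_1T		1

/* Mirrors the new user_segment_size(): 1TB segments only for addresses
 * at or above 1TB.  The highuser_ssize argument stands in for the
 * kernel's mmu_highuser_ssize global. */
static int user_segment_size(unsigned long addr, int highuser_ssize)
{
	if (addr >= (1UL << SID_SHIFT_1T))
		return highuser_ssize;
	return MMU_SEGSIZE_256M;
}

int main(void)
{
	/* A 32-bit process can only generate EAs below 4GB, so it always
	 * stays in 256MB segments and keeps its huge page mappings. */
	printf("%d\n", user_segment_size(0xf0000000UL, MMU_SEGSIZE_1T));

	/* A 64-bit process touching 2TB gets a 1TB segment. */
	printf("%d\n", user_segment_size(0x20000000000UL, MMU_SEGSIZE_1T));
	return 0;
}
```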
Parts of this patch were originally written by Ben Herrenschmidt.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'include/asm-powerpc')
 -rw-r--r--  include/asm-powerpc/cputable.h   |   3
 -rw-r--r--  include/asm-powerpc/machdep.h    |   8
 -rw-r--r--  include/asm-powerpc/mmu-hash64.h | 140
 -rw-r--r--  include/asm-powerpc/page_64.h    |   8
 -rw-r--r--  include/asm-powerpc/tlbflush.h   |   3

5 files changed, 113 insertions(+), 49 deletions(-)
```diff
diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h
index d913f460e71..ae093ef6836 100644
--- a/include/asm-powerpc/cputable.h
+++ b/include/asm-powerpc/cputable.h
@@ -164,6 +164,7 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start,
 #define CPU_FTR_CELL_TB_BUG		LONG_ASM_CONST(0x0000800000000000)
 #define CPU_FTR_SPURR			LONG_ASM_CONST(0x0001000000000000)
 #define CPU_FTR_DSCR			LONG_ASM_CONST(0x0002000000000000)
+#define CPU_FTR_1T_SEGMENT		LONG_ASM_CONST(0x0004000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -374,7 +375,7 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start,
 #define CPU_FTRS_POSSIBLE	\
 	    (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 |	\
 	    CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 |	\
-	    CPU_FTRS_CELL | CPU_FTRS_PA6T)
+	    CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_1T_SEGMENT)
 #else
 enum {
 	CPU_FTRS_POSSIBLE =
diff --git a/include/asm-powerpc/machdep.h b/include/asm-powerpc/machdep.h
index cc7c17f16a9..6968f4300dc 100644
--- a/include/asm-powerpc/machdep.h
+++ b/include/asm-powerpc/machdep.h
@@ -51,22 +51,22 @@ struct machdep_calls {
 #ifdef CONFIG_PPC64
 	void		(*hpte_invalidate)(unsigned long slot,
 					   unsigned long va,
-					   int psize,
+					   int psize, int ssize,
 					   int local);
 	long		(*hpte_updatepp)(unsigned long slot,
 					 unsigned long newpp,
 					 unsigned long va,
-					 int pize,
+					 int psize, int ssize,
 					 int local);
 	void		(*hpte_updateboltedpp)(unsigned long newpp,
 					       unsigned long ea,
-					       int psize);
+					       int psize, int ssize);
 	long		(*hpte_insert)(unsigned long hpte_group,
 				       unsigned long va,
 				       unsigned long prpn,
 				       unsigned long rflags,
 				       unsigned long vflags,
-				       int psize);
+				       int psize, int ssize);
 	long		(*hpte_remove)(unsigned long hpte_group);
 	void		(*flush_hash_range)(unsigned long number, int local);
diff --git a/include/asm-powerpc/mmu-hash64.h b/include/asm-powerpc/mmu-hash64.h
index b22b0d20e15..82328dec2b5 100644
--- a/include/asm-powerpc/mmu-hash64.h
+++ b/include/asm-powerpc/mmu-hash64.h
@@ -47,6 +47,8 @@ extern char initial_stab[];
 
 /* Bits in the SLB VSID word */
 #define SLB_VSID_SHIFT		12
+#define SLB_VSID_SHIFT_1T	24
+#define SLB_VSID_SSIZE_SHIFT	62
 #define SLB_VSID_B		ASM_CONST(0xc000000000000000)
 #define SLB_VSID_B_256M		ASM_CONST(0x0000000000000000)
 #define SLB_VSID_B_1T		ASM_CONST(0x4000000000000000)
@@ -66,6 +68,7 @@ extern char initial_stab[];
 #define SLB_VSID_USER		(SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
 
 #define SLBIE_C			(0x08000000)
+#define SLBIE_SSIZE_SHIFT	25
 
 /*
  * Hash table
@@ -77,7 +80,7 @@ extern char initial_stab[];
 #define HPTE_V_AVPN_SHIFT	7
 #define HPTE_V_AVPN		ASM_CONST(0x3fffffffffffff80)
 #define HPTE_V_AVPN_VAL(x)	(((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
-#define HPTE_V_COMPARE(x,y)	(!(((x) ^ (y)) & HPTE_V_AVPN))
+#define HPTE_V_COMPARE(x,y)	(!(((x) ^ (y)) & 0xffffffffffffff80))
 #define HPTE_V_BOLTED		ASM_CONST(0x0000000000000010)
 #define HPTE_V_LOCK		ASM_CONST(0x0000000000000008)
 #define HPTE_V_LARGE		ASM_CONST(0x0000000000000004)
@@ -164,16 +167,19 @@ struct mmu_psize_def
 #define MMU_SEGSIZE_256M	0
 #define MMU_SEGSIZE_1T		1
 
+
 #ifndef __ASSEMBLY__
 
 /*
- * The current system page sizes
+ * The current system page and segment sizes
  */
 extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
 extern int mmu_linear_psize;
 extern int mmu_virtual_psize;
 extern int mmu_vmalloc_psize;
 extern int mmu_io_psize;
+extern int mmu_kernel_ssize;
+extern int mmu_highuser_ssize;
 
 /*
  * If the processor supports 64k normal pages but not 64k cache
@@ -195,13 +201,15 @@ extern int mmu_huge_psize;
  * This function sets the AVPN and L fields of the HPTE appropriately
  * for the page size
  */
-static inline unsigned long hpte_encode_v(unsigned long va, int psize)
+static inline unsigned long hpte_encode_v(unsigned long va, int psize,
+					  int ssize)
 {
-	unsigned long v =
+	unsigned long v;
 	v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
 	v <<= HPTE_V_AVPN_SHIFT;
 	if (psize != MMU_PAGE_4K)
 		v |= HPTE_V_LARGE;
+	v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
 	return v;
 }
 
@@ -226,20 +234,40 @@ static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
 }
 
 /*
- * This hashes a virtual address for a 256Mb segment only for now
+ * Build a VA given VSID, EA and segment size
  */
+static inline unsigned long hpt_va(unsigned long ea, unsigned long vsid,
+				   int ssize)
+{
+	if (ssize == MMU_SEGSIZE_256M)
+		return (vsid << 28) | (ea & 0xfffffffUL);
+	return (vsid << 40) | (ea & 0xffffffffffUL);
+}
 
-static inline unsigned long hpt_hash(unsigned long va, unsigned int shift)
+/*
+ * This hashes a virtual address
+ */
+
+static inline unsigned long hpt_hash(unsigned long va, unsigned int shift,
+				     int ssize)
 {
-	return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift);
+	unsigned long hash, vsid;
+
+	if (ssize == MMU_SEGSIZE_256M) {
+		hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
+	} else {
+		vsid = va >> 40;
+		hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift);
+	}
+	return hash & 0x7fffffffffUL;
 }
 
 extern int __hash_page_4K(unsigned long ea, unsigned long access,
 			  unsigned long vsid, pte_t *ptep, unsigned long trap,
-			  unsigned int local);
+			  unsigned int local, int ssize);
 extern int __hash_page_64K(unsigned long ea, unsigned long access,
 			   unsigned long vsid, pte_t *ptep, unsigned long trap,
-			   unsigned int local);
+			   unsigned int local, int ssize);
 struct mm_struct;
 extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
 extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
@@ -248,7 +276,7 @@ extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
 
 extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
 			     unsigned long pstart, unsigned long mode,
-			     int psize);
+			     int psize, int ssize);
 extern void htab_initialize(void);
 extern void htab_initialize_secondary(void);
@@ -317,12 +345,17 @@ extern void slb_vmalloc_update(void);
  * which are used by the iSeries firmware.
  */
 
-#define VSID_MULTIPLIER	ASM_CONST(200730139)	/* 28-bit prime */
-#define VSID_BITS	36
-#define VSID_MODULUS	((1UL<<VSID_BITS)-1)
+#define VSID_MULTIPLIER_256M	ASM_CONST(200730139)	/* 28-bit prime */
+#define VSID_BITS_256M		36
+#define VSID_MODULUS_256M	((1UL<<VSID_BITS_256M)-1)
 
-#define CONTEXT_BITS	19
-#define USER_ESID_BITS	16
+#define VSID_MULTIPLIER_1T	ASM_CONST(12538073)	/* 24-bit prime */
+#define VSID_BITS_1T		24
+#define VSID_MODULUS_1T		((1UL<<VSID_BITS_1T)-1)
+
+#define CONTEXT_BITS		19
+#define USER_ESID_BITS		16
+#define USER_ESID_BITS_1T	4
 
 #define USER_VSID_RANGE	(1UL << (USER_ESID_BITS + SID_SHIFT))
 
@@ -336,17 +369,17 @@ extern void slb_vmalloc_update(void);
  *	rx = scratch register (clobbered)
  *
  *	- rt and rx must be different registers
- *	- The answer will end up in the low 36 bits of rt.  The higher
+ *	- The answer will end up in the low VSID_BITS bits of rt.  The higher
  *	  bits may contain other garbage, so you may need to mask the
  *	  result.
  */
-#define ASM_VSID_SCRAMBLE(rt, rx)	\
-	lis	rx,VSID_MULTIPLIER@h;					\
-	ori	rx,rx,VSID_MULTIPLIER@l;				\
+#define ASM_VSID_SCRAMBLE(rt, rx, size)					\
+	lis	rx,VSID_MULTIPLIER_##size@h;				\
+	ori	rx,rx,VSID_MULTIPLIER_##size@l;				\
 	mulld	rt,rt,rx;		/* rt = rt * MULTIPLIER */	\
 									\
-	srdi	rx,rt,VSID_BITS;					\
-	clrldi	rt,rt,(64-VSID_BITS);					\
+	srdi	rx,rt,VSID_BITS_##size;					\
+	clrldi	rt,rt,(64-VSID_BITS_##size);				\
 	add	rt,rt,rx;		/* add high and low bits */	\
 	/* Now, r3 == VSID (mod 2^36-1), and lies between 0 and	\
 	 * 2^36-1+2^28-1.  That in particular means that if r3 >=	\
@@ -355,7 +388,7 @@ extern void slb_vmalloc_update(void);
 	 * doesn't, the answer is the low 36 bits of r3+1.  So in all	\
 	 * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
 	addi	rx,rt,1;						\
-	srdi	rx,rx,VSID_BITS;	/* extract 2^36 bit */		\
+	srdi	rx,rx,VSID_BITS_##size;	/* extract 2^VSID_BITS bit */	\
 	add	rt,rt,rx
 
@@ -377,37 +410,60 @@ typedef struct {
 } mm_context_t;
 
 
-static inline unsigned long vsid_scramble(unsigned long protovsid)
-{
 #if 0
-	/* The code below is equivalent to this function for arguments
-	 * < 2^VSID_BITS, which is all this should ever be called
-	 * with.  However gcc is not clever enough to compute the
-	 * modulus (2^n-1) without a second multiply. */
-	return ((protovsid * VSID_MULTIPLIER) % VSID_MODULUS);
-#else /* 1 */
-	unsigned long x;
+/*
+ * The code below is equivalent to this function for arguments
+ * < 2^VSID_BITS, which is all this should ever be called
+ * with.  However gcc is not clever enough to compute the
+ * modulus (2^n-1) without a second multiply.
+ */
+#define vsid_scrample(protovsid, size) \
+	((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
 
-	x = protovsid * VSID_MULTIPLIER;
-	x = (x >> VSID_BITS) + (x & VSID_MODULUS);
-	return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS;
+#else /* 1 */
+#define vsid_scramble(protovsid, size) \
+	({								 \
+		unsigned long x;					 \
+		x = (protovsid) * VSID_MULTIPLIER_##size;		 \
+		x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
+		(x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
+	})
 #endif /* 1 */
-}
 
 /* This is only valid for addresses >= KERNELBASE */
-static inline unsigned long get_kernel_vsid(unsigned long ea)
+static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
 {
-	return vsid_scramble(ea >> SID_SHIFT);
+	if (ssize == MMU_SEGSIZE_256M)
+		return vsid_scramble(ea >> SID_SHIFT, 256M);
+	return vsid_scramble(ea >> SID_SHIFT_1T, 1T);
 }
 
-/* This is only valid for user addresses (which are below 2^41) */
-static inline unsigned long get_vsid(unsigned long context, unsigned long ea)
+/* Returns the segment size indicator for a user address */
+static inline int user_segment_size(unsigned long addr)
 {
-	return vsid_scramble((context << USER_ESID_BITS)
-			     | (ea >> SID_SHIFT));
+	/* Use 1T segments if possible for addresses >= 1T */
+	if (addr >= (1UL << SID_SHIFT_1T))
+		return mmu_highuser_ssize;
+	return MMU_SEGSIZE_256M;
 }
 
-#define VSID_SCRAMBLE(pvsid)	(((pvsid) * VSID_MULTIPLIER) % VSID_MODULUS)
+/* This is only valid for user addresses (which are below 2^44) */
+static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
+				     int ssize)
+{
+	if (ssize == MMU_SEGSIZE_256M)
+		return vsid_scramble((context << USER_ESID_BITS)
+				     | (ea >> SID_SHIFT), 256M);
+	return vsid_scramble((context << USER_ESID_BITS_1T)
+			     | (ea >> SID_SHIFT_1T), 1T);
+}
+
+/*
+ * This is only used on legacy iSeries in lparmap.c,
+ * hence the 256MB segment assumption.
+ */
+#define VSID_SCRAMBLE(pvsid)	(((pvsid) * VSID_MULTIPLIER_256M) % \
+				 VSID_MODULUS_256M)
 #define KERNEL_VSID(ea)		VSID_SCRAMBLE(GET_ESID(ea))
 
 /* Physical address used by some IO functions */
diff --git a/include/asm-powerpc/page_64.h b/include/asm-powerpc/page_64.h
index 56a2df0f683..4ee82c61e4d 100644
--- a/include/asm-powerpc/page_64.h
+++ b/include/asm-powerpc/page_64.h
@@ -26,12 +26,18 @@
  */
 #define PAGE_FACTOR		(PAGE_SHIFT - HW_PAGE_SHIFT)
 
-/* Segment size */
+/* Segment size; normal 256M segments */
 #define SID_SHIFT		28
 #define SID_MASK		ASM_CONST(0xfffffffff)
 #define ESID_MASK		0xfffffffff0000000UL
 #define GET_ESID(x)		(((x) >> SID_SHIFT) & SID_MASK)
 
+/* 1T segments */
+#define SID_SHIFT_1T		40
+#define SID_MASK_1T		0xffffffUL
+#define ESID_MASK_1T		0xffffff0000000000UL
+#define GET_ESID_1T(x)		(((x) >> SID_SHIFT_1T) & SID_MASK_1T)
+
 #ifndef __ASSEMBLY__
 #include <asm/cache.h>
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index 99a0439baa5..a022f806bb2 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -97,6 +97,7 @@ struct ppc64_tlb_batch {
 	real_pte_t		pte[PPC64_TLB_BATCH_NR];
 	unsigned long		vaddr[PPC64_TLB_BATCH_NR];
 	unsigned int		psize;
+	int			ssize;
 };
 DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
 
@@ -127,7 +128,7 @@ static inline void arch_leave_lazy_mmu_mode(void)
 
 extern void flush_hash_page(unsigned long va, real_pte_t pte, int psize,
-			    int local);
+			    int ssize, int local);
 extern void flush_hash_range(unsigned long number, int local);
```
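As a usage illustration of the new helpers above: hpt_va() widens the EA offset to 40 bits and places a 24-bit VSID above it for a 1TB segment, while hpt_hash() folds the larger VSID with an extra vsid ^ (vsid << 25) term before masking to 39 bits. The standalone sketch below simply re-states the two inline functions from the diff and feeds them made-up VSIDs and an example EA; it is not kernel code, just a way to see how the two segment sizes differ.

```c
#include <stdio.h>

#define MMU_SEGSIZE_256M	0
#define MMU_SEGSIZE_1T		1

/* Same logic as the hpt_va()/hpt_hash() added in mmu-hash64.h above */
static unsigned long hpt_va(unsigned long ea, unsigned long vsid, int ssize)
{
	if (ssize == MMU_SEGSIZE_256M)
		return (vsid << 28) | (ea & 0xfffffffUL);
	return (vsid << 40) | (ea & 0xffffffffffUL);
}

static unsigned long hpt_hash(unsigned long va, unsigned int shift, int ssize)
{
	unsigned long hash, vsid;

	if (ssize == MMU_SEGSIZE_256M) {
		hash = (va >> 28) ^ ((va & 0x0fffffffUL) >> shift);
	} else {
		vsid = va >> 40;
		hash = vsid ^ (vsid << 25) ^ ((va & 0xffffffffffUL) >> shift);
	}
	return hash & 0x7fffffffffUL;
}

int main(void)
{
	/* Made-up VSIDs and EA, 4K base page size (shift = 12) */
	unsigned long ea = 0x20012345000UL;
	unsigned long va_256m = hpt_va(ea, 0x123456789UL, MMU_SEGSIZE_256M);
	unsigned long va_1t   = hpt_va(ea, 0xabcdefUL, MMU_SEGSIZE_1T);

	printf("256M: va %016lx -> hash %010lx\n", va_256m,
	       hpt_hash(va_256m, 12, MMU_SEGSIZE_256M));
	printf("  1T: va %016lx -> hash %010lx\n", va_1t,
	       hpt_hash(va_1t, 12, MMU_SEGSIZE_1T));
	return 0;
}
```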