From 3dc494e86d1c93afd4c66385f270899dbfae483d Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Wed, 2 May 2007 19:27:13 +0200
Subject: [PATCH] i386: PARAVIRT: Add pagetable accessors to pack and unpack pagetable entries

Add a set of accessors to pack, unpack and modify page table entries
(at all levels).  This allows a paravirt implementation to control the
contents of pgd/pmd/pte entries.  For example, Xen uses this to convert
the (pseudo-)physical address into a machine address when populating a
pagetable entry, and to convert it back to a pseudo-physical address
when an entry is read.

Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: Andi Kleen
Acked-by: Ingo Molnar
---
 include/asm-i386/pgtable-3level.h | 63 +++++++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 25 deletions(-)

(limited to 'include/asm-i386/pgtable-3level.h')

diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index 7a2318f3830..be6017f37a9 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -42,20 +42,23 @@ static inline int pte_exec_kernel(pte_t pte)
 	return pte_x(pte);
 }
 
-#ifndef CONFIG_PARAVIRT
 /* Rules for using set_pte: the pte being assigned *must* be
  * either not present or in a state where the hardware will
  * not attempt to update the pte.  In places where this is
  * not possible, use pte_get_and_clear to obtain the old pte
  * value and then use set_pte to update it.  -ben
  */
-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline void native_set_pte(pte_t *ptep, pte_t pte)
 {
 	ptep->pte_high = pte.pte_high;
 	smp_wmb();
 	ptep->pte_low = pte.pte_low;
 }
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
+				     pte_t *ptep , pte_t pte)
+{
+	native_set_pte(ptep, pte);
+}
 
 /*
  * Since this is only called on user PTEs, and the page fault handler
@@ -63,7 +66,8 @@ static inline void set_pte(pte_t *ptep, pte_t pte)
  * we are justified in merely clearing the PTE present bit, followed
  * by a set.  The ordering here is important.
  */
-static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+static inline void native_set_pte_present(struct mm_struct *mm, unsigned long addr,
+					  pte_t *ptep, pte_t pte)
 {
 	ptep->pte_low = 0;
 	smp_wmb();
@@ -72,32 +76,48 @@ static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, pte
 	ptep->pte_low = pte.pte_low;
 }
 
-#define set_pte_atomic(pteptr,pteval) \
-		set_64bit((unsigned long long *)(pteptr),pte_val(pteval))
-#define set_pmd(pmdptr,pmdval) \
-		set_64bit((unsigned long long *)(pmdptr),pmd_val(pmdval))
-#define set_pud(pudptr,pudval) \
-		(*(pudptr) = (pudval))
+static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	set_64bit((unsigned long long *)(ptep),native_pte_val(pte));
+}
+static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	set_64bit((unsigned long long *)(pmdp),native_pmd_val(pmd));
+}
+static inline void native_set_pud(pud_t *pudp, pud_t pud)
+{
+	*pudp = pud;
+}
 
 /*
  * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
  * entry, so clear the bottom half first and enforce ordering with a compiler
  * barrier.
  */
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	ptep->pte_low = 0;
 	smp_wmb();
 	ptep->pte_high = 0;
 }
 
-static inline void pmd_clear(pmd_t *pmd)
+static inline void native_pmd_clear(pmd_t *pmd)
 {
 	u32 *tmp = (u32 *)pmd;
 	*tmp = 0;
 	smp_wmb();
 	*(tmp + 1) = 0;
 }
+
+#ifndef CONFIG_PARAVIRT
+#define set_pte(ptep, pte)			native_set_pte(ptep, pte)
+#define set_pte_at(mm, addr, ptep, pte)		native_set_pte_at(mm, addr, ptep, pte)
+#define set_pte_present(mm, addr, ptep, pte)	native_set_pte_present(mm, addr, ptep, pte)
+#define set_pte_atomic(ptep, pte)		native_set_pte_atomic(ptep, pte)
+#define set_pmd(pmdp, pmd)			native_set_pmd(pmdp, pmd)
+#define set_pud(pudp, pud)			native_set_pud(pudp, pud)
+#define pte_clear(mm, addr, ptep)		native_pte_clear(mm, addr, ptep)
+#define pmd_clear(pmd)				native_pmd_clear(pmd)
 #endif
 
 /*
@@ -119,7 +139,7 @@ static inline void pud_clear (pud_t * pud) { }
 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
 			pmd_index(address))
 
-static inline pte_t raw_ptep_get_and_clear(pte_t *ptep)
+static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
 {
 	pte_t res;
 
@@ -146,28 +166,21 @@ static inline int pte_none(pte_t pte)
 
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return (pte.pte_low >> PAGE_SHIFT) |
-		(pte.pte_high << (32 - PAGE_SHIFT));
+	return pte_val(pte) >> PAGE_SHIFT;
}
 
 extern unsigned long long __supported_pte_mask;
 
 static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
 {
-	pte_t pte;
-
-	pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \
-			(pgprot_val(pgprot) >> 32);
-	pte.pte_high &= (__supported_pte_mask >> 32);
-	pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \
-			__supported_pte_mask;
-	return pte;
+	return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
+		      pgprot_val(pgprot)) & __supported_pte_mask);
 }
 
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
-	return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \
-		      pgprot_val(pgprot)) & __supported_pte_mask);
+	return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
+		      pgprot_val(pgprot)) & __supported_pte_mask);
 }
 
 /*
--
cgit v1.2.3
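To make the pack/unpack hooks concrete, here is an illustrative sketch, not part of the patch, of what a Xen-style backend could plug into pte_val()/__pte() on PAE. The names pfn_to_mfn() and mfn_to_pfn() are assumed helpers for the pseudo-physical/machine frame-number translation, and the sketch ignores the NX bit in the high word for brevity:

/* Sketch only: unpack a pte, turning the machine frame number the
 * hardware sees back into a pseudo-physical one.  mfn_to_pfn() is
 * an assumed helper, not defined by this patch. */
static inline unsigned long long xen_pte_val(pte_t pte)
{
	unsigned long long val;

	val = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
	if (val & _PAGE_PRESENT)	/* only present entries carry an mfn */
		val = ((unsigned long long)mfn_to_pfn(val >> PAGE_SHIFT)
		       << PAGE_SHIFT) | (val & ~PAGE_MASK);
	return val;
}

/* Sketch only: pack a pte, storing the machine frame number in the
 * entry the hardware will actually walk. */
static inline pte_t xen_make_pte(unsigned long long val)
{
	pte_t pte;

	if (val & _PAGE_PRESENT)
		val = ((unsigned long long)pfn_to_mfn(val >> PAGE_SHIFT)
		       << PAGE_SHIFT) | (val & ~PAGE_MASK);
	pte.pte_low = val;
	pte.pte_high = val >> 32;
	return pte;
}

Because every entry is funnelled through one pack hook and one unpack hook, the rest of the pagetable code never needs to know about the pfn/mfn distinction.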
From 5311ab62cdc7788784971ed816ce85e926f3e994 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Wed, 2 May 2007 19:27:13 +0200
Subject: [PATCH] i386: PARAVIRT: Allow paravirt backend to choose kernel PMD sharing

Normally when running in PAE mode, the 4th PMD maps the kernel address space,
which can be shared among all processes (since they all need the same kernel
mappings).

Xen, however, does not allow guests to have the kernel pmd shared between page
tables, so parameterize pgtable.c to allow both modes of operation.

There are several side-effects of this.  One is that vmalloc will update the
kernel address space mappings, and those updates need to be propagated into
all processes if the kernel mappings are not intrinsically shared.  In the
non-PAE case, this is done by maintaining a pgd_list of all processes; this
list is used when all process pagetables must be updated.  pgd_list is
threaded via otherwise unused entries in the page structure for the pgd,
which means that the pgd must be page-sized for this to work.

Normally the PAE pgd is only four 64-bit entries (32 bytes), but Xen requires
the PAE pgd to be page aligned anyway, so this patch forces the pgd to be
page aligned+sized when the kernel pmd is unshared, to accommodate both these
requirements.

Also, since there may be several distinct kernel pmds (if the user/kernel
split is below 3G), there's no point in allocating them from a slab cache;
they're just allocated with get_free_page and initialized appropriately.
(Of course they could be cached if there is just a single kernel pmd - which
is the default with a 3G user/kernel split - but it doesn't seem worthwhile
to add yet another case into this code).

[ Many thanks to wli for review comments. ]

Signed-off-by: Jeremy Fitzhardinge
Signed-off-by: William Lee Irwin III
Signed-off-by: Andi Kleen
Cc: Zachary Amsden
Cc: Christoph Lameter
Acked-by: Ingo Molnar
Signed-off-by: Andrew Morton
---
 include/asm-i386/pgtable-3level.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/asm-i386/pgtable-3level.h')

diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index be6017f37a9..664bfee5a2f 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -200,6 +200,4 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 
 #define __pmd_free_tlb(tlb, x)			do { } while (0)
 
-#define vmalloc_sync_all() ((void)0)
-
 #endif /* _I386_PGTABLE_3LEVEL_H */
--
cgit v1.2.3
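As a rough sketch of the allocation policy the message describes (the names are assumptions, not taken from this diff: SHARED_KERNEL_PMD stands for the backend-chosen mode flag, pgd_cache for an assumed slab cache holding the short pgd):

/* Sketch only: pgd allocation under the two modes described above.
 * Error handling and pgd initialization are omitted. */
static pgd_t *pgd_alloc_sketch(struct mm_struct *mm)
{
	if (SHARED_KERNEL_PMD)
		/* kernel pmd shared: a 4-entry, 32-byte pgd suffices */
		return kmem_cache_alloc(pgd_cache, GFP_KERNEL);

	/* kernel pmds unshared: the pgd must be page aligned and page
	 * sized so it can be threaded onto pgd_list through the unused
	 * fields of its struct page, and so Xen will accept it */
	return (pgd_t *)__get_free_page(GFP_KERNEL);
}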
From 142dd975911fdd82b1b6f6617cd20ac90a8ccf00 Mon Sep 17 00:00:00 2001
From: Zachary Amsden
Date: Wed, 2 May 2007 19:27:19 +0200
Subject: [PATCH] i386: pte xchg optimization

In situations where page table updates need only be made locally, and there
are no cross-processor A/D bit races involved, we need not use the heavyweight
xchg instruction to atomically fetch and clear page table entries.  Instead,
we can just read and clear them directly.

This introduces a neat optimization for non-SMP kernels: drop the atomic xchg
operations from page table updates.

Thanks to Michel Lespinasse for noting this potential optimization.

Signed-off-by: Zachary Amsden
Signed-off-by: Andrew Morton
Signed-off-by: Andi Kleen
---
 include/asm-i386/pgtable-3level.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/asm-i386/pgtable-3level.h')

diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index 664bfee5a2f..45b02418150 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -139,6 +139,17 @@ static inline void pud_clear (pud_t * pud) { }
 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
 			pmd_index(address))
 
+/* local pte updates need not use xchg for locking */
+static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
+{
+	pte_t res;
+
+	res = *ptep;
+	native_pte_clear(NULL, 0, ptep);
+	return res;
+}
+
+#ifdef CONFIG_SMP
 static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
 {
 	pte_t res;
@@ -150,6 +161,9 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
 
 	return res;
 }
+#else
+#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
+#endif
 
 #define __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t a, pte_t b)
--
cgit v1.2.3
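For context, the SMP body that the new #ifdef preserves (elided down to "pte_t res;" in the hunk above) reads roughly as follows; the xchg on the low word is what makes the fetch-and-clear safe against a concurrent hardware A/D bit update on another CPU:

static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
{
	pte_t res;

	/* xchg acts as a barrier before the setting of the high bits */
	res.pte_low = xchg(&ptep->pte_low, 0);
	res.pte_high = ptep->pte_high;
	ptep->pte_high = 0;

	return res;
}

Clearing the low word first also clears the present bit, so once the xchg has returned, the hardware can no longer set A/D bits in the entry and the high word can be read and cleared non-atomically.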
From 9e5e3162b2d5e4466187ecd63c9eec2de33cb7bc Mon Sep 17 00:00:00 2001
From: Zachary Amsden
Date: Wed, 2 May 2007 19:27:19 +0200
Subject: [PATCH] i386: pte simplify ops

Add comment and condense code to make use of
native_local_ptep_get_and_clear function.

Also, it turns out the 2-level and 3-level paging definitions were identical,
so move the common definition into pgtable.h.

Signed-off-by: Zachary Amsden
Signed-off-by: Andrew Morton
Signed-off-by: Andi Kleen
---
 include/asm-i386/pgtable-3level.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include/asm-i386/pgtable-3level.h')

diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index 45b02418150..eb0f1d7e96a 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -139,16 +139,6 @@ static inline void pud_clear (pud_t * pud) { }
 #define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
 			pmd_index(address))
 
-/* local pte updates need not use xchg for locking */
-static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
-{
-	pte_t res;
-
-	res = *ptep;
-	native_pte_clear(NULL, 0, ptep);
-	return res;
-}
-
 #ifdef CONFIG_SMP
 static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
 {
--
cgit v1.2.3
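Per the commit description, the common definition that lands in pgtable.h is along these lines (a reconstruction, not quoted from this diff): when the "full" flag says the whole mm is being torn down, no concurrent user of the page tables can exist, so even SMP kernels can take the cheap non-atomic path:

#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
					    unsigned long addr,
					    pte_t *ptep, int full)
{
	pte_t pte;

	if (full)
		/* mm is going away: nothing else can touch these ptes,
		 * so the non-atomic local variant is safe */
		pte = native_local_ptep_get_and_clear(ptep);
	else
		pte = ptep_get_and_clear(mm, addr, ptep);
	return pte;
}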