about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- arch/i386/kernel/paravirt.c 1
-rw-r--r-- arch/i386/mm/fault.c 5
-rw-r--r-- arch/i386/mm/init.c 18
-rw-r--r-- arch/i386/mm/pageattr.c 2
-rw-r--r-- arch/i386/mm/pgtable.c 88
-rw-r--r-- include/asm-i386/paravirt.h 1
-rw-r--r-- include/asm-i386/pgtable-2level-defs.h 2
-rw-r--r-- include/asm-i386/pgtable-2level.h 2
-rw-r--r-- include/asm-i386/pgtable-3level-defs.h 6
-rw-r--r-- include/asm-i386/pgtable-3level.h 2
-rw-r--r-- include/asm-i386/pgtable.h 2
11 files changed, 101 insertions, 28 deletions
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index 47d075bdfb9..2040a831d5b 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -132,6 +132,7 @@ struct paravirt_ops paravirt_ops = {
.name = "bare hardware",
.paravirt_enabled = 0,
.kernel_rpl = 0,
+ .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
.patch = native_patch,
.banner = default_banner,
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index c6a0a06258e..f534c29e80b 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -603,7 +603,6 @@ do_sigbus:
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}
-#ifndef CONFIG_X86_PAE
void vmalloc_sync_all(void)
{
/*
@@ -616,6 +615,9 @@ void vmalloc_sync_all(void)
static unsigned long start = TASK_SIZE;
unsigned long address;
+ if (SHARED_KERNEL_PMD)
+ return;
+
BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
if (!test_bit(pgd_index(address), insync)) {
@@ -638,4 +640,3 @@ void vmalloc_sync_all(void)
start = address + PGDIR_SIZE;
}
}
-#endif
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index e8545dcf06c..dbe16f63a56 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -745,6 +745,8 @@ struct kmem_cache *pmd_cache;
void __init pgtable_cache_init(void)
{
+ size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
+
if (PTRS_PER_PMD > 1) {
pmd_cache = kmem_cache_create("pmd",
PTRS_PER_PMD*sizeof(pmd_t),
@@ -754,13 +756,23 @@ void __init pgtable_cache_init(void)
NULL);
if (!pmd_cache)
panic("pgtable_cache_init(): cannot create pmd cache");
+
+ if (!SHARED_KERNEL_PMD) {
+ /* If we're in PAE mode and have a non-shared
+ kernel pmd, then the pgd size must be a
+ page size. This is because the pgd_list
+ links through the page structure, so there
+ can only be one pgd per page for this to
+ work. */
+ pgd_size = PAGE_SIZE;
+ }
}
pgd_cache = kmem_cache_create("pgd",
- PTRS_PER_PGD*sizeof(pgd_t),
- PTRS_PER_PGD*sizeof(pgd_t),
+ pgd_size,
+ pgd_size,
0,
pgd_ctor,
- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
+ (!SHARED_KERNEL_PMD) ? pgd_dtor : NULL);
if (!pgd_cache)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index ea6b6d4a0a2..47bd477c8ec 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -91,7 +91,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
unsigned long flags;
set_pte_atomic(kpte, pte); /* change init_mm */
- if (PTRS_PER_PMD > 1)
+ if (SHARED_KERNEL_PMD)
return;
spin_lock_irqsave(&pgd_lock, flags);
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 99c09edc3db..9a96c164742 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -232,42 +232,92 @@ static inline void pgd_list_del(pgd_t *pgd)
set_page_private(next, (unsigned long)pprev);
}
+#if (PTRS_PER_PMD == 1)
+/* Non-PAE pgd constructor */
void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags;
- if (PTRS_PER_PMD == 1) {
- memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
- spin_lock_irqsave(&pgd_lock, flags);
- }
+ /* !PAE, no pagetable sharing */
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ /* must happen under lock */
clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
-
- if (PTRS_PER_PMD > 1)
- return;
-
- /* must happen under lock */
paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
- __pa(swapper_pg_dir) >> PAGE_SHIFT,
- USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
-
+ __pa(swapper_pg_dir) >> PAGE_SHIFT,
+ USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
+#else /* PTRS_PER_PMD > 1 */
+/* PAE pgd constructor */
+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
+{
+ /* PAE, kernel PMD may be shared */
+
+ if (SHARED_KERNEL_PMD) {
+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
+ swapper_pg_dir + USER_PTRS_PER_PGD,
+ KERNEL_PGD_PTRS);
+ } else {
+ unsigned long flags;
+
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+ spin_lock_irqsave(&pgd_lock, flags);
+ pgd_list_add(pgd);
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+#endif /* PTRS_PER_PMD */
-/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */
+ BUG_ON(SHARED_KERNEL_PMD);
+
paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
+#define UNSHARED_PTRS_PER_PGD \
+ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
+
+/* If we allocate a pmd for part of the kernel address space, then
+ make sure it's initialized with the appropriate kernel mappings.
+ Otherwise use a cached zeroed pmd. */
+static pmd_t *pmd_cache_alloc(int idx)
+{
+ pmd_t *pmd;
+
+ if (idx >= USER_PTRS_PER_PGD) {
+ pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
+
+ if (pmd)
+ memcpy(pmd,
+ (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
+ sizeof(pmd_t) * PTRS_PER_PMD);
+ } else
+ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+
+ return pmd;
+}
+
+static void pmd_cache_free(pmd_t *pmd, int idx)
+{
+ if (idx >= USER_PTRS_PER_PGD)
+ free_page((unsigned long)pmd);
+ else
+ kmem_cache_free(pmd_cache, pmd);
+}
+
pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
@@ -276,10 +326,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
if (PTRS_PER_PMD == 1 || !pgd)
return pgd;
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
+ pmd_t *pmd = pmd_cache_alloc(i);
+
if (!pmd)
goto out_oom;
+
paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
}
@@ -290,7 +342,7 @@ out_oom:
pgd_t pgdent = pgd[i];
void* pmd = (void *)__va(pgd_val(pgdent)-1);
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
- kmem_cache_free(pmd_cache, pmd);
+ pmd_cache_free(pmd, i);
}
kmem_cache_free(pgd_cache, pgd);
return NULL;
@@ -302,11 +354,11 @@ void pgd_free(pgd_t *pgd)
/* in the PAE case user pgd entries are overwritten before usage */
if (PTRS_PER_PMD > 1)
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
pgd_t pgdent = pgd[i];
void* pmd = (void *)__va(pgd_val(pgdent)-1);
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
- kmem_cache_free(pmd_cache, pmd);
+ pmd_cache_free(pmd, i);
}
/* in the non-PAE case, free_pgtables() clears user pgd entries */
kmem_cache_free(pgd_cache, pgd);
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index c49b44cdd8e..f93599dc775 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -35,6 +35,7 @@ struct desc_struct;
struct paravirt_ops
{
unsigned int kernel_rpl;
+ int shared_kernel_pmd;
int paravirt_enabled;
const char *name;
diff --git a/include/asm-i386/pgtable-2level-defs.h b/include/asm-i386/pgtable-2level-defs.h
index 02518079f81..0f71c9f13da 100644
--- a/include/asm-i386/pgtable-2level-defs.h
+++ b/include/asm-i386/pgtable-2level-defs.h
@@ -1,6 +1,8 @@
#ifndef _I386_PGTABLE_2LEVEL_DEFS_H
#define _I386_PGTABLE_2LEVEL_DEFS_H
+#define SHARED_KERNEL_PMD 0
+
/*
* traditional i386 two-level paging structure:
*/
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
index 043a2bcfa86..781fe4bcc96 100644
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -82,6 +82,4 @@ static inline int pte_exec_kernel(pte_t pte)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-void vmalloc_sync_all(void);
-
#endif /* _I386_PGTABLE_2LEVEL_H */
diff --git a/include/asm-i386/pgtable-3level-defs.h b/include/asm-i386/pgtable-3level-defs.h
index eb3a1ea8867..c0df89f66e8 100644
--- a/include/asm-i386/pgtable-3level-defs.h
+++ b/include/asm-i386/pgtable-3level-defs.h
@@ -1,6 +1,12 @@
#ifndef _I386_PGTABLE_3LEVEL_DEFS_H
#define _I386_PGTABLE_3LEVEL_DEFS_H
+#ifdef CONFIG_PARAVIRT
+#define SHARED_KERNEL_PMD (paravirt_ops.shared_kernel_pmd)
+#else
+#define SHARED_KERNEL_PMD 1
+#endif
+
/*
* PGDIR_SHIFT determines what a top-level page table entry can map
*/
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index be6017f37a9..664bfee5a2f 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -200,6 +200,4 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
#define __pmd_free_tlb(tlb, x) do { } while (0)
-#define vmalloc_sync_all() ((void)0)
-
#endif /* _I386_PGTABLE_3LEVEL_H */
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 0790ad6ed44..5b88a6a1278 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -243,6 +243,8 @@ static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; re
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; }
+extern void vmalloc_sync_all(void);
+
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else