author      Jeff Garzik <jeff@garzik.org>              2006-09-26 13:13:19 -0400
committer   Jeff Garzik <jeff@garzik.org>              2006-09-26 13:13:19 -0400
commit      c226951b93f7cd7c3a10b17384535b617bd43fd0 (patch)
tree        07b8796a5c99fbbf587b8d0dbcbc173cfe5e381e /include/linux
parent      b0df3bd1e553e901ec7297267611a5db88240b38 (diff)
parent      e8216dee838c09776680a6f1a2e54d81f3cdfa14 (diff)
Merge branch 'master' into upstream
Diffstat (limited to 'include/linux')
-rw-r--r--   include/linux/bootmem.h          | 100
-rw-r--r--   include/linux/console.h          |   5
-rw-r--r--   include/linux/cpu.h              |   8
-rw-r--r--   include/linux/dccp.h             |  14
-rw-r--r--   include/linux/elf-em.h           |   1
-rw-r--r--   include/linux/elfnote.h          |  90
-rw-r--r--   include/linux/gfp.h              |  36
-rw-r--r--   include/linux/highmem.h          |   5
-rw-r--r--   include/linux/irq.h              |   6
-rw-r--r--   include/linux/kernel.h           |   1
-rw-r--r--   include/linux/mempolicy.h        |   4
-rw-r--r--   include/linux/mm.h               | 128
-rw-r--r--   include/linux/mmzone.h           | 120
-rw-r--r--   include/linux/netfilter/Kbuild   |   2
-rw-r--r--   include/linux/page-flags.h       |  35
-rw-r--r--   include/linux/pagemap.h          |  15
-rw-r--r--   include/linux/percpu.h           |  89
-rw-r--r--   include/linux/resume-trace.h     |  24
-rw-r--r--   include/linux/rmap.h             |  14
-rw-r--r--   include/linux/selinux.h          |  29
-rw-r--r--   include/linux/slab.h             |  29
-rw-r--r--   include/linux/smp.h              |   3
-rw-r--r--   include/linux/suspend.h          |  32
-rw-r--r--   include/linux/swap.h             |  12
-rw-r--r--   include/linux/sysctl.h           |   1
-rw-r--r--   include/linux/vmalloc.h          |   2
-rw-r--r--   include/linux/vmstat.h           |  18
-rw-r--r--   include/linux/writeback.h        |   1
28 files changed, 571 insertions, 253 deletions
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index e319c649e4f..31e9abb6d97 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -4,11 +4,8 @@
 #ifndef _LINUX_BOOTMEM_H
 #define _LINUX_BOOTMEM_H
 
-#include <asm/pgtable.h>
-#include <asm/dma.h>
-#include <linux/cache.h>
-#include <linux/init.h>
 #include <linux/mmzone.h>
+#include <asm/dma.h>
 
 /*
  * simple boot-time physical memory area allocator.
@@ -41,45 +38,64 @@ typedef struct bootmem_data {
 	struct list_head list;
 } bootmem_data_t;
 
-extern unsigned long __init bootmem_bootmap_pages (unsigned long);
-extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
-extern void __init free_bootmem (unsigned long addr, unsigned long size);
-extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_low(unsigned long size,
-					 unsigned long align,
-					 unsigned long goal);
-extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat,
-					      unsigned long size,
-					      unsigned long align,
-					      unsigned long goal);
-extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata,
-		unsigned long size, unsigned long align, unsigned long goal,
-		unsigned long limit);
+extern unsigned long bootmem_bootmap_pages(unsigned long);
+extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
+extern void free_bootmem(unsigned long addr, unsigned long size);
+extern void *__alloc_bootmem(unsigned long size,
+			     unsigned long align,
+			     unsigned long goal);
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+				     unsigned long align,
+				     unsigned long goal);
+extern void *__alloc_bootmem_low(unsigned long size,
+				 unsigned long align,
+				 unsigned long goal);
+extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
+				      unsigned long size,
+				      unsigned long align,
+				      unsigned long goal);
+extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal,
+				  unsigned long limit);
+
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
+extern void reserve_bootmem(unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
-	__alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
-	__alloc_bootmem_low((x), SMP_CACHE_BYTES, 0)
+	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
 #define alloc_bootmem_pages(x) \
-	__alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
-	__alloc_bootmem_low((x), PAGE_SIZE, 0)
+	__alloc_bootmem_low(x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
-extern unsigned long __init free_all_bootmem (void);
-extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
-extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
-extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
-extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
-extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
+
+extern unsigned long free_all_bootmem(void);
+extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
+extern void *__alloc_bootmem_node(pg_data_t *pgdat,
+				  unsigned long size,
+				  unsigned long align,
+				  unsigned long goal);
+extern unsigned long init_bootmem_node(pg_data_t *pgdat,
+				       unsigned long freepfn,
+				       unsigned long startpfn,
+				       unsigned long endpfn);
+extern void reserve_bootmem_node(pg_data_t *pgdat,
+				 unsigned long physaddr,
+				 unsigned long size);
+extern void free_bootmem_node(pg_data_t *pgdat,
+			      unsigned long addr,
+			      unsigned long size);
+
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
-	__alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0)
+	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
@@ -89,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size)
 {
 	return NULL;
 }
-#endif
+#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
 
 extern unsigned long __meminitdata nr_kernel_pages;
 extern unsigned long nr_all_pages;
 
-extern void *__init alloc_large_system_hash(const char *tablename,
-					    unsigned long bucketsize,
-					    unsigned long numentries,
-					    int scale,
-					    int flags,
-					    unsigned int *_hash_shift,
-					    unsigned int *_hash_mask,
-					    unsigned long limit);
+extern void *alloc_large_system_hash(const char *tablename,
+				     unsigned long bucketsize,
+				     unsigned long numentries,
+				     int scale,
+				     int flags,
+				     unsigned int *_hash_shift,
+				     unsigned int *_hash_mask,
+				     unsigned long limit);
 
 #define HASH_HIGHMEM	0x00000001	/* Consider highmem? */
 #define HASH_EARLY	0x00000002	/* Allocating during early boot? */
diff --git a/include/linux/console.h b/include/linux/console.h
index 3bdf2155e56..76a1807726e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -120,9 +120,14 @@ extern void console_stop(struct console *);
 extern void console_start(struct console *);
 extern int is_console_locked(void);
 
+#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
 /* Suspend and resume console messages over PM events */
 extern void suspend_console(void);
 extern void resume_console(void);
+#else
+static inline void suspend_console(void) {}
+static inline void resume_console(void) {}
+#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
 
 /* Some debug stub to catch some of the obvious races in the VT code */
 #if 1
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 8fb344a9abd..3fef7d67aed 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu);
 static inline int cpu_is_offline(int cpu) { return 0; }
 #endif
 
+#ifdef CONFIG_SUSPEND_SMP
+extern int disable_nonboot_cpus(void);
+extern void enable_nonboot_cpus(void);
+#else
+static inline int disable_nonboot_cpus(void) { return 0; }
+static inline void enable_nonboot_cpus(void) {}
+#endif
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 2d7671c92c0..d6f4ec467a4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -169,6 +169,12 @@ enum {
 	DCCPO_MAX_CCID_SPECIFIC = 255,
 };
 
+/* DCCP CCIDS */
+enum {
+	DCCPC_CCID2 = 2,
+	DCCPC_CCID3 = 3,
+};
+
 /* DCCP features */
 enum {
 	DCCPF_RESERVED = 0,
@@ -320,7 +326,7 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
 /* initial values for each feature */
 #define DCCPF_INITIAL_SEQUENCE_WINDOW		100
 #define DCCPF_INITIAL_ACK_RATIO			2
-#define DCCPF_INITIAL_CCID			2
+#define DCCPF_INITIAL_CCID			DCCPC_CCID2
 #define DCCPF_INITIAL_SEND_ACK_VECTOR		1
 /* FIXME: for now we're default to 1 but it should really be 0 */
 #define DCCPF_INITIAL_SEND_NDP_COUNT		1
@@ -404,6 +410,7 @@ struct dccp_service_list {
 };
 
 #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
+#define DCCP_SERVICE_CODE_IS_ABSENT		0
 
 static inline int dccp_list_has_service(const struct dccp_service_list *sl,
 					const __be32 service)
@@ -484,11 +491,6 @@ static inline struct dccp_minisock *dccp_msk(const struct sock *sk)
 	return (struct dccp_minisock *)&dccp_sk(sk)->dccps_minisock;
 }
 
-static inline int dccp_service_not_initialized(const struct sock *sk)
-{
-	return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE;
-}
-
 static inline const char *dccp_role(const struct sock *sk)
 {
 	switch (dccp_sk(sk)->dccps_role) {
diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 6a5796c81c9..666e0a5f00f 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -31,6 +31,7 @@
 #define EM_M32R		88	/* Renesas M32R */
 #define EM_H8_300	46	/* Renesas H8/300,300H,H8S */
 #define EM_FRV		0x5441	/* Fujitsu FR-V */
+#define EM_AVR32	0x18ad	/* Atmel AVR32 */
 
 /*
  * This is an interim value that we will use until the committee comes
diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
new file mode 100644
index 00000000000..67396db141e
--- /dev/null
+++ b/include/linux/elfnote.h
@@ -0,0 +1,90 @@
+#ifndef _LINUX_ELFNOTE_H
+#define _LINUX_ELFNOTE_H
+/*
+ * Helper macros to generate ELF Note structures, which are put into a
+ * PT_NOTE segment of the final vmlinux image.  These are useful for
+ * including name-value pairs of metadata into the kernel binary (or
+ * modules?) for use by external programs.
+ *
+ * Each note has three parts: a name, a type and a desc.  The name is
+ * intended to distinguish the note's originator, so it would be a
+ * company, project, subsystem, etc; it must be in a suitable form for
+ * use in a section name.  The type is an integer which is used to tag
+ * the data, and is considered to be within the "name" namespace (so
+ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42).  The
+ * "desc" field is the actual data.  There are no constraints on the
+ * desc field's contents, though typically they're fairly small.
+ *
+ * All notes from a given NAME are put into a section named
+ * .note.NAME.  When the kernel image is finally linked, all the notes
+ * are packed into a single .notes section, which is mapped into the
+ * PT_NOTE segment.  Because notes for a given name are grouped into
+ * the same section, they'll all be adjacent in the output file.
+ *
+ * This file defines macros for both C and assembler use.  Their
+ * syntax is slightly different, but they're semantically similar.
+ *
+ * See the ELF specification for more detail about ELF notes.
+ */
+
+#ifdef __ASSEMBLER__
+/*
+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
+ * turn out to be the same size and shape), followed by the name and
+ * desc data with appropriate padding.  The 'desctype' argument is the
+ * assembler pseudo op defining the type of the data e.g. .asciz while
+ * 'descdata' is the data itself e.g. "hello, world".
+ *
+ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
+ *      ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
+ */
+#define ELFNOTE(name, type, desctype, descdata)	\
+.pushsection .note.name			;	\
+  .align 4				;	\
+  .long 2f - 1f		/* namesz */	;	\
+  .long 4f - 3f		/* descsz */	;	\
+  .long type				;	\
+1:.asciz "name"				;	\
+2:.align 4				;	\
+3:desctype descdata			;	\
+4:.align 4				;	\
+.popsection				;
+#else	/* !__ASSEMBLER__ */
+#include <linux/elf.h>
+/*
+ * Use an anonymous structure which matches the shape of
+ * Elf{32,64}_Nhdr, but includes the name and desc data.  The size and
+ * type of name and desc depend on the macro arguments.  "name" must
+ * be a literal string, and "desc" must be passed by value.  You may
+ * only define one note per line, since __LINE__ is used to generate
+ * unique symbols.
+ */
+#define _ELFNOTE_PASTE(a,b)	a##b
+#define _ELFNOTE(size, name, unique, type, desc)			\
+	static const struct {						\
+		struct elf##size##_note _nhdr;				\
+		unsigned char _name[sizeof(name)]			\
+		__attribute__((aligned(sizeof(Elf##size##_Word))));	\
+		typeof(desc) _desc					\
+			     __attribute__((aligned(sizeof(Elf##size##_Word)))); \
+	} _ELFNOTE_PASTE(_note_, unique)				\
+		__attribute_used__					\
+		__attribute__((section(".note." name),			\
+			       aligned(sizeof(Elf##size##_Word)),	\
+			       unused)) = {				\
+		{							\
+			sizeof(name),					\
+			sizeof(desc),					\
+			type,						\
+		},							\
+		name,							\
+		desc							\
+	}
+#define ELFNOTE(size, name, type, desc)		\
+	_ELFNOTE(size, name, __LINE__, type, desc)
+
+#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
+#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
+#endif	/* __ASSEMBLER__ */
+
+#endif /* _LINUX_ELFNOTE_H */
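The C-side ELFNOTE() above is just an anonymous structure pinned into a .note.NAME section with the layout of an ELF note header plus padded name and desc data. The same technique works in ordinary userspace C, which makes it easy to experiment with; below is a minimal sketch where the note name "XYZCo", type 42 and the payload are invented for the demo. Build with "gcc note.c -o note" and inspect the section with "readelf -n note".

/* Userspace sketch of the ELFNOTE() technique: an Elf32_Nhdr-shaped
 * struct placed in a .note.* section. "XYZCo"/42/0xdeadbeef are
 * made-up demo values; Elf32_Nhdr comes from glibc's <elf.h>.
 */
#include <elf.h>

#define NOTE_NAME "XYZCo"
#define NOTE_TYPE 42

static const struct {
        Elf32_Nhdr hdr;
        /* name padded to a 4-byte boundary, as the ELF spec requires */
        char name[(sizeof(NOTE_NAME) + 3) & ~3];
        unsigned int desc;
} my_note __attribute__((section(".note.XYZCo"), aligned(4), used)) = {
        .hdr = {
                .n_namesz = sizeof(NOTE_NAME),  /* includes the NUL */
                .n_descsz = sizeof(unsigned int),
                .n_type   = NOTE_TYPE,
        },
        .name = NOTE_NAME,
        .desc = 0xdeadbeef,
};

int main(void) { return 0; }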
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cc9e6084448..8b34aabfe4c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -9,17 +9,16 @@ struct vm_area_struct;
 
 /*
  * GFP bitmasks..
+ *
+ * Zone modifiers (see linux/mmzone.h - low three bits)
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use them consistently. The definitions here may
+ * be used in bit comparisons.
  */
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
 #define __GFP_DMA	((__force gfp_t)0x01u)
 #define __GFP_HIGHMEM	((__force gfp_t)0x02u)
-#ifdef CONFIG_DMA_IS_DMA32
-#define __GFP_DMA32	((__force gfp_t)0x01)	/* ZONE_DMA is ZONE_DMA32 */
-#elif BITS_PER_LONG < 64
-#define __GFP_DMA32	((__force gfp_t)0x00)	/* ZONE_NORMAL is ZONE_DMA32 */
-#else
-#define __GFP_DMA32	((__force gfp_t)0x04)	/* Has own ZONE_DMA32 */
-#endif
+#define __GFP_DMA32	((__force gfp_t)0x04u)
 
 /*
  * Action modifiers - doesn't change the zoning
@@ -46,6 +45,7 @@ struct vm_area_struct;
 #define __GFP_ZERO	((__force gfp_t)0x8000u)/* Return zeroed page on success */
 #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
 #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
 
 #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +54,7 @@ struct vm_area_struct;
 #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
-			__GFP_NOMEMALLOC|__GFP_HARDWALL)
+			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
 
 /* This equals 0, but use constants in case they ever change */
 #define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
@@ -67,6 +67,8 @@ struct vm_area_struct;
 #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
 			 __GFP_HIGHMEM)
 
+#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
    platforms, used as appropriate on others */
 
@@ -76,11 +78,19 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 
-static inline int gfp_zone(gfp_t gfp)
+static inline enum zone_type gfp_zone(gfp_t flags)
 {
-	int zone = GFP_ZONEMASK & (__force int) gfp;
-	BUG_ON(zone >= GFP_ZONETYPES);
-	return zone;
+	if (flags & __GFP_DMA)
+		return ZONE_DMA;
+#ifdef CONFIG_ZONE_DMA32
+	if (flags & __GFP_DMA32)
+		return ZONE_DMA32;
+#endif
+#ifdef CONFIG_HIGHMEM
+	if (flags & __GFP_HIGHMEM)
+		return ZONE_HIGHMEM;
+#endif
+	return ZONE_NORMAL;
 }
 
 /*
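The rewritten gfp_zone() replaces the old GFP_ZONEMASK/GFP_ZONETYPES arithmetic with straight-line flag tests against the new enum zone_type. Here is a userspace sketch of the same selection logic; the CONFIG_* macro is a stand-in for a Kconfig option and the flag values mirror the header above.

/* Sketch of the new gfp_zone() selection: the low GFP bits map
 * directly to enum zone_type members, and unconfigured zones simply
 * fall through to ZONE_NORMAL.
 */
#include <stdio.h>

#define CONFIG_ZONE_DMA32       /* pretend we are on x86_64 */

#define __GFP_DMA     0x01u
#define __GFP_HIGHMEM 0x02u
#define __GFP_DMA32   0x04u

enum zone_type {
        ZONE_DMA,
#ifdef CONFIG_ZONE_DMA32
        ZONE_DMA32,
#endif
        ZONE_NORMAL,
#ifdef CONFIG_HIGHMEM
        ZONE_HIGHMEM,
#endif
        MAX_NR_ZONES
};

static enum zone_type gfp_zone(unsigned int flags)
{
        if (flags & __GFP_DMA)
                return ZONE_DMA;
#ifdef CONFIG_ZONE_DMA32
        if (flags & __GFP_DMA32)
                return ZONE_DMA32;
#endif
#ifdef CONFIG_HIGHMEM
        if (flags & __GFP_HIGHMEM)
                return ZONE_HIGHMEM;
#endif
        return ZONE_NORMAL;
}

int main(void)
{
        printf("__GFP_DMA   -> zone %d\n", gfp_zone(__GFP_DMA));
        printf("__GFP_DMA32 -> zone %d\n", gfp_zone(__GFP_DMA32));
        printf("default     -> zone %d\n", gfp_zone(0));
        printf("MAX_NR_ZONES = %d\n", MAX_NR_ZONES);
        return 0;
}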
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 85ce7ef9a51..fd7d12daa94 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -24,11 +24,15 @@ static inline void flush_kernel_dcache_page(struct page *page)
 
 /* declarations for linux/mm/highmem.c */
 unsigned int nr_free_highpages(void);
+extern unsigned long totalhigh_pages;
 
 #else /* CONFIG_HIGHMEM */
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
 
+#define totalhigh_pages 0
+
+#ifndef ARCH_HAS_KMAP
 static inline void *kmap(struct page *page)
 {
 	might_sleep();
@@ -41,6 +45,7 @@ static inline void *kmap(struct page *page)
 #define kunmap_atomic(addr, idx)	do { } while (0)
 #define kmap_atomic_pfn(pfn, idx)	page_address(pfn_to_page(pfn))
 #define kmap_atomic_to_page(ptr)	virt_to_page(ptr)
+#endif
 
 #endif /* CONFIG_HIGHMEM */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fbf6d901e9c..48d3cb3b6a4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *,
  * Monolithic do_IRQ implementation.
  * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
  */
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
 extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
+#endif
 
 /*
  * Architectures call this to let the generic IRQ layer
@@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs)
 {
 	struct irq_desc *desc = irq_desc + irq;
 
+#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+	desc->handle_irq(irq, desc, regs);
+#else
 	if (likely(desc->handle_irq))
 		desc->handle_irq(irq, desc, regs);
 	else
 		__do_IRQ(irq, regs);
+#endif
 }
 
 /* Handling of unhandled and spurious interrupts: */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b2ae4fdce8..e44a37e2c71 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,6 +33,7 @@ extern const char linux_banner[];
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
 
 #define	KERN_EMERG	"<0>"	/* system is unusable			*/
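DIV_ROUND_UP() rounds an integer division up instead of truncating, the usual "how many pages does this buffer need" computation. A quick userspace demo next to its relatives roundup() and ALIGN(), with the macros copied from the header above (note that ALIGN() only works for power-of-two alignments, while the other two take any divisor):

#include <stdio.h>

#define ALIGN(x,a)         (((x)+(a)-1UL)&~((a)-1UL))
#define DIV_ROUND_UP(n,d)  (((n) + (d) - 1) / (d))
#define roundup(x, y)      ((((x) + ((y) - 1)) / (y)) * (y))

int main(void)
{
        /* how many 4096-byte pages does a 10000-byte buffer need? */
        printf("DIV_ROUND_UP(10000, 4096) = %lu\n",
               DIV_ROUND_UP(10000UL, 4096UL));          /* 3 */

        /* round a size up to the next multiple of the page size */
        printf("roundup(10000, 4096) = %lu\n", roundup(10000UL, 4096UL));
        printf("ALIGN(10000, 4096)   = %lu\n", ALIGN(10000UL, 4096UL));
        return 0;
}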
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 72440f0a443..09f0f575ddf 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 		unsigned long addr);
 extern unsigned slab_node(struct mempolicy *policy);
 
-extern int policy_zone;
+extern enum zone_type policy_zone;
 
-static inline void check_highest_zone(int k)
+static inline void check_highest_zone(enum zone_type k)
 {
 	if (k > policy_zone)
 		policy_zone = k;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 224178a000d..856f0ee7e84 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
 #include <linux/fs.h>
 #include <linux/mutex.h>
 #include <linux/debug_locks.h>
+#include <linux/backing-dev.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -218,7 +219,8 @@ struct inode;
  * Each physical page in the system has a struct page associated with
  * it to keep track of whatever it is we are using the page for at the
  * moment. Note that we have no way to track which tasks are using
- * a page.
+ * a page, though if it is a pagecache page, rmap structures can tell us
+ * who is mapping it.
  */
 struct page {
 	unsigned long flags;		/* Atomic flags, some possibly
@@ -278,6 +280,12 @@ struct page {
  */
 #include <linux/page-flags.h>
 
+#ifdef CONFIG_DEBUG_VM
+#define VM_BUG_ON(cond) BUG_ON(cond)
+#else
+#define VM_BUG_ON(condition) do { } while(0)
+#endif
+
 /*
  * Methods to modify the page usage count.
  *
@@ -292,12 +300,11 @@ struct page {
  */
 
 /*
- * Drop a ref, return true if the logical refcount fell to zero (the page has
- * no users)
+ * Drop a ref, return true if the refcount fell to zero (the page has no users)
 */
 static inline int put_page_testzero(struct page *page)
 {
-	BUG_ON(atomic_read(&page->_count) == 0);
+	VM_BUG_ON(atomic_read(&page->_count) == 0);
 	return atomic_dec_and_test(&page->_count);
 }
 
@@ -307,11 +314,10 @@ static inline int put_page_testzero(struct page *page)
  */
 static inline int get_page_unless_zero(struct page *page)
 {
+	VM_BUG_ON(PageCompound(page));
 	return atomic_inc_not_zero(&page->_count);
 }
 
-extern void FASTCALL(__page_cache_release(struct page *));
-
 static inline int page_count(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
@@ -323,6 +329,7 @@ static inline void get_page(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
 		page = (struct page *)page_private(page);
+	VM_BUG_ON(atomic_read(&page->_count) == 0);
 	atomic_inc(&page->_count);
 }
 
@@ -349,43 +356,55 @@ void split_page(struct page *page, unsigned int order);
  * For the non-reserved pages, page_count(page) denotes a reference count.
  *   page_count() == 0 means the page is free. page->lru is then used for
  *   freelist management in the buddy allocator.
- *   page_count() == 1 means the page is used for exactly one purpose
- *   (e.g. a private data page of one process).
+ *   page_count() > 0 means the page has been allocated.
+ *
+ * Pages are allocated by the slab allocator in order to provide memory
+ * to kmalloc and kmem_cache_alloc. In this case, the management of the
+ * page, and the fields in 'struct page' are the responsibility of mm/slab.c
+ * unless a particular usage is carefully commented. (the responsibility of
+ * freeing the kmalloc memory is the caller's, of course).
  *
- * A page may be used for kmalloc() or anyone else who does a
- * __get_free_page().  In this case the page_count() is at least 1, and
- * all other fields are unused but should be 0 or NULL. The
- * management of this page is the responsibility of the one who uses
- * it.
+ * A page may be used by anyone else who does a __get_free_page().
+ * In this case, page_count still tracks the references, and should only
+ * be used through the normal accessor functions. The top bits of page->flags
+ * and page->virtual store page management information, but all other fields
+ * are unused and could be used privately, carefully. The management of this
+ * page is the responsibility of the one who allocated it, and those who have
+ * subsequently been given references to it.
 *
- * The other pages (we may call them "process pages") are completely
+ * The other pages (we may call them "pagecache pages") are completely
  * managed by the Linux memory manager: I/O, buffers, swapping etc.
  * The following discussion applies only to them.
 *
- * A page may belong to an inode's memory mapping. In this case,
- * page->mapping is the pointer to the inode, and page->index is the
- * file offset of the page, in units of PAGE_CACHE_SIZE.
+ * A pagecache page contains an opaque `private' member, which belongs to the
+ * page's address_space. Usually, this is the address of a circular list of
+ * the page's disk buffers. PG_private must be set to tell the VM to call
+ * into the filesystem to release these pages.
 *
- * A page contains an opaque `private' member, which belongs to the
- * page's address_space. Usually, this is the address of a circular
- * list of the page's disk buffers.
+ * A page may belong to an inode's memory mapping. In this case, page->mapping
+ * is the pointer to the inode, and page->index is the file offset of the page,
+ * in units of PAGE_CACHE_SIZE.
 *
- * For pages belonging to inodes, the page_count() is the number of
- * attaches, plus 1 if `private' contains something, plus one for
- * the page cache itself.
+ * If pagecache pages are not associated with an inode, they are said to be
+ * anonymous pages. These may become associated with the swapcache, and in that
+ * case PG_swapcache is set, and page->private is an offset into the swapcache.
 *
- * Instead of keeping dirty/clean pages in per address-space lists, we instead
- * now tag pages as dirty/under writeback in the radix tree.
+ * In either case (swapcache or inode backed), the pagecache itself holds one
+ * reference to the page. Setting PG_private should also increment the
+ * refcount. Each user mapping also has a reference to the page.
 *
- * There is also a per-mapping radix tree mapping index to the page
- * in memory if present. The tree is rooted at mapping->root.
+ * The pagecache pages are stored in a per-mapping radix tree, which is
+ * rooted at mapping->page_tree, and indexed by offset.
+ * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
+ * lists, we instead now tag pages as dirty/writeback in the radix tree.
 *
- * All process pages can do I/O:
+ * All pagecache pages may be subject to I/O:
  *   - inode pages may need to be read from disk,
  *   - inode pages which have been modified and are MAP_SHARED may need
-  *    to be written to disk,
- *   - private pages which have been modified may need to be swapped out
-  *    to swap space and (later) to be read back into memory.
+ *     to be written back to the inode on disk,
+ *   - anonymous pages (including MAP_PRIVATE file mappings) which have been
+ *     modified may need to be swapped out to swap space and (later) to be read
+ *     back into memory.
  */
 
 /*
@@ -463,7 +482,7 @@ void split_page(struct page *page, unsigned int order);
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
 #define ZONETABLE_MASK		((1UL << ZONETABLE_SHIFT) - 1)
 
-static inline unsigned long page_zonenum(struct page *page)
+static inline enum zone_type page_zonenum(struct page *page)
 {
 	return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
@@ -480,23 +499,29 @@ static inline struct zone *page_zone(struct page *page)
 	return zone_table[page_zone_id(page)];
 }
 
+static inline unsigned long zone_to_nid(struct zone *zone)
+{
+	return zone->zone_pgdat->node_id;
+}
+
 static inline unsigned long page_to_nid(struct page *page)
 {
 	if (FLAGS_HAS_NODE)
 		return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
 	else
-		return page_zone(page)->zone_pgdat->node_id;
+		return zone_to_nid(page_zone(page));
 }
 
 static inline unsigned long page_to_section(struct page *page)
 {
 	return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
 }
 
-static inline void set_page_zone(struct page *page, unsigned long zone)
+static inline void set_page_zone(struct page *page, enum zone_type zone)
 {
 	page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
 	page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
 }
+
 static inline void set_page_node(struct page *page, unsigned long node)
 {
 	page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
@@ -508,7 +533,7 @@ static inline void set_page_section(struct page *page, unsigned long section)
 	page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
 }
 
-static inline void set_page_links(struct page *page, unsigned long zone,
+static inline void set_page_links(struct page *page, enum zone_type zone,
 	unsigned long node, unsigned long pfn)
 {
 	set_page_zone(page, zone);
@@ -802,6 +827,39 @@ struct shrinker;
 extern struct shrinker *set_shrinker(int, shrinker_t);
 extern void remove_shrinker(struct shrinker *shrinker);
 
+/*
+ * Some shared mappings will want the pages marked read-only
+ * to track write events. If so, we'll downgrade vm_page_prot
+ * to the private version (using protection_map[] without the
+ * VM_SHARED bit).
+ */
+static inline int vma_wants_writenotify(struct vm_area_struct *vma)
+{
+	unsigned int vm_flags = vma->vm_flags;
+
+	/* If it was private or non-writable, the write bit is already clear */
+	if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+		return 0;
+
+	/* The backer wishes to know when pages are first written to? */
+	if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+		return 1;
+
+	/* The open routine did something to the protections already? */
+	if (pgprot_val(vma->vm_page_prot) !=
+	    pgprot_val(protection_map[vm_flags &
+		    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+		return 0;
+
+	/* Specialty mapping? */
+	if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
+		return 0;
+
+	/* Can the mapping track the dirty pages? */
+	return vma->vm_file && vma->vm_file->f_mapping &&
+		mapping_cap_account_dirty(vma->vm_file->f_mapping);
+}
+
 extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
 
 int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
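put_page_testzero() and get_page_unless_zero() above encode a small refcounting contract: exactly one caller (the last dropper) learns that the count hit zero, and a speculative taker can only succeed while the count is still nonzero. Here is a userspace sketch of the same contract with C11 atomics standing in for the kernel's atomic_t; the demo scaffolding around it is invented.

/* Refcounting sketch: decrement-and-test vs. increment-not-zero.
 * assert() plays the role of VM_BUG_ON().
 */
#include <stdatomic.h>
#include <assert.h>
#include <stdio.h>

struct page { atomic_int _count; };

/* Drop a ref, return true if the refcount fell to zero */
static int put_page_testzero(struct page *page)
{
        assert(atomic_load(&page->_count) != 0);        /* VM_BUG_ON() */
        return atomic_fetch_sub(&page->_count, 1) == 1;
}

/* Take a ref only if the page still has users (inc-not-zero idiom) */
static int get_page_unless_zero(struct page *page)
{
        int old = atomic_load(&page->_count);

        while (old != 0)
                if (atomic_compare_exchange_weak(&page->_count, &old, old + 1))
                        return 1;
        return 0;
}

int main(void)
{
        struct page p = { 1 };

        printf("got ref: %d\n", get_page_unless_zero(&p));      /* 1 */
        printf("freed:   %d\n", put_page_testzero(&p));         /* 0: a ref remains */
        printf("freed:   %d\n", put_page_testzero(&p));         /* 1: now free */
        printf("got ref: %d\n", get_page_unless_zero(&p));      /* 0: too late */
        return 0;
}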
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f45163c528e..3693f1a5278 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -51,7 +51,8 @@ enum zone_stat_item {
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
 			   only modified from process context */
 	NR_FILE_PAGES,
-	NR_SLAB,	/* Pages used by slab allocator */
+	NR_SLAB_RECLAIMABLE,
+	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,	/* used for pagetables */
 	NR_FILE_DIRTY,
 	NR_WRITEBACK,
@@ -88,53 +89,68 @@ struct per_cpu_pageset {
 #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
 #endif
 
-#define ZONE_DMA		0
-#define ZONE_DMA32		1
-#define ZONE_NORMAL		2
-#define ZONE_HIGHMEM		3
-
-#define MAX_NR_ZONES		4	/* Sync this with ZONES_SHIFT */
-#define ZONES_SHIFT		2	/* ceil(log2(MAX_NR_ZONES)) */
-
+enum zone_type {
+	/*
+	 * ZONE_DMA is used when there are devices that are not able
+	 * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
+	 * carve out the portion of memory that is needed for these devices.
+	 * The range is arch specific.
+	 *
+	 * Some examples
+	 *
+	 * Architecture		Limit
+	 * ---------------------------
+	 * parisc, ia64, sparc	<4G
+	 * s390			<2G
+	 * arm26		<48M
+	 * arm			Various
+	 * alpha		Unlimited or 0-16MB.
+	 *
+	 * i386, x86_64 and multiple other arches
+	 *			<16M.
+	 */
+	ZONE_DMA,
+#ifdef CONFIG_ZONE_DMA32
+	/*
+	 * x86_64 needs two ZONE_DMAs because it supports devices that are
+	 * only able to do DMA to the lower 16M but also 32 bit devices that
+	 * can only do DMA areas below 4G.
+	 */
+	ZONE_DMA32,
+#endif
+	/*
+	 * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
+	 * performed on pages in ZONE_NORMAL if the DMA devices support
+	 * transfers to all addressable memory.
+	 */
+	ZONE_NORMAL,
+#ifdef CONFIG_HIGHMEM
+	/*
+	 * A memory area that is only addressable by the kernel through
+	 * mapping portions into its own address space. This is for example
+	 * used by i386 to allow the kernel to address the memory beyond
+	 * 900MB. The kernel will set up special mappings (page
+	 * table entries on i386) for each page that the kernel needs to
+	 * access.
+	 */
+	ZONE_HIGHMEM,
+#endif
+	MAX_NR_ZONES
+};
 
 /*
  * When a memory allocation must conform to specific limitations (such
  * as being suitable for DMA) the caller will pass in hints to the
  * allocator in the gfp_mask, in the zone modifier bits.  These bits
  * are used to select a priority ordered list of memory zones which
- * match the requested limits. GFP_ZONEMASK defines which bits within
- * the gfp_mask should be considered as zone modifiers.  Each valid
- * combination of the zone modifier bits has a corresponding list
- * of zones (in node_zonelists).  Thus for two zone modifiers there
- * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
- * be 8 (2 ** 3) zonelists.  GFP_ZONETYPES defines the number of possible
- * combinations of zone modifiers in "zone modifier space".
- *
- * As an optimisation any zone modifier bits which are only valid when
- * no other zone modifier bits are set (loners) should be placed in
- * the highest order bits of this field.  This allows us to reduce the
- * extent of the zonelists thus saving space.  For example in the case
- * of three zone modifier bits, we could require up to eight zonelists.
- * If the left most zone modifier is a "loner" then the highest valid
- * zonelist would be four allowing us to allocate only five zonelists.
- * Use the first form for GFP_ZONETYPES when the left most bit is not
- * a "loner", otherwise use the second.
- *
- * NOTE! Make sure this matches the zones in <linux/gfp.h>
+ * match the requested limits. See gfp_zone() in include/linux/gfp.h
 */
-#define GFP_ZONEMASK	0x07
-/* #define GFP_ZONETYPES       (GFP_ZONEMASK + 1) */           /* Non-loner */
-#define GFP_ZONETYPES  ((GFP_ZONEMASK + 1) / 2 + 1)            /* Loner */
-
-/*
- * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a 32bit PC we have 4 zones:
- *
- * ZONE_DMA	  < 16 MB	ISA DMA capable memory
- * ZONE_DMA32	     0 MB	Empty
- * ZONE_NORMAL	16-896 MB	direct mapped by the kernel
- * ZONE_HIGHMEM	 > 896 MB	only page cache and user processes
- */
+#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
+#define ZONES_SHIFT 1
+#else
+#define ZONES_SHIFT 2
+#endif
 
 struct zone {
 	/* Fields commonly accessed by the page allocator */
@@ -154,7 +170,8 @@ struct zone {
 	/*
	 * zone reclaim becomes active if more unmapped pages exist.
	 */
-	unsigned long		min_unmapped_ratio;
+	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
 	struct per_cpu_pageset	*pageset[NR_CPUS];
 #else
 	struct per_cpu_pageset	pageset[NR_CPUS];
@@ -266,7 +283,6 @@ struct zone {
 	char			*name;
 } ____cacheline_internodealigned_in_smp;
 
-
 /*
 * The "priority" of VM scanning is how much of the queues we will scan in one
 * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -304,7 +320,7 @@ struct zonelist {
 struct bootmem_data;
 typedef struct pglist_data {
 	struct zone node_zones[MAX_NR_ZONES];
-	struct zonelist node_zonelists[GFP_ZONETYPES];
+	struct zonelist node_zonelists[MAX_NR_ZONES];
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
 	struct page *node_mem_map;
@@ -373,12 +389,16 @@ static inline int populated_zone(struct zone *zone)
 	return (!!zone->present_pages);
 }
 
-static inline int is_highmem_idx(int idx)
+static inline int is_highmem_idx(enum zone_type idx)
 {
+#ifdef CONFIG_HIGHMEM
 	return (idx == ZONE_HIGHMEM);
+#else
+	return 0;
+#endif
 }
 
-static inline int is_normal_idx(int idx)
+static inline int is_normal_idx(enum zone_type idx)
 {
 	return (idx == ZONE_NORMAL);
 }
@@ -391,7 +411,11 @@ static inline int is_normal_idx(int idx)
 */
 static inline int is_highmem(struct zone *zone)
 {
+#ifdef CONFIG_HIGHMEM
 	return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_normal(struct zone *zone)
@@ -401,7 +425,11 @@ static inline int is_normal(struct zone *zone)
 
 static inline int is_dma32(struct zone *zone)
 {
+#ifdef CONFIG_ZONE_DMA32
 	return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+#else
+	return 0;
+#endif
 }
 
 static inline int is_dma(struct zone *zone)
@@ -421,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file
 					void __user *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
 			struct file *, void __user *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+			struct file *, void __user *, size_t *, loff_t *);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
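With the zones now an enum whose membership depends on Kconfig, ZONES_SHIFT is still maintained by hand against that enum, so the two can drift apart. A compile-time guard in the spirit of the kernel's BUILD_BUG_ON() (negative array size on failure) can enforce the invariant; this is only a userspace sketch of the idea, not something the header itself does.

/* Guard the hand-maintained invariant MAX_NR_ZONES <= 1 << ZONES_SHIFT.
 * CONFIG_ZONE_DMA32 here is a stand-in for the Kconfig selection.
 */
#define CONFIG_ZONE_DMA32

enum zone_type {
        ZONE_DMA,
#ifdef CONFIG_ZONE_DMA32
        ZONE_DMA32,
#endif
        ZONE_NORMAL,
#ifdef CONFIG_HIGHMEM
        ZONE_HIGHMEM,
#endif
        MAX_NR_ZONES
};

#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
#define ZONES_SHIFT 1
#else
#define ZONES_SHIFT 2
#endif

/* negative array size => compile error if the shift is too small */
typedef char zones_shift_is_big_enough
        [(MAX_NR_ZONES <= (1 << ZONES_SHIFT)) ? 1 : -1];

int main(void) { return 0; }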
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 9a285cecf24..312bd2ffee3 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -10,6 +10,8 @@ header-y += xt_connmark.h
 header-y += xt_CONNMARK.h
 header-y += xt_conntrack.h
 header-y += xt_dccp.h
+header-y += xt_dscp.h
+header-y += xt_DSCP.h
 header-y += xt_esp.h
 header-y += xt_helper.h
 header-y += xt_length.h
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5748642e9f3..9d7921dd50f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -13,24 +13,25 @@
 * PG_reserved is set for special pages, which can never be swapped out. Some
 * of them might not even exist (eg empty_bad_page)...
 *
- * The PG_private bitflag is set if page->private contains a valid value.
+ * The PG_private bitflag is set on pagecache pages if they contain filesystem
+ * specific data (which is normally at page->private). It can be used by
+ * private allocations for its own usage.
 *
- * During disk I/O, PG_locked is used. This bit is set before I/O and
- * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
- * waiting for the I/O on this page to complete.
+ * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
+ * and cleared when writeback _starts_ or when read _completes_. PG_writeback
+ * is set before writeback starts and cleared when it finishes.
+ *
+ * PG_locked also pins a page in pagecache, and blocks truncation of the file
+ * while it is held.
+ *
+ * page_waitqueue(page) is a wait queue of all tasks waiting for the page
+ * to become unlocked.
 *
 * PG_uptodate tells whether the page's contents is valid. When a read
 * completes, the page becomes uptodate, unless a disk I/O error happened.
 *
- * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
- * which is set any time the system accesses that page through the (mapping,
- * index) hash table. This referenced bit, together with the referenced bit
- * in the page tables, is used to manipulate page->age and move the page across
- * the active, inactive_dirty and inactive_clean lists.
- *
- * Note that the referenced bit, the page->lru list_head and the active,
- * inactive_dirty and inactive_clean lists are protected by the
- * zone->lru_lock, and *NOT* by the usual PG_locked bit!
+ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
+ * file-backed pagecache (see mm/vmscan.c).
 *
 * PG_error is set to indicate that an I/O error occurred on this page.
 *
@@ -42,6 +43,10 @@
 * space, they need to be kmapped separately for doing IO on the pages.  The
 * struct page (these bits with information) are always mapped into kernel
 * address space...
+ *
+ * PG_buddy is set to indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
 */
 
 /*
@@ -74,7 +79,7 @@
 #define PG_checked		 8	/* kill me in 2.5.<early>. */
 #define PG_arch_1		 9
 #define PG_reserved		10
-#define PG_private		11	/* Has something at ->private */
+#define PG_private		11	/* If pagecache, has fs-private data */
 
 #define PG_writeback		12	/* Page is under writeback */
 #define PG_nosave		13	/* Used for system suspend/resume */
@@ -83,7 +88,7 @@
 #define PG_mappedtodisk		16	/* Has blocks allocated on-disk */
 #define PG_reclaim		17	/* To be reclaimed asap */
-#define PG_nosave_free		18	/* Free, should not be written */
+#define PG_nosave_free		18	/* Used for system suspend/resume */
 #define PG_buddy		19	/* Page is free, on buddy lists */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0a2f5d27f60..64f95092515 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 }
 
 extern void FASTCALL(__lock_page(struct page *page));
+extern void FASTCALL(__lock_page_nosync(struct page *page));
 extern void FASTCALL(unlock_page(struct page *page));
 
+/*
+ * lock_page may only be called if we have the page's inode pinned.
+ */
 static inline void lock_page(struct page *page)
 {
 	might_sleep();
 	if (TestSetPageLocked(page))
 		__lock_page(page);
 }
+
+/*
+ * lock_page_nosync should only be used if we can't pin the page's inode.
+ * Doesn't play quite so well with block device plugging.
+ */
+static inline void lock_page_nosync(struct page *page)
+{
+	might_sleep();
+	if (TestSetPageLocked(page))
+		__lock_page_nosync(page);
+}
 
 /*
 * This is exported only for wait_on_page_locked/wait_on_page_writeback.
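lock_page() and the new lock_page_nosync() share one structure: a cheap atomic test-and-set fast path in the inline, with the out-of-line slow path entered only when the bit was already set. Below is a loose userspace analogy using C11 atomic_flag; the real slow path sleeps on page_waitqueue(page), so the sched_yield() spin here is an invented simplification, not how the kernel behaves.

/* Fast-path/slow-path split of a sleeping bit lock, sketched with
 * C11 atomics. All names mirror the header; the bodies are demo-only.
 */
#include <stdatomic.h>
#include <sched.h>
#include <stdio.h>

struct page { atomic_flag locked; };

static int test_set_page_locked(struct page *page)
{
        /* returns nonzero if the page was already locked */
        return atomic_flag_test_and_set(&page->locked);
}

static void __lock_page(struct page *page)      /* slow path */
{
        while (atomic_flag_test_and_set(&page->locked))
                sched_yield();  /* kernel: sleep on page_waitqueue(page) */
}

static void lock_page(struct page *page)
{
        if (test_set_page_locked(page)) /* fast path failed... */
                __lock_page(page);      /* ...take the slow path */
}

static void unlock_page(struct page *page)
{
        atomic_flag_clear(&page->locked);
        /* kernel: also wakes waiters on the page's wait queue */
}

int main(void)
{
        struct page p = { ATOMIC_FLAG_INIT };

        lock_page(&p);          /* uncontended: fast path only */
        printf("locked\n");
        unlock_page(&p);
        return 0;
}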
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cb9039a21f2..3835a9642f1 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,9 +1,12 @@
 #ifndef __LINUX_PERCPU_H
 #define __LINUX_PERCPU_H
+
 #include <linux/spinlock.h> /* For preempt_disable() */
 #include <linux/slab.h> /* For kmalloc() */
 #include <linux/smp.h>
 #include <linux/string.h> /* For memset() */
+#include <linux/cpumask.h>
+
 #include <asm/percpu.h>
 
 /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
@@ -11,8 +14,14 @@
 #define PERCPU_ENOUGH_ROOM 32768
 #endif
 
-/* Must be an lvalue. */
-#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); }))
+/*
+ * Must be an lvalue. Since @var must be a simple identifier,
+ * we force a syntax error here if it isn't.
+ */
+#define get_cpu_var(var) (*({				\
+	extern int simple_indentifier_##var(void);	\
+	preempt_disable();				\
+	&__get_cpu_var(var); }))
 #define put_cpu_var(var) preempt_enable()
 
 #ifdef CONFIG_SMP
@@ -21,39 +30,77 @@ struct percpu_data {
 	void *ptrs[NR_CPUS];
 };
 
+#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
 /*
- * Use this to get to a cpu's version of the per-cpu object allocated using
- * alloc_percpu.  Non-atomic access to the current CPU's version should
+ * Use this to get to a cpu's version of the per-cpu object dynamically
+ * allocated. Non-atomic access to the current CPU's version should
 * probably be combined with get_cpu()/put_cpu().
 */
-#define per_cpu_ptr(ptr, cpu)                   \
-({                                              \
-        struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \
-        (__typeof__(ptr))__p->ptrs[(cpu)];	\
+#define percpu_ptr(ptr, cpu)                              \
+({                                                        \
+        struct percpu_data *__p = __percpu_disguise(ptr); \
+        (__typeof__(ptr))__p->ptrs[(cpu)];	          \
 })
 
-extern void *__alloc_percpu(size_t size);
-extern void free_percpu(const void *);
+extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
+extern void percpu_depopulate(void *__pdata, int cpu);
+extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+				  cpumask_t *mask);
+extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
+extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
+extern void percpu_free(void *__pdata);
 
 #else /* CONFIG_SMP */
 
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+
+static inline void percpu_depopulate(void *__pdata, int cpu)
+{
+}
+
+static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+}
 
-static inline void *__alloc_percpu(size_t size)
+static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
+				    int cpu)
 {
-	void *ret = kmalloc(size, GFP_KERNEL);
-	if (ret)
-		memset(ret, 0, size);
-	return ret;
+	return percpu_ptr(__pdata, cpu);
 }
-static inline void free_percpu(const void *ptr)
-{
-	kfree(ptr);
+
+static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+					 cpumask_t *mask)
+{
+	return 0;
+}
+
+static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+	return kzalloc(size, gfp);
+}
+
+static inline void percpu_free(void *__pdata)
+{
+	kfree(__pdata);
 }
 
 #endif /* CONFIG_SMP */
 
-/* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type))))
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+	__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
+#define percpu_depopulate_mask(__pdata, mask) \
+	__percpu_depopulate_mask((__pdata), &(mask))
+#define percpu_alloc_mask(size, gfp, mask) \
+	__percpu_alloc_mask((size), (gfp), &(mask))
+
+#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
+
+/* (legacy) interface for use without CPU hotplug handling */
+
+#define __alloc_percpu(size)	percpu_alloc_mask((size), GFP_KERNEL, \
+						  cpu_possible_map)
+#define alloc_percpu(type)	(type *)__alloc_percpu(sizeof(type))
+#define free_percpu(ptr)	percpu_free((ptr))
+#define per_cpu_ptr(ptr, cpu)	percpu_ptr((ptr), (cpu))
 
 #endif /* __LINUX_PERCPU_H */
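Behind percpu_alloc()/percpu_ptr() sits a per-CPU pointer array whose address is "disguised" by bit inversion so callers cannot dereference the handle directly. Here is a userspace sketch of that scheme; plain calloc() stands in for the kernel's per-node zeroed allocations, and NR_CPUS plus the demo in main() are invented.

/* Per-CPU pointer-array allocator, sketched in userspace C. */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

struct percpu_data { void *ptrs[NR_CPUS]; };

/* same trick as __percpu_disguise(): ~~x == x, so it self-inverts */
#define percpu_disguise(p) ((struct percpu_data *)~(unsigned long)(p))

static void *percpu_alloc(size_t size)
{
        struct percpu_data *pdata = calloc(1, sizeof(*pdata));
        int cpu;

        if (!pdata)
                return NULL;
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                pdata->ptrs[cpu] = calloc(1, size); /* kernel: kzalloc_node() */
                if (!pdata->ptrs[cpu])
                        return NULL;    /* a real version would unwind */
        }
        return percpu_disguise(pdata);  /* hide the pointer from misuse */
}

#define percpu_ptr(ptr, cpu) \
        ((void *)(percpu_disguise(ptr)->ptrs[(cpu)]))

int main(void)
{
        long *counters = percpu_alloc(sizeof(long));
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                *(long *)percpu_ptr(counters, cpu) = cpu * 10;
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu%d: %ld\n", cpu, *(long *)percpu_ptr(counters, cpu));
        return 0;
}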
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index a376bd4ade3..81e9299ca14 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -3,21 +3,25 @@
 
 #ifdef CONFIG_PM_TRACE
 
+extern int pm_trace_enabled;
+
 struct device;
 extern void set_trace_device(struct device *);
 extern void generate_resume_trace(void *tracedata, unsigned int user);
 
 #define TRACE_DEVICE(dev) set_trace_device(dev)
-#define TRACE_RESUME(user) do {				\
-	void *tracedata;				\
-	asm volatile("movl $1f,%0\n"			\
-		".section .tracedata,\"a\"\n"		\
-		"1:\t.word %c1\n"			\
-		"\t.long %c2\n"				\
-		".previous"				\
-		:"=r" (tracedata)			\
-		: "i" (__LINE__), "i" (__FILE__));	\
-	generate_resume_trace(tracedata, user);		\
+#define TRACE_RESUME(user) do {					\
+	if (pm_trace_enabled) {					\
+		void *tracedata;				\
+		asm volatile("movl $1f,%0\n"			\
+			".section .tracedata,\"a\"\n"		\
+			"1:\t.word %c1\n"			\
+			"\t.long %c2\n"				\
+			".previous"				\
+			:"=r" (tracedata)			\
+			: "i" (__LINE__), "i" (__FILE__));	\
+		generate_resume_trace(tracedata, user);		\
+	}							\
 } while (0)
 
 #else
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf97b090001..db2c1df4fef 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *,
 */
 unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 
+/*
+ * Cleans the PTEs of shared mappings.
+ * (and since clean PTEs should also be readonly, write protects them too)
+ *
+ * returns the number of cleaned PTEs.
+ */
+int page_mkclean(struct page *);
+
 #else	/* !CONFIG_MMU */
 
 #define anon_vma_init()		do {} while (0)
@@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
 #define page_referenced(page,l) TestClearPageReferenced(page)
 #define try_to_unmap(page, refs) SWAP_FAIL
 
+static inline int page_mkclean(struct page *page)
+{
+	return 0;
+}
+
+
 #endif	/* CONFIG_MMU */
 
 /*
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index aad4e390d6a..d1b7ca6c1c5 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
 
 /**
 *	selinux_audit_rule_match - determine if a context ID matches a rule.
- *	@ctxid: the context ID to check
+ *	@sid: the context ID to check
 *	@field: the field this rule refers to
 *	@op: the operator the rule uses
 *	@rule: pointer to the audit rule to check against
@@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
 *	Returns 1 if the context id matches the rule, 0 if it does not, and
 *	-errno on failure.
 */
-int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
                              struct selinux_audit_rule *rule,
                              struct audit_context *actx);
 
@@ -70,18 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
 void selinux_audit_set_callback(int (*callback)(void));
 
 /**
- *	selinux_task_ctxid - determine a context ID for a process.
- *	@tsk: the task object
- *	@ctxid: ID value returned via this
- *
- *	On return, ctxid will contain an ID for the context.  This value
- *	should only be used opaquely.
- */
-void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
-
-/**
- *	selinux_ctxid_to_string - map a security context ID to a string
- *	@ctxid: security context ID to be converted.
+ *	selinux_sid_to_string - map a security context ID to a string
+ *	@sid: security context ID to be converted.
 *	@ctx: address of context string to be returned
 *	@ctxlen: length of returned context string.
 *
@@ -89,7 +79,7 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
 *	string will be allocated internally, and the caller must call
 *	kfree() on it after use.
 */
-int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen);
+int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen);
 
 /**
 *	selinux_get_inode_sid - get the inode's security context ID
@@ -154,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule)
 	return;
 }
 
-static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
                                            struct selinux_audit_rule *rule,
                                            struct audit_context *actx)
 {
@@ -166,12 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void))
 	return;
 }
 
-static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
-{
-	*ctxid = 0;
-}
-
-static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
+static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
 {
 	*ctx = NULL;
 	*ctxlen = 0;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 45ad55b70d1..66d6eb78d1c 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
 extern void kmem_cache_free(kmem_cache_t *, void *);
 extern unsigned int kmem_cache_size(kmem_cache_t *);
 extern const char *kmem_cache_name(kmem_cache_t *);
-extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags);
 
 /* Size description struct for general caches. */
 struct cache_sizes {
@@ -203,7 +202,30 @@ extern int slab_is_available(void);
 
 #ifdef CONFIG_NUMA
 extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
-extern void *kmalloc_node(size_t size, gfp_t flags, int node);
+extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
+
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+	if (__builtin_constant_p(size)) {
+		int i = 0;
+#define CACHE(x) \
+		if (size <= x) \
+			goto found; \
+		else \
+			i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+		{
+			extern void __you_cannot_kmalloc_that_much(void);
+			__you_cannot_kmalloc_that_much();
+		}
+found:
+		return kmem_cache_alloc_node((flags & GFP_DMA) ?
+			malloc_sizes[i].cs_dmacachep :
+			malloc_sizes[i].cs_cachep, flags, node);
+	}
+	return __kmalloc_node(size, flags, node);
+}
#else
 static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
 {
@@ -223,7 +245,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
 /* SLOB allocator routines */
 
 void kmem_cache_init(void);
-struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
 struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
	unsigned long,
	void (*)(void *, struct kmem_cache *, unsigned long),
@@ -263,8 +284,6 @@ extern kmem_cache_t	*fs_cachep;
 extern kmem_cache_t	*sighand_cachep;
 extern kmem_cache_t	*bio_cachep;
 
-extern atomic_t slab_reclaim_pages;
-
 #endif	/* __KERNEL__ */
 
 #endif	/* _LINUX_SLAB_H */
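The new inline kmalloc_node() leans on __builtin_constant_p(): when the size is a compile-time constant, the CACHE(x) list included from kmalloc_sizes.h unrolls into a chain of comparisons that the compiler folds down to a single cache pointer; only non-constant sizes pay for a runtime lookup. A userspace sketch of the trick, with three invented size classes standing in for kmalloc_sizes.h and a plain function standing in for __kmalloc_node():

#include <stddef.h>
#include <stdio.h>

static int runtime_lookup(size_t size) /* stand-in for __kmalloc_node() */
{
        static const size_t classes[] = { 32, 128, 4096 };
        int i;

        for (i = 0; i < 2 && size > classes[i]; i++)
                ;
        return i;       /* demo only: sizes above 4096 map to the last class */
}

static inline int size_class(size_t size)
{
        if (__builtin_constant_p(size)) {
                /* unrolled at compile time, like the CACHE(x) list */
                if (size <= 32)   return 0;
                if (size <= 128)  return 1;
                if (size <= 4096) return 2;
        }
        return runtime_lookup(size);
}

int main(int argc, char **argv)
{
        (void)argv;
        printf("class for 100 bytes:  %d\n", size_class(100)); /* folded to 1 */
        printf("class for argc bytes: %d\n", size_class((size_t)argc));
        return 0;
}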
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 837e8bce134..51649987f69 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus);
 */
 int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
 
+int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
+				int retry, int wait);
+
 /*
 * Call a function on all processors
 */
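smp_call_function_single() runs a function on one chosen CPU and, with wait=1, blocks until it has finished there. A hedged, kernel-style usage sketch matching the signature above; this only compiles as kernel code, and the payload function and the choice of CPU 0 are invented for illustration.

#include <linux/smp.h>

static void read_cpu_id(void *info)
{
        /* runs on the target CPU, in interrupt-like context: no sleeping */
        *(int *)info = smp_processor_id();
}

static int example(void)
{
        int result = -1;

        /* cpu 0, retry=0, wait=1: block until the function has run */
        if (smp_call_function_single(0, read_cpu_id, &result, 0, 1))
                return -1;
        return result;  /* 0, since it ran on CPU 0 */
}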
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 96e31aa64cc..b1237f16ecd 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -10,29 +10,11 @@
 #include <linux/pm.h>
 
 /* page backup entry */
-typedef struct pbe {
+struct pbe {
 	unsigned long address;		/* address of the copy */
 	unsigned long orig_address;	/* original address of page */
 	struct pbe *next;
-} suspend_pagedir_t;
-
-#define for_each_pbe(pbe, pblist) \
-	for (pbe = pblist ; pbe ; pbe = pbe->next)
-
-#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe))
-#define PB_PAGE_SKIP (PBES_PER_PAGE-1)
-
-#define for_each_pb_page(pbe, pblist) \
-	for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
-
-
-#define SWAP_FILENAME_MAXLENGTH	32
-
-
-extern dev_t swsusp_resume_device;
-
-/* mm/vmscan.c */
-extern int shrink_mem(void);
+};
 
 /* mm/page_alloc.c */
 extern void drain_local_pages(void);
@@ -53,18 +35,10 @@ static inline void pm_restore_console(void) {}
 static inline int software_suspend(void)
 {
 	printk("Warning: fake suspend called\n");
-	return -EPERM;
+	return -ENOSYS;
 }
 #endif /* CONFIG_PM */
 
-#ifdef CONFIG_SUSPEND_SMP
-extern void disable_nonboot_cpus(void);
-extern void enable_nonboot_cpus(void);
-#else
-static inline void disable_nonboot_cpus(void) {}
-static inline void enable_nonboot_cpus(void) {}
-#endif
-
 void save_processor_state(void);
 void restore_processor_state(void);
 struct saved_context;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5e59184c909..e7c36ba2a2d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -10,6 +10,10 @@
 #include <asm/atomic.h>
 #include <asm/page.h>
 
+struct notifier_block;
+
+struct bio;
+
 #define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
 #define SWAP_FLAG_PRIO_MASK	0x7fff
 #define SWAP_FLAG_PRIO_SHIFT	0
@@ -156,13 +160,14 @@ struct swap_list_t {
 
 /* linux/mm/oom_kill.c */
 extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
+extern int register_oom_notifier(struct notifier_block *nb);
+extern int unregister_oom_notifier(struct notifier_block *nb);
 
 /* linux/mm/memory.c */
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
 
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
-extern unsigned long totalhigh_pages;
 extern unsigned long totalreserve_pages;
 extern long nr_swap_pages;
 extern unsigned int nr_free_pages(void);
@@ -190,6 +195,7 @@ extern long vm_total_pages;
 #ifdef CONFIG_NUMA
 extern int zone_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
+extern int sysctl_min_slab_ratio;
 extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
 #else
 #define zone_reclaim_mode 0
@@ -212,7 +218,9 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
 /* linux/mm/page_io.c */
 extern int swap_readpage(struct file *, struct page *);
 extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
+extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
+				struct bio **bio_chain);
+extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
 
 /* linux/mm/swap_state.c */
 extern struct address_space swapper_space;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 736ed917a4f..eca555781d0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -191,6 +191,7 @@ enum
 	VM_MIN_UNMAPPED=32,	/* Set min percent of unmapped pages */
 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
+	VM_MIN_SLAB=35,		/* Percent pages ignored by zone reclaim */
 };
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 71b6363caaa..dee88c6b6fa 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size);
 extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
 extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
				pgprot_t prot);
-extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
-				pgprot_t prot, int node);
 extern void vfree(void *addr);
 
 extern void *vmap(struct page **pages, unsigned int count,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2d9b1b60798..176c7f79733 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -18,7 +18,19 @@
 * generated will simply be the increment of a global address.
 */
 
-#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH
+#ifdef CONFIG_ZONE_DMA32
+#define DMA32_ZONE(xx) xx##_DMA32,
+#else
+#define DMA32_ZONE(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define HIGHMEM_ZONE(xx) , xx##_HIGH
+#else
+#define HIGHMEM_ZONE(xx)
+#endif
+
+#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx)
 
 enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
		FOR_ALL_ZONES(PGALLOC),
@@ -124,12 +136,10 @@ static inline unsigned long node_page_state(int node,
 	struct zone *zones = NODE_DATA(node)->node_zones;
 
 	return
-#ifndef CONFIG_DMA_IS_NORMAL
-#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64
+#ifdef CONFIG_ZONE_DMA32
		zone_page_state(&zones[ZONE_DMA32], item) +
 #endif
		zone_page_state(&zones[ZONE_NORMAL], item) +
-#endif
 #ifdef CONFIG_HIGHMEM
		zone_page_state(&zones[ZONE_HIGHMEM], item) +
 #endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0422036af4e..56a23a0e7f2 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
			loff_t pos, loff_t count);
 int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
			   loff_t pos, loff_t count);
+void set_page_dirty_balance(struct page *page);
 
 /* pdflush.c */
 extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl */