diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Makefile | 4 | ||||
-rw-r--r-- | mm/bounce.c | 302 | ||||
-rw-r--r-- | mm/filemap.c | 40 | ||||
-rw-r--r-- | mm/highmem.c | 281 | ||||
-rw-r--r-- | mm/memory.c | 9 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 5 | ||||
-rw-r--r-- | mm/migrate.c | 4 | ||||
-rw-r--r-- | mm/oom_kill.c | 53 | ||||
-rw-r--r-- | mm/page-writeback.c | 160 | ||||
-rw-r--r-- | mm/shmem.c | 99 | ||||
-rw-r--r-- | mm/shmem_acl.c | 197 | ||||
-rw-r--r-- | mm/slab.c | 24 | ||||
-rw-r--r-- | mm/swapfile.c | 7 | ||||
-rw-r--r-- | mm/truncate.c | 26 |
14 files changed, 883 insertions, 328 deletions
diff --git a/mm/Makefile b/mm/Makefile index 60c56c0b5e1..12b3a4eee88 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -12,11 +12,15 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o \ prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) +ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) +obj-y += bounce.o +endif obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SHMEM) += shmem.o +obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_SLAB) += slab.o diff --git a/mm/bounce.c b/mm/bounce.c new file mode 100644 index 00000000000..e4b62d2a402 --- /dev/null +++ b/mm/bounce.c @@ -0,0 +1,302 @@ +/* bounce buffer handling for block devices + * + * - Split from highmem.c + */ + +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/swap.h> +#include <linux/bio.h> +#include <linux/pagemap.h> +#include <linux/mempool.h> +#include <linux/blkdev.h> +#include <linux/init.h> +#include <linux/hash.h> +#include <linux/highmem.h> +#include <linux/blktrace_api.h> +#include <asm/tlbflush.h> + +#define POOL_SIZE 64 +#define ISA_POOL_SIZE 16 + +static mempool_t *page_pool, *isa_page_pool; + +#ifdef CONFIG_HIGHMEM +static __init int init_emergency_pool(void) +{ + struct sysinfo i; + si_meminfo(&i); + si_swapinfo(&i); + + if (!i.totalhigh) + return 0; + + page_pool = mempool_create_page_pool(POOL_SIZE, 0); + BUG_ON(!page_pool); + printk("highmem bounce pool size: %d pages\n", POOL_SIZE); + + return 0; +} + +__initcall(init_emergency_pool); + +/* + * highmem version, map in to vec + */ +static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) +{ + unsigned long flags; + unsigned char *vto; + + local_irq_save(flags); + vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); + memcpy(vto + to->bv_offset, vfrom, to->bv_len); + kunmap_atomic(vto, KM_BOUNCE_READ); + local_irq_restore(flags); +} + +#else /* CONFIG_HIGHMEM */ + +#define bounce_copy_vec(to, vfrom) \ + memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len) + +#endif /* CONFIG_HIGHMEM */ + +/* + * allocate pages in the DMA region for the ISA pool + */ +static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) +{ + return mempool_alloc_pages(gfp_mask | GFP_DMA, data); +} + +/* + * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA + * as the max address, so check if the pool has already been created. + */ +int init_emergency_isa_pool(void) +{ + if (isa_page_pool) + return 0; + + isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa, + mempool_free_pages, (void *) 0); + BUG_ON(!isa_page_pool); + + printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE); + return 0; +} + +/* + * Simple bounce buffer support for highmem pages. Depending on the + * queue gfp mask set, *to may or may not be a highmem page. kmap it + * always, it will do the Right Thing + */ +static void copy_to_high_bio_irq(struct bio *to, struct bio *from) +{ + unsigned char *vfrom; + struct bio_vec *tovec, *fromvec; + int i; + + __bio_for_each_segment(tovec, to, i, 0) { + fromvec = from->bi_io_vec + i; + + /* + * not bounced + */ + if (tovec->bv_page == fromvec->bv_page) + continue; + + /* + * fromvec->bv_offset and fromvec->bv_len might have been + * modified by the block layer, so use the original copy, + * bounce_copy_vec already uses tovec->bv_len + */ + vfrom = page_address(fromvec->bv_page) + tovec->bv_offset; + + flush_dcache_page(tovec->bv_page); + bounce_copy_vec(tovec, vfrom); + } +} + +static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) +{ + struct bio *bio_orig = bio->bi_private; + struct bio_vec *bvec, *org_vec; + int i; + + if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) + set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); + + /* + * free up bounce indirect pages used + */ + __bio_for_each_segment(bvec, bio, i, 0) { + org_vec = bio_orig->bi_io_vec + i; + if (bvec->bv_page == org_vec->bv_page) + continue; + + dec_zone_page_state(bvec->bv_page, NR_BOUNCE); + mempool_free(bvec->bv_page, pool); + } + + bio_endio(bio_orig, bio_orig->bi_size, err); + bio_put(bio); +} + +static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err) +{ + if (bio->bi_size) + return 1; + + bounce_end_io(bio, page_pool, err); + return 0; +} + +static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err) +{ + if (bio->bi_size) + return 1; + + bounce_end_io(bio, isa_page_pool, err); + return 0; +} + +static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err) +{ + struct bio *bio_orig = bio->bi_private; + + if (test_bit(BIO_UPTODATE, &bio->bi_flags)) + copy_to_high_bio_irq(bio_orig, bio); + + bounce_end_io(bio, pool, err); +} + +static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err) +{ + if (bio->bi_size) + return 1; + + __bounce_end_io_read(bio, page_pool, err); + return 0; +} + +static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err) +{ + if (bio->bi_size) + return 1; + + __bounce_end_io_read(bio, isa_page_pool, err); + return 0; +} + +static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig, + mempool_t *pool) +{ + struct page *page; + struct bio *bio = NULL; + int i, rw = bio_data_dir(*bio_orig); + struct bio_vec *to, *from; + + bio_for_each_segment(from, *bio_orig, i) { + page = from->bv_page; + + /* + * is destination page below bounce pfn? + */ + if (page_to_pfn(page) < q->bounce_pfn) + continue; + + /* + * irk, bounce it + */ + if (!bio) + bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt); + + to = bio->bi_io_vec + i; + + to->bv_page = mempool_alloc(pool, q->bounce_gfp); + to->bv_len = from->bv_len; + to->bv_offset = from->bv_offset; + inc_zone_page_state(to->bv_page, NR_BOUNCE); + + if (rw == WRITE) { + char *vto, *vfrom; + + flush_dcache_page(from->bv_page); + vto = page_address(to->bv_page) + to->bv_offset; + vfrom = kmap(from->bv_page) + from->bv_offset; + memcpy(vto, vfrom, to->bv_len); + kunmap(from->bv_page); + } + } + + /* + * no pages bounced + */ + if (!bio) + return; + + /* + * at least one page was bounced, fill in possible non-highmem + * pages + */ + __bio_for_each_segment(from, *bio_orig, i, 0) { + to = bio_iovec_idx(bio, i); + if (!to->bv_page) { + to->bv_page = from->bv_page; + to->bv_len = from->bv_len; + to->bv_offset = from->bv_offset; + } + } + + bio->bi_bdev = (*bio_orig)->bi_bdev; + bio->bi_flags |= (1 << BIO_BOUNCED); + bio->bi_sector = (*bio_orig)->bi_sector; + bio->bi_rw = (*bio_orig)->bi_rw; + + bio->bi_vcnt = (*bio_orig)->bi_vcnt; + bio->bi_idx = (*bio_orig)->bi_idx; + bio->bi_size = (*bio_orig)->bi_size; + + if (pool == page_pool) { + bio->bi_end_io = bounce_end_io_write; + if (rw == READ) + bio->bi_end_io = bounce_end_io_read; + } else { + bio->bi_end_io = bounce_end_io_write_isa; + if (rw == READ) + bio->bi_end_io = bounce_end_io_read_isa; + } + + bio->bi_private = *bio_orig; + *bio_orig = bio; +} + +void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig) +{ + mempool_t *pool; + + /* + * for non-isa bounce case, just check if the bounce pfn is equal + * to or bigger than the highest pfn in the system -- in that case, + * don't waste time iterating over bio segments + */ + if (!(q->bounce_gfp & GFP_DMA)) { + if (q->bounce_pfn >= blk_max_pfn) + return; + pool = page_pool; + } else { + BUG_ON(!isa_page_pool); + pool = isa_page_pool; + } + + blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); + + /* + * slow path + */ + __blk_queue_bounce(q, bio_orig, pool); +} + +EXPORT_SYMBOL(blk_queue_bounce); diff --git a/mm/filemap.c b/mm/filemap.c index afcdc72b5e9..c4fe97f5ace 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1471,7 +1471,7 @@ outside_data_content: * accessible.. */ if (area->vm_mm == current->mm) - return NULL; + return NOPAGE_SIGBUS; /* Fall through to the non-read-ahead case */ no_cached_page: /* @@ -1496,7 +1496,7 @@ no_cached_page: */ if (error == -ENOMEM) return NOPAGE_OOM; - return NULL; + return NOPAGE_SIGBUS; page_not_uptodate: if (!did_readaround) { @@ -1565,7 +1565,7 @@ page_not_uptodate: */ shrink_readahead_size_eio(file, ra); page_cache_release(page); - return NULL; + return NOPAGE_SIGBUS; } EXPORT_SYMBOL(filemap_nopage); @@ -2020,6 +2020,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) *count = inode->i_sb->s_maxbytes - *pos; } else { +#ifdef CONFIG_BLOCK loff_t isize; if (bdev_read_only(I_BDEV(inode))) return -EPERM; @@ -2031,6 +2032,9 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i if (*pos + *count > isize) *count = isize - *pos; +#else + return -EPERM; +#endif } return 0; } @@ -2491,3 +2495,33 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, } return retval; } + +/** + * try_to_release_page() - release old fs-specific metadata on a page + * + * @page: the page which the kernel is trying to free + * @gfp_mask: memory allocation flags (and I/O mode) + * + * The address_space is to try to release any data against the page + * (presumably at page->private). If the release was successful, return `1'. + * Otherwise return zero. + * + * The @gfp_mask argument specifies whether I/O may be performed to release + * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). + * + * NOTE: @gfp_mask may go away, and this function may become non-blocking. + */ +int try_to_release_page(struct page *page, gfp_t gfp_mask) +{ + struct address_space * const mapping = page->mapping; + + BUG_ON(!PageLocked(page)); + if (PageWriteback(page)) + return 0; + + if (mapping && mapping->a_ops->releasepage) + return mapping->a_ops->releasepage(page, gfp_mask); + return try_to_free_buffers(page); +} + +EXPORT_SYMBOL(try_to_release_page); diff --git a/mm/highmem.c b/mm/highmem.c index ee5519b176e..0206e7e5018 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -29,13 +29,6 @@ #include <linux/blktrace_api.h> #include <asm/tlbflush.h> -static mempool_t *page_pool, *isa_page_pool; - -static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data) -{ - return mempool_alloc_pages(gfp_mask | GFP_DMA, data); -} - /* * Virtual_count is not a pure "count". * 0 means that it is not mapped, and has not been mapped @@ -217,282 +210,8 @@ void fastcall kunmap_high(struct page *page) } EXPORT_SYMBOL(kunmap_high); - -#define POOL_SIZE 64 - -static __init int init_emergency_pool(void) -{ - struct sysinfo i; - si_meminfo(&i); - si_swapinfo(&i); - - if (!i.totalhigh) - return 0; - - page_pool = mempool_create_page_pool(POOL_SIZE, 0); - BUG_ON(!page_pool); - printk("highmem bounce pool size: %d pages\n", POOL_SIZE); - - return 0; -} - -__initcall(init_emergency_pool); - -/* - * highmem version, map in to vec - */ -static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom) -{ - unsigned long flags; - unsigned char *vto; - - local_irq_save(flags); - vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); - memcpy(vto + to->bv_offset, vfrom, to->bv_len); - kunmap_atomic(vto, KM_BOUNCE_READ); - local_irq_restore(flags); -} - -#else /* CONFIG_HIGHMEM */ - -#define bounce_copy_vec(to, vfrom) \ - memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len) - #endif -#define ISA_POOL_SIZE 16 - -/* - * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA - * as the max address, so check if the pool has already been created. - */ -int init_emergency_isa_pool(void) -{ - if (isa_page_pool) - return 0; - - isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa, - mempool_free_pages, (void *) 0); - BUG_ON(!isa_page_pool); - - printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE); - return 0; -} - -/* - * Simple bounce buffer support for highmem pages. Depending on the - * queue gfp mask set, *to may or may not be a highmem page. kmap it - * always, it will do the Right Thing - */ -static void copy_to_high_bio_irq(struct bio *to, struct bio *from) -{ - unsigned char *vfrom; - struct bio_vec *tovec, *fromvec; - int i; - - __bio_for_each_segment(tovec, to, i, 0) { - fromvec = from->bi_io_vec + i; - - /* - * not bounced - */ - if (tovec->bv_page == fromvec->bv_page) - continue; - - /* - * fromvec->bv_offset and fromvec->bv_len might have been - * modified by the block layer, so use the original copy, - * bounce_copy_vec already uses tovec->bv_len - */ - vfrom = page_address(fromvec->bv_page) + tovec->bv_offset; - - flush_dcache_page(tovec->bv_page); - bounce_copy_vec(tovec, vfrom); - } -} - -static void bounce_end_io(struct bio *bio, mempool_t *pool, int err) -{ - struct bio *bio_orig = bio->bi_private; - struct bio_vec *bvec, *org_vec; - int i; - - if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags)) - set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags); - - /* - * free up bounce indirect pages used - */ - __bio_for_each_segment(bvec, bio, i, 0) { - org_vec = bio_orig->bi_io_vec + i; - if (bvec->bv_page == org_vec->bv_page) - continue; - - dec_zone_page_state(bvec->bv_page, NR_BOUNCE); - mempool_free(bvec->bv_page, pool); - } - - bio_endio(bio_orig, bio_orig->bi_size, err); - bio_put(bio); -} - -static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err) -{ - if (bio->bi_size) - return 1; - - bounce_end_io(bio, page_pool, err); - return 0; -} - -static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err) -{ - if (bio->bi_size) - return 1; - - bounce_end_io(bio, isa_page_pool, err); - return 0; -} - -static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err) -{ - struct bio *bio_orig = bio->bi_private; - - if (test_bit(BIO_UPTODATE, &bio->bi_flags)) - copy_to_high_bio_irq(bio_orig, bio); - - bounce_end_io(bio, pool, err); -} - -static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err) -{ - if (bio->bi_size) - return 1; - - __bounce_end_io_read(bio, page_pool, err); - return 0; -} - -static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err) -{ - if (bio->bi_size) - return 1; - - __bounce_end_io_read(bio, isa_page_pool, err); - return 0; -} - -static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig, - mempool_t *pool) -{ - struct page *page; - struct bio *bio = NULL; - int i, rw = bio_data_dir(*bio_orig); - struct bio_vec *to, *from; - - bio_for_each_segment(from, *bio_orig, i) { - page = from->bv_page; - - /* - * is destination page below bounce pfn? - */ - if (page_to_pfn(page) < q->bounce_pfn) - continue; - - /* - * irk, bounce it - */ - if (!bio) - bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt); - - to = bio->bi_io_vec + i; - - to->bv_page = mempool_alloc(pool, q->bounce_gfp); - to->bv_len = from->bv_len; - to->bv_offset = from->bv_offset; - inc_zone_page_state(to->bv_page, NR_BOUNCE); - - if (rw == WRITE) { - char *vto, *vfrom; - - flush_dcache_page(from->bv_page); - vto = page_address(to->bv_page) + to->bv_offset; - vfrom = kmap(from->bv_page) + from->bv_offset; - memcpy(vto, vfrom, to->bv_len); - kunmap(from->bv_page); - } - } - - /* - * no pages bounced - */ - if (!bio) - return; - - /* - * at least one page was bounced, fill in possible non-highmem - * pages - */ - __bio_for_each_segment(from, *bio_orig, i, 0) { - to = bio_iovec_idx(bio, i); - if (!to->bv_page) { - to->bv_page = from->bv_page; - to->bv_len = from->bv_len; - to->bv_offset = from->bv_offset; - } - } - - bio->bi_bdev = (*bio_orig)->bi_bdev; - bio->bi_flags |= (1 << BIO_BOUNCED); - bio->bi_sector = (*bio_orig)->bi_sector; - bio->bi_rw = (*bio_orig)->bi_rw; - - bio->bi_vcnt = (*bio_orig)->bi_vcnt; - bio->bi_idx = (*bio_orig)->bi_idx; - bio->bi_size = (*bio_orig)->bi_size; - - if (pool == page_pool) { - bio->bi_end_io = bounce_end_io_write; - if (rw == READ) - bio->bi_end_io = bounce_end_io_read; - } else { - bio->bi_end_io = bounce_end_io_write_isa; - if (rw == READ) - bio->bi_end_io = bounce_end_io_read_isa; - } - - bio->bi_private = *bio_orig; - *bio_orig = bio; -} - -void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig) -{ - mempool_t *pool; - - /* - * for non-isa bounce case, just check if the bounce pfn is equal - * to or bigger than the highest pfn in the system -- in that case, - * don't waste time iterating over bio segments - */ - if (!(q->bounce_gfp & GFP_DMA)) { - if (q->bounce_pfn >= blk_max_pfn) - return; - pool = page_pool; - } else { - BUG_ON(!isa_page_pool); - pool = isa_page_pool; - } - - blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); - - /* - * slow path - */ - __blk_queue_bounce(q, bio_orig, pool); -} - -EXPORT_SYMBOL(blk_queue_bounce); - #if defined(HASHED_PAGE_VIRTUAL) #define PA_HASH_ORDER 7 diff --git a/mm/memory.c b/mm/memory.c index 601159a46ab..160f5b503ea 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1577,7 +1577,14 @@ gotten: entry = mk_pte(new_page, vma->vm_page_prot); entry = maybe_mkwrite(pte_mkdirty(entry), vma); lazy_mmu_prot_update(entry); - ptep_establish(vma, address, page_table, entry); + /* + * Clear the pte entry and flush it first, before updating the + * pte with the new entry. This will avoid a race condition + * seen in the presence of one thread doing SMC and another + * thread doing COW. + */ + ptep_clear_flush(vma, address, page_table); + set_pte_at(mm, address, page_table, entry); update_mmu_cache(vma, address, entry); lru_cache_add_active(new_page); page_add_new_anon_rmap(new_page, vma, address); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c37319542b7..2053bb165a2 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -13,6 +13,7 @@ #include <linux/compiler.h> #include <linux/module.h> #include <linux/pagevec.h> +#include <linux/writeback.h> #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/cpu.h> @@ -21,6 +22,7 @@ #include <linux/highmem.h> #include <linux/vmalloc.h> #include <linux/ioport.h> +#include <linux/cpuset.h> #include <asm/tlbflush.h> @@ -191,6 +193,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages) if (need_zonelists_rebuild) build_all_zonelists(); vm_total_pages = nr_free_pagecache_pages(); + writeback_set_ratelimit(); return 0; } @@ -283,6 +286,8 @@ int add_memory(int nid, u64 start, u64 size) /* we online node here. we can't roll back from here. */ node_set_online(nid); + cpuset_track_online_nodes(); + if (new_pgdat) { ret = register_one_node(nid); /* diff --git a/mm/migrate.c b/mm/migrate.c index 20a8c2687b1..ba2453f9483 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -409,6 +409,7 @@ int migrate_page(struct address_space *mapping, } EXPORT_SYMBOL(migrate_page); +#ifdef CONFIG_BLOCK /* * Migration function for pages with buffers. This function can only be used * if the underlying filesystem guarantees that no other references to "page" @@ -466,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping, return 0; } EXPORT_SYMBOL(buffer_migrate_page); +#endif /* * Writeback a page to clean the dirty state @@ -525,7 +527,7 @@ static int fallback_migrate_page(struct address_space *mapping, * Buffers may be managed in a filesystem specific way. * We must have no buffers or drop them. */ - if (page_has_buffers(page) && + if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL)) return -EAGAIN; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index bada3d03119..20f41b082e1 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -204,16 +204,30 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) do_posix_clock_monotonic_gettime(&uptime); do_each_thread(g, p) { unsigned long points; - int releasing; - /* skip kernel threads */ + /* + * skip kernel threads and tasks which have already released + * their mm. + */ if (!p->mm) continue; - /* skip the init task with pid == 1 */ - if (p->pid == 1) + /* skip the init task */ + if (is_init(p)) continue; /* + * This task already has access to memory reserves and is + * being killed. Don't allow any other task access to the + * memory reserve. + * + * Note: this may have a chance of deadlock if it gets + * blocked waiting for another task which itself is waiting + * for memory. Is there a better alternative? + */ + if (test_tsk_thread_flag(p, TIF_MEMDIE)) + return ERR_PTR(-1UL); + + /* * This is in the process of releasing memory so wait for it * to finish before killing some other task by mistake. * @@ -221,21 +235,16 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) * go ahead if it is exiting: this will simply set TIF_MEMDIE, * which will allow it to gain access to memory reserves in * the process of exiting and releasing its resources. - * Otherwise we could get an OOM deadlock. + * Otherwise we could get an easy OOM deadlock. */ - releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || - p->flags & PF_EXITING; - if (releasing) { - /* PF_DEAD tasks have already released their mm */ - if (p->flags & PF_DEAD) - continue; - if (p->flags & PF_EXITING && p == current) { - chosen = p; - *ppoints = ULONG_MAX; - break; - } - return ERR_PTR(-1UL); + if (p->flags & PF_EXITING) { + if (p != current) + return ERR_PTR(-1UL); + + chosen = p; + *ppoints = ULONG_MAX; } + if (p->oomkilladj == OOM_DISABLE) continue; @@ -245,6 +254,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) *ppoints = points; } } while_each_thread(g, p); + return chosen; } @@ -255,20 +265,17 @@ static struct task_struct *select_bad_process(unsigned long *ppoints) */ static void __oom_kill_task(struct task_struct *p, const char *message) { - if (p->pid == 1) { + if (is_init(p)) { WARN_ON(1); printk(KERN_WARNING "tried to kill init!\n"); return; } - task_lock(p); - if (!p->mm || p->mm == &init_mm) { + if (!p->mm) { WARN_ON(1); printk(KERN_WARNING "tried to kill an mm-less task!\n"); - task_unlock(p); return; } - task_unlock(p); if (message) { printk(KERN_ERR "%s: Killed process %d (%s).\n", @@ -302,7 +309,7 @@ static int oom_kill_task(struct task_struct *p, const char *message) * However, this is of no concern to us. */ - if (mm == NULL || mm == &init_mm) + if (mm == NULL) return 1; __oom_kill_task(p, message); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 555752907dc..c0d4ce144de 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -30,6 +30,8 @@ #include <linux/sysctl.h> #include <linux/cpu.h> #include <linux/syscalls.h> +#include <linux/buffer_head.h> +#include <linux/pagevec.h> /* * The maximum number of pages to writeout in a single bdflush/kupdate @@ -46,7 +48,6 @@ */ static long ratelimit_pages = 32; -static long total_pages; /* The total number of pages in the machine. */ static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */ /* @@ -126,7 +127,7 @@ get_dirty_limits(long *pbackground, long *pdirty, int unmapped_ratio; long background; long dirty; - unsigned long available_memory = total_pages; + unsigned long available_memory = vm_total_pages; struct task_struct *tsk; #ifdef CONFIG_HIGHMEM @@ -141,7 +142,7 @@ get_dirty_limits(long *pbackground, long *pdirty, unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) + global_page_state(NR_ANON_PAGES)) * 100) / - total_pages; + vm_total_pages; dirty_ratio = vm_dirty_ratio; if (dirty_ratio > unmapped_ratio / 2) @@ -502,9 +503,9 @@ void laptop_sync_completion(void) * will write six megabyte chunks, max. */ -static void set_ratelimit(void) +void writeback_set_ratelimit(void) { - ratelimit_pages = total_pages / (num_online_cpus() * 32); + ratelimit_pages = vm_total_pages / (num_online_cpus() * 32); if (ratelimit_pages < 16) ratelimit_pages = 16; if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024) @@ -514,7 +515,7 @@ static void set_ratelimit(void) static int __cpuinit ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) { - set_ratelimit(); + writeback_set_ratelimit(); return 0; } @@ -533,9 +534,7 @@ void __init page_writeback_init(void) long buffer_pages = nr_free_buffer_pages(); long correction; - total_pages = nr_free_pagecache_pages(); - - correction = (100 * 4 * buffer_pages) / total_pages; + correction = (100 * 4 * buffer_pages) / vm_total_pages; if (correction < 100) { dirty_background_ratio *= correction; @@ -549,10 +548,143 @@ void __init page_writeback_init(void) vm_dirty_ratio = 1; } mod_timer(&wb_timer, jiffies + dirty_writeback_interval); - set_ratelimit(); + writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); } +/** + * generic_writepages - walk the list of dirty pages of the given + * address space and writepage() all of them. + * + * @mapping: address space structure to write + * @wbc: subtract the number of written pages from *@wbc->nr_to_write + * + * This is a library function, which implements the writepages() + * address_space_operation. + * + * If a page is already under I/O, generic_writepages() skips it, even + * if it's dirty. This is desirable behaviour for memory-cleaning writeback, + * but it is INCORRECT for data-integrity system calls such as fsync(). fsync() + * and msync() need to guarantee that all the data which was dirty at the time + * the call was made get new I/O started against them. If wbc->sync_mode is + * WB_SYNC_ALL then we were called for data integrity and we must wait for + * existing IO to complete. + * + * Derived from mpage_writepages() - if you fix this you should check that + * also! + */ +int generic_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct backing_dev_info *bdi = mapping->backing_dev_info; + int ret = 0; + int done = 0; + int (*writepage)(struct page *page, struct writeback_control *wbc); + struct pagevec pvec; + int nr_pages; + pgoff_t index; + pgoff_t end; /* Inclusive */ + int scanned = 0; + int range_whole = 0; + + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + return 0; + } + + writepage = mapping->a_ops->writepage; + + /* deal with chardevs and other special file */ + if (!writepage) + return 0; + + pagevec_init(&pvec, 0); + if (wbc->range_cyclic) { + index = mapping->writeback_index; /* Start from prev offset */ + end = -1; + } else { + index = wbc->range_start >> PAGE_CACHE_SHIFT; + end = wbc->range_end >> PAGE_CACHE_SHIFT; + if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) + range_whole = 1; + scanned = 1; + } +retry: + while (!done && (index <= end) && + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, + PAGECACHE_TAG_DIRTY, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + unsigned i; + + scanned = 1; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + + /* + * At this point we hold neither mapping->tree_lock nor + * lock on the page itself: the page may be truncated or + * invalidated (changing page->mapping to NULL), or even + * swizzled back from swapper_space to tmpfs file + * mapping + */ + lock_page(page); + + if (unlikely(page->mapping != mapping)) { + unlock_page(page); + continue; + } + + if (!wbc->range_cyclic && page->index > end) { + done = 1; + unlock_page(page); + continue; + } + + if (wbc->sync_mode != WB_SYNC_NONE) + wait_on_page_writeback(page); + + if (PageWriteback(page) || + !clear_page_dirty_for_io(page)) { + unlock_page(page); + continue; + } + + ret = (*writepage)(page, wbc); + if (ret) { + if (ret == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else + set_bit(AS_EIO, &mapping->flags); + } + + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) + unlock_page(page); + if (ret || (--(wbc->nr_to_write) <= 0)) + done = 1; + if (wbc->nonblocking && bdi_write_congested(bdi)) { + wbc->encountered_congestion = 1; + done = 1; + } + } + pagevec_release(&pvec); + cond_resched(); + } + if (!scanned && !done) { + /* + * We hit the last page and there is more work to be done: wrap + * back to the start of the file + */ + scanned = 1; + index = 0; + goto retry; + } + if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) + mapping->writeback_index = index; + return ret; +} + +EXPORT_SYMBOL(generic_writepages); + int do_writepages(struct address_space *mapping, struct writeback_control *wbc) { int ret; @@ -675,9 +807,11 @@ int fastcall set_page_dirty(struct page *page) if (likely(mapping)) { int (*spd)(struct page *) = mapping->a_ops->set_page_dirty; - if (spd) - return (*spd)(page); - return __set_page_dirty_buffers(page); +#ifdef CONFIG_BLOCK + if (!spd) + spd = __set_page_dirty_buffers; +#endif + return (*spd)(page); } if (!PageDirty(page)) { if (!TestSetPageDirty(page)) diff --git a/mm/shmem.c b/mm/shmem.c index eda907c3a86..b96de69f236 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -26,6 +26,8 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/xattr.h> +#include <linux/generic_acl.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/file.h> @@ -177,6 +179,7 @@ static const struct address_space_operations shmem_aops; static struct file_operations shmem_file_operations; static struct inode_operations shmem_inode_operations; static struct inode_operations shmem_dir_inode_operations; +static struct inode_operations shmem_special_inode_operations; static struct vm_operations_struct shmem_vm_ops; static struct backing_dev_info shmem_backing_dev_info __read_mostly = { @@ -637,7 +640,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) struct page *page = NULL; int error; - if (attr->ia_valid & ATTR_SIZE) { + if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { if (attr->ia_size < inode->i_size) { /* * If truncating down to a partial page, then @@ -670,6 +673,10 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr) error = inode_change_ok(inode, attr); if (!error) error = inode_setattr(inode, attr); +#ifdef CONFIG_TMPFS_POSIX_ACL + if (!error && (attr->ia_valid & ATTR_MODE)) + error = generic_acl_chmod(inode, &shmem_acl_ops); +#endif if (page) page_cache_release(page); return error; @@ -1362,6 +1369,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) switch (mode & S_IFMT) { default: + inode->i_op = &shmem_special_inode_operations; init_special_inode(inode, mode, dev); break; case S_IFREG: @@ -1682,7 +1690,11 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) iput(inode); return error; } - error = 0; + } + error = shmem_acl_init(inode, dir); + if (error) { + iput(inode); + return error; } if (dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; @@ -1897,6 +1909,53 @@ static struct inode_operations shmem_symlink_inode_operations = { .put_link = shmem_put_link, }; +#ifdef CONFIG_TMPFS_POSIX_ACL +/** + * Superblocks without xattr inode operations will get security.* xattr + * support from the VFS "for free". As soon as we have any other xattrs + * like ACLs, we also need to implement the security.* handlers at + * filesystem level, though. + */ + +static size_t shmem_xattr_security_list(struct inode *inode, char *list, + size_t list_len, const char *name, + size_t name_len) +{ + return security_inode_listsecurity(inode, list, list_len); +} + +static int shmem_xattr_security_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return security_inode_getsecurity(inode, name, buffer, size, + -EOPNOTSUPP); +} + +static int shmem_xattr_security_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return security_inode_setsecurity(inode, name, value, size, flags); +} + +struct xattr_handler shmem_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = shmem_xattr_security_list, + .get = shmem_xattr_security_get, + .set = shmem_xattr_security_set, +}; + +static struct xattr_handler *shmem_xattr_handlers[] = { + &shmem_xattr_acl_access_handler, + &shmem_xattr_acl_default_handler, + &shmem_xattr_security_handler, + NULL +}; +#endif + static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes, int *policy, nodemask_t *policy_nodes) @@ -2094,6 +2153,10 @@ static int shmem_fill_super(struct super_block *sb, sb->s_magic = TMPFS_MAGIC; sb->s_op = &shmem_ops; sb->s_time_gran = 1; +#ifdef CONFIG_TMPFS_POSIX_ACL + sb->s_xattr = shmem_xattr_handlers; + sb->s_flags |= MS_POSIXACL; +#endif inode = shmem_get_inode(sb, S_IFDIR | mode, 0); if (!inode) @@ -2130,6 +2193,7 @@ static void shmem_destroy_inode(struct inode *inode) /* only struct inode is valid if it's an inline symlink */ mpol_free_shared_policy(&SHMEM_I(inode)->policy); } + shmem_acl_destroy_inode(inode); kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } @@ -2141,6 +2205,10 @@ static void init_once(void *foo, struct kmem_cache *cachep, if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&p->vfs_inode); +#ifdef CONFIG_TMPFS_POSIX_ACL + p->i_acl = NULL; + p->i_default_acl = NULL; +#endif } } @@ -2184,6 +2252,14 @@ static struct inode_operations shmem_inode_operations = { .truncate = shmem_truncate, .setattr = shmem_notify_change, .truncate_range = shmem_truncate_range, +#ifdef CONFIG_TMPFS_POSIX_ACL + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, + .permission = shmem_permission, +#endif + }; static struct inode_operations shmem_dir_inode_operations = { @@ -2198,6 +2274,25 @@ static struct inode_operations shmem_dir_inode_operations = { .mknod = shmem_mknod, .rename = shmem_rename, #endif +#ifdef CONFIG_TMPFS_POSIX_ACL + .setattr = shmem_notify_change, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, + .permission = shmem_permission, +#endif +}; + +static struct inode_operations shmem_special_inode_operations = { +#ifdef CONFIG_TMPFS_POSIX_ACL + .setattr = shmem_notify_change, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = generic_listxattr, + .removexattr = generic_removexattr, + .permission = shmem_permission, +#endif }; static struct super_operations shmem_ops = { diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c new file mode 100644 index 00000000000..c946bf46871 --- /dev/null +++ b/mm/shmem_acl.c @@ -0,0 +1,197 @@ +/* + * mm/shmem_acl.c + * + * (C) 2005 Andreas Gruenbacher <agruen@suse.de> + * + * This file is released under the GPL. + */ + +#include <linux/fs.h> +#include <linux/shmem_fs.h> +#include <linux/xattr.h> +#include <linux/generic_acl.h> + +/** + * shmem_get_acl - generic_acl_operations->getacl() operation + */ +static struct posix_acl * +shmem_get_acl(struct inode *inode, int type) +{ + struct posix_acl *acl = NULL; + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + acl = posix_acl_dup(SHMEM_I(inode)->i_acl); + break; + + case ACL_TYPE_DEFAULT: + acl = posix_acl_dup(SHMEM_I(inode)->i_default_acl); + break; + } + spin_unlock(&inode->i_lock); + + return acl; +} + +/** + * shmem_get_acl - generic_acl_operations->setacl() operation + */ +static void +shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct posix_acl *free = NULL; + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + free = SHMEM_I(inode)->i_acl; + SHMEM_I(inode)->i_acl = posix_acl_dup(acl); + break; + + case ACL_TYPE_DEFAULT: + free = SHMEM_I(inode)->i_default_acl; + SHMEM_I(inode)->i_default_acl = posix_acl_dup(acl); + break; + } + spin_unlock(&inode->i_lock); + posix_acl_release(free); +} + +struct generic_acl_operations shmem_acl_ops = { + .getacl = shmem_get_acl, + .setacl = shmem_set_acl, +}; + +/** + * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access, + * shmem_xattr_acl_access_handler - plumbing code to implement the + * system.posix_acl_access xattr using the generic acl functions. + */ + +static size_t +shmem_list_acl_access(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, + list, list_size); +} + +static int +shmem_get_acl_access(struct inode *inode, const char *name, void *buffer, + size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer, + size); +} + +static int +shmem_set_acl_access(struct inode *inode, const char *name, const void *value, + size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value, + size); +} + +struct xattr_handler shmem_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = shmem_list_acl_access, + .get = shmem_get_acl_access, + .set = shmem_set_acl_access, +}; + +/** + * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default, + * shmem_xattr_acl_default_handler - plumbing code to implement the + * system.posix_acl_default xattr using the generic acl functions. + */ + +static size_t +shmem_list_acl_default(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, + list, list_size); +} + +static int +shmem_get_acl_default(struct inode *inode, const char *name, void *buffer, + size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer, + size); +} + +static int +shmem_set_acl_default(struct inode *inode, const char *name, const void *value, + size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value, + size); +} + +struct xattr_handler shmem_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = shmem_list_acl_default, + .get = shmem_get_acl_default, + .set = shmem_set_acl_default, +}; + +/** + * shmem_acl_init - Inizialize the acl(s) of a new inode + */ +int +shmem_acl_init(struct inode *inode, struct inode *dir) +{ + return generic_acl_init(inode, dir, &shmem_acl_ops); +} + +/** + * shmem_acl_destroy_inode - destroy acls hanging off the in-memory inode + * + * This is done before destroying the actual inode. + */ + +void +shmem_acl_destroy_inode(struct inode *inode) +{ + if (SHMEM_I(inode)->i_acl) + posix_acl_release(SHMEM_I(inode)->i_acl); + SHMEM_I(inode)->i_acl = NULL; + if (SHMEM_I(inode)->i_default_acl) + posix_acl_release(SHMEM_I(inode)->i_default_acl); + SHMEM_I(inode)->i_default_acl = NULL; +} + +/** + * shmem_check_acl - check_acl() callback for generic_permission() + */ +static int +shmem_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS); + + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + return -EAGAIN; +} + +/** + * shmem_permission - permission() inode operation + */ +int +shmem_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, shmem_check_acl); +} diff --git a/mm/slab.c b/mm/slab.c index 792bfe320a8..3dbd6f4e747 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1683,10 +1683,32 @@ static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) static void dump_line(char *data, int offset, int limit) { int i; + unsigned char error = 0; + int bad_count = 0; + printk(KERN_ERR "%03x:", offset); - for (i = 0; i < limit; i++) + for (i = 0; i < limit; i++) { + if (data[offset + i] != POISON_FREE) { + error = data[offset + i]; + bad_count++; + } printk(" %02x", (unsigned char)data[offset + i]); + } printk("\n"); + + if (bad_count == 1) { + error ^= POISON_FREE; + if (!(error & (error - 1))) { + printk(KERN_ERR "Single bit error detected. Probably " + "bad RAM.\n"); +#ifdef CONFIG_X86 + printk(KERN_ERR "Run memtest86+ or a similar memory " + "test tool.\n"); +#else + printk(KERN_ERR "Run a memory test tool.\n"); +#endif + } + } } #endif diff --git a/mm/swapfile.c b/mm/swapfile.c index f1f5ec78378..a15def63f28 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1723,13 +1723,14 @@ get_swap_info_struct(unsigned type) */ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) { - int ret = 0, i = 1 << page_cluster; + int our_page_cluster = page_cluster; + int ret = 0, i = 1 << our_page_cluster; unsigned long toff; struct swap_info_struct *swapdev = swp_type(entry) + swap_info; - if (!page_cluster) /* no readahead */ + if (!our_page_cluster) /* no readahead */ return 0; - toff = (swp_offset(entry) >> page_cluster) << page_cluster; + toff = (swp_offset(entry) >> our_page_cluster) << our_page_cluster; if (!toff) /* first page is swap header */ toff++, i--; *offset = toff; diff --git a/mm/truncate.c b/mm/truncate.c index a654928323d..8fde6580657 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -17,6 +17,32 @@ do_invalidatepage */ +/** + * do_invalidatepage - invalidate part of all of a page + * @page: the page which is affected + * @offset: the index of the truncation point + * + * do_invalidatepage() is called when all or part of the page has become + * invalidated by a truncate operation. + * + * do_invalidatepage() does not have to release all buffers, but it must + * ensure that no dirty buffer is left outside @offset and that no I/O + * is underway against any of the blocks which are outside the truncation + * point. Because the caller is about to free (and possibly reuse) those + * blocks on-disk. + */ +void do_invalidatepage(struct page *page, unsigned long offset) +{ + void (*invalidatepage)(struct page *, unsigned long); + invalidatepage = page->mapping->a_ops->invalidatepage; +#ifdef CONFIG_BLOCK + if (!invalidatepage) + invalidatepage = block_invalidatepage; +#endif + if (invalidatepage) + (*invalidatepage)(page, offset); +} + static inline void truncate_partial_page(struct page *page, unsigned partial) { memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); |