aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile1
-rw-r--r--mm/backing-dev.c5
-rw-r--r--mm/bounce.c14
-rw-r--r--mm/failslab.c59
-rw-r--r--mm/filemap.c15
-rw-r--r--mm/memory.c89
-rw-r--r--mm/memory_hotplug.c9
-rw-r--r--mm/mempolicy.c9
-rw-r--r--mm/migrate.c68
-rw-r--r--mm/mlock.c45
-rw-r--r--mm/mmap.c2
-rw-r--r--mm/mprotect.c2
-rw-r--r--mm/mremap.c2
-rw-r--r--mm/msync.c2
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/oom_kill.c12
-rw-r--r--mm/page_cgroup.c59
-rw-r--r--mm/pdflush.c16
-rw-r--r--mm/shmem.c8
-rw-r--r--mm/slab.c91
-rw-r--r--mm/slob.c2
-rw-r--r--mm/slub.c126
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/swap.c20
-rw-r--r--mm/swapfile.c9
-rw-r--r--mm/vmalloc.c27
-rw-r--r--mm/vmscan.c6
-rw-r--r--mm/vmstat.c4
28 files changed, 456 insertions, 250 deletions
diff --git a/mm/Makefile b/mm/Makefile
index c06b45a1ff5..51c27709cc7 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_SLOB) += slob.o
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
+obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f2e574dbc30..a7c6c5613ec 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -176,6 +176,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,
int ret = 0;
struct device *dev;
+ if (bdi->dev) /* The driver needs to use separate queues per device */
+ goto exit;
+
va_start(args, fmt);
dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
va_end(args);
@@ -220,7 +223,7 @@ int bdi_init(struct backing_dev_info *bdi)
bdi->max_prop_frac = PROP_FRAC_BASE;
for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
- err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
+ err = percpu_counter_init(&bdi->bdi_stat[i], 0);
if (err)
goto err;
}
diff --git a/mm/bounce.c b/mm/bounce.c
index 06722c40305..e590272fe7a 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -14,6 +14,7 @@
#include <linux/hash.h>
#include <linux/highmem.h>
#include <linux/blktrace_api.h>
+#include <trace/block.h>
#include <asm/tlbflush.h>
#define POOL_SIZE 64
@@ -21,6 +22,8 @@
static mempool_t *page_pool, *isa_page_pool;
+DEFINE_TRACE(block_bio_bounce);
+
#ifdef CONFIG_HIGHMEM
static __init int init_emergency_pool(void)
{
@@ -195,8 +198,13 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
/*
* irk, bounce it
*/
- if (!bio)
- bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
+ if (!bio) {
+ unsigned int cnt = (*bio_orig)->bi_vcnt;
+
+ bio = bio_alloc(GFP_NOIO, cnt);
+ memset(bio->bi_io_vec, 0, cnt * sizeof(struct bio_vec));
+ }
+
to = bio->bi_io_vec + i;
@@ -222,7 +230,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
if (!bio)
return;
- blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
+ trace_block_bio_bounce(q, *bio_orig);
/*
* at least one page was bounced, fill in possible non-highmem
diff --git a/mm/failslab.c b/mm/failslab.c
new file mode 100644
index 00000000000..7c6ea6493f8
--- /dev/null
+++ b/mm/failslab.c
@@ -0,0 +1,59 @@
+#include <linux/fault-inject.h>
+
+static struct {
+ struct fault_attr attr;
+ u32 ignore_gfp_wait;
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+ struct dentry *ignore_gfp_wait_file;
+#endif
+} failslab = {
+ .attr = FAULT_ATTR_INITIALIZER,
+ .ignore_gfp_wait = 1,
+};
+
+bool should_failslab(size_t size, gfp_t gfpflags)
+{
+ if (gfpflags & __GFP_NOFAIL)
+ return false;
+
+ if (failslab.ignore_gfp_wait && (gfpflags & __GFP_WAIT))
+ return false;
+
+ return should_fail(&failslab.attr, size);
+}
+
+static int __init setup_failslab(char *str)
+{
+ return setup_fault_attr(&failslab.attr, str);
+}
+__setup("failslab=", setup_failslab);
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init failslab_debugfs_init(void)
+{
+ mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+ struct dentry *dir;
+ int err;
+
+ err = init_fault_attr_dentries(&failslab.attr, "failslab");
+ if (err)
+ return err;
+ dir = failslab.attr.dentries.dir;
+
+ failslab.ignore_gfp_wait_file =
+ debugfs_create_bool("ignore-gfp-wait", mode, dir,
+ &failslab.ignore_gfp_wait);
+
+ if (!failslab.ignore_gfp_wait_file) {
+ err = -ENOMEM;
+ debugfs_remove(failslab.ignore_gfp_wait_file);
+ cleanup_fault_attr_dentries(&failslab.attr);
+ }
+
+ return err;
+}
+
+late_initcall(failslab_debugfs_init);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
diff --git a/mm/filemap.c b/mm/filemap.c
index f3e5f8944d1..f5769b4dc07 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1766,7 +1766,7 @@ int should_remove_suid(struct dentry *dentry)
if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
kill |= ATTR_KILL_SGID;
- if (unlikely(kill && !capable(CAP_FSETID)))
+ if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode)))
return kill;
return 0;
@@ -2140,19 +2140,24 @@ EXPORT_SYMBOL(generic_file_direct_write);
* Find or create a page at the given pagecache position. Return the locked
* page. This function is specifically for buffered writes.
*/
-struct page *__grab_cache_page(struct address_space *mapping, pgoff_t index)
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+ pgoff_t index, unsigned flags)
{
int status;
struct page *page;
+ gfp_t gfp_notmask = 0;
+ if (flags & AOP_FLAG_NOFS)
+ gfp_notmask = __GFP_FS;
repeat:
page = find_lock_page(mapping, index);
if (likely(page))
return page;
- page = page_cache_alloc(mapping);
+ page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask);
if (!page)
return NULL;
- status = add_to_page_cache_lru(page, mapping, index, GFP_KERNEL);
+ status = add_to_page_cache_lru(page, mapping, index,
+ GFP_KERNEL & ~gfp_notmask);
if (unlikely(status)) {
page_cache_release(page);
if (status == -EEXIST)
@@ -2161,7 +2166,7 @@ repeat:
}
return page;
}
-EXPORT_SYMBOL(__grab_cache_page);
+EXPORT_SYMBOL(grab_cache_page_write_begin);
static ssize_t generic_perform_write(struct file *file,
struct iov_iter *i, loff_t pos)
diff --git a/mm/memory.c b/mm/memory.c
index 164951c4730..7b9db658aca 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -669,6 +669,16 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
if (is_vm_hugetlb_page(vma))
return copy_hugetlb_page_range(dst_mm, src_mm, vma);
+ if (unlikely(is_pfn_mapping(vma))) {
+ /*
+ * We do not free on error cases below as remove_vma
+ * gets called on error from higher level routine
+ */
+ ret = track_pfn_vma_copy(vma);
+ if (ret)
+ return ret;
+ }
+
/*
* We need to invalidate the secondary MMU mappings only when
* there could be a permission downgrade on the ptes of the
@@ -915,6 +925,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp,
if (vma->vm_flags & VM_ACCOUNT)
*nr_accounted += (end - start) >> PAGE_SHIFT;
+ if (unlikely(is_pfn_mapping(vma)))
+ untrack_pfn_vma(vma, 0, 0);
+
while (start != end) {
if (!tlb_start_valid) {
tlb_start = start;
@@ -1430,6 +1443,7 @@ out:
int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn)
{
+ int ret;
/*
* Technically, architectures with pte_special can avoid all these
* restrictions (same for remap_pfn_range). However we would like
@@ -1444,7 +1458,15 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
if (addr < vma->vm_start || addr >= vma->vm_end)
return -EFAULT;
- return insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+ if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE))
+ return -EINVAL;
+
+ ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot);
+
+ if (ret)
+ untrack_pfn_vma(vma, pfn, PAGE_SIZE);
+
+ return ret;
}
EXPORT_SYMBOL(vm_insert_pfn);
@@ -1575,14 +1597,17 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
* behaviour that some programs depend on. We mark the "original"
* un-COW'ed pages by matching them up with "vma->vm_pgoff".
*/
- if (is_cow_mapping(vma->vm_flags)) {
- if (addr != vma->vm_start || end != vma->vm_end)
- return -EINVAL;
+ if (addr == vma->vm_start && end == vma->vm_end)
vma->vm_pgoff = pfn;
- }
+ else if (is_cow_mapping(vma->vm_flags))
+ return -EINVAL;
vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+ err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size));
+ if (err)
+ return -EINVAL;
+
BUG_ON(addr >= end);
pfn -= addr >> PAGE_SHIFT;
pgd = pgd_offset(mm, addr);
@@ -1594,6 +1619,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
if (err)
break;
} while (pgd++, addr = next, addr != end);
+
+ if (err)
+ untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
+
return err;
}
EXPORT_SYMBOL(remap_pfn_range);
@@ -2237,7 +2266,7 @@ int vmtruncate(struct inode * inode, loff_t offset)
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
}
- if (inode->i_op && inode->i_op->truncate)
+ if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;
@@ -2257,7 +2286,7 @@ int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
* a way to truncate a range of blocks (punch a hole) -
* we should return failure right now.
*/
- if (!inode->i_op || !inode->i_op->truncate_range)
+ if (!inode->i_op->truncate_range)
return -ENOSYS;
mutex_lock(&inode->i_mutex);
@@ -2865,9 +2894,9 @@ int in_gate_area_no_task(unsigned long addr)
#endif /* __HAVE_ARCH_GATE_AREA */
#ifdef CONFIG_HAVE_IOREMAP_PROT
-static resource_size_t follow_phys(struct vm_area_struct *vma,
- unsigned long address, unsigned int flags,
- unsigned long *prot)
+int follow_phys(struct vm_area_struct *vma,
+ unsigned long address, unsigned int flags,
+ unsigned long *prot, resource_size_t *phys)
{
pgd_t *pgd;
pud_t *pud;
@@ -2876,24 +2905,26 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
spinlock_t *ptl;
resource_size_t phys_addr = 0;
struct mm_struct *mm = vma->vm_mm;
+ int ret = -EINVAL;
- VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP)));
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ goto out;
pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- goto no_page_table;
+ goto out;
pud = pud_offset(pgd, address);
if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- goto no_page_table;
+ goto out;
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- goto no_page_table;
+ goto out;
/* We cannot handle huge page PFN maps. Luckily they don't exist. */
if (pmd_huge(*pmd))
- goto no_page_table;
+ goto out;
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!ptep)
@@ -2908,13 +2939,13 @@ static resource_size_t follow_phys(struct vm_area_struct *vma,
phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */
*prot = pgprot_val(pte_pgprot(pte));
+ *phys = phys_addr;
+ ret = 0;
unlock:
pte_unmap_unlock(ptep, ptl);
out:
- return phys_addr;
-no_page_table:
- return 0;
+ return ret;
}
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
@@ -2925,12 +2956,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
void *maddr;
int offset = addr & (PAGE_SIZE-1);
- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
- return -EINVAL;
-
- phys_addr = follow_phys(vma, addr, write, &prot);
-
- if (!phys_addr)
+ if (follow_phys(vma, addr, write, &prot, &phys_addr))
return -EINVAL;
maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot);
@@ -3049,3 +3075,18 @@ void print_vma_addr(char *prefix, unsigned long ip)
}
up_read(&current->mm->mmap_sem);
}
+
+#ifdef CONFIG_PROVE_LOCKING
+void might_fault(void)
+{
+ might_sleep();
+ /*
+ * it would be nicer only to annotate paths which are not under
+ * pagefault_disable, however that requires a larger audit and
+ * providing helpers like get_user_atomic.
+ */
+ if (!in_atomic() && current->mm)
+ might_lock_read(&current->mm->mmap_sem);
+}
+EXPORT_SYMBOL(might_fault);
+#endif
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b5b2b15085a..b1737118546 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -189,7 +189,7 @@ static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn,
pgdat->node_start_pfn;
}
-static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
+static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int nr_pages = PAGES_PER_SECTION;
@@ -216,7 +216,7 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
return 0;
}
-static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
+static int __meminit __add_section(struct zone *zone, unsigned long phys_start_pfn)
{
int nr_pages = PAGES_PER_SECTION;
int ret;
@@ -273,7 +273,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
* call this function after deciding the zone to which to
* add the new pages.
*/
-int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
+int __ref __add_pages(struct zone *zone, unsigned long phys_start_pfn,
unsigned long nr_pages)
{
unsigned long i;
@@ -470,7 +470,8 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
}
-int add_memory(int nid, u64 start, u64 size)
+/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
+int __ref add_memory(int nid, u64 start, u64 size)
{
pg_data_t *pgdat = NULL;
int new_pgdat = 0;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e9493b1c111..e412ffa8e52 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1114,6 +1114,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
const unsigned long __user *old_nodes,
const unsigned long __user *new_nodes)
{
+ const struct cred *cred = current_cred(), *tcred;
struct mm_struct *mm;
struct task_struct *task;
nodemask_t old;
@@ -1148,12 +1149,16 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- if ((current->euid != task->suid) && (current->euid != task->uid) &&
- (current->uid != task->suid) && (current->uid != task->uid) &&
+ rcu_read_lock();
+ tcred = __task_cred(task);
+ if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
+ cred->uid != tcred->suid && cred->uid != tcred->uid &&
!capable(CAP_SYS_NICE)) {
+ rcu_read_unlock();
err = -EPERM;
goto out;
}
+ rcu_read_unlock();
task_nodes = cpuset_mems_allowed(task);
/* Is the user allowed to access the target nodes? */
diff --git a/mm/migrate.c b/mm/migrate.c
index 1e0d6b237f4..21631ab8c08 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -987,25 +987,18 @@ out:
/*
* Determine the nodes of an array of pages and store it in an array of status.
*/
-static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
- const void __user * __user *pages,
- int __user *status)
+static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
+ const void __user **pages, int *status)
{
unsigned long i;
- int err;
down_read(&mm->mmap_sem);
for (i = 0; i < nr_pages; i++) {
- const void __user *p;
- unsigned long addr;
+ unsigned long addr = (unsigned long)(*pages);
struct vm_area_struct *vma;
struct page *page;
-
- err = -EFAULT;
- if (get_user(p, pages+i))
- goto out;
- addr = (unsigned long) p;
+ int err = -EFAULT;
vma = find_vma(mm, addr);
if (!vma)
@@ -1024,12 +1017,52 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
err = page_to_nid(page);
set_status:
- put_user(err, status+i);
+ *status = err;
+
+ pages++;
+ status++;
+ }
+
+ up_read(&mm->mmap_sem);
+}
+
+/*
+ * Determine the nodes of a user array of pages and store it in
+ * a user array of status.
+ */
+static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
+ const void __user * __user *pages,
+ int __user *status)
+{
+#define DO_PAGES_STAT_CHUNK_NR 16
+ const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
+ int chunk_status[DO_PAGES_STAT_CHUNK_NR];
+ unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+ int err;
+
+ for (i = 0; i < nr_pages; i += chunk_nr) {
+ if (chunk_nr + i > nr_pages)
+ chunk_nr = nr_pages - i;
+
+ err = copy_from_user(chunk_pages, &pages[i],
+ chunk_nr * sizeof(*chunk_pages));
+ if (err) {
+ err = -EFAULT;
+ goto out;
+ }
+
+ do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
+
+ err = copy_to_user(&status[i], chunk_status,
+ chunk_nr * sizeof(*chunk_status));
+ if (err) {
+ err = -EFAULT;
+ goto out;
+ }
}
err = 0;
out:
- up_read(&mm->mmap_sem);
return err;
}
@@ -1042,6 +1075,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
const int __user *nodes,
int __user *status, int flags)
{
+ const struct cred *cred = current_cred(), *tcred;
struct task_struct *task;
struct mm_struct *mm;
int err;
@@ -1072,12 +1106,16 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- if ((current->euid != task->suid) && (current->euid != task->uid) &&
- (current->uid != task->suid) && (current->uid != task->uid) &&
+ rcu_read_lock();
+ tcred = __task_cred(task);
+ if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
+ cred->uid != tcred->suid && cred->uid != tcred->uid &&
!capable(CAP_SYS_NICE)) {
+ rcu_read_unlock();
err = -EPERM;
goto out;
}
+ rcu_read_unlock();
err = security_task_movememory(task);
if (err)
diff --git a/mm/mlock.c b/mm/mlock.c
index 1ada366570c..3035a56e761 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -667,3 +667,48 @@ void user_shm_unlock(size_t size, struct user_struct *user)
spin_unlock(&shmlock_user_lock);
free_uid(user);
}
+
+void *alloc_locked_buffer(size_t size)
+{
+ unsigned long rlim, vm, pgsz;
+ void *buffer = NULL;
+
+ pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ down_write(&current->mm->mmap_sem);
+
+ rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->total_vm + pgsz;
+ if (rlim < vm)
+ goto out;
+
+ rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+ vm = current->mm->locked_vm + pgsz;
+ if (rlim < vm)
+ goto out;
+
+ buffer = kzalloc(size, GFP_KERNEL);
+ if (!buffer)
+ goto out;
+
+ current->mm->total_vm += pgsz;
+ current->mm->locked_vm += pgsz;
+
+ out:
+ up_write(&current->mm->mmap_sem);
+ return buffer;
+}
+
+void free_locked_buffer(void *buffer, size_t size)
+{
+ unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ down_write(&current->mm->mmap_sem);
+
+ current->mm->total_vm -= pgsz;
+ current->mm->locked_vm -= pgsz;
+
+ up_write(&current->mm->mmap_sem);
+
+ kfree(buffer);
+}
diff --git a/mm/mmap.c b/mm/mmap.c
index d4855a682ab..2c778fcfd9b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3,7 +3,7 @@
*
* Written by obz.
*
- * Address space accounting code <alan@redhat.com>
+ * Address space accounting code <alan@lxorguk.ukuu.org.uk>
*/
#include <linux/slab.h>
diff --git a/mm/mprotect.c b/mm/mprotect.c
index fded06f923f..cfb4c485206 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -4,7 +4,7 @@
* (C) Copyright 1994 Linus Torvalds
* (C) Copyright 2002 Christoph Hellwig
*
- * Address space accounting code <alan@redhat.com>
+ * Address space accounting code <alan@lxorguk.ukuu.org.uk>
* (C) Copyright 2002 Red Hat Inc, All Rights Reserved
*/
diff --git a/mm/mremap.c b/mm/mremap.c
index 58a2908f42f..646de959aa5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -3,7 +3,7 @@
*
* (C) Copyright 1996 Linus Torvalds
*
- * Address space accounting code <alan@redhat.com>
+ * Address space accounting code <alan@lxorguk.ukuu.org.uk>
* (C) Copyright 2002 Red Hat Inc, All Rights Reserved
*/
diff --git a/mm/msync.c b/mm/msync.c
index 144a7570535..07dae08cf31 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -82,7 +82,7 @@ asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
(vma->vm_flags & VM_SHARED)) {
get_file(file);
up_read(&mm->mmap_sem);
- error = do_fsync(file, 0);
+ error = vfs_fsync(file, file->f_path.dentry, 0);
fput(file);
if (error || start >= end)
goto out;
diff --git a/mm/nommu.c b/mm/nommu.c
index 7695dc85078..1c28ea3a4e9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -86,7 +86,7 @@ do_expand:
i_size_write(inode, offset);
out_truncate:
- if (inode->i_op && inode->i_op->truncate)
+ if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;
out_sig:
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index a0a01902f55..558f9afe6e4 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -128,8 +128,8 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
* Superuser processes are usually more important, so we make it
* less likely that we kill those.
*/
- if (has_capability(p, CAP_SYS_ADMIN) ||
- has_capability(p, CAP_SYS_RESOURCE))
+ if (has_capability_noaudit(p, CAP_SYS_ADMIN) ||
+ has_capability_noaudit(p, CAP_SYS_RESOURCE))
points /= 4;
/*
@@ -138,7 +138,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
* tend to only have this flag set on applications they think
* of as important.
*/
- if (has_capability(p, CAP_SYS_RAWIO))
+ if (has_capability_noaudit(p, CAP_SYS_RAWIO))
points /= 4;
/*
@@ -299,9 +299,9 @@ static void dump_tasks(const struct mem_cgroup *mem)
task_lock(p);
printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
- p->pid, p->uid, p->tgid, p->mm->total_vm,
- get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj,
- p->comm);
+ p->pid, __task_cred(p)->uid, p->tgid,
+ p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p),
+ p->oomkilladj, p->comm);
task_unlock(p);
} while_each_thread(g, p);
}
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 1223d927904..ab27ff75051 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -21,7 +21,7 @@ static unsigned long total_usage;
#if !defined(CONFIG_SPARSEMEM)
-void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
pgdat->node_page_cgroup = NULL;
}
@@ -49,6 +49,9 @@ static int __init alloc_node_page_cgroup(int nid)
start_pfn = NODE_DATA(nid)->node_start_pfn;
nr_pages = NODE_DATA(nid)->node_spanned_pages;
+ if (!nr_pages)
+ return 0;
+
table_size = sizeof(struct page_cgroup) * nr_pages;
base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
@@ -97,7 +100,8 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
return section->page_cgroup + pfn;
}
-int __meminit init_section_page_cgroup(unsigned long pfn)
+/* __alloc_bootmem...() is protected by !slab_available() */
+int __init_refok init_section_page_cgroup(unsigned long pfn)
{
struct mem_section *section;
struct page_cgroup *base, *pc;
@@ -106,19 +110,29 @@ int __meminit init_section_page_cgroup(unsigned long pfn)
section = __pfn_to_section(pfn);
- if (section->page_cgroup)
- return 0;
-
- nid = page_to_nid(pfn_to_page(pfn));
-
- table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
- if (slab_is_available()) {
- base = kmalloc_node(table_size, GFP_KERNEL, nid);
- if (!base)
- base = vmalloc_node(table_size, nid);
- } else {
- base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), table_size,
+ if (!section->page_cgroup) {
+ nid = page_to_nid(pfn_to_page(pfn));
+ table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+ if (slab_is_available()) {
+ base = kmalloc_node(table_size, GFP_KERNEL, nid);
+ if (!base)
+ base = vmalloc_node(table_size, nid);
+ } else {
+ base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
+ table_size,
PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+ }
+ } else {
+ /*
+ * We don't have to allocate page_cgroup again, but
+ * address of memmap may be changed. So, we have to initialize
+ * again.
+ */
+ base = section->page_cgroup + pfn;
+ table_size = 0;
+ /* check address of memmap is changed or not. */
+ if (base->page == pfn_to_page(pfn))
+ return 0;
}
if (!base) {
@@ -158,7 +172,7 @@ void __free_page_cgroup(unsigned long pfn)
}
}
-int online_page_cgroup(unsigned long start_pfn,
+int __meminit online_page_cgroup(unsigned long start_pfn,
unsigned long nr_pages,
int nid)
{
@@ -183,7 +197,7 @@ int online_page_cgroup(unsigned long start_pfn,
return -ENOMEM;
}
-int offline_page_cgroup(unsigned long start_pfn,
+int __meminit offline_page_cgroup(unsigned long start_pfn,
unsigned long nr_pages, int nid)
{
unsigned long start, end, pfn;
@@ -197,7 +211,7 @@ int offline_page_cgroup(unsigned long start_pfn,
}
-static int page_cgroup_callback(struct notifier_block *self,
+static int __meminit page_cgroup_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct memory_notify *mn = arg;
@@ -207,18 +221,23 @@ static int page_cgroup_callback(struct notifier_block *self,
ret = online_page_cgroup(mn->start_pfn,
mn->nr_pages, mn->status_change_nid);
break;
- case MEM_CANCEL_ONLINE:
case MEM_OFFLINE:
offline_page_cgroup(mn->start_pfn,
mn->nr_pages, mn->status_change_nid);
break;
+ case MEM_CANCEL_ONLINE:
case MEM_GOING_OFFLINE:
break;
case MEM_ONLINE:
case MEM_CANCEL_OFFLINE:
break;
}
- ret = notifier_from_errno(ret);
+
+ if (ret)
+ ret = notifier_from_errno(ret);
+ else
+ ret = NOTIFY_OK;
+
return ret;
}
@@ -248,7 +267,7 @@ void __init page_cgroup_init(void)
" want\n");
}
-void __init pgdat_page_cgroup_init(struct pglist_data *pgdat)
+void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
return;
}
diff --git a/mm/pdflush.c b/mm/pdflush.c
index a0a14c4d507..15de509b68f 100644
--- a/mm/pdflush.c
+++ b/mm/pdflush.c
@@ -172,7 +172,16 @@ static int __pdflush(struct pdflush_work *my_work)
static int pdflush(void *dummy)
{
struct pdflush_work my_work;
- cpumask_t cpus_allowed;
+ cpumask_var_t cpus_allowed;
+
+ /*
+ * Since the caller doesn't even check kthread_run() worked, let's not
+ * freak out too much if this fails.
+ */
+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+ printk(KERN_WARNING "pdflush failed to allocate cpumask\n");
+ return 0;
+ }
/*
* pdflush can spend a lot of time doing encryption via dm-crypt. We
@@ -187,8 +196,9 @@ static int pdflush(void *dummy)
* This is needed as pdflush's are dynamically created and destroyed.
* The boottime pdflush's are easily placed w/o these 2 lines.
*/
- cpuset_cpus_allowed(current, &cpus_allowed);
- set_cpus_allowed_ptr(current, &cpus_allowed);
+ cpuset_cpus_allowed(current, cpus_allowed);
+ set_cpus_allowed_ptr(current, cpus_allowed);
+ free_cpumask_var(cpus_allowed);
return __pdflush(&my_work);
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 0ed075215e5..f1b0d4871f3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1513,8 +1513,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
inode = new_inode(sb);
if (inode) {
inode->i_mode = mode;
- inode->i_uid = current->fsuid;
- inode->i_gid = current->fsgid;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -2278,8 +2278,8 @@ static int shmem_fill_super(struct super_block *sb,
sbinfo->max_blocks = 0;
sbinfo->max_inodes = 0;
sbinfo->mode = S_IRWXUGO | S_ISVTX;
- sbinfo->uid = current->fsuid;
- sbinfo->gid = current->fsgid;
+ sbinfo->uid = current_fsuid();
+ sbinfo->gid = current_fsgid();
sbinfo->mpol = NULL;
sb->s_fs_info = sbinfo;
diff --git a/mm/slab.c b/mm/slab.c
index 09187517f9d..ddc41f337d5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2123,6 +2123,8 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
*
* @name must be valid until the cache is destroyed. This implies that
* the module calling this has to destroy the cache before getting unloaded.
+ * Note that kmem_cache_name() is not guaranteed to return the same pointer,
+ * therefore applications must manage it themselves.
*
* The flags are
*
@@ -2155,7 +2157,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
/*
* We use cache_chain_mutex to ensure a consistent view of
- * cpu_online_map as well. Please see cpuup_callback
+ * cpu_online_mask as well. Please see cpuup_callback
*/
get_online_cpus();
mutex_lock(&cache_chain_mutex);
@@ -2609,7 +2611,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
if (OFF_SLAB(cachep)) {
/* Slab management obj is off-slab. */
slabp = kmem_cache_alloc_node(cachep->slabp_cache,
- local_flags & ~GFP_THISNODE, nodeid);
+ local_flags, nodeid);
if (!slabp)
return NULL;
} else {
@@ -2997,7 +2999,7 @@ retry:
* there must be at least one object available for
* allocation.
*/
- BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);
+ BUG_ON(slabp->inuse >= cachep->num);
while (slabp->inuse < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
@@ -3106,79 +3108,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
#endif
-#ifdef CONFIG_FAILSLAB
-
-static struct failslab_attr {
-
- struct fault_attr attr;
-
- u32 ignore_gfp_wait;
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
- struct dentry *ignore_gfp_wait_file;
-#endif
-
-} failslab = {
- .attr = FAULT_ATTR_INITIALIZER,
- .ignore_gfp_wait = 1,
-};
-
-static int __init setup_failslab(char *str)
-{
- return setup_fault_attr(&failslab.attr, str);
-}
-__setup("failslab=", setup_failslab);
-
-static int should_failslab(struct kmem_cache *cachep, gfp_t flags)
+static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
{
if (cachep == &cache_cache)
- return 0;
- if (flags & __GFP_NOFAIL)
- return 0;
- if (failslab.ignore_gfp_wait && (flags & __GFP_WAIT))
- return 0;
+ return false;
- return should_fail(&failslab.attr, obj_size(cachep));
+ return should_failslab(obj_size(cachep), flags);
}
-#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
-
-static int __init failslab_debugfs(void)
-{
- mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
- struct dentry *dir;
- int err;
-
- err = init_fault_attr_dentries(&failslab.attr, "failslab");
- if (err)
- return err;
- dir = failslab.attr.dentries.dir;
-
- failslab.ignore_gfp_wait_file =
- debugfs_create_bool("ignore-gfp-wait", mode, dir,
- &failslab.ignore_gfp_wait);
-
- if (!failslab.ignore_gfp_wait_file) {
- err = -ENOMEM;
- debugfs_remove(failslab.ignore_gfp_wait_file);
- cleanup_fault_attr_dentries(&failslab.attr);
- }
-
- return err;
-}
-
-late_initcall(failslab_debugfs);
-
-#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
-
-#else /* CONFIG_FAILSLAB */
-
-static inline int should_failslab(struct kmem_cache *cachep, gfp_t flags)
-{
- return 0;
-}
-
-#endif /* CONFIG_FAILSLAB */
-
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *objp;
@@ -3381,7 +3318,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
unsigned long save_flags;
void *ptr;
- if (should_failslab(cachep, flags))
+ if (slab_should_failslab(cachep, flags))
return NULL;
cache_alloc_debugcheck_before(cachep, flags);
@@ -3457,7 +3394,7 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
unsigned long save_flags;
void *objp;
- if (should_failslab(cachep, flags))
+ if (slab_should_failslab(cachep, flags))
return NULL;
cache_alloc_debugcheck_before(cachep, flags);
@@ -3686,9 +3623,9 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
EXPORT_SYMBOL(__kmalloc_node);
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
- int node, void *caller)
+ int node, unsigned long caller)
{
- return __do_kmalloc_node(size, flags, node, caller);
+ return __do_kmalloc_node(size, flags, node, (void *)caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
#else
@@ -3730,9 +3667,9 @@ void *__kmalloc(size_t size, gfp_t flags)
}
EXPORT_SYMBOL(__kmalloc);
-void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
+void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
{
- return __do_kmalloc(size, flags, caller);
+ return __do_kmalloc(size, flags, (void *)caller);
}
EXPORT_SYMBOL(__kmalloc_track_caller);
diff --git a/mm/slob.c b/mm/slob.c
index cb675d12679..bf7e8fc3aed 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -535,7 +535,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
struct kmem_cache *c;
c = slob_alloc(sizeof(struct kmem_cache),
- flags, ARCH_KMALLOC_MINALIGN, -1);
+ GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1);
if (c) {
c->name = name;
diff --git a/mm/slub.c b/mm/slub.c
index 7ad489af956..f0e2892fe40 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -24,6 +24,7 @@
#include <linux/kallsyms.h>
#include <linux/memory.h>
#include <linux/math64.h>
+#include <linux/fault-inject.h>
/*
* Lock order:
@@ -153,6 +154,10 @@
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
+#define OO_SHIFT 16
+#define OO_MASK ((1 << OO_SHIFT) - 1)
+#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */
+
/* Internal SLUB flags */
#define __OBJECT_POISON 0x80000000 /* Poison object */
#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
@@ -178,7 +183,7 @@ static LIST_HEAD(slab_caches);
* Tracking user of a slab.
*/
struct track {
- void *addr; /* Called from address */
+ unsigned long addr; /* Called from address */
int cpu; /* Was running on cpu */
int pid; /* Pid context */
unsigned long when; /* When did the operation occur */
@@ -290,7 +295,7 @@ static inline struct kmem_cache_order_objects oo_make(int order,
unsigned long size)
{
struct kmem_cache_order_objects x = {
- (order << 16) + (PAGE_SIZE << order) / size
+ (order << OO_SHIFT) + (PAGE_SIZE << order) / size
};
return x;
@@ -298,12 +303,12 @@ static inline struct kmem_cache_order_objects oo_make(int order,
static inline int oo_order(struct kmem_cache_order_objects x)
{
- return x.x >> 16;
+ return x.x >> OO_SHIFT;
}
static inline int oo_objects(struct kmem_cache_order_objects x)
{
- return x.x & ((1 << 16) - 1);
+ return x.x & OO_MASK;
}
#ifdef CONFIG_SLUB_DEBUG
@@ -367,7 +372,7 @@ static struct track *get_track(struct kmem_cache *s, void *object,
}
static void set_track(struct kmem_cache *s, void *object,
- enum track_item alloc, void *addr)
+ enum track_item alloc, unsigned long addr)
{
struct track *p;
@@ -391,8 +396,8 @@ static void init_tracking(struct kmem_cache *s, void *object)
if (!(s->flags & SLAB_STORE_USER))
return;
- set_track(s, object, TRACK_FREE, NULL);
- set_track(s, object, TRACK_ALLOC, NULL);
+ set_track(s, object, TRACK_FREE, 0UL);
+ set_track(s, object, TRACK_ALLOC, 0UL);
}
static void print_track(const char *s, struct track *t)
@@ -401,7 +406,7 @@ static void print_track(const char *s, struct track *t)
return;
printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
- s, t->addr, jiffies - t->when, t->cpu, t->pid);
+ s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
}
static void print_tracking(struct kmem_cache *s, void *object)
@@ -692,7 +697,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
if (!check_valid_pointer(s, page, get_freepointer(s, p))) {
object_err(s, page, p, "Freepointer corrupt");
/*
- * No choice but to zap it and thus loose the remainder
+ * No choice but to zap it and thus lose the remainder
* of the free objects in this slab. May cause
* another error because the object count is now wrong.
*/
@@ -764,8 +769,8 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
}
max_objects = (PAGE_SIZE << compound_order(page)) / s->size;
- if (max_objects > 65535)
- max_objects = 65535;
+ if (max_objects > MAX_OBJS_PER_PAGE)
+ max_objects = MAX_OBJS_PER_PAGE;
if (page->objects != max_objects) {
slab_err(s, page, "Wrong number of objects. Found %d but "
@@ -866,7 +871,7 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
}
static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
- void *object, void *addr)
+ void *object, unsigned long addr)
{
if (!check_slab(s, page))
goto bad;
@@ -906,7 +911,7 @@ bad:
}
static int free_debug_processing(struct kmem_cache *s, struct page *page,
- void *object, void *addr)
+ void *object, unsigned long addr)
{
if (!check_slab(s, page))
goto fail;
@@ -1029,10 +1034,10 @@ static inline void setup_object_debug(struct kmem_cache *s,
struct page *page, void *object) {}
static inline int alloc_debug_processing(struct kmem_cache *s,
- struct page *page, void *object, void *addr) { return 0; }
+ struct page *page, void *object, unsigned long addr) { return 0; }
static inline int free_debug_processing(struct kmem_cache *s,
- struct page *page, void *object, void *addr) { return 0; }
+ struct page *page, void *object, unsigned long addr) { return 0; }
static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
{ return 1; }
@@ -1499,8 +1504,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
* we need to allocate a new slab. This is the slowest path since it involves
* a call to the page allocator and the setup of a new slab.
*/
-static void *__slab_alloc(struct kmem_cache *s,
- gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
+static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
+ unsigned long addr, struct kmem_cache_cpu *c)
{
void **object;
struct page *new;
@@ -1584,13 +1589,18 @@ debug:
* Otherwise we can simply pick the next object from the lockless free list.
*/
static __always_inline void *slab_alloc(struct kmem_cache *s,
- gfp_t gfpflags, int node, void *addr)
+ gfp_t gfpflags, int node, unsigned long addr)
{
void **object;
struct kmem_cache_cpu *c;
unsigned long flags;
unsigned int objsize;
+ might_sleep_if(gfpflags & __GFP_WAIT);
+
+ if (should_failslab(s->objsize, gfpflags))
+ return NULL;
+
local_irq_save(flags);
c = get_cpu_slab(s, smp_processor_id());
objsize = c->objsize;
@@ -1613,14 +1623,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
- return slab_alloc(s, gfpflags, -1, __builtin_return_address(0));
+ return slab_alloc(s, gfpflags, -1, _RET_IP_);
}
EXPORT_SYMBOL(kmem_cache_alloc);
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
- return slab_alloc(s, gfpflags, node, __builtin_return_address(0));
+ return slab_alloc(s, gfpflags, node, _RET_IP_);
}
EXPORT_SYMBOL(kmem_cache_alloc_node);
#endif
@@ -1634,7 +1644,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node);
* handling required then we can return immediately.
*/
static void __slab_free(struct kmem_cache *s, struct page *page,
- void *x, void *addr, unsigned int offset)
+ void *x, unsigned long addr, unsigned int offset)
{
void *prior;
void **object = (void *)x;
@@ -1704,7 +1714,7 @@ debug:
* with all sorts of special processing.
*/
static __always_inline void slab_free(struct kmem_cache *s,
- struct page *page, void *x, void *addr)
+ struct page *page, void *x, unsigned long addr)
{
void **object = (void *)x;
struct kmem_cache_cpu *c;
@@ -1731,11 +1741,11 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
page = virt_to_head_page(x);
- slab_free(s, page, x, __builtin_return_address(0));
+ slab_free(s, page, x, _RET_IP_);
}
EXPORT_SYMBOL(kmem_cache_free);
-/* Figure out on which slab object the object resides */
+/* Figure out on which slab page the object resides */
static struct page *get_object_page(const void *x)
{
struct page *page = virt_to_head_page(x);
@@ -1807,8 +1817,8 @@ static inline int slab_order(int size, int min_objects,
int rem;
int min_order = slub_min_order;
- if ((PAGE_SIZE << min_order) / size > 65535)
- return get_order(size * 65535) - 1;
+ if ((PAGE_SIZE << min_order) / size > MAX_OBJS_PER_PAGE)
+ return get_order(size * MAX_OBJS_PER_PAGE) - 1;
for (order = max(min_order,
fls(min_objects * size - 1) - PAGE_SHIFT);
@@ -1960,7 +1970,7 @@ static DEFINE_PER_CPU(struct kmem_cache_cpu,
kmem_cache_cpu)[NR_KMEM_CACHE_CPU];
static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
-static cpumask_t kmem_cach_cpu_free_init_once = CPU_MASK_NONE;
+static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
int cpu, gfp_t flags)
@@ -2035,13 +2045,13 @@ static void init_alloc_cpu_cpu(int cpu)
{
int i;
- if (cpu_isset(cpu, kmem_cach_cpu_free_init_once))
+ if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
return;
for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
- cpu_set(cpu, kmem_cach_cpu_free_init_once);
+ cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
}
static void __init init_alloc_cpu(void)
@@ -2073,8 +2083,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
* when allocating for the kmalloc_node_cache. This is used for bootstrapping
* memory on a fresh node that has no slab structures yet.
*/
-static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
- int node)
+static void early_kmem_cache_node_alloc(gfp_t gfpflags, int node)
{
struct page *page;
struct kmem_cache_node *n;
@@ -2112,7 +2121,6 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
local_irq_save(flags);
add_partial(n, page, 0);
local_irq_restore(flags);
- return n;
}
static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -2144,8 +2152,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
n = &s->local_node;
else {
if (slab_state == DOWN) {
- n = early_kmem_cache_node_alloc(gfpflags,
- node);
+ early_kmem_cache_node_alloc(gfpflags, node);
continue;
}
n = kmem_cache_alloc_node(kmalloc_caches,
@@ -2659,7 +2666,7 @@ void *__kmalloc(size_t size, gfp_t flags)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, -1, __builtin_return_address(0));
+ return slab_alloc(s, flags, -1, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc);
@@ -2687,7 +2694,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- return slab_alloc(s, flags, node, __builtin_return_address(0));
+ return slab_alloc(s, flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node);
#endif
@@ -2744,7 +2751,7 @@ void kfree(const void *x)
put_page(page);
return;
}
- slab_free(page->slab, page, object, __builtin_return_address(0));
+ slab_free(page->slab, page, object, _RET_IP_);
}
EXPORT_SYMBOL(kfree);
@@ -2931,8 +2938,10 @@ static int slab_memory_callback(struct notifier_block *self,
case MEM_CANCEL_OFFLINE:
break;
}
-
- ret = notifier_from_errno(ret);
+ if (ret)
+ ret = notifier_from_errno(ret);
+ else
+ ret = NOTIFY_OK;
return ret;
}
@@ -3121,8 +3130,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
up_write(&slub_lock);
- if (sysfs_slab_alias(s, name))
+ if (sysfs_slab_alias(s, name)) {
+ down_write(&slub_lock);
+ s->refcount--;
+ up_write(&slub_lock);
goto err;
+ }
return s;
}
@@ -3132,8 +3145,13 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
size, align, flags, ctor)) {
list_add(&s->list, &slab_caches);
up_write(&slub_lock);
- if (sysfs_slab_add(s))
+ if (sysfs_slab_add(s)) {
+ down_write(&slub_lock);
+ list_del(&s->list);
+ up_write(&slub_lock);
+ kfree(s);
goto err;
+ }
return s;
}
kfree(s);
@@ -3200,7 +3218,7 @@ static struct notifier_block __cpuinitdata slab_notifier = {
#endif
-void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
+void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
{
struct kmem_cache *s;
@@ -3216,7 +3234,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
}
void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
- int node, void *caller)
+ int node, unsigned long caller)
{
struct kmem_cache *s;
@@ -3427,13 +3445,13 @@ static void resiliency_test(void) {};
struct location {
unsigned long count;
- void *addr;
+ unsigned long addr;
long long sum_time;
long min_time;
long max_time;
long min_pid;
long max_pid;
- cpumask_t cpus;
+ DECLARE_BITMAP(cpus, NR_CPUS);
nodemask_t nodes;
};
@@ -3475,7 +3493,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
{
long start, end, pos;
struct location *l;
- void *caddr;
+ unsigned long caddr;
unsigned long age = jiffies - track->when;
start = -1;
@@ -3508,7 +3526,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
if (track->pid > l->max_pid)
l->max_pid = track->pid;
- cpu_set(track->cpu, l->cpus);
+ cpumask_set_cpu(track->cpu,
+ to_cpumask(l->cpus));
}
node_set(page_to_nid(virt_to_page(track)), l->nodes);
return 1;
@@ -3538,8 +3557,8 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
l->max_time = age;
l->min_pid = track->pid;
l->max_pid = track->pid;
- cpus_clear(l->cpus);
- cpu_set(track->cpu, l->cpus);
+ cpumask_clear(to_cpumask(l->cpus));
+ cpumask_set_cpu(track->cpu, to_cpumask(l->cpus));
nodes_clear(l->nodes);
node_set(page_to_nid(virt_to_page(track)), l->nodes);
return 1;
@@ -3595,7 +3614,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
for (i = 0; i < t.count; i++) {
struct location *l = &t.loc[i];
- if (len > PAGE_SIZE - 100)
+ if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100)
break;
len += sprintf(buf + len, "%7ld ", l->count);
@@ -3620,11 +3639,12 @@ static int list_locations(struct kmem_cache *s, char *buf,
len += sprintf(buf + len, " pid=%ld",
l->min_pid);
- if (num_online_cpus() > 1 && !cpus_empty(l->cpus) &&
+ if (num_online_cpus() > 1 &&
+ !cpumask_empty(to_cpumask(l->cpus)) &&
len < PAGE_SIZE - 60) {
len += sprintf(buf + len, " cpus=");
len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
- l->cpus);
+ to_cpumask(l->cpus));
}
if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
@@ -4343,7 +4363,7 @@ static void sysfs_slab_remove(struct kmem_cache *s)
/*
* Need to buffer aliases during bootup until sysfs becomes
- * available lest we loose that information.
+ * available lest we lose that information.
*/
struct saved_alias {
struct kmem_cache *s;
diff --git a/mm/sparse.c b/mm/sparse.c
index 39db301b920..083f5b63e7a 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -570,7 +570,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
* set. If this is <=0, then that means that the passed-in
* map was not consumed and must be freed.
*/
-int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
+int __meminit sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
int nr_pages)
{
unsigned long section_nr = pfn_to_section_nr(start_pfn);
diff --git a/mm/swap.c b/mm/swap.c
index 2152e48a7b8..b135ec90cde 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -299,7 +299,6 @@ void lru_add_drain(void)
put_cpu();
}
-#if defined(CONFIG_NUMA) || defined(CONFIG_UNEVICTABLE_LRU)
static void lru_add_drain_per_cpu(struct work_struct *dummy)
{
lru_add_drain();
@@ -313,18 +312,6 @@ int lru_add_drain_all(void)
return schedule_on_each_cpu(lru_add_drain_per_cpu);
}
-#else
-
-/*
- * Returns 0 for success
- */
-int lru_add_drain_all(void)
-{
- lru_add_drain();
- return 0;
-}
-#endif
-
/*
* Batched page_cache_release(). Decrement the reference count on all the
* passed pages. If it fell to zero then remove the page from the LRU and
@@ -445,6 +432,7 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i];
struct zone *pagezone = page_zone(page);
+ int file;
if (pagezone != zone) {
if (zone)
@@ -456,8 +444,12 @@ void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
VM_BUG_ON(PageUnevictable(page));
VM_BUG_ON(PageLRU(page));
SetPageLRU(page);
- if (is_active_lru(lru))
+ file = is_file_lru(lru);
+ zone->recent_scanned[file]++;
+ if (is_active_lru(lru)) {
SetPageActive(page);
+ zone->recent_rotated[file]++;
+ }
add_page_to_lru_list(zone, page, lru);
}
if (zone)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 90cb67a5417..54a9f87e516 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1462,6 +1462,15 @@ static int __init procswaps_init(void)
__initcall(procswaps_init);
#endif /* CONFIG_PROC_FS */
+#ifdef MAX_SWAPFILES_CHECK
+static int __init max_swapfiles_check(void)
+{
+ MAX_SWAPFILES_CHECK();
+ return 0;
+}
+late_initcall(max_swapfiles_check);
+#endif
+
/*
* Written 01/25/92 by Simmule Turner, heavily changed by Linus.
*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 30f826d484f..7465f22fec0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -77,7 +77,6 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
BUG_ON(addr >= end);
pgd = pgd_offset_k(addr);
- flush_cache_vunmap(addr, end);
do {
next = pgd_addr_end(addr, end);
if (pgd_none_or_clear_bad(pgd))
@@ -152,11 +151,12 @@ static int vmap_pud_range(pgd_t *pgd, unsigned long addr,
*
* Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N]
*/
-static int vmap_page_range(unsigned long addr, unsigned long end,
+static int vmap_page_range(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages)
{
pgd_t *pgd;
unsigned long next;
+ unsigned long addr = start;
int err = 0;
int nr = 0;
@@ -168,7 +168,7 @@ static int vmap_page_range(unsigned long addr, unsigned long end,
if (err)
break;
} while (pgd++, addr = next, addr != end);
- flush_cache_vmap(addr, end);
+ flush_cache_vmap(start, end);
if (unlikely(err))
return err;
@@ -543,9 +543,10 @@ static void purge_vmap_area_lazy(void)
}
/*
- * Free and unmap a vmap area
+ * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
+ * called for the correct range previously.
*/
-static void free_unmap_vmap_area(struct vmap_area *va)
+static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
va->flags |= VM_LAZY_FREE;
atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
@@ -553,6 +554,15 @@ static void free_unmap_vmap_area(struct vmap_area *va)
try_purge_vmap_area_lazy();
}
+/*
+ * Free and unmap a vmap area
+ */
+static void free_unmap_vmap_area(struct vmap_area *va)
+{
+ flush_cache_vunmap(va->va_start, va->va_end);
+ free_unmap_vmap_area_noflush(va);
+}
+
static struct vmap_area *find_vmap_area(unsigned long addr)
{
struct vmap_area *va;
@@ -734,7 +744,7 @@ static void free_vmap_block(struct vmap_block *vb)
spin_unlock(&vmap_block_tree_lock);
BUG_ON(tmp != vb);
- free_unmap_vmap_area(vb->va);
+ free_unmap_vmap_area_noflush(vb->va);
call_rcu(&vb->rcu_head, rcu_free_vb);
}
@@ -796,6 +806,9 @@ static void vb_free(const void *addr, unsigned long size)
BUG_ON(size & ~PAGE_MASK);
BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
+
+ flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size);
+
order = get_order(size);
offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1);
@@ -1705,7 +1718,7 @@ static int s_show(struct seq_file *m, void *p)
v->addr, v->addr + v->size, v->size);
if (v->caller) {
- char buff[2 * KSYM_NAME_LEN];
+ char buff[KSYM_SYMBOL_LEN];
seq_putc(m, ' ');
sprint_symbol(buff, (unsigned long)v->caller);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7ea1440b53d..d196f46c880 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1248,6 +1248,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
list_add(&page->lru, &l_inactive);
}
+ spin_lock_irq(&zone->lru_lock);
/*
* Count referenced pages from currently used mappings as
* rotated, even though they are moved to the inactive list.
@@ -1263,7 +1264,6 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
pgmoved = 0;
lru = LRU_BASE + file * LRU_FILE;
- spin_lock_irq(&zone->lru_lock);
while (!list_empty(&l_inactive)) {
page = lru_to_page(&l_inactive);
prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -1902,7 +1902,7 @@ static int kswapd(void *p)
};
node_to_cpumask_ptr(cpumask, pgdat->node_id);
- if (!cpus_empty(*cpumask))
+ if (!cpumask_empty(cpumask))
set_cpus_allowed_ptr(tsk, cpumask);
current->reclaim_state = &reclaim_state;
@@ -2141,7 +2141,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
pg_data_t *pgdat = NODE_DATA(nid);
node_to_cpumask_ptr(mask, pgdat->node_id);
- if (any_online_cpu(*mask) < nr_cpu_ids)
+ if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
/* One of our CPUs online: restore mask */
set_cpus_allowed_ptr(pgdat->kswapd, mask);
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c3ccfda23ad..91149746bb8 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -20,7 +20,7 @@
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
EXPORT_PER_CPU_SYMBOL(vm_event_states);
-static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
+static void sum_vm_events(unsigned long *ret, const struct cpumask *cpumask)
{
int cpu;
int i;
@@ -43,7 +43,7 @@ static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
void all_vm_events(unsigned long *ret)
{
get_online_cpus();
- sum_vm_events(ret, &cpu_online_map);
+ sum_vm_events(ret, cpu_online_mask);
put_online_cpus();
}
EXPORT_SYMBOL_GPL(all_vm_events);