aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c81
1 files changed, 67 insertions, 14 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d3c52113742..fc5e4a48582 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -622,13 +622,18 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
* Call subsys's pre_destroy handler.
* This is called before css refcnt check.
*/
-static void cgroup_call_pre_destroy(struct cgroup *cgrp)
+static int cgroup_call_pre_destroy(struct cgroup *cgrp)
{
struct cgroup_subsys *ss;
+ int ret = 0;
+
for_each_subsys(cgrp->root, ss)
- if (ss->pre_destroy)
- ss->pre_destroy(ss, cgrp);
- return;
+ if (ss->pre_destroy) {
+ ret = ss->pre_destroy(ss, cgrp);
+ if (ret)
+ break;
+ }
+ return ret;
}
static void free_cgroup_rcu(struct rcu_head *obj)
@@ -722,6 +727,22 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
remove_dir(dentry);
}
+/*
+ * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
+ * reference to css->refcnt. In general, this refcnt is expected to goes down
+ * to zero, soon.
+ *
+ * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex;
+ */
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
+{
+ if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+ wake_up_all(&cgroup_rmdir_waitq);
+}
+
static int rebind_subsystems(struct cgroupfs_root *root,
unsigned long final_bits)
{
@@ -1317,6 +1338,12 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
synchronize_rcu();
put_css_set(cg);
+
+ /*
+ * wake up rmdir() waiter. the rmdir should fail since the cgroup
+ * is no longer empty.
+ */
+ cgroup_wakeup_rmdir_waiters(cgrp);
return 0;
}
@@ -2608,9 +2635,11 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
struct cgroup *cgrp = dentry->d_fsdata;
struct dentry *d;
struct cgroup *parent;
+ DEFINE_WAIT(wait);
+ int ret;
/* the vfs holds both inode->i_mutex already */
-
+again:
mutex_lock(&cgroup_mutex);
if (atomic_read(&cgrp->count) != 0) {
mutex_unlock(&cgroup_mutex);
@@ -2626,17 +2655,39 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
* Call pre_destroy handlers of subsys. Notify subsystems
* that rmdir() request comes.
*/
- cgroup_call_pre_destroy(cgrp);
+ ret = cgroup_call_pre_destroy(cgrp);
+ if (ret)
+ return ret;
mutex_lock(&cgroup_mutex);
parent = cgrp->parent;
-
- if (atomic_read(&cgrp->count)
- || !list_empty(&cgrp->children)
- || !cgroup_clear_css_refs(cgrp)) {
+ if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
+ /*
+ * css_put/get is provided for subsys to grab refcnt to css. In typical
+ * case, subsystem has no reference after pre_destroy(). But, under
+ * hierarchy management, some *temporal* refcnt can be hold.
+ * To avoid returning -EBUSY to a user, waitqueue is used. If subsys
+ * is really busy, it should return -EBUSY at pre_destroy(). wake_up
+ * is called when css_put() is called and refcnt goes down to 0.
+ */
+ set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+ prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
+
+ if (!cgroup_clear_css_refs(cgrp)) {
+ mutex_unlock(&cgroup_mutex);
+ schedule();
+ finish_wait(&cgroup_rmdir_waitq, &wait);
+ clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
+ if (signal_pending(current))
+ return -EINTR;
+ goto again;
+ }
+ /* NO css_tryget() can success after here. */
+ finish_wait(&cgroup_rmdir_waitq, &wait);
+ clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
spin_lock(&release_list_lock);
set_bit(CGRP_REMOVED, &cgrp->flags);
@@ -3194,10 +3245,12 @@ void __css_put(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
rcu_read_lock();
- if ((atomic_dec_return(&css->refcnt) == 1) &&
- notify_on_release(cgrp)) {
- set_bit(CGRP_RELEASABLE, &cgrp->flags);
- check_for_release(cgrp);
+ if (atomic_dec_return(&css->refcnt) == 1) {
+ if (notify_on_release(cgrp)) {
+ set_bit(CGRP_RELEASABLE, &cgrp->flags);
+ check_for_release(cgrp);
+ }
+ cgroup_wakeup_rmdir_waiters(cgrp);
}
rcu_read_unlock();
}