Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/auditfilter.c    |   2
-rw-r--r-- | kernel/futex.c          | 429
-rw-r--r-- | kernel/futex_compat.c   |   3
-rw-r--r-- | kernel/nsproxy.c        |  10
-rw-r--r-- | kernel/posix-timers.c   |  35
-rw-r--r-- | kernel/rtmutex.c        |  41
-rw-r--r-- | kernel/rtmutex_common.h |  34
-rw-r--r-- | kernel/sched.c          |  99
-rw-r--r-- | kernel/signal.c         |   8
9 files changed, 169 insertions, 492 deletions
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 74cc0fc6bb8..ce61f423542 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -947,7 +947,7 @@ static void audit_update_watch(struct audit_parent *parent, /* If the update involves invalidating rules, do the inode-based * filtering now, so we don't omit records. */ - if (invalidating && + if (invalidating && current->audit_context && audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT) audit_set_auditable(current->audit_context); diff --git a/kernel/futex.c b/kernel/futex.c index 3b7f7713d9a..45490bec583 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -56,12 +56,6 @@ #include "rtmutex_common.h" -#ifdef CONFIG_DEBUG_RT_MUTEXES -# include "rtmutex-debug.h" -#else -# include "rtmutex.h" -#endif - #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8) /* @@ -111,12 +105,6 @@ struct futex_q { /* Optional priority inheritance state: */ struct futex_pi_state *pi_state; struct task_struct *task; - - /* - * This waiter is used in case of requeue from a - * normal futex to a PI-futex - */ - struct rt_mutex_waiter waiter; }; /* @@ -216,9 +204,6 @@ int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; - /* Save the user address in the ley */ - key->uaddr = uaddr; - /* * Private mappings are handled in a simple way. * @@ -424,14 +409,12 @@ static struct task_struct * futex_find_get_task(pid_t pid) rcu_read_lock(); p = find_task_by_pid(pid); - if (!p) - goto out_unlock; - if ((current->euid != p->euid) && (current->euid != p->uid)) { - p = NULL; - goto out_unlock; - } - get_task_struct(p); -out_unlock: + + if (!p || ((current->euid != p->euid) && (current->euid != p->uid))) + p = ERR_PTR(-ESRCH); + else + get_task_struct(p); + rcu_read_unlock(); return p; @@ -636,8 +619,6 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) int ret = 0; newval = FUTEX_WAITERS | new_owner->pid; - /* Keep the FUTEX_WAITER_REQUEUED flag if it was set */ - newval |= (uval & FUTEX_WAITER_REQUEUED); pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); @@ -750,259 +731,6 @@ out: } /* - * Called from futex_requeue_pi. - * Set FUTEX_WAITERS and FUTEX_WAITER_REQUEUED flags on the - * PI-futex value; search its associated pi_state if an owner exist - * or create a new one without owner. - */ -static inline int -lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb, - union futex_key *key, - struct futex_pi_state **pi_state) -{ - u32 curval, uval, newval; - -retry: - /* - * We can't handle a fault cleanly because we can't - * release the locks here. Simply return the fault. - */ - if (get_futex_value_locked(&curval, uaddr)) - return -EFAULT; - - /* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */ - if ((curval & (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) - != (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) { - /* - * No waiters yet, we prepare the futex to have some waiters. 
- */ - - uval = curval; - newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED; - - pagefault_disable(); - curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval); - pagefault_enable(); - - if (unlikely(curval == -EFAULT)) - return -EFAULT; - if (unlikely(curval != uval)) - goto retry; - } - - if (!(curval & FUTEX_TID_MASK) - || lookup_pi_state(curval, hb, key, pi_state)) { - /* the futex has no owner (yet) or the lookup failed: - allocate one pi_state without owner */ - - *pi_state = alloc_pi_state(); - - /* Already stores the key: */ - (*pi_state)->key = *key; - - /* init the mutex without owner */ - __rt_mutex_init(&(*pi_state)->pi_mutex, NULL); - } - - return 0; -} - -/* - * Keep the first nr_wake waiter from futex1, wake up one, - * and requeue the next nr_requeue waiters following hashed on - * one physical page to another physical page (PI-futex uaddr2) - */ -static int futex_requeue_pi(u32 __user *uaddr1, - struct rw_semaphore *fshared, - u32 __user *uaddr2, - int nr_wake, int nr_requeue, u32 *cmpval) -{ - union futex_key key1, key2; - struct futex_hash_bucket *hb1, *hb2; - struct plist_head *head1; - struct futex_q *this, *next; - struct futex_pi_state *pi_state2 = NULL; - struct rt_mutex_waiter *waiter, *top_waiter = NULL; - struct rt_mutex *lock2 = NULL; - int ret, drop_count = 0; - - if (refill_pi_state_cache()) - return -ENOMEM; - -retry: - /* - * First take all the futex related locks: - */ - if (fshared) - down_read(fshared); - - ret = get_futex_key(uaddr1, fshared, &key1); - if (unlikely(ret != 0)) - goto out; - ret = get_futex_key(uaddr2, fshared, &key2); - if (unlikely(ret != 0)) - goto out; - - hb1 = hash_futex(&key1); - hb2 = hash_futex(&key2); - - double_lock_hb(hb1, hb2); - - if (likely(cmpval != NULL)) { - u32 curval; - - ret = get_futex_value_locked(&curval, uaddr1); - - if (unlikely(ret)) { - spin_unlock(&hb1->lock); - if (hb1 != hb2) - spin_unlock(&hb2->lock); - - /* - * If we would have faulted, release mmap_sem, fault - * it in and start all over again. - */ - if (fshared) - up_read(fshared); - - ret = get_user(curval, uaddr1); - - if (!ret) - goto retry; - - return ret; - } - if (curval != *cmpval) { - ret = -EAGAIN; - goto out_unlock; - } - } - - head1 = &hb1->chain; - plist_for_each_entry_safe(this, next, head1, list) { - if (!match_futex (&this->key, &key1)) - continue; - if (++ret <= nr_wake) { - wake_futex(this); - } else { - /* - * FIRST: get and set the pi_state - */ - if (!pi_state2) { - int s; - /* do this only the first time we requeue someone */ - s = lookup_pi_state_for_requeue(uaddr2, hb2, - &key2, &pi_state2); - if (s) { - ret = s; - goto out_unlock; - } - - lock2 = &pi_state2->pi_mutex; - spin_lock(&lock2->wait_lock); - - /* Save the top waiter of the wait_list */ - if (rt_mutex_has_waiters(lock2)) - top_waiter = rt_mutex_top_waiter(lock2); - } else - atomic_inc(&pi_state2->refcount); - - - this->pi_state = pi_state2; - - /* - * SECOND: requeue futex_q to the correct hashbucket - */ - - /* - * If key1 and key2 hash to the same bucket, no need to - * requeue. 
- */ - if (likely(head1 != &hb2->chain)) { - plist_del(&this->list, &hb1->chain); - plist_add(&this->list, &hb2->chain); - this->lock_ptr = &hb2->lock; -#ifdef CONFIG_DEBUG_PI_LIST - this->list.plist.lock = &hb2->lock; -#endif - } - this->key = key2; - get_futex_key_refs(&key2); - drop_count++; - - - /* - * THIRD: queue it to lock2 - */ - spin_lock_irq(&this->task->pi_lock); - waiter = &this->waiter; - waiter->task = this->task; - waiter->lock = lock2; - plist_node_init(&waiter->list_entry, this->task->prio); - plist_node_init(&waiter->pi_list_entry, this->task->prio); - plist_add(&waiter->list_entry, &lock2->wait_list); - this->task->pi_blocked_on = waiter; - spin_unlock_irq(&this->task->pi_lock); - - if (ret - nr_wake >= nr_requeue) - break; - } - } - - /* If we've requeued some tasks and the top_waiter of the rt_mutex - has changed, we must adjust the priority of the owner, if any */ - if (drop_count) { - struct task_struct *owner = rt_mutex_owner(lock2); - if (owner && - (top_waiter != (waiter = rt_mutex_top_waiter(lock2)))) { - int chain_walk = 0; - - spin_lock_irq(&owner->pi_lock); - if (top_waiter) - plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); - else - /* - * There was no waiters before the requeue, - * the flag must be updated - */ - mark_rt_mutex_waiters(lock2); - - plist_add(&waiter->pi_list_entry, &owner->pi_waiters); - __rt_mutex_adjust_prio(owner); - if (owner->pi_blocked_on) { - chain_walk = 1; - get_task_struct(owner); - } - - spin_unlock_irq(&owner->pi_lock); - spin_unlock(&lock2->wait_lock); - - if (chain_walk) - rt_mutex_adjust_prio_chain(owner, 0, lock2, NULL, - current); - } else { - /* No owner or the top_waiter does not change */ - mark_rt_mutex_waiters(lock2); - spin_unlock(&lock2->wait_lock); - } - } - -out_unlock: - spin_unlock(&hb1->lock); - if (hb1 != hb2) - spin_unlock(&hb2->lock); - - /* drop_futex_key_refs() must be called outside the spinlocks. */ - while (--drop_count >= 0) - drop_futex_key_refs(&key1); - -out: - if (fshared) - up_read(fshared); - return ret; -} - -/* * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ @@ -1384,7 +1112,6 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, while (!ret) { newval = (uval & FUTEX_OWNER_DIED) | newtid; - newval |= (uval & FUTEX_WAITER_REQUEUED); pagefault_disable(); curval = futex_atomic_cmpxchg_inatomic(uaddr, @@ -1416,7 +1143,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, struct futex_q q; u32 uval; int ret; - struct hrtimer_sleeper t, *to = NULL; + struct hrtimer_sleeper t; int rem = 0; q.pi_state = NULL; @@ -1472,14 +1199,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, if (uval != val) goto out_unlock_release_sem; - /* - * This rt_mutex_waiter structure is prepared here and will - * be used only if this task is requeued from a normal futex to - * a PI-futex with futex_requeue_pi. - */ - debug_rt_mutex_init_waiter(&q.waiter); - q.waiter.task = NULL; - /* Only actually queue if *uaddr contained val. */ __queue_me(&q, hb); @@ -1510,7 +1229,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, if (!abs_time) schedule(); else { - to = &t; hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); hrtimer_init_sleeper(&t, current); t.timer.expires = *abs_time; @@ -1538,67 +1256,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, * we are the only user of it. 
*/ - if (q.pi_state) { - /* - * We were woken but have been requeued on a PI-futex. - * We have to complete the lock acquisition by taking - * the rtmutex. - */ - - struct rt_mutex *lock = &q.pi_state->pi_mutex; - - spin_lock(&lock->wait_lock); - if (unlikely(q.waiter.task)) { - remove_waiter(lock, &q.waiter); - } - spin_unlock(&lock->wait_lock); - - if (rem) - ret = -ETIMEDOUT; - else - ret = rt_mutex_timed_lock(lock, to, 1); - - if (fshared) - down_read(fshared); - spin_lock(q.lock_ptr); - - /* - * Got the lock. We might not be the anticipated owner if we - * did a lock-steal - fix up the PI-state in that case. - */ - if (!ret && q.pi_state->owner != curr) { - /* - * We MUST play with the futex we were requeued on, - * NOT the current futex. - * We can retrieve it from the key of the pi_state - */ - uaddr = q.pi_state->key.uaddr; - - ret = fixup_pi_state_owner(uaddr, &q, curr); - } else { - /* - * Catch the rare case, where the lock was released - * when we were on the way back before we locked - * the hash bucket. - */ - if (ret && q.pi_state->owner == curr) { - if (rt_mutex_trylock(&q.pi_state->pi_mutex)) - ret = 0; - } - } - - /* Unqueue and drop the lock */ - unqueue_me_pi(&q); - if (fshared) - up_read(fshared); - - debug_rt_mutex_free_waiter(&q.waiter); - - return ret; - } - - debug_rt_mutex_free_waiter(&q.waiter); - /* If we were woken (and unqueued), we succeeded, whatever. */ if (!unqueue_me(&q)) return 0; @@ -1648,51 +1305,6 @@ static long futex_wait_restart(struct restart_block *restart) } -static void set_pi_futex_owner(struct futex_hash_bucket *hb, - union futex_key *key, struct task_struct *p) -{ - struct plist_head *head; - struct futex_q *this, *next; - struct futex_pi_state *pi_state = NULL; - struct rt_mutex *lock; - - /* Search a waiter that should already exists */ - - head = &hb->chain; - - plist_for_each_entry_safe(this, next, head, list) { - if (match_futex (&this->key, key)) { - pi_state = this->pi_state; - break; - } - } - - BUG_ON(!pi_state); - - /* set p as pi_state's owner */ - lock = &pi_state->pi_mutex; - - spin_lock(&lock->wait_lock); - spin_lock_irq(&p->pi_lock); - - list_add(&pi_state->list, &p->pi_state_list); - pi_state->owner = p; - - - /* set p as pi_mutex's owner */ - debug_rt_mutex_proxy_lock(lock, p); - WARN_ON(rt_mutex_owner(lock)); - rt_mutex_set_owner(lock, p, 0); - rt_mutex_deadlock_account_lock(lock, p); - - plist_add(&rt_mutex_top_waiter(lock)->pi_list_entry, - &p->pi_waiters); - __rt_mutex_adjust_prio(p); - - spin_unlock_irq(&p->pi_lock); - spin_unlock(&lock->wait_lock); -} - /* * Userspace tried a 0 -> TID atomic transition of the futex value * and failed. The kernel side here does the whole locking operation: @@ -1753,8 +1365,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, * situation and we return success to user space. */ if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) { - if (!(curval & FUTEX_WAITER_REQUEUED)) - ret = -EDEADLK; + ret = -EDEADLK; goto out_unlock_release_sem; } @@ -1774,14 +1385,14 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, /* * There are two cases, where a futex might have no owner (the - * owner TID is 0): OWNER_DIED or REQUEUE. We take over the - * futex in this case. We also do an unconditional take over, - * when the owner of the futex died. + * owner TID is 0): OWNER_DIED. We take over the futex in this + * case. We also do an unconditional take over, when the owner + * of the futex died. 
* * This is safe as we are protected by the hash bucket lock ! */ if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { - /* Keep the OWNER_DIED and REQUEUE bits */ + /* Keep the OWNER_DIED bit */ newval = (curval & ~FUTEX_TID_MASK) | current->pid; ownerdied = 0; lock_taken = 1; @@ -1797,14 +1408,10 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, goto retry_locked; /* - * We took the lock due to requeue or owner died take over. + * We took the lock due to owner died take over. */ - if (unlikely(lock_taken)) { - /* For requeue we need to fixup the pi_futex */ - if (curval & FUTEX_WAITER_REQUEUED) - set_pi_futex_owner(hb, &q.key, curr); + if (unlikely(lock_taken)) goto out_unlock_release_sem; - } /* * We dont have the lock. Look up the PI state (or create it if @@ -2289,8 +1896,6 @@ retry: * userspace. */ mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; - /* Also keep the FUTEX_WAITER_REQUEUED flag if set */ - mval |= (uval & FUTEX_WAITER_REQUEUED); nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval); if (nval == -EFAULT) @@ -2427,9 +2032,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, case FUTEX_TRYLOCK_PI: ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); break; - case FUTEX_CMP_REQUEUE_PI: - ret = futex_requeue_pi(uaddr, fshared, uaddr2, val, val2, &val3); - break; default: ret = -ENOSYS; } @@ -2460,8 +2062,7 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, /* * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. */ - if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE - || cmd == FUTEX_CMP_REQUEUE_PI) + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) val2 = (u32) (unsigned long) utime; return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 27478948b31..f7921360efa 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -157,8 +157,7 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, t = ktime_add(ktime_get(), t); tp = &t; } - if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE - || cmd == FUTEX_CMP_REQUEUE_PI) + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) val2 = (int) (unsigned long) utime; return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 1bc4b55241a..9e83b589f75 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -145,13 +145,11 @@ void free_nsproxy(struct nsproxy *ns) /* * Called from unshare. Unshare all the namespaces part of nsproxy. - * On sucess, returns the new nsproxy and a reference to old nsproxy - * to make sure it stays around. + * On success, returns the new nsproxy. */ int unshare_nsproxy_namespaces(unsigned long unshare_flags, struct nsproxy **new_nsp, struct fs_struct *new_fs) { - struct nsproxy *old_ns = current->nsproxy; int err = 0; if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC))) @@ -170,13 +168,9 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags, if (!capable(CAP_SYS_ADMIN)) return -EPERM; - get_nsproxy(old_ns); - *new_nsp = create_new_namespaces(unshare_flags, current, new_fs ? new_fs : current->fs); - if (IS_ERR(*new_nsp)) { + if (IS_ERR(*new_nsp)) err = PTR_ERR(*new_nsp); - put_nsproxy(old_ns); - } return err; } diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 588c99da030..329ce017207 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -353,9 +353,40 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) * it should be restarted. 
*/ if (timr->it.real.interval.tv64 != 0) { + ktime_t now = hrtimer_cb_get_time(timer); + + /* + * FIXME: What we really want, is to stop this + * timer completely and restart it in case the + * SIG_IGN is removed. This is a non trivial + * change which involves sighand locking + * (sigh !), which we don't want to do late in + * the release cycle. + * + * For now we just let timers with an interval + * less than a jiffie expire every jiffie to + * avoid softirq starvation in case of SIG_IGN + * and a very small interval, which would put + * the timer right back on the softirq pending + * list. By moving now ahead of time we trick + * hrtimer_forward() to expire the timer + * later, while we still maintain the overrun + * accuracy, but have some inconsistency in + * the timer_gettime() case. This is at least + * better than a starved softirq. A more + * complex fix which solves also another related + * inconsistency is already in the pipeline. + */ +#ifdef CONFIG_HIGH_RES_TIMERS + { + ktime_t kj = ktime_set(0, NSEC_PER_SEC / HZ); + + if (timr->it.real.interval.tv64 < kj.tv64) + now = ktime_add(now, kj); + } +#endif timr->it_overrun += - hrtimer_forward(timer, - hrtimer_cb_get_time(timer), + hrtimer_forward(timer, now, timr->it.real.interval); ret = HRTIMER_RESTART; ++timr->it_requeue_pending; diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index a6fbb413052..17d28ce2030 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -56,7 +56,7 @@ * state. */ -void +static void rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, unsigned long mask) { @@ -81,6 +81,29 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock) } /* + * We can speed up the acquire/release, if the architecture + * supports cmpxchg and if there's no debugging state to be set up + */ +#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) +# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) +static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) +{ + unsigned long owner, *p = (unsigned long *) &lock->owner; + + do { + owner = *p; + } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); +} +#else +# define rt_mutex_cmpxchg(l,c,n) (0) +static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) +{ + lock->owner = (struct task_struct *) + ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); +} +#endif + +/* * Calculate task priority from the waiter list priority * * Return task->normal_prio when the waiter list is empty or when @@ -100,7 +123,7 @@ int rt_mutex_getprio(struct task_struct *task) * * This can be both boosting and unboosting. task->pi_lock must be held. */ -void __rt_mutex_adjust_prio(struct task_struct *task) +static void __rt_mutex_adjust_prio(struct task_struct *task) { int prio = rt_mutex_getprio(task); @@ -136,11 +159,11 @@ int max_lock_depth = 1024; * Decreases task's usage by one - may thus free the task. * Returns 0 or -EDEADLK. 
*/ -int rt_mutex_adjust_prio_chain(struct task_struct *task, - int deadlock_detect, - struct rt_mutex *orig_lock, - struct rt_mutex_waiter *orig_waiter, - struct task_struct *top_task) +static int rt_mutex_adjust_prio_chain(struct task_struct *task, + int deadlock_detect, + struct rt_mutex *orig_lock, + struct rt_mutex_waiter *orig_waiter, + struct task_struct *top_task) { struct rt_mutex *lock; struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; @@ -514,8 +537,8 @@ static void wakeup_next_waiter(struct rt_mutex *lock) * * Must be called with lock->wait_lock held */ -void remove_waiter(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter) +static void remove_waiter(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter) { int first = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); diff --git a/kernel/rtmutex_common.h b/kernel/rtmutex_common.h index 242ec7ee740..9c75856e791 100644 --- a/kernel/rtmutex_common.h +++ b/kernel/rtmutex_common.h @@ -113,29 +113,6 @@ static inline unsigned long rt_mutex_owner_pending(struct rt_mutex *lock) } /* - * We can speed up the acquire/release, if the architecture - * supports cmpxchg and if there's no debugging state to be set up - */ -#if defined(__HAVE_ARCH_CMPXCHG) && !defined(CONFIG_DEBUG_RT_MUTEXES) -# define rt_mutex_cmpxchg(l,c,n) (cmpxchg(&l->owner, c, n) == c) -static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) -{ - unsigned long owner, *p = (unsigned long *) &lock->owner; - - do { - owner = *p; - } while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner); -} -#else -# define rt_mutex_cmpxchg(l,c,n) (0) -static inline void mark_rt_mutex_waiters(struct rt_mutex *lock) -{ - lock->owner = (struct task_struct *) - ((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS); -} -#endif - -/* * PI-futex support (proxy locking functions, etc.): */ extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock); @@ -143,15 +120,4 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock, struct task_struct *proxy_owner); extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); - -extern void rt_mutex_set_owner(struct rt_mutex *lock, struct task_struct *owner, - unsigned long mask); -extern void __rt_mutex_adjust_prio(struct task_struct *task); -extern int rt_mutex_adjust_prio_chain(struct task_struct *task, - int deadlock_detect, - struct rt_mutex *orig_lock, - struct rt_mutex_waiter *orig_waiter, - struct task_struct *top_task); -extern void remove_waiter(struct rt_mutex *lock, - struct rt_mutex_waiter *waiter); #endif diff --git a/kernel/sched.c b/kernel/sched.c index 13cdab3b4c4..50e1a312269 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1159,21 +1159,72 @@ void wait_task_inactive(struct task_struct *p) { unsigned long flags; struct rq *rq; - int preempted; + struct prio_array *array; + int running; repeat: + /* + * We do the initial early heuristics without holding + * any task-queue locks at all. We'll only try to get + * the runqueue lock when things look like they will + * work out! + */ + rq = task_rq(p); + + /* + * If the task is actively running on another CPU + * still, just relax and busy-wait without holding + * any locks. + * + * NOTE! Since we don't hold any locks, it's not + * even sure that "rq" stays as the right runqueue! + * But we don't care, since "task_running()" will + * return false if the runqueue has changed and p + * is actually now running somewhere else! 
+ */ + while (task_running(rq, p)) + cpu_relax(); + + /* + * Ok, time to look more closely! We need the rq + * lock now, to be *sure*. If we're wrong, we'll + * just go back and repeat. + */ rq = task_rq_lock(p, &flags); - /* Must be off runqueue entirely, not preempted. */ - if (unlikely(p->array || task_running(rq, p))) { - /* If it's preempted, we yield. It could be a while. */ - preempted = !task_running(rq, p); - task_rq_unlock(rq, &flags); + running = task_running(rq, p); + array = p->array; + task_rq_unlock(rq, &flags); + + /* + * Was it really running after all now that we + * checked with the proper locks actually held? + * + * Oops. Go back and try again.. + */ + if (unlikely(running)) { cpu_relax(); - if (preempted) - yield(); goto repeat; } - task_rq_unlock(rq, &flags); + + /* + * It's not enough that it's not actively running, + * it must be off the runqueue _entirely_, and not + * preempted! + * + * So if it wa still runnable (but just not actively + * running right now), it's preempted, and we should + * yield - it could be a while. + */ + if (unlikely(array)) { + yield(); + goto repeat; + } + + /* + * Ahh, all good. It wasn't running, and it wasn't + * runnable, which means that it will never become + * running in the future either. We're all done! + */ } /*** @@ -2887,17 +2938,21 @@ static void idle_balance(int this_cpu, struct rq *this_rq) unsigned long next_balance = jiffies + 60 * HZ; for_each_domain(this_cpu, sd) { - if (sd->flags & SD_BALANCE_NEWIDLE) { + unsigned long interval; + + if (!(sd->flags & SD_LOAD_BALANCE)) + continue; + + if (sd->flags & SD_BALANCE_NEWIDLE) /* If we've pulled tasks over stop searching: */ pulled_task = load_balance_newidle(this_cpu, - this_rq, sd); - if (time_after(next_balance, - sd->last_balance + sd->balance_interval)) - next_balance = sd->last_balance - + sd->balance_interval; - if (pulled_task) - break; - } + this_rq, sd); + + interval = msecs_to_jiffies(sd->balance_interval); + if (time_after(next_balance, sd->last_balance + interval)) + next_balance = sd->last_balance + interval; + if (pulled_task) + break; } if (!pulled_task) /* @@ -7071,12 +7126,13 @@ EXPORT_SYMBOL(__might_sleep); void normalize_rt_tasks(void) { struct prio_array *array; - struct task_struct *p; + struct task_struct *g, *p; unsigned long flags; struct rq *rq; read_lock_irq(&tasklist_lock); - for_each_process(p) { + + do_each_thread(g, p) { if (!rt_task(p)) continue; @@ -7094,7 +7150,8 @@ void normalize_rt_tasks(void) __task_rq_unlock(rq); spin_unlock_irqrestore(&p->pi_lock, flags); - } + } while_each_thread(g, p); + read_unlock_irq(&tasklist_lock); } diff --git a/kernel/signal.c b/kernel/signal.c index fe590e00db8..f9405609774 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -363,7 +363,13 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask, */ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) { - int signr = __dequeue_signal(&tsk->pending, mask, info); + int signr = 0; + + /* We only dequeue private signals from ourselves, we don't let + * signalfd steal them + */ + if (tsk == current) + signr = __dequeue_signal(&tsk->pending, mask, info); if (!signr) { signr = __dequeue_signal(&tsk->signal->shared_pending, mask, info); |
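
A minimal user-space sketch of the sub-jiffie re-arm workaround described in the posix-timers hunk above. The names (forward_timer, rearm_periodic) and the HZ value are illustrative assumptions, not the kernel's hrtimer API; the sketch only shows the idea: if the period is shorter than one tick, pretend "now" is one tick later before forwarding the timer, so a SIG_IGN'd timer cannot be re-armed into the current tick.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL
#define HZ           250ULL                     /* assumed tick rate */
#define TICK_NSEC    (NSEC_PER_SEC / HZ)        /* one jiffie, in ns */

/*
 * Push the expiry past "now" in whole intervals and return the number of
 * periods skipped (the overrun count) - loosely what hrtimer_forward() does.
 */
static uint64_t forward_timer(uint64_t *expires, uint64_t now, uint64_t interval)
{
	uint64_t overrun = 0;

	if (interval == 0)
		return 0;
	while (*expires <= now) {
		*expires += interval;
		overrun++;
	}
	return overrun;
}

/*
 * The workaround from the hunk above: for a period shorter than one tick,
 * move "now" one tick ahead before forwarding, so the timer cannot land
 * back on the current tick and starve the softirq.
 */
static uint64_t rearm_periodic(uint64_t *expires, uint64_t now, uint64_t interval)
{
	if (interval < TICK_NSEC)
		now += TICK_NSEC;
	return forward_timer(expires, now, interval);
}

int main(void)
{
	uint64_t expires = 0, now = 0;
	uint64_t interval = 1000000;            /* 1 ms period, < one tick */
	uint64_t overrun = rearm_periodic(&expires, now, interval);

	printf("next expiry at %llu ns, overrun %llu\n",
	       (unsigned long long)expires, (unsigned long long)overrun);
	return 0;
}

The periods skipped while the expiry is pushed out are still counted as overruns, so accounting stays consistent over time even though individual expirations are delayed, matching the trade-off the kernel comment describes for timer_gettime().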