Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/mqueue.c |  17
-rw-r--r-- | ipc/msg.c    | 253
-rw-r--r-- | ipc/sem.c    | 308
-rw-r--r-- | ipc/shm.c    | 316
-rw-r--r-- | ipc/util.c   | 506
-rw-r--r-- | ipc/util.h   | 168
6 files changed, 930 insertions, 638 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 774843cff75..c0b26dc4617 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -29,6 +29,8 @@
 #include <linux/audit.h>
 #include <linux/signal.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
+#include <linux/pid.h>
 #include <net/sock.h>
 #include "util.h"
@@ -44,12 +46,6 @@
 #define STATE_PENDING	1
 #define STATE_READY	2
 
-/* used by sysctl */
-#define FS_MQUEUE	1
-#define CTL_QUEUESMAX	2
-#define CTL_MSGMAX	3
-#define CTL_MSGSIZEMAX	4
-
 /* default values */
 #define DFLT_QUEUESMAX	256	/* max number of message queues */
 #define DFLT_MSGMAX	10	/* max number of messages in each queue */
@@ -336,7 +332,8 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
 			(info->notify_owner &&
 			 info->notify.sigev_notify == SIGEV_SIGNAL) ?
 				info->notify.sigev_signo : 0,
-			pid_nr(info->notify_owner));
+			pid_nr_ns(info->notify_owner,
+				current->nsproxy->pid_ns));
 	spin_unlock(&info->lock);
 	buffer[sizeof(buffer)-1] = '\0';
 	slen = strlen(buffer)+1;
@@ -513,7 +510,7 @@ static void __do_notify(struct mqueue_inode_info *info)
 			sig_i.si_errno = 0;
 			sig_i.si_code = SI_MESGQ;
 			sig_i.si_value = info->notify.sigev_value;
-			sig_i.si_pid = current->tgid;
+			sig_i.si_pid = task_pid_vnr(current);
 			sig_i.si_uid = current->uid;
 
 			kill_pid_info(info->notify.sigev_signo,
@@ -1196,7 +1193,6 @@ static int msg_maxsize_limit_max = INT_MAX;
 
 static ctl_table mq_sysctls[] = {
 	{
-		.ctl_name	= CTL_QUEUESMAX,
 		.procname	= "queues_max",
 		.data		= &queues_max,
 		.maxlen		= sizeof(int),
@@ -1204,7 +1200,6 @@ static ctl_table mq_sysctls[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 	{
-		.ctl_name	= CTL_MSGMAX,
 		.procname	= "msg_max",
 		.data		= &msg_max,
 		.maxlen		= sizeof(int),
@@ -1214,7 +1209,6 @@ static ctl_table mq_sysctls[] = {
 		.extra2		= &msg_max_limit_max,
 	},
 	{
-		.ctl_name	= CTL_MSGSIZEMAX,
 		.procname	= "msgsize_max",
 		.data		= &msgsize_max,
 		.maxlen		= sizeof(int),
@@ -1228,7 +1222,6 @@ static ctl_table mq_sysctls[] = {
 
 static ctl_table mq_sysctl_dir[] = {
 	{
-		.ctl_name	= FS_MQUEUE,
 		.procname	= "mqueue",
 		.mode		= 0555,
 		.child		= mq_sysctls,
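[Both mqueue.c hunks above apply one rule: a pid value handed to user space must be translated into the observer's pid namespace instead of being reported globally. A minimal sketch of the three helpers involved (illustrative only; assumes a struct pid *owner obtained elsewhere, e.g. from notify_owner):

	/* Global pid number: only meaningful in the initial pid namespace. */
	pid_t global = pid_nr(owner);

	/* The same pid, as seen from the current task's namespace. */
	pid_t virt = pid_nr_ns(owner, current->nsproxy->pid_ns);

	/* Shorthand for current's own pid, namespace-relative. */
	pid_t self = task_pid_vnr(current);
]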
diff --git a/ipc/msg.c b/ipc/msg.c
index a03fcb522ff..fdf3db5731c 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -34,7 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/audit.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 
 #include <asm/current.h>
@@ -66,23 +66,15 @@ struct msg_sender {
 #define SEARCH_NOTEQUAL		3
 #define SEARCH_LESSEQUAL	4
 
-static atomic_t msg_bytes = ATOMIC_INIT(0);
-static atomic_t msg_hdrs = ATOMIC_INIT(0);
-
 static struct ipc_ids init_msg_ids;
 
 #define msg_ids(ns)	(*((ns)->ids[IPC_MSG_IDS]))
 
-#define msg_lock(ns, id)	((struct msg_queue*)ipc_lock(&msg_ids(ns), id))
 #define msg_unlock(msq)		ipc_unlock(&(msq)->q_perm)
-#define msg_rmid(ns, id)	((struct msg_queue*)ipc_rmid(&msg_ids(ns), id))
-#define msg_checkid(ns, msq, msgid)	\
-	ipc_checkid(&msg_ids(ns), &msq->q_perm, msgid)
-#define msg_buildid(ns, id, seq) \
-	ipc_buildid(&msg_ids(ns), id, seq)
-
-static void freeque (struct ipc_namespace *ns, struct msg_queue *msq, int id);
-static int newque (struct ipc_namespace *ns, key_t key, int msgflg);
+#define msg_buildid(id, seq)	ipc_buildid(id, seq)
+
+static void freeque(struct ipc_namespace *, struct msg_queue *);
+static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
 #endif
@@ -93,7 +85,9 @@ static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 	ns->msg_ctlmax = MSGMAX;
 	ns->msg_ctlmnb = MSGMNB;
 	ns->msg_ctlmni = MSGMNI;
-	ipc_init_ids(ids, ns->msg_ctlmni);
+	atomic_set(&ns->msg_bytes, 0);
+	atomic_set(&ns->msg_hdrs, 0);
+	ipc_init_ids(ids);
 }
 
 int msg_init_ns(struct ipc_namespace *ns)
@@ -110,20 +104,25 @@ int msg_init_ns(struct ipc_namespace *ns)
 
 void msg_exit_ns(struct ipc_namespace *ns)
 {
-	int i;
 	struct msg_queue *msq;
+	int next_id;
+	int total, in_use;
+
+	down_write(&msg_ids(ns).rw_mutex);
+
+	in_use = msg_ids(ns).in_use;
 
-	mutex_lock(&msg_ids(ns).mutex);
-	for (i = 0; i <= msg_ids(ns).max_id; i++) {
-		msq = msg_lock(ns, i);
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		msq = idr_find(&msg_ids(ns).ipcs_idr, next_id);
 		if (msq == NULL)
 			continue;
-
-		freeque(ns, msq, i);
+		ipc_lock_by_ptr(&msq->q_perm);
+		freeque(ns, msq);
+		total++;
 	}
-	mutex_unlock(&msg_ids(ns).mutex);
 
-	ipc_fini_ids(ns->ids[IPC_MSG_IDS]);
+	up_write(&msg_ids(ns).rw_mutex);
+
 	kfree(ns->ids[IPC_MSG_IDS]);
 	ns->ids[IPC_MSG_IDS] = NULL;
 }
@@ -136,10 +135,55 @@ void __init msg_init(void)
 				IPC_MSG_IDS, sysvipc_msg_proc_show);
 }
 
-static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
+/*
+ * This routine is called in the paths where the rw_mutex is held to protect
+ * access to the idr tree.
+ */
+static inline struct msg_queue *msg_lock_check_down(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&msg_ids(ns), id);
+
+	return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+/*
+ * msg_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct msg_queue *msg_lock(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock(&msg_ids(ns), id);
+
+	return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+static inline struct msg_queue *msg_lock_check(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check(&msg_ids(ns), id);
+
+	return container_of(ipcp, struct msg_queue, q_perm);
+}
+
+static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
+{
+	ipc_rmid(&msg_ids(ns), &s->q_perm);
+}
+
+/**
+ * newque - Create a new msg queue
+ * @ns: namespace
+ * @params: ptr to the structure that contains the key and msgflg
+ *
+ * Called with msg_ids.rw_mutex held (writer)
+ */
+static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	struct msg_queue *msq;
 	int id, retval;
+	key_t key = params->key;
+	int msgflg = params->flg;
 
 	msq = ipc_rcu_alloc(sizeof(*msq));
 	if (!msq)
@@ -155,14 +199,17 @@ static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
 		return retval;
 	}
 
+	/*
+	 * ipc_addid() locks msq
+	 */
 	id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
-	if (id == -1) {
+	if (id < 0) {
 		security_msg_queue_free(msq);
 		ipc_rcu_putref(msq);
-		return -ENOSPC;
+		return id;
 	}
 
-	msq->q_id = msg_buildid(ns, id, msq->q_perm.seq);
+	msq->q_perm.id = msg_buildid(id, msq->q_perm.seq);
 	msq->q_stime = msq->q_rtime = 0;
 	msq->q_ctime = get_seconds();
 	msq->q_cbytes = msq->q_qnum = 0;
@@ -171,9 +218,10 @@ static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
 	INIT_LIST_HEAD(&msq->q_messages);
 	INIT_LIST_HEAD(&msq->q_receivers);
 	INIT_LIST_HEAD(&msq->q_senders);
+
 	msg_unlock(msq);
 
-	return msq->q_id;
+	return msq->q_perm.id;
 }
 
 static inline void ss_add(struct msg_queue *msq, struct msg_sender *mss)
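[The new msg_lock*() helpers recover the enclosing msg_queue from its embedded kern_ipc_perm with container_of(). A self-contained userspace illustration of that pointer arithmetic (hypothetical struct names):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct perm { int id; };
struct queue {
	long payload;
	struct perm q_perm;		/* embedded, like kern_ipc_perm */
};

int main(void)
{
	struct queue q = { .payload = 42, .q_perm = { .id = 7 } };
	struct perm *p = &q.q_perm;
	/* Walk back from the member to the enclosing object. */
	struct queue *back = container_of(p, struct queue, q_perm);
	printf("%ld\n", back->payload);	/* prints 42 */
	return 0;
}

In the kernel code this also works for error values: q_perm is the first member of msg_queue, so an ERR_PTR() returned by ipc_lock() passes through container_of() unchanged and the callers can still test it with IS_ERR().]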
@@ -224,19 +272,19 @@ static void expunge_all(struct msg_queue *msq, int res)
 
 /*
  * freeque() wakes up waiters on the sender and receiver waiting queue,
- * removes the message queue from message queue ID
- * array, and cleans up all the messages associated with this queue.
+ * removes the message queue from message queue ID IDR, and cleans up all the
+ * messages associated with this queue.
  *
- * msg_ids.mutex and the spinlock for this message queue is hold
- * before freeque() is called. msg_ids.mutex remains locked on exit.
+ * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
+ * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
  */
-static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
+static void freeque(struct ipc_namespace *ns, struct msg_queue *msq)
 {
 	struct list_head *tmp;
 
 	expunge_all(msq, -EIDRM);
 	ss_wakeup(&msq->q_senders, 1);
-	msq = msg_rmid(ns, id);
+	msg_rmid(ns, msq);
 	msg_unlock(msq);
 
 	tmp = msq->q_messages.next;
@@ -244,49 +292,40 @@ static void freeque(struct ipc_namespace *ns, struct msg_queue *msq)
 		struct msg_msg *msg = list_entry(tmp, struct msg_msg, m_list);
 
 		tmp = tmp->next;
-		atomic_dec(&msg_hdrs);
+		atomic_dec(&ns->msg_hdrs);
 		free_msg(msg);
 	}
-	atomic_sub(msq->q_cbytes, &msg_bytes);
+	atomic_sub(msq->q_cbytes, &ns->msg_bytes);
 	security_msg_queue_free(msq);
 	ipc_rcu_putref(msq);
 }
 
+/*
+ * Called with msg_ids.rw_mutex and ipcp locked.
+ */
+static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
+{
+	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
+
+	return security_msg_queue_associate(msq, msgflg);
+}
+
 asmlinkage long sys_msgget(key_t key, int msgflg)
 {
-	struct msg_queue *msq;
-	int id, ret = -EPERM;
 	struct ipc_namespace *ns;
+	struct ipc_ops msg_ops;
+	struct ipc_params msg_params;
 
 	ns = current->nsproxy->ipc_ns;
-
-	mutex_lock(&msg_ids(ns).mutex);
-	if (key == IPC_PRIVATE)
-		ret = newque(ns, key, msgflg);
-	else if ((id = ipc_findkey(&msg_ids(ns), key)) == -1) { /* key not used */
-		if (!(msgflg & IPC_CREAT))
-			ret = -ENOENT;
-		else
-			ret = newque(ns, key, msgflg);
-	} else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
-		ret = -EEXIST;
-	} else {
-		msq = msg_lock(ns, id);
-		BUG_ON(msq == NULL);
-		if (ipcperms(&msq->q_perm, msgflg))
-			ret = -EACCES;
-		else {
-			int qid = msg_buildid(ns, id, msq->q_perm.seq);
-
-			ret = security_msg_queue_associate(msq, msgflg);
-			if (!ret)
-				ret = qid;
-		}
-		msg_unlock(msq);
-	}
-	mutex_unlock(&msg_ids(ns).mutex);
 
-	return ret;
+	msg_ops.getnew = newque;
+	msg_ops.associate = msg_security;
+	msg_ops.more_checks = NULL;
+
+	msg_params.key = key;
+	msg_params.flg = msgflg;
+
+	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
 }
 
 static inline unsigned long
@@ -420,23 +459,23 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		msginfo.msgmnb = ns->msg_ctlmnb;
 		msginfo.msgssz = MSGSSZ;
 		msginfo.msgseg = MSGSEG;
-		mutex_lock(&msg_ids(ns).mutex);
+		down_read(&msg_ids(ns).rw_mutex);
 		if (cmd == MSG_INFO) {
 			msginfo.msgpool = msg_ids(ns).in_use;
-			msginfo.msgmap = atomic_read(&msg_hdrs);
-			msginfo.msgtql = atomic_read(&msg_bytes);
+			msginfo.msgmap = atomic_read(&ns->msg_hdrs);
+			msginfo.msgtql = atomic_read(&ns->msg_bytes);
 		} else {
 			msginfo.msgmap = MSGMAP;
 			msginfo.msgpool = MSGPOOL;
 			msginfo.msgtql = MSGTQL;
 		}
-		max_id = msg_ids(ns).max_id;
-		mutex_unlock(&msg_ids(ns).mutex);
+		max_id = ipc_get_maxid(&msg_ids(ns));
+		up_read(&msg_ids(ns).rw_mutex);
 		if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
 			return -EFAULT;
		return (max_id < 0) ? 0 : max_id;
 	}
-	case MSG_STAT:
+	case MSG_STAT:	/* msqid is an index rather than a msg queue id */
 	case IPC_STAT:
 	{
 		struct msqid64_ds tbuf;
@@ -444,21 +483,16 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		if (!buf)
 			return -EFAULT;
 
-		if (cmd == MSG_STAT && msqid >= msg_ids(ns).entries->size)
-			return -EINVAL;
-
-		memset(&tbuf, 0, sizeof(tbuf));
-
-		msq = msg_lock(ns, msqid);
-		if (msq == NULL)
-			return -EINVAL;
 		if (cmd == MSG_STAT) {
-			success_return = msg_buildid(ns, msqid, msq->q_perm.seq);
+			msq = msg_lock(ns, msqid);
+			if (IS_ERR(msq))
+				return PTR_ERR(msq);
+			success_return = msq->q_perm.id;
 		} else {
-			err = -EIDRM;
-			if (msg_checkid(ns, msq, msqid))
-				goto out_unlock;
+			msq = msg_lock_check(ns, msqid);
+			if (IS_ERR(msq))
+				return PTR_ERR(msq);
 			success_return = 0;
 		}
 		err = -EACCES;
@@ -469,6 +503,8 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		if (err)
 			goto out_unlock;
 
+		memset(&tbuf, 0, sizeof(tbuf));
+
 		kernel_to_ipc64_perm(&msq->q_perm, &tbuf.msg_perm);
 		tbuf.msg_stime = msq->q_stime;
 		tbuf.msg_rtime = msq->q_rtime;
@@ -495,15 +531,13 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 			return -EINVAL;
 	}
 
-	mutex_lock(&msg_ids(ns).mutex);
-	msq = msg_lock(ns, msqid);
-	err = -EINVAL;
-	if (msq == NULL)
+	down_write(&msg_ids(ns).rw_mutex);
+	msq = msg_lock_check_down(ns, msqid);
+	if (IS_ERR(msq)) {
+		err = PTR_ERR(msq);
 		goto out_up;
+	}
 
-	err = -EIDRM;
-	if (msg_checkid(ns, msq, msqid))
-		goto out_unlock_up;
 	ipcp = &msq->q_perm;
 
 	err = audit_ipc_obj(ipcp);
@@ -552,12 +586,12 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 		break;
 	}
 	case IPC_RMID:
-		freeque(ns, msq, msqid);
+		freeque(ns, msq);
 		break;
 	}
 	err = 0;
out_up:
-	mutex_unlock(&msg_ids(ns).mutex);
+	up_write(&msg_ids(ns).rw_mutex);
 	return err;
out_unlock_up:
 	msg_unlock(msq);
@@ -611,7 +645,7 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg)
 			msr->r_msg = ERR_PTR(-E2BIG);
 		} else {
 			msr->r_msg = NULL;
-			msq->q_lrpid = msr->r_tsk->pid;
+			msq->q_lrpid = task_pid_vnr(msr->r_tsk);
 			msq->q_rtime = get_seconds();
 			wake_up_process(msr->r_tsk);
 			smp_mb();
@@ -646,14 +680,11 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 	msg->m_type = mtype;
 	msg->m_ts = msgsz;
 
-	msq = msg_lock(ns, msqid);
-	err = -EINVAL;
-	if (msq == NULL)
+	msq = msg_lock_check(ns, msqid);
+	if (IS_ERR(msq)) {
+		err = PTR_ERR(msq);
 		goto out_free;
-
-	err= -EIDRM;
-	if (msg_checkid(ns, msq, msqid))
-		goto out_unlock_free;
+	}
 
 	for (;;) {
 		struct msg_sender s;
@@ -695,7 +726,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		}
 	}
 
-	msq->q_lspid = current->tgid;
+	msq->q_lspid = task_tgid_vnr(current);
 	msq->q_stime = get_seconds();
 
 	if (!pipelined_send(msq, msg)) {
@@ -703,8 +734,8 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
 		list_add_tail(&msg->m_list, &msq->q_messages);
 		msq->q_cbytes += msgsz;
 		msq->q_qnum++;
-		atomic_add(msgsz, &msg_bytes);
-		atomic_inc(&msg_hdrs);
+		atomic_add(msgsz, &ns->msg_bytes);
+		atomic_inc(&ns->msg_hdrs);
 	}
 
 	err = 0;
@@ -760,13 +791,9 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 	mode = convert_mode(&msgtyp, msgflg);
 	ns = current->nsproxy->ipc_ns;
 
-	msq = msg_lock(ns, msqid);
-	if (msq == NULL)
-		return -EINVAL;
-
-	msg = ERR_PTR(-EIDRM);
-	if (msg_checkid(ns, msq, msqid))
-		goto out_unlock;
+	msq = msg_lock_check(ns, msqid);
+	if (IS_ERR(msq))
+		return PTR_ERR(msq);
 
 	for (;;) {
 		struct msg_receiver msr_d;
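[All of the reworked msg.c paths above are reachable through the unchanged System V API; a minimal userspace smoke test (illustrative, error handling trimmed):

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/msg.h>

struct mbuf { long mtype; char mtext[64]; };

int main(void)
{
	struct mbuf m = { .mtype = 1 };
	int id = msgget(IPC_PRIVATE, IPC_CREAT | 0600);	/* ipcget()/newque() */

	strcpy(m.mtext, "hello");
	msgsnd(id, &m, strlen(m.mtext) + 1, 0);		/* do_msgsnd() */
	msgrcv(id, &m, sizeof(m.mtext), 1, 0);		/* do_msgrcv() */
	printf("%s\n", m.mtext);
	msgctl(id, IPC_RMID, NULL);			/* freeque() */
	return 0;
}
]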
@@ -810,10 +837,10 @@ long do_msgrcv(int msqid, long *pmtype, void __user *mtext,
 			list_del(&msg->m_list);
 			msq->q_qnum--;
 			msq->q_rtime = get_seconds();
-			msq->q_lrpid = current->tgid;
+			msq->q_lrpid = task_tgid_vnr(current);
 			msq->q_cbytes -= msg->m_ts;
-			atomic_sub(msg->m_ts, &msg_bytes);
-			atomic_dec(&msg_hdrs);
+			atomic_sub(msg->m_ts, &ns->msg_bytes);
+			atomic_dec(&ns->msg_hdrs);
 			ss_wakeup(&msq->q_senders, 0);
 			msg_unlock(msq);
 			break;
@@ -926,7 +953,7 @@ static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
 	return seq_printf(s,
 			"%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
 			msq->q_perm.key,
-			msq->q_id,
+			msq->q_perm.id,
 			msq->q_perm.mode,
 			msq->q_cbytes,
 			msq->q_qnum,
diff --git a/ipc/sem.c b/ipc/sem.c
index b676fef6d20..35952c0bae4 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -80,7 +80,7 @@
 #include <linux/audit.h>
 #include <linux/capability.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
@@ -88,18 +88,14 @@
 
 #define sem_ids(ns)	(*((ns)->ids[IPC_SEM_IDS]))
 
-#define sem_lock(ns, id)	((struct sem_array*)ipc_lock(&sem_ids(ns), id))
 #define sem_unlock(sma)		ipc_unlock(&(sma)->sem_perm)
-#define sem_rmid(ns, id)	((struct sem_array*)ipc_rmid(&sem_ids(ns), id))
-#define sem_checkid(ns, sma, semid)	\
-	ipc_checkid(&sem_ids(ns),&sma->sem_perm,semid)
-#define sem_buildid(ns, id, seq) \
-	ipc_buildid(&sem_ids(ns), id, seq)
+#define sem_checkid(sma, semid)	ipc_checkid(&sma->sem_perm, semid)
+#define sem_buildid(id, seq)	ipc_buildid(id, seq)
 
 static struct ipc_ids init_sem_ids;
 
-static int newary(struct ipc_namespace *, key_t, int, int);
-static void freeary(struct ipc_namespace *ns, struct sem_array *sma, int id);
+static int newary(struct ipc_namespace *, struct ipc_params *);
+static void freeary(struct ipc_namespace *, struct sem_array *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #endif
@@ -129,7 +125,7 @@ static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 	ns->sc_semopm = SEMOPM;
 	ns->sc_semmni = SEMMNI;
 	ns->used_sems = 0;
-	ipc_init_ids(ids, ns->sc_semmni);
+	ipc_init_ids(ids);
 }
 
 int sem_init_ns(struct ipc_namespace *ns)
@@ -146,20 +142,24 @@ int sem_init_ns(struct ipc_namespace *ns)
 
 void sem_exit_ns(struct ipc_namespace *ns)
 {
-	int i;
 	struct sem_array *sma;
+	int next_id;
+	int total, in_use;
 
-	mutex_lock(&sem_ids(ns).mutex);
-	for (i = 0; i <= sem_ids(ns).max_id; i++) {
-		sma = sem_lock(ns, i);
+	down_write(&sem_ids(ns).rw_mutex);
+
+	in_use = sem_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		sma = idr_find(&sem_ids(ns).ipcs_idr, next_id);
 		if (sma == NULL)
 			continue;
-
-		freeary(ns, sma, i);
+		ipc_lock_by_ptr(&sma->sem_perm);
+		freeary(ns, sma);
+		total++;
 	}
-	mutex_unlock(&sem_ids(ns).mutex);
+	up_write(&sem_ids(ns).rw_mutex);
 
-	ipc_fini_ids(ns->ids[IPC_SEM_IDS]);
 	kfree(ns->ids[IPC_SEM_IDS]);
 	ns->ids[IPC_SEM_IDS] = NULL;
 }
@@ -173,6 +173,42 @@ void __init sem_init (void)
 }
 
 /*
+ * This routine is called in the paths where the rw_mutex is held to protect
+ * access to the idr tree.
+ */
+static inline struct sem_array *sem_lock_check_down(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&sem_ids(ns), id);
+
+	return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+/*
+ * sem_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct sem_array *sem_lock(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock(&sem_ids(ns), id);
+
+	return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
+
+	return container_of(ipcp, struct sem_array, sem_perm);
+}
+
+static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
+{
+	ipc_rmid(&sem_ids(ns), &s->sem_perm);
+}
+
+/*
  * Lockless wakeup algorithm:
  * Without the check/retry algorithm a lockless wakeup is possible:
  * - queue.status is initialized to -EINTR before blocking.
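[The comment above describes sem.c's check/retry handoff: the waker stores an IN_WAKEUP marker, wakes the task, and only then publishes the real result, so a sleeper that returns from schedule() too early must spin until the handoff completes. A sketch of both sides, reconstructed from the scheme the comment describes (illustrative only):

	/* Waker, with the semaphore spinlock held: */
	q->status = IN_WAKEUP;		/* claim: result not ready yet */
	wake_up_process(q->sleeper);
	smp_wmb();
	q->status = error;		/* publish the real result */

	/* Sleeper, after schedule() returns: */
	error = q->status;
	while (error == IN_WAKEUP) {
		cpu_relax();		/* waker is mid-handoff */
		error = q->status;
	}
]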
@@ -206,12 +242,23 @@
  */
 #define IN_WAKEUP	1
 
-static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
+/**
+ * newary - Create a new semaphore set
+ * @ns: namespace
+ * @params: ptr to the structure that contains key, semflg and nsems
+ *
+ * Called with sem_ids.rw_mutex held (as a writer)
+ */
+
+static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 {
 	int id;
 	int retval;
 	struct sem_array *sma;
 	int size;
+	key_t key = params->key;
+	int nsems = params->u.nsems;
+	int semflg = params->flg;
 
 	if (!nsems)
 		return -EINVAL;
@@ -236,14 +283,14 @@ static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
 	}
 
 	id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
-	if(id == -1) {
+	if (id < 0) {
 		security_sem_free(sma);
 		ipc_rcu_putref(sma);
-		return -ENOSPC;
+		return id;
 	}
 	ns->used_sems += nsems;
 
-	sma->sem_id = sem_buildid(ns, id, sma->sem_perm.seq);
+	sma->sem_perm.id = sem_buildid(id, sma->sem_perm.seq);
 	sma->sem_base = (struct sem *) &sma[1];
 	/* sma->sem_pending = NULL; */
 	sma->sem_pending_last = &sma->sem_pending;
@@ -252,48 +299,56 @@ static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
 	sma->sem_ctime = get_seconds();
 	sem_unlock(sma);
 
-	return sma->sem_id;
+	return sma->sem_perm.id;
+}
+
+
+/*
+ * Called with sem_ids.rw_mutex and ipcp locked.
+ */
+static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
+{
+	struct sem_array *sma;
+
+	sma = container_of(ipcp, struct sem_array, sem_perm);
+	return security_sem_associate(sma, semflg);
 }
 
-asmlinkage long sys_semget (key_t key, int nsems, int semflg)
+/*
+ * Called with sem_ids.rw_mutex and ipcp locked.
+ */
+static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
+				struct ipc_params *params)
 {
-	int id, err = -EINVAL;
 	struct sem_array *sma;
+
+	sma = container_of(ipcp, struct sem_array, sem_perm);
+	if (params->u.nsems > sma->sem_nsems)
+		return -EINVAL;
+
+	return 0;
+}
+
+asmlinkage long sys_semget(key_t key, int nsems, int semflg)
+{
 	struct ipc_namespace *ns;
+	struct ipc_ops sem_ops;
+	struct ipc_params sem_params;
 
 	ns = current->nsproxy->ipc_ns;
 
 	if (nsems < 0 || nsems > ns->sc_semmsl)
 		return -EINVAL;
 
-	mutex_lock(&sem_ids(ns).mutex);
-
-	if (key == IPC_PRIVATE) {
-		err = newary(ns, key, nsems, semflg);
-	} else if ((id = ipc_findkey(&sem_ids(ns), key)) == -1) {  /* key not used */
-		if (!(semflg & IPC_CREAT))
-			err = -ENOENT;
-		else
-			err = newary(ns, key, nsems, semflg);
-	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
-		err = -EEXIST;
-	} else {
-		sma = sem_lock(ns, id);
-		BUG_ON(sma==NULL);
-		if (nsems > sma->sem_nsems)
-			err = -EINVAL;
-		else if (ipcperms(&sma->sem_perm, semflg))
-			err = -EACCES;
-		else {
-			int semid = sem_buildid(ns, id, sma->sem_perm.seq);
-			err = security_sem_associate(sma, semflg);
-			if (!err)
-				err = semid;
-		}
-		sem_unlock(sma);
-	}
-	mutex_unlock(&sem_ids(ns).mutex);
-	return err;
+	sem_ops.getnew = newary;
+	sem_ops.associate = sem_security;
+	sem_ops.more_checks = sem_more_checks;
+
+	sem_params.key = key;
+	sem_params.flg = semflg;
+	sem_params.u.nsems = nsems;
+
+	return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
 }
 
 /* Manage the doubly linked list sma->sem_pending as a FIFO:
@@ -487,15 +542,14 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
 	return semzcnt;
 }
 
-/* Free a semaphore set. freeary() is called with sem_ids.mutex locked and
- * the spinlock for this semaphore set hold. sem_ids.mutex remains locked
- * on exit.
+/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
+ * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
+ * remains locked on exit.
  */
-static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
+static void freeary(struct ipc_namespace *ns, struct sem_array *sma)
 {
 	struct sem_undo *un;
 	struct sem_queue *q;
-	int size;
 
 	/* Invalidate the existing undo structures for this semaphore set.
 	 * (They will be freed without any further action in exit_sem()
@@ -518,12 +572,11 @@ static void freeary (struct ipc_namespace *ns, struct sem_array *sma)
 		q = n;
 	}
 
-	/* Remove the semaphore set from the ID array*/
-	sma = sem_rmid(ns, id);
+	/* Remove the semaphore set from the IDR */
+	sem_rmid(ns, sma);
 	sem_unlock(sma);
 
 	ns->used_sems -= sma->sem_nsems;
-	size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
 	security_sem_free(sma);
 	ipc_rcu_putref(sma);
 }
@@ -576,7 +629,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
 		seminfo.semmnu = SEMMNU;
 		seminfo.semmap = SEMMAP;
 		seminfo.semume = SEMUME;
-		mutex_lock(&sem_ids(ns).mutex);
+		down_read(&sem_ids(ns).rw_mutex);
 		if (cmd == SEM_INFO) {
 			seminfo.semusz = sem_ids(ns).in_use;
 			seminfo.semaem = ns->used_sems;
@@ -584,8 +637,8 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
 			seminfo.semusz = SEMUSZ;
 			seminfo.semaem = SEMAEM;
 		}
-		max_id = sem_ids(ns).max_id;
-		mutex_unlock(&sem_ids(ns).mutex);
+		max_id = ipc_get_maxid(&sem_ids(ns));
+		up_read(&sem_ids(ns).rw_mutex);
 		if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
 			return -EFAULT;
		return (max_id < 0) ? 0: max_id;
@@ -595,14 +648,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
 		struct semid64_ds tbuf;
 		int id;
 
-		if(semid >= sem_ids(ns).entries->size)
-			return -EINVAL;
-
-		memset(&tbuf,0,sizeof(tbuf));
-
 		sma = sem_lock(ns, semid);
-		if(sma == NULL)
-			return -EINVAL;
+		if (IS_ERR(sma))
+			return PTR_ERR(sma);
 
 		err = -EACCES;
 		if (ipcperms (&sma->sem_perm, S_IRUGO))
@@ -612,7 +660,9 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
 		if (err)
 			goto out_unlock;
 
-		id = sem_buildid(ns, semid, sma->sem_perm.seq);
+		id = sma->sem_perm.id;
+
+		memset(&tbuf, 0, sizeof(tbuf));
 
 		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
 		tbuf.sem_otime  = sma->sem_otime;
@@ -642,16 +692,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 	ushort* sem_io = fast_sem_io;
 	int nsems;
 
-	sma = sem_lock(ns, semid);
-	if(sma==NULL)
-		return -EINVAL;
+	sma = sem_lock_check(ns, semid);
+	if (IS_ERR(sma))
+		return PTR_ERR(sma);
 
 	nsems = sma->sem_nsems;
 
-	err=-EIDRM;
-	if (sem_checkid(ns,sma,semid))
-		goto out_unlock;
-
 	err = -EACCES;
 	if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
 		goto out_unlock;
@@ -795,7 +841,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 		for (un = sma->undo; un; un = un->id_next)
 			un->semadj[semnum] = 0;
 		curr->semval = val;
-		curr->sempid = current->tgid;
+		curr->sempid = task_tgid_vnr(current);
 		sma->sem_ctime = get_seconds();
 		/* maybe some queued-up processes were waiting for this */
 		update_queue(sma);
@@ -863,14 +909,10 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
 		if(copy_semid_from_user (&setbuf, arg.buf, version))
 			return -EFAULT;
 	}
-	sma = sem_lock(ns, semid);
-	if(sma==NULL)
-		return -EINVAL;
+	sma = sem_lock_check_down(ns, semid);
+	if (IS_ERR(sma))
+		return PTR_ERR(sma);
 
-	if (sem_checkid(ns,sma,semid)) {
-		err=-EIDRM;
-		goto out_unlock;
-	}
 	ipcp = &sma->sem_perm;
 
 	err = audit_ipc_obj(ipcp);
@@ -894,7 +936,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
 
 	switch(cmd){
 	case IPC_RMID:
-		freeary(ns, sma, semid);
+		freeary(ns, sma);
 		err = 0;
 		break;
 	case IPC_SET:
@@ -948,45 +990,15 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
 		return err;
 	case IPC_RMID:
 	case IPC_SET:
-		mutex_lock(&sem_ids(ns).mutex);
+		down_write(&sem_ids(ns).rw_mutex);
 		err = semctl_down(ns,semid,semnum,cmd,version,arg);
-		mutex_unlock(&sem_ids(ns).mutex);
+		up_write(&sem_ids(ns).rw_mutex);
 		return err;
 	default:
 		return -EINVAL;
 	}
 }
 
-static inline void lock_semundo(void)
-{
-	struct sem_undo_list *undo_list;
-
-	undo_list = current->sysvsem.undo_list;
-	if (undo_list)
-		spin_lock(&undo_list->lock);
-}
-
-/* This code has an interaction with copy_semundo().
- * Consider; two tasks are sharing the undo_list. task1
- * acquires the undo_list lock in lock_semundo().  If task2 now
- * exits before task1 releases the lock (by calling
- * unlock_semundo()), then task1 will never call spin_unlock().
- * This leave the sem_undo_list in a locked state.  If task1 now creats task3
- * and once again shares the sem_undo_list, the sem_undo_list will still be
- * locked, and future SEM_UNDO operations will deadlock.  This case is
- * dealt with in copy_semundo() by having it reinitialize the spin lock when
- * the refcnt goes from 1 to 2.
- */
-static inline void unlock_semundo(void)
-{
-	struct sem_undo_list *undo_list;
-
-	undo_list = current->sysvsem.undo_list;
-	if (undo_list)
-		spin_unlock(&undo_list->lock);
-}
-
-
 /* If the task doesn't already have a undo_list, then allocate one
  * here.  We guarantee there is only one thread using this undo list,
  * and current is THE ONE
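[The undo_list machinery that follows is driven from user space by SEM_UNDO operations; a minimal illustration (error handling trimmed):

#include <sys/ipc.h>
#include <sys/sem.h>

int main(void)
{
	struct sembuf op = { .sem_num = 0, .sem_op = 1, .sem_flg = SEM_UNDO };
	int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);

	/* The +1 is recorded in this task's undo list (find_undo() path);
	 * if the process exits without reversing it, exit_sem() does. */
	semop(id, &op, 1);

	semctl(id, 0, IPC_RMID);
	return 0;
}
]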
@@ -1047,22 +1059,17 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	if (error)
 		return ERR_PTR(error);
 
-	lock_semundo();
+	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
-	unlock_semundo();
+	spin_unlock(&ulp->lock);
 	if (likely(un!=NULL))
 		goto out;
 
 	/* no undo structure around - allocate one. */
-	sma = sem_lock(ns, semid);
-	un = ERR_PTR(-EINVAL);
-	if(sma==NULL)
-		goto out;
-	un = ERR_PTR(-EIDRM);
-	if (sem_checkid(ns,sma,semid)) {
-		sem_unlock(sma);
-		goto out;
-	}
+	sma = sem_lock_check(ns, semid);
+	if (IS_ERR(sma))
+		return ERR_PTR(PTR_ERR(sma));
+
 	nsems = sma->sem_nsems;
 	ipc_rcu_getref(sma);
 	sem_unlock(sma);
@@ -1077,10 +1084,10 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	new->semadj = (short *) &new[1];
 	new->semid = semid;
 
-	lock_semundo();
+	spin_lock(&ulp->lock);
 	un = lookup_undo(ulp, semid);
 	if (un) {
-		unlock_semundo();
+		spin_unlock(&ulp->lock);
 		kfree(new);
 		ipc_lock_by_ptr(&sma->sem_perm);
 		ipc_rcu_putref(sma);
@@ -1091,7 +1098,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	ipc_rcu_putref(sma);
 	if (sma->sem_perm.deleted) {
 		sem_unlock(sma);
-		unlock_semundo();
+		spin_unlock(&ulp->lock);
 		kfree(new);
 		un = ERR_PTR(-EIDRM);
 		goto out;
@@ -1102,7 +1109,7 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 	sma->undo = new;
 	sem_unlock(sma);
 	un = new;
-	unlock_semundo();
+	spin_unlock(&ulp->lock);
out:
 	return un;
 }
@@ -1168,15 +1175,14 @@ retry_undos:
 	} else
 		un = NULL;
 
-	sma = sem_lock(ns, semid);
-	error=-EINVAL;
-	if(sma==NULL)
+	sma = sem_lock_check(ns, semid);
+	if (IS_ERR(sma)) {
+		error = PTR_ERR(sma);
 		goto out_free;
-	error = -EIDRM;
-	if (sem_checkid(ns,sma,semid))
-		goto out_unlock_free;
+	}
+
 	/*
-	 * semid identifies are not unique - find_undo may have
+	 * semid identifiers are not unique - find_undo may have
 	 * allocated an undo structure, it was invalidated by an RMID
 	 * and now a new array with received the same id. Check and retry.
 	 */
@@ -1196,7 +1202,7 @@ retry_undos:
 	if (error)
 		goto out_unlock_free;
 
-	error = try_atomic_semop (sma, sops, nsops, un, current->tgid);
+	error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
 	if (error <= 0) {
 		if (alter && error == 0)
 			update_queue (sma);
@@ -1211,7 +1217,7 @@ retry_undos:
 	queue.sops = sops;
 	queue.nsops = nsops;
 	queue.undo = un;
-	queue.pid = current->tgid;
+	queue.pid = task_tgid_vnr(current);
 	queue.id = semid;
 	queue.alter = alter;
 	if (alter)
@@ -1242,7 +1248,7 @@ retry_undos:
 	}
 
 	sma = sem_lock(ns, semid);
-	if(sma==NULL) {
+	if (IS_ERR(sma)) {
 		BUG_ON(queue.prev != NULL);
 		error = -EIDRM;
 		goto out_free;
@@ -1279,10 +1285,6 @@ asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsop
 
 /* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
  * parent and child tasks.
- *
- * See the notes above unlock_semundo() regarding the spin_lock_init()
- * in this code.  Initialize the undo_list->lock here instead of get_undo_list()
- * because of the reasoning in the comment above unlock_semundo.
 */
 int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
@@ -1342,13 +1344,13 @@ void exit_sem(struct task_struct *tsk)
 		if(semid == -1)
 			continue;
 
 		sma = sem_lock(ns, semid);
-		if (sma == NULL)
+		if (IS_ERR(sma))
 			continue;
 
 		if (u->semid == -1)
 			goto next_entry;
 
-		BUG_ON(sem_checkid(ns,sma,u->semid));
+		BUG_ON(sem_checkid(sma, u->semid));
 
 		/* remove u from the sma->undo list */
 		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
@@ -1382,7 +1384,7 @@ found:
 				semaphore->semval = 0;
 			if (semaphore->semval > SEMVMX)
 				semaphore->semval = SEMVMX;
-			semaphore->sempid = current->tgid;
+			semaphore->sempid = task_tgid_vnr(current);
 		}
 	}
 	sma->sem_otime = get_seconds();
@@ -1402,7 +1404,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 	return seq_printf(s,
 			"%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
 			sma->sem_perm.key,
-			sma->sem_id,
+			sma->sem_perm.id,
 			sma->sem_perm.mode,
 			sma->sem_nsems,
 			sma->sem_perm.uid,
diff --git a/ipc/shm.c b/ipc/shm.c
index 5fc5cf50cf1..3818fae625c 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -35,7 +35,7 @@
 #include <linux/capability.h>
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/nsproxy.h>
 #include <linux/mount.h>
@@ -59,17 +59,11 @@ static struct ipc_ids init_shm_ids;
 
 #define shm_ids(ns)	(*((ns)->ids[IPC_SHM_IDS]))
 
-#define shm_lock(ns, id)		\
-	((struct shmid_kernel*)ipc_lock(&shm_ids(ns),id))
 #define shm_unlock(shp)			\
 	ipc_unlock(&(shp)->shm_perm)
-#define shm_get(ns, id)			\
-	((struct shmid_kernel*)ipc_get(&shm_ids(ns),id))
-#define shm_buildid(ns, id, seq)	\
-	ipc_buildid(&shm_ids(ns), id, seq)
+#define shm_buildid(id, seq)	ipc_buildid(id, seq)
 
-static int newseg (struct ipc_namespace *ns, key_t key,
-		int shmflg, size_t size);
+static int newseg(struct ipc_namespace *, struct ipc_params *);
 static void shm_open(struct vm_area_struct *vma);
 static void shm_close(struct vm_area_struct *vma);
 static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
@@ -84,9 +78,13 @@ static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
 	ns->shm_ctlall = SHMALL;
 	ns->shm_ctlmni = SHMMNI;
 	ns->shm_tot = 0;
-	ipc_init_ids(ids, 1);
+	ipc_init_ids(ids);
 }
 
+/*
+ * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
+ * Only shm_ids.rw_mutex remains locked on exit.
+ */
 static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
 	if (shp->shm_nattch){
@@ -112,20 +110,24 @@ int shm_init_ns(struct ipc_namespace *ns)
 
 void shm_exit_ns(struct ipc_namespace *ns)
 {
-	int i;
 	struct shmid_kernel *shp;
+	int next_id;
+	int total, in_use;
+
+	down_write(&shm_ids(ns).rw_mutex);
 
-	mutex_lock(&shm_ids(ns).mutex);
-	for (i = 0; i <= shm_ids(ns).max_id; i++) {
-		shp = shm_lock(ns, i);
+	in_use = shm_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
+		shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
 		if (shp == NULL)
 			continue;
-
+		ipc_lock_by_ptr(&shp->shm_perm);
 		do_shm_rmid(ns, shp);
+		total++;
 	}
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 
-	ipc_fini_ids(ns->ids[IPC_SHM_IDS]);
 	kfree(ns->ids[IPC_SHM_IDS]);
 	ns->ids[IPC_SHM_IDS] = NULL;
 }
@@ -138,17 +140,49 @@ void __init shm_init (void)
 				IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
-static inline int shm_checkid(struct ipc_namespace *ns,
-		struct shmid_kernel *s, int id)
+/*
+ * shm_lock_(check_)down routines are called in the paths where the rw_mutex
+ * is held to protect access to the idr tree.
+ */
+static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns,
+						int id)
 {
-	if (ipc_checkid(&shm_ids(ns), &s->shm_perm, id))
-		return -EIDRM;
-	return 0;
+	struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_lock_check_down(
+						struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check_down(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+/*
+ * shm_lock_(check_) routines are called in the paths where the rw_mutex
+ * is not held.
+ */
+static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
+						int id)
+{
+	struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
+
+	return container_of(ipcp, struct shmid_kernel, shm_perm);
 }
 
-static inline struct shmid_kernel *shm_rmid(struct ipc_namespace *ns, int id)
+static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
-	return (struct shmid_kernel *)ipc_rmid(&shm_ids(ns), id);
+	ipc_rmid(&shm_ids(ns), &s->shm_perm);
 }
 
 static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
@@ -166,9 +200,9 @@ static void shm_open(struct vm_area_struct *vma)
 	struct shmid_kernel *shp;
 
 	shp = shm_lock(sfd->ns, sfd->id);
-	BUG_ON(!shp);
+	BUG_ON(IS_ERR(shp));
 	shp->shm_atim = get_seconds();
-	shp->shm_lprid = current->tgid;
+	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_nattch++;
 	shm_unlock(shp);
 }
@@ -176,15 +210,16 @@ static void shm_open(struct vm_area_struct *vma)
 /*
  * shm_destroy - free the struct shmid_kernel
  *
+ * @ns: namespace
  * @shp: struct to free
  *
- * It has to be called with shp and shm_ids.mutex locked,
+ * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
  * but returns with shp unlocked and freed.
  */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
 	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	shm_rmid(ns, shp->id);
+	shm_rmid(ns, shp);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shp->shm_file))
 		shmem_lock(shp->shm_file, 0, shp->mlock_user);
@@ -209,11 +244,11 @@ static void shm_close(struct vm_area_struct *vma)
 	struct shmid_kernel *shp;
 	struct ipc_namespace *ns = sfd->ns;
 
-	mutex_lock(&shm_ids(ns).mutex);
+	down_write(&shm_ids(ns).rw_mutex);
 	/* remove from the list of attaches of the shm segment */
-	shp = shm_lock(ns, sfd->id);
-	BUG_ON(!shp);
-	shp->shm_lprid = current->tgid;
+	shp = shm_lock_down(ns, sfd->id);
+	BUG_ON(IS_ERR(shp));
+	shp->shm_lprid = task_tgid_vnr(current);
 	shp->shm_dtim = get_seconds();
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
@@ -221,7 +256,7 @@ static void shm_close(struct vm_area_struct *vma)
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 }
 
 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -337,8 +372,19 @@ static struct vm_operations_struct shm_vm_ops = {
 #endif
 };
 
-static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
+/**
+ * newseg - Create a new shared memory segment
+ * @ns: namespace
+ * @params: ptr to the structure that contains key, size and shmflg
+ *
+ * Called with shm_ids.rw_mutex held as a writer.
+ */
+
+static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 {
+	key_t key = params->key;
+	int shmflg = params->flg;
+	size_t size = params->u.size;
 	int error;
 	struct shmid_kernel *shp;
 	int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
@@ -387,28 +433,30 @@ static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
 	if (IS_ERR(file))
 		goto no_file;
 
-	error = -ENOSPC;
 	id = shm_addid(ns, shp);
-	if(id == -1)
+	if (id < 0) {
+		error = id;
 		goto no_id;
+	}
 
-	shp->shm_cprid = current->tgid;
+	shp->shm_cprid = task_tgid_vnr(current);
 	shp->shm_lprid = 0;
 	shp->shm_atim = shp->shm_dtim = 0;
 	shp->shm_ctim = get_seconds();
 	shp->shm_segsz = size;
 	shp->shm_nattch = 0;
-	shp->id = shm_buildid(ns, id, shp->shm_perm.seq);
+	shp->shm_perm.id = shm_buildid(id, shp->shm_perm.seq);
 	shp->shm_file = file;
 	/*
 	 * shmid gets reported as "inode#" in /proc/pid/maps.
 	 * proc-ps tools use this. Changing this will break them.
 	 */
-	file->f_dentry->d_inode->i_ino = shp->id;
+	file->f_dentry->d_inode->i_ino = shp->shm_perm.id;
 
 	ns->shm_tot += numpages;
+	error = shp->shm_perm.id;
 	shm_unlock(shp);
-	return shp->id;
+	return error;
 
no_id:
 	fput(file);
@@ -418,42 +466,49 @@ no_file:
 	return error;
 }
 
-asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
+/*
+ * Called with shm_ids.rw_mutex and ipcp locked.
+ */
+static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 {
 	struct shmid_kernel *shp;
-	int err, id = 0;
+
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+	return security_shm_associate(shp, shmflg);
+}
+
+/*
+ * Called with shm_ids.rw_mutex and ipcp locked.
+ */
+static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
+				struct ipc_params *params)
+{
+	struct shmid_kernel *shp;
+
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+	if (shp->shm_segsz < params->u.size)
+		return -EINVAL;
+
+	return 0;
+}
+
+asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
+{
 	struct ipc_namespace *ns;
+	struct ipc_ops shm_ops;
+	struct ipc_params shm_params;
 
 	ns = current->nsproxy->ipc_ns;
 
-	mutex_lock(&shm_ids(ns).mutex);
-	if (key == IPC_PRIVATE) {
-		err = newseg(ns, key, shmflg, size);
-	} else if ((id = ipc_findkey(&shm_ids(ns), key)) == -1) {
-		if (!(shmflg & IPC_CREAT))
-			err = -ENOENT;
-		else
-			err = newseg(ns, key, shmflg, size);
-	} else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
-		err = -EEXIST;
-	} else {
-		shp = shm_lock(ns, id);
-		BUG_ON(shp==NULL);
-		if (shp->shm_segsz < size)
-			err = -EINVAL;
-		else if (ipcperms(&shp->shm_perm, shmflg))
-			err = -EACCES;
-		else {
-			int shmid = shm_buildid(ns, id, shp->shm_perm.seq);
-			err = security_shm_associate(shp, shmflg);
-			if (!err)
-				err = shmid;
-		}
-		shm_unlock(shp);
-	}
-	mutex_unlock(&shm_ids(ns).mutex);
+	shm_ops.getnew = newseg;
+	shm_ops.associate = shm_security;
+	shm_ops.more_checks = shm_more_checks;
 
-	return err;
+	shm_params.key = key;
+	shm_params.flg = shmflg;
+	shm_params.u.size = size;
+
+	return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
 }
 
 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
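[sys_shmget() now funnels through the same ipcget() helper as the msg and sem variants; the userspace contract is unchanged. A minimal attach/detach round trip (illustrative, error handling trimmed):

#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
	int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);	/* newseg() */
	char *p = shmat(id, NULL, 0);		/* do_shmat(), shm_nattch++ */

	strcpy(p, "hello");
	printf("%s\n", p);
	shmdt(p);				/* shm_close(), shm_nattch-- */
	shmctl(id, IPC_RMID, NULL);		/* do_shm_rmid() */
	return 0;
}
]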
@@ -547,20 +602,26 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 	}
 }
 
+/*
+ * Called with shm_ids.rw_mutex held as a reader
+ */
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 		unsigned long *swp)
 {
-	int i;
+	int next_id;
+	int total, in_use;
 
 	*rss = 0;
 	*swp = 0;
 
-	for (i = 0; i <= shm_ids(ns).max_id; i++) {
+	in_use = shm_ids(ns).in_use;
+
+	for (total = 0, next_id = 0; total < in_use; next_id++) {
 		struct shmid_kernel *shp;
 		struct inode *inode;
 
-		shp = shm_get(ns, i);
-		if(!shp)
+		shp = idr_find(&shm_ids(ns).ipcs_idr, next_id);
+		if (shp == NULL)
 			continue;
 
 		inode = shp->shm_file->f_path.dentry->d_inode;
@@ -575,6 +636,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 			*swp += info->swapped;
 			spin_unlock(&info->lock);
 		}
+
+		total++;
 	}
 }
 
@@ -610,8 +673,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		shminfo.shmmin = SHMMIN;
 		if(copy_shminfo_to_user (buf, &shminfo, version))
 			return -EFAULT;
-		/* reading a integer is always atomic */
-		err= shm_ids(ns).max_id;
+
+		down_read(&shm_ids(ns).rw_mutex);
+		err = ipc_get_maxid(&shm_ids(ns));
+		up_read(&shm_ids(ns).rw_mutex);
+
 		if(err<0)
 			err = 0;
 		goto out;
@@ -625,14 +691,14 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 			return err;
 
 		memset(&shm_info,0,sizeof(shm_info));
-		mutex_lock(&shm_ids(ns).mutex);
+		down_read(&shm_ids(ns).rw_mutex);
 		shm_info.used_ids = shm_ids(ns).in_use;
 		shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
 		shm_info.shm_tot = ns->shm_tot;
 		shm_info.swap_attempts = 0;
 		shm_info.swap_successes = 0;
-		err = shm_ids(ns).max_id;
-		mutex_unlock(&shm_ids(ns).mutex);
+		err = ipc_get_maxid(&shm_ids(ns));
+		up_read(&shm_ids(ns).rw_mutex);
 		if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
 			err = -EFAULT;
 			goto out;
@@ -646,20 +712,25 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 	{
 		struct shmid64_ds tbuf;
 		int result;
-		memset(&tbuf, 0, sizeof(tbuf));
-		shp = shm_lock(ns, shmid);
-		if(shp==NULL) {
-			err = -EINVAL;
+
+		if (!buf) {
+			err = -EFAULT;
 			goto out;
-		} else if(cmd==SHM_STAT) {
-			err = -EINVAL;
-			if (shmid > shm_ids(ns).max_id)
-				goto out_unlock;
-			result = shm_buildid(ns, shmid, shp->shm_perm.seq);
+		}
+
+		if (cmd == SHM_STAT) {
+			shp = shm_lock(ns, shmid);
+			if (IS_ERR(shp)) {
+				err = PTR_ERR(shp);
+				goto out;
+			}
+			result = shp->shm_perm.id;
 		} else {
-			err = shm_checkid(ns, shp,shmid);
-			if(err)
-				goto out_unlock;
+			shp = shm_lock_check(ns, shmid);
+			if (IS_ERR(shp)) {
+				err = PTR_ERR(shp);
+				goto out;
+			}
 			result = 0;
 		}
 		err=-EACCES;
@@ -668,6 +739,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		err = security_shm_shmctl(shp, cmd);
 		if (err)
 			goto out_unlock;
+		memset(&tbuf, 0, sizeof(tbuf));
 		kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
 		tbuf.shm_segsz	= shp->shm_segsz;
 		tbuf.shm_atime	= shp->shm_atim;
@@ -686,14 +758,11 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 	case SHM_LOCK:
 	case SHM_UNLOCK:
 	{
-		shp = shm_lock(ns, shmid);
-		if(shp==NULL) {
-			err = -EINVAL;
+		shp = shm_lock_check(ns, shmid);
+		if (IS_ERR(shp)) {
+			err = PTR_ERR(shp);
 			goto out;
 		}
-		err = shm_checkid(ns, shp,shmid);
-		if(err)
-			goto out_unlock;
 
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
@@ -742,14 +811,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 		 * Instead we set a destroyed flag, and then blow
 		 * the name away when the usage hits zero.
		 */
-		mutex_lock(&shm_ids(ns).mutex);
-		shp = shm_lock(ns, shmid);
-		err = -EINVAL;
-		if (shp == NULL)
+		down_write(&shm_ids(ns).rw_mutex);
+		shp = shm_lock_check_down(ns, shmid);
+		if (IS_ERR(shp)) {
+			err = PTR_ERR(shp);
 			goto out_up;
-		err = shm_checkid(ns, shp, shmid);
-		if(err)
-			goto out_unlock_up;
+		}
 
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
@@ -767,24 +834,27 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 			goto out_unlock_up;
 
 		do_shm_rmid(ns, shp);
-		mutex_unlock(&shm_ids(ns).mutex);
+		up_write(&shm_ids(ns).rw_mutex);
 		goto out;
 	}
 
 	case IPC_SET:
 	{
+		if (!buf) {
+			err = -EFAULT;
+			goto out;
+		}
+
 		if (copy_shmid_from_user (&setbuf, buf, version)) {
 			err = -EFAULT;
 			goto out;
 		}
-		mutex_lock(&shm_ids(ns).mutex);
-		shp = shm_lock(ns, shmid);
-		err=-EINVAL;
-		if(shp==NULL)
+		down_write(&shm_ids(ns).rw_mutex);
+		shp = shm_lock_check_down(ns, shmid);
+		if (IS_ERR(shp)) {
+			err = PTR_ERR(shp);
 			goto out_up;
-		err = shm_checkid(ns, shp,shmid);
-		if(err)
-			goto out_unlock_up;
+		}
 		err = audit_ipc_obj(&(shp->shm_perm));
 		if (err)
 			goto out_unlock_up;
@@ -819,7 +889,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
out_unlock_up:
 	shm_unlock(shp);
out_up:
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 	goto out;
out_unlock:
 	shm_unlock(shp);
@@ -890,13 +960,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	 * additional creator id...
 	 */
 	ns = current->nsproxy->ipc_ns;
-	shp = shm_lock(ns, shmid);
-	if(shp == NULL)
+	shp = shm_lock_check(ns, shmid);
+	if (IS_ERR(shp)) {
+		err = PTR_ERR(shp);
 		goto out;
-
-	err = shm_checkid(ns, shp,shmid);
-	if (err)
-		goto out_unlock;
+	}
 
 	err = -EACCES;
 	if (ipcperms(&shp->shm_perm, acc_mode))
@@ -925,7 +993,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	file->private_data = sfd;
 	file->f_mapping = shp->shm_file->f_mapping;
-	sfd->id = shp->id;
+	sfd->id = shp->shm_perm.id;
 	sfd->ns = get_ipc_ns(ns);
 	sfd->file = shp->shm_file;
 	sfd->vm_ops = NULL;
@@ -955,16 +1023,16 @@ invalid:
 	fput(file);
 
out_nattch:
-	mutex_lock(&shm_ids(ns).mutex);
-	shp = shm_lock(ns, shmid);
-	BUG_ON(!shp);
+	down_write(&shm_ids(ns).rw_mutex);
+	shp = shm_lock_down(ns, shmid);
+	BUG_ON(IS_ERR(shp));
 	shp->shm_nattch--;
 	if(shp->shm_nattch == 0 &&
 	   shp->shm_perm.mode & SHM_DEST)
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-	mutex_unlock(&shm_ids(ns).mutex);
+	up_write(&shm_ids(ns).rw_mutex);
 
out:
 	return err;
@@ -1094,7 +1162,7 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
 		format = BIG_STRING;
 	return seq_printf(s, format,
			  shp->shm_perm.key,
-			  shp->id,
+			  shp->shm_perm.id,
			  shp->shm_perm.mode,
			  shp->shm_segsz,
			  shp->shm_cprid,
diff --git a/ipc/util.c b/ipc/util.c
index 44e5135aee4..1aa0ebf71ba 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -32,6 +32,7 @@
 #include <linux/proc_fs.h>
 #include <linux/audit.h>
 #include <linux/nsproxy.h>
+#include <linux/rwsem.h>
 
 #include <asm/unistd.h>
@@ -129,23 +130,16 @@ __initcall(ipc_init);
 /**
 *	ipc_init_ids		-	initialise IPC identifiers
 *	@ids: Identifier set
- *	@size: Number of identifiers
 *
- *	Given a size for the ipc identifier range (limited below IPCMNI)
- *	set up the sequence range to use then allocate and initialise the
- *	array itself.
+ *	Set up the sequence range to use for the ipc identifier range (limited
+ *	below IPCMNI) then initialise the ids idr.
 */
-void ipc_init_ids(struct ipc_ids* ids, int size)
+void ipc_init_ids(struct ipc_ids *ids)
 {
-	int i;
+	init_rwsem(&ids->rw_mutex);
 
-	mutex_init(&ids->mutex);
-
-	if(size > IPCMNI)
-		size = IPCMNI;
 	ids->in_use = 0;
-	ids->max_id = -1;
 	ids->seq = 0;
 	{
 		int seq_limit = INT_MAX/SEQ_MULTIPLIER;
@@ -155,17 +149,7 @@ void ipc_init_ids(struct ipc_ids *ids)
 			ids->seq_max = seq_limit;
 	}
 
-	ids->entries = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*size +
-				     sizeof(struct ipc_id_ary));
-
-	if(ids->entries == NULL) {
-		printk(KERN_ERR "ipc_init_ids() failed, ipc service disabled.\n");
-		size = 0;
-		ids->entries = &ids->nullentry;
-	}
-	ids->entries->size = size;
-	for(i=0;i<size;i++)
-		ids->entries->p[i] = NULL;
+	idr_init(&ids->ipcs_idr);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -208,99 +192,96 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 *	@ids: Identifier set
 *	@key: The key to find
 *
- *	Requires ipc_ids.mutex locked.
- *	Returns the identifier if found or -1 if not.
+ *	Requires ipc_ids.rw_mutex locked.
+ *	Returns the LOCKED pointer to the ipc structure if found or NULL
+ *	if not.
+ *	If key is found ipc points to the owning ipc structure
 */
-int ipc_findkey(struct ipc_ids* ids, key_t key)
+static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
 {
-	int id;
-	struct kern_ipc_perm* p;
-	int max_id = ids->max_id;
+	struct kern_ipc_perm *ipc;
+	int next_id;
+	int total;
 
-	/*
-	 * rcu_dereference() is not needed here
-	 * since ipc_ids.mutex is held
-	 */
-	for (id = 0; id <= max_id; id++) {
-		p = ids->entries->p[id];
-		if(p==NULL)
+	for (total = 0, next_id = 0; total < ids->in_use; next_id++) {
+		ipc = idr_find(&ids->ipcs_idr, next_id);
+
+		if (ipc == NULL)
+			continue;
+
+		if (ipc->key != key) {
+			total++;
 			continue;
-		if (key == p->key)
-			return id;
+		}
+
+		ipc_lock_by_ptr(ipc);
+		return ipc;
 	}
-	return -1;
+
+	return NULL;
 }
 
-/*
- * Requires ipc_ids.mutex locked
+/**
+ *	ipc_get_maxid	-	get the last assigned id
+ *	@ids: IPC identifier set
+ *
+ *	Called with ipc_ids.rw_mutex held.
 */
-static int grow_ary(struct ipc_ids* ids, int newsize)
-{
-	struct ipc_id_ary* new;
-	struct ipc_id_ary* old;
-	int i;
-	int size = ids->entries->size;
-
-	if(newsize > IPCMNI)
-		newsize = IPCMNI;
-	if(newsize <= size)
-		return newsize;
-
-	new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
-			    sizeof(struct ipc_id_ary));
-	if(new == NULL)
-		return size;
-	new->size = newsize;
-	memcpy(new->p, ids->entries->p, sizeof(struct kern_ipc_perm *)*size);
-	for(i=size;i<newsize;i++) {
-		new->p[i] = NULL;
-	}
-	old = ids->entries;
 
-	/*
-	 * Use rcu_assign_pointer() to make sure the memcpyed contents
-	 * of the new array are visible before the new array becomes visible.
-	 */
-	rcu_assign_pointer(ids->entries, new);
+int ipc_get_maxid(struct ipc_ids *ids)
+{
+	struct kern_ipc_perm *ipc;
+	int max_id = -1;
+	int total, id;
+
+	if (ids->in_use == 0)
+		return -1;
 
-	__ipc_fini_ids(ids, old);
-	return newsize;
+	if (ids->in_use == IPCMNI)
+		return IPCMNI - 1;
+
+	/* Look for the last assigned id */
+	total = 0;
+	for (id = 0; id < IPCMNI && total < ids->in_use; id++) {
+		ipc = idr_find(&ids->ipcs_idr, id);
+		if (ipc != NULL) {
+			max_id = id;
+			total++;
+		}
+	}
+	return max_id;
 }
 
 /**
 *	ipc_addid 	-	add an IPC identifier
 *	@ids: IPC identifier set
 *	@new: new IPC permission set
- *	@size: new size limit for the id array
+ *	@size: limit for the number of used ids
 *
- *	Add an entry 'new' to the IPC arrays. The permissions object is
+ *	Add an entry 'new' to the IPC ids idr. The permissions object is
 *	initialised and the first free entry is set up and the id assigned
- *	is returned. The list is returned in a locked state on success.
- *	On failure the list is not locked and -1 is returned.
+ *	is returned. The 'new' entry is returned in a locked state on success.
+ *	On failure the entry is not locked and a negative err-code is returned.
 *
- *	Called with ipc_ids.mutex held.
+ *	Called with ipc_ids.rw_mutex held as a writer.
 */
 
 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 {
-	int id;
+	int id, err;
 
-	size = grow_ary(ids,size);
+	if (size > IPCMNI)
+		size = IPCMNI;
+
+	if (ids->in_use >= size)
+		return -ENOSPC;
+
+	err = idr_get_new(&ids->ipcs_idr, new, &id);
+	if (err)
+		return err;
 
-	/*
-	 * rcu_dereference()() is not needed here since
-	 * ipc_ids.mutex is held
-	 */
-	for (id = 0; id < size; id++) {
-		if(ids->entries->p[id] == NULL)
-			goto found;
-	}
-	return -1;
-found:
 	ids->in_use++;
-	if (id > ids->max_id)
-		ids->max_id = id;
 
 	new->cuid = new->uid = current->euid;
 	new->gid = new->cgid = current->egid;
@@ -313,48 +294,153 @@ found:
 	new->deleted = 0;
 	rcu_read_lock();
 	spin_lock(&new->lock);
-	ids->entries->p[id] = new;
 	return id;
 }
 
 /**
+ *	ipcget_new	-	create a new ipc object
+ *	@ns: namespace
+ *	@ids: IPC identifer set
+ *	@ops: the actual creation routine to call
+ *	@params: its parameters
+ *
+ *	This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ *	when the key is IPC_PRIVATE.
+ */
+int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
+		struct ipc_ops *ops, struct ipc_params *params)
+{
+	int err;
+retry:
+	err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
+
+	if (!err)
+		return -ENOMEM;
+
+	down_write(&ids->rw_mutex);
+	err = ops->getnew(ns, params);
+	up_write(&ids->rw_mutex);
+
+	if (err == -EAGAIN)
+		goto retry;
+
+	return err;
+}
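[ipcget_new() above follows the two-step IDR allocation protocol of this kernel generation: idr_pre_get() preallocates memory outside any lock (it may sleep and returns 0 on failure), while idr_get_new() consumes the preallocation under the lock and reports -EAGAIN when a concurrent allocator stole it. Reduced to its skeleton (illustrative):

again:
	if (!idr_pre_get(&ids->ipcs_idr, GFP_KERNEL))
		return -ENOMEM;			/* truly out of memory */

	down_write(&ids->rw_mutex);
	err = idr_get_new(&ids->ipcs_idr, new, &id);	/* does not sleep */
	up_write(&ids->rw_mutex);

	if (err == -EAGAIN)
		goto again;			/* lost the preallocation race */

ipcget_public() below plays the same game, which is why it folds the idr_pre_get() result into its IPC_CREAT branch.]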
+/**
+ *	ipc_check_perms	-	check security and permissions for an IPC
+ *	@ipcp: ipc permission set
+ *	@ops: the actual security routine to call
+ *	@params: its parameters
+ *
+ *	This routine is called by sys_msgget(), sys_semget() and sys_shmget()
+ *	when the key is not IPC_PRIVATE and that key already exists in the
+ *	ids IDR.
+ *
+ *	On success, the IPC id is returned.
+ *
+ *	It is called with ipc_ids.rw_mutex and ipcp->lock held.
+ */
+static int ipc_check_perms(struct kern_ipc_perm *ipcp, struct ipc_ops *ops,
+			struct ipc_params *params)
+{
+	int err;
+
+	if (ipcperms(ipcp, params->flg))
+		err = -EACCES;
+	else {
+		err = ops->associate(ipcp, params->flg);
+		if (!err)
+			err = ipcp->id;
+	}
+
+	return err;
+}
+
+/**
+ *	ipcget_public	-	get an ipc object or create a new one
+ *	@ns: namespace
+ *	@ids: IPC identifer set
+ *	@ops: the actual creation routine to call
+ *	@params: its parameters
+ *
+ *	This routine is called by sys_msgget, sys_semget() and sys_shmget()
+ *	when the key is not IPC_PRIVATE.
+ *	It adds a new entry if the key is not found and does some permission
+ *	/ security checkings if the key is found.
+ *
+ *	On success, the ipc id is returned.
+ */
+int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
+		struct ipc_ops *ops, struct ipc_params *params)
+{
+	struct kern_ipc_perm *ipcp;
+	int flg = params->flg;
+	int err;
+retry:
+	err = idr_pre_get(&ids->ipcs_idr, GFP_KERNEL);
+
+	/*
+	 * Take the lock as a writer since we are potentially going to add
+	 * a new entry + read locks are not "upgradable"
+	 */
+	down_write(&ids->rw_mutex);
+	ipcp = ipc_findkey(ids, params->key);
+	if (ipcp == NULL) {
+		/* key not used */
+		if (!(flg & IPC_CREAT))
+			err = -ENOENT;
+		else if (!err)
+			err = -ENOMEM;
+		else
+			err = ops->getnew(ns, params);
+	} else {
+		/* ipc object has been locked by ipc_findkey() */
+
+		if (flg & IPC_CREAT && flg & IPC_EXCL)
+			err = -EEXIST;
+		else {
+			err = 0;
+			if (ops->more_checks)
+				err = ops->more_checks(ipcp, params);
+			if (!err)
+				/*
+				 * ipc_check_perms returns the IPC id on
+				 * success
+				 */
+				err = ipc_check_perms(ipcp, ops, params);
+		}
+		ipc_unlock(ipcp);
+	}
+	up_write(&ids->rw_mutex);
+
+	if (err == -EAGAIN)
+		goto retry;
+
+	return err;
+}
+
+
 /**
 *	ipc_rmid	-	remove an IPC identifier
- *	@ids: identifier set
- *	@id: Identifier to remove
+ *	@ids: IPC identifier set
+ *	@ipcp: ipc perm structure containing the identifier to remove
 *
- *	The identifier must be valid, and in use. The kernel will panic if
- *	fed an invalid identifier. The entry is removed and internal
- *	variables recomputed. The object associated with the identifier
- *	is returned.
- *	ipc_ids.mutex and the spinlock for this ID is hold before this function
- *	is called, and remain locked on the exit.
+ *	ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held
+ *	before this function is called, and remain locked on the exit.
 */
-struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id)
+void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp)
 {
-	struct kern_ipc_perm* p;
-	int lid = id % SEQ_MULTIPLIER;
-	BUG_ON(lid >= ids->entries->size);
+	int lid = ipcid_to_idx(ipcp->id);
+
+	idr_remove(&ids->ipcs_idr, lid);
 
-	/*
-	 * do not need a rcu_dereference()() here to force ordering
-	 * on Alpha, since the ipc_ids.mutex is held.
-	 */
-	p = ids->entries->p[lid];
-	ids->entries->p[lid] = NULL;
-	BUG_ON(p==NULL);
 	ids->in_use--;
 
-	if (lid == ids->max_id) {
-		do {
-			lid--;
-			if(lid == -1)
-				break;
-		} while (ids->entries->p[lid] == NULL);
-		ids->max_id = lid;
-	}
-	p->deleted = 1;
-	return p;
+	ipcp->deleted = 1;
+
+	return;
 }
 
 /**
@@ -491,10 +577,12 @@ static void ipc_do_vfree(struct work_struct *work)
 */
 static void ipc_schedule_free(struct rcu_head *head)
 {
-	struct ipc_rcu_grace *grace =
-		container_of(head, struct ipc_rcu_grace, rcu);
-	struct ipc_rcu_sched *sched =
-		container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]);
+	struct ipc_rcu_grace *grace;
+	struct ipc_rcu_sched *sched;
+
+	grace = container_of(head, struct ipc_rcu_grace, rcu);
+	sched = container_of(&(grace->data[0]), struct ipc_rcu_sched,
+				data[0]);
 
 	INIT_WORK(&sched->work, ipc_do_vfree);
 	schedule_work(&sched->work);
@@ -583,7 +671,7 @@ void kernel_to_ipc64_perm (struct kern_ipc_perm *in, struct ipc64_perm *out)
 }
 
 /**
- *	ipc64_perm_to_ipc_perm	-	convert old ipc permissions to new
+ *	ipc64_perm_to_ipc_perm	-	convert new ipc permissions to old
 *	@in: new style IPC permissions
 *	@out: old style IPC permissions
 *
@@ -602,44 +690,37 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out)
 	out->seq	= in->seq;
 }
 
-/*
- * So far only shm_get_stat() calls ipc_get() via shm_get(), so ipc_get()
- * is called with shm_ids.mutex locked.  Since grow_ary() is also called with
- * shm_ids.mutex down(for Shared Memory), there is no need to add read
- * barriers here to gurantee the writes in grow_ary() are seen in order
- * here (for Alpha).
+/**
+ * ipc_lock - Lock an ipc structure without rw_mutex held
+ * @ids: IPC identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and lock the associated ipc object.
 *
- * However ipc_get() itself does not necessary require ipc_ids.mutex down. So
- * if in the future ipc_get() is used by other places without ipc_ids.mutex
- * down, then ipc_get() needs read memery barriers as ipc_lock() does.
+ * The ipc object is locked on exit.
+ *
+ * This is the routine that should be called when the rw_mutex is not already
+ * held, i.e. idr tree not protected: it protects the idr tree in read mode
+ * during the idr_find().
 */
-struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id)
-{
-	struct kern_ipc_perm* out;
-	int lid = id % SEQ_MULTIPLIER;
-	if(lid >= ids->entries->size)
-		return NULL;
-	out = ids->entries->p[lid];
-	return out;
-}
-struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
+struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id)
 {
-	struct kern_ipc_perm* out;
-	int lid = id % SEQ_MULTIPLIER;
-	struct ipc_id_ary* entries;
+	struct kern_ipc_perm *out;
+	int lid = ipcid_to_idx(id);
+
+	down_read(&ids->rw_mutex);
 	rcu_read_lock();
-	entries = rcu_dereference(ids->entries);
-	if(lid >= entries->size) {
-		rcu_read_unlock();
-		return NULL;
-	}
-	out = entries->p[lid];
-	if(out == NULL) {
+	out = idr_find(&ids->ipcs_idr, lid);
+	if (out == NULL) {
 		rcu_read_unlock();
-		return NULL;
+		up_read(&ids->rw_mutex);
+		return ERR_PTR(-EINVAL);
 	}
+
+	up_read(&ids->rw_mutex);
+
 	spin_lock(&out->lock);
 
 	/* ipc_rmid() may have already freed the ID while ipc_lock
@@ -648,33 +729,44 @@ struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
 	if (out->deleted) {
 		spin_unlock(&out->lock);
 		rcu_read_unlock();
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	}
+
 	return out;
 }
 
-void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
-{
-	rcu_read_lock();
-	spin_lock(&perm->lock);
-}
+/**
+ * ipc_lock_down - Lock an ipc structure with rw_mutex held
+ * @ids: IPC identifier set
+ * @id: ipc id to look for
+ *
+ * Look for an id in the ipc ids idr and lock the associated ipc object.
+ *
+ * The ipc object is locked on exit.
+ *
+ * This is the routine that should be called when the rw_mutex is already
+ * held, i.e. idr tree protected.
+ */
 
-void ipc_unlock(struct kern_ipc_perm* perm)
+struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id)
 {
-	spin_unlock(&perm->lock);
-	rcu_read_unlock();
-}
+	struct kern_ipc_perm *out;
+	int lid = ipcid_to_idx(id);
 
-int ipc_buildid(struct ipc_ids* ids, int id, int seq)
-{
-	return SEQ_MULTIPLIER*seq + id;
-}
+	rcu_read_lock();
+	out = idr_find(&ids->ipcs_idr, lid);
+	if (out == NULL) {
+		rcu_read_unlock();
+		return ERR_PTR(-EINVAL);
+	}
 
-int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid)
-{
-	if(uid/SEQ_MULTIPLIER != ipcp->seq)
-		return 1;
-	return 0;
+	spin_lock(&out->lock);
+
+	/*
+	 * No need to verify that the structure is still valid since the
+	 * rw_mutex is held.
+	 */
+	return out;
 }
 
 #ifdef __ARCH_WANT_IPC_PARSE_VERSION
@@ -707,27 +799,30 @@ struct ipc_proc_iter {
 	struct ipc_proc_iface *iface;
 };
 
-static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+/*
+ * This routine finds and locks the first ipc structure at or after
+ * position pos.
+ */
+struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
+					loff_t *new_pos)
 {
-	struct ipc_proc_iter *iter = s->private;
-	struct ipc_proc_iface *iface = iter->iface;
-	struct kern_ipc_perm *ipc = it;
-	loff_t p;
-	struct ipc_ids *ids;
+	struct kern_ipc_perm *ipc;
+	int total, id;
 
-	ids = iter->ns->ids[iface->ids];
+	total = 0;
+	for (id = 0; id < pos && total < ids->in_use; id++) {
+		ipc = idr_find(&ids->ipcs_idr, id);
+		if (ipc != NULL)
+			total++;
+	}
 
-	/* If we had an ipc id locked before, unlock it */
-	if (ipc && ipc != SEQ_START_TOKEN)
-		ipc_unlock(ipc);
+	if (total >= ids->in_use)
+		return NULL;
 
-	/*
-	 * p = *pos - 1 (because id 0 starts at position 1)
-	 *   + 1 (because we increment the position by one)
-	 */
-	for (p = *pos; p <= ids->max_id; p++) {
-		if ((ipc = ipc_lock(ids, p)) != NULL) {
-			*pos = p + 1;
+	for ( ; pos < IPCMNI; pos++) {
+		ipc = idr_find(&ids->ipcs_idr, pos);
+		if (ipc != NULL) {
+			*new_pos = pos + 1;
+			ipc_lock_by_ptr(ipc);
 			return ipc;
 		}
 	}
@@ -736,16 +831,27 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
 	return NULL;
 }
 
+static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
+{
+	struct ipc_proc_iter *iter = s->private;
+	struct ipc_proc_iface *iface = iter->iface;
+	struct kern_ipc_perm *ipc = it;
+
+	/* If we had an ipc id locked before, unlock it */
+	if (ipc && ipc != SEQ_START_TOKEN)
+		ipc_unlock(ipc);
+
+	return sysvipc_find_ipc(iter->ns->ids[iface->ids], *pos, pos);
+}
+
 /*
- * File positions: pos 0 -> header, pos n -> ipc id + 1.
- * SeqFile iterator: iterator value locked shp or SEQ_TOKEN_START.
+ * File positions: pos 0 -> header, pos n -> ipc id = n - 1.
+ * SeqFile iterator: iterator value is a locked ipc pointer or SEQ_START_TOKEN.
  */
 static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
 {
 	struct ipc_proc_iter *iter = s->private;
 	struct ipc_proc_iface *iface = iter->iface;
-	struct kern_ipc_perm *ipc;
-	loff_t p;
 	struct ipc_ids *ids;
 
 	ids = iter->ns->ids[iface->ids];
@@ -754,7 +860,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
 	 * Take the lock - this will be released by the corresponding
 	 * call to stop().
*/ - mutex_lock(&ids->mutex); + down_read(&ids->rw_mutex); /* pos < 0 is invalid */ if (*pos < 0) @@ -765,13 +871,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos) return SEQ_START_TOKEN; /* Find the (pos-1)th ipc */ - for (p = *pos - 1; p <= ids->max_id; p++) { - if ((ipc = ipc_lock(ids, p)) != NULL) { - *pos = p + 1; - return ipc; - } - } - return NULL; + return sysvipc_find_ipc(ids, *pos - 1, pos); } static void sysvipc_proc_stop(struct seq_file *s, void *it) @@ -781,13 +881,13 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it) struct ipc_proc_iface *iface = iter->iface; struct ipc_ids *ids; - /* If we had a locked segment, release it */ + /* If we had a locked structure, release it */ if (ipc && ipc != SEQ_START_TOKEN) ipc_unlock(ipc); ids = iter->ns->ids[iface->ids]; /* Release the lock we took in start() */ - mutex_unlock(&ids->mutex); + up_read(&ids->rw_mutex); } static int sysvipc_proc_show(struct seq_file *s, void *it) diff --git a/ipc/util.h b/ipc/util.h index 333e891bcac..9ffea40457c 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -10,6 +10,9 @@ #ifndef _IPC_UTIL_H #define _IPC_UTIL_H +#include <linux/idr.h> +#include <linux/err.h> + #define USHRT_MAX 0xffff #define SEQ_MULTIPLIER (IPCMNI) @@ -25,24 +28,46 @@ void sem_exit_ns(struct ipc_namespace *ns); void msg_exit_ns(struct ipc_namespace *ns); void shm_exit_ns(struct ipc_namespace *ns); -struct ipc_id_ary { - int size; - struct kern_ipc_perm *p[0]; -}; - struct ipc_ids { int in_use; - int max_id; unsigned short seq; unsigned short seq_max; - struct mutex mutex; - struct ipc_id_ary nullentry; - struct ipc_id_ary* entries; + struct rw_semaphore rw_mutex; + struct idr ipcs_idr; +}; + +/* + * Structure that holds the parameters needed by the ipc operations + * (see after) + */ +struct ipc_params { + key_t key; + int flg; + union { + size_t size; /* for shared memories */ + int nsems; /* for semaphores */ + } u; /* holds the getnew() specific param */ +}; + +/* + * Structure that holds some ipc operations. This structure is used to unify + * the calls to sys_msgget(), sys_semget(), sys_shmget() + * . routine to call to create a new ipc object. Can be one of newque, + * newary, newseg + * . routine to call to check permissions for a new ipc object. + * Can be one of security_msg_associate, security_sem_associate, + * security_shm_associate + * . routine to call for an extra check if needed + */ +struct ipc_ops { + int (*getnew) (struct ipc_namespace *, struct ipc_params *); + int (*associate) (struct kern_ipc_perm *, int); + int (*more_checks) (struct kern_ipc_perm *, struct ipc_params *); }; struct seq_file; -void ipc_init_ids(struct ipc_ids *ids, int size); +void ipc_init_ids(struct ipc_ids *); #ifdef CONFIG_PROC_FS void __init ipc_init_proc_interface(const char *path, const char *header, int ids, int (*show)(struct seq_file *, void *)); @@ -54,14 +79,19 @@ void __init ipc_init_proc_interface(const char *path, const char *header, #define IPC_MSG_IDS 1 #define IPC_SHM_IDS 2 -/* must be called with ids->mutex acquired.*/ -int ipc_findkey(struct ipc_ids* ids, key_t key); -int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size); +#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER) + +/* must be called with ids->rw_mutex acquired for writing */ +int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int); + +/* must be called with ids->rw_mutex acquired for reading */ +int ipc_get_maxid(struct ipc_ids *); /* must be called with both locks acquired. 
 */
-struct kern_ipc_perm* ipc_rmid(struct ipc_ids* ids, int id);
+void ipc_rmid(struct ipc_ids *, struct kern_ipc_perm *);
 
-int ipcperms (struct kern_ipc_perm *ipcp, short flg);
+/* must be called with ipcp locked */
+int ipcperms(struct kern_ipc_perm *ipcp, short flg);
 
 /* for rare, potentially huge allocations.
  * both functions can sleep
 */
@@ -79,24 +109,12 @@ void* ipc_rcu_alloc(int size);
 void ipc_rcu_getref(void *ptr);
 void ipc_rcu_putref(void *ptr);
 
-static inline void __ipc_fini_ids(struct ipc_ids *ids,
-		struct ipc_id_ary *entries)
-{
-	if (entries != &ids->nullentry)
-		ipc_rcu_putref(entries);
-}
-
-static inline void ipc_fini_ids(struct ipc_ids *ids)
-{
-	__ipc_fini_ids(ids, ids->entries);
-}
-
-struct kern_ipc_perm* ipc_get(struct ipc_ids* ids, int id);
-struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id);
-void ipc_lock_by_ptr(struct kern_ipc_perm *ipcp);
-void ipc_unlock(struct kern_ipc_perm* perm);
-int ipc_buildid(struct ipc_ids* ids, int id, int seq);
-int ipc_checkid(struct ipc_ids* ids, struct kern_ipc_perm* ipcp, int uid);
+/*
+ * ipc_lock_down: called with rw_mutex held
+ * ipc_lock: called without that lock held
+ */
+struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int);
+struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
 
 void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out);
 void ipc64_perm_to_ipc_perm(struct ipc64_perm *in, struct ipc_perm *out);
@@ -111,5 +129,89 @@ int ipc_parse_version (int *cmd);
 extern void free_msg(struct msg_msg *msg);
 extern struct msg_msg *load_msg(const void __user *src, int len);
 extern int store_msg(void __user *dest, struct msg_msg *msg, int len);
+extern int ipcget_new(struct ipc_namespace *, struct ipc_ids *,
+			struct ipc_ops *, struct ipc_params *);
+extern int ipcget_public(struct ipc_namespace *, struct ipc_ids *,
+			struct ipc_ops *, struct ipc_params *);
+
+static inline int ipc_buildid(int id, int seq)
+{
+	return SEQ_MULTIPLIER * seq + id;
+}
+
+/*
+ * Must be called with ipcp locked
+ */
+static inline int ipc_checkid(struct kern_ipc_perm *ipcp, int uid)
+{
+	if (uid / SEQ_MULTIPLIER != ipcp->seq)
+		return 1;
+	return 0;
+}
+
+static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
+{
+	rcu_read_lock();
+	spin_lock(&perm->lock);
+}
+
+static inline void ipc_unlock(struct kern_ipc_perm *perm)
+{
+	spin_unlock(&perm->lock);
+	rcu_read_unlock();
+}
+
+static inline struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids,
+						int id)
+{
+	struct kern_ipc_perm *out;
+
+	out = ipc_lock_down(ids, id);
+	if (IS_ERR(out))
+		return out;
+
+	if (ipc_checkid(out, id)) {
+		ipc_unlock(out);
+		return ERR_PTR(-EIDRM);
+	}
+
+	return out;
+}
+
+static inline struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids,
+						int id)
+{
+	struct kern_ipc_perm *out;
+
+	out = ipc_lock(ids, id);
+	if (IS_ERR(out))
+		return out;
+
+	if (ipc_checkid(out, id)) {
+		ipc_unlock(out);
+		return ERR_PTR(-EIDRM);
+	}
+
+	return out;
+}
+
+/**
+ * ipcget - Common sys_*get() code
+ * @ns : namespace
+ * @ids : IPC identifier set
+ * @ops : operations to be called on ipc object creation, permission checks
+ *        and further checks
+ * @params : the parameters needed by the previous operations.
+ *
+ * Common routine called by sys_msgget(), sys_semget() and sys_shmget().
+ */
+static inline int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
+			struct ipc_ops *ops, struct ipc_params *params)
+{
+	if (params->key == IPC_PRIVATE)
+		return ipcget_new(ns, ids, ops, params);
+	else
+		return ipcget_public(ns, ids, ops, params);
+}
 #endif
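
A note on the new ID arithmetic: after the idr conversion, ipc_buildid(), ipcid_to_idx() and ipc_checkid() split a user-visible IPC id into an idr slot and a generation (sequence) number. A minimal userspace sketch of the round-trip, assuming IPCMNI's historical value of 32768 (util.h defines SEQ_MULTIPLIER as IPCMNI); the names build_id and main are illustrative only:

#include <stdio.h>

#define SEQ_MULTIPLIER	32768	/* IPCMNI in this kernel series */

/* mirrors ipc_buildid(): user-visible id from idr slot + sequence */
static int build_id(int idx, int seq)
{
	return SEQ_MULTIPLIER * seq + idx;
}

int main(void)
{
	int id = build_id(2, 5);	/* idr slot 2, 6th generation */

	printf("id  = %d\n", id);			/* 163842 */
	printf("idx = %d\n", id % SEQ_MULTIPLIER);	/* 2, as ipcid_to_idx() computes */
	printf("seq = %d\n", id / SEQ_MULTIPLIER);	/* 5, what ipc_checkid() compares */
	return 0;
}

The sequence number is what lets ipc_checkid() reject a stale id whose idr slot has since been reused, which is why ipc_lock_check() returns -EIDRM rather than silently handing back the new occupant.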
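The ipc_ops/ipc_params pair is what lets the three sys_*get() paths share ipcget(). Roughly how the msg side of this series wires into it (a sketch: newque is the allocator declared in ipc/msg.c above, while msg_security stands for a small wrapper around the security_msg_queue_associate hook; treat the exact hook name as an assumption here):

asmlinkage long sys_msgget(key_t key, int msgflg)
{
	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
	struct ipc_ops msg_ops;
	struct ipc_params msg_params;

	msg_ops.getnew = newque;		/* create a brand new queue */
	msg_ops.associate = msg_security;	/* permission hook for an existing key */
	msg_ops.more_checks = NULL;		/* msg has no size/nsems check */

	msg_params.key = key;
	msg_params.flg = msgflg;

	return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
}

sem and shm differ only in filling more_checks (nsems and size validation respectively) and the union member of ipc_params.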
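The retry: loop in ipcget_public() follows the classic two-step idr protocol of this era: idr_pre_get() preallocates tree nodes with GFP_KERNEL while no lock is held, and the later idr_get_new() (reached through ops->getnew() and ipc_addid()) can still fail with -EAGAIN if a concurrent inserter consumed the preallocation, in which case the whole lookup/insert sequence is restarted. A generic, ipc-independent sketch of the idiom (insert_obj and my_lock are illustrative names):

#include <linux/idr.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);

static int insert_obj(struct idr *idr, void *obj, int *id)
{
	int err;

retry:
	/* idr_pre_get() returns 0 when it could not preallocate */
	if (!idr_pre_get(idr, GFP_KERNEL))
		return -ENOMEM;

	spin_lock(&my_lock);
	err = idr_get_new(idr, obj, id);
	spin_unlock(&my_lock);

	/* preallocated nodes stolen by a concurrent insert: start over */
	if (err == -EAGAIN)
		goto retry;

	return err;
}

ipcget_public() additionally redoes the ipc_findkey() lookup on each pass, since the key may have been created or destroyed while the rw_mutex was dropped.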
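Note the interface change buried in ipc_lock(): it now returns ERR_PTR(-EINVAL) instead of NULL, and ipc_lock_check() layers the ipc_checkid() test on top, turning a stale id into -EIDRM. Callers in sem.c, msg.c and shm.c therefore switch from NULL tests to the IS_ERR()/PTR_ERR() pattern; sketched against a hypothetical caller (frob_ipc_object is illustrative):

static int frob_ipc_object(struct ipc_namespace *ns, int msqid)
{
	struct kern_ipc_perm *ipcp;

	ipcp = ipc_lock_check(&msg_ids(ns), msqid);
	if (IS_ERR(ipcp))
		return PTR_ERR(ipcp);	/* -EINVAL if gone, -EIDRM if stale */

	/* ipcp is spinlocked and rcu-protected here */

	ipc_unlock(ipcp);		/* drops the spinlock and rcu read lock */
	return 0;
}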