From 74a3e5a71c9b54c63bff978e9cafbcef67600f0b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 20 Jan 2009 10:56:20 +0000 Subject: tun: Remove unnecessary tun_get_by_name Currently the tun driver keeps a private list of tun devices for what appears to be a small gain in performance when reconnecting a file descriptor to an existing tun or tap device. So simplify the code by removing it. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 74 ++++++++++++++----------------------------------------- 1 file changed, 19 insertions(+), 55 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d7b81e4fdd5..17923a50853 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -88,7 +88,6 @@ struct tap_filter { }; struct tun_struct { - struct list_head list; unsigned int flags; int attached; uid_t owner; @@ -213,11 +212,6 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) /* Network device part of the driver */ -static int tun_net_id; -struct tun_net { - struct list_head dev_list; -}; - static const struct ethtool_ops tun_ethtool_ops; /* Net device open. */ @@ -697,30 +691,22 @@ static void tun_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; } -static struct tun_struct *tun_get_by_name(struct tun_net *tn, const char *name) -{ - struct tun_struct *tun; - - ASSERT_RTNL(); - list_for_each_entry(tun, &tn->dev_list, list) { - if (!strncmp(tun->dev->name, name, IFNAMSIZ)) - return tun; - } - - return NULL; -} - static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) { - struct tun_net *tn; struct tun_struct *tun; struct net_device *dev; const struct cred *cred = current_cred(); int err; - tn = net_generic(net, tun_net_id); - tun = tun_get_by_name(tn, ifr->ifr_name); - if (tun) { + dev = __dev_get_by_name(net, ifr->ifr_name); + if (dev) { + if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) + tun = netdev_priv(dev); + else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops) + tun = netdev_priv(dev); + else + return -EINVAL; + if (tun->attached) return -EBUSY; @@ -733,8 +719,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) return -EPERM; } } - else if (__dev_get_by_name(net, ifr->ifr_name)) - return -EINVAL; else { char *name; unsigned long flags = 0; @@ -782,8 +766,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err = register_netdevice(tun->dev); if (err < 0) goto err_free_dev; - - list_add(&tun->list, &tn->dev_list); } DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name); @@ -1095,10 +1077,8 @@ static int tun_chr_close(struct inode *inode, struct file *file) /* Drop read queue */ skb_queue_purge(&tun->readq); - if (!(tun->flags & TUN_PERSIST)) { - list_del(&tun->list); + if (!(tun->flags & TUN_PERSIST)) unregister_netdevice(tun->dev); - } rtnl_unlock(); @@ -1212,37 +1192,21 @@ static const struct ethtool_ops tun_ethtool_ops = { static int tun_init_net(struct net *net) { - struct tun_net *tn; - - tn = kmalloc(sizeof(*tn), GFP_KERNEL); - if (tn == NULL) - return -ENOMEM; - - INIT_LIST_HEAD(&tn->dev_list); - - if (net_assign_generic(net, tun_net_id, tn)) { - kfree(tn); - return -ENOMEM; - } - return 0; } static void tun_exit_net(struct net *net) { - struct tun_net *tn; - struct tun_struct *tun, *nxt; - - tn = net_generic(net, tun_net_id); + struct net_device *dev, *next; rtnl_lock(); - list_for_each_entry_safe(tun, nxt, &tn->dev_list, list) { - DBG(KERN_INFO "%s cleaned up\n", tun->dev->name); - unregister_netdevice(tun->dev); + for_each_netdev_safe(net, dev, next) { + if (dev->ethtool_ops != &tun_ethtool_ops) + continue; + DBG(KERN_INFO "%s cleaned up\n", dev->name); + unregister_netdevice(dev); } rtnl_unlock(); - - kfree(tn); } static struct pernet_operations tun_net_ops = { @@ -1257,7 +1221,7 @@ static int __init tun_init(void) printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT); - ret = register_pernet_gen_device(&tun_net_id, &tun_net_ops); + ret = register_pernet_device(&tun_net_ops); if (ret) { printk(KERN_ERR "tun: Can't register pernet ops\n"); goto err_pernet; @@ -1271,7 +1235,7 @@ static int __init tun_init(void) return 0; err_misc: - unregister_pernet_gen_device(tun_net_id, &tun_net_ops); + unregister_pernet_device(&tun_net_ops); err_pernet: return ret; } @@ -1279,7 +1243,7 @@ err_pernet: static void tun_cleanup(void) { misc_deregister(&tun_miscdev); - unregister_pernet_gen_device(tun_net_id, &tun_net_ops); + unregister_pernet_device(&tun_net_ops); } module_init(tun_init); -- cgit v1.2.3 From a7385ba21102a90f902055f9f185ca02bf62fa43 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 20 Jan 2009 10:57:48 +0000 Subject: tun: Fix races in tun_set_iff It is possible for two different tasks with access to the same file descriptor to call tun_set_iff on it at the same time and race to attach to a tap device. Prevent this by placing all of the logic to attach to a file descriptor in one function and testing the file descriptor to be certain it is not already attached to another tun device. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 17923a50853..20ef14dc560 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -106,6 +106,31 @@ struct tun_struct { #endif }; +static int tun_attach(struct tun_struct *tun, struct file *file) +{ + const struct cred *cred = current_cred(); + + ASSERT_RTNL(); + + if (file->private_data) + return -EINVAL; + + if (tun->attached) + return -EBUSY; + + /* Check permissions */ + if (((tun->owner != -1 && cred->euid != tun->owner) || + (tun->group != -1 && cred->egid != tun->group)) && + !capable(CAP_NET_ADMIN)) + return -EPERM; + + file->private_data = tun; + tun->attached = 1; + get_net(dev_net(tun->dev)); + + return 0; +} + /* TAP filterting */ static void addr_hash_set(u32 *mask, const u8 *addr) { @@ -695,7 +720,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) { struct tun_struct *tun; struct net_device *dev; - const struct cred *cred = current_cred(); int err; dev = __dev_get_by_name(net, ifr->ifr_name); @@ -707,17 +731,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else return -EINVAL; - if (tun->attached) - return -EBUSY; - - /* Check permissions */ - if (((tun->owner != -1 && - cred->euid != tun->owner) || - (tun->group != -1 && - cred->egid != tun->group)) && - !capable(CAP_NET_ADMIN)) { - return -EPERM; - } + err = tun_attach(tun, file); + if (err < 0) + return err; } else { char *name; @@ -766,6 +782,10 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) err = register_netdevice(tun->dev); if (err < 0) goto err_free_dev; + + err = tun_attach(tun, file); + if (err < 0) + goto err_free_dev; } DBG(KERN_INFO "%s: tun_set_iff\n", tun->dev->name); @@ -785,10 +805,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else tun->flags &= ~TUN_VNET_HDR; - file->private_data = tun; - tun->attached = 1; - get_net(dev_net(tun->dev)); - /* Make sure persistent devices do not get stuck in * xoff state. */ -- cgit v1.2.3 From eac9e902658dab1e097b8ef064e9e3d16c152cc9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 20 Jan 2009 10:59:05 +0000 Subject: tun: Use POLLERR not EBADF in tun_chr_poll EBADF is meaningless in the context of a poll mask so use POLLERR instead. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 20ef14dc560..8743de9d257 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -382,7 +382,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) unsigned int mask = POLLOUT | POLLWRNORM; if (!tun) - return -EBADFD; + return POLLERR; DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); -- cgit v1.2.3 From 631ab46b79559d6fed784fd7883c0cda4d8cfcfa Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 20 Jan 2009 11:00:40 +0000 Subject: tun: Introduce tun_file Currently the tun code suffers from only having a single word of data that exists for the entire life of the tun file descriptor. This results in peculiar holding of references to the network namespace as well as races between free_netdevice and tun_chr_close. Fix this by introducing tun_file which will hold the per file state. For the moment it still holds just a single word so the differences are all logic changes with no changes in semantics. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 156 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 104 insertions(+), 52 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 8743de9d257..d3a665d2f7b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -87,9 +87,13 @@ struct tap_filter { unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; +struct tun_file { + struct tun_struct *tun; +}; + struct tun_struct { + struct tun_file *tfile; unsigned int flags; - int attached; uid_t owner; gid_t group; @@ -108,14 +112,15 @@ struct tun_struct { static int tun_attach(struct tun_struct *tun, struct file *file) { + struct tun_file *tfile = file->private_data; const struct cred *cred = current_cred(); ASSERT_RTNL(); - if (file->private_data) + if (tfile->tun) return -EINVAL; - if (tun->attached) + if (tun->tfile) return -EBUSY; /* Check permissions */ @@ -124,13 +129,41 @@ static int tun_attach(struct tun_struct *tun, struct file *file) !capable(CAP_NET_ADMIN)) return -EPERM; - file->private_data = tun; - tun->attached = 1; + tfile->tun = tun; + tun->tfile = tfile; get_net(dev_net(tun->dev)); return 0; } +static void __tun_detach(struct tun_struct *tun) +{ + struct tun_file *tfile = tun->tfile; + + /* Detach from net device */ + tfile->tun = NULL; + tun->tfile = NULL; + put_net(dev_net(tun->dev)); + + /* Drop read queue */ + skb_queue_purge(&tun->readq); +} + +static struct tun_struct *__tun_get(struct tun_file *tfile) +{ + return tfile->tun; +} + +static struct tun_struct *tun_get(struct file *file) +{ + return __tun_get(file->private_data); +} + +static void tun_put(struct tun_struct *tun) +{ + /* Noop for now */ +} + /* TAP filterting */ static void addr_hash_set(u32 *mask, const u8 *addr) { @@ -261,7 +294,7 @@ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev) DBG(KERN_INFO "%s: tun_net_xmit %d\n", tun->dev->name, skb->len); /* Drop packet if interface is not attached */ - if (!tun->attached) + if (!tun->tfile) goto drop; /* Drop if the filter does not like it. @@ -378,7 +411,7 @@ static void tun_net_init(struct net_device *dev) /* Poll */ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) { - struct tun_struct *tun = file->private_data; + struct tun_struct *tun = tun_get(file); unsigned int mask = POLLOUT | POLLWRNORM; if (!tun) @@ -391,6 +424,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) if (!skb_queue_empty(&tun->readq)) mask |= POLLIN | POLLRDNORM; + tun_put(tun); return mask; } @@ -575,14 +609,18 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, unsigned long count, loff_t pos) { - struct tun_struct *tun = iocb->ki_filp->private_data; + struct tun_struct *tun = tun_get(iocb->ki_filp); + ssize_t result; if (!tun) return -EBADFD; DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count); - return tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count)); + result = tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count)); + + tun_put(tun); + return result; } /* Put packet to the user space buffer */ @@ -655,7 +693,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, unsigned long count, loff_t pos) { struct file *file = iocb->ki_filp; - struct tun_struct *tun = file->private_data; + struct tun_struct *tun = tun_get(file); DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; ssize_t len, ret = 0; @@ -666,8 +704,10 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); len = iov_length(iv, count); - if (len < 0) - return -EINVAL; + if (len < 0) { + ret = -EINVAL; + goto out; + } add_wait_queue(&tun->read_wait, &wait); while (len) { @@ -698,6 +738,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, current->state = TASK_RUNNING; remove_wait_queue(&tun->read_wait, &wait); +out: + tun_put(tun); return ret; } @@ -822,7 +864,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr) { - struct tun_struct *tun = file->private_data; + struct tun_struct *tun = tun_get(file); if (!tun) return -EBADFD; @@ -847,6 +889,7 @@ static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr) if (tun->flags & TUN_VNET_HDR) ifr->ifr_flags |= IFF_VNET_HDR; + tun_put(tun); return 0; } @@ -893,7 +936,7 @@ static int set_offload(struct net_device *dev, unsigned long arg) static int tun_chr_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - struct tun_struct *tun = file->private_data; + struct tun_struct *tun; void __user* argp = (void __user*)arg; struct ifreq ifr; int ret; @@ -902,6 +945,16 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, if (copy_from_user(&ifr, argp, sizeof ifr)) return -EFAULT; + if (cmd == TUNGETFEATURES) { + /* Currently this just means: "what IFF flags are valid?". + * This is needed because we never checked for invalid flags on + * TUNSETIFF. */ + return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | + IFF_VNET_HDR, + (unsigned int __user*)argp); + } + + tun = tun_get(file); if (cmd == TUNSETIFF && !tun) { int err; @@ -919,28 +972,21 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, return 0; } - if (cmd == TUNGETFEATURES) { - /* Currently this just means: "what IFF flags are valid?". - * This is needed because we never checked for invalid flags on - * TUNSETIFF. */ - return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | - IFF_VNET_HDR, - (unsigned int __user*)argp); - } if (!tun) return -EBADFD; DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd); + ret = 0; switch (cmd) { case TUNGETIFF: ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr); if (ret) - return ret; + break; if (copy_to_user(argp, &ifr, sizeof(ifr))) - return -EFAULT; + ret = -EFAULT; break; case TUNSETNOCSUM: @@ -992,7 +1038,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, ret = 0; } rtnl_unlock(); - return ret; + break; #ifdef TUN_DEBUG case TUNSETDEBUG: @@ -1003,24 +1049,25 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, rtnl_lock(); ret = set_offload(tun->dev, arg); rtnl_unlock(); - return ret; + break; case TUNSETTXFILTER: /* Can be set only for TAPs */ + ret = -EINVAL; if ((tun->flags & TUN_TYPE_MASK) != TUN_TAP_DEV) - return -EINVAL; + break; rtnl_lock(); ret = update_filter(&tun->txflt, (void __user *)arg); rtnl_unlock(); - return ret; + break; case SIOCGIFHWADDR: /* Get hw addres */ memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); ifr.ifr_hwaddr.sa_family = tun->dev->type; if (copy_to_user(argp, &ifr, sizeof ifr)) - return -EFAULT; - return 0; + ret = -EFAULT; + break; case SIOCSIFHWADDR: /* Set hw address */ @@ -1030,18 +1077,19 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, rtnl_lock(); ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); rtnl_unlock(); - return ret; - + break; default: - return -EINVAL; + ret = -EINVAL; + break; }; - return 0; + tun_put(tun); + return ret; } static int tun_chr_fasync(int fd, struct file *file, int on) { - struct tun_struct *tun = file->private_data; + struct tun_struct *tun = tun_get(file); int ret; if (!tun) @@ -1063,40 +1111,44 @@ static int tun_chr_fasync(int fd, struct file *file, int on) ret = 0; out: unlock_kernel(); + tun_put(tun); return ret; } static int tun_chr_open(struct inode *inode, struct file * file) { + struct tun_file *tfile; cycle_kernel_lock(); DBG1(KERN_INFO "tunX: tun_chr_open\n"); - file->private_data = NULL; + + tfile = kmalloc(sizeof(*tfile), GFP_KERNEL); + if (!tfile) + return -ENOMEM; + tfile->tun = NULL; + file->private_data = tfile; return 0; } static int tun_chr_close(struct inode *inode, struct file *file) { - struct tun_struct *tun = file->private_data; - - if (!tun) - return 0; + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); - DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name); - rtnl_lock(); + if (tun) { + DBG(KERN_INFO "%s: tun_chr_close\n", tun->dev->name); - /* Detach from net device */ - file->private_data = NULL; - tun->attached = 0; - put_net(dev_net(tun->dev)); + rtnl_lock(); + __tun_detach(tun); - /* Drop read queue */ - skb_queue_purge(&tun->readq); + /* If desireable, unregister the netdevice. */ + if (!(tun->flags & TUN_PERSIST)) + unregister_netdevice(tun->dev); - if (!(tun->flags & TUN_PERSIST)) - unregister_netdevice(tun->dev); + rtnl_unlock(); + } - rtnl_unlock(); + kfree(tfile); return 0; } @@ -1177,7 +1229,7 @@ static void tun_set_msglevel(struct net_device *dev, u32 value) static u32 tun_get_link(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); - return tun->attached; + return !!tun->tfile; } static u32 tun_get_rx_csum(struct net_device *dev) -- cgit v1.2.3 From 36b50bab53207daf34be63ca62fb8b0b08dc6e6b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 20 Jan 2009 11:01:48 +0000 Subject: tun: Grab the netns in open. Grabbing namespaces in open, and putting them in close always seems to be the cleanest approach with the fewest surprises. So now that we have tun_file so we have somepleace to put the network namespace, let's grab the network namespace on file open and put on file close. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d3a665d2f7b..dfbf5865977 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -89,6 +89,7 @@ struct tap_filter { struct tun_file { struct tun_struct *tun; + struct net *net; }; struct tun_struct { @@ -131,7 +132,6 @@ static int tun_attach(struct tun_struct *tun, struct file *file) tfile->tun = tun; tun->tfile = tfile; - get_net(dev_net(tun->dev)); return 0; } @@ -143,7 +143,6 @@ static void __tun_detach(struct tun_struct *tun) /* Detach from net device */ tfile->tun = NULL; tun->tfile = NULL; - put_net(dev_net(tun->dev)); /* Drop read queue */ skb_queue_purge(&tun->readq); @@ -936,6 +935,7 @@ static int set_offload(struct net_device *dev, unsigned long arg) static int tun_chr_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { + struct tun_file *tfile = file->private_data; struct tun_struct *tun; void __user* argp = (void __user*)arg; struct ifreq ifr; @@ -954,14 +954,14 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, (unsigned int __user*)argp); } - tun = tun_get(file); + tun = __tun_get(tfile); if (cmd == TUNSETIFF && !tun) { int err; ifr.ifr_name[IFNAMSIZ-1] = '\0'; rtnl_lock(); - err = tun_set_iff(current->nsproxy->net_ns, file, &ifr); + err = tun_set_iff(tfile->net, file, &ifr); rtnl_unlock(); if (err) @@ -1125,6 +1125,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) if (!tfile) return -ENOMEM; tfile->tun = NULL; + tfile->net = get_net(current->nsproxy->net_ns); file->private_data = tfile; return 0; } @@ -1148,6 +1149,7 @@ static int tun_chr_close(struct inode *inode, struct file *file) rtnl_unlock(); } + put_net(tfile->net); kfree(tfile); return 0; -- cgit v1.2.3 From 38231b7a8d1b74c920822640d1ce8eb8046377fb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 20 Jan 2009 11:02:28 +0000 Subject: tun: Make tun_net_xmit atomic wrt tun_attach && tun_detach Currently this small race allows for a packet to be received when we detach from an tun device and still be enqueued. Not especially important but not what the code is trying to do. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index dfbf5865977..fa93160bf52 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -115,25 +115,33 @@ static int tun_attach(struct tun_struct *tun, struct file *file) { struct tun_file *tfile = file->private_data; const struct cred *cred = current_cred(); + int err; ASSERT_RTNL(); - if (tfile->tun) - return -EINVAL; - - if (tun->tfile) - return -EBUSY; - /* Check permissions */ if (((tun->owner != -1 && cred->euid != tun->owner) || (tun->group != -1 && cred->egid != tun->group)) && !capable(CAP_NET_ADMIN)) return -EPERM; + netif_tx_lock_bh(tun->dev); + + err = -EINVAL; + if (tfile->tun) + goto out; + + err = -EBUSY; + if (tun->tfile) + goto out; + + err = 0; tfile->tun = tun; tun->tfile = tfile; - return 0; +out: + netif_tx_unlock_bh(tun->dev); + return err; } static void __tun_detach(struct tun_struct *tun) @@ -141,8 +149,10 @@ static void __tun_detach(struct tun_struct *tun) struct tun_file *tfile = tun->tfile; /* Detach from net device */ + netif_tx_lock_bh(tun->dev); tfile->tun = NULL; tun->tfile = NULL; + netif_tx_unlock_bh(tun->dev); /* Drop read queue */ skb_queue_purge(&tun->readq); -- cgit v1.2.3 From b2430de37ef0bc0799ffba7b5219d38ca417eb76 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 20 Jan 2009 11:03:21 +0000 Subject: tun: Move read_wait into tun_file The poll interface requires that the waitqueue exist while the struct file is open. In the rare case when a tun device disappears before the tun file closes we fail to provide this property, so move read_wait. This is safe now that tun_net_xmit is atomic with tun_detach. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index fa93160bf52..030d9858bb6 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -90,6 +90,7 @@ struct tap_filter { struct tun_file { struct tun_struct *tun; struct net *net; + wait_queue_head_t read_wait; }; struct tun_struct { @@ -98,7 +99,6 @@ struct tun_struct { uid_t owner; gid_t group; - wait_queue_head_t read_wait; struct sk_buff_head readq; struct net_device *dev; @@ -335,7 +335,7 @@ static int tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tun->flags & TUN_FASYNC) kill_fasync(&tun->fasync, SIGIO, POLL_IN); - wake_up_interruptible(&tun->read_wait); + wake_up_interruptible(&tun->tfile->read_wait); return 0; drop: @@ -420,7 +420,8 @@ static void tun_net_init(struct net_device *dev) /* Poll */ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) { - struct tun_struct *tun = tun_get(file); + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); unsigned int mask = POLLOUT | POLLWRNORM; if (!tun) @@ -428,7 +429,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name); - poll_wait(file, &tun->read_wait, wait); + poll_wait(file, &tfile->read_wait, wait); if (!skb_queue_empty(&tun->readq)) mask |= POLLIN | POLLRDNORM; @@ -702,7 +703,8 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, unsigned long count, loff_t pos) { struct file *file = iocb->ki_filp; - struct tun_struct *tun = tun_get(file); + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; ssize_t len, ret = 0; @@ -718,7 +720,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, goto out; } - add_wait_queue(&tun->read_wait, &wait); + add_wait_queue(&tfile->read_wait, &wait); while (len) { current->state = TASK_INTERRUPTIBLE; @@ -745,7 +747,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, } current->state = TASK_RUNNING; - remove_wait_queue(&tun->read_wait, &wait); + remove_wait_queue(&tfile->read_wait, &wait); out: tun_put(tun); @@ -757,7 +759,6 @@ static void tun_setup(struct net_device *dev) struct tun_struct *tun = netdev_priv(dev); skb_queue_head_init(&tun->readq); - init_waitqueue_head(&tun->read_wait); tun->owner = -1; tun->group = -1; @@ -1136,6 +1137,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) return -ENOMEM; tfile->tun = NULL; tfile->net = get_net(current->nsproxy->net_ns); + init_waitqueue_head(&tfile->read_wait); file->private_data = tfile; return 0; } -- cgit v1.2.3 From c70f182940f988448f3c12a209d18b1edc276e33 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 20 Jan 2009 11:07:17 +0000 Subject: tun: Fix races between tun_net_close and free_netdev. The tun code does not cope gracefully if the network device goes away before the tun file descriptor is closed. It looks like we can trigger this with rmmod, and moving tun devices between network namespaces will allow this to be triggered when network namespaces exit. To fix this I introduce an intermediate data structure tun_file which holds a count of users and a pointer to the struct tun_struct. tun_get increments that reference count if it is greater than 0. tun_put decrements that reference count and detaches from the network device if the count is 0. While we have a file attached to the network device I hold a reference to the network device keeping it from going away completely. When a network device is unregistered I decrement the count of the attached tun_file and if that was the last user I detach the tun_file, and all processes on read_wait are woken up to ensure they do not sleep indefinitely. As some of those sleeps happen with the count on the tun device elevated waking up the read waiters ensures that tun_file will be detached in a timely manner. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 030d9858bb6..51dba6192ba 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -88,6 +88,7 @@ struct tap_filter { }; struct tun_file { + atomic_t count; struct tun_struct *tun; struct net *net; wait_queue_head_t read_wait; @@ -138,6 +139,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file) err = 0; tfile->tun = tun; tun->tfile = tfile; + dev_hold(tun->dev); + atomic_inc(&tfile->count); out: netif_tx_unlock_bh(tun->dev); @@ -156,11 +159,26 @@ static void __tun_detach(struct tun_struct *tun) /* Drop read queue */ skb_queue_purge(&tun->readq); + + /* Drop the extra count on the net device */ + dev_put(tun->dev); +} + +static void tun_detach(struct tun_struct *tun) +{ + rtnl_lock(); + __tun_detach(tun); + rtnl_unlock(); } static struct tun_struct *__tun_get(struct tun_file *tfile) { - return tfile->tun; + struct tun_struct *tun = NULL; + + if (atomic_inc_not_zero(&tfile->count)) + tun = tfile->tun; + + return tun; } static struct tun_struct *tun_get(struct file *file) @@ -170,7 +188,10 @@ static struct tun_struct *tun_get(struct file *file) static void tun_put(struct tun_struct *tun) { - /* Noop for now */ + struct tun_file *tfile = tun->tfile; + + if (atomic_dec_and_test(&tfile->count)) + tun_detach(tfile->tun); } /* TAP filterting */ @@ -281,6 +302,21 @@ static int check_filter(struct tap_filter *filter, const struct sk_buff *skb) static const struct ethtool_ops tun_ethtool_ops; +/* Net device detach from fd. */ +static void tun_net_uninit(struct net_device *dev) +{ + struct tun_struct *tun = netdev_priv(dev); + struct tun_file *tfile = tun->tfile; + + /* Inform the methods they need to stop using the dev. + */ + if (tfile) { + wake_up_all(&tfile->read_wait); + if (atomic_dec_and_test(&tfile->count)) + __tun_detach(tun); + } +} + /* Net device open. */ static int tun_net_open(struct net_device *dev) { @@ -367,6 +403,7 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu) } static const struct net_device_ops tun_netdev_ops = { + .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, .ndo_start_xmit = tun_net_xmit, @@ -374,6 +411,7 @@ static const struct net_device_ops tun_netdev_ops = { }; static const struct net_device_ops tap_netdev_ops = { + .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, .ndo_stop = tun_net_close, .ndo_start_xmit = tun_net_xmit, @@ -434,6 +472,9 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) if (!skb_queue_empty(&tun->readq)) mask |= POLLIN | POLLRDNORM; + if (tun->dev->reg_state != NETREG_REGISTERED) + mask = POLLERR; + tun_put(tun); return mask; } @@ -734,6 +775,10 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, ret = -ERESTARTSYS; break; } + if (tun->dev->reg_state != NETREG_REGISTERED) { + ret = -EIO; + break; + } /* Nothing to read, let's sleep */ schedule(); @@ -1135,6 +1180,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) tfile = kmalloc(sizeof(*tfile), GFP_KERNEL); if (!tfile) return -ENOMEM; + atomic_set(&tfile->count, 0); tfile->tun = NULL; tfile->net = get_net(current->nsproxy->net_ns); init_waitqueue_head(&tfile->read_wait); -- cgit v1.2.3 From aec191aa2a04b082238156dc9690fff8ce95dd6b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 20 Jan 2009 11:08:46 +0000 Subject: tun: There is no longer any need to deny changing network namespaces With the awkward case between free_netdev and dev_chr_close fixed there is no longer any need to limit tun and tap devices to the network namespace they were created in. So remove the NETIF_F_NETNS_LOCAL flag on the network device. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 51dba6192ba..97b050015f5 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -810,7 +810,6 @@ static void tun_setup(struct net_device *dev) dev->ethtool_ops = &tun_ethtool_ops; dev->destructor = free_netdev; - dev->features |= NETIF_F_NETNS_LOCAL; } static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) -- cgit v1.2.3 From f019a7a594d951f085eb3878c3d825556d447efe Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 21 Jan 2009 16:02:16 -0800 Subject: tun: Implement ip link del tunXXX This greatly simplifies testing to verify I have fixed the problems with a tun device disappearing when the tun file descriptor is still held open. Further it allows removal network namespace operations for the tun driver. Reducing the network namespace handling in the driver to the minimum. i.e. When we are creating a tun device. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- drivers/net/tun.c | 56 +++++++++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 31 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 97b050015f5..e9bcbdfe015 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -812,6 +813,22 @@ static void tun_setup(struct net_device *dev) dev->destructor = free_netdev; } +/* Trivial set of netlink ops to allow deleting tun or tap + * device with netlink. + */ +static int tun_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + return -EINVAL; +} + +static struct rtnl_link_ops tun_link_ops __read_mostly = { + .kind = DRV_NAME, + .priv_size = sizeof(struct tun_struct), + .setup = tun_setup, + .validate = tun_validate, +}; + + static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) { struct tun_struct *tun; @@ -861,6 +878,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) return -ENOMEM; dev_net_set(dev, net); + dev->rtnl_link_ops = &tun_link_ops; tun = netdev_priv(dev); tun->dev = dev; @@ -1317,29 +1335,6 @@ static const struct ethtool_ops tun_ethtool_ops = { .set_rx_csum = tun_set_rx_csum }; -static int tun_init_net(struct net *net) -{ - return 0; -} - -static void tun_exit_net(struct net *net) -{ - struct net_device *dev, *next; - - rtnl_lock(); - for_each_netdev_safe(net, dev, next) { - if (dev->ethtool_ops != &tun_ethtool_ops) - continue; - DBG(KERN_INFO "%s cleaned up\n", dev->name); - unregister_netdevice(dev); - } - rtnl_unlock(); -} - -static struct pernet_operations tun_net_ops = { - .init = tun_init_net, - .exit = tun_exit_net, -}; static int __init tun_init(void) { @@ -1348,10 +1343,10 @@ static int __init tun_init(void) printk(KERN_INFO "tun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); printk(KERN_INFO "tun: %s\n", DRV_COPYRIGHT); - ret = register_pernet_device(&tun_net_ops); + ret = rtnl_link_register(&tun_link_ops); if (ret) { - printk(KERN_ERR "tun: Can't register pernet ops\n"); - goto err_pernet; + printk(KERN_ERR "tun: Can't register link_ops\n"); + goto err_linkops; } ret = misc_register(&tun_miscdev); @@ -1359,18 +1354,17 @@ static int __init tun_init(void) printk(KERN_ERR "tun: Can't register misc device %d\n", TUN_MINOR); goto err_misc; } - return 0; - + return 0; err_misc: - unregister_pernet_device(&tun_net_ops); -err_pernet: + rtnl_link_unregister(&tun_link_ops); +err_linkops: return ret; } static void tun_cleanup(void) { misc_deregister(&tun_miscdev); - unregister_pernet_device(&tun_net_ops); + rtnl_link_unregister(&tun_link_ops); } module_init(tun_init); -- cgit v1.2.3 From 09640e6365c679b5642b1c41b6d7078f51689ddf Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sun, 1 Feb 2009 00:45:17 -0800 Subject: net: replace uses of __constant_{endian} Base versions handle constant folding now. Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index e9bcbdfe015..457f2d7430c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -543,7 +543,7 @@ static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear, /* Get packet from user space buffer */ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count) { - struct tun_pi pi = { 0, __constant_htons(ETH_P_IP) }; + struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; size_t len = count, align = 0; struct virtio_net_hdr gso = { 0 }; -- cgit v1.2.3 From 1bded710a574f20d41bc9e7fb531301db282d623 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Mon, 2 Feb 2009 23:34:56 -0800 Subject: tun: Check supplemental groups in TUN/TAP driver. Michael Tokarev wrote: [] > 2, and this is the main one: How about supplementary groups? > > Here I have a valid usage case: a group of testers running various > versions of windows using KVM (kernel virtual machine), 1 at a time, > to test some software. kvm is set up to use bridge with a tap device > (there should be a way to connect to the machine). Anyone on that group > has to be able to start/stop the virtual machines. > > My first attempt - pretty obvious when I saw -g option of tunctl - is > to add group ownership for the tun device and add a supplementary group > to each user (their primary group should be different). But that fails, > since kernel only checks for egid, not any other group ids. > > What's the reasoning to not allow supplementary groups and to only check > for egid? Signed-off-by: Michael Tokarev Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 457f2d7430c..15d67635bb1 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -123,7 +123,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file) /* Check permissions */ if (((tun->owner != -1 && cred->euid != tun->owner) || - (tun->group != -1 && cred->egid != tun->group)) && + (tun->group != -1 && !in_egroup_p(tun->group))) && !capable(CAP_NET_ADMIN)) return -EPERM; -- cgit v1.2.3 From 33dccbb050bbe35b88ca8cf1228dcf3e4d4b3554 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 5 Feb 2009 21:25:32 -0800 Subject: tun: Limit amount of queued packets per device Unlike a normal socket path, the tuntap device send path does not have any accounting. This means that the user-space sender may be able to pin down arbitrary amounts of kernel memory by continuing to send data to an end-point that is congested. Even when this isn't an issue because of limited queueing at most end points, this can also be a problem because its only response to congestion is packet loss. That is, when those local queues at the end-point fills up, the tuntap device will start wasting system time because it will continue to send data there which simply gets dropped straight away. Of course one could argue that everybody should do congestion control end-to-end, unfortunately there are people in this world still hooked on UDP, and they don't appear to be going away anywhere fast. In fact, we've always helped them by performing accounting in our UDP code, the sole purpose of which is to provide congestion feedback other than through packet loss. This patch attempts to apply the same bandaid to the tuntap device. It creates a pseudo-socket object which is used to account our packets just as a normal socket does for UDP. Of course things are a little complex because we're actually reinjecting traffic back into the stack rather than out of the stack. The stack complexities however should have been resolved by preceding patches. So this one can simply start using skb_set_owner_w. For now the accounting is essentially disabled by default for backwards compatibility. In particular, we set the cap to INT_MAX. This is so that existing applications don't get confused by the sudden arrival EAGAIN errors. In future we may wish (or be forced to) do this by default. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/tun.c | 167 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 114 insertions(+), 53 deletions(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 15d67635bb1..0476549841a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -95,6 +96,8 @@ struct tun_file { wait_queue_head_t read_wait; }; +struct tun_sock; + struct tun_struct { struct tun_file *tfile; unsigned int flags; @@ -107,12 +110,24 @@ struct tun_struct { struct fasync_struct *fasync; struct tap_filter txflt; + struct sock *sk; + struct socket socket; #ifdef TUN_DEBUG int debug; #endif }; +struct tun_sock { + struct sock sk; + struct tun_struct *tun; +}; + +static inline struct tun_sock *tun_sk(struct sock *sk) +{ + return container_of(sk, struct tun_sock, sk); +} + static int tun_attach(struct tun_struct *tun, struct file *file) { struct tun_file *tfile = file->private_data; @@ -461,7 +476,8 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) { struct tun_file *tfile = file->private_data; struct tun_struct *tun = __tun_get(tfile); - unsigned int mask = POLLOUT | POLLWRNORM; + struct sock *sk = tun->sk; + unsigned int mask = 0; if (!tun) return POLLERR; @@ -473,6 +489,11 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) if (!skb_queue_empty(&tun->readq)) mask |= POLLIN | POLLRDNORM; + if (sock_writeable(sk) || + (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags) && + sock_writeable(sk))) + mask |= POLLOUT | POLLWRNORM; + if (tun->dev->reg_state != NETREG_REGISTERED) mask = POLLERR; @@ -482,66 +503,35 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait) /* prepad is the amount to reserve at front. len is length after that. * linear is a hint as to how much to copy (usually headers). */ -static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear, - gfp_t gfp) +static inline struct sk_buff *tun_alloc_skb(struct tun_struct *tun, + size_t prepad, size_t len, + size_t linear, int noblock) { + struct sock *sk = tun->sk; struct sk_buff *skb; - unsigned int i; - - skb = alloc_skb(prepad + len, gfp|__GFP_NOWARN); - if (skb) { - skb_reserve(skb, prepad); - skb_put(skb, len); - return skb; - } + int err; /* Under a page? Don't bother with paged skb. */ if (prepad + len < PAGE_SIZE) - return NULL; + linear = len; - /* Start with a normal skb, and add pages. */ - skb = alloc_skb(prepad + linear, gfp); + skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, + &err); if (!skb) - return NULL; + return ERR_PTR(err); skb_reserve(skb, prepad); skb_put(skb, linear); - - len -= linear; - - for (i = 0; i < MAX_SKB_FRAGS; i++) { - skb_frag_t *f = &skb_shinfo(skb)->frags[i]; - - f->page = alloc_page(gfp|__GFP_ZERO); - if (!f->page) - break; - - f->page_offset = 0; - f->size = PAGE_SIZE; - - skb->data_len += PAGE_SIZE; - skb->len += PAGE_SIZE; - skb->truesize += PAGE_SIZE; - skb_shinfo(skb)->nr_frags++; - - if (len < PAGE_SIZE) { - len = 0; - break; - } - len -= PAGE_SIZE; - } - - /* Too large, or alloc fail? */ - if (unlikely(len)) { - kfree_skb(skb); - skb = NULL; - } + skb->data_len = len - linear; + skb->len += len - linear; return skb; } /* Get packet from user space buffer */ -static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, size_t count) +static __inline__ ssize_t tun_get_user(struct tun_struct *tun, + struct iovec *iv, size_t count, + int noblock) { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; @@ -573,9 +563,11 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, return -EINVAL; } - if (!(skb = tun_alloc_skb(align, len, gso.hdr_len, GFP_KERNEL))) { - tun->dev->stats.rx_dropped++; - return -ENOMEM; + skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock); + if (IS_ERR(skb)) { + if (PTR_ERR(skb) != -EAGAIN) + tun->dev->stats.rx_dropped++; + return PTR_ERR(skb); } if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) { @@ -661,7 +653,8 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv, static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, unsigned long count, loff_t pos) { - struct tun_struct *tun = tun_get(iocb->ki_filp); + struct file *file = iocb->ki_filp; + struct tun_struct *tun = file->private_data; ssize_t result; if (!tun) @@ -669,7 +662,8 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, DBG(KERN_INFO "%s: tun_chr_write %ld\n", tun->dev->name, count); - result = tun_get_user(tun, (struct iovec *) iv, iov_length(iv, count)); + result = tun_get_user(tun, (struct iovec *)iv, iov_length(iv, count), + file->f_flags & O_NONBLOCK); tun_put(tun); return result; @@ -828,11 +822,40 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = { .validate = tun_validate, }; +static void tun_sock_write_space(struct sock *sk) +{ + struct tun_struct *tun; + + if (!sock_writeable(sk)) + return; + + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible_sync(sk->sk_sleep); + + if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + return; + + tun = container_of(sk, struct tun_sock, sk)->tun; + kill_fasync(&tun->fasync, SIGIO, POLL_OUT); +} + +static void tun_sock_destruct(struct sock *sk) +{ + dev_put(container_of(sk, struct tun_sock, sk)->tun->dev); +} + +static struct proto tun_proto = { + .name = "tun", + .owner = THIS_MODULE, + .obj_size = sizeof(struct tun_sock), +}; static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) { + struct sock *sk; struct tun_struct *tun; struct net_device *dev; + struct tun_file *tfile = file->private_data; int err; dev = __dev_get_by_name(net, ifr->ifr_name); @@ -885,14 +908,31 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->flags = flags; tun->txflt.count = 0; + err = -ENOMEM; + sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto); + if (!sk) + goto err_free_dev; + + /* This ref count is for tun->sk. */ + dev_hold(dev); + sock_init_data(&tun->socket, sk); + sk->sk_write_space = tun_sock_write_space; + sk->sk_destruct = tun_sock_destruct; + sk->sk_sndbuf = INT_MAX; + sk->sk_sleep = &tfile->read_wait; + + tun->sk = sk; + container_of(sk, struct tun_sock, sk)->tun = tun; + tun_net_init(dev); if (strchr(dev->name, '%')) { err = dev_alloc_name(dev, dev->name); if (err < 0) - goto err_free_dev; + goto err_free_sk; } + err = -EINVAL; err = register_netdevice(tun->dev); if (err < 0) goto err_free_dev; @@ -928,6 +968,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) strcpy(ifr->ifr_name, tun->dev->name); return 0; + err_free_sk: + sock_put(sk); err_free_dev: free_netdev(dev); failed: @@ -1012,6 +1054,7 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, struct tun_struct *tun; void __user* argp = (void __user*)arg; struct ifreq ifr; + int sndbuf; int ret; if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) @@ -1151,6 +1194,22 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr); rtnl_unlock(); break; + + case TUNGETSNDBUF: + sndbuf = tun->sk->sk_sndbuf; + if (copy_to_user(argp, &sndbuf, sizeof(sndbuf))) + ret = -EFAULT; + break; + + case TUNSETSNDBUF: + if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) { + ret = -EFAULT; + break; + } + + tun->sk->sk_sndbuf = sndbuf; + break; + default: ret = -EINVAL; break; @@ -1218,8 +1277,10 @@ static int tun_chr_close(struct inode *inode, struct file *file) __tun_detach(tun); /* If desireable, unregister the netdevice. */ - if (!(tun->flags & TUN_PERSIST)) + if (!(tun->flags & TUN_PERSIST)) { + sock_put(tun->sk); unregister_netdevice(tun->dev); + } rtnl_unlock(); } -- cgit v1.2.3 From ab46d779661d7a03b7aa00279eead5dc3f0b3901 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 14 Feb 2009 20:46:39 -0800 Subject: tun: Fix merge error When forward-porting the tun accounting patch I managed to break the send path compltely by dropping the tun_get call. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/tun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net/tun.c') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 4825c52924b..a1b0697340b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -660,7 +660,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv, unsigned long count, loff_t pos) { struct file *file = iocb->ki_filp; - struct tun_struct *tun = file->private_data; + struct tun_struct *tun = tun_get(file); ssize_t result; if (!tun) -- cgit v1.2.3