aboutsummaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c779
1 files changed, 529 insertions, 250 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 9174c77d311..8d675975d85 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -108,7 +108,6 @@
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
-#include <linux/kallsyms.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
@@ -130,6 +129,12 @@
#include "net-sysfs.h"
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
+/* This should be increased if a protocol with a bigger head is added. */
+#define GRO_MAX_HEAD (MAX_HEADER + 128)
+
/*
* The list of packet types we will receive (as opposed to discard)
* and the routines to invoke.
@@ -165,25 +170,6 @@ static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly; /* Taps */
-#ifdef CONFIG_NET_DMA
-struct net_dma {
- struct dma_client client;
- spinlock_t lock;
- cpumask_t channel_mask;
- struct dma_chan **channels;
-};
-
-static enum dma_state_client
-netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
- enum dma_state state);
-
-static struct net_dma net_dma = {
- .client = {
- .event_callback = netdev_dma_event,
- },
-};
-#endif
-
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
* semaphore.
@@ -281,8 +267,8 @@ static const unsigned short netdev_lock_type[] =
ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
- ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
- ARPHRD_NONE};
+ ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
+ ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
static const char *netdev_lock_name[] =
{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -298,8 +284,8 @@ static const char *netdev_lock_name[] =
"_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
"_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
"_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
- "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
- "_xmit_NONE"};
+ "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
+ "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -924,10 +910,15 @@ int dev_change_name(struct net_device *dev, const char *newname)
strlcpy(dev->name, newname, IFNAMSIZ);
rollback:
- ret = device_rename(&dev->dev, dev->name);
- if (ret) {
- memcpy(dev->name, oldname, IFNAMSIZ);
- return ret;
+ /* For now only devices in the initial network namespace
+ * are in sysfs.
+ */
+ if (net == &init_net) {
+ ret = device_rename(&dev->dev, dev->name);
+ if (ret) {
+ memcpy(dev->name, oldname, IFNAMSIZ);
+ return ret;
+ }
}
write_lock_bh(&dev_base_lock);
@@ -1055,6 +1046,7 @@ void dev_load(struct net *net, const char *name)
*/
int dev_open(struct net_device *dev)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
int ret = 0;
ASSERT_RTNL();
@@ -1077,11 +1069,11 @@ int dev_open(struct net_device *dev)
*/
set_bit(__LINK_STATE_START, &dev->state);
- if (dev->validate_addr)
- ret = dev->validate_addr(dev);
+ if (ops->ndo_validate_addr)
+ ret = ops->ndo_validate_addr(dev);
- if (!ret && dev->open)
- ret = dev->open(dev);
+ if (!ret && ops->ndo_open)
+ ret = ops->ndo_open(dev);
/*
* If it went open OK then:
@@ -1096,6 +1088,11 @@ int dev_open(struct net_device *dev)
dev->flags |= IFF_UP;
/*
+ * Enable NET_DMA
+ */
+ dmaengine_get();
+
+ /*
* Initialize multicasting status
*/
dev_set_rx_mode(dev);
@@ -1125,6 +1122,7 @@ int dev_open(struct net_device *dev)
*/
int dev_close(struct net_device *dev)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
ASSERT_RTNL();
might_sleep();
@@ -1157,8 +1155,8 @@ int dev_close(struct net_device *dev)
* We allow it to be called even after a DETACH hot-plug
* event.
*/
- if (dev->stop)
- dev->stop(dev);
+ if (ops->ndo_stop)
+ ops->ndo_stop(dev);
/*
* Device is now down.
@@ -1171,6 +1169,11 @@ int dev_close(struct net_device *dev)
*/
call_netdevice_notifiers(NETDEV_DOWN, dev);
+ /*
+ * Shutdown NET_DMA
+ */
+ dmaengine_put();
+
return 0;
}
@@ -1527,8 +1530,6 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
__be16 type = skb->protocol;
int err;
- BUG_ON(skb_shinfo(skb)->frag_list);
-
skb_reset_mac_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
__skb_pull(skb, skb->mac_len);
@@ -1654,6 +1655,9 @@ static int dev_gso_segment(struct sk_buff *skb)
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ prefetch(&dev->netdev_ops->ndo_start_xmit);
if (likely(!skb->next)) {
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
@@ -1665,7 +1669,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
goto gso;
}
- return dev->hard_start_xmit(skb, dev);
+ return ops->ndo_start_xmit(skb, dev);
}
gso:
@@ -1675,7 +1679,7 @@ gso:
skb->next = nskb->next;
nskb->next = NULL;
- rc = dev->hard_start_xmit(nskb, dev);
+ rc = ops->ndo_start_xmit(nskb, dev);
if (unlikely(rc)) {
nskb->next = skb->next;
skb->next = nskb;
@@ -1749,10 +1753,11 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
u16 queue_index = 0;
- if (dev->select_queue)
- queue_index = dev->select_queue(dev, skb);
+ if (ops->ndo_select_queue)
+ queue_index = ops->ndo_select_queue(dev, skb);
else if (dev->real_num_tx_queues > 1)
queue_index = simple_tx_hash(dev, skb);
@@ -2251,8 +2256,10 @@ int netif_receive_skb(struct sk_buff *skb)
rcu_read_lock();
/* Don't receive packets in an exiting network namespace */
- if (!net_alive(dev_net(skb->dev)))
+ if (!net_alive(dev_net(skb->dev))) {
+ kfree_skb(skb);
goto out;
+ }
#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
@@ -2325,6 +2332,236 @@ static void flush_backlog(void *arg)
}
}
+static int napi_gro_complete(struct sk_buff *skb)
+{
+ struct packet_type *ptype;
+ __be16 type = skb->protocol;
+ struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ int err = -ENOENT;
+
+ if (NAPI_GRO_CB(skb)->count == 1)
+ goto out;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, head, list) {
+ if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+ continue;
+
+ err = ptype->gro_complete(skb);
+ break;
+ }
+ rcu_read_unlock();
+
+ if (err) {
+ WARN_ON(&ptype->list == head);
+ kfree_skb(skb);
+ return NET_RX_SUCCESS;
+ }
+
+out:
+ skb_shinfo(skb)->gso_size = 0;
+ __skb_push(skb, -skb_network_offset(skb));
+ return netif_receive_skb(skb);
+}
+
+void napi_gro_flush(struct napi_struct *napi)
+{
+ struct sk_buff *skb, *next;
+
+ for (skb = napi->gro_list; skb; skb = next) {
+ next = skb->next;
+ skb->next = NULL;
+ napi_gro_complete(skb);
+ }
+
+ napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+ struct sk_buff **pp = NULL;
+ struct packet_type *ptype;
+ __be16 type = skb->protocol;
+ struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+ int count = 0;
+ int same_flow;
+ int mac_len;
+ int free;
+
+ if (!(skb->dev->features & NETIF_F_GRO))
+ goto normal;
+
+ if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
+ goto normal;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, head, list) {
+ struct sk_buff *p;
+
+ if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+ continue;
+
+ skb_reset_network_header(skb);
+ mac_len = skb->network_header - skb->mac_header;
+ skb->mac_len = mac_len;
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 0;
+ NAPI_GRO_CB(skb)->free = 0;
+
+ for (p = napi->gro_list; p; p = p->next) {
+ count++;
+
+ if (!NAPI_GRO_CB(p)->same_flow)
+ continue;
+
+ if (p->mac_len != mac_len ||
+ memcmp(skb_mac_header(p), skb_mac_header(skb),
+ mac_len))
+ NAPI_GRO_CB(p)->same_flow = 0;
+ }
+
+ pp = ptype->gro_receive(&napi->gro_list, skb);
+ break;
+ }
+ rcu_read_unlock();
+
+ if (&ptype->list == head)
+ goto normal;
+
+ same_flow = NAPI_GRO_CB(skb)->same_flow;
+ free = NAPI_GRO_CB(skb)->free;
+
+ if (pp) {
+ struct sk_buff *nskb = *pp;
+
+ *pp = nskb->next;
+ nskb->next = NULL;
+ napi_gro_complete(nskb);
+ count--;
+ }
+
+ if (same_flow)
+ goto ok;
+
+ if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
+ __skb_push(skb, -skb_network_offset(skb));
+ goto normal;
+ }
+
+ NAPI_GRO_CB(skb)->count = 1;
+ skb_shinfo(skb)->gso_size = skb->len;
+ skb->next = napi->gro_list;
+ napi->gro_list = skb;
+
+ok:
+ return free;
+
+normal:
+ return -1;
+}
+EXPORT_SYMBOL(dev_gro_receive);
+
+static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+ struct sk_buff *p;
+
+ for (p = napi->gro_list; p; p = p->next) {
+ NAPI_GRO_CB(p)->same_flow = 1;
+ NAPI_GRO_CB(p)->flush = 0;
+ }
+
+ return dev_gro_receive(napi, skb);
+}
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+ switch (__napi_gro_receive(napi, skb)) {
+ case -1:
+ return netif_receive_skb(skb);
+
+ case 1:
+ kfree_skb(skb);
+ break;
+ }
+
+ return NET_RX_SUCCESS;
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
+void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+{
+ __skb_pull(skb, skb_headlen(skb));
+ skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+
+ napi->skb = skb;
+}
+EXPORT_SYMBOL(napi_reuse_skb);
+
+struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
+ struct napi_gro_fraginfo *info)
+{
+ struct net_device *dev = napi->dev;
+ struct sk_buff *skb = napi->skb;
+
+ napi->skb = NULL;
+
+ if (!skb) {
+ skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
+ if (!skb)
+ goto out;
+
+ skb_reserve(skb, NET_IP_ALIGN);
+ }
+
+ BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
+ skb_shinfo(skb)->nr_frags = info->nr_frags;
+ memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
+
+ skb->data_len = info->len;
+ skb->len += info->len;
+ skb->truesize += info->len;
+
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ napi_reuse_skb(napi, skb);
+ goto out;
+ }
+
+ skb->protocol = eth_type_trans(skb, dev);
+
+ skb->ip_summed = info->ip_summed;
+ skb->csum = info->csum;
+
+out:
+ return skb;
+}
+EXPORT_SYMBOL(napi_fraginfo_skb);
+
+int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+{
+ struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+ int err = NET_RX_DROP;
+
+ if (!skb)
+ goto out;
+
+ err = NET_RX_SUCCESS;
+
+ switch (__napi_gro_receive(napi, skb)) {
+ case -1:
+ return netif_receive_skb(skb);
+
+ case 0:
+ goto out;
+ }
+
+ napi_reuse_skb(napi, skb);
+
+out:
+ return err;
+}
+EXPORT_SYMBOL(napi_gro_frags);
+
static int process_backlog(struct napi_struct *napi, int quota)
{
int work = 0;
@@ -2344,9 +2581,11 @@ static int process_backlog(struct napi_struct *napi, int quota)
}
local_irq_enable();
- netif_receive_skb(skb);
+ napi_gro_receive(napi, skb);
} while (++work < quota && jiffies == start_time);
+ napi_gro_flush(napi);
+
return work;
}
@@ -2367,11 +2606,75 @@ void __napi_schedule(struct napi_struct *n)
}
EXPORT_SYMBOL(__napi_schedule);
+void __napi_complete(struct napi_struct *n)
+{
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+ BUG_ON(n->gro_list);
+
+ list_del(&n->poll_list);
+ smp_mb__before_clear_bit();
+ clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+void napi_complete(struct napi_struct *n)
+{
+ unsigned long flags;
+
+ /*
+ * don't let napi dequeue from the cpu poll list
+ * just in case its running on a different cpu
+ */
+ if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+ return;
+
+ napi_gro_flush(n);
+ local_irq_save(flags);
+ __napi_complete(n);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(napi_complete);
+
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
+{
+ INIT_LIST_HEAD(&napi->poll_list);
+ napi->gro_list = NULL;
+ napi->skb = NULL;
+ napi->poll = poll;
+ napi->weight = weight;
+ list_add(&napi->dev_list, &dev->napi_list);
+ napi->dev = dev;
+#ifdef CONFIG_NETPOLL
+ spin_lock_init(&napi->poll_lock);
+ napi->poll_owner = -1;
+#endif
+ set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+void netif_napi_del(struct napi_struct *napi)
+{
+ struct sk_buff *skb, *next;
+
+ list_del_init(&napi->dev_list);
+ kfree(napi->skb);
+
+ for (skb = napi->gro_list; skb; skb = next) {
+ next = skb->next;
+ skb->next = NULL;
+ kfree_skb(skb);
+ }
+
+ napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
static void net_rx_action(struct softirq_action *h)
{
struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
- unsigned long start_time = jiffies;
+ unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
void *have;
@@ -2382,13 +2685,10 @@ static void net_rx_action(struct softirq_action *h)
int work, weight;
/* If softirq window is exhuasted then punt.
- *
- * Note that this is a slight policy change from the
- * previous NAPI code, which would allow up to 2
- * jiffies to pass before breaking out. The test
- * used to be "jiffies - start_time > 1".
+ * Allow this to run for 2 jiffies since which will allow
+ * an average latency of 1.5/HZ.
*/
- if (unlikely(budget <= 0 || jiffies != start_time))
+ if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
goto softnet_break;
local_irq_enable();
@@ -2442,14 +2742,7 @@ out:
* There may not be any more sk_buffs coming right now, so push
* any pending DMA copies to hardware
*/
- if (!cpus_empty(net_dma.channel_mask)) {
- int chan_idx;
- for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
- struct dma_chan *chan = net_dma.channels[chan_idx];
- if (chan)
- dma_async_memcpy_issue_pending(chan);
- }
- }
+ dma_issue_pending_all();
#endif
return;
@@ -2615,7 +2908,7 @@ void dev_seq_stop(struct seq_file *seq, void *v)
static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
- struct net_device_stats *stats = dev->get_stats(dev);
+ const struct net_device_stats *stats = dev_get_stats(dev);
seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
@@ -2797,31 +3090,6 @@ static void ptype_seq_stop(struct seq_file *seq, void *v)
rcu_read_unlock();
}
-static void ptype_seq_decode(struct seq_file *seq, void *sym)
-{
-#ifdef CONFIG_KALLSYMS
- unsigned long offset = 0, symsize;
- const char *symname;
- char *modname;
- char namebuf[128];
-
- symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
- &modname, namebuf);
-
- if (symname) {
- char *delim = ":";
-
- if (!modname)
- modname = delim = "";
- seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
- symname, offset);
- return;
- }
-#endif
-
- seq_printf(seq, "[%p]", sym);
-}
-
static int ptype_seq_show(struct seq_file *seq, void *v)
{
struct packet_type *pt = v;
@@ -2834,10 +3102,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
else
seq_printf(seq, "%04x", ntohs(pt->type));
- seq_printf(seq, " %-8s ",
- pt->dev ? pt->dev->name : "");
- ptype_seq_decode(seq, pt->func);
- seq_putc(seq, '\n');
+ seq_printf(seq, " %-8s %pF\n",
+ pt->dev ? pt->dev->name : "", pt->func);
}
return 0;
@@ -2954,13 +3220,17 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
- if (dev->flags & IFF_UP && dev->change_rx_flags)
- dev->change_rx_flags(dev, flags);
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+ ops->ndo_change_rx_flags(dev, flags);
}
static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
unsigned short old_flags = dev->flags;
+ uid_t uid;
+ gid_t gid;
ASSERT_RTNL();
@@ -2985,15 +3255,17 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
printk(KERN_INFO "device %s %s promiscuous mode\n",
dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
"left");
- if (audit_enabled)
+ if (audit_enabled) {
+ current_uid_gid(&uid, &gid);
audit_log(current->audit_context, GFP_ATOMIC,
AUDIT_ANOM_PROMISCUOUS,
"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
dev->name, (dev->flags & IFF_PROMISC),
(old_flags & IFF_PROMISC),
audit_get_loginuid(current),
- current->uid, current->gid,
+ uid, gid,
audit_get_sessionid(current));
+ }
dev_change_rx_flags(dev, IFF_PROMISC);
}
@@ -3075,6 +3347,8 @@ int dev_set_allmulti(struct net_device *dev, int inc)
*/
void __dev_set_rx_mode(struct net_device *dev)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
/* dev_open will call this function so the list will stay sane. */
if (!(dev->flags&IFF_UP))
return;
@@ -3082,8 +3356,8 @@ void __dev_set_rx_mode(struct net_device *dev)
if (!netif_device_present(dev))
return;
- if (dev->set_rx_mode)
- dev->set_rx_mode(dev);
+ if (ops->ndo_set_rx_mode)
+ ops->ndo_set_rx_mode(dev);
else {
/* Unicast addresses changes may only happen under the rtnl,
* therefore calling __dev_set_promiscuity here is safe.
@@ -3096,8 +3370,8 @@ void __dev_set_rx_mode(struct net_device *dev)
dev->uc_promisc = 0;
}
- if (dev->set_multicast_list)
- dev->set_multicast_list(dev);
+ if (ops->ndo_set_multicast_list)
+ ops->ndo_set_multicast_list(dev);
}
}
@@ -3456,6 +3730,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
*/
int dev_set_mtu(struct net_device *dev, int new_mtu)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
int err;
if (new_mtu == dev->mtu)
@@ -3469,10 +3744,11 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
return -ENODEV;
err = 0;
- if (dev->change_mtu)
- err = dev->change_mtu(dev, new_mtu);
+ if (ops->ndo_change_mtu)
+ err = ops->ndo_change_mtu(dev, new_mtu);
else
dev->mtu = new_mtu;
+
if (!err && dev->flags & IFF_UP)
call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
return err;
@@ -3487,15 +3763,16 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
*/
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
{
+ const struct net_device_ops *ops = dev->netdev_ops;
int err;
- if (!dev->set_mac_address)
+ if (!ops->ndo_set_mac_address)
return -EOPNOTSUPP;
if (sa->sa_family != dev->type)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
- err = dev->set_mac_address(dev, sa);
+ err = ops->ndo_set_mac_address(dev, sa);
if (!err)
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
return err;
@@ -3575,10 +3852,13 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
int err;
struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+ const struct net_device_ops *ops;
if (!dev)
return -ENODEV;
+ ops = dev->netdev_ops;
+
switch (cmd) {
case SIOCSIFFLAGS: /* Set interface flags */
return dev_change_flags(dev, ifr->ifr_flags);
@@ -3602,15 +3882,15 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return 0;
case SIOCSIFMAP:
- if (dev->set_config) {
+ if (ops->ndo_set_config) {
if (!netif_device_present(dev))
return -ENODEV;
- return dev->set_config(dev, &ifr->ifr_map);
+ return ops->ndo_set_config(dev, &ifr->ifr_map);
}
return -EOPNOTSUPP;
case SIOCADDMULTI:
- if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+ if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
@@ -3619,7 +3899,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
dev->addr_len, 1);
case SIOCDELMULTI:
- if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
+ if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
@@ -3657,10 +3937,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
cmd == SIOCBRDELIF ||
cmd == SIOCWANDEV) {
err = -EOPNOTSUPP;
- if (dev->do_ioctl) {
+ if (ops->ndo_do_ioctl) {
if (netif_device_present(dev))
- err = dev->do_ioctl(dev, ifr,
- cmd);
+ err = ops->ndo_do_ioctl(dev, ifr, cmd);
else
err = -ENODEV;
}
@@ -3921,8 +4200,8 @@ static void rollback_registered(struct net_device *dev)
*/
dev_addr_discard(dev);
- if (dev->uninit)
- dev->uninit(dev);
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
/* Notifier chain MUST detach us from master device. */
WARN_ON(dev->master);
@@ -4012,7 +4291,7 @@ int register_netdevice(struct net_device *dev)
struct hlist_head *head;
struct hlist_node *p;
int ret;
- struct net *net;
+ struct net *net = dev_net(dev);
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
@@ -4021,8 +4300,7 @@ int register_netdevice(struct net_device *dev)
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
- BUG_ON(!dev_net(dev));
- net = dev_net(dev);
+ BUG_ON(!net);
spin_lock_init(&dev->addr_list_lock);
netdev_set_addr_lockdep_class(dev);
@@ -4030,9 +4308,46 @@ int register_netdevice(struct net_device *dev)
dev->iflink = -1;
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+ /* Netdevice_ops API compatiability support.
+ * This is temporary until all network devices are converted.
+ */
+ if (dev->netdev_ops) {
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ dev->init = ops->ndo_init;
+ dev->uninit = ops->ndo_uninit;
+ dev->open = ops->ndo_open;
+ dev->change_rx_flags = ops->ndo_change_rx_flags;
+ dev->set_rx_mode = ops->ndo_set_rx_mode;
+ dev->set_multicast_list = ops->ndo_set_multicast_list;
+ dev->set_mac_address = ops->ndo_set_mac_address;
+ dev->validate_addr = ops->ndo_validate_addr;
+ dev->do_ioctl = ops->ndo_do_ioctl;
+ dev->set_config = ops->ndo_set_config;
+ dev->change_mtu = ops->ndo_change_mtu;
+ dev->tx_timeout = ops->ndo_tx_timeout;
+ dev->get_stats = ops->ndo_get_stats;
+ dev->vlan_rx_register = ops->ndo_vlan_rx_register;
+ dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
+ dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ dev->poll_controller = ops->ndo_poll_controller;
+#endif
+ } else {
+ char drivername[64];
+ pr_info("%s (%s): not using net_device_ops yet\n",
+ dev->name, netdev_drivername(dev, drivername, 64));
+
+ /* This works only because net_device_ops and the
+ compatiablity structure are the same. */
+ dev->netdev_ops = (void *) &(dev->init);
+ }
+#endif
+
/* Init, if this function is available */
- if (dev->init) {
- ret = dev->init(dev);
+ if (dev->netdev_ops->ndo_init) {
+ ret = dev->netdev_ops->ndo_init(dev);
if (ret) {
if (ret > 0)
ret = -EIO;
@@ -4110,12 +4425,51 @@ out:
return ret;
err_uninit:
- if (dev->uninit)
- dev->uninit(dev);
+ if (dev->netdev_ops->ndo_uninit)
+ dev->netdev_ops->ndo_uninit(dev);
goto out;
}
/**
+ * init_dummy_netdev - init a dummy network device for NAPI
+ * @dev: device to init
+ *
+ * This takes a network device structure and initialize the minimum
+ * amount of fields so it can be used to schedule NAPI polls without
+ * registering a full blown interface. This is to be used by drivers
+ * that need to tie several hardware interfaces to a single NAPI
+ * poll scheduler due to HW limitations.
+ */
+int init_dummy_netdev(struct net_device *dev)
+{
+ /* Clear everything. Note we don't initialize spinlocks
+ * are they aren't supposed to be taken by any of the
+ * NAPI code and this dummy netdev is supposed to be
+ * only ever used for NAPI polls
+ */
+ memset(dev, 0, sizeof(struct net_device));
+
+ /* make sure we BUG if trying to hit standard
+ * register/unregister code path
+ */
+ dev->reg_state = NETREG_DUMMY;
+
+ /* initialize the ref count */
+ atomic_set(&dev->refcnt, 1);
+
+ /* NAPI wants this */
+ INIT_LIST_HEAD(&dev->napi_list);
+
+ /* a dummy interface is started by default */
+ set_bit(__LINK_STATE_PRESENT, &dev->state);
+ set_bit(__LINK_STATE_START, &dev->state);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(init_dummy_netdev);
+
+
+/**
* register_netdev - register a network device
* @dev: device to register
*
@@ -4267,10 +4621,24 @@ void netdev_run_todo(void)
}
}
-static struct net_device_stats *internal_stats(struct net_device *dev)
-{
- return &dev->stats;
+/**
+ * dev_get_stats - get network device statistics
+ * @dev: device to get statistics from
+ *
+ * Get network statistics from device. The device driver may provide
+ * its own method by setting dev->netdev_ops->get_stats; otherwise
+ * the internal statistics structure is used.
+ */
+const struct net_device_stats *dev_get_stats(struct net_device *dev)
+ {
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (ops->ndo_get_stats)
+ return ops->ndo_get_stats(dev);
+ else
+ return &dev->stats;
}
+EXPORT_SYMBOL(dev_get_stats);
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue,
@@ -4339,18 +4707,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
- if (sizeof_priv) {
- dev->priv = ((char *)dev +
- ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
- & ~NETDEV_ALIGN_CONST));
- }
-
dev->gso_max_size = GSO_MAX_SIZE;
netdev_init_queues(dev);
- dev->get_stats = internal_stats;
- netpoll_netdev_init(dev);
+ INIT_LIST_HEAD(&dev->napi_list);
setup(dev);
strcpy(dev->name, name);
return dev;
@@ -4367,10 +4728,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
*/
void free_netdev(struct net_device *dev)
{
+ struct napi_struct *p, *n;
+
release_net(dev_net(dev));
kfree(dev->_tx);
+ list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+ netif_napi_del(p);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
@@ -4463,6 +4829,15 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
if (dev->features & NETIF_F_NETNS_LOCAL)
goto out;
+#ifdef CONFIG_SYSFS
+ /* Don't allow real devices to be moved when sysfs
+ * is enabled.
+ */
+ err = -EINVAL;
+ if (dev->dev.parent)
+ goto out;
+#endif
+
/* Ensure the device has been registrered */
err = -EINVAL;
if (dev->reg_state != NETREG_REGISTERED)
@@ -4520,6 +4895,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
*/
dev_addr_discard(dev);
+ netdev_unregister_kobject(dev);
+
/* Actually switch the network namespace */
dev_net_set(dev, net);
@@ -4536,7 +4913,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
}
/* Fixup kobjects */
- netdev_unregister_kobject(dev);
err = netdev_register_kobject(dev);
WARN_ON(err);
@@ -4596,122 +4972,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-#ifdef CONFIG_NET_DMA
-/**
- * net_dma_rebalance - try to maintain one DMA channel per CPU
- * @net_dma: DMA client and associated data (lock, channels, channel_mask)
- *
- * This is called when the number of channels allocated to the net_dma client
- * changes. The net_dma client tries to have one DMA channel per CPU.
- */
-
-static void net_dma_rebalance(struct net_dma *net_dma)
-{
- unsigned int cpu, i, n, chan_idx;
- struct dma_chan *chan;
-
- if (cpus_empty(net_dma->channel_mask)) {
- for_each_online_cpu(cpu)
- rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
- return;
- }
-
- i = 0;
- cpu = first_cpu(cpu_online_map);
-
- for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
- chan = net_dma->channels[chan_idx];
-
- n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
- + (i < (num_online_cpus() %
- cpus_weight(net_dma->channel_mask)) ? 1 : 0));
-
- while(n) {
- per_cpu(softnet_data, cpu).net_dma = chan;
- cpu = next_cpu(cpu, cpu_online_map);
- n--;
- }
- i++;
- }
-}
-
-/**
- * netdev_dma_event - event callback for the net_dma_client
- * @client: should always be net_dma_client
- * @chan: DMA channel for the event
- * @state: DMA state to be handled
- */
-static enum dma_state_client
-netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
- enum dma_state state)
-{
- int i, found = 0, pos = -1;
- struct net_dma *net_dma =
- container_of(client, struct net_dma, client);
- enum dma_state_client ack = DMA_DUP; /* default: take no action */
-
- spin_lock(&net_dma->lock);
- switch (state) {
- case DMA_RESOURCE_AVAILABLE:
- for (i = 0; i < nr_cpu_ids; i++)
- if (net_dma->channels[i] == chan) {
- found = 1;
- break;
- } else if (net_dma->channels[i] == NULL && pos < 0)
- pos = i;
-
- if (!found && pos >= 0) {
- ack = DMA_ACK;
- net_dma->channels[pos] = chan;
- cpu_set(pos, net_dma->channel_mask);
- net_dma_rebalance(net_dma);
- }
- break;
- case DMA_RESOURCE_REMOVED:
- for (i = 0; i < nr_cpu_ids; i++)
- if (net_dma->channels[i] == chan) {
- found = 1;
- pos = i;
- break;
- }
-
- if (found) {
- ack = DMA_ACK;
- cpu_clear(pos, net_dma->channel_mask);
- net_dma->channels[i] = NULL;
- net_dma_rebalance(net_dma);
- }
- break;
- default:
- break;
- }
- spin_unlock(&net_dma->lock);
-
- return ack;
-}
-
-/**
- * netdev_dma_register - register the networking subsystem as a DMA client
- */
-static int __init netdev_dma_register(void)
-{
- net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
- GFP_KERNEL);
- if (unlikely(!net_dma.channels)) {
- printk(KERN_NOTICE
- "netdev_dma: no memory for net_dma.channels\n");
- return -ENOMEM;
- }
- spin_lock_init(&net_dma.lock);
- dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
- dma_async_client_register(&net_dma.client);
- dma_async_client_chan_request(&net_dma.client);
- return 0;
-}
-
-#else
-static int __init netdev_dma_register(void) { return -ENODEV; }
-#endif /* CONFIG_NET_DMA */
/**
* netdev_increment_features - increment feature set by one
@@ -4829,13 +5089,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
static void __net_exit default_device_exit(struct net *net)
{
- struct net_device *dev, *next;
+ struct net_device *dev;
/*
* Push all migratable of the network devices back to the
* initial network namespace
*/
rtnl_lock();
- for_each_netdev_safe(net, dev, next) {
+restart:
+ for_each_netdev(net, dev) {
int err;
char fb_name[IFNAMSIZ];
@@ -4843,6 +5104,12 @@ static void __net_exit default_device_exit(struct net *net)
if (dev->features & NETIF_F_NETNS_LOCAL)
continue;
+ /* Delete virtual devices */
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
+ dev->rtnl_link_ops->dellink(dev);
+ goto restart;
+ }
+
/* Push remaing network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
err = dev_change_net_namespace(dev, &init_net, fb_name);
@@ -4851,6 +5118,7 @@ static void __net_exit default_device_exit(struct net *net)
__func__, dev->name, err);
BUG();
}
+ goto restart;
}
rtnl_unlock();
}
@@ -4889,9 +5157,6 @@ static int __init net_dev_init(void)
if (register_pernet_subsys(&netdev_net_ops))
goto out;
- if (register_pernet_device(&default_device_ops))
- goto out;
-
/*
* Initialise the packet receive queues.
*/
@@ -4906,12 +5171,26 @@ static int __init net_dev_init(void)
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
+ queue->backlog.gro_list = NULL;
}
- netdev_dma_register();
-
dev_boot_phase = 0;
+ /* The loopback device is special if any other network devices
+ * is present in a network namespace the loopback device must
+ * be present. Since we now dynamically allocate and free the
+ * loopback device ensure this invariant is maintained by
+ * keeping the loopback device as the first device on the
+ * list of network devices. Ensuring the loopback devices
+ * is the first device that appears and the last network device
+ * that disappears.
+ */
+ if (register_pernet_device(&loopback_net_ops))
+ goto out;
+
+ if (register_pernet_device(&default_device_ops))
+ goto out;
+
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);