about | summary | refs | log | tree | commit | diff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r-- net/core/dev.c 212
-rw-r--r-- net/core/skbuff.c 27
2 files changed, 151 insertions, 88 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 5379b0c1190..ec5be1c7f2f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,14 @@
/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)
+enum { /* verdicts returned by dev_gro_receive() and acted on by napi_skb_finish()/napi_frags_finish() */
+ GRO_MERGED, /* same-flow match with NAPI_GRO_CB(skb)->free clear; the finish helpers leave skb alone */
+ GRO_MERGED_FREE, /* same-flow match with NAPI_GRO_CB(skb)->free set; skb is released via kfree_skb()/napi_reuse_skb() */
+ GRO_HELD, /* skb was linked onto napi->gro_list to start a new held flow */
+ GRO_NORMAL, /* GRO did not take the skb; it is passed to netif_receive_skb() */
+ GRO_DROP, /* finish helpers return NET_RX_DROP and release the skb */
+};
+
/*
* The list of packet types we will receive (as opposed to discard)
* and the routines to invoke.
@@ -207,6 +215,13 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}
+/*
+ * Locate the MAC header of a packet that is being examined by GRO.
+ *
+ * When the skb carries linear data the ordinary mac header pointer is
+ * returned. Otherwise the header is read in place from the beginning of
+ * the first page fragment.
+ */
+static inline void *skb_gro_mac_header(struct sk_buff *skb)
+{
+	skb_frag_t *first;
+
+	if (skb_headlen(skb))
+		return skb_mac_header(skb);
+
+	first = &skb_shinfo(skb)->frags[0];
+	return page_address(first->page) + first->page_offset;
+}
+
/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
@@ -1708,56 +1723,26 @@ out_kfree_skb:
return 0;
}
-static u32 simple_tx_hashrnd;
-static int simple_tx_hashrnd_initialized = 0;
+static u32 skb_tx_hashrnd; /* random value mixed into every skb_tx_hash() result */
+static int skb_tx_hashrnd_initialized = 0; /* set to 1 once skb_tx_hashrnd has been filled */
-static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb) /* map skb to a tx queue index below dev->real_num_tx_queues */
{
- u32 addr1, addr2, ports;
- u32 hash, ihl;
- u8 ip_proto = 0;
-
- if (unlikely(!simple_tx_hashrnd_initialized)) {
- get_random_bytes(&simple_tx_hashrnd, 4);
- simple_tx_hashrnd_initialized = 1;
- }
+ u32 hash;
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
- ip_proto = ip_hdr(skb)->protocol;
- addr1 = ip_hdr(skb)->saddr;
- addr2 = ip_hdr(skb)->daddr;
- ihl = ip_hdr(skb)->ihl;
- break;
- case htons(ETH_P_IPV6):
- ip_proto = ipv6_hdr(skb)->nexthdr;
- addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
- addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
- ihl = (40 >> 2);
- break;
- default:
- return 0;
+ if (unlikely(!skb_tx_hashrnd_initialized)) { /* seed the random value lazily on the first call */
+ get_random_bytes(&skb_tx_hashrnd, 4);
+ skb_tx_hashrnd_initialized = 1;
}
+ if (skb_rx_queue_recorded(skb)) { /* hash key, first choice: the rx queue recorded on the skb */
+ hash = skb_get_rx_queue(skb);
+ } else if (skb->sk && skb->sk->sk_hash) { /* second choice: the owning socket's nonzero sk_hash */
+ hash = skb->sk->sk_hash;
+ } else
+ hash = skb->protocol; /* last resort: only the protocol field; packet headers are no longer parsed */
- switch (ip_proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_DCCP:
- case IPPROTO_ESP:
- case IPPROTO_AH:
- case IPPROTO_SCTP:
- case IPPROTO_UDPLITE:
- ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
- break;
-
- default:
- ports = 0;
- break;
- }
-
- hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+ hash = jhash_1word(hash, skb_tx_hashrnd); /* mix the chosen key with the random value */
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
@@ -1771,7 +1756,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
if (ops->ndo_select_queue)
queue_index = ops->ndo_select_queue(dev, skb);
else if (dev->real_num_tx_queues > 1)
- queue_index = simple_tx_hash(dev, skb);
+ queue_index = skb_tx_hash(dev, skb);
skb_set_queue_mapping(skb, queue_index);
return netdev_get_tx_queue(dev, queue_index);
@@ -2372,7 +2357,6 @@ static int napi_gro_complete(struct sk_buff *skb)
out:
skb_shinfo(skb)->gso_size = 0;
- __skb_push(skb, -skb_network_offset(skb));
return netif_receive_skb(skb);
}
@@ -2390,6 +2374,25 @@ void napi_gro_flush(struct napi_struct *napi)
}
EXPORT_SYMBOL(napi_gro_flush);
+/*
+ * skb_gro_header - get a contiguous view of the next header for GRO
+ * @skb: packet being examined
+ * @hlen: number of header bytes needed, counted from the current GRO offset
+ *
+ * Returns a pointer to the requested bytes, or NULL if they cannot be made
+ * available. The bytes are taken from the linear area when they fit there,
+ * or read in place from frags[0] when they lie entirely inside it. In every
+ * other case (header straddling the linear area and frags[0], no fragments,
+ * first fragment too short, or first fragment in highmem) pskb_may_pull()
+ * is asked to bring them into the linear area first.
+ */
+void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
+{
+	unsigned int offset = skb_gro_offset(skb);
+	unsigned int headlen = skb_headlen(skb);
+
+	hlen += offset;
+	if (hlen <= headlen)
+		return skb->data + offset;
+
+	/*
+	 * offset < headlen means the header starts in the linear area but
+	 * ends in frags[0]; it is not contiguous anywhere, so it must be
+	 * pulled just like the other cases frags[0] cannot serve directly.
+	 */
+	if (unlikely(offset < headlen ||
+		     !skb_shinfo(skb)->nr_frags ||
+		     skb_shinfo(skb)->frags[0].size <= hlen - headlen ||
+		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
+		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+
+	/*
+	 * frags[0] begins headlen bytes into the packet, so the GRO offset
+	 * has to be rebased by headlen before indexing the fragment (the
+	 * same adjustment skb_gro_receive() applies to frags[0]).
+	 */
+	return page_address(skb_shinfo(skb)->frags[0].page) +
+	       skb_shinfo(skb)->frags[0].page_offset + (offset - headlen);
+}
+EXPORT_SYMBOL(skb_gro_header);
+
int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
@@ -2399,7 +2402,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
int count = 0;
int same_flow;
int mac_len;
- int free;
+ int ret;
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
@@ -2410,11 +2413,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
struct sk_buff *p;
+ void *mac;
if (ptype->type != type || ptype->dev || !ptype->gro_receive)
continue;
- skb_reset_network_header(skb);
+ skb_set_network_header(skb, skb_gro_offset(skb));
+ mac = skb_gro_mac_header(skb);
mac_len = skb->network_header - skb->mac_header;
skb->mac_len = mac_len;
NAPI_GRO_CB(skb)->same_flow = 0;
@@ -2428,8 +2433,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
continue;
if (p->mac_len != mac_len ||
- memcmp(skb_mac_header(p), skb_mac_header(skb),
- mac_len))
+ memcmp(skb_mac_header(p), mac, mac_len))
NAPI_GRO_CB(p)->same_flow = 0;
}
@@ -2442,7 +2446,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
goto normal;
same_flow = NAPI_GRO_CB(skb)->same_flow;
- free = NAPI_GRO_CB(skb)->free;
+ ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
if (pp) {
struct sk_buff *nskb = *pp;
@@ -2456,21 +2460,20 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
if (same_flow)
goto ok;
- if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
- __skb_push(skb, -skb_network_offset(skb));
+ if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS)
goto normal;
- }
NAPI_GRO_CB(skb)->count = 1;
- skb_shinfo(skb)->gso_size = skb->len;
+ skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list;
napi->gro_list = skb;
+ ret = GRO_HELD;
ok:
- return free;
+ return ret;
normal:
- return -1;
+ return GRO_NORMAL;
}
EXPORT_SYMBOL(dev_gro_receive);
@@ -2486,18 +2489,32 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
return dev_gro_receive(napi, skb);
}
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+int napi_skb_finish(int ret, struct sk_buff *skb) /* act on GRO verdict @ret for @skb; returns NET_RX_SUCCESS, NET_RX_DROP, or netif_receive_skb()'s result */
{
- switch (__napi_gro_receive(napi, skb)) {
- case -1:
+ int err = NET_RX_SUCCESS;
+
+ switch (ret) { /* GRO_MERGED and GRO_HELD match no case: skb is left untouched and NET_RX_SUCCESS is returned */
+ case GRO_NORMAL: /* GRO did not take the packet: deliver it through the regular receive path */
return netif_receive_skb(skb);
- case 1:
+ case GRO_DROP: /* report the drop, then free the skb below */
+ err = NET_RX_DROP;
+ /* fall through */
+
+ case GRO_MERGED_FREE: /* this skb is no longer needed */
kfree_skb(skb);
break;
}
- return NET_RX_SUCCESS;
+ return err;
+}
+EXPORT_SYMBOL(napi_skb_finish);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) /* run GRO on @skb and finish it according to the verdict */
+{
+ skb_gro_reset_offset(skb); /* presumably restarts GRO offset tracking for this skb; helper is defined outside this diff */
+
+ return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);
@@ -2515,6 +2532,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
{
struct net_device *dev = napi->dev;
struct sk_buff *skb = napi->skb;
+ struct ethhdr *eth;
+ skb_frag_t *frag;
+ int i;
napi->skb = NULL;
@@ -2527,20 +2547,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
}
BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
+ frag = &info->frags[info->nr_frags - 1];
+
+ for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
+ skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
+ frag->size);
+ frag++;
+ }
skb_shinfo(skb)->nr_frags = info->nr_frags;
- memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
skb->data_len = info->len;
skb->len += info->len;
skb->truesize += info->len;
- if (!pskb_may_pull(skb, ETH_HLEN)) {
+ skb_reset_mac_header(skb);
+ skb_gro_reset_offset(skb);
+
+ eth = skb_gro_header(skb, sizeof(*eth));
+ if (!eth) {
napi_reuse_skb(napi, skb);
skb = NULL;
goto out;
}
- skb->protocol = eth_type_trans(skb, dev);
+ skb_gro_pull(skb, sizeof(*eth));
+
+ /*
+ * This works because the only protocols we care about don't require
+ * special handling. We'll fix it up properly at the end.
+ */
+ skb->protocol = eth->h_proto;
skb->ip_summed = info->ip_summed;
skb->csum = info->csum;
@@ -2550,29 +2586,47 @@ out:
}
EXPORT_SYMBOL(napi_fraginfo_skb);
-int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) /* act on GRO verdict @ret for a frag-built @skb; returns NET_RX_SUCCESS, NET_RX_DROP, or netif_receive_skb()'s result */
{
- struct sk_buff *skb = napi_fraginfo_skb(napi, info);
- int err = NET_RX_DROP;
+ int err = NET_RX_SUCCESS;
+ int may;
- if (!skb)
- goto out;
+ switch (ret) { /* GRO_MERGED matches no case: skb is left untouched */
+ case GRO_NORMAL:
+ case GRO_HELD: /* in both cases the skb lives on, so its headers must be finalised */
+ may = pskb_may_pull(skb, skb_gro_offset(skb)); /* bring everything up to the GRO offset into the linear area */
+ BUG_ON(!may); /* a failed pull here is treated as fatal */
- err = NET_RX_SUCCESS;
+ skb->protocol = eth_type_trans(skb, napi->dev); /* replaces the raw eth->h_proto that napi_fraginfo_skb() stored */
- switch (__napi_gro_receive(napi, skb)) {
- case -1:
- return netif_receive_skb(skb);
+ if (ret == GRO_NORMAL)
+ return netif_receive_skb(skb);
- case 0:
- goto out;
- }
+ skb_gro_pull(skb, -ETH_HLEN); /* NOTE(review): presumably rewinds the GRO offset by the Ethernet header eth_type_trans() consumed; confirm */
+ break;
- napi_reuse_skb(napi, skb);
+ case GRO_DROP: /* report the drop, then recycle the skb below */
+ err = NET_RX_DROP;
+ /* fall through */
+
+ case GRO_MERGED_FREE: /* skb goes back through napi_reuse_skb() rather than kfree_skb() */
+ napi_reuse_skb(napi, skb);
+ break;
+ }
-out:
return err;
}
+EXPORT_SYMBOL(napi_frags_finish);
+
+int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) /* build an skb from @info, run GRO on it, and finish it according to the verdict */
+{
+ struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+
+ if (!skb) /* no skb could be built from the fragment info */
+ return NET_RX_DROP;
+
+ return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+}
EXPORT_SYMBOL(napi_gro_frags);
static int process_backlog(struct napi_struct *napi, int quota)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index da74b844f4e..f20e758fe46 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2585,17 +2585,23 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
struct sk_buff *p = *head;
struct sk_buff *nskb;
unsigned int headroom;
- unsigned int hlen = p->data - skb_mac_header(p);
- unsigned int len = skb->len;
+ unsigned int len = skb_gro_len(skb);
- if (hlen + p->len + len >= 65536)
+ if (p->len + len >= 65536)
return -E2BIG;
if (skb_shinfo(p)->frag_list)
goto merge;
- else if (!skb_headlen(p) && !skb_headlen(skb) &&
- skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <
- MAX_SKB_FRAGS) {
+ else if (skb_headlen(skb) <= skb_gro_offset(skb)) {
+ if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags >
+ MAX_SKB_FRAGS)
+ return -E2BIG;
+
+ skb_shinfo(skb)->frags[0].page_offset +=
+ skb_gro_offset(skb) - skb_headlen(skb);
+ skb_shinfo(skb)->frags[0].size -=
+ skb_gro_offset(skb) - skb_headlen(skb);
+
memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
skb_shinfo(skb)->frags,
skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -2612,7 +2618,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
}
headroom = skb_headroom(p);
- nskb = netdev_alloc_skb(p->dev, headroom);
+ nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
if (unlikely(!nskb))
return -ENOMEM;
@@ -2620,12 +2626,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
nskb->mac_len = p->mac_len;
skb_reserve(nskb, headroom);
+ __skb_put(nskb, skb_gro_offset(p));
- skb_set_mac_header(nskb, -hlen);
+ skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
skb_set_network_header(nskb, skb_network_offset(p));
skb_set_transport_header(nskb, skb_transport_offset(p));
- memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen);
+ __skb_pull(p, skb_gro_offset(p));
+ memcpy(skb_mac_header(nskb), skb_mac_header(p),
+ p->data - skb_mac_header(p));
*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
skb_shinfo(nskb)->frag_list = p;