aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig52
-rw-r--r--net/ipv4/af_inet.c26
-rw-r--r--net/ipv4/arp.c2
-rw-r--r--net/ipv4/devinet.c9
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/icmp.c4
-rw-r--r--net/ipv4/inet_connection_sock.c42
-rw-r--r--net/ipv4/inet_hashtables.c12
-rw-r--r--net/ipv4/ip_gre.c131
-rw-r--r--net/ipv4/ip_output.c6
-rw-r--r--net/ipv4/ipconfig.c8
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c464
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c4
-rw-r--r--net/ipv4/proc.c4
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/route.c15
-rw-r--r--net/ipv4/tcp.c27
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_output.c12
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/xfrm4_policy.c2
22 files changed, 532 insertions, 301 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 691268f3a35..b2cf91e4cca 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -35,7 +35,7 @@ config IP_ADVANCED_ROUTER
at boot time after the /proc file system has been mounted.
- If you turn on IP forwarding, you will also get the rp_filter, which
+ If you turn on IP forwarding, you should consider the rp_filter, which
automatically rejects incoming packets if the routing table entry
for their source address doesn't match the network interface they're
arriving on. This has security advantages because it prevents the
@@ -46,12 +46,16 @@ config IP_ADVANCED_ROUTER
rp_filter on use:
echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter
- or
+ and
echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter
+ Note that some distributions enable it in startup scripts.
+ For details about rp_filter strict and loose mode read
+ <file:Documentation/networking/ip-sysctl.txt>.
+
If unsure, say N here.
-choice
+choice
prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
depends on IP_ADVANCED_ROUTER
default ASK_IP_FIB_HASH
@@ -59,27 +63,29 @@ choice
config ASK_IP_FIB_HASH
bool "FIB_HASH"
---help---
- Current FIB is very proven and good enough for most users.
+ Current FIB is very proven and good enough for most users.
config IP_FIB_TRIE
bool "FIB_TRIE"
---help---
- Use new experimental LC-trie as FIB lookup algorithm.
- This improves lookup performance if you have a large
- number of routes.
-
- LC-trie is a longest matching prefix lookup algorithm which
- performs better than FIB_HASH for large routing tables.
- But, it consumes more memory and is more complex.
-
- LC-trie is described in:
-
- IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
- IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
- An experimental study of compression methods for dynamic tries
- Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
- http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
-
+ Use new experimental LC-trie as FIB lookup algorithm.
+ This improves lookup performance if you have a large
+ number of routes.
+
+ LC-trie is a longest matching prefix lookup algorithm which
+ performs better than FIB_HASH for large routing tables.
+ But, it consumes more memory and is more complex.
+
+ LC-trie is described in:
+
+ IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
+ IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
+ June 1999
+
+ An experimental study of compression methods for dynamic tries
+ Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
+ http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
+
endchoice
config IP_FIB_HASH
@@ -191,7 +197,7 @@ config IP_PNP_RARP
<file:Documentation/filesystems/nfsroot.txt> for details.
# not yet ready..
-# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
+# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
config NET_IPIP
tristate "IP: tunneling"
select INET_TUNNEL
@@ -361,7 +367,7 @@ config INET_IPCOMP
---help---
Support for IP Payload Compression Protocol (IPComp) (RFC3173),
typically needed for IPsec.
-
+
If unsure, say Y.
config INET_XFRM_TUNNEL
@@ -415,7 +421,7 @@ config INET_DIAG
Support for INET (TCP, DCCP, etc) socket monitoring interface used by
native Linux tools such as ss. ss is included in iproute2, currently
downloadable at <http://linux-net.osdl.org/index.php/Iproute2>.
-
+
If unsure, say Y.
config INET_TCP_DIAG
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 743f5542d65..627be4dc7fb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -369,7 +369,6 @@ lookup_protocol:
sock_init_data(sock, sk);
sk->sk_destruct = inet_sock_destruct;
- sk->sk_family = PF_INET;
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
@@ -1253,10 +1252,10 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
int proto;
int id;
- if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+ iph = skb_gro_header(skb, sizeof(*iph));
+ if (unlikely(!iph))
goto out;
- iph = ip_hdr(skb);
proto = iph->protocol & (MAX_INET_PROTOS - 1);
rcu_read_lock();
@@ -1264,13 +1263,13 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
if (!ops || !ops->gro_receive)
goto out_unlock;
- if (iph->version != 4 || iph->ihl != 5)
+ if (*(u8 *)iph != 0x45)
goto out_unlock;
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto out_unlock;
- flush = ntohs(iph->tot_len) != skb->len ||
+ flush = ntohs(iph->tot_len) != skb_gro_len(skb) ||
iph->frag_off != htons(IP_DF);
id = ntohs(iph->id);
@@ -1282,24 +1281,25 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
iph2 = ip_hdr(p);
- if (iph->protocol != iph2->protocol ||
- iph->tos != iph2->tos ||
- memcmp(&iph->saddr, &iph2->saddr, 8)) {
+ if ((iph->protocol ^ iph2->protocol) |
+ (iph->tos ^ iph2->tos) |
+ (iph->saddr ^ iph2->saddr) |
+ (iph->daddr ^ iph2->daddr)) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
/* All fields must match except length and checksum. */
NAPI_GRO_CB(p)->flush |=
- memcmp(&iph->frag_off, &iph2->frag_off, 4) ||
- (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id;
+ (iph->ttl ^ iph2->ttl) |
+ ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
NAPI_GRO_CB(p)->flush |= flush;
}
NAPI_GRO_CB(skb)->flush |= flush;
- __skb_pull(skb, sizeof(*iph));
- skb_reset_transport_header(skb);
+ skb_gro_pull(skb, sizeof(*iph));
+ skb_set_transport_header(skb, skb_gro_offset(skb));
pp = ops->gro_receive(head, skb);
@@ -1501,7 +1501,7 @@ static int ipv4_proc_init(void);
*/
static struct packet_type ip_packet_type = {
- .type = __constant_htons(ETH_P_IP),
+ .type = cpu_to_be16(ETH_P_IP),
.func = ip_rcv,
.gso_send_check = inet_gso_send_check,
.gso_segment = inet_gso_segment,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 29a74c01d8d..3f6b7354699 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1226,7 +1226,7 @@ void arp_ifdown(struct net_device *dev)
*/
static struct packet_type arp_packet_type = {
- .type = __constant_htons(ETH_P_ARP),
+ .type = cpu_to_be16(ETH_P_ARP),
.func = arp_rcv,
};
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 309997edc8a..d519a6a6672 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1075,6 +1075,14 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
}
}
ip_mc_up(in_dev);
+ /* fall through */
+ case NETDEV_CHANGEADDR:
+ if (IN_DEV_ARP_NOTIFY(in_dev))
+ arp_send(ARPOP_REQUEST, ETH_P_ARP,
+ in_dev->ifa_list->ifa_address,
+ dev,
+ in_dev->ifa_list->ifa_address,
+ NULL, dev->dev_addr, NULL);
break;
case NETDEV_DOWN:
ip_mc_down(in_dev);
@@ -1439,6 +1447,7 @@ static struct devinet_sysctl_table {
DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
+ DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 741e4fa3e47..cafcc49d099 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -275,7 +275,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
fib_res_put(&res);
if (no_addr)
goto last_resort;
- if (rpf)
+ if (rpf == 1)
goto e_inval;
fl.oif = dev->ifindex;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 705b33b184a..3f50807237e 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -375,6 +375,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
inet->tos = ip_hdr(skb)->tos;
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
+ ipc.shtx.flags = 0;
if (icmp_param->replyopts.optlen) {
ipc.opt = &icmp_param->replyopts;
if (ipc.opt->srr)
@@ -532,6 +533,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
inet_sk(sk)->tos = tos;
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts;
+ ipc.shtx.flags = 0;
{
struct flowi fl = {
@@ -1205,7 +1207,7 @@ static struct pernet_operations __net_initdata icmp_sk_ops = {
int __init icmp_init(void)
{
- return register_pernet_device(&icmp_sk_ops);
+ return register_pernet_subsys(&icmp_sk_ops);
}
EXPORT_SYMBOL(icmp_err_convert);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f26ab38680d..22cd19ee44e 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -93,24 +93,40 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
struct inet_bind_hashbucket *head;
struct hlist_node *node;
struct inet_bind_bucket *tb;
- int ret;
+ int ret, attempts = 5;
struct net *net = sock_net(sk);
+ int smallest_size = -1, smallest_rover;
local_bh_disable();
if (!snum) {
int remaining, rover, low, high;
+again:
inet_get_local_port_range(&low, &high);
remaining = (high - low) + 1;
- rover = net_random() % remaining + low;
+ smallest_rover = rover = net_random() % remaining + low;
+ smallest_size = -1;
do {
head = &hashinfo->bhash[inet_bhashfn(net, rover,
hashinfo->bhash_size)];
spin_lock(&head->lock);
inet_bind_bucket_for_each(tb, node, &head->chain)
- if (ib_net(tb) == net && tb->port == rover)
+ if (ib_net(tb) == net && tb->port == rover) {
+ if (tb->fastreuse > 0 &&
+ sk->sk_reuse &&
+ sk->sk_state != TCP_LISTEN &&
+ (tb->num_owners < smallest_size || smallest_size == -1)) {
+ smallest_size = tb->num_owners;
+ smallest_rover = rover;
+ if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
+ spin_unlock(&head->lock);
+ snum = smallest_rover;
+ goto have_snum;
+ }
+ }
goto next;
+ }
break;
next:
spin_unlock(&head->lock);
@@ -125,14 +141,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
* the top level, not from the 'break;' statement.
*/
ret = 1;
- if (remaining <= 0)
+ if (remaining <= 0) {
+ if (smallest_size != -1) {
+ snum = smallest_rover;
+ goto have_snum;
+ }
goto fail;
-
+ }
/* OK, here is the one we will use. HEAD is
* non-NULL and we hold it's mutex.
*/
snum = rover;
} else {
+have_snum:
head = &hashinfo->bhash[inet_bhashfn(net, snum,
hashinfo->bhash_size)];
spin_lock(&head->lock);
@@ -145,12 +166,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
tb_found:
if (!hlist_empty(&tb->owners)) {
if (tb->fastreuse > 0 &&
- sk->sk_reuse && sk->sk_state != TCP_LISTEN) {
+ sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
+ smallest_size == -1) {
goto success;
} else {
ret = 1;
- if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb))
+ if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
+ if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
+ smallest_size != -1 && --attempts >= 0) {
+ spin_unlock(&head->lock);
+ goto again;
+ }
goto fail_unlock;
+ }
}
}
tb_not_found:
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 6a1045da48d..625cc5f64c9 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -38,6 +38,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
write_pnet(&tb->ib_net, hold_net(net));
tb->port = snum;
tb->fastreuse = 0;
+ tb->num_owners = 0;
INIT_HLIST_HEAD(&tb->owners);
hlist_add_head(&tb->node, &head->chain);
}
@@ -59,8 +60,13 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
const unsigned short snum)
{
+ struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
+
+ atomic_inc(&hashinfo->bsockets);
+
inet_sk(sk)->num = snum;
sk_add_bind_node(sk, &tb->owners);
+ tb->num_owners++;
inet_csk(sk)->icsk_bind_hash = tb;
}
@@ -75,9 +81,12 @@ static void __inet_put_port(struct sock *sk)
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
struct inet_bind_bucket *tb;
+ atomic_dec(&hashinfo->bsockets);
+
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
__sk_del_bind_node(sk);
+ tb->num_owners--;
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
@@ -444,9 +453,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
*/
inet_bind_bucket_for_each(tb, node, &head->chain) {
if (ib_net(tb) == net && tb->port == port) {
- WARN_ON(hlist_empty(&tb->owners));
if (tb->fastreuse >= 0)
goto next_port;
+ WARN_ON(hlist_empty(&tb->owners));
if (!check_established(death_row, sk,
port, &tw))
goto ok;
@@ -523,6 +532,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
{
int i;
+ atomic_set(&h->bsockets, 0);
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock);
INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0101521f366..07a188afb3a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -164,67 +164,124 @@ static DEFINE_RWLOCK(ipgre_lock);
/* Given src, dst and key, find appropriate for input tunnel. */
-static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net,
+static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
__be32 remote, __be32 local,
__be32 key, __be16 gre_proto)
{
+ struct net *net = dev_net(dev);
+ int link = dev->ifindex;
unsigned h0 = HASH(remote);
unsigned h1 = HASH(key);
- struct ip_tunnel *t;
- struct ip_tunnel *t2 = NULL;
+ struct ip_tunnel *t, *cand = NULL;
struct ipgre_net *ign = net_generic(net, ipgre_net_id);
int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
ARPHRD_ETHER : ARPHRD_IPGRE;
+ int score, cand_score = 4;
for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
- if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) {
- if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
- if (t->dev->type == dev_type)
- return t;
- if (t->dev->type == ARPHRD_IPGRE && !t2)
- t2 = t;
- }
+ if (local != t->parms.iph.saddr ||
+ remote != t->parms.iph.daddr ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IPGRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
}
}
for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
- if (remote == t->parms.iph.daddr) {
- if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
- if (t->dev->type == dev_type)
- return t;
- if (t->dev->type == ARPHRD_IPGRE && !t2)
- t2 = t;
- }
+ if (remote != t->parms.iph.daddr ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IPGRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
}
}
for (t = ign->tunnels_l[h1]; t; t = t->next) {
- if (local == t->parms.iph.saddr ||
- (local == t->parms.iph.daddr &&
- ipv4_is_multicast(local))) {
- if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
- if (t->dev->type == dev_type)
- return t;
- if (t->dev->type == ARPHRD_IPGRE && !t2)
- t2 = t;
- }
+ if ((local != t->parms.iph.saddr &&
+ (local != t->parms.iph.daddr ||
+ !ipv4_is_multicast(local))) ||
+ key != t->parms.i_key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IPGRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
}
}
for (t = ign->tunnels_wc[h1]; t; t = t->next) {
- if (t->parms.i_key == key && t->dev->flags & IFF_UP) {
- if (t->dev->type == dev_type)
- return t;
- if (t->dev->type == ARPHRD_IPGRE && !t2)
- t2 = t;
+ if (t->parms.i_key != key ||
+ !(t->dev->flags & IFF_UP))
+ continue;
+
+ if (t->dev->type != ARPHRD_IPGRE &&
+ t->dev->type != dev_type)
+ continue;
+
+ score = 0;
+ if (t->parms.link != link)
+ score |= 1;
+ if (t->dev->type != dev_type)
+ score |= 2;
+ if (score == 0)
+ return t;
+
+ if (score < cand_score) {
+ cand = t;
+ cand_score = score;
}
}
- if (t2)
- return t2;
+ if (cand != NULL)
+ return cand;
- if (ign->fb_tunnel_dev->flags&IFF_UP)
+ if (ign->fb_tunnel_dev->flags & IFF_UP)
return netdev_priv(ign->fb_tunnel_dev);
+
return NULL;
}
@@ -284,6 +341,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
__be32 key = parms->i_key;
+ int link = parms->link;
struct ip_tunnel *t, **tp;
struct ipgre_net *ign = net_generic(net, ipgre_net_id);
@@ -291,6 +349,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
key == t->parms.i_key &&
+ link == t->parms.link &&
type == t->dev->type)
break;
@@ -421,7 +480,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
}
read_lock(&ipgre_lock);
- t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr,
+ t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
flags & GRE_KEY ?
*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
p[1]);
@@ -518,7 +577,7 @@ static int ipgre_rcv(struct sk_buff *skb)
gre_proto = *(__be16 *)(h + 2);
read_lock(&ipgre_lock);
- if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev),
+ if ((tunnel = ipgre_tunnel_lookup(skb->dev,
iph->saddr, iph->daddr, key,
gre_proto))) {
struct net_device_stats *stats = &tunnel->dev->stats;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8ebe86dd72a..3e7e910c7c0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -935,6 +935,10 @@ alloc_new_skb:
sk->sk_allocation);
if (unlikely(skb == NULL))
err = -ENOBUFS;
+ else
+ /* only the initial fragment is
+ time stamped */
+ ipc->shtx.flags = 0;
}
if (skb == NULL)
goto error;
@@ -945,6 +949,7 @@ alloc_new_skb:
skb->ip_summed = csummode;
skb->csum = 0;
skb_reserve(skb, hh_len);
+ *skb_tx(skb) = ipc->shtx;
/*
* Find where to start putting bytes.
@@ -1364,6 +1369,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
daddr = ipc.addr = rt->rt_src;
ipc.opt = NULL;
+ ipc.shtx.flags = 0;
if (replyopts.opt.optlen) {
ipc.opt = &replyopts.opt;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index d722013c1ca..90d22ae0a41 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -100,8 +100,8 @@
#define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers
- '3' from resolv.h */
-#define NONE __constant_htonl(INADDR_NONE)
-#define ANY __constant_htonl(INADDR_ANY)
+#define NONE cpu_to_be32(INADDR_NONE)
+#define ANY cpu_to_be32(INADDR_ANY)
/*
* Public IP configuration
@@ -406,7 +406,7 @@ static int __init ic_defaults(void)
static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
static struct packet_type rarp_packet_type __initdata = {
- .type = __constant_htons(ETH_P_RARP),
+ .type = cpu_to_be16(ETH_P_RARP),
.func = ic_rarp_recv,
};
@@ -568,7 +568,7 @@ struct bootp_pkt { /* BOOTP packet format */
static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
static struct packet_type bootp_packet_type __initdata = {
- .type = __constant_htons(ETH_P_IP),
+ .type = cpu_to_be16(ETH_P_IP),
.func = ic_bootp_recv,
};
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 5079dfbc6f3..c49c4ecfb15 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -750,7 +750,7 @@ static struct xfrm_tunnel ipip_handler = {
.priority = 1,
};
-static char banner[] __initdata =
+static const char banner[] __initconst =
KERN_INFO "IPv4 over IPv4 tunneling driver\n";
static void ipip_destroy_tunnels(struct ipip_net *ipn)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 14666449dc1..13e9dd3012b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,9 +67,6 @@
#define CONFIG_IP_PIMSM 1
#endif
-static struct sock *mroute_socket;
-
-
/* Big lock, protecting vif table, mrt cache and mroute socket state.
Note that the changes are semaphored via rtnl_lock.
*/
@@ -80,18 +77,9 @@ static DEFINE_RWLOCK(mrt_lock);
* Multicast router control variables
*/
-static struct vif_device vif_table[MAXVIFS]; /* Devices */
-static int maxvif;
-
-#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
-
-static int mroute_do_assert; /* Set in PIM assert */
-static int mroute_do_pim;
-
-static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
+#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
-static atomic_t cache_resolve_queue_len; /* Size of unresolved */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -107,7 +95,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
-static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
+static int ipmr_cache_report(struct net *net,
+ struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
#ifdef CONFIG_IP_PIMSM_V2
@@ -120,9 +109,11 @@ static struct timer_list ipmr_expire_timer;
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
+ struct net *net = dev_net(dev);
+
dev_close(dev);
- dev = __dev_get_by_name(&init_net, "tunl0");
+ dev = __dev_get_by_name(net, "tunl0");
if (dev) {
const struct net_device_ops *ops = dev->netdev_ops;
struct ifreq ifr;
@@ -148,11 +139,11 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
}
static
-struct net_device *ipmr_new_tunnel(struct vifctl *v)
+struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
struct net_device *dev;
- dev = __dev_get_by_name(&init_net, "tunl0");
+ dev = __dev_get_by_name(net, "tunl0");
if (dev) {
const struct net_device_ops *ops = dev->netdev_ops;
@@ -181,7 +172,8 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
dev = NULL;
- if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
+ if (err == 0 &&
+ (dev = __dev_get_by_name(net, p.name)) != NULL) {
dev->flags |= IFF_MULTICAST;
in_dev = __in_dev_get_rtnl(dev);
@@ -209,14 +201,15 @@ failure:
#ifdef CONFIG_IP_PIMSM
-static int reg_vif_num = -1;
-
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ struct net *net = dev_net(dev);
+
read_lock(&mrt_lock);
dev->stats.tx_bytes += skb->len;
dev->stats.tx_packets++;
- ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
+ ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
+ IGMPMSG_WHOLEPKT);
read_unlock(&mrt_lock);
kfree_skb(skb);
return 0;
@@ -283,16 +276,16 @@ failure:
* @notify: Set to 1, if the caller is a notifier_call
*/
-static int vif_delete(int vifi, int notify)
+static int vif_delete(struct net *net, int vifi, int notify)
{
struct vif_device *v;
struct net_device *dev;
struct in_device *in_dev;
- if (vifi < 0 || vifi >= maxvif)
+ if (vifi < 0 || vifi >= net->ipv4.maxvif)
return -EADDRNOTAVAIL;
- v = &vif_table[vifi];
+ v = &net->ipv4.vif_table[vifi];
write_lock_bh(&mrt_lock);
dev = v->dev;
@@ -304,17 +297,17 @@ static int vif_delete(int vifi, int notify)
}
#ifdef CONFIG_IP_PIMSM
- if (vifi == reg_vif_num)
- reg_vif_num = -1;
+ if (vifi == net->ipv4.mroute_reg_vif_num)
+ net->ipv4.mroute_reg_vif_num = -1;
#endif
- if (vifi+1 == maxvif) {
+ if (vifi+1 == net->ipv4.maxvif) {
int tmp;
for (tmp=vifi-1; tmp>=0; tmp--) {
- if (VIF_EXISTS(tmp))
+ if (VIF_EXISTS(net, tmp))
break;
}
- maxvif = tmp+1;
+ net->ipv4.maxvif = tmp+1;
}
write_unlock_bh(&mrt_lock);
@@ -333,6 +326,12 @@ static int vif_delete(int vifi, int notify)
return 0;
}
+static inline void ipmr_cache_free(struct mfc_cache *c)
+{
+ release_net(mfc_net(c));
+ kmem_cache_free(mrt_cachep, c);
+}
+
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
@@ -341,8 +340,9 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
{
struct sk_buff *skb;
struct nlmsgerr *e;
+ struct net *net = mfc_net(c);
- atomic_dec(&cache_resolve_queue_len);
+ atomic_dec(&net->ipv4.cache_resolve_queue_len);
while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
@@ -354,12 +354,12 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
e->error = -ETIMEDOUT;
memset(&e->msg, 0, sizeof(e->msg));
- rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+ rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
} else
kfree_skb(skb);
}
- kmem_cache_free(mrt_cachep, c);
+ ipmr_cache_free(c);
}
@@ -376,7 +376,7 @@ static void ipmr_expire_process(unsigned long dummy)
return;
}
- if (atomic_read(&cache_resolve_queue_len) == 0)
+ if (mfc_unres_queue == NULL)
goto out;
now = jiffies;
@@ -397,7 +397,7 @@ static void ipmr_expire_process(unsigned long dummy)
ipmr_destroy_unres(c);
}
- if (atomic_read(&cache_resolve_queue_len))
+ if (mfc_unres_queue != NULL)
mod_timer(&ipmr_expire_timer, jiffies + expires);
out:
@@ -409,13 +409,15 @@ out:
static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
int vifi;
+ struct net *net = mfc_net(cache);
cache->mfc_un.res.minvif = MAXVIFS;
cache->mfc_un.res.maxvif = 0;
memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
- for (vifi=0; vifi<maxvif; vifi++) {
- if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
+ for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
+ if (VIF_EXISTS(net, vifi) &&
+ ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
cache->mfc_un.res.minvif = vifi;
@@ -425,16 +427,16 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
}
}
-static int vif_add(struct vifctl *vifc, int mrtsock)
+static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
{
int vifi = vifc->vifc_vifi;
- struct vif_device *v = &vif_table[vifi];
+ struct vif_device *v = &net->ipv4.vif_table[vifi];
struct net_device *dev;
struct in_device *in_dev;
int err;
/* Is vif busy ? */
- if (VIF_EXISTS(vifi))
+ if (VIF_EXISTS(net, vifi))
return -EADDRINUSE;
switch (vifc->vifc_flags) {
@@ -444,7 +446,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
* Special Purpose VIF in PIM
* All the packets will be sent to the daemon
*/
- if (reg_vif_num >= 0)
+ if (net->ipv4.mroute_reg_vif_num >= 0)
return -EADDRINUSE;
dev = ipmr_reg_vif();
if (!dev)
@@ -458,7 +460,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
break;
#endif
case VIFF_TUNNEL:
- dev = ipmr_new_tunnel(vifc);
+ dev = ipmr_new_tunnel(net, vifc);
if (!dev)
return -ENOBUFS;
err = dev_set_allmulti(dev, 1);
@@ -469,7 +471,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
}
break;
case 0:
- dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
+ dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
if (!dev)
return -EADDRNOTAVAIL;
err = dev_set_allmulti(dev, 1);
@@ -510,20 +512,22 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
v->dev = dev;
#ifdef CONFIG_IP_PIMSM
if (v->flags&VIFF_REGISTER)
- reg_vif_num = vifi;
+ net->ipv4.mroute_reg_vif_num = vifi;
#endif
- if (vifi+1 > maxvif)
- maxvif = vifi+1;
+ if (vifi+1 > net->ipv4.maxvif)
+ net->ipv4.maxvif = vifi+1;
write_unlock_bh(&mrt_lock);
return 0;
}
-static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
+static struct mfc_cache *ipmr_cache_find(struct net *net,
+ __be32 origin,
+ __be32 mcastgrp)
{
int line = MFC_HASH(mcastgrp, origin);
struct mfc_cache *c;
- for (c=mfc_cache_array[line]; c; c = c->next) {
+ for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
break;
}
@@ -533,22 +537,24 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
/*
* Allocate a multicast cache entry
*/
-static struct mfc_cache *ipmr_cache_alloc(void)
+static struct mfc_cache *ipmr_cache_alloc(struct net *net)
{
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c == NULL)
return NULL;
c->mfc_un.res.minvif = MAXVIFS;
+ mfc_net_set(c, net);
return c;
}
-static struct mfc_cache *ipmr_cache_alloc_unres(void)
+static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
{
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c == NULL)
return NULL;
skb_queue_head_init(&c->mfc_un.unres.unresolved);
c->mfc_un.unres.expires = jiffies + 10*HZ;
+ mfc_net_set(c, net);
return c;
}
@@ -581,7 +587,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
memset(&e->msg, 0, sizeof(e->msg));
}
- rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+ rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
} else
ip_mr_forward(skb, c, 0);
}
@@ -594,7 +600,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
* Called under mrt_lock.
*/
-static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
+static int ipmr_cache_report(struct net *net,
+ struct sk_buff *pkt, vifi_t vifi, int assert)
{
struct sk_buff *skb;
const int ihl = ip_hdrlen(pkt);
@@ -626,7 +633,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
msg->im_msgtype = IGMPMSG_WHOLEPKT;
msg->im_mbz = 0;
- msg->im_vif = reg_vif_num;
+ msg->im_vif = net->ipv4.mroute_reg_vif_num;
ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
sizeof(struct iphdr));
@@ -658,7 +665,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
skb->transport_header = skb->network_header;
}
- if (mroute_socket == NULL) {
+ if (net->ipv4.mroute_sk == NULL) {
kfree_skb(skb);
return -EINVAL;
}
@@ -666,7 +673,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
/*
* Deliver to mrouted
*/
- if ((ret = sock_queue_rcv_skb(mroute_socket, skb))<0) {
+ ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
+ if (ret < 0) {
if (net_ratelimit())
printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
kfree_skb(skb);
@@ -680,7 +688,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
*/
static int
-ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
+ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
{
int err;
struct mfc_cache *c;
@@ -688,7 +696,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
spin_lock_bh(&mfc_unres_lock);
for (c=mfc_unres_queue; c; c=c->next) {
- if (c->mfc_mcastgrp == iph->daddr &&
+ if (net_eq(mfc_net(c), net) &&
+ c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr)
break;
}
@@ -698,8 +707,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
* Create a new entry if allowable
*/
- if (atomic_read(&cache_resolve_queue_len) >= 10 ||
- (c=ipmr_cache_alloc_unres())==NULL) {
+ if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
+ (c = ipmr_cache_alloc_unres(net)) == NULL) {
spin_unlock_bh(&mfc_unres_lock);
kfree_skb(skb);
@@ -716,18 +725,19 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
/*
* Reflect first query at mrouted.
*/
- if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
+ err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
+ if (err < 0) {
/* If the report failed throw the cache entry
out - Brad Parker
*/
spin_unlock_bh(&mfc_unres_lock);
- kmem_cache_free(mrt_cachep, c);
+ ipmr_cache_free(c);
kfree_skb(skb);
return err;
}
- atomic_inc(&cache_resolve_queue_len);
+ atomic_inc(&net->ipv4.cache_resolve_queue_len);
c->next = mfc_unres_queue;
mfc_unres_queue = c;
@@ -753,35 +763,37 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
* MFC cache manipulation by user space mroute daemon
*/
-static int ipmr_mfc_delete(struct mfcctl *mfc)
+static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
{
int line;
struct mfc_cache *c, **cp;
line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
- for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
+ for (cp = &net->ipv4.mfc_cache_array[line];
+ (c = *cp) != NULL; cp = &c->next) {
if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
write_lock_bh(&mrt_lock);
*cp = c->next;
write_unlock_bh(&mrt_lock);
- kmem_cache_free(mrt_cachep, c);
+ ipmr_cache_free(c);
return 0;
}
}
return -ENOENT;
}
-static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
+static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
{
int line;
struct mfc_cache *uc, *c, **cp;
line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
- for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
+ for (cp = &net->ipv4.mfc_cache_array[line];
+ (c = *cp) != NULL; cp = &c->next) {
if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
break;
@@ -800,7 +812,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
return -EINVAL;
- c = ipmr_cache_alloc();
+ c = ipmr_cache_alloc(net);
if (c == NULL)
return -ENOMEM;
@@ -812,8 +824,8 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
c->mfc_flags |= MFC_STATIC;
write_lock_bh(&mrt_lock);
- c->next = mfc_cache_array[line];
- mfc_cache_array[line] = c;
+ c->next = net->ipv4.mfc_cache_array[line];
+ net->ipv4.mfc_cache_array[line] = c;
write_unlock_bh(&mrt_lock);
/*
@@ -823,19 +835,21 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
spin_lock_bh(&mfc_unres_lock);
for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
cp = &uc->next) {
- if (uc->mfc_origin == c->mfc_origin &&
+ if (net_eq(mfc_net(uc), net) &&
+ uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
*cp = uc->next;
- if (atomic_dec_and_test(&cache_resolve_queue_len))
- del_timer(&ipmr_expire_timer);
+ atomic_dec(&net->ipv4.cache_resolve_queue_len);
break;
}
}
+ if (mfc_unres_queue == NULL)
+ del_timer(&ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
if (uc) {
ipmr_cache_resolve(uc, c);
- kmem_cache_free(mrt_cachep, uc);
+ ipmr_cache_free(uc);
}
return 0;
}
@@ -844,16 +858,16 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct sock *sk)
+static void mroute_clean_tables(struct net *net)
{
int i;
/*
* Shut down all active vif entries
*/
- for (i=0; i<maxvif; i++) {
- if (!(vif_table[i].flags&VIFF_STATIC))
- vif_delete(i, 0);
+ for (i = 0; i < net->ipv4.maxvif; i++) {
+ if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
+ vif_delete(net, i, 0);
}
/*
@@ -862,7 +876,7 @@ static void mroute_clean_tables(struct sock *sk)
for (i=0; i<MFC_LINES; i++) {
struct mfc_cache *c, **cp;
- cp = &mfc_cache_array[i];
+ cp = &net->ipv4.mfc_cache_array[i];
while ((c = *cp) != NULL) {
if (c->mfc_flags&MFC_STATIC) {
cp = &c->next;
@@ -872,22 +886,23 @@ static void mroute_clean_tables(struct sock *sk)
*cp = c->next;
write_unlock_bh(&mrt_lock);
- kmem_cache_free(mrt_cachep, c);
+ ipmr_cache_free(c);
}
}
- if (atomic_read(&cache_resolve_queue_len) != 0) {
- struct mfc_cache *c;
+ if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
+ struct mfc_cache *c, **cp;
spin_lock_bh(&mfc_unres_lock);
- while (mfc_unres_queue != NULL) {
- c = mfc_unres_queue;
- mfc_unres_queue = c->next;
- spin_unlock_bh(&mfc_unres_lock);
+ cp = &mfc_unres_queue;
+ while ((c = *cp) != NULL) {
+ if (!net_eq(mfc_net(c), net)) {
+ cp = &c->next;
+ continue;
+ }
+ *cp = c->next;
ipmr_destroy_unres(c);
-
- spin_lock_bh(&mfc_unres_lock);
}
spin_unlock_bh(&mfc_unres_lock);
}
@@ -895,15 +910,17 @@ static void mroute_clean_tables(struct sock *sk)
static void mrtsock_destruct(struct sock *sk)
{
+ struct net *net = sock_net(sk);
+
rtnl_lock();
- if (sk == mroute_socket) {
- IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;
+ if (sk == net->ipv4.mroute_sk) {
+ IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
write_lock_bh(&mrt_lock);
- mroute_socket = NULL;
+ net->ipv4.mroute_sk = NULL;
write_unlock_bh(&mrt_lock);
- mroute_clean_tables(sk);
+ mroute_clean_tables(net);
}
rtnl_unlock();
}
@@ -920,9 +937,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
int ret;
struct vifctl vif;
struct mfcctl mfc;
+ struct net *net = sock_net(sk);
if (optname != MRT_INIT) {
- if (sk != mroute_socket && !capable(CAP_NET_ADMIN))
+ if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
return -EACCES;
}
@@ -935,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
return -ENOPROTOOPT;
rtnl_lock();
- if (mroute_socket) {
+ if (net->ipv4.mroute_sk) {
rtnl_unlock();
return -EADDRINUSE;
}
@@ -943,15 +961,15 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
ret = ip_ra_control(sk, 1, mrtsock_destruct);
if (ret == 0) {
write_lock_bh(&mrt_lock);
- mroute_socket = sk;
+ net->ipv4.mroute_sk = sk;
write_unlock_bh(&mrt_lock);
- IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
+ IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
}
rtnl_unlock();
return ret;
case MRT_DONE:
- if (sk != mroute_socket)
+ if (sk != net->ipv4.mroute_sk)
return -EACCES;
return ip_ra_control(sk, 0, NULL);
case MRT_ADD_VIF:
@@ -964,9 +982,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
return -ENFILE;
rtnl_lock();
if (optname == MRT_ADD_VIF) {
- ret = vif_add(&vif, sk==mroute_socket);
+ ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
} else {
- ret = vif_delete(vif.vifc_vifi, 0);
+ ret = vif_delete(net, vif.vifc_vifi, 0);
}
rtnl_unlock();
return ret;
@@ -983,9 +1001,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
return -EFAULT;
rtnl_lock();
if (optname == MRT_DEL_MFC)
- ret = ipmr_mfc_delete(&mfc);
+ ret = ipmr_mfc_delete(net, &mfc);
else
- ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
+ ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
rtnl_unlock();
return ret;
/*
@@ -996,7 +1014,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
int v;
if (get_user(v,(int __user *)optval))
return -EFAULT;
- mroute_do_assert=(v)?1:0;
+ net->ipv4.mroute_do_assert = (v) ? 1 : 0;
return 0;
}
#ifdef CONFIG_IP_PIMSM
@@ -1010,11 +1028,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
rtnl_lock();
ret = 0;
- if (v != mroute_do_pim) {
- mroute_do_pim = v;
- mroute_do_assert = v;
+ if (v != net->ipv4.mroute_do_pim) {
+ net->ipv4.mroute_do_pim = v;
+ net->ipv4.mroute_do_assert = v;
#ifdef CONFIG_IP_PIMSM_V2
- if (mroute_do_pim)
+ if (net->ipv4.mroute_do_pim)
ret = inet_add_protocol(&pim_protocol,
IPPROTO_PIM);
else
@@ -1045,6 +1063,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
{
int olr;
int val;
+ struct net *net = sock_net(sk);
if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
@@ -1066,10 +1085,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
val = 0x0305;
#ifdef CONFIG_IP_PIMSM
else if (optname == MRT_PIM)
- val = mroute_do_pim;
+ val = net->ipv4.mroute_do_pim;
#endif
else
- val = mroute_do_assert;
+ val = net->ipv4.mroute_do_assert;
if (copy_to_user(optval, &val, olr))
return -EFAULT;
return 0;
@@ -1085,16 +1104,17 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
struct sioc_vif_req vr;
struct vif_device *vif;
struct mfc_cache *c;
+ struct net *net = sock_net(sk);
switch (cmd) {
case SIOCGETVIFCNT:
if (copy_from_user(&vr, arg, sizeof(vr)))
return -EFAULT;
- if (vr.vifi >= maxvif)
+ if (vr.vifi >= net->ipv4.maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif=&vif_table[vr.vifi];
- if (VIF_EXISTS(vr.vifi)) {
+ vif = &net->ipv4.vif_table[vr.vifi];
+ if (VIF_EXISTS(net, vr.vifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1112,7 +1132,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
return -EFAULT;
read_lock(&mrt_lock);
- c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
+ c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
if (c) {
sr.pktcnt = c->mfc_un.res.pkt;
sr.bytecnt = c->mfc_un.res.bytes;
@@ -1134,18 +1154,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = ptr;
+ struct net *net = dev_net(dev);
struct vif_device *v;
int ct;
- if (!net_eq(dev_net(dev), &init_net))
+ if (!net_eq(dev_net(dev), net))
return NOTIFY_DONE;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
- v=&vif_table[0];
- for (ct=0; ct<maxvif; ct++,v++) {
+ v = &net->ipv4.vif_table[0];
+ for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
if (v->dev == dev)
- vif_delete(ct, 1);
+ vif_delete(net, ct, 1);
}
return NOTIFY_DONE;
}
@@ -1205,8 +1226,9 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
+ struct net *net = mfc_net(c);
const struct iphdr *iph = ip_hdr(skb);
- struct vif_device *vif = &vif_table[vifi];
+ struct vif_device *vif = &net->ipv4.vif_table[vifi];
struct net_device *dev;
struct rtable *rt;
int encap = 0;
@@ -1220,9 +1242,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
vif->bytes_out += skb->len;
vif->dev->stats.tx_bytes += skb->len;
vif->dev->stats.tx_packets++;
- ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
- kfree_skb(skb);
- return;
+ ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
+ goto out_free;
}
#endif
@@ -1233,7 +1254,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
.saddr = vif->local,
.tos = RT_TOS(iph->tos) } },
.proto = IPPROTO_IPIP };
- if (ip_route_output_key(&init_net, &rt, &fl))
+ if (ip_route_output_key(net, &rt, &fl))
goto out_free;
encap = sizeof(struct iphdr);
} else {
@@ -1242,7 +1263,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{ .daddr = iph->daddr,
.tos = RT_TOS(iph->tos) } },
.proto = IPPROTO_IPIP };
- if (ip_route_output_key(&init_net, &rt, &fl))
+ if (ip_route_output_key(net, &rt, &fl))
goto out_free;
}
@@ -1306,9 +1327,10 @@ out_free:
static int ipmr_find_vif(struct net_device *dev)
{
+ struct net *net = dev_net(dev);
int ct;
- for (ct=maxvif-1; ct>=0; ct--) {
- if (vif_table[ct].dev == dev)
+ for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
+ if (net->ipv4.vif_table[ct].dev == dev)
break;
}
return ct;
@@ -1320,6 +1342,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
{
int psend = -1;
int vif, ct;
+ struct net *net = mfc_net(cache);
vif = cache->mfc_parent;
cache->mfc_un.res.pkt++;
@@ -1328,7 +1351,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (vif_table[vif].dev != skb->dev) {
+ if (net->ipv4.vif_table[vif].dev != skb->dev) {
int true_vifi;
if (skb->rtable->fl.iif == 0) {
@@ -1349,23 +1372,24 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
cache->mfc_un.res.wrong_if++;
true_vifi = ipmr_find_vif(skb->dev);
- if (true_vifi >= 0 && mroute_do_assert &&
+ if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
so that we cannot check that packet arrived on an oif.
It is bad, but otherwise we would need to move pretty
large chunk of pimd to kernel. Ough... --ANK
*/
- (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ (net->ipv4.mroute_do_pim ||
+ cache->mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
cache->mfc_un.res.last_assert = jiffies;
- ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
+ ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
}
goto dont_forward;
}
- vif_table[vif].pkt_in++;
- vif_table[vif].bytes_in += skb->len;
+ net->ipv4.vif_table[vif].pkt_in++;
+ net->ipv4.vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
@@ -1405,6 +1429,7 @@ dont_forward:
int ip_mr_input(struct sk_buff *skb)
{
struct mfc_cache *cache;
+ struct net *net = dev_net(skb->dev);
int local = skb->rtable->rt_flags&RTCF_LOCAL;
/* Packet is looped back after forward, it should not be
@@ -1425,9 +1450,9 @@ int ip_mr_input(struct sk_buff *skb)
that we can forward NO IGMP messages.
*/
read_lock(&mrt_lock);
- if (mroute_socket) {
+ if (net->ipv4.mroute_sk) {
nf_reset(skb);
- raw_rcv(mroute_socket, skb);
+ raw_rcv(net->ipv4.mroute_sk, skb);
read_unlock(&mrt_lock);
return 0;
}
@@ -1436,7 +1461,7 @@ int ip_mr_input(struct sk_buff *skb)
}
read_lock(&mrt_lock);
- cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
+ cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
/*
* No usable cache entry
@@ -1456,7 +1481,7 @@ int ip_mr_input(struct sk_buff *skb)
vif = ipmr_find_vif(skb->dev);
if (vif >= 0) {
- int err = ipmr_cache_unresolved(vif, skb);
+ int err = ipmr_cache_unresolved(net, vif, skb);
read_unlock(&mrt_lock);
return err;
@@ -1487,6 +1512,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
{
struct net_device *reg_dev = NULL;
struct iphdr *encap;
+ struct net *net = dev_net(skb->dev);
encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
/*
@@ -1501,8 +1527,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
return 1;
read_lock(&mrt_lock);
- if (reg_vif_num >= 0)
- reg_dev = vif_table[reg_vif_num].dev;
+ if (net->ipv4.mroute_reg_vif_num >= 0)
+ reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
@@ -1537,13 +1563,14 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
int pim_rcv_v1(struct sk_buff * skb)
{
struct igmphdr *pim;
+ struct net *net = dev_net(skb->dev);
if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
goto drop;
pim = igmp_hdr(skb);
- if (!mroute_do_pim ||
+ if (!net->ipv4.mroute_do_pim ||
pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
goto drop;
@@ -1583,7 +1610,8 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
int ct;
struct rtnexthop *nhp;
- struct net_device *dev = vif_table[c->mfc_parent].dev;
+ struct net *net = mfc_net(c);
+ struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
u8 *b = skb_tail_pointer(skb);
struct rtattr *mp_head;
@@ -1599,7 +1627,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
nhp->rtnh_flags = 0;
nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
+ nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
nhp->rtnh_len = sizeof(*nhp);
}
}
@@ -1613,14 +1641,15 @@ rtattr_failure:
return -EMSGSIZE;
}
-int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+int ipmr_get_route(struct net *net,
+ struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
int err;
struct mfc_cache *cache;
struct rtable *rt = skb->rtable;
read_lock(&mrt_lock);
- cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);
+ cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
if (cache == NULL) {
struct sk_buff *skb2;
@@ -1651,7 +1680,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
iph->saddr = rt->rt_src;
iph->daddr = rt->rt_dst;
iph->version = 0;
- err = ipmr_cache_unresolved(vif, skb2);
+ err = ipmr_cache_unresolved(net, vif, skb2);
read_unlock(&mrt_lock);
return err;
}
@@ -1668,17 +1697,19 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
* The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
*/
struct ipmr_vif_iter {
+ struct seq_net_private p;
int ct;
};
-static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
+static struct vif_device *ipmr_vif_seq_idx(struct net *net,
+ struct ipmr_vif_iter *iter,
loff_t pos)
{
- for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) {
- if (!VIF_EXISTS(iter->ct))
+ for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
+ if (!VIF_EXISTS(net, iter->ct))
continue;
if (pos-- == 0)
- return &vif_table[iter->ct];
+ return &net->ipv4.vif_table[iter->ct];
}
return NULL;
}
@@ -1686,23 +1717,26 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
+ struct net *net = seq_file_net(seq);
+
read_lock(&mrt_lock);
- return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1)
+ return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
: SEQ_START_TOKEN;
}
static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ipmr_vif_iter *iter = seq->private;
+ struct net *net = seq_file_net(seq);
++*pos;
if (v == SEQ_START_TOKEN)
- return ipmr_vif_seq_idx(iter, 0);
+ return ipmr_vif_seq_idx(net, iter, 0);
- while (++iter->ct < maxvif) {
- if (!VIF_EXISTS(iter->ct))
+ while (++iter->ct < net->ipv4.maxvif) {
+ if (!VIF_EXISTS(net, iter->ct))
continue;
- return &vif_table[iter->ct];
+ return &net->ipv4.vif_table[iter->ct];
}
return NULL;
}
@@ -1715,6 +1749,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
+ struct net *net = seq_file_net(seq);
+
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
@@ -1724,7 +1760,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
- vif - vif_table,
+ vif - net->ipv4.vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags, vif->local, vif->remote);
@@ -1741,8 +1777,8 @@ static const struct seq_operations ipmr_vif_seq_ops = {
static int ipmr_vif_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ipmr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ return seq_open_net(inode, file, &ipmr_vif_seq_ops,
+ sizeof(struct ipmr_vif_iter));
}
static const struct file_operations ipmr_vif_fops = {
@@ -1750,23 +1786,26 @@ static const struct file_operations ipmr_vif_fops = {
.open = ipmr_vif_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
struct ipmr_mfc_iter {
+ struct seq_net_private p;
struct mfc_cache **cache;
int ct;
};
-static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
+static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
+ struct ipmr_mfc_iter *it, loff_t pos)
{
struct mfc_cache *mfc;
- it->cache = mfc_cache_array;
+ it->cache = net->ipv4.mfc_cache_array;
read_lock(&mrt_lock);
for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
- for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next)
+ for (mfc = net->ipv4.mfc_cache_array[it->ct];
+ mfc; mfc = mfc->next)
if (pos-- == 0)
return mfc;
read_unlock(&mrt_lock);
@@ -1774,7 +1813,8 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
it->cache = &mfc_unres_queue;
spin_lock_bh(&mfc_unres_lock);
for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
- if (pos-- == 0)
+ if (net_eq(mfc_net(mfc), net) &&
+ pos-- == 0)
return mfc;
spin_unlock_bh(&mfc_unres_lock);
@@ -1786,9 +1826,11 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
struct ipmr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
+
it->cache = NULL;
it->ct = 0;
- return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
+ return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
: SEQ_START_TOKEN;
}
@@ -1796,11 +1838,12 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct mfc_cache *mfc = v;
struct ipmr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
++*pos;
if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(seq->private, 0);
+ return ipmr_mfc_seq_idx(net, seq->private, 0);
if (mfc->next)
return mfc->next;
@@ -1808,10 +1851,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (it->cache == &mfc_unres_queue)
goto end_of_list;
- BUG_ON(it->cache != mfc_cache_array);
+ BUG_ON(it->cache != net->ipv4.mfc_cache_array);
while (++it->ct < MFC_LINES) {
- mfc = mfc_cache_array[it->ct];
+ mfc = net->ipv4.mfc_cache_array[it->ct];
if (mfc)
return mfc;
}
@@ -1823,6 +1866,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
spin_lock_bh(&mfc_unres_lock);
mfc = mfc_unres_queue;
+ while (mfc && !net_eq(mfc_net(mfc), net))
+ mfc = mfc->next;
if (mfc)
return mfc;
@@ -1836,16 +1881,18 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
struct ipmr_mfc_iter *it = seq->private;
+ struct net *net = seq_file_net(seq);
if (it->cache == &mfc_unres_queue)
spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == mfc_cache_array)
+ else if (it->cache == net->ipv4.mfc_cache_array)
read_unlock(&mrt_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
int n;
+ struct net *net = seq_file_net(seq);
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
@@ -1866,9 +1913,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
mfc->mfc_un.res.wrong_if);
for (n = mfc->mfc_un.res.minvif;
n < mfc->mfc_un.res.maxvif; n++ ) {
- if (VIF_EXISTS(n)
- && mfc->mfc_un.res.ttls[n] < 255)
- seq_printf(seq,
+ if (VIF_EXISTS(net, n) &&
+ mfc->mfc_un.res.ttls[n] < 255)
+ seq_printf(seq,
" %2d:%-3d",
n, mfc->mfc_un.res.ttls[n]);
}
@@ -1892,8 +1939,8 @@ static const struct seq_operations ipmr_mfc_seq_ops = {
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
+ sizeof(struct ipmr_mfc_iter));
}
static const struct file_operations ipmr_mfc_fops = {
@@ -1901,7 +1948,7 @@ static const struct file_operations ipmr_mfc_fops = {
.open = ipmr_mfc_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
+ .release = seq_release_net,
};
#endif
@@ -1915,6 +1962,65 @@ static struct net_protocol pim_protocol = {
/*
* Setup for IP multicast routing
*/
+static int __net_init ipmr_net_init(struct net *net)
+{
+ int err = 0;
+
+ net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
+ GFP_KERNEL);
+ if (!net->ipv4.vif_table) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ /* Forwarding cache */
+ net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
+ sizeof(struct mfc_cache *),
+ GFP_KERNEL);
+ if (!net->ipv4.mfc_cache_array) {
+ err = -ENOMEM;
+ goto fail_mfc_cache;
+ }
+
+#ifdef CONFIG_IP_PIMSM
+ net->ipv4.mroute_reg_vif_num = -1;
+#endif
+
+#ifdef CONFIG_PROC_FS
+ err = -ENOMEM;
+ if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
+ goto proc_vif_fail;
+ if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
+ goto proc_cache_fail;
+#endif
+ return 0;
+
+#ifdef CONFIG_PROC_FS
+proc_cache_fail:
+ proc_net_remove(net, "ip_mr_vif");
+proc_vif_fail:
+ kfree(net->ipv4.mfc_cache_array);
+#endif
+fail_mfc_cache:
+ kfree(net->ipv4.vif_table);
+fail:
+ return err;
+}
+
+static void __net_exit ipmr_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_remove(net, "ip_mr_cache");
+ proc_net_remove(net, "ip_mr_vif");
+#endif
+ kfree(net->ipv4.mfc_cache_array);
+ kfree(net->ipv4.vif_table);
+}
+
+static struct pernet_operations ipmr_net_ops = {
+ .init = ipmr_net_init,
+ .exit = ipmr_net_exit,
+};
int __init ip_mr_init(void)
{
@@ -1927,26 +2033,20 @@ int __init ip_mr_init(void)
if (!mrt_cachep)
return -ENOMEM;
+ err = register_pernet_subsys(&ipmr_net_ops);
+ if (err)
+ goto reg_pernet_fail;
+
setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
err = register_netdevice_notifier(&ip_mr_notifier);
if (err)
goto reg_notif_fail;
-#ifdef CONFIG_PROC_FS
- err = -ENOMEM;
- if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
- goto proc_vif_fail;
- if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
- goto proc_cache_fail;
-#endif
return 0;
-#ifdef CONFIG_PROC_FS
-proc_cache_fail:
- proc_net_remove(&init_net, "ip_mr_vif");
-proc_vif_fail:
- unregister_netdevice_notifier(&ip_mr_notifier);
-#endif
+
reg_notif_fail:
del_timer(&ipmr_expire_timer);
+ unregister_pernet_subsys(&ipmr_net_ops);
+reg_pernet_fail:
kmem_cache_destroy(mrt_cachep);
return err;
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 182f845de92..d9521f6f9ed 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1292,7 +1292,7 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = {
.expect_policy = &snmp_exp_policy,
.name = "snmp",
.tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = __constant_htons(SNMP_PORT),
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
};
@@ -1302,7 +1302,7 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.expect_policy = &snmp_exp_policy,
.name = "snmp_trap",
.tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT),
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
};
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index eb62e58bff7..cf0cdeeb1db 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,8 +54,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
int orphans, sockets;
local_bh_disable();
- orphans = percpu_counter_sum_positive(&tcp_orphan_count),
- sockets = percpu_counter_sum_positive(&tcp_sockets_allocated),
+ orphans = percpu_counter_sum_positive(&tcp_orphan_count);
+ sockets = percpu_counter_sum_positive(&tcp_sockets_allocated);
local_bh_enable();
socket_seq_show(seq);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dff8bc4e0fa..f774651f0a4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -493,6 +493,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.addr = inet->saddr;
ipc.opt = NULL;
+ ipc.shtx.flags = 0;
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 97f71153584..5caee609be0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -151,7 +151,7 @@ static void rt_emergency_hash_rebuild(struct net *net);
static struct dst_ops ipv4_dst_ops = {
.family = AF_INET,
- .protocol = __constant_htons(ETH_P_IP),
+ .protocol = cpu_to_be16(ETH_P_IP),
.gc = rt_garbage_collect,
.check = ipv4_dst_check,
.destroy = ipv4_dst_destroy,
@@ -2696,7 +2696,7 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
static struct dst_ops ipv4_dst_blackhole_ops = {
.family = AF_INET,
- .protocol = __constant_htons(ETH_P_IP),
+ .protocol = cpu_to_be16(ETH_P_IP),
.destroy = ipv4_dst_destroy,
.check = ipv4_dst_check,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
@@ -2779,7 +2779,8 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
return ip_route_output_flow(net, rp, flp, NULL, 0);
}
-static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+static int rt_fill_info(struct net *net,
+ struct sk_buff *skb, u32 pid, u32 seq, int event,
int nowait, unsigned int flags)
{
struct rtable *rt = skb->rtable;
@@ -2844,8 +2845,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
__be32 dst = rt->rt_dst;
if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
- IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
- int err = ipmr_get_route(skb, r, nowait);
+ IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
+ int err = ipmr_get_route(net, skb, r, nowait);
if (err <= 0) {
if (!nowait) {
if (err == 0)
@@ -2950,7 +2951,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
- err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
+ err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
RTM_NEWROUTE, 0, 0);
if (err <= 0)
goto errout_free;
@@ -2988,7 +2989,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (rt_is_expired(rt))
continue;
skb->dst = dst_clone(&rt->u.dst);
- if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, RTM_NEWROUTE,
1, NLM_F_MULTI) <= 0) {
dst_release(xchg(&skb->dst, NULL));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 76b148bcb0d..90b2f3c192f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2478,23 +2478,23 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
struct tcphdr *th2;
unsigned int thlen;
unsigned int flags;
- unsigned int total;
unsigned int mss = 1;
int flush = 1;
+ int i;
- if (!pskb_may_pull(skb, sizeof(*th)))
+ th = skb_gro_header(skb, sizeof(*th));
+ if (unlikely(!th))
goto out;
- th = tcp_hdr(skb);
thlen = th->doff * 4;
if (thlen < sizeof(*th))
goto out;
- if (!pskb_may_pull(skb, thlen))
+ th = skb_gro_header(skb, thlen);
+ if (unlikely(!th))
goto out;
- th = tcp_hdr(skb);
- __skb_pull(skb, thlen);
+ skb_gro_pull(skb, thlen);
flags = tcp_flag_word(th);
@@ -2504,7 +2504,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
th2 = tcp_hdr(p);
- if (th->source != th2->source || th->dest != th2->dest) {
+ if ((th->source ^ th2->source) | (th->dest ^ th2->dest)) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
@@ -2519,14 +2519,15 @@ found:
flush |= flags & TCP_FLAG_CWR;
flush |= (flags ^ tcp_flag_word(th2)) &
~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH);
- flush |= th->ack_seq != th2->ack_seq || th->window != th2->window;
- flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th));
+ flush |= (th->ack_seq ^ th2->ack_seq) | (th->window ^ th2->window);
+ for (i = sizeof(*th); !flush && i < thlen; i += 4)
+ flush |= *(u32 *)((u8 *)th + i) ^
+ *(u32 *)((u8 *)th2 + i);
- total = p->len;
mss = skb_shinfo(p)->gso_size;
- flush |= skb->len > mss || skb->len <= 0;
- flush |= ntohl(th2->seq) + total != ntohl(th->seq);
+ flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb);
+ flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
if (flush || skb_gro_receive(head, skb)) {
mss = 1;
@@ -2538,7 +2539,7 @@ found:
tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
out_check_final:
- flush = skb->len < mss;
+ flush = skb_gro_len(skb) < mss;
flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
TCP_FLAG_SYN | TCP_FLAG_FIN);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 19d7b429a26..a7381205bbf 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2355,7 +2355,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
switch (skb->ip_summed) {
case CHECKSUM_COMPLETE:
- if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr,
+ if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
skb->csum)) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
break;
@@ -2443,7 +2443,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = {
void __init tcp_v4_init(void)
{
inet_hashinfo_init(&tcp_hashinfo);
- if (register_pernet_device(&tcp_sk_ops))
+ if (register_pernet_subsys(&tcp_sk_ops))
panic("Failed to create the TCP control socket.\n");
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index da2c3b8794f..f6f61b3e677 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -663,10 +663,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->urg_ptr = 0;
/* The urg_mode check is necessary during a below snd_una win probe */
- if (unlikely(tcp_urg_mode(tp) &&
- between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
- th->urg_ptr = htons(tp->snd_up - tcb->seq);
- th->urg = 1;
+ if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
+ if (before(tp->snd_up, tcb->seq + 0x10000)) {
+ th->urg_ptr = htons(tp->snd_up - tcb->seq);
+ th->urg = 1;
+ } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
+ th->urg_ptr = 0xFFFF;
+ th->urg = 1;
+ }
}
tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c47c989cb1f..4bd178a111d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -596,6 +596,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
return -EOPNOTSUPP;
ipc.opt = NULL;
+ ipc.shtx.flags = 0;
if (up->pending) {
/*
@@ -643,6 +644,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
ipc.addr = inet->saddr;
ipc.oif = sk->sk_bound_dev_if;
+ err = sock_tx_timestamp(msg, sk, &ipc.shtx);
+ if (err)
+ return err;
if (msg->msg_controllen) {
err = ip_cmsg_send(sock_net(sk), msg, &ipc);
if (err)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 2ad24ba31f9..60d918c96a4 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -241,7 +241,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
static struct dst_ops xfrm4_dst_ops = {
.family = AF_INET,
- .protocol = __constant_htons(ETH_P_IP),
+ .protocol = cpu_to_be16(ETH_P_IP),
.gc = xfrm4_garbage_collect,
.update_pmtu = xfrm4_update_pmtu,
.destroy = xfrm4_dst_destroy,