From 55b333253d5bcafbe187b50474e40789301c53c6 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:21:26 -0700 Subject: [NET]: Introduce the sk_detach_filter() call Filter is attached in a separate function, so do the same for filter detaching. This also removes one variable sock_setsockopt(). Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/filter.c | 16 ++++++++++++++++ net/core/sock.c | 12 +----------- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index bd903aaf7aa..fd607581ab5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -433,5 +433,21 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) return err; } +int sk_detach_filter(struct sock *sk) +{ + int ret = -ENOENT; + struct sk_filter *filter; + + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter) { + rcu_assign_pointer(sk->sk_filter, NULL); + sk_filter_release(sk, filter); + ret = 0; + } + rcu_read_unlock_bh(); + return ret; +} + EXPORT_SYMBOL(sk_chk_filter); EXPORT_SYMBOL(sk_run_filter); diff --git a/net/core/sock.c b/net/core/sock.c index d45ecdccc6a..07101381b8b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -428,7 +428,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { struct sock *sk=sock->sk; - struct sk_filter *filter; int val; int valbool; struct linger ling; @@ -652,16 +651,7 @@ set_rcvbuf: break; case SO_DETACH_FILTER: - rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); - if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_release(sk, filter); - rcu_read_unlock_bh(); - break; - } - rcu_read_unlock_bh(); - ret = -ENONET; + ret = sk_detach_filter(sk); break; case SO_PASSSEC: -- cgit v1.2.3 From 309dd5fc872448e35634d510049642312ebc170d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:21:51 -0700 Subject: [NET]: Move the filter releasing into a separate call This is done merely as a preparation for the fix. The sk_filter_uncharge() unaccounts the filter memory and calls the sk_filter_release(), which in turn decrements the refcount anf frees the filter. The latter function will be required separately. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/filter.c | 4 ++-- net/core/sock.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index fd607581ab5..2be1830d3c3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -429,7 +429,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) } if (fp) - sk_filter_release(sk, fp); + sk_filter_uncharge(sk, fp); return err; } @@ -442,7 +442,7 @@ int sk_detach_filter(struct sock *sk) filter = rcu_dereference(sk->sk_filter); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_release(sk, filter); + sk_filter_uncharge(sk, filter); ret = 0; } rcu_read_unlock_bh(); diff --git a/net/core/sock.c b/net/core/sock.c index 07101381b8b..d292b4113d6 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -915,7 +915,7 @@ void sk_free(struct sock *sk) filter = rcu_dereference(sk->sk_filter); if (filter) { - sk_filter_release(sk, filter); + sk_filter_uncharge(sk, filter); rcu_assign_pointer(sk->sk_filter, NULL); } -- cgit v1.2.3 From d3904b739928edd83d117f1eb5bfa69f18d6f046 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:22:17 -0700 Subject: [NET]: Cleanup the error path in sk_attach_filter The sk_filter_uncharge is called for error handling and for releasing the former filter, but this will have to be done in a bit different manner, so cleanup the error path a bit. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/filter.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 2be1830d3c3..54dddc92452 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -398,7 +398,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen) */ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) { - struct sk_filter *fp; + struct sk_filter *fp, *old_fp; unsigned int fsize = sizeof(struct sock_filter) * fprog->len; int err; @@ -418,19 +418,18 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) fp->len = fprog->len; err = sk_chk_filter(fp->insns, fp->len); - if (!err) { - struct sk_filter *old_fp; - - rcu_read_lock_bh(); - old_fp = rcu_dereference(sk->sk_filter); - rcu_assign_pointer(sk->sk_filter, fp); - rcu_read_unlock_bh(); - fp = old_fp; + if (err) { + sk_filter_uncharge(sk, fp); + return err; } - if (fp) - sk_filter_uncharge(sk, fp); - return err; + rcu_read_lock_bh(); + old_fp = rcu_dereference(sk->sk_filter); + rcu_assign_pointer(sk->sk_filter, fp); + rcu_read_unlock_bh(); + + sk_filter_uncharge(sk, old_fp); + return 0; } int sk_detach_filter(struct sock *sk) -- cgit v1.2.3 From 47e958eac280c263397582d5581e868c3227a1bd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 17 Oct 2007 21:22:42 -0700 Subject: [NET]: Fix the race between sk_filter_(de|at)tach and sk_clone() The proposed fix is to delay the reference counter decrement until the quiescent state pass. This will give sk_clone() a chance to get the reference on the cloned filter. Regular sk_filter_uncharge can happen from the sk_free() only and there's no need in delaying the put - the socket is dead anyway and is to be release itself. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/filter.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/filter.c b/net/core/filter.c index 54dddc92452..1f0068eae50 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -386,6 +386,25 @@ int sk_chk_filter(struct sock_filter *filter, int flen) return (BPF_CLASS(filter[flen - 1].code) == BPF_RET) ? 0 : -EINVAL; } +/** + * sk_filter_rcu_release: Release a socket filter by rcu_head + * @rcu: rcu_head that contains the sk_filter to free + */ +static void sk_filter_rcu_release(struct rcu_head *rcu) +{ + struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + + sk_filter_release(fp); +} + +static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp) +{ + unsigned int size = sk_filter_len(fp); + + atomic_sub(size, &sk->sk_omem_alloc); + call_rcu_bh(&fp->rcu, sk_filter_rcu_release); +} + /** * sk_attach_filter - attach a socket filter * @fprog: the filter program @@ -428,7 +447,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) rcu_assign_pointer(sk->sk_filter, fp); rcu_read_unlock_bh(); - sk_filter_uncharge(sk, old_fp); + sk_filter_delayed_uncharge(sk, old_fp); return 0; } @@ -441,7 +460,7 @@ int sk_detach_filter(struct sock *sk) filter = rcu_dereference(sk->sk_filter); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_uncharge(sk, filter); + sk_filter_delayed_uncharge(sk, filter); ret = 0; } rcu_read_unlock_bh(); -- cgit v1.2.3 From 13996378e6585fb25e582afe7489bf52dde78deb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 17 Oct 2007 21:35:51 -0700 Subject: [IPSEC]: Rename mode to outer_mode and add inner_mode This patch adds a new field to xfrm states called inner_mode. The existing mode object is renamed to outer_mode. This is the first part of an attempt to fix inter-family transforms. As it is we always use the outer family when determining which mode to use. As a result we may end up shoving IPv4 packets into netfilter6 and vice versa. What we really want is to use the inner family for the first part of outbound processing and the outer family for the second part. For inbound processing we'd use the opposite pairing. I've also added a check to prevent silly combinations such as transport mode with inter-family transforms. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2100c734b10..8cae60c5338 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2454,7 +2454,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev) spin_lock(&x->lock); iph = ip_hdr(skb); - err = x->mode->output(x, skb); + err = x->outer_mode->output(x, skb); if (err) goto error; err = x->type->output(x, skb); -- cgit v1.2.3