From 37e20a66db02eff9adbeee043af990cca85d0034 Mon Sep 17 00:00:00 2001 From: "Pravin B. Shelar" Date: Sun, 29 May 2005 20:26:44 -0700 Subject: [IPV4]: Kill MULTIPATHHOLDROUTE flag. It cannot work properly, so just ignore it in drr and rr multipath algorithms just like the random multipath algorithm does. Suggested by Herbert Xu. Signed-off by: Pravin B. Shelar Signed-off-by: David S. Miller --- include/net/route.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index efe92b239ef..d34ca8fc675 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -181,9 +181,6 @@ static inline int ip_route_newports(struct rtable **rp, u16 sport, u16 dport, memcpy(&fl, &(*rp)->fl, sizeof(fl)); fl.fl_ip_sport = sport; fl.fl_ip_dport = dport; -#if defined(CONFIG_IP_ROUTE_MULTIPATH_CACHED) - fl.flags |= FLOWI_FLAG_MULTIPATHOLDROUTE; -#endif ip_rt_put(*rp); *rp = NULL; return ip_route_output_flow(rp, &fl, sk, 0); -- cgit v1.2.3 From cdac4e07748934e37e415437055ed591aed9eb21 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 13 Jun 2005 15:12:33 -0700 Subject: [SCTP] Add support for ip_nonlocal_bind sysctl & IP_FREEBIND socket option Signed-off-by: Neil Horman Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/ip.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 3f63992eb71..32360bbe143 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -163,6 +163,7 @@ DECLARE_SNMP_STAT(struct linux_mib, net_statistics); extern int sysctl_local_port_range[2]; extern int sysctl_ip_default_ttl; +extern int sysctl_ip_nonlocal_bind; #ifdef CONFIG_INET /* The function in 2.2 was invalid, producing wrong result for -- cgit v1.2.3 From 26b15dad9f1c19d6d4f7b999b07eaa6d98e4b375 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 18 Jun 2005 22:42:13 -0700 Subject: [IPSEC] Add complete xfrm event notification Heres the final patch. What this patch provides - netlink xfrm events - ability to have events generated by netlink propagated to pfkey and vice versa. - fixes the acquire lets-be-happy-with-one-success issue Signed-off-by: Jamal Hadi Salim Signed-off-by: Herbert Xu --- include/net/xfrm.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d675836ba6c..a159655ebed 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -158,6 +158,27 @@ enum { XFRM_STATE_DEAD }; +/* events that could be sent by kernel */ +enum { + XFRM_SAP_INVALID, + XFRM_SAP_EXPIRED, + XFRM_SAP_ADDED, + XFRM_SAP_UPDATED, + XFRM_SAP_DELETED, + XFRM_SAP_FLUSHED, + __XFRM_SAP_MAX +}; +#define XFRM_SAP_MAX (__XFRM_SAP_MAX - 1) + +/* callback structure passed from either netlink or pfkey */ +struct km_event +{ + u32 data; + u32 seq; + u32 pid; + u32 event; +}; + struct xfrm_type; struct xfrm_dst; struct xfrm_policy_afinfo { @@ -179,6 +200,8 @@ struct xfrm_policy_afinfo { extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo); extern int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo); +extern void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c); +extern void km_state_notify(struct xfrm_state *x, struct km_event *c); #define XFRM_ACQ_EXPIRES 30 @@ -290,11 +313,11 @@ struct xfrm_mgr { struct list_head list; char *id; - int (*notify)(struct xfrm_state *x, int event); + int (*notify)(struct xfrm_state *x, struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir); struct xfrm_policy *(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); - int (*notify_policy)(struct xfrm_policy *x, int dir, int event); + int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -817,7 +840,7 @@ extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); -extern void xfrm_state_delete(struct xfrm_state *x); +extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); extern int xfrm_replay_check(struct xfrm_state *x, u32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, u32 seq); -- cgit v1.2.3 From 4666faab095230ec8aa62da6c33391287f281154 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2005 22:43:22 -0700 Subject: [IPSEC] Kill spurious hard expire messages This patch ensures that the hard state/policy expire notifications are only sent when the state/policy is successfully removed from their respective tables. As it is, it's possible for a state/policy to both expire through reaching a hard limit, as well as being deleted by the user. Note that this behaviour isn't actually forbidden by RFC 2367. However, it is a quality of implementation issue. As an added bonus, the restructuring in this patch will help eventually in moving the expire notifications from softirq context into process context, thus improving their reliability. One important side-effect from this change is that SAs reaching their hard byte/packet limits are now deleted immediately, just like SAs that have reached their hard time limits. Previously they were announced immediately but only deleted after 30 seconds. This is bad because it prevents the system from issuing an ACQUIRE command until the existing state was deleted by the user or expires after the time is up. In the scenario where the expire notification was lost this introduces a 30 second delay into the system for no good reason. Signed-off-by: Herbert Xu --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a159655ebed..aaa0f5f330e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -679,7 +679,7 @@ static inline int xfrm_sk_clone_policy(struct sock *sk) return 0; } -extern void xfrm_policy_delete(struct xfrm_policy *pol, int dir); +extern int xfrm_policy_delete(struct xfrm_policy *pol, int dir); static inline void xfrm_sk_free_policy(struct sock *sk) { -- cgit v1.2.3 From bf08867f91a43aa3ba2e4598c06c4769a6cdddf6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2005 22:44:00 -0700 Subject: [IPSEC] Turn km_event.data into a union This patch turns km_event.data into a union. This makes code that uses it clearer. Signed-off-by: Herbert Xu --- include/net/xfrm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aaa0f5f330e..fda356e8101 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -173,7 +173,12 @@ enum { /* callback structure passed from either netlink or pfkey */ struct km_event { - u32 data; + union { + u32 hard; + u32 proto; + u32 byid; + } data; + u32 seq; u32 pid; u32 event; -- cgit v1.2.3 From f60f6b8f70c756fc786d68f02ec17a1e84db645f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2005 22:44:37 -0700 Subject: [IPSEC] Use XFRM_MSG_* instead of XFRM_SAP_* This patch removes XFRM_SAP_* and converts them over to XFRM_MSG_*. The netlink interface is meant to map directly onto the underlying xfrm subsystem. Therefore rather than using a new independent representation for the events we can simply use the existing ones from xfrm_user. Signed-off-by: Herbert Xu --- include/net/xfrm.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fda356e8101..0e65e02b7a1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -158,18 +158,6 @@ enum { XFRM_STATE_DEAD }; -/* events that could be sent by kernel */ -enum { - XFRM_SAP_INVALID, - XFRM_SAP_EXPIRED, - XFRM_SAP_ADDED, - XFRM_SAP_UPDATED, - XFRM_SAP_DELETED, - XFRM_SAP_FLUSHED, - __XFRM_SAP_MAX -}; -#define XFRM_SAP_MAX (__XFRM_SAP_MAX - 1) - /* callback structure passed from either netlink or pfkey */ struct km_event { -- cgit v1.2.3 From 2e6599cb899ba4b133f42cbf9d2b1883d2dc583a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:46:52 -0700 Subject: [NET] Generalise TCP's struct open_request minisock infrastructure Kept this first changeset minimal, without changing existing names to ease peer review. Basicaly tcp_openreq_alloc now receives the or_calltable, that in turn has two new members: ->slab, that replaces tcp_openreq_cachep ->obj_size, to inform the size of the openreq descendant for a specific protocol The protocol specific fields in struct open_request were moved to a class hierarchy, with the things that are common to all connection oriented PF_INET protocols in struct inet_request_sock, the TCP ones in tcp_request_sock, that is an inet_request_sock, that is an open_request. I.e. this uses the same approach used for the struct sock class hierarchy, with sk_prot indicating if the protocol wants to use the open_request infrastructure by filling in sk_prot->rsk_prot with an or_calltable. Results? Performance is improved and TCP v4 now uses only 64 bytes per open request minisock, down from 96 without this patch :-) Next changeset will rename some of the structs, fields and functions mentioned above, struct or_calltable is way unclear, better name it struct request_sock_ops, s/struct open_request/struct request_sock/g, etc. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/request_sock.h | 77 ++++++++++++++++++++++++++++++++++++++++ include/net/sock.h | 4 +++ include/net/tcp.h | 87 ++++++---------------------------------------- include/net/tcp_ecn.h | 7 ++-- 4 files changed, 96 insertions(+), 79 deletions(-) create mode 100644 include/net/request_sock.h (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h new file mode 100644 index 00000000000..9502f558793 --- /dev/null +++ b/include/net/request_sock.h @@ -0,0 +1,77 @@ +/* + * NET Generic infrastructure for Network protocols. + * + * Definitions for request_sock + * + * Authors: Arnaldo Carvalho de Melo + * + * From code originally in include/net/tcp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _REQUEST_SOCK_H +#define _REQUEST_SOCK_H + +#include +#include +#include + +struct open_request; +struct sk_buff; +struct dst_entry; +struct proto; + +struct or_calltable { + int family; + kmem_cache_t *slab; + int obj_size; + int (*rtx_syn_ack)(struct sock *sk, + struct open_request *req, + struct dst_entry *dst); + void (*send_ack)(struct sk_buff *skb, + struct open_request *req); + void (*send_reset)(struct sk_buff *skb); + void (*destructor)(struct open_request *req); +}; + +/* struct open_request - mini sock to represent a connection request + */ +struct open_request { + struct open_request *dl_next; /* Must be first member! */ + u16 mss; + u8 retrans; + u8 __pad; + /* The following two fields can be easily recomputed I think -AK */ + u32 window_clamp; /* window clamp at creation time */ + u32 rcv_wnd; /* rcv_wnd offered first time */ + u32 ts_recent; + unsigned long expires; + struct or_calltable *class; + struct sock *sk; +}; + +static inline struct open_request *tcp_openreq_alloc(struct or_calltable *class) +{ + struct open_request *req = kmem_cache_alloc(class->slab, SLAB_ATOMIC); + + if (req != NULL) + req->class = class; + + return req; +} + +static inline void tcp_openreq_fastfree(struct open_request *req) +{ + kmem_cache_free(req->class->slab, req); +} + +static inline void tcp_openreq_free(struct open_request *req) +{ + req->class->destructor(req); + tcp_openreq_fastfree(req); +} + +#endif /* _REQUEST_SOCK_H */ diff --git a/include/net/sock.h b/include/net/sock.h index a9ef3a6a13f..6919276af8a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -484,6 +484,8 @@ extern void sk_stream_kill_queues(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); +struct or_calltable; + /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto @@ -547,6 +549,8 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; + struct or_calltable *rsk_prot; + struct module *owner; char name[32]; diff --git a/include/net/tcp.h b/include/net/tcp.h index e71f8ba3e10..d438ba566b8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -613,74 +614,6 @@ extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; -struct open_request; - -struct or_calltable { - int family; - int (*rtx_syn_ack) (struct sock *sk, struct open_request *req, struct dst_entry*); - void (*send_ack) (struct sk_buff *skb, struct open_request *req); - void (*destructor) (struct open_request *req); - void (*send_reset) (struct sk_buff *skb); -}; - -struct tcp_v4_open_req { - __u32 loc_addr; - __u32 rmt_addr; - struct ip_options *opt; -}; - -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -struct tcp_v6_open_req { - struct in6_addr loc_addr; - struct in6_addr rmt_addr; - struct sk_buff *pktopts; - int iif; -}; -#endif - -/* this structure is too big */ -struct open_request { - struct open_request *dl_next; /* Must be first member! */ - __u32 rcv_isn; - __u32 snt_isn; - __u16 rmt_port; - __u16 mss; - __u8 retrans; - __u8 __pad; - __u16 snd_wscale : 4, - rcv_wscale : 4, - tstamp_ok : 1, - sack_ok : 1, - wscale_ok : 1, - ecn_ok : 1, - acked : 1; - /* The following two fields can be easily recomputed I think -AK */ - __u32 window_clamp; /* window clamp at creation time */ - __u32 rcv_wnd; /* rcv_wnd offered first time */ - __u32 ts_recent; - unsigned long expires; - struct or_calltable *class; - struct sock *sk; - union { - struct tcp_v4_open_req v4_req; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - struct tcp_v6_open_req v6_req; -#endif - } af; -}; - -/* SLAB cache for open requests. */ -extern kmem_cache_t *tcp_openreq_cachep; - -#define tcp_openreq_alloc() kmem_cache_alloc(tcp_openreq_cachep, SLAB_ATOMIC) -#define tcp_openreq_fastfree(req) kmem_cache_free(tcp_openreq_cachep, req) - -static inline void tcp_openreq_free(struct open_request *req) -{ - req->class->destructor(req); - tcp_openreq_fastfree(req); -} - #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #define TCP_INET_FAMILY(fam) ((fam) == AF_INET) #else @@ -1832,17 +1765,19 @@ static __inline__ void tcp_openreq_init(struct open_request *req, struct tcp_options_received *rx_opt, struct sk_buff *skb) { + struct inet_request_sock *ireq = inet_rsk(req); + req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */ - req->rcv_isn = TCP_SKB_CB(skb)->seq; + tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; - req->tstamp_ok = rx_opt->tstamp_ok; - req->sack_ok = rx_opt->sack_ok; - req->snd_wscale = rx_opt->snd_wscale; - req->wscale_ok = rx_opt->wscale_ok; - req->acked = 0; - req->ecn_ok = 0; - req->rmt_port = skb->h.th->source; + ireq->tstamp_ok = rx_opt->tstamp_ok; + ireq->sack_ok = rx_opt->sack_ok; + ireq->snd_wscale = rx_opt->snd_wscale; + ireq->wscale_ok = rx_opt->wscale_ok; + ireq->acked = 0; + ireq->ecn_ok = 0; + ireq->rmt_port = skb->h.th->source; } extern void tcp_enter_memory_pressure(void); diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index dc1456389a9..94ad970e844 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -2,6 +2,7 @@ #define _NET_TCP_ECN_H_ 1 #include +#include #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH)) @@ -40,7 +41,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp, static __inline__ void TCP_ECN_make_synack(struct open_request *req, struct tcphdr *th) { - if (req->ecn_ok) + if (inet_rsk(req)->ecn_ok) th->ece = 1; } @@ -113,14 +114,14 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, struct open_request *req) { - tp->ecn_flags = req->ecn_ok ? TCP_ECN_OK : 0; + tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } static __inline__ void TCP_ECN_create_request(struct open_request *req, struct tcphdr *th) { if (sysctl_tcp_ecn && th->ece && th->cwr) - req->ecn_ok = 1; + inet_rsk(req)->ecn_ok = 1; } #endif -- cgit v1.2.3 From 60236fdd08b2169045a3bbfc5ffe1576e6c3c17b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:47:21 -0700 Subject: [NET] Rename open_request to request_sock Ok, this one just renames some stuff to have a better namespace and to dissassociate it from TCP: struct open_request -> struct request_sock tcp_openreq_alloc -> reqsk_alloc tcp_openreq_free -> reqsk_free tcp_openreq_fastfree -> __reqsk_free With this most of the infrastructure closely resembles a struct sock methods subset. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/request_sock.h | 34 +++++++++++++++++----------------- include/net/sock.h | 4 ++-- include/net/tcp.h | 30 +++++++++++++++--------------- include/net/tcp_ecn.h | 6 +++--- 4 files changed, 37 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 9502f558793..08a8fd1d161 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -19,28 +19,28 @@ #include #include -struct open_request; +struct request_sock; struct sk_buff; struct dst_entry; struct proto; -struct or_calltable { +struct request_sock_ops { int family; kmem_cache_t *slab; int obj_size; int (*rtx_syn_ack)(struct sock *sk, - struct open_request *req, + struct request_sock *req, struct dst_entry *dst); void (*send_ack)(struct sk_buff *skb, - struct open_request *req); + struct request_sock *req); void (*send_reset)(struct sk_buff *skb); - void (*destructor)(struct open_request *req); + void (*destructor)(struct request_sock *req); }; -/* struct open_request - mini sock to represent a connection request +/* struct request_sock - mini sock to represent a connection request */ -struct open_request { - struct open_request *dl_next; /* Must be first member! */ +struct request_sock { + struct request_sock *dl_next; /* Must be first member! */ u16 mss; u8 retrans; u8 __pad; @@ -49,29 +49,29 @@ struct open_request { u32 rcv_wnd; /* rcv_wnd offered first time */ u32 ts_recent; unsigned long expires; - struct or_calltable *class; + struct request_sock_ops *rsk_ops; struct sock *sk; }; -static inline struct open_request *tcp_openreq_alloc(struct or_calltable *class) +static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops) { - struct open_request *req = kmem_cache_alloc(class->slab, SLAB_ATOMIC); + struct request_sock *req = kmem_cache_alloc(ops->slab, SLAB_ATOMIC); if (req != NULL) - req->class = class; + req->rsk_ops = ops; return req; } -static inline void tcp_openreq_fastfree(struct open_request *req) +static inline void __reqsk_free(struct request_sock *req) { - kmem_cache_free(req->class->slab, req); + kmem_cache_free(req->rsk_ops->slab, req); } -static inline void tcp_openreq_free(struct open_request *req) +static inline void reqsk_free(struct request_sock *req) { - req->class->destructor(req); - tcp_openreq_fastfree(req); + req->rsk_ops->destructor(req); + __reqsk_free(req); } #endif /* _REQUEST_SOCK_H */ diff --git a/include/net/sock.h b/include/net/sock.h index 6919276af8a..e593af5b1ec 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -484,7 +484,7 @@ extern void sk_stream_kill_queues(struct sock *sk); extern int sk_wait_data(struct sock *sk, long *timeo); -struct or_calltable; +struct request_sock_ops; /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface @@ -549,7 +549,7 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; - struct or_calltable *rsk_prot; + struct request_sock_ops *rsk_prot; struct module *owner; diff --git a/include/net/tcp.h b/include/net/tcp.h index d438ba566b8..6663086a5e3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -641,7 +641,7 @@ struct tcp_func { struct sock * (*syn_recv_sock) (struct sock *sk, struct sk_buff *skb, - struct open_request *req, + struct request_sock *req, struct dst_entry *dst); int (*remember_stamp) (struct sock *sk); @@ -785,8 +785,8 @@ extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, unsigned len); extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, - struct open_request *req, - struct open_request **prev); + struct request_sock *req, + struct request_sock **prev); extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); @@ -836,12 +836,12 @@ extern int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); extern struct sock * tcp_create_openreq_child(struct sock *sk, - struct open_request *req, + struct request_sock *req, struct sk_buff *skb); extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct open_request *req, + struct request_sock *req, struct dst_entry *dst); extern int tcp_v4_do_rcv(struct sock *sk, @@ -855,7 +855,7 @@ extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, - struct open_request *req); + struct request_sock *req); extern int tcp_disconnect(struct sock *sk, int flags); @@ -1683,7 +1683,7 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void tcp_acceptq_queue(struct sock *sk, struct open_request *req, +static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, struct sock *child) { struct tcp_sock *tp = tcp_sk(sk); @@ -1707,11 +1707,11 @@ struct tcp_listen_opt int qlen_young; int clock_hand; u32 hash_rnd; - struct open_request *syn_table[TCP_SYNQ_HSIZE]; + struct request_sock *syn_table[TCP_SYNQ_HSIZE]; }; static inline void -tcp_synq_removed(struct sock *sk, struct open_request *req) +tcp_synq_removed(struct sock *sk, struct request_sock *req) { struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; @@ -1745,23 +1745,23 @@ static inline int tcp_synq_is_full(struct sock *sk) return tcp_synq_len(sk) >> tcp_sk(sk)->listen_opt->max_qlen_log; } -static inline void tcp_synq_unlink(struct tcp_sock *tp, struct open_request *req, - struct open_request **prev) +static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, + struct request_sock **prev) { write_lock(&tp->syn_wait_lock); *prev = req->dl_next; write_unlock(&tp->syn_wait_lock); } -static inline void tcp_synq_drop(struct sock *sk, struct open_request *req, - struct open_request **prev) +static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, + struct request_sock **prev) { tcp_synq_unlink(tcp_sk(sk), req, prev); tcp_synq_removed(sk, req); - tcp_openreq_free(req); + reqsk_free(req); } -static __inline__ void tcp_openreq_init(struct open_request *req, +static __inline__ void tcp_openreq_init(struct request_sock *req, struct tcp_options_received *rx_opt, struct sk_buff *skb) { diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 94ad970e844..64980ee8c92 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -39,7 +39,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct tcp_sock *tp, } static __inline__ void -TCP_ECN_make_synack(struct open_request *req, struct tcphdr *th) +TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) { if (inet_rsk(req)->ecn_ok) th->ece = 1; @@ -112,13 +112,13 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) } static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, - struct open_request *req) + struct request_sock *req) { tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } static __inline__ void -TCP_ECN_create_request(struct open_request *req, struct tcphdr *th) +TCP_ECN_create_request(struct request_sock *req, struct tcphdr *th) { if (sysctl_tcp_ecn && th->ece && th->cwr) inet_rsk(req)->ecn_ok = 1; -- cgit v1.2.3 From 0e87506fcc734647c7b2497eee4eb81e785c857a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:47:59 -0700 Subject: [NET] Generalise tcp_listen_opt This chunks out the accept_queue and tcp_listen_opt code and moves them to net/core/request_sock.c and include/net/request_sock.h, to make it useful for other transport protocols, DCCP being the first one to use it. Next patches will rename tcp_listen_opt to accept_sock and remove the inline tcp functions that just call a reqsk_queue_ function. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/request_sock.h | 178 +++++++++++++++++++++++++++++++++++++++++++++ include/net/tcp.h | 46 ++---------- 2 files changed, 186 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 08a8fd1d161..38943ed04e7 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -16,7 +16,9 @@ #define _REQUEST_SOCK_H #include +#include #include + #include struct request_sock; @@ -74,4 +76,180 @@ static inline void reqsk_free(struct request_sock *req) __reqsk_free(req); } +extern int sysctl_max_syn_backlog; + +/** struct tcp_listen_opt - listen state + * + * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs + */ +struct tcp_listen_opt { + u8 max_qlen_log; + /* 3 bytes hole, try to use */ + int qlen; + int qlen_young; + int clock_hand; + u32 hash_rnd; + struct request_sock *syn_table[0]; +}; + +/** struct request_sock_queue - queue of request_socks + * + * @rskq_accept_head - FIFO head of established children + * @rskq_accept_tail - FIFO tail of established children + * @syn_wait_lock - serializer + * + * %syn_wait_lock is necessary only to avoid proc interface having to grab the main + * lock sock while browsing the listening hash (otherwise it's deadlock prone). + * + * This lock is acquired in read mode only from listening_get_next() seq_file + * op and it's acquired in write mode _only_ from code that is actively + * changing rskq_accept_head. All readers that are holding the master sock lock + * don't need to grab this lock in read mode too as rskq_accept_head. writes + * are always protected from the main sock lock. + */ +struct request_sock_queue { + struct request_sock *rskq_accept_head; + struct request_sock *rskq_accept_tail; + rwlock_t syn_wait_lock; + struct tcp_listen_opt *listen_opt; +}; + +extern int reqsk_queue_alloc(struct request_sock_queue *queue, + const int nr_table_entries); + +static inline struct tcp_listen_opt *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) +{ + struct tcp_listen_opt *lopt; + + write_lock_bh(&queue->syn_wait_lock); + lopt = queue->listen_opt; + queue->listen_opt = NULL; + write_unlock_bh(&queue->syn_wait_lock); + + return lopt; +} + +static inline void reqsk_queue_destroy(struct request_sock_queue *queue) +{ + kfree(reqsk_queue_yank_listen_sk(queue)); +} + +static inline struct request_sock * + reqsk_queue_yank_acceptq(struct request_sock_queue *queue) +{ + struct request_sock *req = queue->rskq_accept_head; + + queue->rskq_accept_head = queue->rskq_accept_head = NULL; + return req; +} + +static inline int reqsk_queue_empty(struct request_sock_queue *queue) +{ + return queue->rskq_accept_head == NULL; +} + +static inline void reqsk_queue_unlink(struct request_sock_queue *queue, + struct request_sock *req, + struct request_sock **prev_req) +{ + write_lock(&queue->syn_wait_lock); + *prev_req = req->dl_next; + write_unlock(&queue->syn_wait_lock); +} + +static inline void reqsk_queue_add(struct request_sock_queue *queue, + struct request_sock *req, + struct sock *parent, + struct sock *child) +{ + req->sk = child; + sk_acceptq_added(parent); + + if (queue->rskq_accept_head == NULL) + queue->rskq_accept_head = req; + else + queue->rskq_accept_tail->dl_next = req; + + queue->rskq_accept_tail = req; + req->dl_next = NULL; +} + +static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue) +{ + struct request_sock *req = queue->rskq_accept_head; + + BUG_TRAP(req != NULL); + + queue->rskq_accept_head = req->dl_next; + if (queue->rskq_accept_head == NULL) + queue->rskq_accept_tail = NULL; + + return req; +} + +static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue, + struct sock *parent) +{ + struct request_sock *req = reqsk_queue_remove(queue); + struct sock *child = req->sk; + + BUG_TRAP(child != NULL); + + sk_acceptq_removed(parent); + __reqsk_free(req); + return child; +} + +static inline int reqsk_queue_removed(struct request_sock_queue *queue, + struct request_sock *req) +{ + struct tcp_listen_opt *lopt = queue->listen_opt; + + if (req->retrans == 0) + --lopt->qlen_young; + + return --lopt->qlen; +} + +static inline int reqsk_queue_added(struct request_sock_queue *queue) +{ + struct tcp_listen_opt *lopt = queue->listen_opt; + const int prev_qlen = lopt->qlen; + + lopt->qlen_young++; + lopt->qlen++; + return prev_qlen; +} + +static inline int reqsk_queue_len(struct request_sock_queue *queue) +{ + return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; +} + +static inline int reqsk_queue_len_young(struct request_sock_queue *queue) +{ + return queue->listen_opt->qlen_young; +} + +static inline int reqsk_queue_is_full(struct request_sock_queue *queue) +{ + return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; +} + +static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, + u32 hash, struct request_sock *req, + unsigned timeout) +{ + struct tcp_listen_opt *lopt = queue->listen_opt; + + req->expires = jiffies + timeout; + req->retrans = 0; + req->sk = NULL; + req->dl_next = lopt->syn_table[hash]; + + write_lock(&queue->syn_wait_lock); + lopt->syn_table[hash] = req; + write_unlock(&queue->syn_wait_lock); +} + #endif /* _REQUEST_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6663086a5e3..a2e323c5445 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1686,71 +1686,41 @@ static inline int tcp_full_space(const struct sock *sk) static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req, struct sock *child) { - struct tcp_sock *tp = tcp_sk(sk); - - req->sk = child; - sk_acceptq_added(sk); - - if (!tp->accept_queue_tail) { - tp->accept_queue = req; - } else { - tp->accept_queue_tail->dl_next = req; - } - tp->accept_queue_tail = req; - req->dl_next = NULL; + reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); } -struct tcp_listen_opt -{ - u8 max_qlen_log; /* log_2 of maximal queued SYNs */ - int qlen; - int qlen_young; - int clock_hand; - u32 hash_rnd; - struct request_sock *syn_table[TCP_SYNQ_HSIZE]; -}; - static inline void tcp_synq_removed(struct sock *sk, struct request_sock *req) { - struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; - - if (--lopt->qlen == 0) + if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) tcp_delete_keepalive_timer(sk); - if (req->retrans == 0) - lopt->qlen_young--; } static inline void tcp_synq_added(struct sock *sk) { - struct tcp_listen_opt *lopt = tcp_sk(sk)->listen_opt; - - if (lopt->qlen++ == 0) + if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); - lopt->qlen_young++; } static inline int tcp_synq_len(struct sock *sk) { - return tcp_sk(sk)->listen_opt->qlen; + return reqsk_queue_len(&tcp_sk(sk)->accept_queue); } static inline int tcp_synq_young(struct sock *sk) { - return tcp_sk(sk)->listen_opt->qlen_young; + return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); } static inline int tcp_synq_is_full(struct sock *sk) { - return tcp_synq_len(sk) >> tcp_sk(sk)->listen_opt->max_qlen_log; + return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); } static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, - struct request_sock **prev) + struct request_sock **prev) { - write_lock(&tp->syn_wait_lock); - *prev = req->dl_next; - write_unlock(&tp->syn_wait_lock); + reqsk_queue_unlink(&tp->accept_queue, req, prev); } static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, -- cgit v1.2.3 From 2ad69c55a282315e6119cf7fd744f26a925bdfd2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 18 Jun 2005 22:48:55 -0700 Subject: [NET] rename struct tcp_listen_opt to struct listen_sock Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/request_sock.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 38943ed04e7..72fd6f5e86b 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -78,11 +78,11 @@ static inline void reqsk_free(struct request_sock *req) extern int sysctl_max_syn_backlog; -/** struct tcp_listen_opt - listen state +/** struct listen_sock - listen state * * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs */ -struct tcp_listen_opt { +struct listen_sock { u8 max_qlen_log; /* 3 bytes hole, try to use */ int qlen; @@ -111,15 +111,15 @@ struct request_sock_queue { struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; rwlock_t syn_wait_lock; - struct tcp_listen_opt *listen_opt; + struct listen_sock *listen_opt; }; extern int reqsk_queue_alloc(struct request_sock_queue *queue, const int nr_table_entries); -static inline struct tcp_listen_opt *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) +static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock_queue *queue) { - struct tcp_listen_opt *lopt; + struct listen_sock *lopt; write_lock_bh(&queue->syn_wait_lock); lopt = queue->listen_opt; @@ -203,7 +203,7 @@ static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queu static inline int reqsk_queue_removed(struct request_sock_queue *queue, struct request_sock *req) { - struct tcp_listen_opt *lopt = queue->listen_opt; + struct listen_sock *lopt = queue->listen_opt; if (req->retrans == 0) --lopt->qlen_young; @@ -213,7 +213,7 @@ static inline int reqsk_queue_removed(struct request_sock_queue *queue, static inline int reqsk_queue_added(struct request_sock_queue *queue) { - struct tcp_listen_opt *lopt = queue->listen_opt; + struct listen_sock *lopt = queue->listen_opt; const int prev_qlen = lopt->qlen; lopt->qlen_young++; @@ -240,7 +240,7 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, u32 hash, struct request_sock *req, unsigned timeout) { - struct tcp_listen_opt *lopt = queue->listen_opt; + struct listen_sock *lopt = queue->listen_opt; req->expires = jiffies + timeout; req->retrans = 0; -- cgit v1.2.3 From e52c1f17e4ea8e61bd26eb25f1a184202693c2b9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 18 Jun 2005 22:49:40 -0700 Subject: [NET]: Move sysctl_max_syn_backlog into request_sock.c This fixes the CONFIG_INET=n build failure noticed by Andrew Morton. Signed-off-by: David S. Miller --- include/net/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a2e323c5445..f730935b824 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -564,7 +564,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ /* sysctl variables for tcp */ -extern int sysctl_max_syn_backlog; extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; -- cgit v1.2.3 From c7fb64db001f83ece669c76a02d8ec2fdb1dd307 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:50:55 -0700 Subject: [NETLINK]: Neighbour table configuration and statistics via rtnetlink To retrieve the neighbour tables send RTM_GETNEIGHTBL with the NLM_F_DUMP flag set. Every neighbour table configuration is spread over multiple messages to avoid running into message size limits on systems with many interfaces. The first message in the sequence transports all not device specific data such as statistics, configuration, and the default parameter set. This message is followed by 0..n messages carrying device specific parameter sets. Although the ordering should be sufficient, NDTA_NAME can be used to identify sequences. The initial message can be identified by checking for NDTA_CONFIG. The device specific messages do not contain this TLV but have NDTPA_IFINDEX set to the corresponding interface index. To change neighbour table attributes, send RTM_SETNEIGHTBL with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked otherwise. Device specific parameter sets can be changed by setting NDTPA_IFINDEX to the interface index of the corresponding device. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/neighbour.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4f33bbc21e7..17191ac9be7 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -65,6 +65,7 @@ struct neighbour; struct neigh_parms { + struct net_device *dev; struct neigh_parms *next; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; @@ -252,6 +253,9 @@ extern int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); extern int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); extern void neigh_app_ns(struct neighbour *n); +extern int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb); +extern int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); + extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); -- cgit v1.2.3 From 88121aea7bdb5fdc527388e262381829c4e1db16 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:51:12 -0700 Subject: [NEIGHBOUR]: Remove unused fields in struct neigh_parms and neigh_table Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/neighbour.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 17191ac9be7..89809891e5a 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -69,8 +69,6 @@ struct neigh_parms struct neigh_parms *next; int (*neigh_setup)(struct neighbour *); struct neigh_table *tbl; - int entries; - void *priv; void *sysctl_table; @@ -193,7 +191,6 @@ struct neigh_table atomic_t entries; rwlock_t lock; unsigned long last_rand; - struct neigh_parms *parms_list; kmem_cache_t *kmem_cachep; struct neigh_statistics *stats; struct neighbour **hash_buckets; -- cgit v1.2.3 From 9972b25d0c6e7f8f893eb3444dea37b42b1201de Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 18 Jun 2005 22:57:26 -0700 Subject: [PKT_SCHED]: Generic queue management interface for qdiscs using internal skb queues Implements an interface to be used by leaf qdiscs maintaining an internal skb queue. The interface maintains a backlog in bytes additionaly to the skb_queue_len() maintained by the queue itself. Relevant statistics get incremented automatically. Every function comes in two variants, one assuming Qdisc->q is used as queue and the second taking a sk_buff_head as argument. Be aware that, if you use multiple queues, you still have to maintain the Qdisc->q.qlen counter yourself. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/sch_generic.h | 122 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c57504b3b51..7b97405e2db 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -172,4 +172,126 @@ tcf_destroy(struct tcf_proto *tp) kfree(tp); } +static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff_head *list) +{ + __skb_queue_tail(list, skb); + sch->qstats.backlog += skb->len; + sch->bstats.bytes += skb->len; + sch->bstats.packets++; + + return NET_XMIT_SUCCESS; +} + +static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) +{ + return __qdisc_enqueue_tail(skb, sch, &sch->q); +} + +static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __skb_dequeue(list); + + if (likely(skb != NULL)) + sch->qstats.backlog -= skb->len; + + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) +{ + return __qdisc_dequeue_head(sch, &sch->q); +} + +static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __skb_dequeue_tail(list); + + if (likely(skb != NULL)) + sch->qstats.backlog -= skb->len; + + return skb; +} + +static inline struct sk_buff *qdisc_dequeue_tail(struct Qdisc *sch) +{ + return __qdisc_dequeue_tail(sch, &sch->q); +} + +static inline int __qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff_head *list) +{ + __skb_queue_head(list, skb); + sch->qstats.backlog += skb->len; + sch->qstats.requeues++; + + return NET_XMIT_SUCCESS; +} + +static inline int qdisc_requeue(struct sk_buff *skb, struct Qdisc *sch) +{ + return __qdisc_requeue(skb, sch, &sch->q); +} + +static inline void __qdisc_reset_queue(struct Qdisc *sch, + struct sk_buff_head *list) +{ + /* + * We do not know the backlog in bytes of this list, it + * is up to the caller to correct it + */ + skb_queue_purge(list); +} + +static inline void qdisc_reset_queue(struct Qdisc *sch) +{ + __qdisc_reset_queue(sch, &sch->q); + sch->qstats.backlog = 0; +} + +static inline unsigned int __qdisc_queue_drop(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __qdisc_dequeue_tail(sch, list); + + if (likely(skb != NULL)) { + unsigned int len = skb->len; + kfree_skb(skb); + return len; + } + + return 0; +} + +static inline unsigned int qdisc_queue_drop(struct Qdisc *sch) +{ + return __qdisc_queue_drop(sch, &sch->q); +} + +static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch) +{ + kfree_skb(skb); + sch->qstats.drops++; + + return NET_XMIT_DROP; +} + +static inline int qdisc_reshape_fail(struct sk_buff *skb, struct Qdisc *sch) +{ + sch->qstats.drops++; + +#ifdef CONFIG_NET_CLS_POLICE + if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch)) + goto drop; + + return NET_XMIT_SUCCESS; + +drop: +#endif + kfree_skb(skb); + return NET_XMIT_DROP; +} + #endif -- cgit v1.2.3 From 3f7a87d2fa9b42f7aade43914f060df68cc89cc7 Mon Sep 17 00:00:00 2001 From: Frank Filz Date: Mon, 20 Jun 2005 13:14:57 -0700 Subject: [SCTP] sctp_connectx() API support Implements sctp_connectx() as defined in the SCTP sockets API draft by tunneling the request through a setsockopt(). Signed-off-by: Frank Filz Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/command.h | 8 ++++---- include/net/sctp/constants.h | 7 ------- include/net/sctp/sctp.h | 17 +++++++++++++++++ include/net/sctp/sm.h | 8 ++++++-- include/net/sctp/structs.h | 41 ++++++++++++++++++++++++++++++++--------- include/net/sctp/user.h | 3 +++ 6 files changed, 62 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index ebc5282e6d5..dc107ffad48 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -65,9 +65,11 @@ typedef enum { SCTP_CMD_TIMER_START, /* Start a timer. */ SCTP_CMD_TIMER_RESTART, /* Restart a timer. */ SCTP_CMD_TIMER_STOP, /* Stop a timer. */ - SCTP_CMD_COUNTER_RESET, /* Reset a counter. */ - SCTP_CMD_COUNTER_INC, /* Increment a counter. */ + SCTP_CMD_INIT_CHOOSE_TRANSPORT, /* Choose transport for an INIT. */ + SCTP_CMD_INIT_COUNTER_RESET, /* Reset init counter. */ + SCTP_CMD_INIT_COUNTER_INC, /* Increment init counter. */ SCTP_CMD_INIT_RESTART, /* High level, do init timer work. */ + SCTP_CMD_COOKIEECHO_RESTART, /* High level, do cookie-echo timer work. */ SCTP_CMD_INIT_FAILED, /* High level, do init failure work. */ SCTP_CMD_REPORT_DUP, /* Report a duplicate TSN. */ SCTP_CMD_STRIKE, /* Mark a strike against a transport. */ @@ -118,7 +120,6 @@ typedef union { int error; sctp_state_t state; sctp_event_timeout_t to; - sctp_counter_t counter; void *ptr; struct sctp_chunk *chunk; struct sctp_association *asoc; @@ -165,7 +166,6 @@ SCTP_ARG_CONSTRUCTOR(U16, __u16, u16) SCTP_ARG_CONSTRUCTOR(U8, __u8, u8) SCTP_ARG_CONSTRUCTOR(ERROR, int, error) SCTP_ARG_CONSTRUCTOR(STATE, sctp_state_t, state) -SCTP_ARG_CONSTRUCTOR(COUNTER, sctp_counter_t, counter) SCTP_ARG_CONSTRUCTOR(TO, sctp_event_timeout_t, to) SCTP_ARG_CONSTRUCTOR(PTR, void *, ptr) SCTP_ARG_CONSTRUCTOR(CHUNK, struct sctp_chunk *, chunk) diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 2b76c0f4bab..4868c7f7749 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -263,13 +263,6 @@ enum { SCTP_MIN_PMTU = 576 }; enum { SCTP_MAX_DUP_TSNS = 16 }; enum { SCTP_MAX_GABS = 16 }; -typedef enum { - SCTP_COUNTER_INIT_ERROR, -} sctp_counter_t; - -/* How many counters does an association need? */ -#define SCTP_NUMBER_COUNTERS 5 - /* Here we define the default timers. */ /* cookie timer def = ? seconds */ diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 960abfa48d6..ef2738159ab 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -223,6 +223,22 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics); extern int sctp_debug_flag; #define SCTP_DEBUG_PRINTK(whatever...) \ ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) +#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) \ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + lead "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x" trail, \ + leadparm, \ + NIP6(saddr->v6.sin6_addr), \ + otherparms); \ + } else { \ + printk(KERN_DEBUG \ + lead "%u.%u.%u.%u" trail, \ + leadparm, \ + NIPQUAD(saddr->v4.sin_addr.s_addr), \ + otherparms); \ + } \ + } #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -236,6 +252,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG #define SCTP_ASSERT(expr, str, func) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index f4fcee10470..a53e08a45e3 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -116,7 +116,8 @@ sctp_state_fn_t sctp_sf_eat_data_fast_4_4; sctp_state_fn_t sctp_sf_eat_sack_6_2; sctp_state_fn_t sctp_sf_tabort_8_4_8; sctp_state_fn_t sctp_sf_operr_notify; -sctp_state_fn_t sctp_sf_t1_timer_expire; +sctp_state_fn_t sctp_sf_t1_init_timer_expire; +sctp_state_fn_t sctp_sf_t1_cookie_timer_expire; sctp_state_fn_t sctp_sf_t2_timer_expire; sctp_state_fn_t sctp_sf_t4_timer_expire; sctp_state_fn_t sctp_sf_t5_timer_expire; @@ -258,7 +259,10 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, void sctp_chunk_assign_tsn(struct sctp_chunk *); void sctp_chunk_assign_ssn(struct sctp_chunk *); -void sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, __u16 error); +sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, + __u16 error, + const struct sctp_association *asoc, + struct sctp_transport *transport); /* Prototypes for statetable processing. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6c24d9cd3d6..dfad4d3c581 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -867,10 +867,13 @@ struct sctp_transport { */ unsigned long last_time_ecne_reduced; - /* active : The current active state of this destination, - * : i.e. DOWN, UP, etc. + /* The number of times INIT has been sent on this transport. */ + int init_sent_count; + + /* state : The current state of this destination, + * : i.e. SCTP_ACTIVE, SCTP_INACTIVE, SCTP_UNKOWN. */ - int active; + int state; /* hb_allowed : The current heartbeat state of this destination, * : i.e. ALLOW-HB, NO-HEARTBEAT, etc. @@ -1222,9 +1225,6 @@ struct sctp_endpoint { /* sendbuf acct. policy. */ __u32 sndbuf_policy; - - /* Name for debugging output... */ - char *debug_name; }; /* Recover the outter endpoint structure. */ @@ -1314,11 +1314,23 @@ struct sctp_association { * : association. Normally this information is * : hashed or keyed for quick lookup and access * : of the TCB. + * : The list is also initialized with the list + * : of addresses passed with the sctp_connectx() + * : call. * * It is a list of SCTP_transport's. */ struct list_head transport_addr_list; + /* transport_count + * + * Peer : A count of the number of peer addresses + * Transport : in the Peer Transport Address List. + * Address : + * Count : + */ + __u16 transport_count; + /* port * The transport layer port number. */ @@ -1486,6 +1498,9 @@ struct sctp_association { /* Transport to which SHUTDOWN chunk was last sent. */ struct sctp_transport *shutdown_last_sent_to; + /* Transport to which INIT chunk was last sent. */ + struct sctp_transport *init_last_sent_to; + /* Next TSN : The next TSN number to be assigned to a new * : DATA chunk. This is sent in the INIT or INIT * : ACK chunk to the peer and incremented each @@ -1549,8 +1564,11 @@ struct sctp_association { /* The message size at which SCTP fragmentation will occur. */ __u32 frag_point; - /* Currently only one counter is used to count INIT errors. */ - int counters[SCTP_NUMBER_COUNTERS]; + /* Counter used to count INIT errors. */ + int init_err_counter; + + /* Count the number of INIT cycles (for doubling timeout). */ + int init_cycle; /* Default send parameters. */ __u16 default_stream; @@ -1708,6 +1726,8 @@ void sctp_association_free(struct sctp_association *); void sctp_association_put(struct sctp_association *); void sctp_association_hold(struct sctp_association *); +struct sctp_transport *sctp_assoc_choose_init_transport( + struct sctp_association *); struct sctp_transport *sctp_assoc_choose_shutdown_transport( struct sctp_association *); void sctp_assoc_update_retran_path(struct sctp_association *); @@ -1717,9 +1737,12 @@ int sctp_assoc_lookup_laddr(struct sctp_association *asoc, const union sctp_addr *laddr); struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *, const union sctp_addr *address, - const int gfp); + const int gfp, + const int peer_state); void sctp_assoc_del_peer(struct sctp_association *asoc, const union sctp_addr *addr); +void sctp_assoc_rm_peer(struct sctp_association *asoc, + struct sctp_transport *peer); void sctp_assoc_control_transport(struct sctp_association *, struct sctp_transport *, sctp_transport_cmd_t, sctp_sn_error_t); diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 2758e8ce4f2..f6328aeddcc 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -111,6 +111,8 @@ enum sctp_optname { #define SCTP_GET_LOCAL_ADDRS_NUM SCTP_GET_LOCAL_ADDRS_NUM SCTP_GET_LOCAL_ADDRS, /* Get all local addresss. */ #define SCTP_GET_LOCAL_ADDRS SCTP_GET_LOCAL_ADDRS + SCTP_SOCKOPT_CONNECTX, /* CONNECTX requests. */ +#define SCTP_SOCKOPT_CONNECTX SCTP_SOCKOPT_CONNECTX }; /* @@ -527,6 +529,7 @@ struct sctp_paddrinfo { enum sctp_spinfo_state { SCTP_INACTIVE, SCTP_ACTIVE, + SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ }; /* -- cgit v1.2.3 From 72cb6962a91f2af9eef69a06198e1949c10259ae Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jun 2005 13:18:08 -0700 Subject: [IPSEC]: Add xfrm_init_state This patch adds xfrm_init_state which is simply a wrapper that calls xfrm_get_type and subsequently x->type->init_state. It also gets rid of the unused args argument. Abstracting it out allows us to add common initialisation code, e.g., to set family-specific flags. The add_time setting in xfrm_user.c was deleted because it's already set by xfrm_state_alloc. Signed-off-by: Herbert Xu Acked-by: James Morris Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0e65e02b7a1..77bfdde440f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -225,7 +225,7 @@ struct xfrm_type struct module *owner; __u8 proto; - int (*init_state)(struct xfrm_state *x, void *args); + int (*init_state)(struct xfrm_state *x); void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); int (*post_input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb); @@ -839,6 +839,7 @@ extern int xfrm_replay_check(struct xfrm_state *x, u32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, u32 seq); extern int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm_state_mtu(struct xfrm_state *x, int mtu); +extern int xfrm_init_state(struct xfrm_state *x); extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); -- cgit v1.2.3 From d094cd83c06e06e01d8edb540555f3f64e4081c2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 20 Jun 2005 13:19:41 -0700 Subject: [IPSEC]: Add xfrm_state_afinfo->init_flags This patch adds the xfrm_state_afinfo->init_flags hook which allows each address family to perform any common initialisation that does not require a corresponding destructor call. It will be used subsequently to set the XFRM_STATE_NOPMTUDISC flag in IPv4. It also fixes up the error codes returned by xfrm_init_state. Signed-off-by: Herbert Xu Acked-by: James Morris Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 77bfdde440f..029522a4ced 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -204,6 +204,7 @@ struct xfrm_state_afinfo { rwlock_t lock; struct list_head *state_bydst; struct list_head *state_byspi; + int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); -- cgit v1.2.3 From f852640e74f71e6dd38146e1149ec1fe6da2fb07 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 20 Jun 2005 13:31:11 -0700 Subject: [AX25]: endian-annotate ax25_type_trans() Signed-off-by: Alexey Dobriyan Acked-by: Ralf Baechle Signed-off-by: David S. Miller --- include/net/ax25.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ax25.h b/include/net/ax25.h index 9e6368a5454..828a3a93dda 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -220,7 +220,7 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } -static inline unsigned short ax25_type_trans(struct sk_buff *skb, struct net_device *dev) +static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; skb->pkt_type = PACKET_HOST; -- cgit v1.2.3 From 246955fe4c38bd706ae30e37c64892c94213775d Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Mon, 20 Jun 2005 13:36:39 -0700 Subject: [NETLINK]: fib_lookup() via netlink Below is a more generic patch to do fib_lookup via netlink. For others we should say that we discussed this as a way to verify route selection. It's also possible there are others uses for this. In short the fist half of struct fib_result_nl is filled in by caller and netlink call fills in the other half and returns it. In case anyone is interested there is a corresponding user app to compare the full routing table this was used to test implementation of the LC-trie. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e5a5f6b62f8..a4208a336ac 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -109,6 +109,20 @@ struct fib_result { #endif }; +struct fib_result_nl { + u32 fl_addr; /* To be looked up*/ + u32 fl_fwmark; + unsigned char fl_tos; + unsigned char fl_scope; + unsigned char tb_id_in; + + unsigned char tb_id; /* Results */ + unsigned char prefixlen; + unsigned char nh_sel; + unsigned char type; + unsigned char scope; + int err; +}; #ifdef CONFIG_IP_ROUTE_MULTIPATH -- cgit v1.2.3 From 0d51aa80a9b1db43920c0770c3bb842dd823c005 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Tue, 21 Jun 2005 13:51:04 -0700 Subject: [IPV6]: V6 route events reported with wrong netlink PID and seq number Essentially netlink at the moment always reports a pid and sequence of 0 always for v6 route activities. To understand the repurcassions of this look at: http://lists.quagga.net/pipermail/quagga-dev/2005-June/003507.html While fixing this, i took the liberty to resolve the outstanding issue of IPV6 routes inserted via ioctls to have the correct pids as well. This patch tries to behave as close as possible to the v4 routes i.e maintains whatever PID the socket issuing the command owns as opposed to the process. That made the patch a little bulky. I have tested against both netlink derived utility to add/del routes as well as ioctl derived one. The Quagga folks have tested against quagga. This fixes the problem and so far hasnt been detected to introduce any new issues. Signed-off-by: Jamal Hadi Salim Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 9 ++++++--- include/net/ip6_route.h | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 31990451819..a66e9de16a6 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -167,14 +167,17 @@ extern int fib6_walk_continue(struct fib6_walker_t *w); extern int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern void inet6_rt_notify(int event, struct rt6_info *rt, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + struct netlink_skb_parms *req); extern void fib6_run_gc(unsigned long dummy); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d5d1dd10cdb..f920706d526 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -41,13 +41,16 @@ extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg); extern int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern int ip6_ins_rt(struct rt6_info *, struct nlmsghdr *, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern int ip6_del_rt(struct rt6_info *, struct nlmsghdr *, - void *rtattr); + void *rtattr, + struct netlink_skb_parms *req); extern int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, -- cgit v1.2.3 From 39c715b71740c4a78ba4769fb54826929bac03cb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 21 Jun 2005 17:14:34 -0700 Subject: [PATCH] smp_processor_id() cleanup This patch implements a number of smp_processor_id() cleanup ideas that Arjan van de Ven and I came up with. The previous __smp_processor_id/_smp_processor_id/smp_processor_id API spaghetti was hard to follow both on the implementational and on the usage side. Some of the complexity arose from picking wrong names, some of the complexity comes from the fact that not all architectures defined __smp_processor_id. In the new code, there are two externally visible symbols: - smp_processor_id(): debug variant. - raw_smp_processor_id(): nondebug variant. Replaces all existing uses of _smp_processor_id() and __smp_processor_id(). Defined by every SMP architecture in include/asm-*/smp.h. There is one new internal symbol, dependent on DEBUG_PREEMPT: - debug_smp_processor_id(): internal debug variant, mapped to smp_processor_id(). Also, i moved debug_smp_processor_id() from lib/kernel_lock.c into a new lib/smp_processor_id.c file. All related comments got updated and/or clarified. I have build/boot tested the following 8 .config combinations on x86: {SMP,UP} x {PREEMPT,!PREEMPT} x {DEBUG_PREEMPT,!DEBUG_PREEMPT} I have also build/boot tested x64 on UP/PREEMPT/DEBUG_PREEMPT. (Other architectures are untested, but should work just fine.) Signed-off-by: Ingo Molnar Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/route.h | 2 +- include/net/snmp.h | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index d34ca8fc675..c3cd069a9ac 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -107,7 +107,7 @@ struct rt_cache_stat extern struct rt_cache_stat *rt_cache_stat; #define RT_CACHE_STAT_INC(field) \ - (per_cpu_ptr(rt_cache_stat, _smp_processor_id())->field++) + (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) extern struct ip_rt_acct *ip_rt_acct; diff --git a/include/net/snmp.h b/include/net/snmp.h index a15ab256276..a36bed8ea21 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -128,18 +128,18 @@ struct linux_mib { #define SNMP_STAT_USRPTR(name) (name[1]) #define SNMP_INC_STATS_BH(mib, field) \ - (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field]++) + (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field]++) #define SNMP_INC_STATS_OFFSET_BH(mib, field, offset) \ - (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field + (offset)]++) + (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field + (offset)]++) #define SNMP_INC_STATS_USER(mib, field) \ - (per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field]++) + (per_cpu_ptr(mib[1], raw_smp_processor_id())->mibs[field]++) #define SNMP_INC_STATS(mib, field) \ - (per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]++) + (per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id())->mibs[field]++) #define SNMP_DEC_STATS(mib, field) \ - (per_cpu_ptr(mib[!in_softirq()], _smp_processor_id())->mibs[field]--) + (per_cpu_ptr(mib[!in_softirq()], raw_smp_processor_id())->mibs[field]--) #define SNMP_ADD_STATS_BH(mib, field, addend) \ - (per_cpu_ptr(mib[0], _smp_processor_id())->mibs[field] += addend) + (per_cpu_ptr(mib[0], raw_smp_processor_id())->mibs[field] += addend) #define SNMP_ADD_STATS_USER(mib, field, addend) \ - (per_cpu_ptr(mib[1], _smp_processor_id())->mibs[field] += addend) + (per_cpu_ptr(mib[1], raw_smp_processor_id())->mibs[field] += addend) #endif -- cgit v1.2.3 From cb65d506c34c86df5bcef939ce5a8666a451bd8b Mon Sep 17 00:00:00 2001 From: Shaun Pereira Date: Wed, 22 Jun 2005 22:15:01 -0700 Subject: [X25]: Selective sub-address matching with call user data. From: Shaun Pereira This is the first (independent of the second) patch of two that I am working on with x25 on linux (tested with xot on a cisco router). Details are as follows. Current state of module: A server using the current implementation (2.6.11.7) of the x25 module will accept a call request/ incoming call packet at the listening x.25 address, from all callers to that address, as long as NO call user data is present in the packet header. If the server needs to choose to accept a particular call request/ incoming call packet arriving at its listening x25 address, then the kernel has to allow a match of call user data present in the call request packet with its own. This is required when multiple servers listen at the same x25 address and device interface. The kernel currently matches ALL call user data, if present. Current Changes: This patch is a follow up to the patch submitted previously by Andrew Hendry, and allows the user to selectively control the number of octets of call user data in the call request packet, that the kernel will match. By default no call user data is matched, even if call user data is present. To allow call user data matching, a cudmatchlength > 0 has to be passed into the kernel after which the passed number of octets will be matched. Otherwise the kernel behavior is exactly as the original implementation. This patch also ensures that as is normally the case, no call user data will be present in the Call accepted / call connected packet sent back to the caller Future Changes on next patch: There are cases however when call user data may be present in the call accepted packet. According to the X.25 recommendation (ITU-T 10/96) section 5.2.3.2 call user data may be present in the call accepted packet provided the fast select facility is used. My next patch will include this fast select utility and the ability to send up to 128 octets call user data in the call accepted packet provided the fast select facility is used. I am currently testing this, again with xot on linux and cisco. Signed-off-by: Shaun Pereira (With a fix from Alexey Dobriyan ) Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/x25.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/x25.h b/include/net/x25.h index 7a1ba5bbb86..9dd70dd4a9b 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -134,7 +134,7 @@ struct x25_sock { struct sock sk; struct x25_address source_addr, dest_addr; struct x25_neigh *neighbour; - unsigned int lci; + unsigned int lci, cudmatchlength; unsigned char state, condition, qbitincl, intflag; unsigned short vs, vr, va, vl; unsigned long t2, t21, t22, t23; @@ -242,7 +242,6 @@ extern int x25_validate_nr(struct sock *, unsigned short); extern void x25_write_internal(struct sock *, int); extern int x25_decode(struct sock *, struct sk_buff *, int *, int *, int *, int *, int *); extern void x25_disconnect(struct sock *, int, unsigned char, unsigned char); -extern int x25_check_calluserdata(struct x25_calluserdata *,struct x25_calluserdata *); /* x25_timer.c */ extern void x25_start_heartbeat(struct sock *); -- cgit v1.2.3 From ebc3f64b864fc16a594c2e63bf55a55c7d42084b Mon Sep 17 00:00:00 2001 From: Shaun Pereira Date: Wed, 22 Jun 2005 22:16:17 -0700 Subject: [X25]: Fast select with no restriction on response This patch is a follow up to patch 1 regarding "Selective Sub Address matching with call user data". It allows use of the Fast-Select-Acceptance optional user facility for X.25. This patch just implements fast select with no restriction on response (NRR). What this means (according to ITU-T Recomendation 10/96 section 6.16) is that if in an incoming call packet, the relevant facility bits are set for fast-select-NRR, then the called DTE can issue a direct response to the incoming packet using a call-accepted packet that contains call-user-data. This patch allows such a response. The called DTE can also respond with a clear-request packet that contains call-user-data. However, this feature is currently not implemented by the patch. How is Fast Select Acceptance used? By default, the system does not allow fast select acceptance (as before). To enable a response to fast select acceptance, After a listen socket in created and bound as follows socket(AF_X25, SOCK_SEQPACKET, 0); bind(call_soc, (struct sockaddr *)&locl_addr, sizeof(locl_addr)); but before a listen system call is made, the following ioctl should be used. ioctl(call_soc,SIOCX25CALLACCPTAPPRV); Now the listen system call can be made listen(call_soc, 4); After this, an incoming-call packet will be accepted, but no call-accepted packet will be sent back until the following system call is made on the socket that accepts the call ioctl(vc_soc,SIOCX25SENDCALLACCPT); The network (or cisco xot router used for testing here) will allow the application server's call-user-data in the call-accepted packet, provided the call-request was made with Fast-select NRR. Signed-off-by: Shaun Pereira Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/x25.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/x25.h b/include/net/x25.h index 9dd70dd4a9b..8b39b98876e 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -79,6 +79,8 @@ enum { #define X25_DEFAULT_PACKET_SIZE X25_PS128 /* Default Packet Size */ #define X25_DEFAULT_THROUGHPUT 0x0A /* Deafult Throughput */ #define X25_DEFAULT_REVERSE 0x00 /* Default Reverse Charging */ +#define X25_DENY_ACCPT_APPRV 0x01 /* Default value */ +#define X25_ALLOW_ACCPT_APPRV 0x00 /* Control enabled */ #define X25_SMODULUS 8 #define X25_EMODULUS 128 @@ -94,7 +96,7 @@ enum { #define X25_FAC_CLASS_C 0x80 #define X25_FAC_CLASS_D 0xC0 -#define X25_FAC_REVERSE 0x01 +#define X25_FAC_REVERSE 0x01 /* also fast select */ #define X25_FAC_THROUGHPUT 0x02 #define X25_FAC_PACKET_SIZE 0x42 #define X25_FAC_WINDOW_SIZE 0x43 @@ -135,7 +137,7 @@ struct x25_sock { struct x25_address source_addr, dest_addr; struct x25_neigh *neighbour; unsigned int lci, cudmatchlength; - unsigned char state, condition, qbitincl, intflag; + unsigned char state, condition, qbitincl, intflag, accptapprv; unsigned short vs, vr, va, vl; unsigned long t2, t21, t22, t23; unsigned short fraglen; -- cgit v1.2.3 From 317a76f9a44b437d6301718f4e5d08bd93f98da7 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 23 Jun 2005 12:19:55 -0700 Subject: [TCP]: Add pluggable congestion control algorithm infrastructure. Allow TCP to have multiple pluggable congestion control algorithms. Algorithms are defined by a set of operations and can be built in or modules. The legacy "new RENO" algorithm is used as a starting point and fallback. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 237 +++++++++++++++++------------------------------------- 1 file changed, 75 insertions(+), 162 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index f730935b824..e427cf35915 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -505,25 +505,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) #else # define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) #endif - -#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation - * max_cwnd = snd_cwnd * beta - */ -#define BICTCP_MAX_INCREMENT 32 /* - * Limit on the amount of - * increment allowed during - * binary search. - */ -#define BICTCP_FUNC_OF_MIN_INCR 11 /* - * log(B/Smin)/log(B/(B-1))+1, - * Smin:min increment - * B:log factor - */ -#define BICTCP_B 4 /* - * In binary search, - * go to point (max+min)/N - */ - /* * TCP option */ @@ -596,16 +577,7 @@ extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_frto; extern int sysctl_tcp_low_latency; -extern int sysctl_tcp_westwood; -extern int sysctl_tcp_vegas_cong_avoid; -extern int sysctl_tcp_vegas_alpha; -extern int sysctl_tcp_vegas_beta; -extern int sysctl_tcp_vegas_gamma; extern int sysctl_tcp_nometrics_save; -extern int sysctl_tcp_bic; -extern int sysctl_tcp_bic_fast_convergence; -extern int sysctl_tcp_bic_low_window; -extern int sysctl_tcp_bic_beta; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; @@ -1136,6 +1108,80 @@ static inline void tcp_packets_out_dec(struct tcp_sock *tp, tp->packets_out -= tcp_skb_pcount(skb); } +/* Events passed to congestion control interface */ +enum tcp_ca_event { + CA_EVENT_TX_START, /* first transmit when no packets in flight */ + CA_EVENT_CWND_RESTART, /* congestion window restart */ + CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ + CA_EVENT_FRTO, /* fast recovery timeout */ + CA_EVENT_LOSS, /* loss timeout */ + CA_EVENT_FAST_ACK, /* in sequence ack */ + CA_EVENT_SLOW_ACK, /* other ack */ +}; + +/* + * Interface for adding new TCP congestion control handlers + */ +#define TCP_CA_NAME_MAX 16 +struct tcp_congestion_ops { + struct list_head list; + + /* initialize private data (optional) */ + void (*init)(struct tcp_sock *tp); + /* cleanup private data (optional) */ + void (*release)(struct tcp_sock *tp); + + /* return slow start threshold (required) */ + u32 (*ssthresh)(struct tcp_sock *tp); + /* lower bound for congestion window (optional) */ + u32 (*min_cwnd)(struct tcp_sock *tp); + /* do new cwnd calculation (required) */ + void (*cong_avoid)(struct tcp_sock *tp, u32 ack, + u32 rtt, u32 in_flight, int good_ack); + /* round trip time sample per acked packet (optional) */ + void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); + /* call before changing ca_state (optional) */ + void (*set_state)(struct tcp_sock *tp, u8 new_state); + /* call when cwnd event occurs (optional) */ + void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); + /* new value of cwnd after loss (optional) */ + u32 (*undo_cwnd)(struct tcp_sock *tp); + /* hook for packet ack accounting (optional) */ + void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); + /* get info for tcp_diag (optional) */ + void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); + + char name[TCP_CA_NAME_MAX]; + struct module *owner; +}; + +extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); +extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); + +extern void tcp_init_congestion_control(struct tcp_sock *tp); +extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); +extern int tcp_set_default_congestion_control(const char *name); +extern void tcp_get_default_congestion_control(char *name); + +extern struct tcp_congestion_ops tcp_reno; +extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); +extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, + u32 rtt, u32 in_flight, int flag); +extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); + +static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) +{ + if (tp->ca_ops->set_state) + tp->ca_ops->set_state(tp, ca_state); + tp->ca_state = ca_state; +} + +static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) +{ + if (tp->ca_ops->cwnd_event) + tp->ca_ops->cwnd_event(tp, event); +} + /* This determines how many packets are "in the network" to the best * of our knowledge. In many cases it is conservative, but where * detailed information is available from the receiver (via SACK @@ -1155,91 +1201,6 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) return (tp->packets_out - tp->left_out + tp->retrans_out); } -/* - * Which congestion algorithim is in use on the connection. - */ -#define tcp_is_vegas(__tp) ((__tp)->adv_cong == TCP_VEGAS) -#define tcp_is_westwood(__tp) ((__tp)->adv_cong == TCP_WESTWOOD) -#define tcp_is_bic(__tp) ((__tp)->adv_cong == TCP_BIC) - -/* Recalculate snd_ssthresh, we want to set it to: - * - * Reno: - * one half the current congestion window, but no - * less than two segments - * - * BIC: - * behave like Reno until low_window is reached, - * then increase congestion window slowly - */ -static inline __u32 tcp_recalc_ssthresh(struct tcp_sock *tp) -{ - if (tcp_is_bic(tp)) { - if (sysctl_tcp_bic_fast_convergence && - tp->snd_cwnd < tp->bictcp.last_max_cwnd) - tp->bictcp.last_max_cwnd = (tp->snd_cwnd * - (BICTCP_BETA_SCALE - + sysctl_tcp_bic_beta)) - / (2 * BICTCP_BETA_SCALE); - else - tp->bictcp.last_max_cwnd = tp->snd_cwnd; - - if (tp->snd_cwnd > sysctl_tcp_bic_low_window) - return max((tp->snd_cwnd * sysctl_tcp_bic_beta) - / BICTCP_BETA_SCALE, 2U); - } - - return max(tp->snd_cwnd >> 1U, 2U); -} - -/* Stop taking Vegas samples for now. */ -#define tcp_vegas_disable(__tp) ((__tp)->vegas.doing_vegas_now = 0) - -static inline void tcp_vegas_enable(struct tcp_sock *tp) -{ - /* There are several situations when we must "re-start" Vegas: - * - * o when a connection is established - * o after an RTO - * o after fast recovery - * o when we send a packet and there is no outstanding - * unacknowledged data (restarting an idle connection) - * - * In these circumstances we cannot do a Vegas calculation at the - * end of the first RTT, because any calculation we do is using - * stale info -- both the saved cwnd and congestion feedback are - * stale. - * - * Instead we must wait until the completion of an RTT during - * which we actually receive ACKs. - */ - - /* Begin taking Vegas samples next time we send something. */ - tp->vegas.doing_vegas_now = 1; - - /* Set the beginning of the next send window. */ - tp->vegas.beg_snd_nxt = tp->snd_nxt; - - tp->vegas.cntRTT = 0; - tp->vegas.minRTT = 0x7fffffff; -} - -/* Should we be taking Vegas samples right now? */ -#define tcp_vegas_enabled(__tp) ((__tp)->vegas.doing_vegas_now) - -extern void tcp_ca_init(struct tcp_sock *tp); - -static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) -{ - if (tcp_is_vegas(tp)) { - if (ca_state == TCP_CA_Open) - tcp_vegas_enable(tp); - else - tcp_vegas_disable(tp); - } - tp->ca_state = ca_state; -} - /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. @@ -1288,7 +1249,7 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) static inline void __tcp_enter_cwr(struct tcp_sock *tp) { tp->undo_marker = 0; - tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; @@ -1876,52 +1837,4 @@ struct tcp_iter_state { extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); -/* TCP Westwood functions and constants */ - -#define TCP_WESTWOOD_INIT_RTT (20*HZ) /* maybe too conservative?! */ -#define TCP_WESTWOOD_RTT_MIN (HZ/20) /* 50ms */ - -static inline void tcp_westwood_update_rtt(struct tcp_sock *tp, __u32 rtt_seq) -{ - if (tcp_is_westwood(tp)) - tp->westwood.rtt = rtt_seq; -} - -static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_sock *tp) -{ - return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) / - (__u32) (tp->mss_cache_std), - 2U); -} - -static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_sock *tp) -{ - return tcp_is_westwood(tp) ? __tcp_westwood_bw_rttmin(tp) : 0; -} - -static inline int tcp_westwood_ssthresh(struct tcp_sock *tp) -{ - __u32 ssthresh = 0; - - if (tcp_is_westwood(tp)) { - ssthresh = __tcp_westwood_bw_rttmin(tp); - if (ssthresh) - tp->snd_ssthresh = ssthresh; - } - - return (ssthresh != 0); -} - -static inline int tcp_westwood_cwnd(struct tcp_sock *tp) -{ - __u32 cwnd = 0; - - if (tcp_is_westwood(tp)) { - cwnd = __tcp_westwood_bw_rttmin(tp); - if (cwnd) - tp->snd_cwnd = cwnd; - } - - return (cwnd != 0); -} #endif /* _TCP_H */ -- cgit v1.2.3 From 5f8ef48d240963093451bcf83df89f1a1364f51d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 23 Jun 2005 20:37:36 -0700 Subject: [TCP]: Allow choosing TCP congestion control via sockopt. Allow using setsockopt to set TCP congestion control to use on a per socket basis. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e427cf35915..d04b21188cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1162,8 +1162,9 @@ extern void tcp_init_congestion_control(struct tcp_sock *tp); extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); extern int tcp_set_default_congestion_control(const char *name); extern void tcp_get_default_congestion_control(char *name); +extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); -extern struct tcp_congestion_ops tcp_reno; +extern struct tcp_congestion_ops tcp_init_congestion_ops; extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, int flag); -- cgit v1.2.3 From a8acfbac75c2ffdd66fb5dfcdb7ab5aaced94fd8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 23 Jun 2005 23:45:02 -0700 Subject: [TCP]: Need to declare 'tcp_reno' in net/tcp.h Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index d04b21188cc..ec9e20c2717 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1169,6 +1169,7 @@ extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, int flag); extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); +extern struct tcp_congestion_ops tcp_reno; static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) { -- cgit v1.2.3 From 52c1da39534fb382c061de58b65f678ad74b59f5 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 23 Jun 2005 22:05:33 -0700 Subject: [PATCH] make various thing static Another rollup of patches which give various symbols static scope Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sctp/sm.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index a53e08a45e3..88d9fe5975d 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -131,7 +131,6 @@ sctp_state_fn_t sctp_sf_do_ecne; sctp_state_fn_t sctp_sf_ootb; sctp_state_fn_t sctp_sf_pdiscard; sctp_state_fn_t sctp_sf_violation; -sctp_state_fn_t sctp_sf_violation_chunklen; sctp_state_fn_t sctp_sf_discard_chunk; sctp_state_fn_t sctp_sf_do_5_2_1_siminit; sctp_state_fn_t sctp_sf_do_5_2_2_dupinit; @@ -259,11 +258,6 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, void sctp_chunk_assign_tsn(struct sctp_chunk *); void sctp_chunk_assign_ssn(struct sctp_chunk *); -sctp_disposition_t sctp_stop_t1_and_abort(sctp_cmd_seq_t *commands, - __u16 error, - const struct sctp_association *asoc, - struct sctp_transport *transport); - /* Prototypes for statetable processing. */ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype, -- cgit v1.2.3