From ba4e58eca8aa9473b44fdfd312f26c4a2e7798b3 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Mon, 27 Nov 2006 11:10:57 -0800 Subject: [NET]: Supporting UDP-Lite (RFC 3828) in Linux This is a revision of the previously submitted patch, which alters the way files are organized and compiled in the following manner: * UDP and UDP-Lite now use separate object files * source file dependencies resolved via header files net/ipv{4,6}/udp_impl.h * order of inclusion files in udp.c/udplite.c adapted accordingly [NET/IPv4]: Support for the UDP-Lite protocol (RFC 3828) This patch adds support for UDP-Lite to the IPv4 stack, provided as an extension to the existing UDPv4 code: * generic routines are all located in net/ipv4/udp.c * UDP-Lite specific routines are in net/ipv4/udplite.c * MIB/statistics support in /proc/net/snmp and /proc/net/udplite * shared API with extensions for partial checksum coverage [NET/IPv6]: Extension for UDP-Lite over IPv6 It extends the existing UDPv6 code base with support for UDP-Lite in the same manner as per UDPv4. In particular, * UDPv6 generic and shared code is in net/ipv6/udp.c * UDP-Litev6 specific extensions are in net/ipv6/udplite.c * MIB/statistics support in /proc/net/snmp6 and /proc/net/udplite6 * support for IPV6_ADDRFORM * aligned the coding style of protocol initialisation with af_inet6.c * made the error handling in udpv6_queue_rcv_skb consistent; to return `-1' on error on all error cases * consolidation of shared code [NET]: UDP-Lite Documentation and basic XFRM/Netfilter support The UDP-Lite patch further provides * API documentation for UDP-Lite * basic xfrm support * basic netfilter support for IPv4 and IPv6 (LOG target) Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/linux/in.h | 1 + include/linux/socket.h | 1 + include/linux/udp.h | 12 ++++ include/net/ipv6.h | 12 +++- include/net/transp_v6.h | 2 + include/net/udp.h | 91 +++++++++++++++++++++++++++-- include/net/udplite.h | 149 ++++++++++++++++++++++++++++++++++++++++++++++++ include/net/xfrm.h | 2 + 8 files changed, 263 insertions(+), 7 deletions(-) create mode 100644 include/net/udplite.h (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index 2619859f6e1..1912e7c0bc2 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -45,6 +45,7 @@ enum { IPPROTO_COMP = 108, /* Compression Header protocol */ IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */ + IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */ IPPROTO_RAW = 255, /* Raw IP packets */ IPPROTO_MAX diff --git a/include/linux/socket.h b/include/linux/socket.h index 36140909464..592b6667982 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -264,6 +264,7 @@ struct ucred { #define SOL_IPV6 41 #define SOL_ICMPV6 58 #define SOL_SCTP 132 +#define SOL_UDPLITE 136 /* UDP-Lite (RFC 3828) */ #define SOL_RAW 255 #define SOL_IPX 256 #define SOL_AX25 257 diff --git a/include/linux/udp.h b/include/linux/udp.h index 014b41d1e30..564f3b05010 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -38,6 +38,7 @@ struct udphdr { #include #include +#define UDP_HTABLE_SIZE 128 struct udp_sock { /* inet_sock has to be the first member */ @@ -50,12 +51,23 @@ struct udp_sock { * when the socket is uncorked. */ __u16 len; /* total length of pending frames */ + /* + * Fields specific to UDP-Lite. + */ + __u16 pcslen; + __u16 pcrlen; +/* indicator bits used by pcflag: */ +#define UDPLITE_BIT 0x1 /* set by udplite proto init function */ +#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ +#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ + __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ }; static inline struct udp_sock *udp_sk(const struct sock *sk) { return (struct udp_sock *)sk; } +#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3c266ad99a0..9390649bbfe 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -158,9 +158,13 @@ DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); SNMP_INC_STATS_OFFSET_BH(icmpv6_statistics, field, _offset); \ }) DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); -#define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) -#define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) -#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) +DECLARE_SNMP_STAT(struct udp_mib, udplite_stats_in6); +#define UDP6_INC_STATS_BH(field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS_BH(udplite_stats_in6, field); \ + else SNMP_INC_STATS_BH(udp_stats_in6, field); } while(0) +#define UDP6_INC_STATS_USER(field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS_USER(udplite_stats_in6, field); \ + else SNMP_INC_STATS_USER(udp_stats_in6, field); } while(0) int snmp6_register_dev(struct inet6_dev *idev); int snmp6_unregister_dev(struct inet6_dev *idev); @@ -604,6 +608,8 @@ extern int tcp6_proc_init(void); extern void tcp6_proc_exit(void); extern int udp6_proc_init(void); extern void udp6_proc_exit(void); +extern int udplite6_proc_init(void); +extern void udplite6_proc_exit(void); extern int ipv6_misc_proc_init(void); extern void ipv6_misc_proc_exit(void); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 61f724c1036..409da3a9a45 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -11,6 +11,7 @@ extern struct proto rawv6_prot; extern struct proto udpv6_prot; +extern struct proto udplitev6_prot; extern struct proto tcpv6_prot; struct flowi; @@ -24,6 +25,7 @@ extern void ipv6_destopt_init(void); /* transport protocols */ extern void rawv6_init(void); extern void udpv6_init(void); +extern void udplitev6_init(void); extern void tcpv6_init(void); extern int udpv6_connect(struct sock *sk, diff --git a/include/net/udp.h b/include/net/udp.h index db0c05f6754..4f0626735ed 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -26,9 +26,28 @@ #include #include #include +#include +#include #include -#define UDP_HTABLE_SIZE 128 +/** + * struct udp_skb_cb - UDP(-Lite) private variables + * + * @header: private variables used by IPv4/IPv6 + * @cscov: checksum coverage length (UDP-Lite only) + * @partial_cov: if set indicates partial csum coverage + */ +struct udp_skb_cb { + union { + struct inet_skb_parm h4; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + struct inet6_skb_parm h6; +#endif + } header; + __u16 cscov; + __u8 partial_cov; +}; +#define UDP_SKB_CB(__skb) ((struct udp_skb_cb *)((__skb)->cb)) extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; extern rwlock_t udp_hash_lock; @@ -47,6 +66,62 @@ extern struct proto udp_prot; struct sk_buff; +/* + * Generic checksumming routines for UDP(-Lite) v4 and v6 + */ +static inline u16 __udp_lib_checksum_complete(struct sk_buff *skb) +{ + if (! UDP_SKB_CB(skb)->partial_cov) + return __skb_checksum_complete(skb); + return csum_fold(skb_checksum(skb, 0, UDP_SKB_CB(skb)->cscov, + skb->csum)); +} + +static __inline__ int udp_lib_checksum_complete(struct sk_buff *skb) +{ + return skb->ip_summed != CHECKSUM_UNNECESSARY && + __udp_lib_checksum_complete(skb); +} + +/** + * udp_csum_outgoing - compute UDPv4/v6 checksum over fragments + * @sk: socket we are writing to + * @skb: sk_buff containing the filled-in UDP header + * (checksum field must be zeroed out) + */ +static inline u32 udp_csum_outgoing(struct sock *sk, struct sk_buff *skb) +{ + u32 csum = csum_partial(skb->h.raw, sizeof(struct udphdr), 0); + + skb_queue_walk(&sk->sk_write_queue, skb) { + csum = csum_add(csum, skb->csum); + } + return csum; +} + +/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ +static inline void udp_lib_hash(struct sock *sk) +{ + BUG(); +} + +static inline void udp_lib_unhash(struct sock *sk) +{ + write_lock_bh(&udp_hash_lock); + if (sk_del_node_init(sk)) { + inet_sk(sk)->num = 0; + sock_prot_dec_use(sk->sk_prot); + } + write_unlock_bh(&udp_hash_lock); +} + +static inline void udp_lib_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + + +/* net/ipv4/udp.c */ extern int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); extern void udp_err(struct sk_buff *, u32); @@ -61,21 +136,29 @@ extern unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); DECLARE_SNMP_STAT(struct udp_mib, udp_statistics); -#define UDP_INC_STATS(field) SNMP_INC_STATS(udp_statistics, field) -#define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_statistics, field) -#define UDP_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_statistics, field) +/* + * SNMP statistics for UDP and UDP-Lite + */ +#define UDP_INC_STATS_USER(field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS_USER(udplite_statistics, field); \ + else SNMP_INC_STATS_USER(udp_statistics, field); } while(0) +#define UDP_INC_STATS_BH(field, is_udplite) do { \ + if (is_udplite) SNMP_INC_STATS_BH(udplite_statistics, field); \ + else SNMP_INC_STATS_BH(udp_statistics, field); } while(0) /* /proc */ struct udp_seq_afinfo { struct module *owner; char *name; sa_family_t family; + struct hlist_head *hashtable; int (*seq_show) (struct seq_file *m, void *v); struct file_operations *seq_fops; }; struct udp_iter_state { sa_family_t family; + struct hlist_head *hashtable; int bucket; struct seq_operations seq_ops; }; diff --git a/include/net/udplite.h b/include/net/udplite.h new file mode 100644 index 00000000000..1473b3e4904 --- /dev/null +++ b/include/net/udplite.h @@ -0,0 +1,149 @@ +/* + * Definitions for the UDP-Lite (RFC 3828) code. + */ +#ifndef _UDPLITE_H +#define _UDPLITE_H + +/* UDP-Lite socket options */ +#define UDPLITE_SEND_CSCOV 10 /* sender partial coverage (as sent) */ +#define UDPLITE_RECV_CSCOV 11 /* receiver partial coverage (threshold ) */ + +extern struct proto udplite_prot; +extern struct hlist_head udplite_hash[UDP_HTABLE_SIZE]; + +/* UDP-Lite does not have a standardized MIB yet, so we inherit from UDP */ +DECLARE_SNMP_STAT(struct udp_mib, udplite_statistics); + +/* + * Checksum computation is all in software, hence simpler getfrag. + */ +static __inline__ int udplite_getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb) +{ + return memcpy_fromiovecend(to, (struct iovec *) from, offset, len); +} + +/* Designate sk as UDP-Lite socket */ +static inline int udplite_sk_init(struct sock *sk) +{ + udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + +/* + * Checksumming routines + */ +static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) +{ + u16 cscov; + + /* In UDPv4 a zero checksum means that the transmitter generated no + * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets + * with a zero checksum field are illegal. */ + if (uh->check == 0) { + LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: zeroed checksum field\n"); + return 1; + } + + UDP_SKB_CB(skb)->partial_cov = 0; + cscov = ntohs(uh->len); + + if (cscov == 0) /* Indicates that full coverage is required. */ + cscov = skb->len; + else if (cscov < 8 || cscov > skb->len) { + /* + * Coverage length violates RFC 3828: log and discard silently. + */ + LIMIT_NETDEBUG(KERN_DEBUG "UDPLITE: bad csum coverage %d/%d\n", + cscov, skb->len); + return 1; + + } else if (cscov < skb->len) + UDP_SKB_CB(skb)->partial_cov = 1; + + UDP_SKB_CB(skb)->cscov = cscov; + + /* + * There is no known NIC manufacturer supporting UDP-Lite yet, + * hence ip_summed is always (re-)set to CHECKSUM_NONE. + */ + skb->ip_summed = CHECKSUM_NONE; + + return 0; +} + +static __inline__ int udplite4_csum_init(struct sk_buff *skb, struct udphdr *uh) +{ + int rc = udplite_checksum_init(skb, uh); + + if (!rc) + skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, + skb->nh.iph->daddr, + skb->len, IPPROTO_UDPLITE, 0); + return rc; +} + +static __inline__ int udplite6_csum_init(struct sk_buff *skb, struct udphdr *uh) +{ + int rc = udplite_checksum_init(skb, uh); + + if (!rc) + skb->csum = ~csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + skb->len, IPPROTO_UDPLITE, 0); + return rc; +} + +static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) +{ + int cscov = up->len; + + /* + * Sender has set `partial coverage' option on UDP-Lite socket + */ + if (up->pcflag & UDPLITE_SEND_CC) { + if (up->pcslen < up->len) { + /* up->pcslen == 0 means that full coverage is required, + * partial coverage only if 0 < up->pcslen < up->len */ + if (0 < up->pcslen) { + cscov = up->pcslen; + } + uh->len = htons(up->pcslen); + } + /* + * NOTE: Causes for the error case `up->pcslen > up->len': + * (i) Application error (will not be penalized). + * (ii) Payload too big for send buffer: data is split + * into several packets, each with its own header. + * In this case (e.g. last segment), coverage may + * exceed packet length. + * Since packets with coverage length > packet length are + * illegal, we fall back to the defaults here. + */ + } + return cscov; +} + +static inline u32 udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) +{ + u32 csum = 0; + int off, len, cscov = udplite_sender_cscov(udp_sk(sk), skb->h.uh); + + skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ + + skb_queue_walk(&sk->sk_write_queue, skb) { + off = skb->h.raw - skb->data; + len = skb->len - off; + + csum = skb_checksum(skb, off, (cscov > len)? len : cscov, csum); + + if ((cscov -= len) <= 0) + break; + } + return csum; +} + +extern void udplite4_register(void); +extern int udplite_get_port(struct sock *sk, unsigned short snum, + int (*scmp)(const struct sock *, const struct sock *)); +#endif /* _UDPLITE_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 81c91e8a328..3878a88ff61 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -468,6 +468,7 @@ __be16 xfrm_flowi_sport(struct flowi *fl) switch(fl->proto) { case IPPROTO_TCP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_SCTP: port = fl->fl_ip_sport; break; @@ -493,6 +494,7 @@ __be16 xfrm_flowi_dport(struct flowi *fl) switch(fl->proto) { case IPPROTO_TCP: case IPPROTO_UDP: + case IPPROTO_UDPLITE: case IPPROTO_SCTP: port = fl->fl_ip_dport; break; -- cgit v1.2.3