diff options
32 files changed, 1345 insertions, 1393 deletions
diff --git a/include/linux/udp.h b/include/linux/udp.h index 4144664d69d..1e7b7cb5703 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -70,10 +70,8 @@ struct udp_sock { #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ #define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ -#ifdef CONFIG_IP_UDPLITE __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ __u8 unused[3]; -#endif /* * For encapsulation sockets. */ @@ -85,15 +83,7 @@ static inline struct udp_sock *udp_sk(const struct sock *sk) return (struct udp_sock *)sk; } -#ifdef CONFIG_IP_UDPLITE #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) -#define IS_PROTO_UDPLITE(__proto) ((__proto) == IPPROTO_UDPLITE) -#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP || (level) == SOL_UDPLITE) -#else -#define IS_UDPLITE(__sk) 0 -#define IS_PROTO_UDPLITE(__proto) 0 -#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP) -#endif #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5f6df50a33a..8db06af1efb 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -599,13 +599,8 @@ extern int tcp6_proc_init(void); extern void tcp6_proc_exit(void); extern int udp6_proc_init(void); extern void udp6_proc_exit(void); -#ifdef CONFIG_IP_UDPLITE extern int udplite6_proc_init(void); extern void udplite6_proc_exit(void); -#else -static inline int udplite6_proc_init(void) { return 0; } -static inline void udplite6_proc_exit(void) { } -#endif extern int ipv6_misc_proc_init(void); extern void ipv6_misc_proc_exit(void); extern int snmp6_register_dev(struct inet6_dev *idev); diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 90e6e24df85..ac053be6c25 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -51,5 +51,8 @@ struct netns_ipv6 { struct fib_rules_ops *fib6_rules_ops; #endif struct sock **icmp_sk; + struct sock *ndisc_sk; + struct sock *tcp_sk; + struct sock *igmp_sk; }; #endif diff --git a/include/net/tipc/tipc_bearer.h b/include/net/tipc/tipc_bearer.h index 2151a80cdf3..ee2f304e491 100644 --- a/include/net/tipc/tipc_bearer.h +++ b/include/net/tipc/tipc_bearer.h @@ -99,6 +99,9 @@ struct tipc_bearer { char name[TIPC_MAX_BEARER_NAME]; }; +/* + * TIPC routines available to supported media types + */ int tipc_register_media(u32 media_type, char *media_name, @@ -123,6 +126,12 @@ void tipc_continue(struct tipc_bearer *tb_ptr); int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority); int tipc_disable_bearer(const char *name); +/* + * Routines made available to TIPC by supported media types + */ + +int tipc_eth_media_start(void); +void tipc_eth_media_stop(void); #endif diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h index cfc4ba46de8..c9b36b77a0b 100644 --- a/include/net/tipc/tipc_port.h +++ b/include/net/tipc/tipc_port.h @@ -86,13 +86,6 @@ u32 tipc_createport_raw(void *usr_handle, void (*wakeup)(struct tipc_port *), const u32 importance); -/* - * tipc_set_msg_option(): port must be locked. - */ -int tipc_set_msg_option(struct tipc_port *tp_ptr, - const char *opt, - const u32 len); - int tipc_reject_msg(struct sk_buff *buf, u32 err); int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 902e6c6bc79..27394e0447d 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -27,13 +27,8 @@ extern int rawv6_init(void); extern void rawv6_exit(void); extern int udpv6_init(void); extern void udpv6_exit(void); -#ifdef CONFIG_IP_UDPLITE extern int udplitev6_init(void); extern void udplitev6_exit(void); -#else -static inline int udplitev6_init(void) { return 0; } -static inline void udplitev6_exit(void) { } -#endif extern int tcpv6_init(void); extern void tcpv6_exit(void); diff --git a/include/net/udplite.h b/include/net/udplite.h index 01ddb2c2026..b76b2e377af 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -25,9 +25,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, /* Designate sk as UDP-Lite socket */ static inline int udplite_sk_init(struct sock *sk) { -#ifdef CONFIG_IP_UDPLITE udp_sk(sk)->pcflag = UDPLITE_BIT; -#endif return 0; } @@ -71,7 +69,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) { int cscov = up->len; -#ifdef CONFIG_IP_UDPLITE + /* * Sender has set `partial coverage' option on UDP-Lite socket */ @@ -95,15 +93,13 @@ static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) * illegal, we fall back to the defaults here. */ } -#endif return cscov; } static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) { - __wsum csum = 0; -#ifdef CONFIG_IP_UDPLITE int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb)); + __wsum csum = 0; skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ @@ -116,7 +112,6 @@ static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) if ((cscov -= len) <= 0) break; } -#endif return csum; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5098fd2ff4d..9c7e5ffb223 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -632,15 +632,5 @@ config TCP_MD5SIG If unsure, say N. -config IP_UDPLITE - bool "IP: UDP-Lite Protocol (RFC 3828)" - default n - ---help--- - UDP-Lite (RFC 3828) is a UDP-like protocol with variable-length - checksum. Read <file:Documentation/networking/udplite.txt> for - details. - - If unsure, say N. - source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index d5226241d5e..ad40ef3f9eb 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ - datagram.o raw.o udp.o udp_ipv4.o \ + datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o \ inet_fragment.o @@ -49,7 +49,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_IP_UDPLITE) += udplite_ipv4.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 25871c6c744..4cb8a138553 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1317,18 +1317,15 @@ static int __init init_ipv4_mibs(void) if (snmp_mib_init((void **)udp_statistics, sizeof(struct udp_mib)) < 0) goto err_udp_mib; -#ifdef CONFIG_IP_UDPLITE if (snmp_mib_init((void **)udplite_statistics, sizeof(struct udp_mib)) < 0) goto err_udplite_mib; -#endif + tcp_mib_init(); return 0; -#ifdef CONFIG_IP_UDPLITE err_udplite_mib: -#endif snmp_mib_free((void **)udp_statistics); err_udp_mib: snmp_mib_free((void **)tcp_statistics); @@ -1426,10 +1423,8 @@ static int __init inet_init(void) /* Setup UDP memory threshold */ udp_init(); -#ifdef CONFIG_IP_UDPLITE /* Add UDP-Lite (RFC 3828) */ udplite4_register(); -#endif /* * Set the ICMP layer up diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d75ddb7fa4b..d63474c6b40 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,9 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), atomic_read(&udp_memory_allocated)); -#ifdef CONFIG_IP_UDPLITE seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); -#endif seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); @@ -351,7 +349,6 @@ static int snmp_seq_show(struct seq_file *seq, void *v) snmp_fold_field((void **)udp_statistics, snmp4_udp_list[i].entry)); -#ifdef CONFIG_IP_UDPLITE /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) @@ -362,7 +359,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " %lu", snmp_fold_field((void **)udplite_statistics, snmp4_udp_list[i].entry)); -#endif + seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c53d7673b57..7ea1b67b6de 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -246,6 +246,553 @@ int udp_get_port(struct sock *sk, unsigned short snum, return __udp_lib_get_port(sk, snum, udp_hash, scmp); } +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +{ + struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); + + return ( !ipv6_only_sock(sk2) && + (!inet1->rcv_saddr || !inet2->rcv_saddr || + inet1->rcv_saddr == inet2->rcv_saddr )); +} + +static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); +} + +/* UDP is nearly always wildcards out the wazoo, it makes no sense to try + * harder than this. -DaveM + */ +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, + __be16 sport, __be32 daddr, __be16 dport, + int dif, struct hlist_head udptable[]) +{ + struct sock *sk, *result = NULL; + struct hlist_node *node; + unsigned short hnum = ntohs(dport); + int badness = -1; + + read_lock(&udp_hash_lock); + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + struct inet_sock *inet = inet_sk(sk); + + if (sk->sk_net == net && sk->sk_hash == hnum && + !ipv6_only_sock(sk)) { + int score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + continue; + score+=2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + continue; + score+=2; + } + if (inet->dport) { + if (inet->dport != sport) + continue; + score+=2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score+=2; + } + if (score == 9) { + result = sk; + break; + } else if (score > badness) { + result = sk; + badness = score; + } + } + } + if (result) + sock_hold(result); + read_unlock(&udp_hash_lock); + return result; +} + +static inline struct sock *udp_v4_mcast_next(struct sock *sk, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif) +{ + struct hlist_node *node; + struct sock *s = sk; + unsigned short hnum = ntohs(loc_port); + + sk_for_each_from(s, node) { + struct inet_sock *inet = inet_sk(s); + + if (s->sk_hash != hnum || + (inet->daddr && inet->daddr != rmt_addr) || + (inet->dport != rmt_port && inet->dport) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + ipv6_only_sock(s) || + (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) + continue; + if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) + continue; + goto found; + } + s = NULL; +found: + return s; +} + +/* + * This routine is called by the ICMP module when it gets some + * sort of error condition. If err < 0 then the socket should + * be closed and the error returned to the user. If err > 0 + * it's just the icmp type << 8 | icmp code. + * Header points to the ip header of the error packet. We move + * on past this. Then (as it used to claim before adjustment) + * header points to the first 8 bytes of the udp header. We need + * to find the appropriate port. + */ + +void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) +{ + struct inet_sock *inet; + struct iphdr *iph = (struct iphdr*)skb->data; + struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct sock *sk; + int harderr; + int err; + + sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, + iph->saddr, uh->source, skb->dev->ifindex, udptable); + if (sk == NULL) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; /* No socket for error */ + } + + err = 0; + harderr = 0; + inet = inet_sk(sk); + + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + goto out; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + if (inet->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; + } + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + } + + /* + * RFC1122: OK. Passes ICMP errors back to application, as per + * 4.1.3.3. + */ + if (!inet->recverr) { + if (!harderr || sk->sk_state != TCP_ESTABLISHED) + goto out; + } else { + ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); + } + sk->sk_err = err; + sk->sk_error_report(sk); +out: + sock_put(sk); +} + +void udp_err(struct sk_buff *skb, u32 info) +{ + __udp4_lib_err(skb, info, udp_hash); +} + +/* + * Throw away all pending data and cancel the corking. Socket is locked. + */ +static void udp_flush_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + + if (up->pending) { + up->len = 0; + up->pending = 0; + ip_flush_pending_frames(sk); + } +} + +/** + * udp4_hwcsum_outgoing - handle outgoing HW checksumming + * @sk: socket we are sending on + * @skb: sk_buff containing the filled-in UDP header + * (checksum field must be zeroed out) + */ +static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, int len ) +{ + unsigned int offset; + struct udphdr *uh = udp_hdr(skb); + __wsum csum = 0; + + if (skb_queue_len(&sk->sk_write_queue) == 1) { + /* + * Only one fragment on the socket. + */ + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); + } else { + /* + * HW-checksum won't work as there are two or more + * fragments on the socket so that all csums of sk_buffs + * should be together + */ + offset = skb_transport_offset(skb); + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + + skb->ip_summed = CHECKSUM_NONE; + + skb_queue_walk(&sk->sk_write_queue, skb) { + csum = csum_add(csum, skb->csum); + } + + uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } +} + +/* + * Push out all pending data as one UDP datagram. Socket is locked. + */ +static int udp_push_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct flowi *fl = &inet->cork.fl; + struct sk_buff *skb; + struct udphdr *uh; + int err = 0; + int is_udplite = IS_UDPLITE(sk); + __wsum csum = 0; + + /* Grab the skbuff where UDP header space exists. */ + if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + goto out; + + /* + * Create a UDP header + */ + uh = udp_hdr(skb); + uh->source = fl->fl_ip_sport; + uh->dest = fl->fl_ip_dport; + uh->len = htons(up->len); + uh->check = 0; + + if (is_udplite) /* UDP-Lite */ + csum = udplite_csum_outgoing(sk, skb); + + else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + + skb->ip_summed = CHECKSUM_NONE; + goto send; + + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ + + udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); + goto send; + + } else /* `normal' UDP */ + csum = udp_csum_outgoing(sk, skb); + + /* add protocol-dependent pseudo-header */ + uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, + sk->sk_protocol, csum ); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + +send: + err = ip_push_pending_frames(sk); +out: + up->len = 0; + up->pending = 0; + if (!err) + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct udp_sock *up = udp_sk(sk); + int ulen = len; + struct ipcm_cookie ipc; + struct rtable *rt = NULL; + int free = 0; + int connected = 0; + __be32 daddr, faddr, saddr; + __be16 dport; + u8 tos; + int err, is_udplite = IS_UDPLITE(sk); + int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + + if (len > 0xFFFF) + return -EMSGSIZE; + + /* + * Check the flags. + */ + + if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ + return -EOPNOTSUPP; + + ipc.opt = NULL; + + if (up->pending) { + /* + * There are pending frames. + * The socket lock must be held while it's corked. + */ + lock_sock(sk); + if (likely(up->pending)) { + if (unlikely(up->pending != AF_INET)) { + release_sock(sk); + return -EINVAL; + } + goto do_append_data; + } + release_sock(sk); + } + ulen += sizeof(struct udphdr); + + /* + * Get and verify the address. + */ + if (msg->msg_name) { + struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; + if (msg->msg_namelen < sizeof(*usin)) + return -EINVAL; + if (usin->sin_family != AF_INET) { + if (usin->sin_family != AF_UNSPEC) + return -EAFNOSUPPORT; + } + + daddr = usin->sin_addr.s_addr; + dport = usin->sin_port; + if (dport == 0) + return -EINVAL; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = inet->daddr; + dport = inet->dport; + /* Open fast path for connected socket. + Route will not be used, if at least one option is set. + */ + connected = 1; + } + ipc.addr = inet->saddr; + + ipc.oif = sk->sk_bound_dev_if; + if (msg->msg_controllen) { + err = ip_cmsg_send(msg, &ipc); + if (err) + return err; + if (ipc.opt) + free = 1; + connected = 0; + } + if (!ipc.opt) + ipc.opt = inet->opt; + + saddr = ipc.addr; + ipc.addr = faddr = daddr; + + if (ipc.opt && ipc.opt->srr) { + if (!daddr) + return -EINVAL; + faddr = ipc.opt->faddr; + connected = 0; + } + tos = RT_TOS(inet->tos); + if (sock_flag(sk, SOCK_LOCALROUTE) || + (msg->msg_flags & MSG_DONTROUTE) || + (ipc.opt && ipc.opt->is_strictroute)) { + tos |= RTO_ONLINK; + connected = 0; + } + + if (ipv4_is_multicast(daddr)) { + if (!ipc.oif) + ipc.oif = inet->mc_index; + if (!saddr) + saddr = inet->mc_addr; + connected = 0; + } + + if (connected) + rt = (struct rtable*)sk_dst_check(sk, 0); + + if (rt == NULL) { + struct flowi fl = { .oif = ipc.oif, + .nl_u = { .ip4_u = + { .daddr = faddr, + .saddr = saddr, + .tos = tos } }, + .proto = sk->sk_protocol, + .uli_u = { .ports = + { .sport = inet->sport, + .dport = dport } } }; + security_sk_classify_flow(sk, &fl); + err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); + if (err) { + if (err == -ENETUNREACH) + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + goto out; + } + + err = -EACCES; + if ((rt->rt_flags & RTCF_BROADCAST) && + !sock_flag(sk, SOCK_BROADCAST)) + goto out; + if (connected) + sk_dst_set(sk, dst_clone(&rt->u.dst)); + } + + if (msg->msg_flags&MSG_CONFIRM) + goto do_confirm; +back_from_confirm: + + saddr = rt->rt_src; + if (!ipc.addr) + daddr = ipc.addr = rt->rt_dst; + + lock_sock(sk); + if (unlikely(up->pending)) { + /* The socket is already corked while preparing it. */ + /* ... which is an evident application bug. --ANK */ + release_sock(sk); + + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); + err = -EINVAL; + goto out; + } + /* + * Now cork the socket to pend data. + */ + inet->cork.fl.fl4_dst = daddr; + inet->cork.fl.fl_ip_dport = dport; + inet->cork.fl.fl4_src = saddr; + inet->cork.fl.fl_ip_sport = inet->sport; + up->pending = AF_INET; + +do_append_data: + up->len += ulen; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), &ipc, rt, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + if (err) + udp_flush_pending_frames(sk); + else if (!corkreq) + err = udp_push_pending_frames(sk); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; + release_sock(sk); + +out: + ip_rt_put(rt); + if (free) + kfree(ipc.opt); + if (!err) + return len; + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + } + return err; + +do_confirm: + dst_confirm(&rt->u.dst); + if (!(msg->msg_flags&MSG_PROBE) || len) + goto back_from_confirm; + err = 0; + goto out; +} + +int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) +{ + struct udp_sock *up = udp_sk(sk); + int ret; + + if (!up->pending) { + struct msghdr msg = { .msg_flags = flags|MSG_MORE }; + + /* Call udp_sendmsg to specify destination address which + * sendpage interface can't pass. + * This will succeed only when the socket is connected. + */ + ret = udp_sendmsg(NULL, sk, &msg, 0); + if (ret < 0) + return ret; + } + + lock_sock(sk); + + if (unlikely(!up->pending)) { + release_sock(sk); + + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); + return -EINVAL; + } + + ret = ip_append_page(sk, page, offset, size, flags); + if (ret == -EOPNOTSUPP) { + release_sock(sk); + return sock_no_sendpage(sk->sk_socket, page, offset, + size, flags); + } + if (ret < 0) { + udp_flush_pending_frames(sk); + goto out; + } + + up->len += size; + if (!(up->corkflag || (flags&MSG_MORE))) + ret = udp_push_pending_frames(sk); + if (!ret) + ret = size; +out: + release_sock(sk); + return ret; +} + /* * IOCTL requests applicable to the UDP protocol */ @@ -286,6 +833,107 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) return 0; } +/* + * This should be easy, if there is something there we + * return it, otherwise we block. + */ + +int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + struct inet_sock *inet = inet_sk(sk); + struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + struct sk_buff *skb; + unsigned int ulen, copied; + int peeked; + int err; + int is_udplite = IS_UDPLITE(sk); + + /* + * Check any passed addresses + */ + if (addr_len) + *addr_len=sizeof(*sin); + + if (flags & MSG_ERRQUEUE) + return ip_recv_error(sk, msg, len); + +try_again: + skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + &peeked, &err); + if (!skb) + goto out; + + ulen = skb->len - sizeof(struct udphdr); + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) + msg->msg_flags |= MSG_TRUNC; + + /* + * If checksum is needed at all, try to do it while copying the + * data. If the data is truncated, or if we only want a partial + * coverage checksum (UDP-Lite), do it before the copy. + */ + + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (udp_lib_checksum_complete(skb)) + goto csum_copy_err; + } + + if (skb_csum_unnecessary(skb)) + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied ); + else { + err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); + + if (err == -EINVAL) + goto csum_copy_err; + } + + if (err) + goto out_free; + + if (!peeked) + UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + + sock_recv_timestamp(msg, sk, skb); + + /* Copy the address. */ + if (sin) + { + sin->sin_family = AF_INET; + sin->sin_port = udp_hdr(skb)->source; + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } + if (inet->cmsg_flags) + ip_cmsg_recv(msg, skb); + + err = copied; + if (flags & MSG_TRUNC) + err = ulen; + +out_free: + lock_sock(sk); + skb_free_datagram(sk, skb); + release_sock(sk); +out: + return err; + +csum_copy_err: + lock_sock(sk); + if (!skb_kill_datagram(sk, skb, flags)) + UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); + + if (noblock) + return -EAGAIN; + goto try_again; +} + + int udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -308,6 +956,319 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +/* returns: + * -1: error + * 0: success + * >0: "udp encap" protocol resubmission + * + * Note that in the success and error cases, the skb is assumed to + * have either been requeued or freed. + */ +int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + int rc; + int is_udplite = IS_UDPLITE(sk); + + /* + * Charge it to the socket, dropping if the queue is full. + */ + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + goto drop; + nf_reset(skb); + + if (up->encap_type) { + /* + * This is an encapsulation socket so pass the skb to + * the socket's udp_encap_rcv() hook. Otherwise, just + * fall through and pass this up the UDP socket. + * up->encap_rcv() returns the following value: + * =0 if skb was successfully passed to the encap + * handler or was discarded by it. + * >0 if skb should be passed on to UDP. + * <0 if skb should be resubmitted as proto -N + */ + + /* if we're overly short, let UDP handle it */ + if (skb->len > sizeof(struct udphdr) && + up->encap_rcv != NULL) { + int ret; + + ret = (*up->encap_rcv)(sk, skb); + if (ret <= 0) { + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, + is_udplite); + return -ret; + } + } + + /* FALLTHROUGH -- it's a UDP Packet */ + } + + /* + * UDP-Lite specific tests, ignored on UDP sockets + */ + if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + /* + * MIB statistics other than incrementing the error count are + * disabled for the following two types of errors: these depend + * on the application settings, not on the functioning of the + * protocol stack as such. + * + * RFC 3828 here recommends (sec 3.3): "There should also be a + * way ... to ... at least let the receiving application block + * delivery of packets with coverage values less than a value + * provided by the application." + */ + if (up->pcrlen == 0) { /* full coverage was set */ + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " + "%d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); + goto drop; + } + /* The next case involves violating the min. coverage requested + * by the receiver. This is subtle: if receiver wants x and x is + * greater than the buffersize/MTU then receiver will complain + * that it wants x while sender emits packets of smaller size y. + * Therefore the above ...()->partial_cov statement is essential. + */ + if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + LIMIT_NETDEBUG(KERN_WARNING + "UDPLITE: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); + goto drop; + } + } + + if (sk->sk_filter) { + if (udp_lib_checksum_complete(skb)) + goto drop; + } + + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + +/* + * Multicasts and broadcasts go to each listener. + * + * Note: called only from the BH handler context, + * so we don't need to lock the hashes. + */ +static int __udp4_lib_mcast_deliver(struct sk_buff *skb, + struct udphdr *uh, + __be32 saddr, __be32 daddr, + struct hlist_head udptable[]) +{ + struct sock *sk; + int dif; + + read_lock(&udp_hash_lock); + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + dif = skb->dev->ifindex; + sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + if (sk) { + struct sock *sknext = NULL; + + do { + struct sk_buff *skb1 = skb; + + sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, + uh->source, saddr, dif); + if (sknext) + skb1 = skb_clone(skb, GFP_ATOMIC); + + if (skb1) { + int ret = 0; + + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb1); + else + sk_add_backlog(sk, skb1); + bh_unlock_sock(sk); + + if (ret > 0) + /* we should probably re-process instead + * of dropping packets here. */ + kfree_skb(skb1); + } + sk = sknext; + } while (sknext); + } else + kfree_skb(skb); + read_unlock(&udp_hash_lock); + return 0; +} + +/* Initialize UDP checksum. If exited with zero value (success), + * CHECKSUM_UNNECESSARY means, that no more checks are required. + * Otherwise, csum completion requires chacksumming packet body, + * including udp header and folding it to skb->csum. + */ +static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, + int proto) +{ + const struct iphdr *iph; + int err; + + UDP_SKB_CB(skb)->partial_cov = 0; + UDP_SKB_CB(skb)->cscov = skb->len; + + if (proto == IPPROTO_UDPLITE) { + err = udplite_checksum_init(skb, uh); + if (err) + return err; + } + + iph = ip_hdr(skb); + if (uh->check == 0) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else if (skb->ip_summed == CHECKSUM_COMPLETE) { + if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + proto, skb->csum)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + if (!skb_csum_unnecessary(skb)) + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb->len, proto, 0); + /* Probably, we should checksum udp header (it should be in cache + * in any case) and data in tiny packets (< rx copybreak). + */ + + return 0; +} + +/* + * All we need to do is get the socket, and then do a checksum. + */ + +int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], + int proto) +{ + struct sock *sk; + struct udphdr *uh = udp_hdr(skb); + unsigned short ulen; + struct rtable *rt = (struct rtable*)skb->dst; + __be32 saddr = ip_hdr(skb)->saddr; + __be32 daddr = ip_hdr(skb)->daddr; + + /* + * Validate the packet. + */ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto drop; /* No space for header. */ + + ulen = ntohs(uh->len); + if (ulen > skb->len) + goto short_packet; + + if (proto == IPPROTO_UDP) { + /* UDP validates ulen. */ + if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) + goto short_packet; + uh = udp_hdr(skb); + } + + if (udp4_csum_init(skb, uh, proto)) + goto csum_error; + + if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) + return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); + + sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, + uh->dest, inet_iif(skb), udptable); + + if (sk != NULL) { + int ret = 0; + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + sock_put(sk); + + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; + return 0; + } + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + nf_reset(skb); + + /* No socket. Drop packet silently, if checksum is wrong */ + if (udp_lib_checksum_complete(skb)) + goto csum_error; + + UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + + /* + * Hmm. We got an UDP packet to a port to which we + * don't wanna listen. Ignore it. + */ + kfree_skb(skb); + return 0; + +short_packet: + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + NIPQUAD(saddr), + ntohs(uh->source), + ulen, + skb->len, + NIPQUAD(daddr), + ntohs(uh->dest)); + goto drop; + +csum_error: + /* + * RFC1122: OK. Discards the bad packet silently (as far as + * the network is concerned, anyway) as per 4.1.3.4 (MUST). + */ + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + NIPQUAD(saddr), + ntohs(uh->source), + NIPQUAD(daddr), + ntohs(uh->dest), + ulen); +drop: + UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + kfree_skb(skb); + return 0; +} + +int udp_rcv(struct sk_buff *skb) +{ + return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); +} + +int udp_destroy_sock(struct sock *sk) +{ + lock_sock(sk); + udp_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + /* * Socket option code for UDP */ @@ -318,9 +1279,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct udp_sock *up = udp_sk(sk); int val; int err = 0; -#ifdef CONFIG_IP_UDPLITE int is_udplite = IS_UDPLITE(sk); -#endif if (optlen<sizeof(int)) return -EINVAL; @@ -356,7 +1315,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, } break; -#ifdef CONFIG_IP_UDPLITE /* * UDP-Lite's partial checksum coverage (RFC 3828). */ @@ -382,7 +1340,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, up->pcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; -#endif default: err = -ENOPROTOOPT; @@ -392,6 +1349,26 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return err; } +int udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); + return ip_setsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +int compat_udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); + return compat_ip_setsockopt(sk, level, optname, optval, optlen); +} +#endif + int udp_lib_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -436,6 +1413,23 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, return 0; } +int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return ip_getsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +int compat_udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return compat_ip_getsockopt(sk, level, optname, optval, optlen); +} +#endif /** * udp_poll - wait for a UDP event. * @file - file struct @@ -480,6 +1474,36 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } +DEFINE_PROTO_INUSE(udp) + +struct proto udp_prot = { + .name = "UDP", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip4_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .destroy = udp_destroy_sock, + .setsockopt = udp_setsockopt, + .getsockopt = udp_getsockopt, + .sendmsg = udp_sendmsg, + .recvmsg = udp_recvmsg, + .sendpage = udp_sendpage, + .backlog_rcv = udp_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, + .obj_size = sizeof(struct udp_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udp_setsockopt, + .compat_getsockopt = compat_udp_getsockopt, +#endif + REF_PROTO_INUSE(udp) +}; /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS @@ -612,6 +1636,62 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) proc_net_remove(&init_net, afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } + +/* ------------------------------------------------------------------------ */ +static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) +{ + struct inet_sock *inet = inet_sk(sp); + __be32 dest = inet->daddr; + __be32 src = inet->rcv_saddr; + __u16 destp = ntohs(inet->dport); + __u16 srcp = ntohs(inet->sport); + + sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", + bucket, src, srcp, dest, destp, sp->sk_state, + atomic_read(&sp->sk_wmem_alloc), + atomic_read(&sp->sk_rmem_alloc), + 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + atomic_read(&sp->sk_refcnt), sp); +} + +int udp4_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_printf(seq, "%-127s\n", + " sl local_address rem_address st tx_queue " + "rx_queue tr tm->when retrnsmt uid timeout " + "inode"); + else { + char tmpbuf[129]; + struct udp_iter_state *state = seq->private; + + udp4_format_sock(v, tmpbuf, state->bucket); + seq_printf(seq, "%-127s\n", tmpbuf); + } + return 0; +} + +/* ------------------------------------------------------------------------ */ +static struct file_operations udp4_seq_fops; +static struct udp_seq_afinfo udp4_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udp", + .family = AF_INET, + .hashtable = udp_hash, + .seq_show = udp4_seq_show, + .seq_fops = &udp4_seq_fops, +}; + +int __init udp4_proc_init(void) +{ + return udp_proc_register(&udp4_seq_afinfo); +} + +void udp4_proc_exit(void) +{ + udp_proc_unregister(&udp4_seq_afinfo); +} #endif /* CONFIG_PROC_FS */ void __init udp_init(void) @@ -638,6 +1718,8 @@ EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_get_port); +EXPORT_SYMBOL(udp_prot); +EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); diff --git a/net/ipv4/udp_ipv4.c b/net/ipv4/udp_ipv4.c deleted file mode 100644 index fd14c2c50ed..00000000000 --- a/net/ipv4/udp_ipv4.c +++ /dev/null @@ -1,1134 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * UDP for IPv4. - * - * For full credits, see net/ipv4/udp.c. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/ioctls.h> -#include <linux/bootmem.h> -#include <linux/types.h> -#include <linux/fcntl.h> -#include <linux/module.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/igmp.h> -#include <linux/in.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <net/tcp_states.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <net/net_namespace.h> -#include <net/icmp.h> -#include <net/route.h> -#include <net/checksum.h> -#include <net/xfrm.h> -#include "udp_impl.h" - -int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) -{ - struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); - - return ( !ipv6_only_sock(sk2) && - (!inet1->rcv_saddr || !inet2->rcv_saddr || - inet1->rcv_saddr == inet2->rcv_saddr )); -} - -static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) -{ - return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); -} - -/* UDP is nearly always wildcards out the wazoo, it makes no sense to try - * harder than this. -DaveM - */ -static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, - __be16 sport, __be32 daddr, __be16 dport, - int dif, struct hlist_head udptable[]) -{ - struct sock *sk, *result = NULL; - struct hlist_node *node; - unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_sock *inet = inet_sk(sk); - - if (sk->sk_net == net && sk->sk_hash == hnum && - !ipv6_only_sock(sk)) { - int score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } - } - } - if (result) - sock_hold(result); - read_unlock(&udp_hash_lock); - return result; -} - -static inline struct sock *udp_v4_mcast_next(struct sock *sk, - __be16 loc_port, __be32 loc_addr, - __be16 rmt_port, __be32 rmt_addr, - int dif) -{ - struct hlist_node *node; - struct sock *s = sk; - unsigned short hnum = ntohs(loc_port); - - sk_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); - - if (s->sk_hash != hnum || - (inet->daddr && inet->daddr != rmt_addr) || - (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || - ipv6_only_sock(s) || - (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) - continue; - if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) - continue; - goto found; - } - s = NULL; -found: - return s; -} - -/* - * This routine is called by the ICMP module when it gets some - * sort of error condition. If err < 0 then the socket should - * be closed and the error returned to the user. If err > 0 - * it's just the icmp type << 8 | icmp code. - * Header points to the ip header of the error packet. We move - * on past this. Then (as it used to claim before adjustment) - * header points to the first 8 bytes of the udp header. We need - * to find the appropriate port. - */ - -void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) -{ - struct inet_sock *inet; - struct iphdr *iph = (struct iphdr*)skb->data; - struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct sock *sk; - int harderr; - int err; - - sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, udptable); - if (sk == NULL) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); - return; /* No socket for error */ - } - - err = 0; - harderr = 0; - inet = inet_sk(sk); - - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - goto out; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - if (inet->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; - } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; - } - break; - } - - /* - * RFC1122: OK. Passes ICMP errors back to application, as per - * 4.1.3.3. - */ - if (!inet->recverr) { - if (!harderr || sk->sk_state != TCP_ESTABLISHED) - goto out; - } else { - ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); - } - sk->sk_err = err; - sk->sk_error_report(sk); -out: - sock_put(sk); -} - -void udp_err(struct sk_buff *skb, u32 info) -{ - __udp4_lib_err(skb, info, udp_hash); -} - -/* - * Throw away all pending data and cancel the corking. Socket is locked. - */ -static void udp_flush_pending_frames(struct sock *sk) -{ - struct udp_sock *up = udp_sk(sk); - - if (up->pending) { - up->len = 0; - up->pending = 0; - ip_flush_pending_frames(sk); - } -} - -/** - * udp4_hwcsum_outgoing - handle outgoing HW checksumming - * @sk: socket we are sending on - * @skb: sk_buff containing the filled-in UDP header - * (checksum field must be zeroed out) - */ -static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, int len ) -{ - unsigned int offset; - struct udphdr *uh = udp_hdr(skb); - __wsum csum = 0; - - if (skb_queue_len(&sk->sk_write_queue) == 1) { - /* - * Only one fragment on the socket. - */ - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); - } else { - /* - * HW-checksum won't work as there are two or more - * fragments on the socket so that all csums of sk_buffs - * should be together - */ - offset = skb_transport_offset(skb); - skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); - - skb->ip_summed = CHECKSUM_NONE; - - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } - - uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } -} - -/* - * Push out all pending data as one UDP datagram. Socket is locked. - */ -static int udp_push_pending_frames(struct sock *sk) -{ - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; - struct sk_buff *skb; - struct udphdr *uh; - int err = 0; - int is_udplite = IS_UDPLITE(sk); - __wsum csum = 0; - - /* Grab the skbuff where UDP header space exists. */ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) - goto out; - - /* - * Create a UDP header - */ - uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; - uh->len = htons(up->len); - uh->check = 0; - - if (is_udplite) /* UDP-Lite */ - csum = udplite_csum_outgoing(sk, skb); - - else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ - - skb->ip_summed = CHECKSUM_NONE; - goto send; - - } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - - udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); - goto send; - - } else /* `normal' UDP */ - csum = udp_csum_outgoing(sk, skb); - - /* add protocol-dependent pseudo-header */ - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, - sk->sk_protocol, csum ); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - -send: - err = ip_push_pending_frames(sk); -out: - up->len = 0; - up->pending = 0; - if (!err) - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); - return err; -} - -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct inet_sock *inet = inet_sk(sk); - struct udp_sock *up = udp_sk(sk); - int ulen = len; - struct ipcm_cookie ipc; - struct rtable *rt = NULL; - int free = 0; - int connected = 0; - __be32 daddr, faddr, saddr; - __be16 dport; - u8 tos; - int err, is_udplite = IS_UDPLITE(sk); - int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; - int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); - - if (len > 0xFFFF) - return -EMSGSIZE; - - /* - * Check the flags. - */ - - if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ - return -EOPNOTSUPP; - - ipc.opt = NULL; - - if (up->pending) { - /* - * There are pending frames. - * The socket lock must be held while it's corked. - */ - lock_sock(sk); - if (likely(up->pending)) { - if (unlikely(up->pending != AF_INET)) { - release_sock(sk); - return -EINVAL; - } - goto do_append_data; - } - release_sock(sk); - } - ulen += sizeof(struct udphdr); - - /* - * Get and verify the address. - */ - if (msg->msg_name) { - struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; - if (msg->msg_namelen < sizeof(*usin)) - return -EINVAL; - if (usin->sin_family != AF_INET) { - if (usin->sin_family != AF_UNSPEC) - return -EAFNOSUPPORT; - } - - daddr = usin->sin_addr.s_addr; - dport = usin->sin_port; - if (dport == 0) - return -EINVAL; - } else { - if (sk->sk_state != TCP_ESTABLISHED) - return -EDESTADDRREQ; - daddr = inet->daddr; - dport = inet->dport; - /* Open fast path for connected socket. - Route will not be used, if at least one option is set. - */ - connected = 1; - } - ipc.addr = inet->saddr; - - ipc.oif = sk->sk_bound_dev_if; - if (msg->msg_controllen) { - err = ip_cmsg_send(msg, &ipc); - if (err) - return err; - if (ipc.opt) - free = 1; - connected = 0; - } - if (!ipc.opt) - ipc.opt = inet->opt; - - saddr = ipc.addr; - ipc.addr = faddr = daddr; - - if (ipc.opt && ipc.opt->srr) { - if (!daddr) - return -EINVAL; - faddr = ipc.opt->faddr; - connected = 0; - } - tos = RT_TOS(inet->tos); - if (sock_flag(sk, SOCK_LOCALROUTE) || - (msg->msg_flags & MSG_DONTROUTE) || - (ipc.opt && ipc.opt->is_strictroute)) { - tos |= RTO_ONLINK; - connected = 0; - } - - if (ipv4_is_multicast(daddr)) { - if (!ipc.oif) - ipc.oif = inet->mc_index; - if (!saddr) - saddr = inet->mc_addr; - connected = 0; - } - - if (connected) - rt = (struct rtable*)sk_dst_check(sk, 0); - - if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, - .nl_u = { .ip4_u = - { .daddr = faddr, - .saddr = saddr, - .tos = tos } }, - .proto = sk->sk_protocol, - .uli_u = { .ports = - { .sport = inet->sport, - .dport = dport } } }; - security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); - if (err) { - if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); - goto out; - } - - err = -EACCES; - if ((rt->rt_flags & RTCF_BROADCAST) && - !sock_flag(sk, SOCK_BROADCAST)) - goto out; - if (connected) - sk_dst_set(sk, dst_clone(&rt->u.dst)); - } - - if (msg->msg_flags&MSG_CONFIRM) - goto do_confirm; -back_from_confirm: - - saddr = rt->rt_src; - if (!ipc.addr) - daddr = ipc.addr = rt->rt_dst; - - lock_sock(sk); - if (unlikely(up->pending)) { - /* The socket is already corked while preparing it. */ - /* ... which is an evident application bug. --ANK */ - release_sock(sk); - - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); - err = -EINVAL; - goto out; - } - /* - * Now cork the socket to pend data. - */ - inet->cork.fl.fl4_dst = daddr; - inet->cork.fl.fl_ip_dport = dport; - inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl_ip_sport = inet->sport; - up->pending = AF_INET; - -do_append_data: - up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, rt, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); - if (err) - udp_flush_pending_frames(sk); - else if (!corkreq) - err = udp_push_pending_frames(sk); - else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) - up->pending = 0; - release_sock(sk); - -out: - ip_rt_put(rt); - if (free) - kfree(ipc.opt); - if (!err) - return len; - /* - * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting - * ENOBUFS might not be good (it's not tunable per se), but otherwise - * we don't have a good statistic (IpOutDiscards but it can be too many - * things). We could add another new stat but at least for now that - * seems like overkill. - */ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); - } - return err; - -do_confirm: - dst_confirm(&rt->u.dst); - if (!(msg->msg_flags&MSG_PROBE) || len) - goto back_from_confirm; - err = 0; - goto out; -} - -int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct udp_sock *up = udp_sk(sk); - int ret; - - if (!up->pending) { - struct msghdr msg = { .msg_flags = flags|MSG_MORE }; - - /* Call udp_sendmsg to specify destination address which - * sendpage interface can't pass. - * This will succeed only when the socket is connected. - */ - ret = udp_sendmsg(NULL, sk, &msg, 0); - if (ret < 0) - return ret; - } - - lock_sock(sk); - - if (unlikely(!up->pending)) { - release_sock(sk); - - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); - return -EINVAL; - } - - ret = ip_append_page(sk, page, offset, size, flags); - if (ret == -EOPNOTSUPP) { - release_sock(sk); - return sock_no_sendpage(sk->sk_socket, page, offset, - size, flags); - } - if (ret < 0) { - udp_flush_pending_frames(sk); - goto out; - } - - up->len += size; - if (!(up->corkflag || (flags&MSG_MORE))) - ret = udp_push_pending_frames(sk); - if (!ret) - ret = size; -out: - release_sock(sk); - return ret; -} - -/* - * This should be easy, if there is something there we - * return it, otherwise we block. - */ - -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) -{ - struct inet_sock *inet = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; - struct sk_buff *skb; - unsigned int ulen, copied; - int peeked; - int err; - int is_udplite = IS_UDPLITE(sk); - - /* - * Check any passed addresses - */ - if (addr_len) - *addr_len=sizeof(*sin); - - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); - -try_again: - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); - if (!skb) - goto out; - - ulen = skb->len - sizeof(struct udphdr); - copied = len; - if (copied > ulen) - copied = ulen; - else if (copied < ulen) - msg->msg_flags |= MSG_TRUNC; - - /* - * If checksum is needed at all, try to do it while copying the - * data. If the data is truncated, or if we only want a partial - * coverage checksum (UDP-Lite), do it before the copy. - */ - - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) - goto csum_copy_err; - } - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied ); - else { - err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); - - if (err == -EINVAL) - goto csum_copy_err; - } - - if (err) - goto out_free; - - if (!peeked) - UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); - - sock_recv_timestamp(msg, sk, skb); - - /* Copy the address. */ - if (sin) - { - sin->sin_family = AF_INET; - sin->sin_port = udp_hdr(skb)->source; - sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } - if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); - - err = copied; - if (flags & MSG_TRUNC) - err = ulen; - -out_free: - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); -out: - return err; - -csum_copy_err: - lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); - release_sock(sk); - - if (noblock) - return -EAGAIN; - goto try_again; -} - - -/* returns: - * -1: error - * 0: success - * >0: "udp encap" protocol resubmission - * - * Note that in the success and error cases, the skb is assumed to - * have either been requeued or freed. - */ -int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) -{ - struct udp_sock *up = udp_sk(sk); - int rc; - int is_udplite = IS_UDPLITE(sk); - - /* - * Charge it to the socket, dropping if the queue is full. - */ - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) - goto drop; - nf_reset(skb); - - if (up->encap_type) { - /* - * This is an encapsulation socket so pass the skb to - * the socket's udp_encap_rcv() hook. Otherwise, just - * fall through and pass this up the UDP socket. - * up->encap_rcv() returns the following value: - * =0 if skb was successfully passed to the encap - * handler or was discarded by it. - * >0 if skb should be passed on to UDP. - * <0 if skb should be resubmitted as proto -N - */ - - /* if we're overly short, let UDP handle it */ - if (skb->len > sizeof(struct udphdr) && - up->encap_rcv != NULL) { - int ret; - - ret = (*up->encap_rcv)(sk, skb); - if (ret <= 0) { - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, - is_udplite); - return -ret; - } - } - - /* FALLTHROUGH -- it's a UDP Packet */ - } - - /* - * UDP-Lite specific tests, ignored on UDP sockets - */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { - - /* - * MIB statistics other than incrementing the error count are - * disabled for the following two types of errors: these depend - * on the application settings, not on the functioning of the - * protocol stack as such. - * - * RFC 3828 here recommends (sec 3.3): "There should also be a - * way ... to ... at least let the receiving application block - * delivery of packets with coverage values less than a value - * provided by the application." - */ - if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " - "%d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); - goto drop; - } - /* The next case involves violating the min. coverage requested - * by the receiver. This is subtle: if receiver wants x and x is - * greater than the buffersize/MTU then receiver will complain - * that it wants x while sender emits packets of smaller size y. - * Therefore the above ...()->partial_cov statement is essential. - */ - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING - "UDPLITE: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); - goto drop; - } - } - - if (sk->sk_filter) { - if (udp_lib_checksum_complete(skb)) - goto drop; - } - - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop; - } - - return 0; - -drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); - kfree_skb(skb); - return -1; -} - -/* - * Multicasts and broadcasts go to each listener. - * - * Note: called only from the BH handler context, - * so we don't need to lock the hashes. - */ -static int __udp4_lib_mcast_deliver(struct sk_buff *skb, - struct udphdr *uh, - __be32 saddr, __be32 daddr, - struct hlist_head udptable[]) -{ - struct sock *sk; - int dif; - - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); - dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); - if (sk) { - struct sock *sknext = NULL; - - do { - struct sk_buff *skb1 = skb; - - sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, - uh->source, saddr, dif); - if (sknext) - skb1 = skb_clone(skb, GFP_ATOMIC); - - if (skb1) { - int ret = 0; - - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - - if (ret > 0) - /* we should probably re-process instead - * of dropping packets here. */ - kfree_skb(skb1); - } - sk = sknext; - } while (sknext); - } else - kfree_skb(skb); - read_unlock(&udp_hash_lock); - return 0; -} - -/* Initialize UDP checksum. If exited with zero value (success), - * CHECKSUM_UNNECESSARY means, that no more checks are required. - * Otherwise, csum completion requires chacksumming packet body, - * including udp header and folding it to skb->csum. - */ -static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) -{ - const struct iphdr *iph; - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (IS_PROTO_UDPLITE(proto)) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - } - - iph = ip_hdr(skb); - if (uh->check == 0) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - if (!skb_csum_unnecessary(skb)) - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, proto, 0); - /* Probably, we should checksum udp header (it should be in cache - * in any case) and data in tiny packets (< rx copybreak). - */ - - return 0; -} - -/* - * All we need to do is get the socket, and then do a checksum. - */ - -int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], - int proto) -{ - struct sock *sk; - struct udphdr *uh = udp_hdr(skb); - unsigned short ulen; - struct rtable *rt = skb->rtable; - __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; - - /* - * Validate the packet. - */ - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto drop; /* No space for header. */ - - ulen = ntohs(uh->len); - if (ulen > skb->len) - goto short_packet; - - if (IS_PROTO_UDPLITE(proto)) { - /* UDP validates ulen. */ - if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) - goto short_packet; - uh = udp_hdr(skb); - } - - if (udp4_csum_init(skb, uh, proto)) - goto csum_error; - - if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); - - sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, - uh->dest, inet_iif(skb), udptable); - - if (sk != NULL) { - int ret = 0; - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - sock_put(sk); - - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ - if (ret > 0) - return -ret; - return 0; - } - - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - nf_reset(skb); - - /* No socket. Drop packet silently, if checksum is wrong */ - if (udp_lib_checksum_complete(skb)) - goto csum_error; - - UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - - /* - * Hmm. We got an UDP packet to a port to which we - * don't wanna listen. Ignore it. - */ - kfree_skb(skb); - return 0; - -short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", - IS_PROTO_UDPLITE(proto) ? "-Lite" : "", - NIPQUAD(saddr), - ntohs(uh->source), - ulen, - skb->len, - NIPQUAD(daddr), - ntohs(uh->dest)); - goto drop; - -csum_error: - /* - * RFC1122: OK. Discards the bad packet silently (as far as - * the network is concerned, anyway) as per 4.1.3.4 (MUST). - */ - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", - IS_PROTO_UDPLITE(proto) ? "-Lite" : "", - NIPQUAD(saddr), - ntohs(uh->source), - NIPQUAD(daddr), - ntohs(uh->dest), - ulen); -drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); - kfree_skb(skb); - return 0; -} - -int udp_rcv(struct sk_buff *skb) -{ - return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); -} - -int udp_destroy_sock(struct sock *sk) -{ - lock_sock(sk); - udp_flush_pending_frames(sk); - release_sock(sk); - return 0; -} - -int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_push_pending_frames); - return ip_setsockopt(sk, level, optname, optval, optlen); -} - -#ifdef CONFIG_COMPAT -int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_push_pending_frames); - return compat_ip_setsockopt(sk, level, optname, optval, optlen); -} -#endif - -int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return ip_getsockopt(sk, level, optname, optval, optlen); -} - -#ifdef CONFIG_COMPAT -int compat_udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return compat_ip_getsockopt(sk, level, optname, optval, optlen); -} -#endif - -/* ------------------------------------------------------------------------ */ -DEFINE_PROTO_INUSE(udp) - -struct proto udp_prot = { - .name = "UDP", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .destroy = udp_destroy_sock, - .setsockopt = udp_setsockopt, - .getsockopt = udp_getsockopt, - .sendmsg = udp_sendmsg, - .recvmsg = udp_recvmsg, - .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .get_port = udp_v4_get_port, - .memory_allocated = &udp_memory_allocated, - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem = &sysctl_udp_wmem_min, - .sysctl_rmem = &sysctl_udp_rmem_min, - .obj_size = sizeof(struct udp_sock), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udp_setsockopt, - .compat_getsockopt = compat_udp_getsockopt, -#endif - REF_PROTO_INUSE(udp) -}; - -/* ------------------------------------------------------------------------ */ -static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - __be32 dest = inet->daddr; - __be32 src = inet->rcv_saddr; - __u16 destp = ntohs(inet->dport); - __u16 srcp = ntohs(inet->sport); - - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", - bucket, src, srcp, dest, destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp); -} - -int udp4_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-127s\n", - " sl local_address rem_address st tx_queue " - "rx_queue tr tm->when retrnsmt uid timeout " - "inode"); - else { - char tmpbuf[129]; - struct udp_iter_state *state = seq->private; - - udp4_format_sock(v, tmpbuf, state->bucket); - seq_printf(seq, "%-127s\n", tmpbuf); - } - return 0; -} - -/* ------------------------------------------------------------------------ */ -#ifdef CONFIG_PROC_FS -static struct file_operations udp4_seq_fops; -static struct udp_seq_afinfo udp4_seq_afinfo = { - .owner = THIS_MODULE, - .name = "udp", - .family = AF_INET, - .hashtable = udp_hash, - .seq_show = udp4_seq_show, - .seq_fops = &udp4_seq_fops, -}; - -int __init udp4_proc_init(void) -{ - return udp_proc_register(&udp4_seq_afinfo); -} - -void udp4_proc_exit(void) -{ - udp_proc_unregister(&udp4_seq_afinfo); -} -#endif /* CONFIG_PROC_FS */ - -EXPORT_SYMBOL(udp_prot); -EXPORT_SYMBOL(udp_sendmsg); - diff --git a/net/ipv4/udplite_ipv4.c b/net/ipv4/udplite.c index d49c6d68c8a..d49c6d68c8a 100644 --- a/net/ipv4/udplite_ipv4.c +++ b/net/ipv4/udplite.c diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 107051f7c22..ae14617e607 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ - route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp_ipv6.o \ + route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o @@ -17,7 +17,6 @@ ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o -ipv6-$(CONFIG_IP_UDPLITE) += udplite_ipv6.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index afe9276d042..730a861b8f4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -813,16 +813,12 @@ static int __init init_ipv6_mibs(void) goto err_icmpmsg_mib; if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udp_mib; -#ifdef CONFIG_IP_UDPLITE if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udplite_mib; -#endif return 0; -#ifdef CONFIG_IP_UDPLITE err_udplite_mib: -#endif snmp_mib_free((void **)udp_stats_in6); err_udp_mib: snmp_mib_free((void **)icmpv6msg_statistics); @@ -841,9 +837,7 @@ static void cleanup_ipv6_mibs(void) snmp_mib_free((void **)icmpv6_statistics); snmp_mib_free((void **)icmpv6msg_statistics); snmp_mib_free((void **)udp_stats_in6); -#ifdef CONFIG_IP_UDPLITE snmp_mib_free((void **)udplite_stats_in6); -#endif } static int inet6_net_init(struct net *net) @@ -888,11 +882,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_tcp_proto; -#ifdef CONFIG_IP_UDPLITE err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; -#endif err = proto_register(&rawv6_prot, 1); if (err) @@ -1063,10 +1055,8 @@ out_sock_register_fail: out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: -#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); out_unregister_udp_proto: -#endif proto_unregister(&udpv6_prot); out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); @@ -1085,9 +1075,7 @@ static void __exit inet6_exit(void) ipv6_sysctl_unregister(); #endif udpv6_exit(); -#ifdef CONFIG_IP_UDPLITE udplitev6_exit(); -#endif tcpv6_exit(); /* Cleanup code parts. */ @@ -1117,9 +1105,7 @@ static void __exit inet6_exit(void) unregister_pernet_subsys(&inet6_net_ops); cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); -#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); -#endif proto_unregister(&udpv6_prot); proto_unregister(&tcpv6_prot); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 937018529d1..98762fde2b6 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -404,6 +404,7 @@ int ip6_forward(struct sk_buff *skb) struct dst_entry *dst = skb->dst; struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dst->dev->nd_net; if (ipv6_devconf.forwarding == 0) goto error; @@ -450,7 +451,7 @@ int ip6_forward(struct sk_buff *skb) /* XXX: idev->cnf.proxy_ndp? */ if (ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) { + pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) return ip6_input(skb); @@ -911,9 +912,10 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct dst_entry **dst, struct flowi *fl) { int err; + struct net *net = sk->sk_net; if (*dst == NULL) - *dst = ip6_route_output(sk->sk_net, sk, fl); + *dst = ip6_route_output(net, sk, fl); if ((err = (*dst)->error)) goto out_err_release; @@ -939,7 +941,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct flowi fl_gw; int redirect; - ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src, + ifp = ipv6_get_ifaddr(net, &fl->fl6_src, (*dst)->dev, 1); redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); @@ -954,7 +956,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, dst_release(*dst); memcpy(&fl_gw, fl, sizeof(struct flowi)); memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); - *dst = ip6_route_output(sk->sk_net, sk, &fl_gw); + *dst = ip6_route_output(net, sk, &fl_gw); if ((err = (*dst)->error)) goto out_err_release; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3bbfdff698d..c11c76cab37 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -107,6 +107,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sk->sk_net; int val, valbool; int retv = -ENOPROTOOPT; @@ -127,9 +128,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct sk_buff *pktopt; if (sk->sk_protocol != IPPROTO_UDP && -#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && -#endif sk->sk_protocol != IPPROTO_TCP) break; @@ -169,7 +168,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } else { struct proto *prot = &udp_prot; - if (IS_PROTO_UDPLITE(sk->sk_protocol)) + if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; local_bh_disable(); sock_prot_inuse_add(sk->sk_prot, -1); @@ -434,7 +433,7 @@ done: if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val) goto e_inval; - if (__dev_get_by_index(&init_net, val) == NULL) { + if (__dev_get_by_index(net, val) == NULL) { retv = -ENODEV; break; } @@ -734,9 +733,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && -#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && -#endif sk->sk_protocol != IPPROTO_TCP) return -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 197ca390a15..f2879056fab 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -126,8 +126,6 @@ static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; /* Big mc list lock for all the sockets */ static DEFINE_RWLOCK(ipv6_sk_mc_lock); -static struct socket *igmp6_socket; - int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr); static void igmp6_join_group(struct ifmcaddr6 *ma); @@ -183,6 +181,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) struct net_device *dev = NULL; struct ipv6_mc_socklist *mc_lst; struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sk->sk_net; int err; if (!ipv6_addr_is_multicast(addr)) @@ -208,14 +207,14 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(&init_net, addr, NULL, 0, 0); + rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(net, ifindex); if (dev == NULL) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -256,6 +255,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst, **lnk; + struct net *net = sk->sk_net; write_lock_bh(&ipv6_sk_mc_lock); for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { @@ -266,7 +266,8 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) *lnk = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) { + dev = dev_get_by_index(net, mc_lst->ifindex); + if (dev != NULL) { struct inet6_dev *idev = in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -286,7 +287,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) return -EADDRNOTAVAIL; } -static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex) +static struct inet6_dev *ip6_mc_find_dev(struct net *net, + struct in6_addr *group, + int ifindex) { struct net_device *dev = NULL; struct inet6_dev *idev = NULL; @@ -294,14 +297,14 @@ static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex) if (ifindex == 0) { struct rt6_info *rt; - rt = rt6_lookup(&init_net, group, NULL, 0, 0); + rt = rt6_lookup(net, group, NULL, 0, 0); if (rt) { dev = rt->rt6i_dev; dev_hold(dev); dst_release(&rt->u.dst); } } else - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(net, ifindex); if (!dev) return NULL; @@ -324,6 +327,7 @@ void ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; + struct net *net = sk->sk_net; write_lock_bh(&ipv6_sk_mc_lock); while ((mc_lst = np->ipv6_mc_list) != NULL) { @@ -332,7 +336,7 @@ void ipv6_sock_mc_close(struct sock *sk) np->ipv6_mc_list = mc_lst->next; write_unlock_bh(&ipv6_sk_mc_lock); - dev = dev_get_by_index(&init_net, mc_lst->ifindex); + dev = dev_get_by_index(net, mc_lst->ifindex); if (dev) { struct inet6_dev *idev = in6_dev_get(dev); @@ -361,6 +365,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; + struct net *net = sk->sk_net; int i, j, rv; int leavegroup = 0; int pmclocked = 0; @@ -376,7 +381,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(group, pgsr->gsr_interface); + idev = ip6_mc_find_dev(net, group, pgsr->gsr_interface); if (!idev) return -ENODEV; dev = idev->dev; @@ -500,6 +505,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) struct inet6_dev *idev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *newpsl, *psl; + struct net *net = sk->sk_net; int leavegroup = 0; int i, err; @@ -511,7 +517,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) gsf->gf_fmode != MCAST_EXCLUDE) return -EINVAL; - idev = ip6_mc_find_dev(group, gsf->gf_interface); + idev = ip6_mc_find_dev(net, group, gsf->gf_interface); if (!idev) return -ENODEV; @@ -592,13 +598,14 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, struct net_device *dev; struct ipv6_pinfo *inet6 = inet6_sk(sk); struct ip6_sf_socklist *psl; + struct net *net = sk->sk_net; group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr; if (!ipv6_addr_is_multicast(group)) return -EINVAL; - idev = ip6_mc_find_dev(group, gsf->gf_interface); + idev = ip6_mc_find_dev(net, group, gsf->gf_interface); if (!idev) return -ENODEV; @@ -1393,7 +1400,8 @@ mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) static struct sk_buff *mld_newpack(struct net_device *dev, int size) { - struct sock *sk = igmp6_socket->sk; + struct net *net = dev->nd_net; + struct sock *sk = net->ipv6.igmp_sk; struct sk_buff *skb; struct mld2_report *pmr; struct in6_addr addr_buf; @@ -1440,6 +1448,7 @@ static void mld_sendpack(struct sk_buff *skb) (struct mld2_report *)skb_transport_header(skb); int payload_len, mldlen; struct inet6_dev *idev = in6_dev_get(skb->dev); + struct net *net = skb->dev->nd_net; int err; struct flowi fl; @@ -1459,7 +1468,7 @@ static void mld_sendpack(struct sk_buff *skb) goto err_out; } - icmpv6_flow_init(igmp6_socket->sk, &fl, ICMPV6_MLD2_REPORT, + icmpv6_flow_init(net->ipv6.igmp_sk, &fl, ICMPV6_MLD2_REPORT, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); @@ -1753,7 +1762,8 @@ static void mld_send_cr(struct inet6_dev *idev) static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { - struct sock *sk = igmp6_socket->sk; + struct net *net = dev->nd_net; + struct sock *sk = net->ipv6.igmp_sk; struct inet6_dev *idev; struct sk_buff *skb; struct icmp6hdr *hdr; @@ -1824,7 +1834,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) goto err_out; } - icmpv6_flow_init(igmp6_socket->sk, &fl, type, + icmpv6_flow_init(sk, &fl, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); @@ -2334,6 +2344,7 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) #ifdef CONFIG_PROC_FS struct igmp6_mc_iter_state { + struct seq_net_private p; struct net_device *dev; struct inet6_dev *idev; }; @@ -2344,9 +2355,10 @@ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq) { struct ifmcaddr6 *im = NULL; struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); + struct net *net = state->p.net; state->idev = NULL; - for_each_netdev(&init_net, state->dev) { + for_each_netdev(net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (!idev) @@ -2448,8 +2460,8 @@ static const struct seq_operations igmp6_mc_seq_ops = { static int igmp6_mc_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &igmp6_mc_seq_ops, - sizeof(struct igmp6_mc_iter_state)); + return seq_open_net(inode, file, &igmp6_mc_seq_ops, + sizeof(struct igmp6_mc_iter_state)); } static const struct file_operations igmp6_mc_seq_fops = { @@ -2457,10 +2469,11 @@ static const struct file_operations igmp6_mc_seq_fops = { .open = igmp6_mc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; struct igmp6_mcf_iter_state { + struct seq_net_private p; struct net_device *dev; struct inet6_dev *idev; struct ifmcaddr6 *im; @@ -2473,10 +2486,11 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) struct ip6_sf_list *psf = NULL; struct ifmcaddr6 *im = NULL; struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); + struct net *net = state->p.net; state->idev = NULL; state->im = NULL; - for_each_netdev(&init_net, state->dev) { + for_each_netdev(net, state->dev) { struct inet6_dev *idev; idev = in6_dev_get(state->dev); if (unlikely(idev == NULL)) @@ -2608,8 +2622,8 @@ static const struct seq_operations igmp6_mcf_seq_ops = { static int igmp6_mcf_seq_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &igmp6_mcf_seq_ops, - sizeof(struct igmp6_mcf_iter_state)); + return seq_open_net(inode, file, &igmp6_mcf_seq_ops, + sizeof(struct igmp6_mcf_iter_state)); } static const struct file_operations igmp6_mcf_seq_fops = { @@ -2617,26 +2631,27 @@ static const struct file_operations igmp6_mcf_seq_fops = { .open = igmp6_mcf_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif -int __init igmp6_init(void) +static int igmp6_net_init(struct net *net) { struct ipv6_pinfo *np; + struct socket *sock; struct sock *sk; int err; - err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &igmp6_socket); + err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &sock); if (err < 0) { printk(KERN_ERR "Failed to initialize the IGMP6 control socket (err %d).\n", err); - igmp6_socket = NULL; /* For safety. */ - return err; + goto out; } - sk = igmp6_socket->sk; + net->ipv6.igmp_sk = sk = sock->sk; + sk_change_net(sk, net); sk->sk_allocation = GFP_ATOMIC; sk->sk_prot->unhash(sk); @@ -2644,20 +2659,45 @@ int __init igmp6_init(void) np->hop_limit = 1; #ifdef CONFIG_PROC_FS - proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops); - proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops); + err = -ENOMEM; + if (!proc_net_fops_create(net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops)) + goto out_sock_create; + if (!proc_net_fops_create(net, "mcfilter6", S_IRUGO, + &igmp6_mcf_seq_fops)) { + proc_net_remove(net, "igmp6"); + goto out_sock_create; + } #endif - return 0; + err = 0; +out: + return err; + +out_sock_create: + sk_release_kernel(net->ipv6.igmp_sk); + goto out; } -void igmp6_cleanup(void) +static void igmp6_net_exit(struct net *net) { - sock_release(igmp6_socket); - igmp6_socket = NULL; /* for safety */ - + sk_release_kernel(net->ipv6.igmp_sk); #ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "mcfilter6"); - proc_net_remove(&init_net, "igmp6"); + proc_net_remove(net, "mcfilter6"); + proc_net_remove(net, "igmp6"); #endif } + +static struct pernet_operations igmp6_net_ops = { + .init = igmp6_net_init, + .exit = igmp6_net_exit, +}; + +int __init igmp6_init(void) +{ + return register_pernet_subsys(&igmp6_net_ops); +} + +void igmp6_cleanup(void) +{ + unregister_pernet_subsys(&igmp6_net_ops); +} diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e0d023360a8..24e76ed9888 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -89,8 +89,6 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> -static struct socket *ndisc_socket; - static u32 ndisc_hash(const void *pkey, const struct net_device *dev); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); @@ -449,7 +447,8 @@ static void __ndisc_send(struct net_device *dev, { struct flowi fl; struct dst_entry *dst; - struct sock *sk = ndisc_socket->sk; + struct net *net = dev->nd_net; + struct sock *sk = net->ipv6.ndisc_sk; struct sk_buff *skb; struct icmp6hdr *hdr; struct inet6_dev *idev; @@ -459,8 +458,7 @@ static void __ndisc_send(struct net_device *dev, type = icmp6h->icmp6_type; - icmpv6_flow_init(ndisc_socket->sk, &fl, type, - saddr, daddr, dev->ifindex); + icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex); dst = icmp6_dst_alloc(dev, neigh, daddr); if (!dst) @@ -541,7 +539,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, }; /* for anycast or proxy, solicited_addr != src_addr */ - ifp = ipv6_get_ifaddr(&init_net, solicited_addr, dev, 1); + ifp = ipv6_get_ifaddr(dev->nd_net, solicited_addr, dev, 1); if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) @@ -601,7 +599,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, * suppress the inclusion of the sllao. */ if (send_sllao) { - struct inet6_ifaddr *ifp = ipv6_get_ifaddr(&init_net, saddr, + struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev->nd_net, saddr, dev, 1); if (ifp) { if (ifp->flags & IFA_F_OPTIMISTIC) { @@ -639,7 +637,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(&init_net, &ipv6_hdr(skb)->saddr, dev, 1)) + if (skb && ipv6_chk_addr(dev->nd_net, &ipv6_hdr(skb)->saddr, dev, 1)) saddr = &ipv6_hdr(skb)->saddr; if ((probes -= neigh->parms->ucast_probes) < 0) { @@ -727,7 +725,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) inc = ipv6_addr_is_multicast(daddr); - if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1)) != NULL) { + ifp = ipv6_get_ifaddr(dev->nd_net, &msg->target, dev, 1); + if (ifp) { if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { @@ -775,7 +774,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, &init_net, + (pneigh = pneigh_lookup(&nd_tbl, dev->nd_net, &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && @@ -885,7 +884,8 @@ static void ndisc_recv_na(struct sk_buff *skb) return; } } - if ((ifp = ipv6_get_ifaddr(&init_net, &msg->target, dev, 1))) { + ifp = ipv6_get_ifaddr(dev->nd_net, &msg->target, dev, 1); + if (ifp) { if (ifp->flags & IFA_F_TENTATIVE) { addrconf_dad_failure(ifp); return; @@ -916,7 +916,7 @@ static void ndisc_recv_na(struct sk_buff *skb) */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) { + pneigh_lookup(&nd_tbl, dev->nd_net, &msg->target, dev, 0)) { /* XXX: idev->cnf.prixy_ndp */ goto out; } @@ -1006,6 +1006,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) struct sk_buff *skb; struct nlmsghdr *nlh; struct nduseroptmsg *ndmsg; + struct net *net = ra->dev->nd_net; int err; int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg) + (opt->nd_opt_len << 3)); @@ -1035,7 +1036,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) &ipv6_hdr(ra)->saddr); nlmsg_end(skb, nlh); - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_ND_USEROPT, NULL, + err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); if (err < 0) goto errout; @@ -1046,7 +1047,7 @@ nla_put_failure: nlmsg_free(skb); err = -EMSGSIZE; errout: - rtnl_set_sk_err(&init_net, RTNLGRP_ND_USEROPT, err); + rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); } static void ndisc_router_discovery(struct sk_buff *skb) @@ -1391,13 +1392,14 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, struct in6_addr *target) { - struct sock *sk = ndisc_socket->sk; + struct net_device *dev = skb->dev; + struct net *net = dev->nd_net; + struct sock *sk = net->ipv6.ndisc_sk; int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); struct sk_buff *buff; struct icmp6hdr *icmph; struct in6_addr saddr_buf; struct in6_addr *addrp; - struct net_device *dev; struct rt6_info *rt; struct dst_entry *dst; struct inet6_dev *idev; @@ -1408,8 +1410,6 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, int hlen; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL; - dev = skb->dev; - if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: no link-local address on %s\n", @@ -1424,10 +1424,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - icmpv6_flow_init(ndisc_socket->sk, &fl, NDISC_REDIRECT, + icmpv6_flow_init(sk, &fl, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return; @@ -1600,9 +1600,6 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, struct net_device *dev = ptr; struct net *net = dev->nd_net; - if (dev->nd_net != &init_net) - return NOTIFY_DONE; - switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); @@ -1719,22 +1716,24 @@ static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, #endif -int __init ndisc_init(void) +static int ndisc_net_init(struct net *net) { + struct socket *sock; struct ipv6_pinfo *np; struct sock *sk; int err; - err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &ndisc_socket); + err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &sock); if (err < 0) { ND_PRINTK0(KERN_ERR "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n", err); - ndisc_socket = NULL; /* For safety. */ return err; } - sk = ndisc_socket->sk; + net->ipv6.ndisc_sk = sk = sock->sk; + sk_change_net(sk, net); + np = inet6_sk(sk); sk->sk_allocation = GFP_ATOMIC; np->hop_limit = 255; @@ -1742,21 +1741,52 @@ int __init ndisc_init(void) np->mc_loop = 0; sk->sk_prot->unhash(sk); + return 0; +} + +static void ndisc_net_exit(struct net *net) +{ + sk_release_kernel(net->ipv6.ndisc_sk); +} + +static struct pernet_operations ndisc_net_ops = { + .init = ndisc_net_init, + .exit = ndisc_net_exit, +}; + +int __init ndisc_init(void) +{ + int err; + + err = register_pernet_subsys(&ndisc_net_ops); + if (err) + return err; /* * Initialize the neighbour table */ - neigh_table_init(&nd_tbl); #ifdef CONFIG_SYSCTL - neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH, - "ipv6", - &ndisc_ifinfo_sysctl_change, - &ndisc_ifinfo_sysctl_strategy); + err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, + NET_IPV6_NEIGH, "ipv6", + &ndisc_ifinfo_sysctl_change, + &ndisc_ifinfo_sysctl_strategy); + if (err) + goto out_unregister_pernet; #endif + err = register_netdevice_notifier(&ndisc_netdev_notifier); + if (err) + goto out_unregister_sysctl; +out: + return err; - register_netdevice_notifier(&ndisc_netdev_notifier); - return 0; +out_unregister_sysctl: +#ifdef CONFIG_SYSCTL + neigh_sysctl_unregister(&nd_tbl.parms); +out_unregister_pernet: +#endif + unregister_pernet_subsys(&ndisc_net_ops); + goto out; } void ndisc_cleanup(void) @@ -1766,6 +1796,5 @@ void ndisc_cleanup(void) neigh_sysctl_unregister(&nd_tbl.parms); #endif neigh_table_clear(&nd_tbl); - sock_release(ndisc_socket); - ndisc_socket = NULL; /* For safety. */ + unregister_pernet_subsys(&ndisc_net_ops); } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 2453f2229ef..8a5be290c71 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -39,10 +39,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", sock_prot_inuse_get(&udpv6_prot)); -#ifdef CONFIG_IP_UDPLITE seq_printf(seq, "UDPLITE6: inuse %d\n", sock_prot_inuse_get(&udplitev6_prot)); -#endif seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", @@ -113,7 +111,6 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; -#ifdef CONFIG_IP_UDPLITE static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), @@ -121,7 +118,6 @@ static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_SENTINEL }; -#endif static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) { @@ -180,9 +176,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); -#ifdef CONFIG_IP_UDPLITE snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); -#endif } return 0; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 15e9a86f28c..a4b5aee0f68 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2261,7 +2261,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_reset_mac_header(skb); skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - rt = (struct rt6_info*) ip6_route_output(&init_net, NULL, &fl); + rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); skb->dst = &rt->u.dst; err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b47cce8fea4..caf0cc1c00e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -69,9 +69,6 @@ #include <linux/crypto.h> #include <linux/scatterlist.h> -/* Socket used for sending RSTs and ACKs */ -static struct socket *tcp6_socket; - static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req); static void tcp_v6_send_check(struct sock *sk, int len, @@ -991,6 +988,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; + struct net *net = skb->dst->dev->nd_net; + struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(*th); #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; @@ -1075,10 +1074,10 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) * Underlying function will use this to retrieve the network * namespace */ - if (!ip6_dst_lookup(tcp6_socket->sk, &buff->dst, &fl)) { + if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0); + ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); TCP_INC_STATS_BH(TCP_MIB_OUTRSTS); return; @@ -1094,6 +1093,8 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; + struct net *net = skb->dev->nd_net; + struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); __be32 *topt; #ifdef CONFIG_TCP_MD5SIG @@ -1175,9 +1176,9 @@ static void tcp_v6_send_ack(struct tcp_timewait_sock *tw, fl.fl_ip_sport = t1->source; security_skb_classify_flow(skb, &fl); - if (!ip6_dst_lookup(tcp6_socket->sk, &buff->dst, &fl)) { + if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0); + ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(TCP_MIB_OUTSEGS); return; } @@ -2198,6 +2199,31 @@ static struct inet_protosw tcpv6_protosw = { INET_PROTOSW_ICSK, }; +static int tcpv6_net_init(struct net *net) +{ + int err; + struct socket *sock; + struct sock *sk; + + err = inet_csk_ctl_sock_create(&sock, PF_INET6, SOCK_RAW, IPPROTO_TCP); + if (err) + return err; + + net->ipv6.tcp_sk = sk = sock->sk; + sk_change_net(sk, net); + return err; +} + +static void tcpv6_net_exit(struct net *net) +{ + sk_release_kernel(net->ipv6.tcp_sk); +} + +static struct pernet_operations tcpv6_net_ops = { + .init = tcpv6_net_init, + .exit = tcpv6_net_exit, +}; + int __init tcpv6_init(void) { int ret; @@ -2211,8 +2237,7 @@ int __init tcpv6_init(void) if (ret) goto out_tcpv6_protocol; - ret = inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, - SOCK_RAW, IPPROTO_TCP); + ret = register_pernet_subsys(&tcpv6_net_ops); if (ret) goto out_tcpv6_protosw; out: @@ -2227,7 +2252,7 @@ out_tcpv6_protosw: void tcpv6_exit(void) { - sock_release(tcp6_socket); + unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP); } diff --git a/net/ipv6/udp_ipv6.c b/net/ipv6/udp.c index 55feac7ba71..d6e311f6c8e 100644 --- a/net/ipv6/udp_ipv6.c +++ b/net/ipv6/udp.c @@ -323,6 +323,9 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); + if (s->sk_net != sk->sk_net) + continue; + if (s->sk_hash == num && s->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(s); if (inet->dport) { @@ -400,7 +403,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, UDP_SKB_CB(skb)->partial_cov = 0; UDP_SKB_CB(skb)->cscov = skb->len; - if (IS_PROTO_UDPLITE(proto)) { + if (proto == IPPROTO_UDPLITE) { err = udplite_checksum_init(skb, uh); if (err) return err; @@ -489,7 +492,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); + UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); @@ -510,11 +513,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], short_packet: LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", - IS_PROTO_UDPLITE(proto) ? "-Lite" : "", + proto == IPPROTO_UDPLITE ? "-Lite" : "", ulen, skb->len); discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -890,7 +893,7 @@ int udpv6_destroy_sock(struct sock *sk) int udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (IS_SOL_UDPFAMILY(level)) + if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -900,7 +903,7 @@ int udpv6_setsockopt(struct sock *sk, int level, int optname, int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (IS_SOL_UDPFAMILY(level)) + if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -910,7 +913,7 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (IS_SOL_UDPFAMILY(level)) + if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -919,7 +922,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (IS_SOL_UDPFAMILY(level)) + if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); } diff --git a/net/ipv6/udplite_ipv6.c b/net/ipv6/udplite.c index 87d4202522e..87d4202522e 100644 --- a/net/ipv6/udplite_ipv6.c +++ b/net/ipv6/udplite.c diff --git a/net/tipc/core.c b/net/tipc/core.c index d2d7d32c02c..740aac5cdfb 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -48,16 +48,8 @@ #include "subscr.h" #include "config.h" -int tipc_eth_media_start(void); -void tipc_eth_media_stop(void); -int tipc_handler_start(void); -void tipc_handler_stop(void); -int tipc_socket_init(void); -void tipc_socket_stop(void); -int tipc_netlink_start(void); -void tipc_netlink_stop(void); -#define TIPC_MOD_VER "1.6.2" +#define TIPC_MOD_VER "1.6.3" #ifndef CONFIG_TIPC_ZONES #define CONFIG_TIPC_ZONES 3 @@ -277,7 +269,6 @@ EXPORT_SYMBOL(tipc_register_media); /* TIPC API for external APIs (see tipc_port.h) */ EXPORT_SYMBOL(tipc_createport_raw); -EXPORT_SYMBOL(tipc_set_msg_option); EXPORT_SYMBOL(tipc_reject_msg); EXPORT_SYMBOL(tipc_send_buf_fast); EXPORT_SYMBOL(tipc_acknowledge); diff --git a/net/tipc/core.h b/net/tipc/core.h index feabca58082..3fe9b70331d 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -180,6 +180,12 @@ extern int tipc_core_start(void); extern void tipc_core_stop(void); extern int tipc_core_start_net(void); extern void tipc_core_stop_net(void); +extern int tipc_handler_start(void); +extern void tipc_handler_stop(void); +extern int tipc_netlink_start(void); +extern void tipc_netlink_stop(void); +extern int tipc_socket_init(void); +extern void tipc_socket_stop(void); static inline int delimit(int val, int min, int max) { diff --git a/net/tipc/link.c b/net/tipc/link.c index cefa99824c5..a42f4343010 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2832,15 +2832,15 @@ static void link_set_supervision_props(struct link *l_ptr, u32 tolerance) void tipc_link_set_queue_limits(struct link *l_ptr, u32 window) { /* Data messages from this node, inclusive FIRST_FRAGM */ - l_ptr->queue_limit[DATA_LOW] = window; - l_ptr->queue_limit[DATA_MEDIUM] = (window / 3) * 4; - l_ptr->queue_limit[DATA_HIGH] = (window / 3) * 5; - l_ptr->queue_limit[DATA_CRITICAL] = (window / 3) * 6; + l_ptr->queue_limit[TIPC_LOW_IMPORTANCE] = window; + l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE] = (window / 3) * 4; + l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE] = (window / 3) * 5; + l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE] = (window / 3) * 6; /* Transiting data messages,inclusive FIRST_FRAGM */ - l_ptr->queue_limit[DATA_LOW + 4] = 300; - l_ptr->queue_limit[DATA_MEDIUM + 4] = 600; - l_ptr->queue_limit[DATA_HIGH + 4] = 900; - l_ptr->queue_limit[DATA_CRITICAL + 4] = 1200; + l_ptr->queue_limit[TIPC_LOW_IMPORTANCE + 4] = 300; + l_ptr->queue_limit[TIPC_MEDIUM_IMPORTANCE + 4] = 600; + l_ptr->queue_limit[TIPC_HIGH_IMPORTANCE + 4] = 900; + l_ptr->queue_limit[TIPC_CRITICAL_IMPORTANCE + 4] = 1200; l_ptr->queue_limit[CONN_MANAGER] = 1200; l_ptr->queue_limit[ROUTE_DISTRIBUTOR] = 1200; l_ptr->queue_limit[CHANGEOVER_PROTOCOL] = 2500; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 782485468fb..696a8633df7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -73,10 +73,10 @@ void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str tipc_printf(buf, "NO(%u/%u):",msg_long_msgno(msg), msg_fragm_no(msg)); break; - case DATA_LOW: - case DATA_MEDIUM: - case DATA_HIGH: - case DATA_CRITICAL: + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: tipc_printf(buf, "DAT%u:", msg_user(msg)); if (msg_short(msg)) { tipc_printf(buf, "CON:"); @@ -229,10 +229,10 @@ void tipc_msg_print(struct print_buf *buf, struct tipc_msg *msg, const char *str switch (usr) { case CONN_MANAGER: case NAME_DISTRIBUTOR: - case DATA_LOW: - case DATA_MEDIUM: - case DATA_HIGH: - case DATA_CRITICAL: + case TIPC_LOW_IMPORTANCE: + case TIPC_MEDIUM_IMPORTANCE: + case TIPC_HIGH_IMPORTANCE: + case TIPC_CRITICAL_IMPORTANCE: if (msg_short(msg)) break; /* No error */ switch (msg_errcode(msg)) { diff --git a/net/tipc/msg.h b/net/tipc/msg.h index e9ef6df2656..6ad070d8770 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -40,18 +40,16 @@ #include "core.h" #define TIPC_VERSION 2 -#define DATA_LOW TIPC_LOW_IMPORTANCE -#define DATA_MEDIUM TIPC_MEDIUM_IMPORTANCE -#define DATA_HIGH TIPC_HIGH_IMPORTANCE -#define DATA_CRITICAL TIPC_CRITICAL_IMPORTANCE -#define SHORT_H_SIZE 24 /* Connected,in cluster */ + +#define SHORT_H_SIZE 24 /* Connected, in-cluster messages */ #define DIR_MSG_H_SIZE 32 /* Directly addressed messages */ -#define CONN_MSG_H_SIZE 36 /* Routed connected msgs*/ -#define LONG_H_SIZE 40 /* Named Messages */ +#define LONG_H_SIZE 40 /* Named messages */ #define MCAST_H_SIZE 44 /* Multicast messages */ -#define MAX_H_SIZE 60 /* Inclusive full options */ +#define INT_H_SIZE 40 /* Internal messages */ +#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ +#define MAX_H_SIZE 60 /* Largest possible TIPC header size */ + #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) -#define LINK_CONFIG 13 /* @@ -72,8 +70,10 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask, u32 val) { val = (val & mask) << pos; - m->hdr[w] &= ~htonl(mask << pos); - m->hdr[w] |= htonl(val); + val = htonl(val); + mask = htonl(mask << pos); + m->hdr[w] &= ~mask; + m->hdr[w] |= val; } /* @@ -87,7 +87,7 @@ static inline u32 msg_version(struct tipc_msg *m) static inline void msg_set_version(struct tipc_msg *m) { - msg_set_bits(m, 0, 29, 0xf, TIPC_VERSION); + msg_set_bits(m, 0, 29, 7, TIPC_VERSION); } static inline u32 msg_user(struct tipc_msg *m) @@ -97,7 +97,7 @@ static inline u32 msg_user(struct tipc_msg *m) static inline u32 msg_isdata(struct tipc_msg *m) { - return (msg_user(m) <= DATA_CRITICAL); + return (msg_user(m) <= TIPC_CRITICAL_IMPORTANCE); } static inline void msg_set_user(struct tipc_msg *m, u32 n) @@ -190,18 +190,6 @@ static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 19, 0x3, n); } -static inline void msg_set_options(struct tipc_msg *m, const char *opt, u32 sz) -{ - u32 hsz = msg_hdr_sz(m); - char *to = (char *)&m->hdr[hsz/4]; - - if ((hsz < DIR_MSG_H_SIZE) || ((hsz + sz) > MAX_H_SIZE)) - return; - msg_set_bits(m, 1, 16, 0x7, (hsz - 28)/4); - msg_set_hdr_sz(m, hsz + sz); - memcpy(to, opt, sz); -} - static inline u32 msg_bcast_ack(struct tipc_msg *m) { return msg_bits(m, 1, 0, 0xffff); @@ -330,17 +318,6 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) return (struct tipc_msg *)msg_data(m); } -static inline void msg_expand(struct tipc_msg *m, u32 destnode) -{ - if (!msg_short(m)) - return; - msg_set_hdr_sz(m, LONG_H_SIZE); - msg_set_orignode(m, msg_prevnode(m)); - msg_set_destnode(m, destnode); - memset(&m->hdr[8], 0, 12); -} - - /* TIPC internal message header format, version 2 @@ -388,7 +365,6 @@ static inline void msg_expand(struct tipc_msg *m, u32 destnode) #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 -#define INT_H_SIZE 40 #define DSC_H_SIZE 40 /* diff --git a/net/tipc/port.c b/net/tipc/port.c index f508614ca59..e2646a96935 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -242,7 +242,8 @@ u32 tipc_createport_raw(void *usr_handle, p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; p_ptr->publ.ref = ref; msg = &p_ptr->publ.phdr; - msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0); + msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, + 0); msg_set_orignode(msg, tipc_own_addr); msg_set_prevnode(msg, tipc_own_addr); msg_set_origport(msg, ref); @@ -413,13 +414,6 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, return buf; } -int tipc_set_msg_option(struct tipc_port *tp_ptr, const char *opt, const u32 sz) -{ - msg_expand(&tp_ptr->phdr, msg_destnode(&tp_ptr->phdr)); - msg_set_options(&tp_ptr->phdr, opt, sz); - return TIPC_OK; -} - int tipc_reject_msg(struct sk_buff *buf, u32 err) { struct tipc_msg *msg = buf_msg(buf); @@ -632,7 +626,7 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf) msg_orignode(msg), msg_destport(msg), tipc_own_addr, - DATA_HIGH, + TIPC_HIGH_IMPORTANCE, TIPC_CONN_MSG, err, 0, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9ae8e9f7402..3220d5cb5b5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1419,7 +1419,7 @@ exit: /** * shutdown - shutdown socket connection * @sock: socket structure - * @how: direction to close (unused; always treated as read + write) + * @how: direction to close (must be SHUT_RDWR) * * Terminates connection (if necessary), then purges socket's receive queue. * @@ -1432,7 +1432,8 @@ static int shutdown(struct socket *sock, int how) struct sk_buff *buf; int res; - /* Could return -EINVAL for an invalid "how", but why bother? */ + if (how != SHUT_RDWR) + return -EINVAL; if (mutex_lock_interruptible(&tsock->lock)) return -ERESTARTSYS; |